# Tuesday, 2 August 2016

# Learn how to create Variables, Vectors, Lists, Matrices and Data frames in the R language

# ---- Variables ---------------------------------------------------------------

# Think of a variable as a folder with a label on the front: the contents can
# be inspected or swapped for something else, but the label stays the same.

# Values are stored with the assignment operator `<-`. A name may be built
# from letters, digits, `.` and `_`, and must start with a letter, or with
# `.` followed by a letter. Names are case sensitive.

x <- 1000
x

# Typing a name on its own is shorthand for print(x); show(x) is a near
# equivalent. Some situations require the explicit forms.

print(x)
show(x)
(1 + 1 / x)^x
# Wrapping an assignment in parentheses stores the value and prints it.
(y <- (1 + 1 / x)^x)

# Like most programming languages, R also accepts `=` for assignment,
# alongside `<-`.

x <- 200
x = 300
x

a <- 3
a

b <- sqrt(a * a + 3)
b <- sqrt(a * a)   # overwrites the value assigned on the previous line
b

# ls() lists every variable that has been defined in the current session.

ls()

# ---------------------------Functions---------------------------------------------

# In mathematics a function takes one or more arguments (or inputs)
# and produces one or more outputs (or return values).

# c() combines its arguments into a vector; square brackets pick out an
# element by position.
x <- c(1,27,56,70)
x
x[4]
y <- x[4]
y

# x has only four elements, so position 5 does not exist and R returns NA.
x[5]
# To call or invoke a built-in (or user-defined) function in R you write the name of the function
# followed by its argument values enclosed in parentheses and separated by commas.

1:100

seq(from = 2, to = 9, by = 2)

# Some arguments are optional and have predefined default values; for example,
# if we omit by then R uses by = 1:

seq(from = 1, to = 9)

# To find out about default values and alternative usages of the built-in function fname,
# you can access the built-in help by typing help(fname) or ?fname.

help("sqrt")

?sqrt

# Every function has a default order for its arguments.

# If you provide arguments in this order, then they do not need to be named,

# but you can choose to give the arguments out of order provided
# you give them names in the format argument_name = expression.

seq(1, 9, 2)

seq(to = 9, from = 1)

# by is matched by name first; the unnamed 9 and 1 then fill from and to in
# order, so the sequence counts down from 9 to 1.
seq(by = -2, 9, 1)

# Argument values can be expressions; here by is x/3, i.e. 3.
x <- 9
seq(1, x, x/3)

# ---------------------------------------Vectors -------------------------------------

# A vector is an indexed list of variables. You can think of a vector as a drawer in a filing cabinet:
# the drawer has a name on the outside
# and within it are files labelled sequentially 1,2,3,... from the front.

# In fact, a simple variable is just a vector with length 1.
# To create vectors of length greater than 1, we use functions that produce vector-valued output.

# There are many of these, but the three basic functions for constructing vectors are c(...)
# (combine); seq(from, to, by) (sequence); and rep(x, times) (repeat).

x <- c(1,2,3,4,5,6)

# getwd() prints the current working directory, which is where the relative
# file name passed to read.csv() below is looked up.
getwd()
x
x+1
mean(x)

median(x)

# NOTE(review): assumes a file 4c_data.CSV with a Sales column exists in the
# working directory; the file is not part of this script - confirm before running.
y <- read.csv(file = "4c_data.CSV" )

head(y)

mean(y$Sales)

help("var")

var(x)

# Elements are numbered from 1; an index beyond the end of the vector gives NA.
x[1]
x[9]

# Wrapping an assignment in parentheses prints the assigned value.
(x <- seq(1, 20, by = 2))

(y <- rep(3, 4))

(z <- c(y, x))

z <- c(x, y)   # without parentheses, you have to type z again to see the value

z

(x <- 100:110)

i <- c(1, 3, 2)

j <- c(-1, -2, -3)

# A vector of positive indices selects those elements in the order given;
# negative indices return the vector with those elements left out.
x[i]
x[j]

# It is possible to have a vector with no elements. The function length(x) gives
# the number of elements of x.

x <- c()  

length(x)

# Algebraic operations on vectors act on each element separately, that is elementwise.

x <- c(1, 2, 3)
y <- c(4, 5, 6)

x*y

x+y

y^x

# When you apply an algebraic expression to two vectors of unequal length,
# R automatically repeats (recycles) the shorter vector until
# it has something the same length as the longer vector.

c(1, 2, 3, 4) + c(1, 2)

(1:10)^c(1, 2)

# A single number is a vector of length 1, so it is recycled across every element.
2 + c(1, 2, 3)

2 * c(1, 2, 3)

(1:10)^2

# R will still duplicate the shorter vector even if it cannot match the longer vector with a whole number of multiples,
# but in this case it will produce a warning.


c(1,2,3) + c(1,2)

# A useful set of functions taking vector arguments are sum(...), prod(...), max(...),
# min(...), sqrt(...), sort(x), mean(x), and var(x).
# Note that functions applied to a vector may be defined to act elementwise or
# may act on the whole vector input to return a result:

sqrt(1:6)   # elementwise: one result per input element

mean(1:6)   # whole vector: a single result

sort(c(5, 1, 3, 4, 2))

# decreasing = TRUE sorts from largest to smallest.
sort(c(20,40,50,20,90,70), decreasing = TRUE)

# NOTE(review): this call is identical to the one above - possibly an accidental duplicate.
sort(c(20,40,50,20,90,70), decreasing = TRUE)

# --------------------   mean and variance -------------------------------------

# Compute the sample mean and sample variance by hand and compare each with
# the built-in function; the printed differences should be zero, up to
# floating-point rounding.

x <- c(1.2, 0.9, 0.8, 1, 1.2)

x.mean <- sum(x)/length(x)

x.mean - mean(x)

# The divisor is length(x) - 1, matching the sample variance returned by var().
x.var <- sum((x - x.mean)^2)/(length(x) - 1)

x.var - var(x)

# -----------------------  simple numerical integration  ------------------------

# Approximate the integral of cos(t) from 0 to pi/6 by evaluating cos on a
# grid with spacing dt, summing the values and multiplying by dt. The last
# line subtracts sin(pi/6), the exact value of the integral, to show the error.
dt <- 0.005
t <- seq(0, pi/6, by = dt)
ft <- cos(t)
(I <- sum(ft) * dt)
I - sin(pi/6)

# In this example note that t is a vector, so ft is also a vector, where ft[i] equals cos(t[i]).


# ---------------------- exponential limit -----------------------------------------

# Evaluate (1 + 1/x)^x for x = 10, 20, ..., 200 and print how far each value
# is from exp(1); plot(x, y) then draws the values against x.
x <- seq(10, 200, by = 10)
y <- (1 + 1/x)^x
exp(1) - y

plot(x, y)

# ------------------------ Finding Type --------------------------------------------

# typeof() reports the internal storage type of an object.

# numeric(10) creates a numeric vector of ten zeros.
q <- numeric(10)

typeof(q)

a <- "dscienze"

typeof(a)

b <- c("Hello", "World")
b
typeof(b)

# c() coerces its arguments to a single common type: mixing a string with a
# number gives a character vector, so 4 is stored as "4".
b <- c("Hello", 4)

b

typeof(b)

# character(20) creates a vector of twenty empty strings.
# `<-` is used here for consistency with every other assignment in this section.
a <- character(20)

a

# --------------------- Factors --------------------------------

# NOTE(review): setwd() with a hard-coded path is machine specific; every
# relative path used after this point is resolved against this directory.
setwd("~/Documents/R-Files")
getwd()

# NOTE(review): assumes students.csv exists under "R-Class Folder" and has
# columns named treatment and low - the file is not part of this script, confirm.
ex <- read.csv("R-Class Folder/students.csv")
ex
summary(ex$treatment)

ex$treatment

summary(ex$low)

# factor() converts the column to a factor; summary() of a factor reports
# the number of observations at each level.
ex1 <- factor(ex$low)
ex1
summary(ex1)

# --------------------- Missing Values ----------------------------
a <- NA             # assign NA to variable a

is.na(a)            # is it missing?

a <- c(11,NA,13)    # now try a vector

is.na(a)            # tested element by element


mean(a)                      # NAs can propagate: the result is NA

mean(a, na.rm = TRUE)        # NAs can be removed before the mean is taken

# We also mention the null object, called NULL,
# which is returned by some functions and expressions.
# Note that NA and NULL are not equivalent.
# NA is a placeholder for something that exists but is missing.
# NULL stands for something that never existed at all.

# ---------------------- Expressions and assignments ----------------

# In R, the term expression is used to denote a phrase of code that can be executed.
# The following are examples of expressions.

seq(10, 20, by = 3)

4

mean(c(1, 2, 3))

1>2

# If the evaluation of an expression is saved, using the <- operator,
# then the combination is called an assignment. The following are examples of assignments.

x1 <- seq(10, 20, by = 3)
x2 <- 4
x3 <- mean(c(1, 2, 3))
x4 <- 1 > 2
x4

# ---------------------Logical expressions ---------------------------------

# A logical expression is formed using the comparison operators <, >, <=, >=, == (equal to),
# and != (not equal to); and the logical operators & (and), | (or), and ! (not).
# The order of operations can be controlled using parentheses ( ).
# Two other logical operators, && and ||, are covered in the next section.

# The value of a logical expression is either TRUE or FALSE.
# The integers 1 and 0 can also be used to represent TRUE and FALSE, respectively.

c(0, 0, 1, 1) | c(0, 1, 0, 1)

xor(c(0, 0, 1, 1), c(0, 1, 0, 1))

c(0, 0, 1, 1) & c(0, 1, 0, 1)

# One way of extracting a subvector is to provide a subset as a vector of TRUE/FALSE values,
# the same length as x.

x <- 1:20
x%%4 == 0
12%%4

(y <- x[x%%4 == 0])

# Another example: a comparison involving NA is itself NA. Indexing with
# x > 2 keeps that NA in the result, whereas subset() drops it.

x <- c(1, NA, 3, 4)
x>2
x[x > 2]
subset(x, subset = x > 2)

# If you wish to know the index positions of TRUE elements of a logical vector x, then use which(x).
x <- c(1, 1, 2, 3, 5, 8, 13)
which(x%%2 == 0)  # returns the positions of the even elements, not their values


# ---------------------Sequential && and || ---------------------------------

# The logical operators && and || are sequentially evaluated versions of & and |, respectively.

# Suppose that x and y are logical expressions. To evaluate x & y, R first evaluates x and y,
# then returns TRUE if x and y are both TRUE, FALSE otherwise.


# To evaluate x && y, R first evaluates x. If x is FALSE then R returns FALSE without evaluating y.
# If x is TRUE then R evaluates y and returns TRUE if y is TRUE, FALSE otherwise.
# Likewise x || y returns TRUE without evaluating y when x is TRUE.


# With x equal to 0, 1/x is Inf and sin(1/x) is NaN (R gives a warning),
# so the first comparison below cannot be decided and prints NA.
x <- 0
x * sin(1/x) == 0

# | evaluates both sides, so sin(1/x) is still computed (and warns).
(x == 0) | (sin(1/x) == 0)

# || stops as soon as x == 0 is found to be TRUE, so sin(1/x) is never evaluated.
(x == 0) || (sin(1/x) == 0)

# Note that && and || only work on scalars (single values), whereas & and
# | work on vectors on an element-by-element basis.

# A scalar quantity is a one dimensional measurement of a quantity,
# like temperature, or weight. A vector has more than one number associated with it.
# A simple example is velocity. It has a magnitude, called speed,
# as well as a direction, like North or Southwest or 10 degrees west of North.



# --------------------- Matrices ---------------------------------

# A matrix is created from a vector using the function matrix, which has the form
# matrix(data, nrow = 1, ncol = 1, byrow = FALSE).

# Here data is the vector of values, nrow and ncol give the number of rows and columns,
# and byrow can be either TRUE or FALSE (defaults to FALSE) and indicates
# whether you would like to fill the matrix up row-by-row or column-by-column.

# To create a diagonal matrix we use diag(x).
# To join matrices with rows of the same length (stacking vertically) use rbind(...).
# To join matrices with columns of the same length (stacking horizontally) use cbind(...).

(A <- matrix(1:6, nrow = 2, ncol = 3, byrow = FALSE))

(A <- matrix(1:6, nrow = 2, ncol = 3, byrow = TRUE))

# Elements are addressed as A[row, column]; leaving the row index empty
# selects every row.
A[1, 3] <- 0
A
A[, 2:3]

(B <- diag(c(1, 2, 3)))

# The usual algebraic operations, including *, act elementwise on matrices.

# To perform matrix multiplication we use the operator %*%.
# Other useful functions include nrow(x), ncol(x), det(x) (the determinant), t(x) (the transpose)
# and solve(A, B), which returns x such that A %*% x == B.
# If A is invertible then solve(A) returns the matrix inverse of A.

(A <- matrix(c(3, 5, 2, 3), nrow = 2, ncol = 2))

(B <- matrix(c(1, 1, 0, 1), nrow = 2, ncol = 2))

A %*% B  # matrix multiplication

A * B    # elementwise product

(A <- matrix(c(3, 5, 2, 3), nrow = 2, ncol = 2))

(A.inv <- solve(A))

# Multiplying a matrix by its inverse should give the identity matrix
# (up to floating-point rounding).
A %*% A.inv

# --------------------- The Work Space ---------------------------------

# If you wish to find out if an object is a matrix or vector,
# then you use is.matrix(x) and is.vector(x).
# Of course mathematically speaking, a vector is equivalent to a matrix with one row or column,
# but they are treated as different types of object in R.
# To create a matrix A with one column from a vector x, we use A <- as.matrix(x).

# To create a vector from the columns of a matrix A we use as.vector(A).

# To list all currently defined objects, use ls() or objects(). To remove object x, use rm(x).
# To remove all currently defined objects, use rm(list = ls()).

# To save all of your existing objects to a file called fname in the current working directory,
# use save.image(file = "fname").

# To save specific objects (say x and y) use save(x, y, file = "fname").

# To load a set of saved objects use load(file = "fname").

# Calling save.image() with no file argument
# will save your existing objects to the file .RData in the current working directory.

# Demonstration: list the objects, save them all to the file "Variables-Class3",
# remove everything (the next ls() shows an empty workspace), then load the
# file to bring the objects back.
ls()
save.image(file = "Variables-Class3")
rm(list = ls())

ls()

load(file = "Variables-Class3")
ls()


# --------------------- Data Frames ---------------------------------

# We can create a data frame using the data.frame() function; each named
# argument becomes a column. Column names may be written bare or quoted -
# the two calls below build the same data frame.
#
# stringsAsFactors = TRUE is passed explicitly so that the character column
# Name is stored as a factor on every R version: that conversion was the
# default before R 4.0.0, but from 4.0.0 onwards the default is FALSE.

x <- data.frame(SN = 1:2, Age = c(21, 15), Name = c("Ravi", "Chandra"),
                stringsAsFactors = TRUE)

x <- data.frame("SN" = 1:2, "Age" = c(21, 15), "Name" = c("Ravi", "Chandra"),
                stringsAsFactors = TRUE)

str(x)    # structure of x: Name is reported as a Factor with 2 levels

# In the str() output above, the third column, Name, appears as a factor
# instead of a character vector only when character columns were converted.
# Before R 4.0.0 data.frame() did that conversion by default; since R 4.0.0
# the default is stringsAsFactors = FALSE.
# To make sure character columns stay as character on any version,
# we can pass the argument stringsAsFactors=FALSE.


x <- data.frame("SN"=1:2,"Age"=c(21,15),"Name"=c("Ravi","Chandra"),stringsAsFactors=FALSE)
str(x)

# Many data input functions of R like, read.table(), read.csv(),
# read.delim(), read.fwf() also read data into a data frame.

# We can use either the [, [[ or $ operator to access columns of a data frame.

x["Name"]    # [ keeps the container: a one-column data frame

x$Name       # $ returns the column itself, as a vector

x[["Name"]]  # [[ with a name also returns the column vector

x[[3]]       # [[ with a position: the third column, i.e. Name

#  ------------------------- Factors -----------------------


# Factor is a data structure used for fields that take only a predefined,
# finite number of values (categorical data).

x <- factor(c("single","married","married","single")); x

# We can check whether a variable is a factor using the class() function. Similarly,
# the levels of a factor can be checked using the levels() function.

class(x)

levels(x)

# Adding levels: the levels argument can declare values (here "divorced")
# that do not occur in the data.

x <- factor(c("single","married","married","single"), levels=c("single","married","divorced")); x

x <- factor(c("single","married","married","single"))  # rebuilt without the extra level

str(x)    # str() shows the structure of the factor

# -------- Accessing like a matrix ---------------

# Data frames can be accessed like a matrix by providing an index for row and column.
# To illustrate this, we use datasets already available in R.
# Datasets that are available can be listed with the command library(help = "datasets").

# We will use the trees dataset which contains Girth, Height and Volume for Black Cherry Trees.
# A data frame can be examined using functions like str() and head().

library(help = "datasets")

str(trees)

head(trees,n=1)    # first row only

head(trees)        # head() with no n shows the first few rows

trees              # the whole data frame

# Now we proceed to access the data frame like a matrix: df[rows, columns].

trees[2:3, ]                 # select 2nd and 3rd row, all columns

trees[trees$Height > 82, ]   # selects rows with Height greater than 82

trees[10:12, 2]              # rows 10 to 12 of the 2nd column, that is "Height"

trees[10:12, 3]              # rows 10 to 12 of the 3rd column, that is "Volume"

# trees has only three columns, so asking for a 4th does not return NULL:
# the data frame indexing method signals the error "undefined columns
# selected", which would stop the script at this line. tryCatch() shows the
# error message and lets the script carry on.
tryCatch(
  trees[10:12, 4],
  error = function(e) conditionMessage(e)
)

# The single-column selections above were returned as plain vectors, because
# extracting one column drops the data frame structure.
# This behavior can be avoided by passing the argument drop=FALSE as follows.

trees[10:12, 2, drop = FALSE]

# Build a small data frame to modify. stringsAsFactors = TRUE makes Name a
# factor on every R version (it is only the default before R 4.0.0); the
# "Replacing Factor value" steps below rely on Name being a factor.
x <- data.frame(SN = 1:2, Age = c(21, 15), Name = c("Ravi", "Chandra"),
                stringsAsFactors = TRUE); x

x[1, "Age"] <- 20; x     # replacing age 21 by 20

#  Replacing Factor value
# A factor only accepts values that are among its levels, so the new value
# has to be added as a level before it can be assigned.

levels(x$Name) <- c(levels(x$Name), "Deepthi")
x
x$Name[x$Name == "Ravi"] <- "Deepthi"
x

# adding Components
# rbind() and cbind() return a new data frame; the results below are printed
# but not stored, so x itself is unchanged by them.

rbind(x, list(1, 16, "Ravi"))     # Rows can be added to a data frame using the rbind() function.

cbind(x, State = c("NY", "FL"))   # Similarly, we can add columns using cbind().

x$State <- c("NY", "FL"); x       # we can also add new columns through simple list-like assignments.

# Data frame columns can be deleted by assigning NULL to them.

x$State <- NULL; x

# A negative row index drops that row: here the first row is removed.
x <- x[-1, ]; x




# No comments:
#
# Post a Comment