# ---- Variables ----
# Think of a variable as a labelled folder: the contents can be inspected or
# swapped out, while the label (the name) stays the same.
# Values are stored with the assignment operator `<-`.
# A name may contain letters, digits, `.` and `_`; it must begin with a
# letter, or with `.` followed by a letter. Names are case sensitive.
x <- 1000
x
# Typing a bare name at the top level prints its value; this is shorthand for
# print(x). Some situations require the explicit form, or its near equivalent
# show(x).
print(x)
show(x)
(1 + 1 / x)^x
# Wrapping an assignment in parentheses stores the value and prints it too.
(y <- (1 + 1 / x)^x)
# Like most programming languages, R also accepts `=` for variable
# assignment, alongside `<-`.
x <- 200
x = 300
x
a <- 3
a
b <- sqrt(a * a + 3)
b <- sqrt(a * a) # overwrites the previous value of b
b
# ls() lists the names of all objects defined so far in this session.
ls()
# ---- Functions ----
# In mathematics a function takes one or more arguments (inputs) and produces
# one or more outputs (return values).
x <- c(1, 27, 56, 70)
x
x[4]
y <- x[4]
y
x[5] # index past the end of the vector: the result is NA
# A built-in or user-defined function is called by writing its name followed
# by the argument values, in parentheses and separated by commas.
1:100
seq(from = 2, to = 9, by = 2)
# Some arguments are optional and have default values; for example, when `by`
# is omitted, seq() uses by = 1:
seq(from = 1, to = 9)
# Defaults and alternative usages of a built-in function fname are described
# in its help page, opened with help(fname) or ?fname.
help("sqrt")
?sqrt
# Every function has a default argument order. Arguments supplied in that
# order need no names; to supply them in another order, name them using the
# form argument_name = expression.
seq(1, 9, 2)
seq(to = 9, from = 1)
seq(by = -2, 9, 1) # unnamed values fill the remaining slots: from = 9, to = 1
x <- 9
seq(1, x, x / 3)
# ---- Vectors ----
# A vector is an indexed list of values. Picture a drawer in a filing cabinet:
# the drawer carries a name, and the files inside are numbered 1, 2, 3, ...
# from the front. A simple variable is just a vector of length 1.
# Longer vectors come from functions that return vectors; the three basic
# constructors are c(...) (combine), seq(from, to, by) (sequence) and
# rep(x, times) (repeat).
x <- c(1, 2, 3, 4, 5, 6)
getwd()
x
x + 1
mean(x)
median(x)
# The sales data file is looked up in the working directory shown by getwd()
# above. The read is guarded so that a missing file skips this one example
# instead of aborting the rest of the script.
sales_file <- "4c_data.CSV"
if (file.exists(sales_file)) {
  y <- read.csv(file = sales_file)
  # Values are not auto-printed inside braces, hence the explicit print().
  print(head(y))
  # NOTE(review): presumably the file has a numeric Sales column - confirm.
  print(mean(y$Sales))
} else {
  message("Skipping the CSV example: ", sales_file, " not found in ", getwd())
}
help("var")
var(x)
x[1]
x[9] # x has only 6 elements, so this is NA
(x <- seq(1, 20, by = 2))
(y <- rep(3, 4))
(z <- c(y, x))
z <- c(x, y) # without the outer parentheses nothing prints, so type z again
z
(x <- 100:110)
i <- c(1, 3, 2)
j <- c(-1, -2, -3)
x[i] # positive indices select elements, in the order given
x[j] # negative indices drop the listed positions
# A vector may have no elements at all; length(x) reports how many it has.
x <- c()
length(x)
# Arithmetic on vectors is applied elementwise, i.e. to each element in turn.
x <- c(1, 2, 3)
y <- c(4, 5, 6)
x * y
x + y
y^x
# When the two vectors differ in length, R recycles the shorter one until it
# matches the length of the longer one.
c(1, 2, 3, 4) + c(1, 2)
(1:10)^c(1, 2)
2 + c(1, 2, 3)
2 * c(1, 2, 3)
(1:10)^2
# Recycling still happens when the longer length is not a whole multiple of
# the shorter one, but then R issues a warning.
c(1, 2, 3) + c(1, 2)
# Useful functions that take vector arguments include sum(...), prod(...),
# max(...), min(...), sqrt(...), sort(x), mean(x) and var(x). Some act on each
# element (sqrt); others reduce the whole vector to one result (mean).
sqrt(1:6)
mean(1:6)
sort(c(5, 1, 3, 4, 2))
sort(c(20, 40, 50, 20, 90, 70), decreasing = TRUE)
sort(c(20, 40, 50, 20, 90, 70), decreasing = TRUE)
# ---- Mean and variance ----
# Compute the sample mean and variance by hand and compare them with the
# built-ins; both differences should be zero up to floating-point rounding.
x <- c(1.2, 0.9, 0.8, 1, 1.2)
x.mean <- sum(x) / length(x)
x.mean - mean(x)
x.var <- sum((x - x.mean)^2) / (length(x) - 1) # n - 1 in the denominator
x.var - var(x)
# ---- Simple numerical integration ----
# Approximate the integral of cos(t) over [0, pi/6] by summing the integrand
# on a grid of spacing dt, then compare with the exact value sin(pi/6).
dt <- 0.005
t <- seq(0, pi / 6, by = dt)
ft <- cos(t)
(I <- sum(ft) * dt)
I - sin(pi / 6)
# t is a vector, so ft is a vector too, with ft[i] equal to cos(t[i]).
# ---- Exponential limit ----
# Evaluate (1 + 1/x)^x on a grid of x values, print its gap to e = exp(1),
# and plot the values against x.
x <- seq(10, 200, by = 10)
y <- (1 + 1 / x)^x
exp(1) - y
plot(x, y)
# ---- Finding the type ----
q <- numeric(10) # ten zeros
typeof(q)
a <- "dscienze"
typeof(a)
b <- c("Hello", "World")
b
typeof(b)
# Mixing a string with a number in c() coerces everything to character.
b <- c("Hello", 4)
b
typeof(b)
a <- character(20) # twenty empty strings
a
# ---- Factors (from a CSV file) ----
# The students file is addressed by its full path rather than through setwd():
# a hard-coded setwd() aborts the script on machines without that directory,
# and changing the working directory mid-script silently redirects every
# later relative path (including the workspace file saved further down).
students_csv <- file.path(
  "~/Documents/R-Files", "R-Class Folder", "students.csv"
)
getwd()
if (file.exists(students_csv)) {
  ex <- read.csv(students_csv)
  # Values are not auto-printed inside braces, hence the explicit print().
  print(ex)
  # NOTE(review): presumably the file has `treatment` and `low` columns -
  # confirm against the data.
  print(summary(ex$treatment))
  print(ex$treatment)
  print(summary(ex$low))
  ex1 <- factor(ex$low) # convert the column to a factor
  print(ex1)
  print(summary(ex1)) # for a factor, summary() gives the count per level
} else {
  message("Skipping the factor example: ", students_csv, " not found")
}
# ---- Missing values ----
a <- NA # a single missing value
is.na(a) # TRUE: the value is missing
a <- c(11, NA, 13) # a vector with one missing element
is.na(a) # tested element by element
mean(a) # NA propagates through the calculation
mean(a, na.rm = TRUE) # drop the NAs before averaging
# There is also the null object, NULL, which some functions and expressions
# return. NA and NULL are not equivalent: NA is a placeholder for something
# that exists but is missing, whereas NULL stands for something that never
# existed at all.
# ---- Expressions and assignments ----
# In R, an expression is a phrase of code that can be executed.
seq(10, 20, by = 3)
4
mean(c(1, 2, 3))
1 > 2
# When the value of an expression is stored with `<-`, the combination is
# called an assignment. Examples:
x1 <- seq(10, 20, by = 3)
x2 <- 4
x3 <- mean(c(1, 2, 3))
x4 <- 1 > 2
x4
# ---- Logical expressions ----
# Logical expressions are built from the comparison operators <, >, <=, >=,
# == (equal to) and != (not equal to), together with the logical operators
# & (and), | (or) and ! (not). Parentheses control the order of evaluation.
# Two further logical operators, && and ||, are covered in the next section.
# A logical expression evaluates to TRUE or FALSE; the numbers 1 and 0 can be
# used in place of TRUE and FALSE, respectively.
c(0, 0, 1, 1) | c(0, 1, 0, 1)
xor(c(0, 0, 1, 1), c(0, 1, 0, 1))
c(0, 0, 1, 1) & c(0, 1, 0, 1)
# A subvector can be extracted by indexing with a vector of TRUE/FALSE values
# that has the same length as x.
x <- 1:20
x %% 4 == 0
12 %% 4
(y <- x[x %% 4 == 0])
# Another example, this time with a missing value.
x <- c(1, NA, 3, 4)
x > 2
x[x > 2] # the NA in the index yields an NA in the result
subset(x, subset = x > 2) # subset() leaves out the NA
# which(x) returns the index positions of the TRUE elements of a logical
# vector x.
x <- c(1, 1, 2, 3, 5, 8, 13)
which(x %% 2 == 0) # positions, not values
# ---- Sequential && and || ----
# && and || are the sequentially evaluated counterparts of & and |.
# For logical expressions x and y, x & y evaluates both x and y and returns
# TRUE only if both are TRUE. x && y evaluates x first: if x is FALSE the
# result is FALSE and y is never evaluated; otherwise y is evaluated and the
# result is TRUE exactly when y is TRUE.
x <- 0
x * sin(1 / x) == 0
(x == 0) | (sin(1 / x) == 0)
(x == 0) || (sin(1 / x) == 0)
# && and || operate on scalars only, whereas & and | work on vectors element
# by element.
# In physics, a scalar is a one-dimensional measurement such as temperature
# or weight, while a vector has more than one number associated with it - for
# example velocity, which has a magnitude (speed) and a direction such as
# North, Southwest or 10 degrees west of North.
# ---- Matrices ----
# matrix() builds a matrix from a vector; its form is
#   matrix(data, nrow = 1, ncol = 1, byrow = FALSE)
# byrow (FALSE by default) chooses whether the matrix is filled row by row or
# column by column.
# diag(x) creates a diagonal matrix.
# rbind(...) joins matrices with rows of the same length (stacks vertically).
# cbind(...) joins matrices with columns of the same length (stacks
# horizontally).
(A <- matrix(1:6, nrow = 2, ncol = 3, byrow = FALSE))
(A <- matrix(1:6, nrow = 2, ncol = 3, byrow = TRUE))
A[1, 3] <- 0 # overwrite the element in row 1, column 3
A
A[, 2:3] # all rows, columns 2 and 3
(B <- diag(c(1, 2, 3)))
# The usual algebraic operations act elementwise on matrices; matrix
# multiplication uses the operator %*%.
# Other helpers include nrow(x), ncol(x), det(x) (the determinant), t(x) (the
# transpose) and solve(A, B), which returns x such that A %*% x == B.
# If A is invertible, solve(A) returns the inverse of A.
(A <- matrix(c(3, 5, 2, 3), nrow = 2, ncol = 2))
(B <- matrix(c(1, 1, 0, 1), nrow = 2, ncol = 2))
A %*% B # matrix multiplication
A * B # elementwise product
(A <- matrix(c(3, 5, 2, 3), nrow = 2, ncol = 2))
(A.inv <- solve(A))
A %*% A.inv # the identity matrix, up to rounding error
# ---- The workspace ----
# is.matrix(x) and is.vector(x) tell you whether an object is a matrix or a
# vector. Mathematically a vector is equivalent to a matrix with one row or
# column, but R treats them as different kinds of object.
# A <- as.matrix(x) creates a one-column matrix A from a vector x.
# as.vector(A) creates a vector from the columns of a matrix A.
# ls() or objects() lists all currently defined objects; rm(x) removes object
# x; rm(list = ls()) removes all currently defined objects.
# save.image(file = "fname") saves all existing objects to the file fname in
# the current working directory; save(x, y, file = "fname") saves only the
# named objects; load(file = "fname") loads a set of saved objects.
# NOTE(review): the original text here also mentioned saving the existing
# objects to the file .RData in the working directory - presumably that
# describes save.image() called without a file argument; confirm.
ls()
save.image(file = "Variables-Class3")
rm(list = ls())
ls()
load(file = "Variables-Class3")
ls()
# ---- Data frames ----
# data.frame() builds a data frame from named columns.
x <- data.frame(SN = 1:2, Age = c(21, 15), Name = c("Ravi", "Chandra"))
# Column names may also be quoted. stringsAsFactors = TRUE is spelled out here
# because the default became FALSE in R 4.0.0; without it the Name column
# would stay character and str() would not show the factor discussed below.
x <- data.frame(
  "SN" = 1:2, "Age" = c(21, 15), "Name" = c("Ravi", "Chandra"),
  stringsAsFactors = TRUE
)
str(x) # structure of x
# The third column, Name, is a factor here rather than a character vector.
# Passing stringsAsFactors = FALSE keeps character vectors as they are.
x <- data.frame(
  "SN" = 1:2, "Age" = c(21, 15), "Name" = c("Ravi", "Chandra"),
  stringsAsFactors = FALSE
)
str(x)
# Many data-input functions, such as read.table(), read.csv(), read.delim()
# and read.fwf(), also read data into a data frame.
# Columns can be accessed with the [, [[ or $ operators.
x["Name"] # a one-column data frame (a list)
x$Name # the column itself, as a vector
x[["Name"]]
x[[3]] # the same column, selected by position
# ---- Factors ----
# A factor is a data structure for fields that take only a predefined, finite
# set of values (categorical data).
x <- factor(c("single", "married", "married", "single"))
x
# class() tells whether a variable is a factor, and levels() lists the levels
# of a factor.
class(x)
levels(x)
# Levels that do not occur in the data can be declared through `levels`.
x <- factor(
  c("single", "married", "married", "single"),
  levels = c("single", "married", "divorced")
)
x
x <- factor(c("single", "married", "married", "single"))
str(x) # compact view of the factor's structure
# ---- Accessing a data frame like a matrix ----
# A data frame can be indexed like a matrix, by giving a row index and a
# column index. The examples use a dataset that ships with R; the available
# datasets can be listed with library(help = "datasets").
# The trees dataset holds Girth, Height and Volume for Black Cherry Trees.
# str() and head() are convenient for examining a data frame.
library(help = "datasets")
str(trees)
head(trees, n = 1)
head(trees)
trees
# Now access the data frame like a matrix.
trees[2:3, ] # rows 2 and 3
trees[trees$Height > 82, ] # rows with Height greater than 82
trees[10:12, 2] # rows 10 to 12 of the 2nd column, i.e. "Height"
trees[10:12, 3]
# trees has only three columns, so asking for a 4th raises the error
# "undefined columns selected" rather than returning NULL. try() reports the
# error and lets the script carry on.
try(trees[10:12, 4])
# Selecting a single column, as in trees[10:12, 2], returns a plain vector.
# Passing drop = FALSE keeps the result a data frame.
trees[10:12, 2, drop = FALSE]
# ---- Modifying a data frame ----
# stringsAsFactors = TRUE is given explicitly: since R 4.0.0 the default is
# FALSE, and the factor-level example below needs Name to be a factor.
x <- data.frame(
  SN = 1:2, Age = c(21, 15), Name = c("Ravi", "Chandra"),
  stringsAsFactors = TRUE
)
x
x[1, "Age"] <- 20 # replace age 21 by 20
x
# Replacing a factor value: the new value must first be added as a level.
levels(x$Name) <- c(levels(x$Name), "Deepthi")
x
x$Name[x$Name == "Ravi"] <- "Deepthi"
x
# Adding components
rbind(x, list(1, 16, "Ravi")) # rbind() adds rows to a data frame
cbind(x, State = c("NY", "FL")) # cbind() adds columns
x$State <- c("NY", "FL") # columns can also be added by list-like assignment
x
# Assigning NULL to a column deletes it.
x$State <- NULL
x
x <- x[-1, ] # drop the first row
x
# NOTE(review): everything from this point repeats the earlier part of this
# script verbatim (presumably a paste/scrape duplication) - confirm and
# consider removing the repeat.
# ---- Variables ----
# Think of a variable as a labelled folder: the contents can be inspected or
# swapped out, while the label (the name) stays the same.
# Values are stored with the assignment operator `<-`.
# A name may contain letters, digits, `.` and `_`; it must begin with a
# letter, or with `.` followed by a letter. Names are case sensitive.
x <- 1000
x
# Typing a bare name at the top level prints its value; this is shorthand for
# print(x). Some situations require the explicit form, or its near equivalent
# show(x).
print(x)
show(x)
(1 + 1 / x)^x
# Wrapping an assignment in parentheses stores the value and prints it too.
(y <- (1 + 1 / x)^x)
# Like most programming languages, R also accepts `=` for variable
# assignment, alongside `<-`.
x <- 200
x = 300
x
a <- 3
a
b <- sqrt(a * a + 3)
b <- sqrt(a * a) # overwrites the previous value of b
b
# ls() lists the names of all objects defined so far in this session.
ls()
# ---- Functions ----
# In mathematics a function takes one or more arguments (inputs) and produces
# one or more outputs (return values).
x <- c(1, 27, 56, 70)
x
x[4]
y <- x[4]
y
x[5] # index past the end of the vector: the result is NA
# A built-in or user-defined function is called by writing its name followed
# by the argument values, in parentheses and separated by commas.
1:100
seq(from = 2, to = 9, by = 2)
# Some arguments are optional and have default values; for example, when `by`
# is omitted, seq() uses by = 1:
seq(from = 1, to = 9)
# Defaults and alternative usages of a built-in function fname are described
# in its help page, opened with help(fname) or ?fname.
help("sqrt")
?sqrt
# Every function has a default argument order. Arguments supplied in that
# order need no names; to supply them in another order, name them using the
# form argument_name = expression.
seq(1, 9, 2)
seq(to = 9, from = 1)
seq(by = -2, 9, 1) # unnamed values fill the remaining slots: from = 9, to = 1
x <- 9
seq(1, x, x / 3)
# ---- Vectors ----
# A vector is an indexed list of values. Picture a drawer in a filing cabinet:
# the drawer carries a name, and the files inside are numbered 1, 2, 3, ...
# from the front. A simple variable is just a vector of length 1.
# Longer vectors come from functions that return vectors; the three basic
# constructors are c(...) (combine), seq(from, to, by) (sequence) and
# rep(x, times) (repeat).
x <- c(1, 2, 3, 4, 5, 6)
getwd()
x
x + 1
mean(x)
median(x)
# The sales data file is looked up in the working directory shown by getwd()
# above. The read is guarded so that a missing file skips this one example
# instead of aborting the rest of the script.
sales_file <- "4c_data.CSV"
if (file.exists(sales_file)) {
  y <- read.csv(file = sales_file)
  # Values are not auto-printed inside braces, hence the explicit print().
  print(head(y))
  # NOTE(review): presumably the file has a numeric Sales column - confirm.
  print(mean(y$Sales))
} else {
  message("Skipping the CSV example: ", sales_file, " not found in ", getwd())
}
help("var")
var(x)
x[1]
x[9] # x has only 6 elements, so this is NA
(x <- seq(1, 20, by = 2))
(y <- rep(3, 4))
(z <- c(y, x))
z <- c(x, y) # without the outer parentheses nothing prints, so type z again
z
(x <- 100:110)
i <- c(1, 3, 2)
j <- c(-1, -2, -3)
x[i] # positive indices select elements, in the order given
x[j] # negative indices drop the listed positions
# A vector may have no elements at all; length(x) reports how many it has.
x <- c()
length(x)
# Arithmetic on vectors is applied elementwise, i.e. to each element in turn.
x <- c(1, 2, 3)
y <- c(4, 5, 6)
x * y
x + y
y^x
# When the two vectors differ in length, R recycles the shorter one until it
# matches the length of the longer one.
c(1, 2, 3, 4) + c(1, 2)
(1:10)^c(1, 2)
2 + c(1, 2, 3)
2 * c(1, 2, 3)
(1:10)^2
# Recycling still happens when the longer length is not a whole multiple of
# the shorter one, but then R issues a warning.
c(1, 2, 3) + c(1, 2)
# Useful functions that take vector arguments include sum(...), prod(...),
# max(...), min(...), sqrt(...), sort(x), mean(x) and var(x). Some act on each
# element (sqrt); others reduce the whole vector to one result (mean).
sqrt(1:6)
mean(1:6)
sort(c(5, 1, 3, 4, 2))
sort(c(20, 40, 50, 20, 90, 70), decreasing = TRUE)
sort(c(20, 40, 50, 20, 90, 70), decreasing = TRUE)
# ---- Mean and variance ----
# Compute the sample mean and variance by hand and compare them with the
# built-ins; both differences should be zero up to floating-point rounding.
x <- c(1.2, 0.9, 0.8, 1, 1.2)
x.mean <- sum(x) / length(x)
x.mean - mean(x)
x.var <- sum((x - x.mean)^2) / (length(x) - 1) # n - 1 in the denominator
x.var - var(x)
# ---- Simple numerical integration ----
# Approximate the integral of cos(t) over [0, pi/6] by summing the integrand
# on a grid of spacing dt, then compare with the exact value sin(pi/6).
dt <- 0.005
t <- seq(0, pi / 6, by = dt)
ft <- cos(t)
(I <- sum(ft) * dt)
I - sin(pi / 6)
# t is a vector, so ft is a vector too, with ft[i] equal to cos(t[i]).
# ---- Exponential limit ----
# Evaluate (1 + 1/x)^x on a grid of x values, print its gap to e = exp(1),
# and plot the values against x.
x <- seq(10, 200, by = 10)
y <- (1 + 1 / x)^x
exp(1) - y
plot(x, y)
# ---- Finding the type ----
q <- numeric(10) # ten zeros
typeof(q)
a <- "dscienze"
typeof(a)
b <- c("Hello", "World")
b
typeof(b)
# Mixing a string with a number in c() coerces everything to character.
b <- c("Hello", 4)
b
typeof(b)
a <- character(20) # twenty empty strings
a
# ---- Factors (from a CSV file) ----
# The students file is addressed by its full path rather than through setwd():
# a hard-coded setwd() aborts the script on machines without that directory,
# and changing the working directory mid-script silently redirects every
# later relative path (including the workspace file saved further down).
students_csv <- file.path(
  "~/Documents/R-Files", "R-Class Folder", "students.csv"
)
getwd()
if (file.exists(students_csv)) {
  ex <- read.csv(students_csv)
  # Values are not auto-printed inside braces, hence the explicit print().
  print(ex)
  # NOTE(review): presumably the file has `treatment` and `low` columns -
  # confirm against the data.
  print(summary(ex$treatment))
  print(ex$treatment)
  print(summary(ex$low))
  ex1 <- factor(ex$low) # convert the column to a factor
  print(ex1)
  print(summary(ex1)) # for a factor, summary() gives the count per level
} else {
  message("Skipping the factor example: ", students_csv, " not found")
}
# ---- Missing values ----
a <- NA # a single missing value
is.na(a) # TRUE: the value is missing
a <- c(11, NA, 13) # a vector with one missing element
is.na(a) # tested element by element
mean(a) # NA propagates through the calculation
mean(a, na.rm = TRUE) # drop the NAs before averaging
# There is also the null object, NULL, which some functions and expressions
# return. NA and NULL are not equivalent: NA is a placeholder for something
# that exists but is missing, whereas NULL stands for something that never
# existed at all.
# ---- Expressions and assignments ----
# In R, an expression is a phrase of code that can be executed.
seq(10, 20, by = 3)
4
mean(c(1, 2, 3))
1 > 2
# When the value of an expression is stored with `<-`, the combination is
# called an assignment. Examples:
x1 <- seq(10, 20, by = 3)
x2 <- 4
x3 <- mean(c(1, 2, 3))
x4 <- 1 > 2
x4
# ---- Logical expressions ----
# Logical expressions are built from the comparison operators <, >, <=, >=,
# == (equal to) and != (not equal to), together with the logical operators
# & (and), | (or) and ! (not). Parentheses control the order of evaluation.
# Two further logical operators, && and ||, are covered in the next section.
# A logical expression evaluates to TRUE or FALSE; the numbers 1 and 0 can be
# used in place of TRUE and FALSE, respectively.
c(0, 0, 1, 1) | c(0, 1, 0, 1)
xor(c(0, 0, 1, 1), c(0, 1, 0, 1))
c(0, 0, 1, 1) & c(0, 1, 0, 1)
# A subvector can be extracted by indexing with a vector of TRUE/FALSE values
# that has the same length as x.
x <- 1:20
x %% 4 == 0
12 %% 4
(y <- x[x %% 4 == 0])
# Another example, this time with a missing value.
x <- c(1, NA, 3, 4)
x > 2
x[x > 2] # the NA in the index yields an NA in the result
subset(x, subset = x > 2) # subset() leaves out the NA
# which(x) returns the index positions of the TRUE elements of a logical
# vector x.
x <- c(1, 1, 2, 3, 5, 8, 13)
which(x %% 2 == 0) # positions, not values
# ---- Sequential && and || ----
# && and || are the sequentially evaluated counterparts of & and |.
# For logical expressions x and y, x & y evaluates both x and y and returns
# TRUE only if both are TRUE. x && y evaluates x first: if x is FALSE the
# result is FALSE and y is never evaluated; otherwise y is evaluated and the
# result is TRUE exactly when y is TRUE.
x <- 0
x * sin(1 / x) == 0
(x == 0) | (sin(1 / x) == 0)
(x == 0) || (sin(1 / x) == 0)
# && and || operate on scalars only, whereas & and | work on vectors element
# by element.
# In physics, a scalar is a one-dimensional measurement such as temperature
# or weight, while a vector has more than one number associated with it - for
# example velocity, which has a magnitude (speed) and a direction such as
# North, Southwest or 10 degrees west of North.
# ---- Matrices ----
# matrix() builds a matrix from a vector; its form is
#   matrix(data, nrow = 1, ncol = 1, byrow = FALSE)
# byrow (FALSE by default) chooses whether the matrix is filled row by row or
# column by column.
# diag(x) creates a diagonal matrix.
# rbind(...) joins matrices with rows of the same length (stacks vertically).
# cbind(...) joins matrices with columns of the same length (stacks
# horizontally).
(A <- matrix(1:6, nrow = 2, ncol = 3, byrow = FALSE))
(A <- matrix(1:6, nrow = 2, ncol = 3, byrow = TRUE))
A[1, 3] <- 0 # overwrite the element in row 1, column 3
A
A[, 2:3] # all rows, columns 2 and 3
(B <- diag(c(1, 2, 3)))
# The usual algebraic operations act elementwise on matrices; matrix
# multiplication uses the operator %*%.
# Other helpers include nrow(x), ncol(x), det(x) (the determinant), t(x) (the
# transpose) and solve(A, B), which returns x such that A %*% x == B.
# If A is invertible, solve(A) returns the inverse of A.
(A <- matrix(c(3, 5, 2, 3), nrow = 2, ncol = 2))
(B <- matrix(c(1, 1, 0, 1), nrow = 2, ncol = 2))
A %*% B # matrix multiplication
A * B # elementwise product
(A <- matrix(c(3, 5, 2, 3), nrow = 2, ncol = 2))
(A.inv <- solve(A))
A %*% A.inv # the identity matrix, up to rounding error
# ---- The workspace ----
# is.matrix(x) and is.vector(x) tell you whether an object is a matrix or a
# vector. Mathematically a vector is equivalent to a matrix with one row or
# column, but R treats them as different kinds of object.
# A <- as.matrix(x) creates a one-column matrix A from a vector x.
# as.vector(A) creates a vector from the columns of a matrix A.
# ls() or objects() lists all currently defined objects; rm(x) removes object
# x; rm(list = ls()) removes all currently defined objects.
# save.image(file = "fname") saves all existing objects to the file fname in
# the current working directory; save(x, y, file = "fname") saves only the
# named objects; load(file = "fname") loads a set of saved objects.
# NOTE(review): the original text here also mentioned saving the existing
# objects to the file .RData in the working directory - presumably that
# describes save.image() called without a file argument; confirm.
ls()
save.image(file = "Variables-Class3")
rm(list = ls())
ls()
load(file = "Variables-Class3")
ls()
# ---- Data frames ----
# data.frame() builds a data frame from named columns.
x <- data.frame(SN = 1:2, Age = c(21, 15), Name = c("Ravi", "Chandra"))
# Column names may also be quoted. stringsAsFactors = TRUE is spelled out here
# because the default became FALSE in R 4.0.0; without it the Name column
# would stay character and str() would not show the factor discussed below.
x <- data.frame(
  "SN" = 1:2, "Age" = c(21, 15), "Name" = c("Ravi", "Chandra"),
  stringsAsFactors = TRUE
)
str(x) # structure of x
# The third column, Name, is a factor here rather than a character vector.
# Passing stringsAsFactors = FALSE keeps character vectors as they are.
x <- data.frame(
  "SN" = 1:2, "Age" = c(21, 15), "Name" = c("Ravi", "Chandra"),
  stringsAsFactors = FALSE
)
str(x)
# Many data-input functions, such as read.table(), read.csv(), read.delim()
# and read.fwf(), also read data into a data frame.
# Columns can be accessed with the [, [[ or $ operators.
x["Name"] # a one-column data frame (a list)
x$Name # the column itself, as a vector
x[["Name"]]
x[[3]] # the same column, selected by position
# ---- Factors ----
# A factor is a data structure for fields that take only a predefined, finite
# set of values (categorical data).
x <- factor(c("single", "married", "married", "single"))
x
# class() tells whether a variable is a factor, and levels() lists the levels
# of a factor.
class(x)
levels(x)
# Levels that do not occur in the data can be declared through `levels`.
x <- factor(
  c("single", "married", "married", "single"),
  levels = c("single", "married", "divorced")
)
x
x <- factor(c("single", "married", "married", "single"))
str(x) # compact view of the factor's structure
# ---- Accessing a data frame like a matrix ----
# A data frame can be indexed like a matrix, by giving a row index and a
# column index. The examples use a dataset that ships with R; the available
# datasets can be listed with library(help = "datasets").
# The trees dataset holds Girth, Height and Volume for Black Cherry Trees.
# str() and head() are convenient for examining a data frame.
library(help = "datasets")
str(trees)
head(trees, n = 1)
head(trees)
trees
# Now access the data frame like a matrix.
trees[2:3, ] # rows 2 and 3
trees[trees$Height > 82, ] # rows with Height greater than 82
trees[10:12, 2] # rows 10 to 12 of the 2nd column, i.e. "Height"
trees[10:12, 3]
# trees has only three columns, so asking for a 4th raises the error
# "undefined columns selected" rather than returning NULL. try() reports the
# error and lets the script carry on.
try(trees[10:12, 4])
# Selecting a single column, as in trees[10:12, 2], returns a plain vector.
# Passing drop = FALSE keeps the result a data frame.
trees[10:12, 2, drop = FALSE]
# ---- Modifying a data frame ----
# stringsAsFactors = TRUE is given explicitly: since R 4.0.0 the default is
# FALSE, and the factor-level example below needs Name to be a factor.
x <- data.frame(
  SN = 1:2, Age = c(21, 15), Name = c("Ravi", "Chandra"),
  stringsAsFactors = TRUE
)
x
x[1, "Age"] <- 20 # replace age 21 by 20
x
# Replacing a factor value: the new value must first be added as a level.
levels(x$Name) <- c(levels(x$Name), "Deepthi")
x
x$Name[x$Name == "Ravi"] <- "Deepthi"
x
# Adding components
rbind(x, list(1, 16, "Ravi")) # rbind() adds rows to a data frame
cbind(x, State = c("NY", "FL")) # cbind() adds columns
x$State <- c("NY", "FL") # columns can also be added by list-like assignment
x
# Assigning NULL to a column deletes it.
x$State <- NULL
x
x <- x[-1, ] # drop the first row
x
# (Blog page footer captured with the script; not R code.)
# No comments: / Post a Comment