# the student should copy this file line-by-line to R and type the first word after # input it. 1+2 # compute the value of 1+2 x <- 6 # x is 6 a1 <- c(1.2,3.5,3.7) # generate a vector a1=(1.2,3.5,3.7) a2 <- 1:3 # generate a vector a2=(1,2,3) a3 <- seq(-1,0,0.5) # generate a vector a3=(-1,0.5,1) A <- cbind(a1,a2,a3) # combine a matrix based on a1(column 1), a2(column 2), a3(column 3) At <- t(A) # define a new matrix At= transpose of A Att <- rbind(a1,a2,a3) # combine a matrix based on a1(row 1), a2(row 2), a3(row 3) B <- A%*%At # B is define as the product of the matrix A and At Bp <- A*At #Bp is define as the product of the corresponding entries of A and At A.inverse <- solve(A) # A.inverse is the inverse of matrix A A.firstrow <- A[1,] # A.firstrow is the first row of matrix A A.nofirstrow <- A[-1,] # A.nofirstrow is the matrix without the first row of A A.firstcolumn <- A[,1] # A.firstrow is the first column of matrix A A.nofirstcolumn <- A[,-1] # A.nofirstrow is the matrix without the first column of A A11 <- A[1,1] # the (1,1)-th entry of matrix A ############## # fit an linear model and plot. ############## savings <- read.table("u:\\stat526\\dataset\\savings.dat",h=T) savings savings <- read.table("u:/stat526/dataset/savings.dat",h=T) savings summary(savings) g <- lm(Savings~Pop15+Pop75+DispInc+Growth,data=savings) # the fitted model is Savings=28.57-0.4612*Pop15-1.6918*PoP75-0.0003368*DispInc+0.4097*Growth names(g) # see the information contained by g names(summary(g)) g$residuals # see the residual g$fitted.values # see the fitted.values # Residual Plot plot(g$fitted.value,g$residuals,xlab="Fitted Value",ylab="Residual",main="Residual Plot") abline(0,0) # QQ-plot qqnorm(g$res) qqline(g$res) # plot fitted values and observed values for response related to "Pop15" Y <- cbind(savings$Pop15,savings$Savings,g$fitted.value) Y.sort <- apply(Y,2,sort) dimnames(Y.sort)[[2]] <- c("Pop15","Savings","Fitted") matplot(Y.sort[,1],Y.sort[,-1],xlab="Pop15",type="l") legend(21,21,c("Observed","Fitted"),lty=1:2) #################### #Plot four figures in one page #################### x <- seq(-1,1,0.01) y1 <- sin(pi*x) y2 <- cos(pi*x) y3 <- asin(x) y4 <- acos(x) par(mfrow=c(2,2)) plot(x,y1,type="l") abline(0,0) plot(x,y2,type="l") abline(0,0) plot(x,y3,type="l") abline(0,0) plot(x,y4,type="l") abline(0,0) #################### #Print plot #################### postscript("foo.ps") par(mfrow=c(2,2)) plot(x,y1,type="l") abline(0,0) plot(x,y2,type="l") abline(0,0) plot(x,y3,type="l") abline(0,0) plot(x,y4,type="l") abline(0,0) dev.off() # close the device(file) pdf("foo.pdf") par(mfrow=c(2,2)) plot(x,y1,type="l") abline(0,0) plot(x,y2,type="l") abline(0,0) plot(x,y3,type="l") abline(0,0) plot(x,y4,type="l") abline(0,0) dev.off() # close the device(file) ### You can find a pdf file and ps file on your firectory. # You still can change your directory by R # You can copy-paste your plot to your "word" file(*.doc). ### ################### # Some Interest Plots ################### # Norm pdf ######## x <- seq(-8,8,0.01) y1 <- dnorm(x,0,1) y2 <- dnorm(x,0,0.5) y3 <- dnorm(x,0,2) y4 <- dnorm(x,0,4) matplot(x,cbind(y1,y2,y3,y4),type="l",xlab="x",ylab="Normal PDF",lty=1:4,col=rep(1,4)) legend(-8,0.8,lty=1:4,c(expression(paste(sigma,"=1")),expression(paste(sigma,"=0.5")),expression(paste(sigma,"=2")),expression(paste(sigma,"=4")))) ###### # t pdf ###### x <- seq(-4,4,0.01) y1 <- dt(x,1) y2 <- dt(x,5) y3 <- dt(x,20) y4 <- dnorm(x,0,1) matplot(x,cbind(y1,y2,y3,y4),type="l",xlab="x",ylab="Normal PDF",lty=1:4,col=rep(1,4)) legend(-4,0.4,lty=1:4,c("df=1","df=5","df=20","df=infty")) ############### # Compute the p-values and q-values ############### pnorm(-5) # norm probability x <- seq(0.01,0.99,0.01) qnorm(x) # norm quantile pt(-5,20) # t-probability with df=20 qt(x,20) # t-quantile with df=20 pchisq(0.1,20) # chisq probability with df=20 qchisq(x,20) # chisq quantile with df=20 # we still have: pf, qf, pgamma, qgamma, ppois, qpois, and so on. ################# #Simulations ################# # Let us generate 100 random variables from N(0,1) x <- rnorm(100) # we still have: qf,qt, rgamma, rpois, and so on. #### # Confidence Interval #### # normal #### mu <- 2 sigma <- 4 run <- 100 n <- 10 x <- rnorm(n,mu,sigma) x.mean <- mean(x) x.var <- var(x) CI <- c(x.mean-qt(0.975,n-1)*sqrt(x.var)/sqrt(n),x.mean+qt(0.975,n-1)*sqrt(x.var)/sqrt(n)) CI (mu>=CI[1])*(mu<=CI[2]) b <- rep(0,run) for(i in 1:run){ x <- rnorm(n,mu,sigma) x.mean <- mean(x) x.var <- var(x) CI <- c(x.mean-qt(0.975,n-1)*sqrt(x.var)/sqrt(n),x.mean+qt(0.975,n-1)*sqrt(x.var)/sqrt(n)) b[i] <- (mu>=CI[1])*(mu<=CI[2]) } mean(b) #### # Binomial # (p.hat-1.96*sqrt(p.hat(1-p.hat)/n),p.hat+1.96*sqrt(p.hat(1-p.hat)/n)) # where hat p=x/n #### n <- 10 p <- .3 x <- rbinom(1,n,p) x p.hat <- x/n lower <- p.hat-1.96*sqrt(p.hat*(1-p.hat)/n) upper <- p.hat+1.96*sqrt(p.hat*(1-p.hat)/n) c(lower,upper) ########## # cover probability ########## run <- 100 n <- 10 p <- 0.3 x <- rbinom(run,n,p) x p.hat <- x/n lower <- p.hat-1.96*sqrt(p.hat*(1-p.hat)/n) upper <- p.hat+1.96*sqrt(p.hat*(1-p.hat)/n) result <- cbind(x,p,n,lower,upper) result sum((lowerp)) ########### # Look for different values of p # for n = 10, 20, 50, 100 ########### run <- 100 n <- 10 power <- matrix(0,99,2) for(i in 1:99){ p <- 0.01*i x <- rbinom(run,n,p) p.hat <- x/n lower <- p.hat-1.96*sqrt(p.hat*(1-p.hat)/n) upper <- p.hat+1.96*sqrt(p.hat*(1-p.hat)/n) result <- cbind(x,p,n,lower,upper) power[i,1] <- p power[i,2] <- mean((lowerp)) } power.10 <- power n <- 20 power <- matrix(0,99,2) for(i in 1:99){ p <- 0.01*i x <- rbinom(run,n,p) p.hat <- x/n lower <- p.hat-1.96*sqrt(p.hat*(1-p.hat)/n) upper <- p.hat+1.96*sqrt(p.hat*(1-p.hat)/n) result <- cbind(x,p,n,lower,upper) power[i,1] <- p power[i,2] <- mean((lowerp)) } power.20 <- power n <- 50 power <- matrix(0,99,2) for(i in 1:99){ p <- 0.01*i x <- rbinom(run,n,p) p.hat <- x/n lower <- p.hat-1.96*sqrt(p.hat*(1-p.hat)/n) upper <- p.hat+1.96*sqrt(p.hat*(1-p.hat)/n) result <- cbind(x,p,n,lower,upper) power[i,1] <- p power[i,2] <- mean((lowerp)) } power.50 <- power n <- 100 power <- matrix(0,99,2) for(i in 1:99){ p <- 0.01*i x <- rbinom(run,n,p) p.hat <- x/n lower <- p.hat-1.96*sqrt(p.hat*(1-p.hat)/n) upper <- p.hat+1.96*sqrt(p.hat*(1-p.hat)/n) result <- cbind(x,p,n,lower,upper) power[i,1] <- p power[i,2] <- mean((lowerp)) } power.100 <- power power.all <- cbind(power.10,power.20[,2],power.50[,2],power.100[,2]) matplot(power.all[,1],power.all[,2:5],type="l",lty=1:4) legend(0.4,0.4,c("n=10","n=20","n=50","n=100"),lty=1:4) ############################ # Hypothesis Testing and Type I error Probability: Test mu=2 ############################ # Some Concepts # (1) Type I error prbabilities: P(Reject H0|H0) # (2) Type II error probabilities: P(Accept H0|H1) # (3) Significance Level: Max(Type I error probabilities) # (4) Power Function: P(Reject H0)=(a) Type I error probabilities under H0 # (b) 1-Type II error probabilities under H1 ########## mu <- 2 sigma <- 4 run <- 10000 b <- rep(0,run) n <- 10 for(i in 1:run){ x <- rnorm(n,mu,sigma) x.mean <- mean(x) x.var <- var(x) b[i] <- abs((x.mean-2)*sqrt(n)/sqrt(x.var))>qt(0.975,n-1) } mean(b) ########## # Power Function ########## bb <- matrix(0,61,2) b <- rep(0,run) run <- 10000 n <- 10 sigma <- 4 for(j in 1:61){ mu <- -4+0.2*(j-1) bb[j,1] <- mu for(i in 1:run){ x <- rnorm(n,mu,sigma) x.mean <- mean(x) x.var <- var(x) b[i] <- abs((x.mean-2)*sqrt(n)/sqrt(x.var))>qt(0.975,n-1) } bb[j,2] <- mean(b) } bb ######### # Matrix row and column operation ######### x <- rnorm(1000) mean(x) var(x) median(x) summary(x) apply(X,1,mean) # row mean apply(X,1,var) # row variance apply(X,2,mean) # column mean apply(X,2,var) # cloumn variance ######### #functions ######### # define a function f(x)=x^5+7*x^2+2*sin(x)+exp(x) function.needed <- function(x){ result <- x^5+7*x^2+2*sin(x)+exp(x) result } x <- c(0.6,1.8,2.7,4.5) function.needed(x) ############# # logic operation ############# x <- rnorm(100) index1 <- x< -1.96 # if x is less than -1.96, it is true; otherwise it is false. index2 <- x> 1.96 # if x is greater than 1.96 it is true; otherwise it is false. sum(index1) # how many true for the first sum(index2) # how many true for the second sum((1-index1)*(1-index2)) # how many false for both # the key words are: < > <= >= ==; "true" is 1 and "false" is "0" ############# # Condition ############# Define f(x)=x^2 if x<0 and f(x)=1 if x>=0 function.discontinuous <- function(x){ if (x<0) result <- x^2 else result <- 1 result } function.discontinuous(-2) function.discontinuous(2) # If ... else ... can only be used in a variable. It does not work on vector and matrix. # If we want the function works on vector or matrix. The program should be function.vector <- function(x){ y1 <- x^2 index <- x<0 result <- index*y1+(1-index) result } x <- seq(-2,2,0.01) y <- function.vector(x) plot(x,y,type="l") ############### Order Rank and Sort ############## # Let us look how to plot the two curve correctly. Y <- rnorm(100) sort(Y) rank(Y) order(Y) ############ # sort: output from small to large # rank: give ranks from the first to the last # order: give the position according to the smallest to the biggest ############ ############ # Solve f(x)=0 # Example f(x)=e(x)-3x # Using the Newton-Raphson Method ############ xx <- 0.5 ff <- exp(xx)-3*xx dff <- exp(xx)-3 c(xx,ff,dff) xx <- xx-ff/dff ###### # Iterative ###### ff <- exp(xx)-3*xx dff <- exp(xx)-3 xx <- xx-ff/dff c(xx,ff,dff) ########### # Some Maps ########### # Google Maps ######### library(RgoogleMaps) require(ggmap) center <- c(-86.88,40.41) kk <- get_map(location =center,color ="color",source = "google",maptype="terrain",zoom=10) ggmap(kk,extent = "device",ylab = "Latitude",xlab = "Longitude") kk <- get_map(location = c(lon = -0.016179, lat = 51.538525),color = "color",source = "google",maptype = "terrain",zoom = 10) ggmap(kk,extent = "device",ylab = "Latitude",xlab = "Longitude") kk <- get_map(location = c(lon = -0.016179, lat = 51.538525),color = "color",source = "google",maptype = "satellite",zoom = 16) ggmap(kk,extent = "device",ylab = "Latitude",xlab = "Longitude") kk <- get_map(location = c(lon = -0.016179, lat = 51.538525),color = "color",source = "google",maptype = "roadmap",zoom = 16) ggmap(kk,extent = "device",ylab = "Latitude",xlab = "Longitude") kk <- get_map(location = c(lon = -0.016179, lat = 51.538525),color = "color",source = "google",maptype = "hybrid",zoom = 16) ggmap(kk,extent = "device",ylab = "Latitude",xlab = "Longitude") kk <- get_map(location = c(lon = -0.016179, lat = 51.538525),color = "color",source = "google",maptype = "toner",zoom = 16) ggmap(kk,extent = "device",ylab = "Latitude",xlab = "Longitude") ######## # Other Maps ####### library(mapdata) map('usa') map('china') map('japan') ############# library(maptools) library(rgdal) library(spdep) library(foreign) setwd("u:\\simulation\\temp\\surveys") getinfo.shape("indianacolo.shp") MAP<-readShapePoly("indianacolo.shp") plot(MAP) DATA <- read.dbf("indianacolo.dbf", as.is = FALSE) names(DATA) RATE <- DATA$COLOMAL/DATA$MAL tmp <- cut(RATE,breaks=7) summary(tmp) RATE.CUT <- c(min(RATE)*0.99,quantile(RATE,c(1/7,2/7,3/7,4/7,5/7,6/7)),max(RATE)*1.01) tmp <- cut(RATE,breaks=RATE.CUT) summary(tmp) plot(MAP,col=cm.colors(7)[tmp]) plot(MAP,col=heat.colors(7)[tmp]) plot(MAP,col=terrain.colors(7)[tmp]) plot(MAP,col=topo.colors(7)[tmp])