# the student should copy this file line-by-line to R and type the first word after
# input it.

1+2 # compute the value of 1+2
x <- 6 # x is 6
a1 <- c(1.2,3.5,3.7) # generate a vector a1=(1.2,3.5,3.7)
a2 <- 1:3  # generate a vector a2=(1,2,3)
a3 <- seq(-1,0,0.5)  # generate a vector a3=(-1,0.5,1)
A <- cbind(a1,a2,a3) # combine a matrix based on a1(column 1), a2(column 2), a3(column 3)
At <- t(A)  # define a new matrix At= transpose of A
Att <- rbind(a1,a2,a3) # combine a matrix based on a1(row 1), a2(row 2), a3(row 3)

B <- A%*%At   # B is define as the product of the matrix A and At
Bp <- A*At    #Bp is define as the product of the corresponding entries of A and At
A.inverse <- solve(A) # A.inverse is the inverse of matrix A

A.firstrow <- A[1,] # A.firstrow is the first row of matrix A
A.nofirstrow <- A[-1,] # A.nofirstrow is the matrix without the first row of A  

A.firstcolumn <- A[,1] # A.firstrow is the first column of matrix A
A.nofirstcolumn <- A[,-1] # A.nofirstrow is the matrix without the first column of A 

A11 <- A[1,1] # the (1,1)-th entry of matrix A

##############
# fit an linear model and plot.
##############

savings <- read.table("u:\\stat526\\dataset\\savings.dat",h=T)
savings
savings <- read.table("u:/stat526/dataset/savings.dat",h=T)
savings
summary(savings)
g <- lm(Savings~Pop15+Pop75+DispInc+Growth,data=savings)

# the fitted model is Savings=28.57-0.4612*Pop15-1.6918*PoP75-0.0003368*DispInc+0.4097*Growth

names(g)  # see the information contained by g
names(summary(g)) 
g$residuals  # see the residual
g$fitted.values # see the fitted.values

# Residual Plot

plot(g$fitted.value,g$residuals,xlab="Fitted Value",ylab="Residual",main="Residual Plot")
abline(0,0)  

# QQ-plot

qqnorm(g$res)
qqline(g$res)

# plot fitted values and observed values for response related to "Pop15"

Y <- cbind(savings$Pop15,savings$Savings,g$fitted.value)
Y.sort <- apply(Y,2,sort)
dimnames(Y.sort)[[2]] <- c("Pop15","Savings","Fitted")
matplot(Y.sort[,1],Y.sort[,-1],xlab="Pop15",type="l")
legend(21,21,c("Observed","Fitted"),lty=1:2)

####################
#Plot four figures in one page
####################

x <- seq(-1,1,0.01)
y1 <- sin(pi*x)
y2 <- cos(pi*x)
y3 <- asin(x)
y4 <- acos(x)

par(mfrow=c(2,2))
plot(x,y1,type="l")
abline(0,0)
plot(x,y2,type="l")
abline(0,0)
plot(x,y3,type="l")
abline(0,0)
plot(x,y4,type="l")
abline(0,0)

####################
#Print plot 
####################

postscript("foo.ps")
par(mfrow=c(2,2))
plot(x,y1,type="l")
abline(0,0)
plot(x,y2,type="l")
abline(0,0)
plot(x,y3,type="l")
abline(0,0)
plot(x,y4,type="l")
abline(0,0)

dev.off()  # close the device(file)


pdf("foo.pdf")
par(mfrow=c(2,2))
plot(x,y1,type="l")
abline(0,0)
plot(x,y2,type="l")
abline(0,0)
plot(x,y3,type="l")
abline(0,0)
plot(x,y4,type="l")
abline(0,0)

dev.off()  # close the device(file)

### You can find a pdf file and ps file on your firectory.
# You still can change your directory by R
# You can copy-paste your plot to your "word" file(*.doc).
###

###################
# Some Interest Plots
###################
# Norm pdf
########

x <- seq(-8,8,0.01)
y1 <- dnorm(x,0,1)
y2 <- dnorm(x,0,0.5)
y3 <- dnorm(x,0,2)
y4 <- dnorm(x,0,4)

matplot(x,cbind(y1,y2,y3,y4),type="l",xlab="x",ylab="Normal PDF",lty=1:4,col=rep(1,4))
legend(-8,0.8,lty=1:4,c(expression(paste(sigma,"=1")),expression(paste(sigma,"=0.5")),expression(paste(sigma,"=2")),expression(paste(sigma,"=4"))))

######
# t pdf
######

x <- seq(-4,4,0.01)
y1 <- dt(x,1)
y2 <- dt(x,5)
y3 <- dt(x,20)
y4 <- dnorm(x,0,1)

matplot(x,cbind(y1,y2,y3,y4),type="l",xlab="x",ylab="Normal PDF",lty=1:4,col=rep(1,4))
legend(-4,0.4,lty=1:4,c("df=1","df=5","df=20","df=infty"))

###############
# Compute the p-values and q-values
###############

pnorm(-5)  # norm probability
x <- seq(0.01,0.99,0.01)  
qnorm(x)  # norm quantile


pt(-5,20)  # t-probability with df=20
qt(x,20)  # t-quantile with df=20


pchisq(0.1,20)  # chisq probability with df=20
qchisq(x,20)  # chisq quantile with df=20

# we still have: pf, qf, pgamma, qgamma, ppois, qpois, and so on.


#################
#Simulations
#################

# Let us generate 100 random variables from N(0,1)

x <- rnorm(100)

# we still have: qf,qt, rgamma, rpois, and so on.


####
# Confidence Interval
####
# normal
####

mu <- 2
sigma <- 4

run <- 100

n <- 10
x <- rnorm(n,mu,sigma)
x.mean <- mean(x)
x.var <- var(x)

CI <- c(x.mean-qt(0.975,n-1)*sqrt(x.var)/sqrt(n),x.mean+qt(0.975,n-1)*sqrt(x.var)/sqrt(n))
CI

(mu>=CI[1])*(mu<=CI[2])

b <- rep(0,run)

for(i in 1:run){
x <- rnorm(n,mu,sigma)
x.mean <- mean(x)
x.var <- var(x)

CI <- c(x.mean-qt(0.975,n-1)*sqrt(x.var)/sqrt(n),x.mean+qt(0.975,n-1)*sqrt(x.var)/sqrt(n))
b[i] <- (mu>=CI[1])*(mu<=CI[2])
}

mean(b)

####
# Binomial
# (p.hat-1.96*sqrt(p.hat(1-p.hat)/n),p.hat+1.96*sqrt(p.hat(1-p.hat)/n))
# where hat p=x/n
####

n <- 10
p <- .3
x <- rbinom(1,n,p)
x

p.hat <- x/n
lower <- p.hat-1.96*sqrt(p.hat*(1-p.hat)/n)
upper <- p.hat+1.96*sqrt(p.hat*(1-p.hat)/n)
c(lower,upper)

##########
# cover probability
##########

run <- 100
n <- 10
p <- 0.3
x <- rbinom(run,n,p)
x
p.hat <- x/n
lower <- p.hat-1.96*sqrt(p.hat*(1-p.hat)/n)
upper <- p.hat+1.96*sqrt(p.hat*(1-p.hat)/n)
result <- cbind(x,p,n,lower,upper)
result
sum((lower<p)*(upper>p))


###########
# Look for different values of p
# for n = 10, 20, 50, 100
###########
run <- 100

n <- 10
power <- matrix(0,99,2)
for(i in 1:99){
p <- 0.01*i
x <- rbinom(run,n,p)
p.hat <- x/n
lower <- p.hat-1.96*sqrt(p.hat*(1-p.hat)/n)
upper <- p.hat+1.96*sqrt(p.hat*(1-p.hat)/n)
result <- cbind(x,p,n,lower,upper)
power[i,1] <- p
power[i,2] <- mean((lower<p)*(upper>p))
}

power.10 <- power

n <- 20
power <- matrix(0,99,2)
for(i in 1:99){
p <- 0.01*i
x <- rbinom(run,n,p)
p.hat <- x/n
lower <- p.hat-1.96*sqrt(p.hat*(1-p.hat)/n)
upper <- p.hat+1.96*sqrt(p.hat*(1-p.hat)/n)
result <- cbind(x,p,n,lower,upper)
power[i,1] <- p
power[i,2] <- mean((lower<p)*(upper>p))
}

power.20 <- power

n <- 50
power <- matrix(0,99,2)
for(i in 1:99){
p <- 0.01*i
x <- rbinom(run,n,p)
p.hat <- x/n
lower <- p.hat-1.96*sqrt(p.hat*(1-p.hat)/n)
upper <- p.hat+1.96*sqrt(p.hat*(1-p.hat)/n)
result <- cbind(x,p,n,lower,upper)
power[i,1] <- p
power[i,2] <- mean((lower<p)*(upper>p))
}

power.50 <- power

n <- 100
power <- matrix(0,99,2)
for(i in 1:99){
p <- 0.01*i
x <- rbinom(run,n,p)
p.hat <- x/n
lower <- p.hat-1.96*sqrt(p.hat*(1-p.hat)/n)
upper <- p.hat+1.96*sqrt(p.hat*(1-p.hat)/n)
result <- cbind(x,p,n,lower,upper)
power[i,1] <- p
power[i,2] <- mean((lower<p)*(upper>p))
}

power.100 <- power

power.all <- cbind(power.10,power.20[,2],power.50[,2],power.100[,2])
matplot(power.all[,1],power.all[,2:5],type="l",lty=1:4)
legend(0.4,0.4,c("n=10","n=20","n=50","n=100"),lty=1:4)

############################
# Hypothesis Testing and Type I error Probability: Test mu=2
############################
# Some Concepts
# (1) Type I error prbabilities:  P(Reject H0|H0)
# (2) Type II error probabilities: P(Accept H0|H1)
# (3) Significance Level: Max(Type I error probabilities)
# (4) Power Function: P(Reject H0)=(a) Type I error probabilities under H0
#                                  (b) 1-Type II error probabilities under H1 
##########

mu <- 2
sigma <- 4
run <- 10000

b <- rep(0,run)
n <- 10

for(i in 1:run){
x <- rnorm(n,mu,sigma)
x.mean <- mean(x)
x.var <- var(x)

b[i] <- abs((x.mean-2)*sqrt(n)/sqrt(x.var))>qt(0.975,n-1)
}

mean(b)

##########
# Power Function
##########


bb <- matrix(0,61,2)
b <- rep(0,run)
run <- 10000
n <- 10
sigma <- 4


for(j in 1:61){
mu <- -4+0.2*(j-1)
bb[j,1] <- mu

for(i in 1:run){
x <- rnorm(n,mu,sigma)
x.mean <- mean(x)
x.var <- var(x)

b[i] <- abs((x.mean-2)*sqrt(n)/sqrt(x.var))>qt(0.975,n-1)
}


bb[j,2] <- mean(b)
}

bb

#########
# Matrix row and column operation
#########

x <- rnorm(1000)
mean(x)
var(x)
median(x)
summary(x)


apply(X,1,mean)  # row mean
apply(X,1,var)  # row variance
apply(X,2,mean)  # column mean
apply(X,2,var)  # cloumn variance



#########
#functions
#########

# define a function f(x)=x^5+7*x^2+2*sin(x)+exp(x)

function.needed  <- function(x){
result <- x^5+7*x^2+2*sin(x)+exp(x)
result
}

x <- c(0.6,1.8,2.7,4.5)
function.needed(x)


#############
# logic operation
#############

x <- rnorm(100)
index1 <- x< -1.96  # if x is less than -1.96, it is true; otherwise it is false.
index2 <- x> 1.96  # if x is greater than 1.96 it is true; otherwise it is false.
sum(index1)  #  how many true for the first 
sum(index2)  #  how many true for the second
sum((1-index1)*(1-index2))  # how many false for both

# the key words are:  <  >  <= >=  ==; "true" is 1 and "false" is "0" 

#############
# Condition
#############

Define f(x)=x^2 if x<0 and f(x)=1 if x>=0

function.discontinuous <- function(x){
if (x<0) result <- x^2 else result <- 1
result
} 

function.discontinuous(-2)
function.discontinuous(2)

# If ... else ...  can only be used in a variable. It does not work on vector and matrix.
# If we want the function works on vector or matrix. The program should be

function.vector <- function(x){
y1 <- x^2
index <- x<0
result <- index*y1+(1-index)
result
} 
 
x <- seq(-2,2,0.01)
y <- function.vector(x)
plot(x,y,type="l")


###############
Order Rank and Sort
##############

# Let us look how to plot the two curve correctly.

Y <- rnorm(100)
sort(Y)  
rank(Y) 
order(Y)

############
# sort: output from small to large 
# rank: give ranks from the first to the last
# order: give the position according to the smallest to the biggest
############


############
# Solve f(x)=0
# Example f(x)=e(x)-3x
# Using the Newton-Raphson Method
############

xx <- 0.5

ff <- exp(xx)-3*xx
dff <- exp(xx)-3
c(xx,ff,dff)
xx <- xx-ff/dff

######
# Iterative
######

ff <- exp(xx)-3*xx
dff <- exp(xx)-3
xx <- xx-ff/dff
c(xx,ff,dff)

###########
# Some Maps
###########
# Google Maps
#########

library(RgoogleMaps)
require(ggmap)

center <- c(-86.88,40.41)
kk <- get_map(location =center,color ="color",source = "google",maptype="terrain",zoom=10)
ggmap(kk,extent = "device",ylab = "Latitude",xlab = "Longitude")

kk <- get_map(location = c(lon = -0.016179, lat = 51.538525),color = "color",source = "google",maptype = "terrain",zoom = 10)
ggmap(kk,extent = "device",ylab = "Latitude",xlab = "Longitude")

kk <- get_map(location = c(lon = -0.016179, lat = 51.538525),color = "color",source = "google",maptype = "satellite",zoom = 16)
ggmap(kk,extent = "device",ylab = "Latitude",xlab = "Longitude")

kk <- get_map(location = c(lon = -0.016179, lat = 51.538525),color = "color",source = "google",maptype = "roadmap",zoom = 16)
ggmap(kk,extent = "device",ylab = "Latitude",xlab = "Longitude")


kk <- get_map(location = c(lon = -0.016179, lat = 51.538525),color = "color",source = "google",maptype = "hybrid",zoom = 16)
ggmap(kk,extent = "device",ylab = "Latitude",xlab = "Longitude")

kk <- get_map(location = c(lon = -0.016179, lat = 51.538525),color = "color",source = "google",maptype = "toner",zoom = 16)
ggmap(kk,extent = "device",ylab = "Latitude",xlab = "Longitude")

########
# Other Maps
#######

library(mapdata)
map('usa')
map('china')
map('japan')

#############

library(maptools)
library(rgdal)
library(spdep)
library(foreign)

setwd("u:\\simulation\\temp\\surveys")
getinfo.shape("indianacolo.shp")
MAP<-readShapePoly("indianacolo.shp")                
plot(MAP)
DATA <- read.dbf("indianacolo.dbf", as.is = FALSE)           
names(DATA)
RATE <- DATA$COLOMAL/DATA$MAL
tmp <- cut(RATE,breaks=7)
summary(tmp)

RATE.CUT <- c(min(RATE)*0.99,quantile(RATE,c(1/7,2/7,3/7,4/7,5/7,6/7)),max(RATE)*1.01)
tmp <- cut(RATE,breaks=RATE.CUT)
summary(tmp)

plot(MAP,col=cm.colors(7)[tmp])
plot(MAP,col=heat.colors(7)[tmp])
plot(MAP,col=terrain.colors(7)[tmp])
plot(MAP,col=topo.colors(7)[tmp])