###########
# Problem 1
###########
# (a)
#####
# Two sample binomial method
#####
# Formula 1
###
> 
> p1 <- 784/(784+236)
> p2 <- 311/(311+66)
> 
> p <- (784+311)/(784+236+311+66)
> z <- (p1-p2)/sqrt(p*(1-p)*(1/(784+236)+1/(311+66)))
> z
[1] -2.269420

####
# Formula 2
####

> z <- (p1-p2)/sqrt(p1*(1-p1)/(784+236)+p2*(1-p2)/(311+66))
> z
[1] -2.384864
##########
# You can use either formula 1 or firmula 2 to test. Both of them give
# The same comclusion.
# Since |z|>1.96, we reject the null hypothesis of independence and 
# conclude that the two variables are not independent.
##########
# Odds Ratio
####
> theta <- 784*66/(311*236)
> sigmasq.log <- 1/784+1/236+1/311+1/66
> z <- abs(log(theta)/sqrt(sigmasq.log))
> z
[1] 2.262080
#####
# Since the absolute value of the log of odds ratio divided by its standard error
# is 2.262, which is greater than 1.96, we reject the null hypothesis of independence
# and conclude two two variables are not independent.
# The two methods are consistent.
#####


#####
# (b)
#####
> theta*exp(-1.96*sqrt(sigmasq.log))
[1] 0.5207729
> theta*exp(1.96*sqrt(sigmasq.log))
[1] 0.954392

##########
# The 95% confidence interval for the odds ratio is [0.5208,0.9544]. 
##########

######
# (c)
######

Those who favor Gun Registration and Death Penalty are not independent. People who favor Gun Registration
faver less on Death Penalty. Of those who favor Gun Registration, they were 30% less favor the Death 
Penalty than those who oppose Gun Registration. The error of the study is +/- 21%.


############
# Problem 2
############
# (a) Two-sample 
#     binomial
#####
# <40
#####
> p1 <- 577/611
> p2 <- 682/739
> p <- (577+682)/(611+739)
> z <- (p1-p2)/sqrt(p*(1-p)*(1/611+1/739))
> z
[1] 1.567137
###
# 40-59
###
> p1 <- 164/168
> p2 <- 245/319
> p <- (164+245)/(168+319)
> z <- (p1-p2)/sqrt(p*(1-p)*(1/168+1/319))
> z
[1] 5.954072

#####
# Or you can have
#####
# Age <40
####
> p1 <- 577/611
> p2 <- 682/739
> p <- (577+682)/(611+739)
> z <- (p1-p2)/sqrt(p1*(1-p1)/611+p2*(1-p2)/739)
> z
[1] 1.591122
####
# Age 40-59
####
> p1 <- 164/168
> p2 <- 245/319
> p <- (164+245)/(168+319)
> z <- (p1-p2)/sqrt(p1*(1-p1)/168+p2*(1-p2)/319)
> z
[1] 7.885662

######
# For age<40, the null hypothesis of indepdence is accepted since the absolute 
# value of z is less than 1.96.
# However, for age between 40 and 59, the null hypothesis of indepdence is rejected.
######

#####
# Odds ratio
#####
# Age <40
###
> theta <- 577*57/(682*34)
> sigmasq.log <- 1/577+1/34+1/682+1/57
> z <- abs(log(theta)/sqrt(sigmasq.log))
> z
[1] 1.560609
> theta*exp(-1.96*sqrt(sigmasq.log))
[1] 0.9144388
> theta*exp(1.96*sqrt(sigmasq.log))
[1] 2.199987
####
# Age 40-59
####
> theta <- 164*74/(245*4)
> sigmasq.log <- 1/164+1/4+1/245+1/74
> z <- abs(log(theta)/sqrt(sigmasq.log))
> theta*exp(-1.96*sqrt(sigmasq.log))
[1] 4.4415
> theta*exp(1.96*sqrt(sigmasq.log))
[1] 34.52783
> theta
[1] 12.38367
#########
# The same conclusion as we see based on two sample binomial method. 
# The null hypothesis of indepenence for age <40 is accepted, 
# but the null hypothesis of independence for age between 40 and 59 is rejected.
#########


######
# (b)
######
# For age <40, the 95% confidence interval for the odds ratio is
# 0.91 and 2.20. For age between 40 and 59, it is 4.44 and 34.53.
######


#####
# (c)
#####
# For age < 40, the breathing test result is not related to smoking status, but for age 
# between 40 and 59, they are related. We can probably say that smoking will cause abnormal
# breathing test result in a long period, such as ten or more years.
# For age between 40 and 59, the risk for those who smoke is about 12.4 times as high as those
# who did not smoke. The minimum is at least 4.4 times and the maximum is about 34 times.
#####


#####################
# Problem 3
#####################
# Please repeated my code in 
# your computer. The result should be
# close but not identical. The plot is omited. You 
# Can get the plot from my last command.
# the values of b are not required to output.
# The curve exactly is not a continuous function of n.
#########
> p <- 0.5
> m <- 10
> n <- 10
> 
> b <- matrix(0,100,2)
> 
> for(k in 1:100){
+ 
+ m <- 5+k
+ n <- 5+k
+ 
+ b[k,1] <- m
+ 
+ run <- 10000
+ result <- matrix(0,run,4)
+ 
+ for(i in 1:run){
+ x11 <- rbinom(1,m,p)
+ x12 <- m-x11
+ x21 <- rbinom(1,n,p)
+ x22 <- n-x21
+ result[i,1] <- min(x11,x12,x21,x22)
+ result[i,2] <- log(x11*x22/(x21*x12))
+ result[i,3] <- 1/x11+1/x12+1/x21+1/x22
+ result[i,4] <- abs(result[i,2]/sqrt(result[i,3]))
+ }
+ 
+ RESULT <- result[result[,1]>0,]
+ 
+ b[k,2] <- mean(RESULT[,4]>1.96)
+ 
+ }
> b
       [,1]       [,2]
  [1,]    6 0.01897048
  [2,]    7 0.04247861
  [3,]    8 0.01361097
  [4,]    9 0.02824289
  [5,]   10 0.03502609
  [6,]   11 0.04784785
  [7,]   12 0.06168018
  [8,]   13 0.02800840
  [9,]   14 0.03631089
 [10,]   15 0.04230000
 [11,]   16 0.04730000
 [12,]   17 0.05880588
 [13,]   18 0.06510651
 [14,]   19 0.04140000
 [15,]   20 0.03870000
 [16,]   21 0.04630000
 [17,]   22 0.04730000
 [18,]   23 0.05400000
 [19,]   24 0.06170000
 [20,]   25 0.04620000
 [21,]   26 0.03850000
 [22,]   27 0.04390000
 [23,]   28 0.04650000
 [24,]   29 0.04460000
 [25,]   30 0.05010000
 [26,]   31 0.05760000
 [27,]   32 0.05710000
 [28,]   33 0.04630000
 [29,]   34 0.03820000
 [30,]   35 0.04170000
 [31,]   36 0.04540000
 [32,]   37 0.04660000
 [33,]   38 0.05260000
 [34,]   39 0.05870000
 [35,]   40 0.05680000
 [36,]   41 0.05830000
 [37,]   42 0.04530000
 [38,]   43 0.04270000
 [39,]   44 0.04430000
 [40,]   45 0.04830000
 [41,]   46 0.04250000
 [42,]   47 0.04710000
 [43,]   48 0.05170000
 [44,]   49 0.05600000
 [45,]   50 0.05490000
 [46,]   51 0.05840000
 [47,]   52 0.04790000
 [48,]   53 0.04140000
 [49,]   54 0.04180000
 [50,]   55 0.04960000
 [51,]   56 0.05000000
 [52,]   57 0.04600000
 [53,]   58 0.05340000
 [54,]   59 0.05410000
 [55,]   60 0.05680000
 [56,]   61 0.05760000
 [57,]   62 0.06100000
 [58,]   63 0.04200000
 [59,]   64 0.04400000
 [60,]   65 0.04460000
 [61,]   66 0.04400000
 [62,]   67 0.04630000
 [63,]   68 0.04760000
 [64,]   69 0.04950000
 [65,]   70 0.05030000
 [66,]   71 0.05540000
 [67,]   72 0.05740000
 [68,]   73 0.05940000
 [69,]   74 0.05990000
 [70,]   75 0.04570000
 [71,]   76 0.04250000
 [72,]   77 0.04280000
 [73,]   78 0.04590000
 [74,]   79 0.04500000
 [75,]   80 0.04860000
 [76,]   81 0.04970000
 [77,]   82 0.05120000
 [78,]   83 0.05400000
 [79,]   84 0.05430000
 [80,]   85 0.05310000
 [81,]   86 0.05840000
 [82,]   87 0.06020000
 [83,]   88 0.04490000
 [84,]   89 0.04170000
 [85,]   90 0.04620000
 [86,]   91 0.04440000
 [87,]   92 0.04560000
 [88,]   93 0.04510000
 [89,]   94 0.04900000
 [90,]   95 0.04790000
 [91,]   96 0.04880000
 [92,]   97 0.05150000
 [93,]   98 0.05540000
 [94,]   99 0.05120000
 [95,]  100 0.05550000
 [96,]  101 0.05550000
 [97,]  102 0.04700000
 [98,]  103 0.04160000
 [99,]  104 0.04340000
[100,]  105 0.04670000
> plot(b[,1],b[,2],type="l",xlab="n",ylab="rejection rate")