########### # Problem 1 ########### # (a) ##### # Two sample binomial method ##### # Formula 1 ### > > p1 <- 784/(784+236) > p2 <- 311/(311+66) > > p <- (784+311)/(784+236+311+66) > z <- (p1-p2)/sqrt(p*(1-p)*(1/(784+236)+1/(311+66))) > z [1] -2.269420 #### # Formula 2 #### > z <- (p1-p2)/sqrt(p1*(1-p1)/(784+236)+p2*(1-p2)/(311+66)) > z [1] -2.384864 ########## # You can use either formula 1 or firmula 2 to test. Both of them give # The same comclusion. # Since |z|>1.96, we reject the null hypothesis of independence and # conclude that the two variables are not independent. ########## # Odds Ratio #### > theta <- 784*66/(311*236) > sigmasq.log <- 1/784+1/236+1/311+1/66 > z <- abs(log(theta)/sqrt(sigmasq.log)) > z [1] 2.262080 ##### # Since the absolute value of the log of odds ratio divided by its standard error # is 2.262, which is greater than 1.96, we reject the null hypothesis of independence # and conclude two two variables are not independent. # The two methods are consistent. ##### ##### # (b) ##### > theta*exp(-1.96*sqrt(sigmasq.log)) [1] 0.5207729 > theta*exp(1.96*sqrt(sigmasq.log)) [1] 0.954392 ########## # The 95% confidence interval for the odds ratio is [0.5208,0.9544]. ########## ###### # (c) ###### Those who favor Gun Registration and Death Penalty are not independent. People who favor Gun Registration faver less on Death Penalty. Of those who favor Gun Registration, they were 30% less favor the Death Penalty than those who oppose Gun Registration. The error of the study is +/- 21%. ############ # Problem 2 ############ # (a) Two-sample # binomial ##### # <40 ##### > p1 <- 577/611 > p2 <- 682/739 > p <- (577+682)/(611+739) > z <- (p1-p2)/sqrt(p*(1-p)*(1/611+1/739)) > z [1] 1.567137 ### # 40-59 ### > p1 <- 164/168 > p2 <- 245/319 > p <- (164+245)/(168+319) > z <- (p1-p2)/sqrt(p*(1-p)*(1/168+1/319)) > z [1] 5.954072 ##### # Or you can have ##### # Age <40 #### > p1 <- 577/611 > p2 <- 682/739 > p <- (577+682)/(611+739) > z <- (p1-p2)/sqrt(p1*(1-p1)/611+p2*(1-p2)/739) > z [1] 1.591122 #### # Age 40-59 #### > p1 <- 164/168 > p2 <- 245/319 > p <- (164+245)/(168+319) > z <- (p1-p2)/sqrt(p1*(1-p1)/168+p2*(1-p2)/319) > z [1] 7.885662 ###### # For age<40, the null hypothesis of indepdence is accepted since the absolute # value of z is less than 1.96. # However, for age between 40 and 59, the null hypothesis of indepdence is rejected. ###### ##### # Odds ratio ##### # Age <40 ### > theta <- 577*57/(682*34) > sigmasq.log <- 1/577+1/34+1/682+1/57 > z <- abs(log(theta)/sqrt(sigmasq.log)) > z [1] 1.560609 > theta*exp(-1.96*sqrt(sigmasq.log)) [1] 0.9144388 > theta*exp(1.96*sqrt(sigmasq.log)) [1] 2.199987 #### # Age 40-59 #### > theta <- 164*74/(245*4) > sigmasq.log <- 1/164+1/4+1/245+1/74 > z <- abs(log(theta)/sqrt(sigmasq.log)) > theta*exp(-1.96*sqrt(sigmasq.log)) [1] 4.4415 > theta*exp(1.96*sqrt(sigmasq.log)) [1] 34.52783 > theta [1] 12.38367 ######### # The same conclusion as we see based on two sample binomial method. # The null hypothesis of indepenence for age <40 is accepted, # but the null hypothesis of independence for age between 40 and 59 is rejected. ######### ###### # (b) ###### # For age <40, the 95% confidence interval for the odds ratio is # 0.91 and 2.20. For age between 40 and 59, it is 4.44 and 34.53. ###### ##### # (c) ##### # For age < 40, the breathing test result is not related to smoking status, but for age # between 40 and 59, they are related. We can probably say that smoking will cause abnormal # breathing test result in a long period, such as ten or more years. # For age between 40 and 59, the risk for those who smoke is about 12.4 times as high as those # who did not smoke. The minimum is at least 4.4 times and the maximum is about 34 times. ##### ##################### # Problem 3 ##################### # Please repeated my code in # your computer. The result should be # close but not identical. The plot is omited. You # Can get the plot from my last command. # the values of b are not required to output. # The curve exactly is not a continuous function of n. ######### > p <- 0.5 > m <- 10 > n <- 10 > > b <- matrix(0,100,2) > > for(k in 1:100){ + + m <- 5+k + n <- 5+k + + b[k,1] <- m + + run <- 10000 + result <- matrix(0,run,4) + + for(i in 1:run){ + x11 <- rbinom(1,m,p) + x12 <- m-x11 + x21 <- rbinom(1,n,p) + x22 <- n-x21 + result[i,1] <- min(x11,x12,x21,x22) + result[i,2] <- log(x11*x22/(x21*x12)) + result[i,3] <- 1/x11+1/x12+1/x21+1/x22 + result[i,4] <- abs(result[i,2]/sqrt(result[i,3])) + } + + RESULT <- result[result[,1]>0,] + + b[k,2] <- mean(RESULT[,4]>1.96) + + } > b [,1] [,2] [1,] 6 0.01897048 [2,] 7 0.04247861 [3,] 8 0.01361097 [4,] 9 0.02824289 [5,] 10 0.03502609 [6,] 11 0.04784785 [7,] 12 0.06168018 [8,] 13 0.02800840 [9,] 14 0.03631089 [10,] 15 0.04230000 [11,] 16 0.04730000 [12,] 17 0.05880588 [13,] 18 0.06510651 [14,] 19 0.04140000 [15,] 20 0.03870000 [16,] 21 0.04630000 [17,] 22 0.04730000 [18,] 23 0.05400000 [19,] 24 0.06170000 [20,] 25 0.04620000 [21,] 26 0.03850000 [22,] 27 0.04390000 [23,] 28 0.04650000 [24,] 29 0.04460000 [25,] 30 0.05010000 [26,] 31 0.05760000 [27,] 32 0.05710000 [28,] 33 0.04630000 [29,] 34 0.03820000 [30,] 35 0.04170000 [31,] 36 0.04540000 [32,] 37 0.04660000 [33,] 38 0.05260000 [34,] 39 0.05870000 [35,] 40 0.05680000 [36,] 41 0.05830000 [37,] 42 0.04530000 [38,] 43 0.04270000 [39,] 44 0.04430000 [40,] 45 0.04830000 [41,] 46 0.04250000 [42,] 47 0.04710000 [43,] 48 0.05170000 [44,] 49 0.05600000 [45,] 50 0.05490000 [46,] 51 0.05840000 [47,] 52 0.04790000 [48,] 53 0.04140000 [49,] 54 0.04180000 [50,] 55 0.04960000 [51,] 56 0.05000000 [52,] 57 0.04600000 [53,] 58 0.05340000 [54,] 59 0.05410000 [55,] 60 0.05680000 [56,] 61 0.05760000 [57,] 62 0.06100000 [58,] 63 0.04200000 [59,] 64 0.04400000 [60,] 65 0.04460000 [61,] 66 0.04400000 [62,] 67 0.04630000 [63,] 68 0.04760000 [64,] 69 0.04950000 [65,] 70 0.05030000 [66,] 71 0.05540000 [67,] 72 0.05740000 [68,] 73 0.05940000 [69,] 74 0.05990000 [70,] 75 0.04570000 [71,] 76 0.04250000 [72,] 77 0.04280000 [73,] 78 0.04590000 [74,] 79 0.04500000 [75,] 80 0.04860000 [76,] 81 0.04970000 [77,] 82 0.05120000 [78,] 83 0.05400000 [79,] 84 0.05430000 [80,] 85 0.05310000 [81,] 86 0.05840000 [82,] 87 0.06020000 [83,] 88 0.04490000 [84,] 89 0.04170000 [85,] 90 0.04620000 [86,] 91 0.04440000 [87,] 92 0.04560000 [88,] 93 0.04510000 [89,] 94 0.04900000 [90,] 95 0.04790000 [91,] 96 0.04880000 [92,] 97 0.05150000 [93,] 98 0.05540000 [94,] 99 0.05120000 [95,] 100 0.05550000 [96,] 101 0.05550000 [97,] 102 0.04700000 [98,] 103 0.04160000 [99,] 104 0.04340000 [100,] 105 0.04670000 > plot(b[,1],b[,2],type="l",xlab="n",ylab="rejection rate")