R-IC09.R (9985B)
1 #*******************************************************# 2 #*******************************************************# 3 # STAT320 - IC09 Solutions # 4 #*******************************************************# 5 #*******************************************************# 6 #***************************************************# 7 # Type I Error/Power for Two Sample Proportions # 8 #***************************************************# 9 10 #***************************************************# 11 #***************************************************# 12 #***************************************************# 13 #***************************************************# 14 #***************************************************# 15 16 #***************************************************# 17 # Means P-value 18 #***************************************************# 19 set.seed(257) 20 #***************************************************# 21 22 two.prop.permute.pval.f = function(xA,nA,xB,nB,LowTail = TRUE,NSim=100000) 23 { 24 xobs = xA/nA - xB/nB 25 # Reconstruct sample A 26 xsampA = rep(0,nA) 27 xsampA[1:xA] = rep(1,xA) 28 29 # Reconstruct sample B 30 xsampB = rep(0,nB) 31 xsampB[1:xB] = rep(1,xB) 32 33 # Combine Samples to one super sample 34 supersample = c(xsampA,xsampB) 35 36 # Create object where simulated DIFFERENCES of averages will be stored 37 ODiffProps = numeric(NSim) 38 39 # Repeat simulations NSim times 40 for(i in 1:NSim) 41 { 42 43 # Shuffle the elements inthe supersample 44 shuffledsupersample = sample(supersample) 45 46 # Generate sample 1 by resampling super Sample WITHOUT replacement 47 # in practice we select the first nA elements of the shuffled supersample 48 NewSampleA = shuffledsupersample[1:nA] 49 50 # Assign remaining values in supersample to Sample B 51 # that is the elements (nA+1) through (nA+nB) 52 NewSampleB = shuffledsupersample[nA+1:nB] 53 54 # Save difference in means simulated samples 55 ODiffProps[i] = mean(NewSampleA) - mean(NewSampleB ) 56 } 57 # Compute p-value 58 if(LowTail==TRUE) 59 { 60 # if alternative Ha is mu < mu0 61 p = sum(ODiffProps<=xobs)/NSim 62 }else 63 { 64 # if alternative Ha is mu > mu0 65 p = sum(ODiffProps>=xobs)/NSim 66 } 67 return(p) 68 } 69 70 71 two.prop.bootstrap.pval.f = function(xA,nA,xB,nB,LowTail = TRUE,NSim=100000) 72 { 73 xobs = xA/nA - xB/nB 74 # Reconstruct sample A 75 xsampA = rep(0,nA) 76 xsampA[1:xA] = rep(1,xA) 77 78 # Reconstruct sample B 79 xsampB = rep(0,nB) 80 xsampB[1:xB] = rep(1,xB) 81 82 # Combine Samples to one super sample 83 supersample = c(xsampA,xsampB) 84 85 # Create object where simulated DIFFERENCES of averages will be stored 86 ODiffProps = numeric(NSim) 87 88 # Repeat simulations NSim times 89 for(i in 1:NSim) 90 { 91 92 # Generate sample 1 by resampling super Sample WITH replacement 93 NewSampleA = sample(supersample,size = nA,replace = TRUE) 94 95 # Generate sample 2 by resampling super Sample WITH replacement 96 NewSampleB = sample(supersample,size = nB,replace = TRUE) 97 98 # Save difference in means simulated samples 99 ODiffProps[i] = mean(NewSampleA) - mean(NewSampleB ) 100 } 101 # Compute p-value 102 if(LowTail==TRUE) 103 { 104 # if alternative Ha is mu < mu0 105 p = sum(ODiffProps<=xobs)/NSim 106 }else 107 { 108 # if alternative Ha is mu > mu0 109 p = sum(ODiffProps>=xobs)/NSim 110 } 111 return(p) 112 } 113 114 115 #***************************************************# 116 # Means CI 117 #***************************************************# 118 119 two.prop.CI.f = function(xA,nA,xB,nB,alpha=.05,NSim=100000) 120 { 121 # Reconstruct sample A 122 xsampA = rep(0,nA) 123 xsampA[1:xA] = rep(1,xA) 124 125 # Reconstruct sample B 126 xsampB = rep(0,nB) 127 xsampB[1:xB] = rep(1,xB) 128 129 # Create object where simulated DIFFERENCES of averages will be stored 130 ODiffProps = numeric(NSim) 131 132 # Repeat simulations NSim times 133 for(i in 1:NSim) 134 { 135 # Generate sample 1 by resampling original Sample A WITH replacement 136 NewSampleA = sample(xsampA,size = nA,replace = TRUE) 137 138 # Generate sample 2 by resampling original Sample B WITH replacement 139 NewSampleB = sample(xsampB,size = nB,replace = TRUE) 140 141 # Save difference in means simulated samples 142 ODiffProps[i] = mean(NewSampleA) - mean(NewSampleB) 143 } 144 145 # Compute CI 146 ci =quantile(ODiffProps,probs = c(alpha/2,1-alpha/2)) 147 return(ci) 148 } 149 150 151 152 153 #***************************************************# 154 #***************************************************# 155 #***************************************************# 156 #***************************************************# 157 #***************************************************# 158 #***************************************************# 159 # Type I Simulation Study 160 #***************************************************# 161 #***************************************************# 162 #***************************************************# 163 #***************************************************# 164 #***************************************************# 165 #***************************************************# 166 167 NSTudyReps = 3000 168 NSim = 10000 169 170 nA = 529 171 nB = 326 172 173 BootNullPVals = numeric(NSTudyReps) 174 PermNullPVals = numeric(NSTudyReps) 175 176 #***************************************************# 177 # The true proportion in the Common Population: 178 # We randomly assign this value. We should explore different 179 # values to see what happens. 180 #***************************************************# 181 182 183 #***************************************************# 184 185 start_time <- Sys.time() 186 187 for(s in 1:NSTudyReps) 188 { 189 # Generating Samples A and B Under the Null and Counting the successes 190 191 192 193 194 195 196 197 # p-val using bootstrap method 198 p1 = two.prop.bootstrap.pval.f(nSA,nA,nSB,nB,LowTail = FALSE,NSim=NSim) 199 200 # p-val using permutation method 201 p2 = two.prop.permute.pval.f(nSA,nA,nSB,nB,LowTail = FALSE,NSim=NSim) 202 203 # Storing each p-val into the appropriate vector 204 BootNullPVals[s] = p1 205 PermNullPVals[s] = p2 206 cat(s,"\n") 207 } 208 209 end_time <- Sys.time() 210 211 end_time - start_time 212 213 # Compute the percentage of replicates that p-value less than 214 # given alpha 215 216 a=.05 217 sum(BootNullPVals<=a)/NSTudyReps 218 sum(PermNullPVals<=a)/NSTudyReps 219 220 221 #***************************************************# 222 #***************************************************# 223 #***************************************************# 224 #***************************************************# 225 #***************************************************# 226 # Power Simulation Study 227 #***************************************************# 228 #***************************************************# 229 #***************************************************# 230 #***************************************************# 231 #***************************************************# 232 #***************************************************# 233 234 NSTudyReps = 3000 235 NSim = 10000 236 237 238 nA = 529 239 nB = 326 240 241 BootPowPVals = numeric(NSTudyReps) 242 PermPowPVals = numeric(NSTudyReps) 243 244 #***************************************************# 245 # The true proportions in each Population is Unknown: 246 # We randomly assign these values. We should explore different 247 # values to see what happens. 248 # Note, the two values can be exactly the same or different 249 #***************************************************# 250 251 252 #***************************************************# 253 254 start_time <- Sys.time() 255 256 for(s in 1:NSTudyReps) 257 { 258 # Generating Samples A and B Under the Alternative and Counting the successes 259 260 261 262 263 264 265 266 # p-val using bootstrap method 267 p1 = two.prop.bootstrap.pval.f(nSA,nA,nSB,nB,LowTail = FALSE,NSim=NSim) 268 269 # p-val using permutation method 270 p2 = two.prop.permute.pval.f(nSA,nA,nSB,nB,LowTail = FALSE,NSim=NSim) 271 272 # Storing each p-val into the appropriate vector 273 BootPowPVals[s] = p1 274 PermPowPVals[s] = p2 275 cat(s,"\n") 276 } 277 278 end_time <- Sys.time() 279 280 end_time - start_time 281 282 # Compute the percentage of replicates that p-value less than 283 # given alpha. This will give us the power of the method 284 285 a=.05 286 sum(BootPowPVals<=a)/NSTudyReps 287 sum(PermPowPVals<=a)/NSTudyReps 288 289 290 291 #***************************************************# 292 #***************************************************# 293 #***************************************************# 294 #***************************************************# 295 #***************************************************# 296 # CI Study 297 #***************************************************# 298 #***************************************************# 299 #***************************************************# 300 #***************************************************# 301 #***************************************************# 302 #***************************************************# 303 304 NSTudyReps = 1000 305 NSim = 10000 306 307 nA = 529 308 nB = 326 309 a = .05 310 311 CIIncludeTrueDiff = numeric(NSTudyReps) 312 313 #***************************************************# 314 # The true proportions in the Two Population: 315 # We randomly assign these values. We should explore different 316 # values to see what happens. 317 # Note, the two values can be exactly the same or different 318 #***************************************************# 319 320 321 322 # The CI is for the difference in the two proportions 323 324 TrueDiff = pTA - pTB 325 326 #***************************************************# 327 328 start_time <- Sys.time() 329 330 for(s in 1:NSTudyReps) 331 { 332 # Generating Samples A and B Under the True Proportions and Counting the successes 333 334 335 336 337 338 # CI using bootstrap method 339 CI = two.prop.CI.f(nSA,nA,nSB,nB,alpha=a,NSim=NSim) 340 341 # Check if CI included true difference 342 if(CI[1] <= TrueDiff && TrueDiff <= CI[2]) 343 { 344 CIIncludeTrueDiff[s]=1 345 }else 346 { 347 CIIncludeTrueDiff[s]=0 348 } 349 cat(s,"\n") 350 } 351 352 end_time <- Sys.time() 353 354 end_time - start_time 355 356 # Compute the percentage of replicates that CI included truediff 357 358 sum(CIIncludeTrueDiff)/NSTudyReps 359 360 361 362 363 364