R-IC10.R (13263B)
1 #*******************************************************# 2 #*******************************************************# 3 # STAT320 - IC10 Solutions # 4 #*******************************************************# 5 #*******************************************************# 6 #***************************************************# 7 # Type I/Power for Two Sample Means # 8 #***************************************************# 9 10 #***************************************************# 11 #***************************************************# 12 #***************************************************# 13 #***************************************************# 14 #***************************************************# 15 16 #***************************************************# 17 # Means P-value 18 #***************************************************# 19 set.seed(3697) 20 #***************************************************# 21 22 two.mean.perm.pval.f = function(xsampA,xsampB,LowTail = TRUE,NSim=100000) 23 { 24 # Compute Sizes of each sample 25 nA = length(xsampA) 26 nB = length(xsampB) 27 28 xobs = mean(xsampA)-mean(xsampB) 29 30 # Combine Samples to one super sample 31 supersample = c(xsampA,xsampB) 32 33 # Create object where simulated DIFFERENCES of averages will be stored 34 ODiffBars = numeric(NSim) 35 36 # Repeat simulations NSim times 37 for(i in 1:NSim) 38 { 39 shuffledsupersample = sample(supersample) 40 41 # Generate sample 1 by resampling super Sample WITHOUT replacement 42 # in practice we select the first nA elements of the shuffled supersample 43 NewSampleA = shuffledsupersample[1:nA] 44 45 # Assign remaining values in supersample to Sample B 46 # that is the elements (nA+1) through (nA+nB) 47 NewSampleB = shuffledsupersample[nA+1:nB] 48 49 # Save difference in means simulated samples 50 ODiffBars[i] = mean(NewSampleA) - mean(NewSampleB ) 51 } 52 # Compute p-value 53 if(LowTail==TRUE) 54 { 55 # if alternative Ha is mu < mu0 56 p = sum(ODiffBars<=xobs)/NSim 57 }else 58 { 59 # if alternative Ha is mu > mu0 60 p = sum(ODiffBars>=xobs)/NSim 61 } 62 return(p) 63 } 64 65 66 two.mean.perm.recenter.pval.f = function(xsampA,xsampB,LowTail = TRUE,NSim=100000) 67 { 68 # Compute Sizes of each sample 69 nA = length(xsampA) 70 nB = length(xsampB) 71 72 xobs = mean(xsampA)-mean(xsampB) 73 74 # center samples at 0 by subtracting the means 75 xsampA = xsampA - mean(xsampA) 76 77 xsampB = xsampB - mean(xsampB) 78 79 # Combine Samples to one super sample 80 supersample = c(xsampA,xsampB) 81 82 # Create object where simulated DIFFERENCES of averages will be stored 83 ODiffBars = numeric(NSim) 84 85 # Repeat simulations NSim times 86 for(i in 1:NSim) 87 { 88 shuffledsupersample = sample(supersample) 89 90 # Generate sample 1 by resampling super Sample WITHOUT replacement 91 # in practice we select the first nA elements of the shuffled supersample 92 NewSampleA = shuffledsupersample[1:nA] 93 94 # Assign remaining values in supersample to Sample B 95 # that is the elements (nA+1) through (nA+nB) 96 NewSampleB = shuffledsupersample[nA+1:nB] 97 98 # Save difference in means simulated samples 99 ODiffBars[i] = mean(NewSampleA) - mean(NewSampleB ) 100 } 101 # Compute p-value 102 if(LowTail==TRUE) 103 { 104 # if alternative Ha is mu < mu0 105 p = sum(ODiffBars<=xobs)/NSim 106 }else 107 { 108 # if alternative Ha is mu > mu0 109 p = sum(ODiffBars>=xobs)/NSim 110 } 111 return(p) 112 } 113 114 115 two.mean.boot.pval.f = function(xsampA,xsampB,LowTail = TRUE,NSim=100000) 116 { 117 # Compute Sizes of each sample 118 nA = length(xsampA) 119 nB = length(xsampB) 120 121 xobs = mean(xsampA)-mean(xsampB) 122 123 # Combine Samples to one super sample 124 supersample = c(xsampA,xsampB) 125 126 # Create object where simulated DIFFERENCES of averages will be stored 127 ODiffBars = numeric(NSim) 128 129 # Repeat simulations NSim times 130 for(i in 1:NSim) 131 { 132 shuffledsupersample = sample(supersample) 133 134 # Generate sample 1 by resampling super Sample WITH replacement 135 NewSampleA = sample(supersample,size = nA,replace = TRUE) 136 137 # Generate sample 2 by resampling super Sample WITH replacement 138 NewSampleB = sample(supersample,size = nB,replace = TRUE) 139 140 # Save difference in means simulated samples 141 ODiffBars[i] = mean(NewSampleA) - mean(NewSampleB ) 142 } 143 # Compute p-value 144 if(LowTail==TRUE) 145 { 146 # if alternative Ha is mu < mu0 147 p = sum(ODiffBars<=xobs)/NSim 148 }else 149 { 150 # if alternative Ha is mu > mu0 151 p = sum(ODiffBars>=xobs)/NSim 152 } 153 return(p) 154 } 155 156 157 two.mean.boot.recenter.pval.f = function(xsampA,xsampB,LowTail = TRUE,NSim=100000) 158 { 159 # Compute Sizes of each sample 160 nA = length(xsampA) 161 nB = length(xsampB) 162 163 xobs = mean(xsampA)-mean(xsampB) 164 165 # center samples at 0 by subtracting the means 166 xsampA = xsampA - mean(xsampA) 167 168 xsampB = xsampB - mean(xsampB) 169 170 # Combine Samples to one super sample 171 supersample = c(xsampA,xsampB) 172 173 # Create object where simulated DIFFERENCES of averages will be stored 174 ODiffBars = numeric(NSim) 175 176 # Repeat simulations NSim times 177 for(i in 1:NSim) 178 { 179 shuffledsupersample = sample(supersample) 180 181 # Generate sample 1 by resampling super Sample WITH replacement 182 NewSampleA = sample(supersample,size = nA,replace = TRUE) 183 184 # Generate sample 2 by resampling super Sample WITH replacement 185 NewSampleB = sample(supersample,size = nB,replace = TRUE) 186 187 # Save difference in means simulated samples 188 ODiffBars[i] = mean(NewSampleA) - mean(NewSampleB ) 189 } 190 # Compute p-value 191 if(LowTail==TRUE) 192 { 193 # if alternative Ha is mu < mu0 194 p = sum(ODiffBars<=xobs)/NSim 195 }else 196 { 197 # if alternative Ha is mu > mu0 198 p = sum(ODiffBars>=xobs)/NSim 199 } 200 return(p) 201 } 202 203 204 #***************************************************# 205 # Means CI 206 #***************************************************# 207 208 two.mean.CI.f = function(xsampA,xsampB,alpha=.05,NSim=100000) 209 { 210 # Compute Sizes of each sample 211 nA = length(xsampA) 212 nB = length(xsampB) 213 214 # Create object where simulated DIFFERENCES of averages will be stored 215 ODiffBars = numeric(NSim) 216 217 # Repeat simulations NSim times 218 for(i in 1:NSim) 219 { 220 # Generate sample 1 by resampling original sample WITH replacement 221 NewSampleA = sample(x= xsampA,size = nA,replace = TRUE) 222 223 # Generate sample 2 by resampling original sample WITH replacement 224 NewSampleB = sample(x= xsampB,size = nB,replace = TRUE) 225 226 # Save difference in means simulated samples 227 ODiffBars[i] = mean(NewSampleA) - mean(NewSampleB) 228 } 229 230 # Compute CI 231 ci =quantile(ODiffBars,probs = c(alpha/2,1-alpha/2)) 232 return(ci) 233 } 234 235 236 237 #***************************************************# 238 #***************************************************# 239 #***************************************************# 240 #***************************************************# 241 #***************************************************# 242 #***************************************************# 243 # Type I Simulation Study - Case 1: Normal 244 #***************************************************# 245 #***************************************************# 246 #***************************************************# 247 #***************************************************# 248 #***************************************************# 249 #***************************************************# 250 251 NSTudyReps = 10000 252 NSim = 10000 253 254 nA = 35 255 nB = 35 256 LowTail = TRUE 257 258 PermNullPVals = numeric(NSTudyReps) 259 PermRecNullPVals = numeric(NSTudyReps) 260 BootNullPVals = numeric(NSTudyReps) 261 BootRecNullPVals = numeric(NSTudyReps) 262 263 #***************************************************# 264 # The true distributionin the Common Population: 265 # We randomly select this. We should explore different 266 # options. These options will address two components: 267 # 1) Shape of distribution (Normal, Uniform, Exponential, etc.) 268 # 2) Parameters of distribution (Mean, SD, lambda, etc.) 269 #***************************************************# 270 271 272 #***************************************************# 273 274 start_time <- Sys.time() 275 276 for(s in 1:NSTudyReps) 277 { 278 # Generating Samples A and B Under the Null Using the given distribution 279 280 281 282 283 284 285 286 287 # p-val using permutation method 288 PermNullPVals[s] = two.mean.perm.pval.f(SampA,SampB,LowTail = LowTail,NSim=NSim) 289 290 # p-val using recentering permutation method 291 PermRecNullPVals[s] = two.mean.perm.recenter.pval.f(SampA,SampB,LowTail = LowTail,NSim=NSim) 292 293 # p-val using bootstrap method 294 BootNullPVals[s] = two.mean.boot.pval.f(SampA,SampB,LowTail = LowTail,NSim=NSim) 295 296 # p-val using recentering bootstrap method 297 BootRecNullPVals[s] = two.mean.boot.recenter.pval.f(SampA,SampB,LowTail = LowTail,NSim=NSim) 298 299 cat(s,"\n") 300 } 301 302 end_time <- Sys.time() 303 304 end_time - start_time 305 306 # Compute the percentage of replicates that p-value less than 307 # given alpha 308 309 a=.05 310 sum(PermNullPVals<=a)/NSTudyReps 311 sum(PermRecNullPVals<=a)/NSTudyReps 312 sum(BootNullPVals<=a)/NSTudyReps 313 sum(BootRecNullPVals<=a)/NSTudyReps 314 315 316 #***************************************************# 317 #***************************************************# 318 #***************************************************# 319 #***************************************************# 320 #***************************************************# 321 # Power Simulation Study 322 #***************************************************# 323 #***************************************************# 324 #***************************************************# 325 #***************************************************# 326 #***************************************************# 327 #***************************************************# 328 329 330 PermPowPVals = numeric(NSTudyReps) 331 PermRecPowPVals = numeric(NSTudyReps) 332 BootPowPVals = numeric(NSTudyReps) 333 BootRecPowPVals = numeric(NSTudyReps) 334 335 #***************************************************# 336 # The true distributionin the Two Populations: 337 # We randomly select this. We should explore different 338 # options. These options will address two components: 339 # 1) Shape of distribution (Normal, Uniform, Exponential, etc.) 340 # 2) Parameters of distribution (Mean, SD, lambda, etc.) 341 #***************************************************# 342 343 # Case 1: Normal 344 345 346 #***************************************************# 347 348 start_time <- Sys.time() 349 350 for(s in 1:NSTudyReps) 351 { 352 # Generating Samples A and B Under the Alternative 353 354 355 356 357 358 # p-val using permutation method 359 PermPowPVals[s] = two.mean.perm.pval.f(SampA,SampB,LowTail = LowTail,NSim=NSim) 360 361 # p-val using permutation method 362 PermRecPowPVals[s] = two.mean.perm.recenter.pval.f(SampA,SampB,LowTail = LowTail,NSim=NSim) 363 364 # p-val using bootstrap method 365 BootPowPVals[s] = two.mean.boot.pval.f(SampA,SampB,LowTail = LowTail,NSim=NSim) 366 367 # p-val using recentering bootstrap method 368 BootRecPowPVals[s] = two.mean.boot.recenter.pval.f(SampA,SampB,LowTail = LowTail,NSim=NSim) 369 370 cat(s,"\n") 371 } 372 373 end_time <- Sys.time() 374 375 end_time - start_time 376 377 # Compute the percentage of replicates that p-value less than 378 # given alpha. This will give us the power of the method 379 380 a=.05 381 sum(PermPowPVals<=a)/NSTudyReps 382 sum(PermRecPowPVals<=a)/NSTudyReps 383 sum(BootPowPVals<=a)/NSTudyReps 384 sum(BootRecPowPVals<=a)/NSTudyReps 385 386 387 #***************************************************# 388 #***************************************************# 389 #***************************************************# 390 #***************************************************# 391 #***************************************************# 392 # CI Study 393 #***************************************************# 394 #***************************************************# 395 #***************************************************# 396 #***************************************************# 397 #***************************************************# 398 #***************************************************# 399 400 401 #***************************************************# 402 # The true distributionin the Two Populations: 403 # We randomly select this. We should explore different 404 # options. These options will address two components: 405 # 1) Shape of distribution (Normal, Uniform, Exponential, etc.) 406 # 2) Parameters of distribution (Mean, SD, lambda, etc.) 407 #***************************************************# 408 409 # Case 1: Normal 410 411 412 a = .05 413 414 415 # The CI is for the difference in the two proportions 416 TrueDiff = muA - muB 417 418 CIIncludeTrueDiff = numeric(NSTudyReps) 419 420 421 #***************************************************# 422 423 start_time <- Sys.time() 424 425 for(s in 1:NSTudyReps) 426 { 427 # Generating Samples A and B Under the Alternative 428 429 430 431 432 433 434 435 436 # CI using bootstrap method 437 CI = two.mean.CI.f(SampA,SampB,alpha=a,NSim=NSim) 438 439 # Check if CI included true difference 440 if(CI[1] <= TrueDiff && TrueDiff <= CI[2]) 441 { 442 CIIncludeTrueDiff[s]=1 443 }else 444 { 445 CIIncludeTrueDiff[s]=0 446 } 447 cat(s,"\n") 448 } 449 450 end_time <- Sys.time() 451 452 end_time - start_time 453 454 # Compute the percentage of replicates that CI included truediff 455 456 sum(CIIncludeTrueDiff)/NSTudyReps 457