stat_data_analysis_rcode

Statistical Data Analysis - Fall 2018
Log | Files | Refs | README

R-IC09.R (9985B)


      1 #*******************************************************#
      2 #*******************************************************#
      3 # 		         STAT320 - IC09 Solutions	      	  	    #
      4 #*******************************************************#
      5 #*******************************************************#
      6 #***************************************************#
      7 # Type I Error/Power for Two Sample Proportions         #  
      8 #***************************************************#
      9 
     10 #***************************************************#
     11 #***************************************************#
     12 #***************************************************#
     13 #***************************************************#
     14 #***************************************************#
     15 
     16 #***************************************************#
     17 # Means P-value
     18 #***************************************************#
     19 set.seed(257)
     20 #***************************************************#
     21 
     22 two.prop.permute.pval.f = function(xA,nA,xB,nB,LowTail = TRUE,NSim=100000)
     23 {
     24   xobs = xA/nA - xB/nB
     25   # Reconstruct sample A
     26   xsampA = rep(0,nA)
     27   xsampA[1:xA] = rep(1,xA)
     28   
     29   # Reconstruct sample B
     30   xsampB = rep(0,nB)
     31   xsampB[1:xB] = rep(1,xB)
     32   
     33   # Combine Samples to one super sample
     34   supersample = c(xsampA,xsampB)
     35   
     36   # Create object where simulated DIFFERENCES of averages will be stored
     37   ODiffProps = numeric(NSim)
     38   
     39   # Repeat simulations NSim times
     40   for(i in 1:NSim)
     41   {
     42     
     43     # Shuffle the elements inthe supersample
     44     shuffledsupersample = sample(supersample)
     45     
     46     # Generate sample 1 by resampling super Sample WITHOUT replacement
     47     # in practice we select the first nA elements of the shuffled supersample
     48     NewSampleA = shuffledsupersample[1:nA]
     49     
     50     # Assign remaining values in supersample to Sample B
     51     # that is the elements (nA+1) through (nA+nB)
     52     NewSampleB  = shuffledsupersample[nA+1:nB]
     53     
     54     # Save difference in means simulated samples
     55     ODiffProps[i] = mean(NewSampleA) - mean(NewSampleB )
     56   }
     57   # Compute p-value
     58   if(LowTail==TRUE)
     59   {
     60     # if alternative Ha is mu < mu0
     61     p = sum(ODiffProps<=xobs)/NSim
     62   }else
     63   {
     64     # if alternative Ha is mu > mu0
     65     p = sum(ODiffProps>=xobs)/NSim
     66   }
     67   return(p) 
     68 }
     69 
     70 
     71 two.prop.bootstrap.pval.f = function(xA,nA,xB,nB,LowTail = TRUE,NSim=100000)
     72 {
     73   xobs = xA/nA - xB/nB
     74   # Reconstruct sample A
     75   xsampA = rep(0,nA)
     76   xsampA[1:xA] = rep(1,xA)
     77   
     78   # Reconstruct sample B
     79   xsampB = rep(0,nB)
     80   xsampB[1:xB] = rep(1,xB)
     81   
     82   # Combine Samples to one super sample
     83   supersample = c(xsampA,xsampB)
     84   
     85   # Create object where simulated DIFFERENCES of averages will be stored
     86   ODiffProps = numeric(NSim)
     87   
     88   # Repeat simulations NSim times
     89   for(i in 1:NSim)
     90   {
     91     
     92     # Generate sample 1 by resampling super Sample WITH replacement
     93     NewSampleA = sample(supersample,size = nA,replace = TRUE) 
     94     
     95     # Generate sample 2 by resampling super Sample WITH replacement
     96     NewSampleB = sample(supersample,size = nB,replace = TRUE) 
     97     
     98     # Save difference in means simulated samples
     99     ODiffProps[i] = mean(NewSampleA) - mean(NewSampleB )
    100   }
    101   # Compute p-value
    102   if(LowTail==TRUE)
    103   {
    104     # if alternative Ha is mu < mu0
    105     p = sum(ODiffProps<=xobs)/NSim
    106   }else
    107   {
    108     # if alternative Ha is mu > mu0
    109     p = sum(ODiffProps>=xobs)/NSim
    110   }
    111   return(p) 
    112 }
    113 
    114 
    115 #***************************************************#
    116 # Means CI
    117 #***************************************************#
    118 
    119 two.prop.CI.f = function(xA,nA,xB,nB,alpha=.05,NSim=100000)
    120 {
    121   # Reconstruct sample A
    122   xsampA = rep(0,nA)
    123   xsampA[1:xA] = rep(1,xA)
    124   
    125   # Reconstruct sample B
    126   xsampB = rep(0,nB)
    127   xsampB[1:xB] = rep(1,xB)
    128   
    129   # Create object where simulated DIFFERENCES of averages will be stored
    130   ODiffProps = numeric(NSim)
    131   
    132   # Repeat simulations NSim times
    133   for(i in 1:NSim)
    134   {
    135     # Generate sample 1 by resampling original Sample A WITH replacement
    136     NewSampleA = sample(xsampA,size = nA,replace = TRUE) 
    137     
    138     # Generate sample 2 by resampling original Sample B WITH replacement
    139     NewSampleB = sample(xsampB,size = nB,replace = TRUE) 
    140     
    141     # Save difference in means simulated samples
    142     ODiffProps[i] = mean(NewSampleA) - mean(NewSampleB)
    143   }
    144   
    145   # Compute CI
    146   ci =quantile(ODiffProps,probs = c(alpha/2,1-alpha/2))
    147    return(ci) 
    148 }
    149 
    150 
    151 
    152 
    153 #***************************************************#
    154 #***************************************************#
    155 #***************************************************#
    156 #***************************************************#
    157 #***************************************************#
    158 #***************************************************#
    159 # Type I Simulation Study 
    160 #***************************************************#
    161 #***************************************************#
    162 #***************************************************#
    163 #***************************************************#
    164 #***************************************************#
    165 #***************************************************#
    166 
    167 NSTudyReps = 3000
    168 NSim = 10000
    169 
    170 nA = 529
    171 nB = 326
    172 
    173 BootNullPVals = numeric(NSTudyReps)
    174 PermNullPVals = numeric(NSTudyReps)
    175 
    176 #***************************************************#
    177 # The true proportion in the Common Population:
    178 # We randomly assign this value. We should explore different 
    179 # values to see what happens.
    180 #***************************************************#
    181 
    182 
    183 #***************************************************#
    184 
    185 start_time <- Sys.time()
    186 
    187 for(s in 1:NSTudyReps)
    188 {
    189   # Generating Samples A and B Under the Null and Counting the successes  
    190  
    191   
    192   
    193   
    194   
    195   
    196   
    197   # p-val using bootstrap method
    198   p1 = two.prop.bootstrap.pval.f(nSA,nA,nSB,nB,LowTail = FALSE,NSim=NSim)
    199     
    200   # p-val using permutation method
    201   p2 = two.prop.permute.pval.f(nSA,nA,nSB,nB,LowTail = FALSE,NSim=NSim)
    202   
    203   # Storing each p-val into the appropriate vector
    204   BootNullPVals[s] = p1
    205   PermNullPVals[s] = p2
    206   cat(s,"\n")
    207 }
    208 
    209 end_time <- Sys.time()
    210 
    211 end_time - start_time
    212 
    213 # Compute the percentage of replicates that p-value less than
    214 # given alpha
    215 
    216 a=.05
    217 sum(BootNullPVals<=a)/NSTudyReps
    218 sum(PermNullPVals<=a)/NSTudyReps
    219 
    220 
    221 #***************************************************#
    222 #***************************************************#
    223 #***************************************************#
    224 #***************************************************#
    225 #***************************************************#
    226 # Power Simulation Study 
    227 #***************************************************#
    228 #***************************************************#
    229 #***************************************************#
    230 #***************************************************#
    231 #***************************************************#
    232 #***************************************************#
    233 
    234 NSTudyReps = 3000
    235 NSim = 10000
    236 
    237 
    238 nA = 529
    239 nB = 326
    240 
    241 BootPowPVals = numeric(NSTudyReps)
    242 PermPowPVals = numeric(NSTudyReps)
    243 
    244 #***************************************************#
    245 # The true proportions in each Population is Unknown:
    246 # We randomly assign these values. We should explore different 
    247 # values to see what happens.
    248 # Note, the two values can be exactly the same or different 
    249 #***************************************************#
    250 
    251 
    252 #***************************************************#
    253 
    254 start_time <- Sys.time()
    255 
    256 for(s in 1:NSTudyReps)
    257 {
    258   # Generating Samples A and B Under the Alternative and Counting the successes  
    259 
    260   
    261   
    262   
    263   
    264   
    265     
    266   # p-val using bootstrap method
    267   p1 = two.prop.bootstrap.pval.f(nSA,nA,nSB,nB,LowTail = FALSE,NSim=NSim)
    268   
    269   # p-val using permutation method
    270   p2 = two.prop.permute.pval.f(nSA,nA,nSB,nB,LowTail = FALSE,NSim=NSim)
    271   
    272   # Storing each p-val into the appropriate vector
    273   BootPowPVals[s] = p1
    274   PermPowPVals[s] = p2
    275   cat(s,"\n")
    276 }
    277 
    278 end_time <- Sys.time()
    279 
    280 end_time - start_time
    281 
    282 # Compute the percentage of replicates that p-value less than
    283 # given alpha. This will give us the power of the method
    284 
    285 a=.05
    286 sum(BootPowPVals<=a)/NSTudyReps
    287 sum(PermPowPVals<=a)/NSTudyReps
    288 
    289 
    290 
    291 #***************************************************#
    292 #***************************************************#
    293 #***************************************************#
    294 #***************************************************#
    295 #***************************************************#
    296 # CI Study 
    297 #***************************************************#
    298 #***************************************************#
    299 #***************************************************#
    300 #***************************************************#
    301 #***************************************************#
    302 #***************************************************#
    303 
    304 NSTudyReps = 1000
    305 NSim = 10000
    306 
    307 nA = 529
    308 nB = 326
    309 a = .05 
    310 
    311 CIIncludeTrueDiff = numeric(NSTudyReps)
    312 
    313 #***************************************************#
    314 # The true proportions in the Two Population:
    315 # We randomly assign these values. We should explore different 
    316 # values to see what happens.
    317 # Note, the two values can be exactly the same or different 
    318 #***************************************************#
    319 
    320 
    321 
    322 # The CI is for the difference in the two proportions 
    323 
    324 TrueDiff = pTA - pTB
    325 
    326 #***************************************************#
    327 
    328 start_time <- Sys.time()
    329 
    330 for(s in 1:NSTudyReps)
    331 {
    332   # Generating Samples A and B Under the True Proportions and Counting the successes  
    333 
    334   
    335   
    336   
    337     
    338   # CI using bootstrap method
    339   CI = two.prop.CI.f(nSA,nA,nSB,nB,alpha=a,NSim=NSim)
    340   
    341   # Check if CI included true difference
    342   if(CI[1] <= TrueDiff && TrueDiff <= CI[2])
    343   {
    344     CIIncludeTrueDiff[s]=1
    345   }else
    346   {
    347     CIIncludeTrueDiff[s]=0
    348   }
    349   cat(s,"\n")
    350 }
    351 
    352 end_time <- Sys.time()
    353 
    354 end_time - start_time
    355 
    356 # Compute the percentage of replicates that CI included truediff
    357 
    358 sum(CIIncludeTrueDiff)/NSTudyReps
    359 
    360 
    361 
    362 
    363 
    364