stat_data_analysis_rcode

Statistical Data Analysis - Fall 2018
Log | Files | Refs | README

R-IC06.R (6593B)


      1 #*******************************************************#
      2 #*******************************************************#
      3 # 		         STAT320 - IC06 Solutions	      	  	    #
      4 #*******************************************************#
      5 #*******************************************************#
      6 #***************************************************#
      7 #           PVal/Power for Two Sample Means         #  
      8 #***************************************************#
      9 
     10 #***************************************************#
     11 #***************************************************#
     12 #***************************************************#
     13 #***************************************************#
     14 #***************************************************#
     15 
     16 #***************************************************#
     17 # Means P-value
     18 #***************************************************#
     19 #set.seed(257)
     20 #***************************************************#
     21 
     22 two.mean.pval.f = function(xobs,xsampA,xsampB,Low = TRUE,NSim=100000)
     23 {
     24   # Compute Sizes of each sample
     25   nA = length(xsampA)
     26   nB = length(xsampB)
     27   
     28   # Combine Samples to one super sample
     29   supersample = c(xsampA,xsampB)
     30   
     31   # Create object where simulated DIFFERENCES of averages will be stored
     32   ODiffBars = numeric(NSim)
     33   
     34   # Repeat simulations NSim times
     35   for(i in 1:NSim)
     36   {
     37     shuffledsupersample = sample(supersample)
     38     
     39     # Generate sample 1 by resampling super Sample WITHOUT replacement
     40     # in practice we select the first nA elements of the shuffled supersample
     41     NewSampleA = shuffledsupersample[1:nA]
     42     
     43     # Assign remaining values in supersample to Sample B
     44     # that is the elements (nA+1) through (nA+nB)
     45     NewSampleB  = shuffledsupersample[nA+1:nB]
     46     
     47     # Save difference in means simulated samples
     48     ODiffBars[i] = mean(NewSampleA) - mean(NewSampleB )
     49   }
     50   # Compute p-value
     51   if(Low==TRUE)
     52   {
     53     # if alternative Ha is mu < mu0
     54     p = sum(ODiffBars<=xobs)/NSim
     55   }else
     56   {
     57     # if alternative Ha is mu > mu0
     58     p = sum(ODiffBars>=xobs)/NSim
     59   }
     60   return(p) 
     61 }
     62 
     63 
     64 #***************************************************#
     65 # Means Cutoff
     66 #***************************************************#
     67 
     68 two.mean.pval.cutoff.f = function(xsampA,xsampB,alpha=.05,Low = TRUE,NSim=100000)
     69 {
     70   # Compute Sizes of each sample
     71   nA = length(xsampA)
     72   nB = length(xsampB)
     73   
     74   # Combine Samples to one super sample
     75   supersample = c(xsampA,xsampB)
     76   
     77   # Create object where simulated DIFFERENCES of averages will be stored
     78   ODiffBars = numeric(NSim)
     79   
     80   # Repeat simulations NSim times
     81   for(i in 1:NSim)
     82   {
     83     # Generate sample 1 by resampling super Sample WITHOUT replacement 
     84     NewSampleA = sample(x= supersample,size = nA,replace = FALSE) 
     85     
     86     # Assign remaining values in supersample to Sample B
     87     NewSampleB  = setdiff(supersample,NewSampleA)
     88     
     89     # Save difference in means simulated samples
     90     ODiffBars[i] = mean(NewSampleA) - mean(NewSampleB )
     91   }
     92 
     93   # Compute Threshold
     94   if(Low==TRUE)
     95   {
     96     # if alternative Ha is mu < mu0
     97     x = quantile(ODiffBars,probs = alpha)
     98   }else
     99   {
    100     # if alternative Ha is mu > mu0
    101     x = quantile(ODiffBars,probs = 1-alpha)
    102   }
    103   return(x) 
    104 }
    105 
    106 #***************************************************#
    107 # Means CI
    108 #***************************************************#
    109 
    110 two.mean.CI.f = function(xsampA,xsampB,alpha=.05,NSim=100000)
    111 {
    112   # Compute Sizes of each sample
    113   nA = length(xsampA)
    114   nB = length(xsampB)
    115   
    116   # Create object where simulated DIFFERENCES of averages will be stored
    117   ODiffBars = numeric(NSim)
    118   
    119   # Repeat simulations NSim times
    120   for(i in 1:NSim)
    121   {
    122     # Generate sample 1 by resampling original sample WITH replacement 
    123     NewSampleA = sample(x= xsampA,size = nA,replace = TRUE) 
    124     
    125     # Generate sample 2 by resampling original sample WITH replacement 
    126     NewSampleB  = sample(x= xsampB,size = nB,replace = TRUE) 
    127     
    128     # Save difference in means simulated samples
    129     ODiffBars[i] = mean(NewSampleA) - mean(NewSampleB)
    130   }
    131   
    132   # Compute CI
    133   ci =quantile(ODiffBars,probs = c(alpha/2,1-alpha/2))
    134    return(ci) 
    135 }
    136 
    137 
    138 
    139 #***************************************************#
    140 # Means Power
    141 #***************************************************#
    142 two.mean.power.f = function(CAlpha,xsampA,xsampB,Delta=1,Low=TRUE,NSim=100000)
    143 {
    144   # Getting the rejection cutoff value
    145   
    146   # Compute Sizes of each sample
    147   nA = length(xsampA)
    148   nB = length(xsampB)
    149   
    150   # rescale group A to have mean 0
    151   rescaleA = xsampA - mean(xsampA)
    152   
    153   # rescale group B to have mean Delta
    154   if(Low==TRUE)
    155   {
    156   rescaleB = xsampB - mean(xsampB) - Delta 
    157   }else{
    158     rescaleB = xsampB - mean(xsampB) + Delta 
    159   }
    160   
    161   # Create object where simulated DIFFERENCES of averages will be stored
    162   ODiffBars = numeric(NSim)
    163   
    164   # Repeat simulations NSim times
    165   for(i in 1:NSim)
    166   {
    167     # Generate sample 1 by resampling original sample WITH replacement 
    168     NewSampleA = sample(x= rescaleA,size = nA,replace = TRUE) 
    169     
    170     # Generate sample 2 by resampling original sample WITH replacement 
    171     NewSampleB  = sample(x= rescaleB,size = nB,replace = TRUE) 
    172     
    173     # Save difference in means simulated samples
    174     ODiffBars[i] = mean(NewSampleA) - mean(NewSampleB )
    175   }
    176   
    177   # Compute p-value
    178   if(Low==TRUE)
    179   {
    180     # if alternative Ha is mu < mu0
    181     p = sum(ODiffBars<=CAlpha )/NSim
    182   }else
    183   {
    184     # if alternative Ha is mu > mu0
    185     p = sum(ODiffBars>=CAlpha )/NSim
    186   }
    187   return(p) 
    188   }
    189 
    190 
    191 ######################################################
    192 ######################################################
    193 ######################################################
    194 ######################################################
    195 
    196 
    197 xsampA=c(5.26, 5.07, 5.54, 5.17, 5.39)
    198 xbarA = mean(xsampA)
    199 xbarA
    200 sd(xsampA)
    201 xsampB=c(6.33, 6.51, 6.29, 6.16, 6.48)
    202 xbarB = mean(xsampB)
    203 xbarB
    204 sd(xsampB)
    205 xbarA - xbarB
    206 
    207 means = two.mean.pval.f(xobs = (xbarA-xbarB), xsampA, xsampB, Low = TRUE, NSim = 100000)
    208 means
    209 
    210 CI = two.mean.CI.f(xsampA, xsampB, alpha = 0.02, NSim = 100000)
    211 CI
    212 
    213 ######################################################
    214 ######################################################
    215 ######################################################
    216 
    217 
    218 ######################################################
    219 ######################################################
    220 ######################################################
    221 
    222 # xsampA=c(68,84,81,85,75,69,80,76,79,74)
    223 # xsampB=c(74,71,79,63,80,61,69,72,80,65)