stat_data_analysis_rcode

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README

commit fdd15bd294e3248d746e841ee4c81b161f1d4899
Author: John Kubach <johnkubach@gmail.com>
Date:   Fri, 17 Sep 2021 11:03:06 -0400

Add R files

Diffstat:
AR - HW06 - PvalFunction.R | 60++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
AR-IC04.R | 127+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
AR-IC05.R | 126+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
AR-IC06.R | 223+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
AR-IC08.R | 166+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
AR-IC09.R | 364+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
AR-IC10.R | 457+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
7 files changed, 1523 insertions(+), 0 deletions(-)

diff --git a/R - HW06 - PvalFunction.R b/R - HW06 - PvalFunction.R @@ -0,0 +1,60 @@ +#*******************************************************# +#*******************************************************# +# STAT320 - HW6 Solutions # +#*******************************************************# +#*******************************************************# +#***************************************************# +# PVal for Proportions # +#***************************************************# + +#***************************************************# +#***************************************************# +#***************************************************# +#***************************************************# +#***************************************************# + +#***************************************************# +# Fair Coin +#***************************************************# +#set.seed(257) +#***************************************************# + +prop.pval.f = function(x,p0=0.5,NSize=100,Low = TRUE,NSim=100000) +{ + POP = c("T","H") + Succ = "T" + ONSuccs = numeric(NSim) + + for(i in 1:NSim) + { + Sample = sample(x=POP, prob =c(p0,1-p0),size = NSize,replace = TRUE) + ONSuccs[i] = sum(Sample==Succ) + } + + if(Low==TRUE) + { + p = sum(ONSuccs<=x)/NSim + }else + { + p = sum(ONSuccs>=x)/NSim + } + return(p) +} + +#***************************************************# +#***************************************************# + +NObs = 100000 +N = 564 +pS = .135 + +x = 65 +Dir = TRUE +prop.pval.f(x,p0=pS,NSize = N,Low = Dir,NSim=NObs) + + + + + + + diff --git a/R-IC04.R b/R-IC04.R @@ -0,0 +1,127 @@ +#*******************************************************# +#*******************************************************# +# STAT320 - IC04 Solutions # +#*******************************************************# +#*******************************************************# +#***************************************************# +# CI for Proportions # +#***************************************************# + +#***************************************************# +#***************************************************# +#***************************************************# +#***************************************************# +#***************************************************# + +#***************************************************# +# Fair Coin +#***************************************************# +set.seed(257) +#***************************************************# + +#***************************************************# +#***************************************************# + +prop.CI.f = function(a=.05,p0=0.5,NSize=100,NSim=100000) +{ + POP = c("T","H") + Succ = "T" + ONSuccs = numeric(NSim) + + ONSuccs = replicate(NSim, + { + Sample=sample(x=POP, prob =c(p0,1-p0),size = NSize,replace = TRUE) + sum(Sample==Succ) + }) + x = quantile(ONSuccs,probs = c(a/2,1-a/2))/NSize + return(x) +} + + +#***************************************************# +#***************************************************# +NSim = 100000 # Number of simulated samples +a = 0.05 # Alpha level for CI + + +# Sample Estimate +N = 564 # Sample size +nS = 51 # Number of Successes in sample +phat = nS/N # Estimated sample proportion + +# Set True value to be the estimate from sample +pT = phat + +# simulate 100,000 samples, compute their sample proportions, +# and store them in the vector SimProps + +POP = c(1,0) +SimProps = numeric(NSim) + +for(i in 1:NSim) +{ + # Generate a sample using the given pT and the sample size N + Sample=sample(x=POP, prob =c(pT,1-pT),size = N,replace = TRUE) + + # Count the number of successes (1's) in the simulated sample + xS = sum(Sample==1) + + # compute sample proportion of current sample + phat = xS/N + + # save current sample proportion + SimProps[i] = phat + + # go back and repeat +} + + +# Compute the (1-a)*100% confidence based on the simulated phat's + +CI = quantile(SimProps,probs = c(a/2,1-a/2)) + +CI + + +prop.CI.f(a,p0=phat,NSize=N,NSim=NSim) + +#***************************************************# + + +#***************************************************# +#***************************************************# +# Program to demonstrate what confidence means +# in Confidence intervals +#***************************************************# +#***************************************************# + +N.CIs = 20 # Number of simulated samples +N = 564 # Sample size + +pT = .15 # True percantage of successes in population +a = 0.01 + +NCIIn = 0 # Counter of the samples the produced a CI that + # Included the true value + +POP = c(1,0) +for(i in 1:N.CIs) +{ + # Randomly select a sample from the Real Population to get + # an estmate phat of the true pT + CurSample = sample(x=POP, prob =c(pT,1-pT),size = N,replace = TRUE) + phat = sum(CurSample)/N + + # we use the phat to compute a confidence interval + ci = prop.CI.f(a,p0=phat,NSize = N,NSim=NSim) + + # Checking if the particular CI contains the true value pT + if(ci[1] <= pT && pT <= ci[2]) + NCIIn = NCIIn + 1 +} + +# Number and Proportion of CIs who actually contained the true value of pT +NCIIn +NCIIn/N.CIs + + diff --git a/R-IC05.R b/R-IC05.R @@ -0,0 +1,126 @@ +#*******************************************************# +#*******************************************************# +# STAT320 - IC05 Solutions # +#*******************************************************# +#*******************************************************# +#***************************************************# +# PVal/Power for Means # +#***************************************************# + +#***************************************************# +#***************************************************# +#***************************************************# +#***************************************************# +#***************************************************# + +#***************************************************# +# Means P-value +#***************************************************# +set.seed(257) +#***************************************************# + +xsamp1=c(96.7, 108.8, 95.1, 110, 104.5, 106.7, 120.5, 113.4, 94.4, 67.6, 81.9, 118.1) +mean(xsamp1) +NSim1=100000 +Low1=FALSE +NSize1=12 + +mean.pval.f = function(xobs,xsamp,mu0=0,NSize=100,Low = TRUE,NSim=100000) +{ + # Rescale sample so as to have mean equal to mu0 + rescaledsample = xsamp - mean(xsamp) + mu0 + + # Create object where simulated averages will be stored + OXBars = numeric(NSim) + + # Repeat simulations NSim times + for(i in 1:NSim) + { + # Generate sample by resampling original sample WITH replacement + Sample = sample(x=rescaledsample,size = NSize,replace = TRUE) + # Save mean of simulated sample + OXBars[i] = mean(Sample) + } + # Compute p-value + if(Low==TRUE) + { + # if alternative Ha is mu < mu0 + p = sum(OXBars<=xobs)/NSim + }else + { + # if alternative Ha is mu > mu0 + p = sum(OXBars>=xobs)/NSim + } + return(p) +} +pval = mean.pval.f(xobs = mean(xsamp1), xsamp=xsamp1,mu0=98,NSize=NSize1,Low=Low1,NSim=NSim1) +pval + +mu0=10 + +#***************************************************# +# Means Cutoff +#***************************************************# + +mean.pval.cutoff.f = function(a=.05,xsamp,mu0=0,NSize=100,Low = TRUE,NSim=100000) +{ + # Rescale sample so as to have mean equal to mu0 + rescaledsample = xsamp - mean(xsamp) + mu0 + + # Create object where simulated averages will be stored + OXBars = numeric(NSim) + + # Repeat simulations NSim times + for(i in 1:NSim) + { + # Generate sample by resampling original sample WITH replacement + Sample = sample(x=rescaledsample,size = NSize,replace = TRUE) + # Save mean of simulated sample + OXBars[i] = mean(Sample) + } + # Compute p-value + if(Low==TRUE) + { + # if alternative Ha is mu < mu0 + x = quantile(OXBars,probs = a) + }else + { + # if alternative Ha is mu > mu0 + x = quantile(OXBars,probs = 1-a) + } + return(x) +} +cOff = mean.pval.cutoff.f(a = 0.01,xsamp=xsamp1,mu0=29.5,NSize=18,Low=TRUE,NSim=100000) +cOff + + +#***************************************************# +# Means CI +#***************************************************# + +mean.CI.f = function(xsamp,a=.05,NSize=100,NSim=100000) +{ + # Rescale sample so as to have mean equal to mu0 + rescaledsample = xsamp - mean(xsamp) + mu0 + + # Create object where simulated averages will be stored + OXBars = numeric(NSim) + + # Repeat simulations NSim times + for(i in 1:NSim) + { + # Generate sample by resampling original sample WITH replacement + Sample = sample(x=rescaledsample,size = NSize,replace = TRUE) + # Save mean of simulated sample + OXBars[i] = mean(Sample) + } + x = quantile(OXBars,probs = c(a/2,1-a/2)) + return(x) +} + +CI = mean.CI.f(xsamp=xsamp1, a=0.05, NSize=12, NSim=100000) +CI + +pow = mean.pval.f(xobs = mean(xsamp1) ,xsamp=xsamp1,mu0=99,NSize=12,Low=FALSE,NSim=100000) +pow + diff --git a/R-IC06.R b/R-IC06.R @@ -0,0 +1,223 @@ +#*******************************************************# +#*******************************************************# +# STAT320 - IC06 Solutions # +#*******************************************************# +#*******************************************************# +#***************************************************# +# PVal/Power for Two Sample Means # +#***************************************************# + +#***************************************************# +#***************************************************# +#***************************************************# +#***************************************************# +#***************************************************# + +#***************************************************# +# Means P-value +#***************************************************# +#set.seed(257) +#***************************************************# + +two.mean.pval.f = function(xobs,xsampA,xsampB,Low = TRUE,NSim=100000) +{ + # Compute Sizes of each sample + nA = length(xsampA) + nB = length(xsampB) + + # Combine Samples to one super sample + supersample = c(xsampA,xsampB) + + # Create object where simulated DIFFERENCES of averages will be stored + ODiffBars = numeric(NSim) + + # Repeat simulations NSim times + for(i in 1:NSim) + { + shuffledsupersample = sample(supersample) + + # Generate sample 1 by resampling super Sample WITHOUT replacement + # in practice we select the first nA elements of the shuffled supersample + NewSampleA = shuffledsupersample[1:nA] + + # Assign remaining values in supersample to Sample B + # that is the elements (nA+1) through (nA+nB) + NewSampleB = shuffledsupersample[nA+1:nB] + + # Save difference in means simulated samples + ODiffBars[i] = mean(NewSampleA) - mean(NewSampleB ) + } + # Compute p-value + if(Low==TRUE) + { + # if alternative Ha is mu < mu0 + p = sum(ODiffBars<=xobs)/NSim + }else + { + # if alternative Ha is mu > mu0 + p = sum(ODiffBars>=xobs)/NSim + } + return(p) +} + + +#***************************************************# +# Means Cutoff +#***************************************************# + +two.mean.pval.cutoff.f = function(xsampA,xsampB,alpha=.05,Low = TRUE,NSim=100000) +{ + # Compute Sizes of each sample + nA = length(xsampA) + nB = length(xsampB) + + # Combine Samples to one super sample + supersample = c(xsampA,xsampB) + + # Create object where simulated DIFFERENCES of averages will be stored + ODiffBars = numeric(NSim) + + # Repeat simulations NSim times + for(i in 1:NSim) + { + # Generate sample 1 by resampling super Sample WITHOUT replacement + NewSampleA = sample(x= supersample,size = nA,replace = FALSE) + + # Assign remaining values in supersample to Sample B + NewSampleB = setdiff(supersample,NewSampleA) + + # Save difference in means simulated samples + ODiffBars[i] = mean(NewSampleA) - mean(NewSampleB ) + } + + # Compute Threshold + if(Low==TRUE) + { + # if alternative Ha is mu < mu0 + x = quantile(ODiffBars,probs = alpha) + }else + { + # if alternative Ha is mu > mu0 + x = quantile(ODiffBars,probs = 1-alpha) + } + return(x) +} + +#***************************************************# +# Means CI +#***************************************************# + +two.mean.CI.f = function(xsampA,xsampB,alpha=.05,NSim=100000) +{ + # Compute Sizes of each sample + nA = length(xsampA) + nB = length(xsampB) + + # Create object where simulated DIFFERENCES of averages will be stored + ODiffBars = numeric(NSim) + + # Repeat simulations NSim times + for(i in 1:NSim) + { + # Generate sample 1 by resampling original sample WITH replacement + NewSampleA = sample(x= xsampA,size = nA,replace = TRUE) + + # Generate sample 2 by resampling original sample WITH replacement + NewSampleB = sample(x= xsampB,size = nB,replace = TRUE) + + # Save difference in means simulated samples + ODiffBars[i] = mean(NewSampleA) - mean(NewSampleB) + } + + # Compute CI + ci =quantile(ODiffBars,probs = c(alpha/2,1-alpha/2)) + return(ci) +} + + + +#***************************************************# +# Means Power +#***************************************************# +two.mean.power.f = function(CAlpha,xsampA,xsampB,Delta=1,Low=TRUE,NSim=100000) +{ + # Getting the rejection cutoff value + + # Compute Sizes of each sample + nA = length(xsampA) + nB = length(xsampB) + + # rescale group A to have mean 0 + rescaleA = xsampA - mean(xsampA) + + # rescale group B to have mean Delta + if(Low==TRUE) + { + rescaleB = xsampB - mean(xsampB) - Delta + }else{ + rescaleB = xsampB - mean(xsampB) + Delta + } + + # Create object where simulated DIFFERENCES of averages will be stored + ODiffBars = numeric(NSim) + + # Repeat simulations NSim times + for(i in 1:NSim) + { + # Generate sample 1 by resampling original sample WITH replacement + NewSampleA = sample(x= rescaleA,size = nA,replace = TRUE) + + # Generate sample 2 by resampling original sample WITH replacement + NewSampleB = sample(x= rescaleB,size = nB,replace = TRUE) + + # Save difference in means simulated samples + ODiffBars[i] = mean(NewSampleA) - mean(NewSampleB ) + } + + # Compute p-value + if(Low==TRUE) + { + # if alternative Ha is mu < mu0 + p = sum(ODiffBars<=CAlpha )/NSim + }else + { + # if alternative Ha is mu > mu0 + p = sum(ODiffBars>=CAlpha )/NSim + } + return(p) + } + + +###################################################### +###################################################### +###################################################### +###################################################### + + +xsampA=c(5.26, 5.07, 5.54, 5.17, 5.39) +xbarA = mean(xsampA) +xbarA +sd(xsampA) +xsampB=c(6.33, 6.51, 6.29, 6.16, 6.48) +xbarB = mean(xsampB) +xbarB +sd(xsampB) +xbarA - xbarB + +means = two.mean.pval.f(xobs = (xbarA-xbarB), xsampA, xsampB, Low = TRUE, NSim = 100000) +means + +CI = two.mean.CI.f(xsampA, xsampB, alpha = 0.02, NSim = 100000) +CI + +###################################################### +###################################################### +###################################################### + + +###################################################### +###################################################### +###################################################### + +# xsampA=c(68,84,81,85,75,69,80,76,79,74) +# xsampB=c(74,71,79,63,80,61,69,72,80,65) diff --git a/R-IC08.R b/R-IC08.R @@ -0,0 +1,166 @@ +#*******************************************************# +#*******************************************************# +# STAT320 - IC06 Solutions # +#*******************************************************# +#*******************************************************# +#***************************************************# +# PVal/Power for Two Sample Proportions # +#***************************************************# + +#***************************************************# +#***************************************************# +#***************************************************# +#***************************************************# +#***************************************************# + +#***************************************************# +# Means P-value +#***************************************************# +#set.seed(300) +#***************************************************# + +two.prop.permute.pval.f = function(xA,nA,xB,nB,LowTail = TRUE,NSim=100000) +{ + xobs = xA/nA - xB/nB + # Reconstruct sample A + xsampA = rep(0,nA) + xsampA[1:xA] = rep(1,xA) + + # Reconstruct sample B + xsampB = rep(0,nB) + xsampB[1:xB] = rep(1,xB) + + # Combine Samples to one super sample + supersample = c(xsampA,xsampB) + + # Create object where simulated DIFFERENCES of averages will be stored + ODiffProps = numeric(NSim) + + # Repeat simulations NSim times + for(i in 1:NSim) + { + + # Shuffle the elements inthe supersample + shuffledsupersample = sample(supersample) + + # Generate sample 1 by resampling super Sample WITHOUT replacement + # in practice we select the first nA elements of the shuffled supersample + NewSampleA = shuffledsupersample[1:nA] + + # Assign remaining values in supersample to Sample B + # that is the elements (nA+1) through (nA+nB) + NewSampleB = shuffledsupersample[nA+1:nB] + + # Save difference in means simulated samples + ODiffProps[i] = mean(NewSampleA) - mean(NewSampleB ) + } + # Compute p-value + if(LowTail==TRUE) + { + # if alternative Ha is mu < mu0 + p = sum(ODiffProps<=xobs)/NSim + }else + { + # if alternative Ha is mu > mu0 + p = sum(ODiffProps>=xobs)/NSim + } + return(p) +} + +two.prop.bootstrap.pval.f = function(xA,nA,xB,nB,LowTail = TRUE,NSim=100000) +{ + xobs = xA/nA - xB/nB + # Reconstruct sample A + xsampA = rep(0,nA) + xsampA[1:xA] = rep(1,xA) + + # Reconstruct sample B + xsampB = rep(0,nB) + xsampB[1:xB] = rep(1,xB) + + # Combine Samples to one super sample + supersample = c(xsampA,xsampB) + + # Create object where simulated DIFFERENCES of averages will be stored + ODiffProps = numeric(NSim) + + # Repeat simulations NSim times + for(i in 1:NSim) + { + + # Generate sample 1 by resampling super Sample WITH replacement + NewSampleA = sample(supersample,size = nA,replace = TRUE) + + # Generate sample 2 by resampling super Sample WITH replacement + NewSampleB = sample(supersample,size = nB,replace = TRUE) + + # Save difference in means simulated samples + ODiffProps[i] = mean(NewSampleA) - mean(NewSampleB ) + } + # Compute p-value + if(LowTail==TRUE) + { + # if alternative Ha is mu < mu0 + p = sum(ODiffProps<=xobs)/NSim + }else + { + # if alternative Ha is mu > mu0 + p = sum(ODiffProps>=xobs)/NSim + } + return(p) +} + + +#***************************************************# +# Means CI +#***************************************************# + +two.prop.CI.f = function(xA,nA,xB,nB,alpha=.05,NSim=100000) +{ + # Reconstruct sample A + xsampA = rep(0,nA) + xsampA[1:xA] = rep(1,xA) + + # Reconstruct sample B + xsampB = rep(0,nB) + xsampB[1:xB] = rep(1,xB) + + # Create object where simulated DIFFERENCES of averages will be stored + ODiffProps = numeric(NSim) + + # Repeat simulations NSim times + for(i in 1:NSim) + { + # Generate sample 1 by resampling original Sample A WITH replacement + NewSampleA = sample(xsampA,size = nA,replace = TRUE) + + # Generate sample 2 by resampling original Sample B WITH replacement + NewSampleB = sample(xsampB,size = nB,replace = TRUE) + + # Save difference in means simulated samples + ODiffProps[i] = mean(NewSampleA) - mean(NewSampleB) + } + + # Compute CI + ci =quantile(ODiffProps,probs = c(alpha/2,1-alpha/2)) + return(ci) +} + +#***************************************************# +# Questions +#***************************************************# + +xA = 5 +nA = 119 + +xB = 19 +nB = 115 + +LowTail = TRUE +NSim = 100000 + +alpha=.3 + +two.prop.bootstrap.pval.f(xA, nA, xB, nB, LowTail, NSim) +two.prop.permute.pval.f(xA, nA, xB, nB, LowTail, NSim) +two.prop.CI.f(xA, nA, xB, nB, alpha, NSim) diff --git a/R-IC09.R b/R-IC09.R @@ -0,0 +1,364 @@ +#*******************************************************# +#*******************************************************# +# STAT320 - IC09 Solutions # +#*******************************************************# +#*******************************************************# +#***************************************************# +# Type I Error/Power for Two Sample Proportions # +#***************************************************# + +#***************************************************# +#***************************************************# +#***************************************************# +#***************************************************# +#***************************************************# + +#***************************************************# +# Means P-value +#***************************************************# +set.seed(257) +#***************************************************# + +two.prop.permute.pval.f = function(xA,nA,xB,nB,LowTail = TRUE,NSim=100000) +{ + xobs = xA/nA - xB/nB + # Reconstruct sample A + xsampA = rep(0,nA) + xsampA[1:xA] = rep(1,xA) + + # Reconstruct sample B + xsampB = rep(0,nB) + xsampB[1:xB] = rep(1,xB) + + # Combine Samples to one super sample + supersample = c(xsampA,xsampB) + + # Create object where simulated DIFFERENCES of averages will be stored + ODiffProps = numeric(NSim) + + # Repeat simulations NSim times + for(i in 1:NSim) + { + + # Shuffle the elements inthe supersample + shuffledsupersample = sample(supersample) + + # Generate sample 1 by resampling super Sample WITHOUT replacement + # in practice we select the first nA elements of the shuffled supersample + NewSampleA = shuffledsupersample[1:nA] + + # Assign remaining values in supersample to Sample B + # that is the elements (nA+1) through (nA+nB) + NewSampleB = shuffledsupersample[nA+1:nB] + + # Save difference in means simulated samples + ODiffProps[i] = mean(NewSampleA) - mean(NewSampleB ) + } + # Compute p-value + if(LowTail==TRUE) + { + # if alternative Ha is mu < mu0 + p = sum(ODiffProps<=xobs)/NSim + }else + { + # if alternative Ha is mu > mu0 + p = sum(ODiffProps>=xobs)/NSim + } + return(p) +} + + +two.prop.bootstrap.pval.f = function(xA,nA,xB,nB,LowTail = TRUE,NSim=100000) +{ + xobs = xA/nA - xB/nB + # Reconstruct sample A + xsampA = rep(0,nA) + xsampA[1:xA] = rep(1,xA) + + # Reconstruct sample B + xsampB = rep(0,nB) + xsampB[1:xB] = rep(1,xB) + + # Combine Samples to one super sample + supersample = c(xsampA,xsampB) + + # Create object where simulated DIFFERENCES of averages will be stored + ODiffProps = numeric(NSim) + + # Repeat simulations NSim times + for(i in 1:NSim) + { + + # Generate sample 1 by resampling super Sample WITH replacement + NewSampleA = sample(supersample,size = nA,replace = TRUE) + + # Generate sample 2 by resampling super Sample WITH replacement + NewSampleB = sample(supersample,size = nB,replace = TRUE) + + # Save difference in means simulated samples + ODiffProps[i] = mean(NewSampleA) - mean(NewSampleB ) + } + # Compute p-value + if(LowTail==TRUE) + { + # if alternative Ha is mu < mu0 + p = sum(ODiffProps<=xobs)/NSim + }else + { + # if alternative Ha is mu > mu0 + p = sum(ODiffProps>=xobs)/NSim + } + return(p) +} + + +#***************************************************# +# Means CI +#***************************************************# + +two.prop.CI.f = function(xA,nA,xB,nB,alpha=.05,NSim=100000) +{ + # Reconstruct sample A + xsampA = rep(0,nA) + xsampA[1:xA] = rep(1,xA) + + # Reconstruct sample B + xsampB = rep(0,nB) + xsampB[1:xB] = rep(1,xB) + + # Create object where simulated DIFFERENCES of averages will be stored + ODiffProps = numeric(NSim) + + # Repeat simulations NSim times + for(i in 1:NSim) + { + # Generate sample 1 by resampling original Sample A WITH replacement + NewSampleA = sample(xsampA,size = nA,replace = TRUE) + + # Generate sample 2 by resampling original Sample B WITH replacement + NewSampleB = sample(xsampB,size = nB,replace = TRUE) + + # Save difference in means simulated samples + ODiffProps[i] = mean(NewSampleA) - mean(NewSampleB) + } + + # Compute CI + ci =quantile(ODiffProps,probs = c(alpha/2,1-alpha/2)) + return(ci) +} + + + + +#***************************************************# +#***************************************************# +#***************************************************# +#***************************************************# +#***************************************************# +#***************************************************# +# Type I Simulation Study +#***************************************************# +#***************************************************# +#***************************************************# +#***************************************************# +#***************************************************# +#***************************************************# + +NSTudyReps = 3000 +NSim = 10000 + +nA = 529 +nB = 326 + +BootNullPVals = numeric(NSTudyReps) +PermNullPVals = numeric(NSTudyReps) + +#***************************************************# +# The true proportion in the Common Population: +# We randomly assign this value. We should explore different +# values to see what happens. +#***************************************************# + + +#***************************************************# + +start_time <- Sys.time() + +for(s in 1:NSTudyReps) +{ + # Generating Samples A and B Under the Null and Counting the successes + + + + + + + + # p-val using bootstrap method + p1 = two.prop.bootstrap.pval.f(nSA,nA,nSB,nB,LowTail = FALSE,NSim=NSim) + + # p-val using permutation method + p2 = two.prop.permute.pval.f(nSA,nA,nSB,nB,LowTail = FALSE,NSim=NSim) + + # Storing each p-val into the appropriate vector + BootNullPVals[s] = p1 + PermNullPVals[s] = p2 + cat(s,"\n") +} + +end_time <- Sys.time() + +end_time - start_time + +# Compute the percentage of replicates that p-value less than +# given alpha + +a=.05 +sum(BootNullPVals<=a)/NSTudyReps +sum(PermNullPVals<=a)/NSTudyReps + + +#***************************************************# +#***************************************************# +#***************************************************# +#***************************************************# +#***************************************************# +# Power Simulation Study +#***************************************************# +#***************************************************# +#***************************************************# +#***************************************************# +#***************************************************# +#***************************************************# + +NSTudyReps = 3000 +NSim = 10000 + + +nA = 529 +nB = 326 + +BootPowPVals = numeric(NSTudyReps) +PermPowPVals = numeric(NSTudyReps) + +#***************************************************# +# The true proportions in each Population is Unknown: +# We randomly assign these values. We should explore different +# values to see what happens. +# Note, the two values can be exactly the same or different +#***************************************************# + + +#***************************************************# + +start_time <- Sys.time() + +for(s in 1:NSTudyReps) +{ + # Generating Samples A and B Under the Alternative and Counting the successes + + + + + + + + # p-val using bootstrap method + p1 = two.prop.bootstrap.pval.f(nSA,nA,nSB,nB,LowTail = FALSE,NSim=NSim) + + # p-val using permutation method + p2 = two.prop.permute.pval.f(nSA,nA,nSB,nB,LowTail = FALSE,NSim=NSim) + + # Storing each p-val into the appropriate vector + BootPowPVals[s] = p1 + PermPowPVals[s] = p2 + cat(s,"\n") +} + +end_time <- Sys.time() + +end_time - start_time + +# Compute the percentage of replicates that p-value less than +# given alpha. This will give us the power of the method + +a=.05 +sum(BootPowPVals<=a)/NSTudyReps +sum(PermPowPVals<=a)/NSTudyReps + + + +#***************************************************# +#***************************************************# +#***************************************************# +#***************************************************# +#***************************************************# +# CI Study +#***************************************************# +#***************************************************# +#***************************************************# +#***************************************************# +#***************************************************# +#***************************************************# + +NSTudyReps = 1000 +NSim = 10000 + +nA = 529 +nB = 326 +a = .05 + +CIIncludeTrueDiff = numeric(NSTudyReps) + +#***************************************************# +# The true proportions in the Two Population: +# We randomly assign these values. We should explore different +# values to see what happens. +# Note, the two values can be exactly the same or different +#***************************************************# + + + +# The CI is for the difference in the two proportions + +TrueDiff = pTA - pTB + +#***************************************************# + +start_time <- Sys.time() + +for(s in 1:NSTudyReps) +{ + # Generating Samples A and B Under the True Proportions and Counting the successes + + + + + + # CI using bootstrap method + CI = two.prop.CI.f(nSA,nA,nSB,nB,alpha=a,NSim=NSim) + + # Check if CI included true difference + if(CI[1] <= TrueDiff && TrueDiff <= CI[2]) + { + CIIncludeTrueDiff[s]=1 + }else + { + CIIncludeTrueDiff[s]=0 + } + cat(s,"\n") +} + +end_time <- Sys.time() + +end_time - start_time + +# Compute the percentage of replicates that CI included truediff + +sum(CIIncludeTrueDiff)/NSTudyReps + + + + + + diff --git a/R-IC10.R b/R-IC10.R @@ -0,0 +1,457 @@ +#*******************************************************# +#*******************************************************# +# STAT320 - IC10 Solutions # +#*******************************************************# +#*******************************************************# +#***************************************************# +# Type I/Power for Two Sample Means # +#***************************************************# + +#***************************************************# +#***************************************************# +#***************************************************# +#***************************************************# +#***************************************************# + +#***************************************************# +# Means P-value +#***************************************************# +set.seed(3697) +#***************************************************# + +two.mean.perm.pval.f = function(xsampA,xsampB,LowTail = TRUE,NSim=100000) +{ + # Compute Sizes of each sample + nA = length(xsampA) + nB = length(xsampB) + + xobs = mean(xsampA)-mean(xsampB) + + # Combine Samples to one super sample + supersample = c(xsampA,xsampB) + + # Create object where simulated DIFFERENCES of averages will be stored + ODiffBars = numeric(NSim) + + # Repeat simulations NSim times + for(i in 1:NSim) + { + shuffledsupersample = sample(supersample) + + # Generate sample 1 by resampling super Sample WITHOUT replacement + # in practice we select the first nA elements of the shuffled supersample + NewSampleA = shuffledsupersample[1:nA] + + # Assign remaining values in supersample to Sample B + # that is the elements (nA+1) through (nA+nB) + NewSampleB = shuffledsupersample[nA+1:nB] + + # Save difference in means simulated samples + ODiffBars[i] = mean(NewSampleA) - mean(NewSampleB ) + } + # Compute p-value + if(LowTail==TRUE) + { + # if alternative Ha is mu < mu0 + p = sum(ODiffBars<=xobs)/NSim + }else + { + # if alternative Ha is mu > mu0 + p = sum(ODiffBars>=xobs)/NSim + } + return(p) +} + + +two.mean.perm.recenter.pval.f = function(xsampA,xsampB,LowTail = TRUE,NSim=100000) +{ + # Compute Sizes of each sample + nA = length(xsampA) + nB = length(xsampB) + + xobs = mean(xsampA)-mean(xsampB) + + # center samples at 0 by subtracting the means + xsampA = xsampA - mean(xsampA) + + xsampB = xsampB - mean(xsampB) + + # Combine Samples to one super sample + supersample = c(xsampA,xsampB) + + # Create object where simulated DIFFERENCES of averages will be stored + ODiffBars = numeric(NSim) + + # Repeat simulations NSim times + for(i in 1:NSim) + { + shuffledsupersample = sample(supersample) + + # Generate sample 1 by resampling super Sample WITHOUT replacement + # in practice we select the first nA elements of the shuffled supersample + NewSampleA = shuffledsupersample[1:nA] + + # Assign remaining values in supersample to Sample B + # that is the elements (nA+1) through (nA+nB) + NewSampleB = shuffledsupersample[nA+1:nB] + + # Save difference in means simulated samples + ODiffBars[i] = mean(NewSampleA) - mean(NewSampleB ) + } + # Compute p-value + if(LowTail==TRUE) + { + # if alternative Ha is mu < mu0 + p = sum(ODiffBars<=xobs)/NSim + }else + { + # if alternative Ha is mu > mu0 + p = sum(ODiffBars>=xobs)/NSim + } + return(p) +} + + +two.mean.boot.pval.f = function(xsampA,xsampB,LowTail = TRUE,NSim=100000) +{ + # Compute Sizes of each sample + nA = length(xsampA) + nB = length(xsampB) + + xobs = mean(xsampA)-mean(xsampB) + + # Combine Samples to one super sample + supersample = c(xsampA,xsampB) + + # Create object where simulated DIFFERENCES of averages will be stored + ODiffBars = numeric(NSim) + + # Repeat simulations NSim times + for(i in 1:NSim) + { + shuffledsupersample = sample(supersample) + + # Generate sample 1 by resampling super Sample WITH replacement + NewSampleA = sample(supersample,size = nA,replace = TRUE) + + # Generate sample 2 by resampling super Sample WITH replacement + NewSampleB = sample(supersample,size = nB,replace = TRUE) + + # Save difference in means simulated samples + ODiffBars[i] = mean(NewSampleA) - mean(NewSampleB ) + } + # Compute p-value + if(LowTail==TRUE) + { + # if alternative Ha is mu < mu0 + p = sum(ODiffBars<=xobs)/NSim + }else + { + # if alternative Ha is mu > mu0 + p = sum(ODiffBars>=xobs)/NSim + } + return(p) +} + + +two.mean.boot.recenter.pval.f = function(xsampA,xsampB,LowTail = TRUE,NSim=100000) +{ + # Compute Sizes of each sample + nA = length(xsampA) + nB = length(xsampB) + + xobs = mean(xsampA)-mean(xsampB) + + # center samples at 0 by subtracting the means + xsampA = xsampA - mean(xsampA) + + xsampB = xsampB - mean(xsampB) + + # Combine Samples to one super sample + supersample = c(xsampA,xsampB) + + # Create object where simulated DIFFERENCES of averages will be stored + ODiffBars = numeric(NSim) + + # Repeat simulations NSim times + for(i in 1:NSim) + { + shuffledsupersample = sample(supersample) + + # Generate sample 1 by resampling super Sample WITH replacement + NewSampleA = sample(supersample,size = nA,replace = TRUE) + + # Generate sample 2 by resampling super Sample WITH replacement + NewSampleB = sample(supersample,size = nB,replace = TRUE) + + # Save difference in means simulated samples + ODiffBars[i] = mean(NewSampleA) - mean(NewSampleB ) + } + # Compute p-value + if(LowTail==TRUE) + { + # if alternative Ha is mu < mu0 + p = sum(ODiffBars<=xobs)/NSim + }else + { + # if alternative Ha is mu > mu0 + p = sum(ODiffBars>=xobs)/NSim + } + return(p) +} + + +#***************************************************# +# Means CI +#***************************************************# + +two.mean.CI.f = function(xsampA,xsampB,alpha=.05,NSim=100000) +{ + # Compute Sizes of each sample + nA = length(xsampA) + nB = length(xsampB) + + # Create object where simulated DIFFERENCES of averages will be stored + ODiffBars = numeric(NSim) + + # Repeat simulations NSim times + for(i in 1:NSim) + { + # Generate sample 1 by resampling original sample WITH replacement + NewSampleA = sample(x= xsampA,size = nA,replace = TRUE) + + # Generate sample 2 by resampling original sample WITH replacement + NewSampleB = sample(x= xsampB,size = nB,replace = TRUE) + + # Save difference in means simulated samples + ODiffBars[i] = mean(NewSampleA) - mean(NewSampleB) + } + + # Compute CI + ci =quantile(ODiffBars,probs = c(alpha/2,1-alpha/2)) + return(ci) +} + + + +#***************************************************# +#***************************************************# +#***************************************************# +#***************************************************# +#***************************************************# +#***************************************************# +# Type I Simulation Study - Case 1: Normal +#***************************************************# +#***************************************************# +#***************************************************# +#***************************************************# +#***************************************************# +#***************************************************# + +NSTudyReps = 10000 +NSim = 10000 + +nA = 35 +nB = 35 +LowTail = TRUE + +PermNullPVals = numeric(NSTudyReps) +PermRecNullPVals = numeric(NSTudyReps) +BootNullPVals = numeric(NSTudyReps) +BootRecNullPVals = numeric(NSTudyReps) + +#***************************************************# +# The true distributionin the Common Population: +# We randomly select this. We should explore different +# options. These options will address two components: +# 1) Shape of distribution (Normal, Uniform, Exponential, etc.) +# 2) Parameters of distribution (Mean, SD, lambda, etc.) +#***************************************************# + + +#***************************************************# + +start_time <- Sys.time() + +for(s in 1:NSTudyReps) +{ + # Generating Samples A and B Under the Null Using the given distribution + + + + + + + + + # p-val using permutation method + PermNullPVals[s] = two.mean.perm.pval.f(SampA,SampB,LowTail = LowTail,NSim=NSim) + + # p-val using recentering permutation method + PermRecNullPVals[s] = two.mean.perm.recenter.pval.f(SampA,SampB,LowTail = LowTail,NSim=NSim) + + # p-val using bootstrap method + BootNullPVals[s] = two.mean.boot.pval.f(SampA,SampB,LowTail = LowTail,NSim=NSim) + + # p-val using recentering bootstrap method + BootRecNullPVals[s] = two.mean.boot.recenter.pval.f(SampA,SampB,LowTail = LowTail,NSim=NSim) + + cat(s,"\n") +} + +end_time <- Sys.time() + +end_time - start_time + +# Compute the percentage of replicates that p-value less than +# given alpha + +a=.05 +sum(PermNullPVals<=a)/NSTudyReps +sum(PermRecNullPVals<=a)/NSTudyReps +sum(BootNullPVals<=a)/NSTudyReps +sum(BootRecNullPVals<=a)/NSTudyReps + + +#***************************************************# +#***************************************************# +#***************************************************# +#***************************************************# +#***************************************************# +# Power Simulation Study +#***************************************************# +#***************************************************# +#***************************************************# +#***************************************************# +#***************************************************# +#***************************************************# + + +PermPowPVals = numeric(NSTudyReps) +PermRecPowPVals = numeric(NSTudyReps) +BootPowPVals = numeric(NSTudyReps) +BootRecPowPVals = numeric(NSTudyReps) + +#***************************************************# +# The true distributionin the Two Populations: +# We randomly select this. We should explore different +# options. These options will address two components: +# 1) Shape of distribution (Normal, Uniform, Exponential, etc.) +# 2) Parameters of distribution (Mean, SD, lambda, etc.) +#***************************************************# + +# Case 1: Normal + + +#***************************************************# + +start_time <- Sys.time() + +for(s in 1:NSTudyReps) +{ + # Generating Samples A and B Under the Alternative + + + + + + # p-val using permutation method + PermPowPVals[s] = two.mean.perm.pval.f(SampA,SampB,LowTail = LowTail,NSim=NSim) + + # p-val using permutation method + PermRecPowPVals[s] = two.mean.perm.recenter.pval.f(SampA,SampB,LowTail = LowTail,NSim=NSim) + + # p-val using bootstrap method + BootPowPVals[s] = two.mean.boot.pval.f(SampA,SampB,LowTail = LowTail,NSim=NSim) + + # p-val using recentering bootstrap method + BootRecPowPVals[s] = two.mean.boot.recenter.pval.f(SampA,SampB,LowTail = LowTail,NSim=NSim) + + cat(s,"\n") +} + +end_time <- Sys.time() + +end_time - start_time + +# Compute the percentage of replicates that p-value less than +# given alpha. This will give us the power of the method + +a=.05 +sum(PermPowPVals<=a)/NSTudyReps +sum(PermRecPowPVals<=a)/NSTudyReps +sum(BootPowPVals<=a)/NSTudyReps +sum(BootRecPowPVals<=a)/NSTudyReps + + +#***************************************************# +#***************************************************# +#***************************************************# +#***************************************************# +#***************************************************# +# CI Study +#***************************************************# +#***************************************************# +#***************************************************# +#***************************************************# +#***************************************************# +#***************************************************# + + +#***************************************************# +# The true distributionin the Two Populations: +# We randomly select this. We should explore different +# options. These options will address two components: +# 1) Shape of distribution (Normal, Uniform, Exponential, etc.) +# 2) Parameters of distribution (Mean, SD, lambda, etc.) +#***************************************************# + +# Case 1: Normal + + +a = .05 + + +# The CI is for the difference in the two proportions +TrueDiff = muA - muB + +CIIncludeTrueDiff = numeric(NSTudyReps) + + +#***************************************************# + +start_time <- Sys.time() + +for(s in 1:NSTudyReps) +{ + # Generating Samples A and B Under the Alternative + + + + + + + + + # CI using bootstrap method + CI = two.mean.CI.f(SampA,SampB,alpha=a,NSim=NSim) + + # Check if CI included true difference + if(CI[1] <= TrueDiff && TrueDiff <= CI[2]) + { + CIIncludeTrueDiff[s]=1 + }else + { + CIIncludeTrueDiff[s]=0 + } + cat(s,"\n") +} + +end_time <- Sys.time() + +end_time - start_time + +# Compute the percentage of replicates that CI included truediff + +sum(CIIncludeTrueDiff)/NSTudyReps +