commit fdd15bd294e3248d746e841ee4c81b161f1d4899
Author: John Kubach <johnkubach@gmail.com>
Date: Fri, 17 Sep 2021 11:03:06 -0400
Add R files
Diffstat:
A | R - HW06 - PvalFunction.R | | | 60 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | R-IC04.R | | | 127 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | R-IC05.R | | | 126 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | R-IC06.R | | | 223 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | R-IC08.R | | | 166 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | R-IC09.R | | | 364 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | R-IC10.R | | | 457 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
7 files changed, 1523 insertions(+), 0 deletions(-)
diff --git a/R - HW06 - PvalFunction.R b/R - HW06 - PvalFunction.R
@@ -0,0 +1,60 @@
+#*******************************************************#
+#*******************************************************#
+# STAT320 - HW6 Solutions #
+#*******************************************************#
+#*******************************************************#
+#***************************************************#
+# PVal for Proportions #
+#***************************************************#
+
+#***************************************************#
+#***************************************************#
+#***************************************************#
+#***************************************************#
+#***************************************************#
+
+#***************************************************#
+# Fair Coin
+#***************************************************#
+#set.seed(257)
+#***************************************************#
+
+prop.pval.f = function(x,p0=0.5,NSize=100,Low = TRUE,NSim=100000)
+{
+ POP = c("T","H")
+ Succ = "T"
+ ONSuccs = numeric(NSim)
+
+ for(i in 1:NSim)
+ {
+ Sample = sample(x=POP, prob =c(p0,1-p0),size = NSize,replace = TRUE)
+ ONSuccs[i] = sum(Sample==Succ)
+ }
+
+ if(Low==TRUE)
+ {
+ p = sum(ONSuccs<=x)/NSim
+ }else
+ {
+ p = sum(ONSuccs>=x)/NSim
+ }
+ return(p)
+}
+
+#***************************************************#
+#***************************************************#
+
+NObs = 100000
+N = 564
+pS = .135
+
+x = 65
+Dir = TRUE
+prop.pval.f(x,p0=pS,NSize = N,Low = Dir,NSim=NObs)
+
+
+
+
+
+
+
diff --git a/R-IC04.R b/R-IC04.R
@@ -0,0 +1,127 @@
+#*******************************************************#
+#*******************************************************#
+# STAT320 - IC04 Solutions #
+#*******************************************************#
+#*******************************************************#
+#***************************************************#
+# CI for Proportions #
+#***************************************************#
+
+#***************************************************#
+#***************************************************#
+#***************************************************#
+#***************************************************#
+#***************************************************#
+
+#***************************************************#
+# Fair Coin
+#***************************************************#
+set.seed(257)
+#***************************************************#
+
+#***************************************************#
+#***************************************************#
+
+prop.CI.f = function(a=.05,p0=0.5,NSize=100,NSim=100000)
+{
+ POP = c("T","H")
+ Succ = "T"
+ ONSuccs = numeric(NSim)
+
+ ONSuccs = replicate(NSim,
+ {
+ Sample=sample(x=POP, prob =c(p0,1-p0),size = NSize,replace = TRUE)
+ sum(Sample==Succ)
+ })
+ x = quantile(ONSuccs,probs = c(a/2,1-a/2))/NSize
+ return(x)
+}
+
+
+#***************************************************#
+#***************************************************#
+NSim = 100000 # Number of simulated samples
+a = 0.05 # Alpha level for CI
+
+
+# Sample Estimate
+N = 564 # Sample size
+nS = 51 # Number of Successes in sample
+phat = nS/N # Estimated sample proportion
+
+# Set True value to be the estimate from sample
+pT = phat
+
+# simulate 100,000 samples, compute their sample proportions,
+# and store them in the vector SimProps
+
+POP = c(1,0)
+SimProps = numeric(NSim)
+
+for(i in 1:NSim)
+{
+ # Generate a sample using the given pT and the sample size N
+ Sample=sample(x=POP, prob =c(pT,1-pT),size = N,replace = TRUE)
+
+ # Count the number of successes (1's) in the simulated sample
+ xS = sum(Sample==1)
+
+ # compute sample proportion of current sample
+ phat = xS/N
+
+ # save current sample proportion
+ SimProps[i] = phat
+
+ # go back and repeat
+}
+
+
+# Compute the (1-a)*100% confidence based on the simulated phat's
+
+CI = quantile(SimProps,probs = c(a/2,1-a/2))
+
+CI
+
+
+prop.CI.f(a,p0=phat,NSize=N,NSim=NSim)
+
+#***************************************************#
+
+
+#***************************************************#
+#***************************************************#
+# Program to demonstrate what confidence means
+# in Confidence intervals
+#***************************************************#
+#***************************************************#
+
+N.CIs = 20 # Number of simulated samples
+N = 564 # Sample size
+
+pT = .15 # True percantage of successes in population
+a = 0.01
+
+NCIIn = 0 # Counter of the samples the produced a CI that
+ # Included the true value
+
+POP = c(1,0)
+for(i in 1:N.CIs)
+{
+ # Randomly select a sample from the Real Population to get
+ # an estmate phat of the true pT
+ CurSample = sample(x=POP, prob =c(pT,1-pT),size = N,replace = TRUE)
+ phat = sum(CurSample)/N
+
+ # we use the phat to compute a confidence interval
+ ci = prop.CI.f(a,p0=phat,NSize = N,NSim=NSim)
+
+ # Checking if the particular CI contains the true value pT
+ if(ci[1] <= pT && pT <= ci[2])
+ NCIIn = NCIIn + 1
+}
+
+# Number and Proportion of CIs who actually contained the true value of pT
+NCIIn
+NCIIn/N.CIs
+
+
diff --git a/R-IC05.R b/R-IC05.R
@@ -0,0 +1,126 @@
+#*******************************************************#
+#*******************************************************#
+# STAT320 - IC05 Solutions #
+#*******************************************************#
+#*******************************************************#
+#***************************************************#
+# PVal/Power for Means #
+#***************************************************#
+
+#***************************************************#
+#***************************************************#
+#***************************************************#
+#***************************************************#
+#***************************************************#
+
+#***************************************************#
+# Means P-value
+#***************************************************#
+set.seed(257)
+#***************************************************#
+
+xsamp1=c(96.7, 108.8, 95.1, 110, 104.5, 106.7, 120.5, 113.4, 94.4, 67.6, 81.9, 118.1)
+mean(xsamp1)
+NSim1=100000
+Low1=FALSE
+NSize1=12
+
+mean.pval.f = function(xobs,xsamp,mu0=0,NSize=100,Low = TRUE,NSim=100000)
+{
+ # Rescale sample so as to have mean equal to mu0
+ rescaledsample = xsamp - mean(xsamp) + mu0
+
+ # Create object where simulated averages will be stored
+ OXBars = numeric(NSim)
+
+ # Repeat simulations NSim times
+ for(i in 1:NSim)
+ {
+ # Generate sample by resampling original sample WITH replacement
+ Sample = sample(x=rescaledsample,size = NSize,replace = TRUE)
+ # Save mean of simulated sample
+ OXBars[i] = mean(Sample)
+ }
+ # Compute p-value
+ if(Low==TRUE)
+ {
+ # if alternative Ha is mu < mu0
+ p = sum(OXBars<=xobs)/NSim
+ }else
+ {
+ # if alternative Ha is mu > mu0
+ p = sum(OXBars>=xobs)/NSim
+ }
+ return(p)
+}
+pval = mean.pval.f(xobs = mean(xsamp1), xsamp=xsamp1,mu0=98,NSize=NSize1,Low=Low1,NSim=NSim1)
+pval
+
+mu0=10
+
+#***************************************************#
+# Means Cutoff
+#***************************************************#
+
+mean.pval.cutoff.f = function(a=.05,xsamp,mu0=0,NSize=100,Low = TRUE,NSim=100000)
+{
+ # Rescale sample so as to have mean equal to mu0
+ rescaledsample = xsamp - mean(xsamp) + mu0
+
+ # Create object where simulated averages will be stored
+ OXBars = numeric(NSim)
+
+ # Repeat simulations NSim times
+ for(i in 1:NSim)
+ {
+ # Generate sample by resampling original sample WITH replacement
+ Sample = sample(x=rescaledsample,size = NSize,replace = TRUE)
+ # Save mean of simulated sample
+ OXBars[i] = mean(Sample)
+ }
+ # Compute p-value
+ if(Low==TRUE)
+ {
+ # if alternative Ha is mu < mu0
+ x = quantile(OXBars,probs = a)
+ }else
+ {
+ # if alternative Ha is mu > mu0
+ x = quantile(OXBars,probs = 1-a)
+ }
+ return(x)
+}
+cOff = mean.pval.cutoff.f(a = 0.01,xsamp=xsamp1,mu0=29.5,NSize=18,Low=TRUE,NSim=100000)
+cOff
+
+
+#***************************************************#
+# Means CI
+#***************************************************#
+
+mean.CI.f = function(xsamp,a=.05,NSize=100,NSim=100000)
+{
+ # Rescale sample so as to have mean equal to mu0
+ rescaledsample = xsamp - mean(xsamp) + mu0
+
+ # Create object where simulated averages will be stored
+ OXBars = numeric(NSim)
+
+ # Repeat simulations NSim times
+ for(i in 1:NSim)
+ {
+ # Generate sample by resampling original sample WITH replacement
+ Sample = sample(x=rescaledsample,size = NSize,replace = TRUE)
+ # Save mean of simulated sample
+ OXBars[i] = mean(Sample)
+ }
+ x = quantile(OXBars,probs = c(a/2,1-a/2))
+ return(x)
+}
+
+CI = mean.CI.f(xsamp=xsamp1, a=0.05, NSize=12, NSim=100000)
+CI
+
+pow = mean.pval.f(xobs = mean(xsamp1) ,xsamp=xsamp1,mu0=99,NSize=12,Low=FALSE,NSim=100000)
+pow
+
diff --git a/R-IC06.R b/R-IC06.R
@@ -0,0 +1,223 @@
+#*******************************************************#
+#*******************************************************#
+# STAT320 - IC06 Solutions #
+#*******************************************************#
+#*******************************************************#
+#***************************************************#
+# PVal/Power for Two Sample Means #
+#***************************************************#
+
+#***************************************************#
+#***************************************************#
+#***************************************************#
+#***************************************************#
+#***************************************************#
+
+#***************************************************#
+# Means P-value
+#***************************************************#
+#set.seed(257)
+#***************************************************#
+
+two.mean.pval.f = function(xobs,xsampA,xsampB,Low = TRUE,NSim=100000)
+{
+ # Compute Sizes of each sample
+ nA = length(xsampA)
+ nB = length(xsampB)
+
+ # Combine Samples to one super sample
+ supersample = c(xsampA,xsampB)
+
+ # Create object where simulated DIFFERENCES of averages will be stored
+ ODiffBars = numeric(NSim)
+
+ # Repeat simulations NSim times
+ for(i in 1:NSim)
+ {
+ shuffledsupersample = sample(supersample)
+
+ # Generate sample 1 by resampling super Sample WITHOUT replacement
+ # in practice we select the first nA elements of the shuffled supersample
+ NewSampleA = shuffledsupersample[1:nA]
+
+ # Assign remaining values in supersample to Sample B
+ # that is the elements (nA+1) through (nA+nB)
+ NewSampleB = shuffledsupersample[nA+1:nB]
+
+ # Save difference in means simulated samples
+ ODiffBars[i] = mean(NewSampleA) - mean(NewSampleB )
+ }
+ # Compute p-value
+ if(Low==TRUE)
+ {
+ # if alternative Ha is mu < mu0
+ p = sum(ODiffBars<=xobs)/NSim
+ }else
+ {
+ # if alternative Ha is mu > mu0
+ p = sum(ODiffBars>=xobs)/NSim
+ }
+ return(p)
+}
+
+
+#***************************************************#
+# Means Cutoff
+#***************************************************#
+
+two.mean.pval.cutoff.f = function(xsampA,xsampB,alpha=.05,Low = TRUE,NSim=100000)
+{
+ # Compute Sizes of each sample
+ nA = length(xsampA)
+ nB = length(xsampB)
+
+ # Combine Samples to one super sample
+ supersample = c(xsampA,xsampB)
+
+ # Create object where simulated DIFFERENCES of averages will be stored
+ ODiffBars = numeric(NSim)
+
+ # Repeat simulations NSim times
+ for(i in 1:NSim)
+ {
+ # Generate sample 1 by resampling super Sample WITHOUT replacement
+ NewSampleA = sample(x= supersample,size = nA,replace = FALSE)
+
+ # Assign remaining values in supersample to Sample B
+ NewSampleB = setdiff(supersample,NewSampleA)
+
+ # Save difference in means simulated samples
+ ODiffBars[i] = mean(NewSampleA) - mean(NewSampleB )
+ }
+
+ # Compute Threshold
+ if(Low==TRUE)
+ {
+ # if alternative Ha is mu < mu0
+ x = quantile(ODiffBars,probs = alpha)
+ }else
+ {
+ # if alternative Ha is mu > mu0
+ x = quantile(ODiffBars,probs = 1-alpha)
+ }
+ return(x)
+}
+
+#***************************************************#
+# Means CI
+#***************************************************#
+
+two.mean.CI.f = function(xsampA,xsampB,alpha=.05,NSim=100000)
+{
+ # Compute Sizes of each sample
+ nA = length(xsampA)
+ nB = length(xsampB)
+
+ # Create object where simulated DIFFERENCES of averages will be stored
+ ODiffBars = numeric(NSim)
+
+ # Repeat simulations NSim times
+ for(i in 1:NSim)
+ {
+ # Generate sample 1 by resampling original sample WITH replacement
+ NewSampleA = sample(x= xsampA,size = nA,replace = TRUE)
+
+ # Generate sample 2 by resampling original sample WITH replacement
+ NewSampleB = sample(x= xsampB,size = nB,replace = TRUE)
+
+ # Save difference in means simulated samples
+ ODiffBars[i] = mean(NewSampleA) - mean(NewSampleB)
+ }
+
+ # Compute CI
+ ci =quantile(ODiffBars,probs = c(alpha/2,1-alpha/2))
+ return(ci)
+}
+
+
+
+#***************************************************#
+# Means Power
+#***************************************************#
+two.mean.power.f = function(CAlpha,xsampA,xsampB,Delta=1,Low=TRUE,NSim=100000)
+{
+ # Getting the rejection cutoff value
+
+ # Compute Sizes of each sample
+ nA = length(xsampA)
+ nB = length(xsampB)
+
+ # rescale group A to have mean 0
+ rescaleA = xsampA - mean(xsampA)
+
+ # rescale group B to have mean Delta
+ if(Low==TRUE)
+ {
+ rescaleB = xsampB - mean(xsampB) - Delta
+ }else{
+ rescaleB = xsampB - mean(xsampB) + Delta
+ }
+
+ # Create object where simulated DIFFERENCES of averages will be stored
+ ODiffBars = numeric(NSim)
+
+ # Repeat simulations NSim times
+ for(i in 1:NSim)
+ {
+ # Generate sample 1 by resampling original sample WITH replacement
+ NewSampleA = sample(x= rescaleA,size = nA,replace = TRUE)
+
+ # Generate sample 2 by resampling original sample WITH replacement
+ NewSampleB = sample(x= rescaleB,size = nB,replace = TRUE)
+
+ # Save difference in means simulated samples
+ ODiffBars[i] = mean(NewSampleA) - mean(NewSampleB )
+ }
+
+ # Compute p-value
+ if(Low==TRUE)
+ {
+ # if alternative Ha is mu < mu0
+ p = sum(ODiffBars<=CAlpha )/NSim
+ }else
+ {
+ # if alternative Ha is mu > mu0
+ p = sum(ODiffBars>=CAlpha )/NSim
+ }
+ return(p)
+ }
+
+
+######################################################
+######################################################
+######################################################
+######################################################
+
+
+xsampA=c(5.26, 5.07, 5.54, 5.17, 5.39)
+xbarA = mean(xsampA)
+xbarA
+sd(xsampA)
+xsampB=c(6.33, 6.51, 6.29, 6.16, 6.48)
+xbarB = mean(xsampB)
+xbarB
+sd(xsampB)
+xbarA - xbarB
+
+means = two.mean.pval.f(xobs = (xbarA-xbarB), xsampA, xsampB, Low = TRUE, NSim = 100000)
+means
+
+CI = two.mean.CI.f(xsampA, xsampB, alpha = 0.02, NSim = 100000)
+CI
+
+######################################################
+######################################################
+######################################################
+
+
+######################################################
+######################################################
+######################################################
+
+# xsampA=c(68,84,81,85,75,69,80,76,79,74)
+# xsampB=c(74,71,79,63,80,61,69,72,80,65)
diff --git a/R-IC08.R b/R-IC08.R
@@ -0,0 +1,166 @@
+#*******************************************************#
+#*******************************************************#
+# STAT320 - IC06 Solutions #
+#*******************************************************#
+#*******************************************************#
+#***************************************************#
+# PVal/Power for Two Sample Proportions #
+#***************************************************#
+
+#***************************************************#
+#***************************************************#
+#***************************************************#
+#***************************************************#
+#***************************************************#
+
+#***************************************************#
+# Means P-value
+#***************************************************#
+#set.seed(300)
+#***************************************************#
+
+two.prop.permute.pval.f = function(xA,nA,xB,nB,LowTail = TRUE,NSim=100000)
+{
+ xobs = xA/nA - xB/nB
+ # Reconstruct sample A
+ xsampA = rep(0,nA)
+ xsampA[1:xA] = rep(1,xA)
+
+ # Reconstruct sample B
+ xsampB = rep(0,nB)
+ xsampB[1:xB] = rep(1,xB)
+
+ # Combine Samples to one super sample
+ supersample = c(xsampA,xsampB)
+
+ # Create object where simulated DIFFERENCES of averages will be stored
+ ODiffProps = numeric(NSim)
+
+ # Repeat simulations NSim times
+ for(i in 1:NSim)
+ {
+
+ # Shuffle the elements inthe supersample
+ shuffledsupersample = sample(supersample)
+
+ # Generate sample 1 by resampling super Sample WITHOUT replacement
+ # in practice we select the first nA elements of the shuffled supersample
+ NewSampleA = shuffledsupersample[1:nA]
+
+ # Assign remaining values in supersample to Sample B
+ # that is the elements (nA+1) through (nA+nB)
+ NewSampleB = shuffledsupersample[nA+1:nB]
+
+ # Save difference in means simulated samples
+ ODiffProps[i] = mean(NewSampleA) - mean(NewSampleB )
+ }
+ # Compute p-value
+ if(LowTail==TRUE)
+ {
+ # if alternative Ha is mu < mu0
+ p = sum(ODiffProps<=xobs)/NSim
+ }else
+ {
+ # if alternative Ha is mu > mu0
+ p = sum(ODiffProps>=xobs)/NSim
+ }
+ return(p)
+}
+
+two.prop.bootstrap.pval.f = function(xA,nA,xB,nB,LowTail = TRUE,NSim=100000)
+{
+ xobs = xA/nA - xB/nB
+ # Reconstruct sample A
+ xsampA = rep(0,nA)
+ xsampA[1:xA] = rep(1,xA)
+
+ # Reconstruct sample B
+ xsampB = rep(0,nB)
+ xsampB[1:xB] = rep(1,xB)
+
+ # Combine Samples to one super sample
+ supersample = c(xsampA,xsampB)
+
+ # Create object where simulated DIFFERENCES of averages will be stored
+ ODiffProps = numeric(NSim)
+
+ # Repeat simulations NSim times
+ for(i in 1:NSim)
+ {
+
+ # Generate sample 1 by resampling super Sample WITH replacement
+ NewSampleA = sample(supersample,size = nA,replace = TRUE)
+
+ # Generate sample 2 by resampling super Sample WITH replacement
+ NewSampleB = sample(supersample,size = nB,replace = TRUE)
+
+ # Save difference in means simulated samples
+ ODiffProps[i] = mean(NewSampleA) - mean(NewSampleB )
+ }
+ # Compute p-value
+ if(LowTail==TRUE)
+ {
+ # if alternative Ha is mu < mu0
+ p = sum(ODiffProps<=xobs)/NSim
+ }else
+ {
+ # if alternative Ha is mu > mu0
+ p = sum(ODiffProps>=xobs)/NSim
+ }
+ return(p)
+}
+
+
+#***************************************************#
+# Means CI
+#***************************************************#
+
+two.prop.CI.f = function(xA,nA,xB,nB,alpha=.05,NSim=100000)
+{
+ # Reconstruct sample A
+ xsampA = rep(0,nA)
+ xsampA[1:xA] = rep(1,xA)
+
+ # Reconstruct sample B
+ xsampB = rep(0,nB)
+ xsampB[1:xB] = rep(1,xB)
+
+ # Create object where simulated DIFFERENCES of averages will be stored
+ ODiffProps = numeric(NSim)
+
+ # Repeat simulations NSim times
+ for(i in 1:NSim)
+ {
+ # Generate sample 1 by resampling original Sample A WITH replacement
+ NewSampleA = sample(xsampA,size = nA,replace = TRUE)
+
+ # Generate sample 2 by resampling original Sample B WITH replacement
+ NewSampleB = sample(xsampB,size = nB,replace = TRUE)
+
+ # Save difference in means simulated samples
+ ODiffProps[i] = mean(NewSampleA) - mean(NewSampleB)
+ }
+
+ # Compute CI
+ ci =quantile(ODiffProps,probs = c(alpha/2,1-alpha/2))
+ return(ci)
+}
+
+#***************************************************#
+# Questions
+#***************************************************#
+
+xA = 5
+nA = 119
+
+xB = 19
+nB = 115
+
+LowTail = TRUE
+NSim = 100000
+
+alpha=.3
+
+two.prop.bootstrap.pval.f(xA, nA, xB, nB, LowTail, NSim)
+two.prop.permute.pval.f(xA, nA, xB, nB, LowTail, NSim)
+two.prop.CI.f(xA, nA, xB, nB, alpha, NSim)
diff --git a/R-IC09.R b/R-IC09.R
@@ -0,0 +1,364 @@
+#*******************************************************#
+#*******************************************************#
+# STAT320 - IC09 Solutions #
+#*******************************************************#
+#*******************************************************#
+#***************************************************#
+# Type I Error/Power for Two Sample Proportions #
+#***************************************************#
+
+#***************************************************#
+#***************************************************#
+#***************************************************#
+#***************************************************#
+#***************************************************#
+
+#***************************************************#
+# Means P-value
+#***************************************************#
+set.seed(257)
+#***************************************************#
+
+two.prop.permute.pval.f = function(xA,nA,xB,nB,LowTail = TRUE,NSim=100000)
+{
+ xobs = xA/nA - xB/nB
+ # Reconstruct sample A
+ xsampA = rep(0,nA)
+ xsampA[1:xA] = rep(1,xA)
+
+ # Reconstruct sample B
+ xsampB = rep(0,nB)
+ xsampB[1:xB] = rep(1,xB)
+
+ # Combine Samples to one super sample
+ supersample = c(xsampA,xsampB)
+
+ # Create object where simulated DIFFERENCES of averages will be stored
+ ODiffProps = numeric(NSim)
+
+ # Repeat simulations NSim times
+ for(i in 1:NSim)
+ {
+
+ # Shuffle the elements inthe supersample
+ shuffledsupersample = sample(supersample)
+
+ # Generate sample 1 by resampling super Sample WITHOUT replacement
+ # in practice we select the first nA elements of the shuffled supersample
+ NewSampleA = shuffledsupersample[1:nA]
+
+ # Assign remaining values in supersample to Sample B
+ # that is the elements (nA+1) through (nA+nB)
+ NewSampleB = shuffledsupersample[nA+1:nB]
+
+ # Save difference in means simulated samples
+ ODiffProps[i] = mean(NewSampleA) - mean(NewSampleB )
+ }
+ # Compute p-value
+ if(LowTail==TRUE)
+ {
+ # if alternative Ha is mu < mu0
+ p = sum(ODiffProps<=xobs)/NSim
+ }else
+ {
+ # if alternative Ha is mu > mu0
+ p = sum(ODiffProps>=xobs)/NSim
+ }
+ return(p)
+}
+
+
+two.prop.bootstrap.pval.f = function(xA,nA,xB,nB,LowTail = TRUE,NSim=100000)
+{
+ xobs = xA/nA - xB/nB
+ # Reconstruct sample A
+ xsampA = rep(0,nA)
+ xsampA[1:xA] = rep(1,xA)
+
+ # Reconstruct sample B
+ xsampB = rep(0,nB)
+ xsampB[1:xB] = rep(1,xB)
+
+ # Combine Samples to one super sample
+ supersample = c(xsampA,xsampB)
+
+ # Create object where simulated DIFFERENCES of averages will be stored
+ ODiffProps = numeric(NSim)
+
+ # Repeat simulations NSim times
+ for(i in 1:NSim)
+ {
+
+ # Generate sample 1 by resampling super Sample WITH replacement
+ NewSampleA = sample(supersample,size = nA,replace = TRUE)
+
+ # Generate sample 2 by resampling super Sample WITH replacement
+ NewSampleB = sample(supersample,size = nB,replace = TRUE)
+
+ # Save difference in means simulated samples
+ ODiffProps[i] = mean(NewSampleA) - mean(NewSampleB )
+ }
+ # Compute p-value
+ if(LowTail==TRUE)
+ {
+ # if alternative Ha is mu < mu0
+ p = sum(ODiffProps<=xobs)/NSim
+ }else
+ {
+ # if alternative Ha is mu > mu0
+ p = sum(ODiffProps>=xobs)/NSim
+ }
+ return(p)
+}
+
+
+#***************************************************#
+# Means CI
+#***************************************************#
+
+two.prop.CI.f = function(xA,nA,xB,nB,alpha=.05,NSim=100000)
+{
+ # Reconstruct sample A
+ xsampA = rep(0,nA)
+ xsampA[1:xA] = rep(1,xA)
+
+ # Reconstruct sample B
+ xsampB = rep(0,nB)
+ xsampB[1:xB] = rep(1,xB)
+
+ # Create object where simulated DIFFERENCES of averages will be stored
+ ODiffProps = numeric(NSim)
+
+ # Repeat simulations NSim times
+ for(i in 1:NSim)
+ {
+ # Generate sample 1 by resampling original Sample A WITH replacement
+ NewSampleA = sample(xsampA,size = nA,replace = TRUE)
+
+ # Generate sample 2 by resampling original Sample B WITH replacement
+ NewSampleB = sample(xsampB,size = nB,replace = TRUE)
+
+ # Save difference in means simulated samples
+ ODiffProps[i] = mean(NewSampleA) - mean(NewSampleB)
+ }
+
+ # Compute CI
+ ci =quantile(ODiffProps,probs = c(alpha/2,1-alpha/2))
+ return(ci)
+}
+
+
+
+
+#***************************************************#
+#***************************************************#
+#***************************************************#
+#***************************************************#
+#***************************************************#
+#***************************************************#
+# Type I Simulation Study
+#***************************************************#
+#***************************************************#
+#***************************************************#
+#***************************************************#
+#***************************************************#
+#***************************************************#
+
+NSTudyReps = 3000
+NSim = 10000
+
+nA = 529
+nB = 326
+
+BootNullPVals = numeric(NSTudyReps)
+PermNullPVals = numeric(NSTudyReps)
+
+#***************************************************#
+# The true proportion in the Common Population:
+# We randomly assign this value. We should explore different
+# values to see what happens.
+#***************************************************#
+
+
+#***************************************************#
+
+start_time <- Sys.time()
+
+for(s in 1:NSTudyReps)
+{
+ # Generating Samples A and B Under the Null and Counting the successes
+
+
+
+
+
+
+
+ # p-val using bootstrap method
+ p1 = two.prop.bootstrap.pval.f(nSA,nA,nSB,nB,LowTail = FALSE,NSim=NSim)
+
+ # p-val using permutation method
+ p2 = two.prop.permute.pval.f(nSA,nA,nSB,nB,LowTail = FALSE,NSim=NSim)
+
+ # Storing each p-val into the appropriate vector
+ BootNullPVals[s] = p1
+ PermNullPVals[s] = p2
+ cat(s,"\n")
+}
+
+end_time <- Sys.time()
+
+end_time - start_time
+
+# Compute the percentage of replicates that p-value less than
+# given alpha
+
+a=.05
+sum(BootNullPVals<=a)/NSTudyReps
+sum(PermNullPVals<=a)/NSTudyReps
+
+
+#***************************************************#
+#***************************************************#
+#***************************************************#
+#***************************************************#
+#***************************************************#
+# Power Simulation Study
+#***************************************************#
+#***************************************************#
+#***************************************************#
+#***************************************************#
+#***************************************************#
+#***************************************************#
+
+NSTudyReps = 3000
+NSim = 10000
+
+
+nA = 529
+nB = 326
+
+BootPowPVals = numeric(NSTudyReps)
+PermPowPVals = numeric(NSTudyReps)
+
+#***************************************************#
+# The true proportions in each Population is Unknown:
+# We randomly assign these values. We should explore different
+# values to see what happens.
+# Note, the two values can be exactly the same or different
+#***************************************************#
+
+
+#***************************************************#
+
+start_time <- Sys.time()
+
+for(s in 1:NSTudyReps)
+{
+ # Generating Samples A and B Under the Alternative and Counting the successes
+
+
+
+
+
+
+
+ # p-val using bootstrap method
+ p1 = two.prop.bootstrap.pval.f(nSA,nA,nSB,nB,LowTail = FALSE,NSim=NSim)
+
+ # p-val using permutation method
+ p2 = two.prop.permute.pval.f(nSA,nA,nSB,nB,LowTail = FALSE,NSim=NSim)
+
+ # Storing each p-val into the appropriate vector
+ BootPowPVals[s] = p1
+ PermPowPVals[s] = p2
+ cat(s,"\n")
+}
+
+end_time <- Sys.time()
+
+end_time - start_time
+
+# Compute the percentage of replicates that p-value less than
+# given alpha. This will give us the power of the method
+
+a=.05
+sum(BootPowPVals<=a)/NSTudyReps
+sum(PermPowPVals<=a)/NSTudyReps
+
+
+
+#***************************************************#
+#***************************************************#
+#***************************************************#
+#***************************************************#
+#***************************************************#
+# CI Study
+#***************************************************#
+#***************************************************#
+#***************************************************#
+#***************************************************#
+#***************************************************#
+#***************************************************#
+
+NSTudyReps = 1000
+NSim = 10000
+
+nA = 529
+nB = 326
+a = .05
+
+CIIncludeTrueDiff = numeric(NSTudyReps)
+
+#***************************************************#
+# The true proportions in the Two Population:
+# We randomly assign these values. We should explore different
+# values to see what happens.
+# Note, the two values can be exactly the same or different
+#***************************************************#
+
+
+
+# The CI is for the difference in the two proportions
+
+TrueDiff = pTA - pTB
+
+#***************************************************#
+
+start_time <- Sys.time()
+
+for(s in 1:NSTudyReps)
+{
+ # Generating Samples A and B Under the True Proportions and Counting the successes
+
+
+
+
+
+ # CI using bootstrap method
+ CI = two.prop.CI.f(nSA,nA,nSB,nB,alpha=a,NSim=NSim)
+
+ # Check if CI included true difference
+ if(CI[1] <= TrueDiff && TrueDiff <= CI[2])
+ {
+ CIIncludeTrueDiff[s]=1
+ }else
+ {
+ CIIncludeTrueDiff[s]=0
+ }
+ cat(s,"\n")
+}
+
+end_time <- Sys.time()
+
+end_time - start_time
+
+# Compute the percentage of replicates that CI included truediff
+
+sum(CIIncludeTrueDiff)/NSTudyReps
+
+
+
+
+
+
diff --git a/R-IC10.R b/R-IC10.R
@@ -0,0 +1,457 @@
+#*******************************************************#
+#*******************************************************#
+# STAT320 - IC10 Solutions #
+#*******************************************************#
+#*******************************************************#
+#***************************************************#
+# Type I/Power for Two Sample Means #
+#***************************************************#
+
+#***************************************************#
+#***************************************************#
+#***************************************************#
+#***************************************************#
+#***************************************************#
+
+#***************************************************#
+# Means P-value
+#***************************************************#
+set.seed(3697)
+#***************************************************#
+
+two.mean.perm.pval.f = function(xsampA,xsampB,LowTail = TRUE,NSim=100000)
+{
+ # Compute Sizes of each sample
+ nA = length(xsampA)
+ nB = length(xsampB)
+
+ xobs = mean(xsampA)-mean(xsampB)
+
+ # Combine Samples to one super sample
+ supersample = c(xsampA,xsampB)
+
+ # Create object where simulated DIFFERENCES of averages will be stored
+ ODiffBars = numeric(NSim)
+
+ # Repeat simulations NSim times
+ for(i in 1:NSim)
+ {
+ shuffledsupersample = sample(supersample)
+
+ # Generate sample 1 by resampling super Sample WITHOUT replacement
+ # in practice we select the first nA elements of the shuffled supersample
+ NewSampleA = shuffledsupersample[1:nA]
+
+ # Assign remaining values in supersample to Sample B
+ # that is the elements (nA+1) through (nA+nB)
+ NewSampleB = shuffledsupersample[nA+1:nB]
+
+ # Save difference in means simulated samples
+ ODiffBars[i] = mean(NewSampleA) - mean(NewSampleB )
+ }
+ # Compute p-value
+ if(LowTail==TRUE)
+ {
+ # if alternative Ha is mu < mu0
+ p = sum(ODiffBars<=xobs)/NSim
+ }else
+ {
+ # if alternative Ha is mu > mu0
+ p = sum(ODiffBars>=xobs)/NSim
+ }
+ return(p)
+}
+
+
+two.mean.perm.recenter.pval.f = function(xsampA,xsampB,LowTail = TRUE,NSim=100000)
+{
+ # Compute Sizes of each sample
+ nA = length(xsampA)
+ nB = length(xsampB)
+
+ xobs = mean(xsampA)-mean(xsampB)
+
+ # center samples at 0 by subtracting the means
+ xsampA = xsampA - mean(xsampA)
+
+ xsampB = xsampB - mean(xsampB)
+
+ # Combine Samples to one super sample
+ supersample = c(xsampA,xsampB)
+
+ # Create object where simulated DIFFERENCES of averages will be stored
+ ODiffBars = numeric(NSim)
+
+ # Repeat simulations NSim times
+ for(i in 1:NSim)
+ {
+ shuffledsupersample = sample(supersample)
+
+ # Generate sample 1 by resampling super Sample WITHOUT replacement
+ # in practice we select the first nA elements of the shuffled supersample
+ NewSampleA = shuffledsupersample[1:nA]
+
+ # Assign remaining values in supersample to Sample B
+ # that is the elements (nA+1) through (nA+nB)
+ NewSampleB = shuffledsupersample[nA+1:nB]
+
+ # Save difference in means simulated samples
+ ODiffBars[i] = mean(NewSampleA) - mean(NewSampleB )
+ }
+ # Compute p-value
+ if(LowTail==TRUE)
+ {
+ # if alternative Ha is mu < mu0
+ p = sum(ODiffBars<=xobs)/NSim
+ }else
+ {
+ # if alternative Ha is mu > mu0
+ p = sum(ODiffBars>=xobs)/NSim
+ }
+ return(p)
+}
+
+
+two.mean.boot.pval.f = function(xsampA,xsampB,LowTail = TRUE,NSim=100000)
+{
+ # Compute Sizes of each sample
+ nA = length(xsampA)
+ nB = length(xsampB)
+
+ xobs = mean(xsampA)-mean(xsampB)
+
+ # Combine Samples to one super sample
+ supersample = c(xsampA,xsampB)
+
+ # Create object where simulated DIFFERENCES of averages will be stored
+ ODiffBars = numeric(NSim)
+
+ # Repeat simulations NSim times
+ for(i in 1:NSim)
+ {
+ shuffledsupersample = sample(supersample)
+
+ # Generate sample 1 by resampling super Sample WITH replacement
+ NewSampleA = sample(supersample,size = nA,replace = TRUE)
+
+ # Generate sample 2 by resampling super Sample WITH replacement
+ NewSampleB = sample(supersample,size = nB,replace = TRUE)
+
+ # Save difference in means simulated samples
+ ODiffBars[i] = mean(NewSampleA) - mean(NewSampleB )
+ }
+ # Compute p-value
+ if(LowTail==TRUE)
+ {
+ # if alternative Ha is mu < mu0
+ p = sum(ODiffBars<=xobs)/NSim
+ }else
+ {
+ # if alternative Ha is mu > mu0
+ p = sum(ODiffBars>=xobs)/NSim
+ }
+ return(p)
+}
+
+
+two.mean.boot.recenter.pval.f = function(xsampA,xsampB,LowTail = TRUE,NSim=100000)
+{
+ # Compute Sizes of each sample
+ nA = length(xsampA)
+ nB = length(xsampB)
+
+ xobs = mean(xsampA)-mean(xsampB)
+
+ # center samples at 0 by subtracting the means
+ xsampA = xsampA - mean(xsampA)
+
+ xsampB = xsampB - mean(xsampB)
+
+ # Combine Samples to one super sample
+ supersample = c(xsampA,xsampB)
+
+ # Create object where simulated DIFFERENCES of averages will be stored
+ ODiffBars = numeric(NSim)
+
+ # Repeat simulations NSim times
+ for(i in 1:NSim)
+ {
+ shuffledsupersample = sample(supersample)
+
+ # Generate sample 1 by resampling super Sample WITH replacement
+ NewSampleA = sample(supersample,size = nA,replace = TRUE)
+
+ # Generate sample 2 by resampling super Sample WITH replacement
+ NewSampleB = sample(supersample,size = nB,replace = TRUE)
+
+ # Save difference in means simulated samples
+ ODiffBars[i] = mean(NewSampleA) - mean(NewSampleB )
+ }
+ # Compute p-value
+ if(LowTail==TRUE)
+ {
+ # if alternative Ha is mu < mu0
+ p = sum(ODiffBars<=xobs)/NSim
+ }else
+ {
+ # if alternative Ha is mu > mu0
+ p = sum(ODiffBars>=xobs)/NSim
+ }
+ return(p)
+}
+
+
+#***************************************************#
+# Means CI
+#***************************************************#
+
+two.mean.CI.f = function(xsampA,xsampB,alpha=.05,NSim=100000)
+{
+ # Compute Sizes of each sample
+ nA = length(xsampA)
+ nB = length(xsampB)
+
+ # Create object where simulated DIFFERENCES of averages will be stored
+ ODiffBars = numeric(NSim)
+
+ # Repeat simulations NSim times
+ for(i in 1:NSim)
+ {
+ # Generate sample 1 by resampling original sample WITH replacement
+ NewSampleA = sample(x= xsampA,size = nA,replace = TRUE)
+
+ # Generate sample 2 by resampling original sample WITH replacement
+ NewSampleB = sample(x= xsampB,size = nB,replace = TRUE)
+
+ # Save difference in means simulated samples
+ ODiffBars[i] = mean(NewSampleA) - mean(NewSampleB)
+ }
+
+ # Compute CI
+ ci =quantile(ODiffBars,probs = c(alpha/2,1-alpha/2))
+ return(ci)
+}
+
+
+
+#***************************************************#
+#***************************************************#
+#***************************************************#
+#***************************************************#
+#***************************************************#
+#***************************************************#
+# Type I Simulation Study - Case 1: Normal
+#***************************************************#
+#***************************************************#
+#***************************************************#
+#***************************************************#
+#***************************************************#
+#***************************************************#
+
+NSTudyReps = 10000
+NSim = 10000
+
+nA = 35
+nB = 35
+LowTail = TRUE
+
+PermNullPVals = numeric(NSTudyReps)
+PermRecNullPVals = numeric(NSTudyReps)
+BootNullPVals = numeric(NSTudyReps)
+BootRecNullPVals = numeric(NSTudyReps)
+
+#***************************************************#
+# The true distributionin the Common Population:
+# We randomly select this. We should explore different
+# options. These options will address two components:
+# 1) Shape of distribution (Normal, Uniform, Exponential, etc.)
+# 2) Parameters of distribution (Mean, SD, lambda, etc.)
+#***************************************************#
+
+
+#***************************************************#
+
+start_time <- Sys.time()
+
+for(s in 1:NSTudyReps)
+{
+ # Generating Samples A and B Under the Null Using the given distribution
+
+
+
+
+
+
+
+
+ # p-val using permutation method
+ PermNullPVals[s] = two.mean.perm.pval.f(SampA,SampB,LowTail = LowTail,NSim=NSim)
+
+ # p-val using recentering permutation method
+ PermRecNullPVals[s] = two.mean.perm.recenter.pval.f(SampA,SampB,LowTail = LowTail,NSim=NSim)
+
+ # p-val using bootstrap method
+ BootNullPVals[s] = two.mean.boot.pval.f(SampA,SampB,LowTail = LowTail,NSim=NSim)
+
+ # p-val using recentering bootstrap method
+ BootRecNullPVals[s] = two.mean.boot.recenter.pval.f(SampA,SampB,LowTail = LowTail,NSim=NSim)
+
+ cat(s,"\n")
+}
+
+end_time <- Sys.time()
+
+end_time - start_time
+
+# Compute the percentage of replicates that p-value less than
+# given alpha
+
+a=.05
+sum(PermNullPVals<=a)/NSTudyReps
+sum(PermRecNullPVals<=a)/NSTudyReps
+sum(BootNullPVals<=a)/NSTudyReps
+sum(BootRecNullPVals<=a)/NSTudyReps
+
+
+#***************************************************#
+#***************************************************#
+#***************************************************#
+#***************************************************#
+#***************************************************#
+# Power Simulation Study
+#***************************************************#
+#***************************************************#
+#***************************************************#
+#***************************************************#
+#***************************************************#
+#***************************************************#
+
+
+PermPowPVals = numeric(NSTudyReps)
+PermRecPowPVals = numeric(NSTudyReps)
+BootPowPVals = numeric(NSTudyReps)
+BootRecPowPVals = numeric(NSTudyReps)
+
+#***************************************************#
+# The true distributionin the Two Populations:
+# We randomly select this. We should explore different
+# options. These options will address two components:
+# 1) Shape of distribution (Normal, Uniform, Exponential, etc.)
+# 2) Parameters of distribution (Mean, SD, lambda, etc.)
+#***************************************************#
+
+# Case 1: Normal
+
+
+#***************************************************#
+
+start_time <- Sys.time()
+
+for(s in 1:NSTudyReps)
+{
+ # Generating Samples A and B Under the Alternative
+
+
+
+
+
+ # p-val using permutation method
+ PermPowPVals[s] = two.mean.perm.pval.f(SampA,SampB,LowTail = LowTail,NSim=NSim)
+
+ # p-val using permutation method
+ PermRecPowPVals[s] = two.mean.perm.recenter.pval.f(SampA,SampB,LowTail = LowTail,NSim=NSim)
+
+ # p-val using bootstrap method
+ BootPowPVals[s] = two.mean.boot.pval.f(SampA,SampB,LowTail = LowTail,NSim=NSim)
+
+ # p-val using recentering bootstrap method
+ BootRecPowPVals[s] = two.mean.boot.recenter.pval.f(SampA,SampB,LowTail = LowTail,NSim=NSim)
+
+ cat(s,"\n")
+}
+
+end_time <- Sys.time()
+
+end_time - start_time
+
+# Compute the percentage of replicates that p-value less than
+# given alpha. This will give us the power of the method
+
+a=.05
+sum(PermPowPVals<=a)/NSTudyReps
+sum(PermRecPowPVals<=a)/NSTudyReps
+sum(BootPowPVals<=a)/NSTudyReps
+sum(BootRecPowPVals<=a)/NSTudyReps
+
+
+#***************************************************#
+#***************************************************#
+#***************************************************#
+#***************************************************#
+#***************************************************#
+# CI Study
+#***************************************************#
+#***************************************************#
+#***************************************************#
+#***************************************************#
+#***************************************************#
+#***************************************************#
+
+
+#***************************************************#
+# The true distributionin the Two Populations:
+# We randomly select this. We should explore different
+# options. These options will address two components:
+# 1) Shape of distribution (Normal, Uniform, Exponential, etc.)
+# 2) Parameters of distribution (Mean, SD, lambda, etc.)
+#***************************************************#
+
+# Case 1: Normal
+
+
+a = .05
+
+
+# The CI is for the difference in the two proportions
+TrueDiff = muA - muB
+
+CIIncludeTrueDiff = numeric(NSTudyReps)
+
+
+#***************************************************#
+
+start_time <- Sys.time()
+
+for(s in 1:NSTudyReps)
+{
+ # Generating Samples A and B Under the Alternative
+
+
+
+
+
+
+
+
+ # CI using bootstrap method
+ CI = two.mean.CI.f(SampA,SampB,alpha=a,NSim=NSim)
+
+ # Check if CI included true difference
+ if(CI[1] <= TrueDiff && TrueDiff <= CI[2])
+ {
+ CIIncludeTrueDiff[s]=1
+ }else
+ {
+ CIIncludeTrueDiff[s]=0
+ }
+ cat(s,"\n")
+}
+
+end_time <- Sys.time()
+
+end_time - start_time
+
+# Compute the percentage of replicates that CI included truediff
+
+sum(CIIncludeTrueDiff)/NSTudyReps
+