*SAS and R code. Copy and paste code one problem part at a time. The easiest way to get lregpack.txt and lregdata.txt is to copy and paste the following two commands into R. source("http://parker.ad.siu.edu/Olive/lregpack.txt") source("http://parker.ad.siu.edu/Olive/lregdata.txt") If lregpack.txt and lregdata.txt are on J drive, get lregpack and lregdata with the following commands: source("J:/lregpack.txt") source("J:/lregdata.txt") #####for Linux use left most port for flash drive source("/media/190A-4001/lregpack.txt") source("/media/190A-4001/lregdata.txt") #Problem 1.2 lrplot2(xtimss,ytimss) **Problem 2.17; options ls = 70; data wcdata; input x y; cards; 30 73 20 50 60 128 80 170 40 87 50 108 60 135 30 60 70 148 60 132 ; proc print; proc corr; proc plot; plot y*x; proc reg; model y=x; output out =a p = pred r = resid; proc plot data = a; plot resid*(pred x); plot y*pred; run; **Problem 2.18; options ls = 70; data brand; input y x1 x2; cards; 64.0 4.0 2.0 73.0 4.0 4.0 61.0 4.0 2.0 76.0 4.0 4.0 72.0 6.0 2.0 80.0 6.0 4.0 71.0 6.0 2.0 83.0 6.0 4.0 83.0 8.0 2.0 89.0 8.0 4.0 86.0 8.0 2.0 93.0 8.0 4.0 88.0 10.0 2.0 95.0 10.0 4.0 94.0 10.0 2.0 100.0 10.0 4.0 ; proc print; proc corr; proc plot; plot y*(x1 x2); proc reg; model y=x1 x2; output out =a p = pred r = resid; proc plot data = a; plot resid*(pred x1 x2); plot y*pred; run; ##Problem 2.27 for R #a) zbux <- cbind(buxx,buxy) zbux <- as.data.frame(zbux) zfull <- lm(buxy~len+nasal+bigonal+cephalic,data=zbux) zred <- lm(buxy~len+nasal,data=zbux) anova(zred,zfull) #d) plot(zred$fit,buxy) abline(0,1) #e) plot(zred$fit,zred$resid) #f) zbux <- zbux[-c(60,61,62,63,64,65),] zfull <- lm(buxy~len+nasal+bigonal+cephalic,data=zbux) zred <- lm(buxy~len+nasal,data=zbux) anova(zred,zfull) #h) plot(zred$fit,zbux[,5]) abline(0,1) #i) plot(zred$fit,zred$resid) ##Problem 2.28 a) for R; #skewed data x <- matrix(rnorm(3000),nrow=1000,ncol=3) y <- x%*%c(1,1,1) + 2*rexp(1000) out<-lsfit(x,y) FIT <- y - out$resid plot(FIT,y) abline(0,1) lines(lowess(FIT,y)) ##Problem 2.28 b) and c) for R plot(FIT,out$resid) abline(0,0) lines(lowess(FIT,out$resid)) out$coef ##Problem 2.29 #b) pisim(n=100, type = 1) #c) pisim(n=100, type = 3) #d) piplot(cbrainx,cbrainy) ##Problem 2.30 MLRsim(nruns=10) # right click Stop 20 times ##Problem 3.5 #b) nx <- matrix(rnorm(300),nrow=100,ncol=3) y <- exp( 4 + nx%*%c(1,1,1) + 0.5*rnorm(100) ) #c) tplot(nx,y) #right click Stop 7 times #d) out <- lsfit(nx,log(y)) ls.print(out) ##Problem 3.6 #a) Use the source command near the top of this file to download the data set. zx <- cbrainx[,c(1,3,5,6,7,8,9,10)] zbrain <- as.data.frame(cbind(cbrainy,zx)) zfull <- lm(cbrainy~.,data=zbrain) summary(zfull) back <- step(zfull) #c) zint <- lm(cbrainy~1,data=zbrain) forw <- step(zint,scope=list(lower=~1, upper=~age+breadth+cephalic+circum+headht+height +len+sex),direction="forward") #e) Use library() to check that you have the leaps library. library(leaps) out<-leaps(x=zx,y=cbrainy) out plot(out$size,out$Cp) tem<-1:length(out$size) tem[out$Cp < 6.01] for(i in 2:9) print( c(i, min(out$Cp[out$size==i]))) out$which[39,] zx[1,] **Problem 3.30; *From SAS User's Guide: Statistics (1985, p. 695-704,717-718); data fitness; input Age Weight Oxygen RunTime RestPulse RunPulse MaxPulse @@; datalines; 44 89.47 44.609 11.37 62 178 182 40 75.07 45.313 10.07 62 185 185 44 85.84 54.297 8.65 45 156 168 42 68.15 59.571 8.17 40 166 172 38 89.02 49.874 9.22 55 178 180 47 77.45 44.811 11.63 58 176 176 40 75.98 45.681 11.95 70 176 180 43 81.19 49.091 10.85 64 162 170 44 81.42 39.442 13.08 63 174 176 38 81.87 60.055 8.63 48 170 186 44 73.03 50.541 10.13 45 168 168 45 87.66 37.388 14.03 56 186 192 45 66.45 44.754 11.12 51 176 176 47 79.15 47.273 10.60 47 162 164 54 83.12 51.855 10.33 50 166 170 49 81.42 49.156 8.95 44 180 185 51 69.63 40.836 10.95 57 168 172 51 77.91 46.672 10.00 48 162 168 48 91.63 46.774 10.25 48 162 164 49 73.37 50.388 10.08 67 168 168 57 73.37 39.407 12.63 58 174 176 54 79.38 46.080 11.17 62 156 165 52 76.32 45.441 9.63 48 164 166 50 70.87 54.625 8.92 48 146 155 51 67.25 45.118 11.08 48 172 172 54 91.63 39.203 12.88 44 168 172 51 73.71 45.790 10.47 59 186 188 57 59.08 50.545 9.93 49 148 155 49 76.32 48.673 9.40 56 186 188 48 61.24 47.920 11.50 52 170 176 52 82.78 47.467 10.50 53 170 172 ; proc reg data=fitness; model Oxygen=Age Weight RunTime RunPulse RestPulse MaxPulse; output out =a p = pred r = resid; model Oxygen=Age Weight RunTime RunPulse RestPulse MaxPulse / selection=forward; model Oxygen=Age Weight RunTime RunPulse RestPulse MaxPulse / selection=backward; model Oxygen=Age Weight RunTime RunPulse RestPulse MaxPulse / selection=cp best = 10; run; proc rsquare cp data = fitness; model Oxygen=Age Weight RunTime RunPulse RestPulse MaxPulse; proc plot data = a; plot resid*(pred); plot Oxygen*pred; proc reg data=fitness; model Oxygen=Age RunTime RunPulse MaxPulse; output out =sub p = pred r = resid; proc plot data = sub; plot resid*(pred); plot Oxygen*pred; run; ##Problem 4.1 commands for R e <- rnorm(9) x <- matrix(rnorm(27),nrow=9,ncol=3) sqrtv <- sqrt(diag(1/1:9)) Y <- 4 + x%*%c(1,2,3) + sqrtv%*%e wtt <- 1:9 lsfit(x,Y,wtt)$coef kinv <- sqrt(diag(1:9)) Z <- kinv%*%Y X <- 1 + 0*1:9 X <- cbind(X,x) U <- kinv%*%X lsfit(U,Z,int=F)$coef ##Problem 4.2 #Use the two source commands near the top of the file to download the #data and lregpack functions. #a) wlsplot(x=dsx, y = dsy, w = dsw) ##Problem 4.3 #a) fwlssim(type=1) #use the up arrow to repeat the command quickly #b) fwlssim(type=2) #c) fwlssim(type=3) #d) fwlssim(type=4) ##Problem 5.11 commands for R #b) pcisim(n1=100,n2=200,var1=10,var2=1) ##Problem 5.12 commands for R #a) anovasim(m1 = 0, m2 = 0, m3 = 0, m4 = 0, sd1 = 1, sd2 = 1, sd3 = 1, sd4 = 1) #b) anovasim(m1 = 0, m2 = 0, m3 = 0, m4 = 0, sd1 = 1, sd2 = 2, sd3 = 3, sd4 = 4) #c) anovasim(m1 = 1, m2 = 0, m3 = 0, m4 = 1) #d) anovasim(m4 = 1, sd4 = 9) ##Problem 5.13 commands for R #a) y <- ycrab+1/6 aovtplt(crabhab,y) ##Problem 5.14 commands for R #a) help(warpbreaks) out <- aov(breaks ~ tension, data = warpbreaks) out summary(out) plot(out$fit,out$residuals) title("Residual Plot") #c) warpbreaks[1,] plot(out$fit,warpbreaks[,1]) abline(0,1) title("Response Plot") ##Problem 5.15 commands for R #a) ganova(bloodx,bloody) #c) z<-rand1way(y=bloody,group=bloodx,B=1000) hist(z$rdist) z$Fpval z$randpval **5.16, from SAS User's Guide: Statistics (1985, p. 126-129); *Uses proc glm to get residual and response plots; options ls = 70; data clover; input strain $ nitrogen @@; cards; 3dok1 19.4 3dok1 32.6 3dok1 27.0 3dok1 32.1 3dok1 33.0 3dok5 17.7 3dok5 24.8 3dok5 27.9 3dok5 25.2 3dok5 24.3 3dok4 17.0 3dok4 19.4 3dok4 9.1 3dok4 11.9 3dok4 15.8 3dok7 20.7 3dok7 21.0 3dok7 20.5 3dok7 18.8 3dok7 18.6 3dok13 14.3 3dok13 14.4 3dok13 11.8 3dok13 11.6 3dok13 14.2 compos 17.3 compos 19.4 compos 19.1 compos 16.9 compos 20.8 ; proc glm; class strain; model nitrogen=strain; output out =a p = pred r = resid; proc plot data = a; plot resid*pred; plot nitrogen*pred; run; **Problem 6.3: The program below is for the two way Anova; *Data from Montgomery (1984, p. 198); options ls = 70; data voltage; input material $ temp $ mvoltage @@; cards; 1 50 130 1 50 155 1 50 74 1 50 180 1 65 34 1 65 40 1 65 80 1 65 75 1 80 20 1 80 70 1 80 82 1 80 58 2 50 150 2 50 188 2 50 159 2 50 126 2 65 136 2 65 122 2 65 106 2 65 115 2 80 25 2 80 70 2 80 58 2 80 45 3 50 138 3 50 110 3 50 168 3 50 160 3 65 174 3 65 120 3 65 150 3 65 139 3 80 96 3 80 104 3 80 82 3 80 60 ; proc glm; class material temp; model mvoltage =material|temp; output out =a p = pred r = resid; proc plot data = a; plot resid*pred; plot mvoltage*pred; proc means data = voltage nway; class material temp; var mvoltage; output out=means mean=ymn; symbol1 v=square color=black i=join; symbol2 v=circle color=black i=join; symbol3 v=triangle color=black i=join; proc gplot data=means; title"Interaction Plot"; plot ymn * material = temp; proc gplot data=means; title"Interaction Plot"; plot ymn * temp = material; run; #Problem 6.5 R commands #a) out1<-aov(stime~ptype*treat,poison) summary(out1) out2<-aov(stime~ptype + treat + ptype*treat,poison) summary(out2) out3<-aov(stime~.^2,poison) summary(out3) #b) plot(fitted(out1),resid(out1)) title("Residual Plot") #c) FIT <- poison$stime - out1$resid #FIT <- fitted(out1) plot(FIT,poison$stime) abline(0,1) title("Response Plot") #e) attach(poison) out4 <- aov((1/stime)~ptype*treat,poison) summary(out4) #f) plot(fitted(out4),resid(out4)) title("Residual Plot") #g) FIT <- 1/poison$stime - out4$resid #FIT <- fitted(out4) plot(FIT,(1/poison$stime)) abline(0,1) title("Response Plot") #h) interaction.plot(treat,ptype,(1/stime)) detach(poison) **Problem 7.4 program for a one way block design; *Problem 7.6 also uses the data; *Data from Box, Hunter and Hunter (2005, p. 146); *a); options ls = 70; data penicillan; input block $ treat $ yield; cards; 1 1 89 1 2 88 1 3 97 1 4 94 2 1 84 2 2 77 2 3 92 2 4 79 3 1 81 3 2 87 3 3 87 3 4 85 4 1 87 4 2 92 4 3 89 4 4 84 5 1 79 5 2 81 5 3 80 5 4 88 ; proc glm; class block treat; model yield =block treat; output out =a p = pred r = resid; proc plot data = a; plot resid*pred; plot yield*pred; run; #g) use the source command near the top of this file to get the data z<-aov(yield~block+treat,pen) summary(z) **Problem 7.5 program below is for a latin squares design; *Data from Box, Hunter and Hunter (2005, p. 157-160); *a); options ls = 70; data auto; input rblocks $ cblocks $ additives $ emmissions; cards; 1 1 1 19 1 2 2 24 1 3 4 23 1 4 3 26 2 1 4 23 2 2 3 24 2 3 1 19 2 4 2 30 3 1 2 15 3 2 4 14 3 3 3 15 3 4 1 16 4 1 3 19 4 2 1 18 4 3 2 19 4 4 4 16 ; proc glm; class rblocks cblocks additives; model emmissions = rblocks cblocks additives; output out =a p = pred r = resid; **the following commands can be used; **to make a response and residual plot; *proc plot data = a; * plot resid*pred; * plot emmissions*pred; run; #b) use the source command near the top of the file to get the data z<-aov(emissions~rblocks+cblocks+additives,auto) summary(z) plot(fitted(z),resid(z)) title("Residual Plot") abline(0,0) #c) attach(auto) FIT <- auto$emissions - z$resid #FIT <- fitted(z) plot(FIT,auto$emissions) title("Response Plot") abline(0,1) detach(auto) ##Problem 7.6 #a) Use the two source commands near the top of this file. attach(pen) ganova2(pen$treat,pen$block,pen$yield) detach(pen) ##Problem 8.8 #need to copy and paste the devel data into R out <- aov(conversion~K*Te*P*C,devel) summary(out) **Problem 8.9; *Box, Hunter and Hunter (2005, p. 183) pilot plant data; *SAS will treat a -1 like a 0; *T = temp, C = concentration, K = catalyst; options ls = 70; data pilotplant; input T $ C $ K $ yield; cards; -1 -1 -1 59 1 -1 -1 74 -1 1 -1 50 1 1 -1 69 -1 -1 1 50 1 -1 1 81 -1 1 1 46 1 1 1 79 -1 -1 -1 61 1 -1 -1 70 -1 1 -1 58 1 1 -1 67 -1 -1 1 54 1 -1 1 85 -1 1 1 44 1 1 1 81 ; proc glm; class T C K; model yield = T|C|K; output out =a p = pred r = resid; proc plot data = a; plot resid*pred; plot yield*pred; run; **Problem 8.10; *this data is actually for Minitab; *A B C AB AC BC ABC yield; -1 -1 -1 1 1 1 -1 59 1 -1 -1 -1 -1 1 1 74 -1 1 -1 -1 1 -1 1 50 1 1 -1 1 -1 -1 -1 69 -1 -1 1 1 -1 -1 1 50 1 -1 1 -1 1 -1 -1 81 -1 1 1 -1 -1 1 -1 46 1 1 1 1 1 1 1 79 -1 -1 -1 1 1 1 -1 61 1 -1 -1 -1 -1 1 1 70 -1 1 -1 -1 1 -1 1 58 1 1 -1 1 -1 -1 -1 67 -1 -1 1 1 -1 -1 1 54 1 -1 1 -1 1 -1 -1 85 -1 1 1 -1 -1 1 -1 44 1 1 1 1 1 1 1 81 ##Problem 8.11 for R. #Box, Hunter and Hunter (2005, p. 183 ) pilot plant data mn <- 1 + 0*1:16 x1 <- rep(c(-1,-1,1,1),4) x2 <- rep(c(-1,-1,-1,-1,1,1,1,1),2) x3 <- mn x3[1:8] <- -1 x12 <- x1*x2 x13 <- x1*x3 x23 <- x2*x3 x123 <- x12*x3 x<-cbind(x1,x2,x3,x12,x13,x23,x123) y<-c(59,61,74,70,50,58,69,67,50,54,81,85,46,44,79,81) out<-lsfit(x,y) ls.print(out) out2 <- aov(y~x1*x2*x3) summary(out2) summary.lm(out2) rm(mn,x1,x2,x3,x12,x13,x23,x123,x,y,out,out2) ##Problem 8.12 mns <- c(60,72,54,68,52,83,45,80) twocub(mns,m=2,MSE=8) ##Problem 8.13 mns <- c(20,14,17,10,19,13,14,10) twocub(mns,m=1) ##Problem 8.14 mns <- c(14,16,8,22,19,37,20,38,1,8,4,10,12,30,13,30) twofourth(mns) ##Problem 8.15 resp <- c(56,93,67,60,77,65,95,49,44,63,63,61) pb12(resp,k=5) ##Problem 9.5 #Need to download the guay data with a source command near the top of the file. attach(guay) out<-aov(plants~variety*treatment + Error(flats),guay) summary(out) detach(guay) ##Problem 9.6 #Need to download the steel data with a source command near the top of the file. attach(steel) out<-aov(resistance~heat*coating + Error(wplots),steel) summary(out) detach(steel) **Problem 9.7; *Box, Hunter and Hunter (2005, p. 336) steel split plot data; *Need to divide MS heat by MS wplots to get the correct *F statistic to test heat; options ls = 70; data steel; input coating $ heat $ repl $ resistance wplots $; cards; 1 1 1 67 1 1 2 1 65 2 1 3 1 155 3 2 1 1 73 1 2 2 1 91 2 2 3 1 127 3 3 1 1 83 1 3 2 1 87 2 3 3 1 147 3 4 1 1 89 1 4 2 1 86 2 4 3 1 212 3 1 1 2 33 4 1 2 2 140 5 1 3 2 108 6 2 1 2 8 4 2 2 2 142 5 2 3 2 100 6 3 1 2 46 4 3 2 2 121 5 3 3 2 90 6 4 1 2 54 4 4 2 2 150 5 4 3 2 153 6 ; proc glm; class coating heat repl wplots; model resistance = heat wplots coating heat*coating; test H = heat E = wplots; run; **Problem 9.8; *SAS User's Guide: Statistics (1985, p. 131-132); *Block 1 A 2 B 3 and 142 makes block = 1, A = 4, B = 2; data split; input Block 1 A 2 B 3 Response; cards; 142 40.0 141 39.5 112 37.9 111 35.4 121 36.7 122 38.2 132 36.4 131 34.8 221 42.7 222 41.6 212 40.3 211 41.6 241 44.5 242 47.6 231 43.6 232 42.8 ; **Use proc print to see what the data looks like; *proc print; proc anova; class Block A B; model Response = Block A Block*A B A*B; test H=A E=Block*A; run; ##Problem 10.12 #b) simx2 <- matrix(rnorm(200),nrow=100,ncol=2) outx2 <- matrix(10 + rnorm(80),nrow=40,ncol=2) outx2 <- rbind(outx2,simx2) maha(outx2) ##Problem 10.13 #a) library(MASS) ddcomp(buxx) #right click Stop 4 times #b) ddcomp(cbrainx) #right click Stop 4 times #c) ddcomp(museum[,-1]) #right click Stop 4 times ##Problem 12.8, needs lregpack, lregdata #a) y <- log(mussels)[,4:5] x <- mussels[,1:3] x[,2] <- log(x[,2]) z<-cbind(x,y) out <- mltreg(x,y,indices=c(3,4)) #right click Stop 4 times #b) out #c) ddplot4(out$res) #right click Stop once ##Problem 12.9, needs lregpack and lregdata #a) x <- fitness[,1:3] y <- fitness[,4:6] out <- mltreg(x,y,indices=c(2,4)) #right click Stop on the plot 6 times b) ddplot4(out$res) #right click Stop once ##Problem 12.10 #a) mregsim(n=20,m=2,p=4,nruns=5000,etype=1) #b) mregsim(n=20,m=2,p=4,nruns=5000,etype=1,mnull=F) ##Problem 12.11 mpredsim(n=100,m=2,p=4,nruns=5000,mnull=F,alpha=0.1) ## Problem 13.19 out <- lrdata() x <- out$x y <- out$y lressp(x,y) ##Problem 13.20 #a) out <- prdata() x <- out$x y <- out$y pressp(x,y) #b) prplot(x,y) ## Problem 13.21 #a) belx <- 50:73 bely <-c(0.44,0.47,0.47,0.59,0.66,0.73,0.81,0.88,1.06,1.2,1.35,1.49, 1.61,2.12,11.9,12.4,14.2,15.9,18.2,21.2,4.3,2.4,2.7,2.9) out <- lm(bely~belx) ESP <- predict(out) Y <- bely plot(ESP,Y) abline(0,1) #b) library(mgcv) outgam <- gam(bely~s(belx)) EAP <- predict(outgam) plot(EAP,Y) abline(0,1) ## Problem 13.22 #ha = number patients with heart attack for each ck group #ok = number of patients without a heart attack #a) ck <- c(20,60,100,140,180,220,260,300,340,380,420,460) ha <- c(2,13,30,30,21,19,18,13,19,15,7,8) ok <- c(88,26,8,5,0,1,1,1,1,0,0,0) heart <- as.data.frame(cbind(ck,ha,ok)) outw <- glm(cbind(ha,ok)~ck+I(ck^2)+I(ck^3),family=binomial, data=heart) out <- glm(cbind(ha,ok)~log(ck),family=binomial, data=heart) par(mfrow=c(2,2)) library(mgcv) outgam <- gam(cbind(ha,ok)~s(ck),family=binomial, data=heart) plot(outgam) title("a) Shat") outgam2 <- gam(cbind(ha,ok)~s(log(ck)),family=binomial, data=heart) plot(outgam2) title("b) Shat for log(ck) GAM") EAP <- predict(outgam) ESP <- predict(out) plot(EAP,ESP) abline(0,1) title("c) EE Plot: log(ck) GLM") Z <- ha/(ha+ok) fit<-Z plot(ESP,Z) fit <- exp(ESP)/(1 + exp(ESP)) indx <- sort.list(ESP) lines(ESP[indx], fit[indx]) title("d) Response Plot ") par(mfrow=c(1,1)) #b) AIC(outw) AIC(out) #HW 13.23; #make sure cbrain.dat.txt is on j drive or the infile command won't work; options ls = 70; data cbrain; infile "j:cbrain.dat.txt"; input obs x1 x2 x3 x4 x5 x6 x7 x8 x9 x10 sex x12; proc rsquare; model sex = x2 x3 x4 x5 x6 x7 x9 x10 / cp; proc reg; model sex = x2 x3 x4 x5 x6 x7 x9 x10; output out = a p = pred r = resid; run; ##lab 2 R commands ?lm data() ?EuStockMarkets pairs(EuStockMarkets) help(EuStockMarkets) zz<-EuStockMarkets pairs(zz) FULL <- lm(DAX~SMI+CAC+FTSE,data=zz) plot(FULL) help(lm) args(lm) ?lsfit args(lsfit)