*R code for homework problems; Copy and paste code one problem part at a time. The easiest way to get linmodpack.txt and linmoddata.txt is to copy and paste the following two commands into R. source("http://parker.ad.siu.edu/Olive/linmodpack.txt") source("http://parker.ad.siu.edu/Olive/linmoddata.txt") #The following commands are often used. library(glmnet) library(pls) library(leaps) #If slpack.txt and sldata.txt are on J drive, get lregpack and lregdata with #the following commands: #source("J:/slpack.txt") #source("J:/sldata.txt") You will often need packages and may need to install packages the first time you use a given computer. install.packages("glmnet") install.packages("pls") install.packages("leaps") #These commands can fail if your computer has a firewall. #Then an administrator may be needed to install the packages. #Problem 1.40 a) n<-100000; q<-7 b <- 0 * 1:q + 1 x <- matrix(rnorm(n * q), nrow = n, ncol = q) y <- 1 + x %*% b + rnorm(n) out<-lsfit(x,y) res<-out$res yhat<-y-res dd<-length(out$coef) AERplot(yhat,y,res=res,d=dd,alph=1) #usual response plot # 1.40 b) AERplot(yhat,y,res=res,d=dd,alph=0.01) #plots data outside the 99% pointwise PIs ##HW1 C) Problem 1.41 a) #n > 10p library(glmnet) library(leaps) library(pls) x <- matrix(rnorm(300),nrow=100,ncol=3) z <- x[,1] + rnorm(100,sd=0.1) Y <- exp(z) enet(x,Y) #1.41b) tplot2(x,Y,type=2) #right click Stop 7 times #tplot2(x,Y,type=1) #right click Stop 7 times #tplot2(x,Y,type=3) #right click Stop 7 times #tplot2(x,Y,type=4) #right click Stop 7 times #tplot2(x,Y,type=5) #right click Stop 7 times #tplot2(x,Y,type=6) #right click Stop 7 times #tplot2(x,Y,type=7) #right click Stop 7 times #1.41d) #n = p-1, sparse model #100 nontrivail predictors x <- matrix(rnorm(10000),nrow=100,ncol=100) z <- x[,1] + rnorm(100,sd=0.1) Y <- exp(z) tplot2(x,Y,type=2) #right click stop 7 times #1.41f) tplot2(x,Y,type=5) #PLS overfit, right click stop 7 times #tplot2(x,Y,type=3) #tplot2(x,Y,type=4) #ridge regression failed #tplot2(x,Y,type=6) #PCR may have overfit #tplot2(x,Y,type=7) #tplot2(x,Y,type=1) #should give an error since n < p ##Problem 1.42 a) #skewed data x <- matrix(rnorm(3000),nrow=1000,ncol=3) y <- x%*%c(1,1,1) + 2*rexp(1000) out<-lsfit(x,y) FIT <- y - out$resid plot(FIT,y) abline(0,1) lines(lowess(FIT,y)) #Problem 1.42 b) and c) for R plot(FIT,out$resid) abline(0,0) lines(lowess(FIT,out$resid)) out$coef ##Problem 2.54 for Homework 6 e <- rnorm(9) x <- matrix(rnorm(27),nrow=9,ncol=3) sqrtv <- sqrt(diag(1/1:9)) Y <- 4 + x%*%c(1,2,3) + sqrtv%*%e wtt <- 1:9 lsfit(x,Y,wtt)$coef kinv <- sqrt(diag(1:9)) Z <- kinv%*%Y X <- 1 + 0*1:9 X <- cbind(X,x) U <- kinv%*%X lsfit(U,Z,int=F)$coef ##Problem 4.21, needs linmodpack predsim() ##Problem 4.22, needs linmodpack regbootsim2(nruns=500,psi=0.0) ##Problem 5.18, #Copy and paste two source commands from near the top of this file into R. #a) library(leaps) n<- 1000 p<-4 q<-p-1 b <- 0 * 1:q b[1] <- 1 #beta = (1,1,0,0)^T x <- matrix(rnorm(n * q), nrow = n, ncol = q) y <- 1 + x %*% b + rnorm(n) outols <- lsfit(x,y) outols$coef #Tn = betahat #b) tem<-regboot(x,y) betas <- tem$betas betas[1:5,] #c) apply(betas,2,mean) #d) temp<-vselboot(x,y) vsbetas <- temp$betas vsbetas[1:5,] #e) apply(vsbetas,2,mean) ##Problem 5.19 library(leaps) #person 1 regbootsim2(n=100,p=4,k=1,nruns=1000,type=1,psi=0) vsbootsim3(n=100,p=4,k=1,nruns=1000,type=1,psi=0) #person 2 regbootsim2(n=100,p=4,k=1,nruns=1000,type=2,psi=0) vsbootsim3(n=100,p=4,k=1,nruns=1000,type=2,psi=0) #person 3 regbootsim2(n=100,p=4,k=1,nruns=1000,type=3,psi=0) vsbootsim3(n=100,p=4,k=1,nruns=1000,type=3,psi=0) #person 4 regbootsim2(n=100,p=4,k=1,nruns=1000,type=4,psi=0) vsbootsim3(n=100,p=4,k=1,nruns=1000,type=4,psi=0) #person 5 regbootsim2(n=100,p=4,k=1,nruns=1000,type=5,psi=0) vsbootsim3(n=100,p=4,k=1,nruns=1000,type=5,psi=0) #person 6 regbootsim2(n=100,p=4,k=1,nruns=1000,type=1,psi=0.9) vsbootsim3(n=100,p=4,k=1,nruns=1000,type=1,psi=0.9) #person 7 regbootsim2(n=100,p=4,k=1,nruns=1000,type=2,psi=0.9) vsbootsim3(n=100,p=4,k=1,nruns=1000,type=2,psi=0.9) #person 8 regbootsim2(n=100,p=4,k=1,nruns=1000,type=3,psi=0.9) vsbootsim3(n=100,p=4,k=1,nruns=1000,type=3,psi=0.9) #person 9 regbootsim2(n=100,p=4,k=1,nruns=1000,type=4,psi=0.9) vsbootsim3(n=100,p=4,k=1,nruns=1000,type=4,psi=0.9) #person 10 regbootsim2(n=100,p=4,k=1,nruns=1000,type=5,psi=0.9) vsbootsim3(n=100,p=4,k=1,nruns=1000,type=5,psi=0.9) #person 11 regbootsim2(n=100,p=5,k=1,nruns=1000,type=1,psi=0) vsbootsim3(n=100,p=5,k=1,nruns=1000,type=1,psi=0) #person 12 regbootsim2(n=100,p=5,k=1,nruns=1000,type=2,psi=0) vsbootsim3(n=100,p=5,k=1,nruns=1000,type=2,psi=0) #person 13 regbootsim2(n=100,p=5,k=1,nruns=1000,type=3,psi=0) vsbootsim3(n=100,p=5,k=1,nruns=1000,type=3,psi=0) #person 14 regbootsim2(n=100,p=5,k=1,nruns=1000,type=4,psi=0) vsbootsim3(n=100,p=5,k=1,nruns=1000,type=4,psi=0) #person 15 regbootsim2(n=100,p=5,k=1,nruns=1000,type=5,psi=0) vsbootsim3(n=100,p=5,k=1,nruns=1000,type=5,psi=0) #person 16 regbootsim2(n=100,p=5,k=1,nruns=1000,type=1,psi=0.9) vsbootsim3(n=100,p=5,k=1,nruns=1000,type=1,psi=0.9) #person 17 regbootsim2(n=100,p=5,k=1,nruns=1000,type=2,psi=0.9) vsbootsim3(n=100,p=5,k=1,nruns=1000,type=2,psi=0.9) #person 18 regbootsim2(n=100,p=5,k=1,nruns=1000,type=3,psi=0.9) vsbootsim3(n=100,p=5,k=1,nruns=1000,type=3,psi=0.9) #person 19 regbootsim2(n=100,p=5,k=1,nruns=1000,type=4,psi=0.9) vsbootsim3(n=100,p=5,k=1,nruns=1000,type=4,psi=0.9) #person 20 regbootsim2(n=100,p=5,k=1,nruns=1000,type=5,psi=0.9) vsbootsim3(n=100,p=5,k=1,nruns=1000,type=5,psi=0.9) ##Problem 5.20 ridge regression versus OLS library(leaps) library(glmnet) #person 1 regbootsim2(n=100,p=4,k=1,nruns=1000,type=1,psi=0) lassobootsim3(n=100,p=4,k=1,nruns=100,type=1,psi=0,regtype=0,BB=250) #person 2 regbootsim2(n=100,p=4,k=1,nruns=1000,type=2,psi=0) lassobootsim3(n=100,p=4,k=1,nruns=100,type=2,psi=0,regtype=0,BB=250) #person 3 regbootsim2(n=100,p=4,k=1,nruns=1000,type=3,psi=0) lassobootsim3(n=100,p=4,k=1,nruns=100,type=3,psi=0,regtype=0,BB=250) #person 4 regbootsim2(n=100,p=4,k=1,nruns=1000,type=4,psi=0) lassobootsim3(n=100,p=4,k=1,nruns=100,type=4,psi=0,regtype=0,BB=250) #person 5 regbootsim2(n=100,p=4,k=1,nruns=1000,type=5,psi=0) lassobootsim3(n=100,p=4,k=1,nruns=100,type=5,psi=0,regtype=0,BB=250) #person 6 regbootsim2(n=100,p=4,k=1,nruns=1000,type=1,psi=0.9) lassobootsim3(n=100,p=4,k=1,nruns=100,type=1,psi=0.9,regtype=0,BB=250) #person 7 regbootsim2(n=100,p=4,k=1,nruns=1000,type=2,psi=0.9) lassobootsim3(n=100,p=4,k=1,nruns=100,type=2,psi=0.9,regtype=0,BB=250) #person 8 regbootsim2(n=100,p=4,k=1,nruns=1000,type=3,psi=0.9) lassobootsim3(n=100,p=4,k=1,nruns=100,type=3,psi=0.9,regtype=0,BB=250) #person 9 regbootsim2(n=100,p=4,k=1,nruns=1000,type=4,psi=0.9) lassobootsim3(n=100,p=4,k=1,nruns=100,type=4,psi=0.9,regtype=0,BB=250) #person 10 regbootsim2(n=100,p=4,k=1,nruns=1000,type=5,psi=0.9) lassobootsim3(n=100,p=4,k=1,nruns=100,type=5,psi=0.9,regtype=0,BB=250) #person 11 regbootsim2(n=100,p=5,k=1,nruns=1000,type=1,psi=0) lassobootsim3(n=100,p=5,k=1,nruns=100,type=1,psi=0,regtype=0,BB=250) #person 12 regbootsim2(n=100,p=5,k=1,nruns=1000,type=2,psi=0) lassobootsim3(n=100,p=5,k=1,nruns=100,type=2,psi=0,regtype=0,BB=250) #person 13 regbootsim2(n=100,p=5,k=1,nruns=1000,type=3,psi=0) lassobootsim3(n=100,p=5,k=1,nruns=100,type=3,psi=0,regtype=0,BB=250) #person 14 regbootsim2(n=100,p=5,k=1,nruns=1000,type=4,psi=0) lassobootsim3(n=100,p=5,k=1,nruns=100,type=4,psi=0,regtype=0,BB=250) #person 15 regbootsim2(n=100,p=5,k=1,nruns=1000,type=5,psi=0) lassobootsim3(n=100,p=5,k=1,nruns=100,type=5,psi=0,regtype=0,BB=250) #person 16 regbootsim2(n=100,p=5,k=1,nruns=1000,type=1,psi=0.9) lassobootsim3(n=100,p=5,k=1,nruns=100,type=1,psi=0.9,regtype=0,BB=250) #person 17 regbootsim2(n=100,p=5,k=1,nruns=1000,type=2,psi=0.9) lassobootsim3(n=100,p=5,k=1,nruns=100,type=2,psi=0.9,regtype=0,BB=250) #person 18 regbootsim2(n=100,p=5,k=1,nruns=1000,type=3,psi=0.9) lassobootsim3(n=100,p=5,k=1,nruns=100,type=3,psi=0.9,regtype=0,BB=250) #person 19 regbootsim2(n=100,p=5,k=1,nruns=1000,type=4,psi=0.9) lassobootsim3(n=100,p=5,k=1,nruns=100,type=4,psi=0.9,regtype=0,BB=250) #person 20 regbootsim2(n=100,p=5,k=1,nruns=1000,type=5,psi=0.9) lassobootsim3(n=100,p=5,k=1,nruns=100,type=5,psi=0.9,regtype=0,BB=250) ##Problem 5.21 lasso versus OLS #library(leaps) #library(glmnet) #person 1 regbootsim2(n=100,p=4,k=1,nruns=1000,type=1,psi=0) lassobootsim4(n=100,p=4,k=1,nruns=100,type=1,psi=0,BB=250) #person 2 regbootsim2(n=100,p=4,k=1,nruns=1000,type=2,psi=0) lassobootsim4(n=100,p=4,k=1,nruns=100,type=2,psi=0,BB=250) #person 3 regbootsim2(n=100,p=4,k=1,nruns=1000,type=3,psi=0) lassobootsim4(n=100,p=4,k=1,nruns=100,type=3,psi=0,BB=250) #person 4 regbootsim2(n=100,p=4,k=1,nruns=1000,type=4,psi=0) lassobootsim4(n=100,p=4,k=1,nruns=100,type=4,psi=0,BB=250) #person 5 regbootsim2(n=100,p=4,k=1,nruns=1000,type=5,psi=0) lassobootsim4(n=100,p=4,k=1,nruns=100,type=5,psi=0,BB=250) #person 6 regbootsim2(n=100,p=4,k=1,nruns=1000,type=1,psi=0.9) lassobootsim4(n=100,p=4,k=1,nruns=100,type=1,psi=0.9,BB=250) #person 7 regbootsim2(n=100,p=4,k=1,nruns=1000,type=2,psi=0.9) lassobootsim4(n=100,p=4,k=1,nruns=100,type=2,psi=0.9,BB=250) #person 8 regbootsim2(n=100,p=4,k=1,nruns=1000,type=3,psi=0.9) lassobootsim4(n=100,p=4,k=1,nruns=100,type=3,psi=0.9,BB=250) #person 9 regbootsim2(n=100,p=4,k=1,nruns=1000,type=4,psi=0.9) lassobootsim4(n=100,p=4,k=1,nruns=100,type=4,psi=0.9,BB=250) #person 10 regbootsim2(n=100,p=4,k=1,nruns=1000,type=5,psi=0.9) lassobootsim4(n=100,p=4,k=1,nruns=100,type=5,psi=0.9,BB=250) #person 11 regbootsim2(n=100,p=5,k=1,nruns=1000,type=1,psi=0) lassobootsim4(n=100,p=5,k=1,nruns=100,type=1,psi=0,BB=250) #person 12 regbootsim2(n=100,p=5,k=1,nruns=1000,type=2,psi=0) lassobootsim4(n=100,p=5,k=1,nruns=100,type=2,psi=0,BB=250) #person 13 regbootsim2(n=100,p=5,k=1,nruns=1000,type=3,psi=0) lassobootsim4(n=100,p=5,k=1,nruns=100,type=3,psi=0,BB=250) #person 14 regbootsim2(n=100,p=5,k=1,nruns=1000,type=4,psi=0) lassobootsim4(n=100,p=5,k=1,nruns=100,type=4,psi=0,BB=250) #person 15 regbootsim2(n=100,p=5,k=1,nruns=1000,type=5,psi=0) lassobootsim4(n=100,p=5,k=1,nruns=100,type=5,psi=0,BB=250) #person 16 regbootsim2(n=100,p=5,k=1,nruns=1000,type=1,psi=0.9) lassobootsim4(n=100,p=5,k=1,nruns=100,type=1,psi=0.9,BB=250) #person 17 regbootsim2(n=100,p=5,k=1,nruns=1000,type=2,psi=0.9) lassobootsim4(n=100,p=5,k=1,nruns=1000,type=2,psi=0.9,BB=250) #person 18 regbootsim2(n=100,p=5,k=1,nruns=1000,type=3,psi=0.9) lassobootsim4(n=100,p=5,k=1,nruns=100,type=3,psi=0.9,BB=250) #person 19 regbootsim2(n=100,p=5,k=1,nruns=1000,type=4,psi=0.9) lassobootsim4(n=100,p=5,k=1,nruns=100,type=4,psi=0.9,BB=250) #person 20 regbootsim2(n=100,p=5,k=1,nruns=1000,type=5,psi=0.9) lassobootsim4(n=100,p=5,k=1,nruns=100,type=5,psi=0.9,BB=250) ##Problem 7.3 zgam <- c(.01,.05,.1,.15,.2,.25,.3,.35,.4,.45,.5) pifclean(3000,zgam) ##Problem 7.4 zh <- c(10,20,30,40,50,60,70,80,90,100) for(i in 1:10) gamper(zh[i]) ##Problem 7.5, needs linmodpack and linmoddata #a) library(MASS) ddcomp(buxx) #left click on the outliers and right click Stop 4 times #b) ddcomp(cbrainx) #left click on the outliers and right click Stop 4 times #c) ddcomp(museum[,-1]) #left click on the outliers and right click Stop 4 times ##Problem 7.6 R commands, needs linmodpack #repeat the following command several times concmv() #right click Stop 5 times ##Problem 7.7, needs mpack ddmv(p=2,gam=.4) #right click Stop 5 times ddmv(p=4,gam=.35) ddmv(p=4,gam=.30) ##Problem 7.8 R commands, needs mpack and mrobdata #b) mbamv(belx,bely) #right click Stop seven times #c) #for better looking plots than the default cmar <- par("mar") par(mfrow = c(2, 1)) par(mar=c(4.0,4.0,2.0,0.5)) #main command is below mbamv2(buxx,buxy) #right click Stop fourteen times #restore default par(mfrow=c(1,1)) par(mar=cmar) ##Problem 7.9 R commands, needs linmodpack and linmoddata #b) mlrplot2(belx,bely) #right click Stop four times #c) mlrplot2(cbrainx,cbrainy) #right click Stop four times #d) mlrplot2(museum[,3:11],museum[,2]) #right click Stop four times #e) mlrplot2(buxx,buxy) #right click Stop four times #f) #use the following command several times mlrplot2(hx,hy) #right click Stop four times library(MASS) ffplot2(hx,hy) ##Problem 7.10 #a) cmar <- par("mar") par(mfrow = c(2, 1)) par(mar=c(4.0,4.0,2.0,0.5)) #main command is below MLRplot(buxx,buxy) #right click Stop twice #restore default par(mfrow=c(1,1)) par(mar=cmar) ## Problem 7.11 a) #Copy and paste two source commands and the three library commands #from near the top of this file into R. out <- cv.glmnet(buxx,buxy,alpha=1) #lasso lam <- out$lambda.min fit <- predict(out,s=lam,newx=buxx) res <- buxy - fit vars <- 1:4 lcoef <- predict(out,type="coefficients",s=lam) lcoef<-as.vector(lcoef)[-1] vin <- vars[lcoef!=0] pp <- length(vin)+1 AERplot2(yhat=fit,y=buxy,res=res,d=pp) title("lasso") #Problem 7.11 b) ind <-getB(buxx)$indx yb <- buxy[ind] xb <- buxx[ind,] xnotinb <- buxx[-ind,] ynotinb <- buxy[-ind] out <- cv.glmnet(xb,yb,alpha=1) #lasso lam <- out$lambda.min fit <- predict(out,s=lam,newx=xb) fit2 <- predict(out,s=lam,newx=xnotinb) yy <- c(yb,ynotinb) yhat <- c(fit,fit2) res <- yy - yhat vars <- 1:4 lcoef <- predict(out,type="coefficients",s=lam) lcoef<-as.vector(lcoef)[-1] vin <- vars[lcoef!=0] pp <- length(vin)+1 AERplot2(yhat=yhat,y=yy,res=res,d=pp) title("lasso using covmb set B") #Problem 7.11 c) ddplot5(buxx) ##Problem 7.12 a) out <- cv.glmnet(cbrainx,cbrainy,alpha=1) #lasso lam <- out$lambda.min fit <- predict(out,s=lam,newx=cbrainx) res <- cbrainy - fit vars <- 1:11 lcoef <- predict(out,type="coefficients",s=lam) lcoef<-as.vector(lcoef)[-1] vin <- vars[lcoef!=0] pp <- length(vin)+1 AERplot2(yhat=fit,y=cbrainy,res=res,d=pp) title("lasso") #Problem 7.12 b) xcontin <- cbrainx[,-c(2,4,10)] #continuous predictors ind <-getB(xcontin)$indx yb <- cbrainy[ind] xb <- cbrainx[ind,] xnotinb <- cbrainx[-ind,] ynotinb <- cbrainy[-ind] out <- cv.glmnet(xb,yb,alpha=1) #lasso lam <- out$lambda.min fit <- predict(out,s=lam,newx=xb) fit2 <- predict(out,s=lam,newx=xnotinb) yy <- c(yb,ynotinb) yhat <- c(fit,fit2) res <- yy - yhat vars <- 1:4 lcoef <- predict(out,type="coefficients",s=lam) lcoef<-as.vector(lcoef)[-1] vin <- vars[lcoef!=0] pp <- length(vin)+1 AERplot2(yhat=yhat,y=yy,res=res,d=pp) title("lasso using covmb set B") #length(cbrainy) - length(ind) #8 cases with the largest distances are omitted from covmb2 set B #Problem 7.12 c) ddplot5(xcontin) ## Problem 7.13 a) mldsim6(n=100,p=10,outliers=1,gam=0.25,pm=20, osteps=9) mldsim6(n=100,p=45,outliers=1,gam=0.25,pm=60, osteps=9) #Problem 1.15 b) #takes a few minutes since p = 1000 mldsim7(n=100,p=1000,outliers=2,pm=900,osteps=0) mldsim7(n=100,p=1000,outliers=2,pm=900,osteps=9) #Problem 7.13 c) mldsim7(n=100,p=100,outliers=3,gam=0.25,pm=8,osteps=0) mldsim7(n=100,p=100,outliers=3,gam=0.25,pm=8,osteps=9) ##Problem 7.14, needs linmodpack and linmoddata library(MASS) tvreg(buxx,buxy,ii=1) #right click Stop 10 times tvreg2(buxx,buxy, M = 0) #right click Stop ##Problem 7.15 elastic net with outliers #a) library(glmnet) out <- enet2(buxx,buxy) #elastic net lam <- out$lam tem <- out$out fit <- predict(tem,s=lam,newx=buxx) res <- buxy - fit vars <- 1:4 lcoef <- predict(tem,type="coefficients",s=lam) lcoef<-as.vector(lcoef)[-1] vin <- vars[lcoef!=0] pp <- length(vin)+1 AERplot2(yhat=fit,y=buxy,res=res,d=pp) title("a) elastic net") #b) #resistant elastic net ind <-getB(buxx)$indx yb <- buxy[ind] xb <- buxx[ind,] xnotinb <- buxx[-ind,] ynotinb <- buxy[-ind] out <- enet2(xb,yb) #elastic net lam <- out$lam tem <- out$out fit <- predict(tem,s=lam,newx=xb) fit2 <- predict(tem,s=lam,newx=xnotinb) yy <- c(yb,ynotinb) yhat <- c(fit,fit2) res <- yy - yhat vars <- 1:4 lcoef <- predict(tem,type="coefficients",s=lam) lcoef<-as.vector(lcoef)[-1] vin <- vars[lcoef!=0] pp <- length(vin)+1 AERplot2(yhat=yhat,y=yy,res=res,d=pp) title("b) elastic net using covmb set B") ##Problem 8.9, needs linmodpack, linmoddata #a) y <- log(mussels)[,4:5] x <- mussels[,1:3] x[,2] <- log(x[,2]) z<-cbind(x,y) out <- mltreg(x,y,indices=c(3,4)) #right click Stop 4 times #b) out #c) ddplot4(out$res) #right click Stop once ##Problem 8.10, needs linmodpack and linmoddata #a) x <- fitness[,1:3] y <- fitness[,4:6] out <- mltreg(x,y,indices=c(2,4)) #right click Stop on the plot 6 times #b) ddplot4(out$res) #right click Stop once ##Problem 8.11 #a) mregsim(n=20,m=2,p=4,nruns=5000,etype=1) #b) mregsim(n=20,m=2,p=4,nruns=5000,etype=1,mnull=F) ##Problem 8.12 mpredsim(n=100,m=2,p=4,nruns=5000,mnull=F,alpha=0.1)