*SAS and R code for Robust Statistics homework problems; Copy and paste code one problem part at a time. The easiest way to get rpack.txt and robdata.txt is to copy and paste the following two commands into R. source("http://lagrange.math.siu.edu/Olive/rpack.txt") source("http://lagrange.math.siu.edu/Olive/robdata.txt") If rpack.txt and robdata.txt are on G drive, get rpack and robdata with the following commands: source("G:/rpack.txt") source("G:/robdata.txt") #Problem 1.2 R commands a) X <- matrix(rnorm(300),nrow=100,ncol=3) Y <- (X %*% 1:3)^3 + rnorm(100) bols <- lsfit(X,Y)\$coef[-1] plot(X %*% bols, Y) c) plot(10*X %*% bols, Y) title("Scaled OLS View") #Problem 1.3 R commands a) cltsim <- function(n=100, nruns=100){ ybar <- 1:nruns for(i in 1:nruns){ ybar[i] <- mean(rexp(n))} list(ybar=ybar)} b) z1 <- cltsim(n=1) z5 <- cltsim(n=5) z25 <- cltsim(n=25) z200 <- cltsim(n=200) par(mfrow=c(2,2)) hist(z1\$ybar) hist(z5\$ybar) hist(z25\$ybar) hist(z200\$ybar) d) cltsim <- function(n=100, nruns=100){ ybar <- 1:nruns for(i in 1:nruns){ ybar[i] <- mean(rnorm(n))} list(ybar=ybar)} z1 <- cltsim(n=1) z5 <- cltsim(n=5) z25 <- cltsim(n=25) z200 <- cltsim(n=200) par(mfrow=c(2,2)) hist(z1\$ybar) hist(z5\$ybar) hist(z25\$ybar) hist(z200\$ybar) par(mfrow=c(1,1)) #Problem 2.15 R commands height <- rnorm(87, mean=1692, sd = 65) height[61:65] <- 19.0 par(mfrow=c(2,2)) plot(height) title("a) Dot plot of heights") hist(height) length(height) val <- quantile(height)[4] - quantile(height)[2] val wid <- 4*1.06*min(sqrt(var(height)),val/1.34)*(87^(-1/5)) wid dens<- density(height,width=wid) plot(dens\$x,dens\$y) lines(dens\$x,dens\$y) title("c) Density of heights") boxplot(height) title("d) Boxplot of heights") par(mfrow=c(1,1)) #Problem 2.16 R commands a) y <- rnorm(10000) mad(y, constant=1) b) y <- rexp(10000) mad(y, constant=1) #Problem 2.17 R commands tmn <-function(x, tp = 0.25){ mean(x, trim = tp)} a) y <- rnorm(10000) tmn(y) b) y <- rexp(10000) mean(y) tmn(y) #Problem 2.18 R commands metmn <-function(x, k = 6){ madd <- mad(x, constant = 1) med <- median(x) mean(x[(x >= med - k * madd) & (x <= med + k * madd)])} y <- rnorm(10000) metmn(y) #Problem 2.19 R commands y <- rnorm(10000) ratmn(y) #Problem 2.20 R commands y <- rnorm(10000) rstmn(y) #Problem 2.29 R commands qplot<-function(y){ plot(sort(y), ppoints(y)) title("QPLOT")} a) #height from Problem 2.15 commands: height <- rnorm(87, mean=1692, sd = 65) height[61:65] <- 19.0 qplot(height) b) Y <- rnorm(1000) qplot(y) #Problem 5.20 R commands, needs rpack b) nx <- matrix(rnorm(300),nrow=100,ncol=3) y <- exp( 4 + nx%*%c(1,1,1) + 0.5*rnorm(100) ) c) tplot(nx,y) d) out <- lsfit(nx,log(y)) ls.print(out) #Problem 5.21 R commands, needs rpack and robdata b) pisim(n=100, type = 1) c) pisim(n=100, type = 3) d) piplot(cbrainx,cbrainy) #Problem 6.1 R commands, needs rpack and robdata a) cmar <- par("mar") par(mfrow = c(2, 1)) par(mar=c(4.0,4.0,2.0,0.5)) #main command is below MLRplot(buxx,buxy) #restore default par(mfrow=c(1,1)) par(mar=cmar) c) library(MASS) rrplot2(buxx,buxy) #use rrplot(buxx,buxy) for Splus d) ffplot2(buxx,buxy) #use ffplot(buxx,buxy) for Splus #Problem 6.2 R commands, needs rpack and robdata b) diagplot(buxx,buxy) #Problem 6.3 R commands library(stats) data(lynx) Y <- log(lynx) out <- ar.yw(Y) Yts <- Y[12:114] FIT <- Yts - out\$resid[12:114] plot(FIT,Yts) abline(0,1) #For Splus use the following commands. #Y <- log(lynx) #out <- ar.yw(Y) #FIT <- Y - out\$resid #plot(FIT,Y) #abline(0,1) #Problem 6.4 R commands, needs rpack library(stats) data(lynx) fflynx() #Problem 6.5 Splus commands, may not work in R. set.seed(14) x1 <- runif(400,-1,1) x2 <- runif(400,-1,1) eps <- rnorm(400,0,.2) Y <- x1*x2 + eps x <- cbind(x1,x2) x[1:10,] <- rnorm(20,.2,.2) Y[1:10] <- rnorm(10,-1.5,.2) out <- ppreg(x,Y,2,3) FIT <- out\$ypred plot(FIT,Y) abline(0,1) #Problem 7.1 R commands, get pifclean from rpack. zgam <- c(.01,.05,.1,.15,.2,.25,.3,.35,.4,.45,.5) pifclean(3000,zgam) #Problem 7.2 R commands, needs rpack zh <- c(10,20,30,40,50,60,70,80,90,100) for(i in 1:10) gamper(zh[i]) #Problem 8.1 R commands, needs rpack b) nltv(0.5) nltv(0.75) nltv(0.9) nltv(0.9999) #Problem 8.2 R commands, needs rpack b) deltv(0.5) deltv(0.75) deltv(0.9) deltv(0.9999) #Problem 8.3 R commands, needs rpack b) cltv(0.5) cltv(0.75) cltv(0.9) cltv(0.9999) #Problem 8.4 R commands, needs rpack and robdata b) mbamv(belx,bely) c) #for better looking plots than the default cmar <- par("mar") par(mfrow = c(2, 1)) par(mar=c(4.0,4.0,2.0,0.5)) #main command is below mbamv2(buxx,buxy) #restore default par(mfrow=c(1,1)) par(mar=cmar) #Problem 8.5 R commands, needs rpack and robdata b) mlrplot2(belx,bely) c) mlrplot2(cbrainx,cbrainy) d) mlrplot2(museum[,3:11],museum[,2]) e) mlrplot2(buxx,buxy) f) #use the following command several times mlrplot2(hx,hy) library(MASS) ffplot2(hx,hy) #Problem 10.12 R commands, needs rpack b) simx2 <- matrix(rnorm(200),nrow=100,ncol=2) outx2 <- matrix(10 + rnorm(80),nrow=40,ncol=2) outx2 <- rbind(outx2,simx2) maha(outx2) #Problem 10.13 R commands, needs rpack and get outx2 from Problem 10.12 library(MASS) rmaha(outx2) #Problem 10.14 R commands, needs rpack #enter the following command 3 times rcovsim(100) #Problem 10.15 R commands, needs rpack and robdata a) library(MASS) ddcomp(buxx) b) ddcomp(cbrainx) c) ddcomp(museum[,-1]) #Problem 10.16 R commands, needs rpack #repeat the following command several times concmv() #Problem 10.17 R commands, needs rpack ddmv(p=2,gam=.4) ddmv(p=4,gam=.4) ddmv(p=4,gam=.1) #Problem 11.2 R commands, needs rpack b) library(MASS) ddsim(n=20,p=2) ddsim(n=30,p=2) #Problem 11.3 R commands, needs rpack library(MASS) corrsim(n=20,p=2,nruns=10) corrsim(n=30,p=2,nruns=10) #Problem 11.4 R commands, needs rpack b) library(MASS) n <- 400 p <- 3 eps <- 0.4 x <- matrix(rnorm(n * p), ncol = p, nrow = n) zu <- runif(n) x[zu < eps,] <- x[zu < eps,]*5 c) ddplot(x) #Problem 11.5 R commands, needs rpack b) #plot for classical covering ellipsoid simx2 <- matrix(rnorm(200),nrow=100,ncol=2) outx2 <- matrix(10 + rnorm(80),nrow=40,ncol=2) outx2 <- rbind(outx2,simx2) ellipse(outx2) #plot for RMVN covering ellipsoid zout <- covrmvn(outx2) ellipse(outx2,center=zout\$center,cov=zout\$cov) #Problem 11.6 R commands, needs rpack c) # get x using Problem 11.4 mplot(x) #Problem 11.7 R commands, needs rpack c) # get x using Problem 11.4 wddplot(x) #Problem 11.8 R commands, needs rpack and robdata library(MASS) tvreg(buxx,buxy,ii=1) tvreg2(buxx,buxy, M = 0) #Problem 12.6 R commands, needs rpack n3x <- matrix(rnorm(300),nrow=100,ncol=3) ln3x <- exp(n3x) library(MASS) a) pairs(n3x) pairs(ln3x) b) ncy <- (n3x%*%1:3)^3 + 0.1*rnorm(100) plot(n3x%*%(1:3),ncy) c) trviews(n3x, ncy) d) #change betahat coefficients below plot(n3x%*%c(22.469,61.242,75.285),n3x%*%1:3) e) lncy <- (ln3x%*%1:3)^3 + 0.1*rnorm(100) trviews(ln3x,lncy) f) #change betahat coefficients below plot(ln3x%*%c(94.848,216.719,328.444),ln3x%*%1:3) #Problem 12.7 R commands, needs rpack a) library(MASS) nx <- matrix(rnorm(300),nrow=100,ncol=3) lnx <- exp(nx) SP <- lnx%*%1:3 lnsincy <- sin(SP)/SP + 0.01*rnorm(100) b) trviews(lnx,lnsincy) essp(lnx,lnsincy,M=40) c) ctrviews(lnx,lnsincy) d) lmsviews(lnx,lnsincy) #Problem 12.8 R commands, needs rpack a) nx <- matrix(rnorm(300),nrow=100,ncol=3) SP <- nx%*%1:3 ncuby <- SP^3 + rnorm(100) nexpy <- exp(SP) + rnorm(100) nlinsy <- SP + 4*sin(SP) + 0.1*rnorm(100) nsincy <- sin(SP)/SP + 0.01*rnorm(100) nsiny <- sin(SP) + 0.1*rnorm(100) nsqrty <- sqrt(abs(SP)) + 0.1*rnorm(100) nsqy <- SP^2 + rnorm(100) b) plot(SP,ncuby) plot(-SP,ncuby) c) library(MASS) trviews(nx,ncuby) essp(nx,ncuby, M=40) d) tem <- c(12.60514, 25.06613, 37.25504) ESP <- nx%*%tem plot(ESP,SP) e) lnx <- exp(nx) SP <- lnx%*%1:3 lncuby <- (SP/3)^3 + rnorm(100) lnlinsy <- SP + 10*sin(SP) + 0.1*rnorm(100) lnsincy <- sin(SP)/SP + 0.01*rnorm(100) lnsiny <- sin(SP/3) + 0.1*rnorm(100) ESP <- lnx%*%tem #Problem 13.19 R commands, needs rpack out <- lrdata() x <- out\$x y <- out\$y lressp(x,y) #Problem 13.20 R commands, needs rpack a) out <- prdata() x <- out\$x y <- out\$y pressp(x,y) b) prplot(x,y) #Problem 13.21 a) belx <- 50:73 bely <-c(0.44,0.47,0.47,0.59,0.66,0.73,0.81,0.88,1.06,1.2,1.35,1.49, 1.61,2.12,11.9,12.4,14.2,15.9,18.2,21.2,4.3,2.4,2.7,2.9) out <- lm(bely~belx) ESP <- predict(out) Y <- bely plot(ESP,Y) abline(0,1) b) library(mgcv) outgam <- gam(bely~s(belx)) EAP <- predict(outgam) plot(EAP,Y) abline(0,1) #Problem 13.22 #ha = number patients with heart attack for each ck group #ok = number of patients without a heart attack a) ck <- c(20,60,100,140,180,220,260,300,340,380,420,460) ha <- c(2,13,30,30,21,19,18,13,19,15,7,8) ok <- c(88,26,8,5,0,1,1,1,1,0,0,0) heart <- as.data.frame(cbind(ck,ha,ok)) outw <- glm(cbind(ha,ok)~ck+I(ck^2)+I(ck^3),family=binomial, data=heart) out <- glm(cbind(ha,ok)~log(ck),family=binomial, data=heart) par(mfrow=c(2,2)) library(mgcv) outgam <- gam(cbind(ha,ok)~s(ck),family=binomial, data=heart) plot(outgam) title("a) Shat") outgam2 <- gam(cbind(ha,ok)~s(log(ck)),family=binomial, data=heart) plot(outgam2) title("b) Shat for log(ck) GAM") EAP <- predict(outgam) ESP <- predict(out) plot(EAP,ESP) abline(0,1) title("c) EE Plot: log(ck) GLM") Z <- ha/(ha+ok) fit<-Z plot(ESP,Z) fit <- exp(ESP)/(1 + exp(ESP)) indx <- sort.list(ESP) lines(ESP[indx], fit[indx]) title("d) Response Plot ") par(mfrow=c(1,1)) b) AIC(outw) AIC(out) #HW 13.23; #make sure cbrain.dat.txt is on j drive or the infile command won't work; options ls = 70; data cbrain; infile "j:cbrain.dat.txt"; input obs x1 x2 x3 x4 x5 x6 x7 x8 x9 x10 sex x12; proc rsquare; model sex = x2 x3 x4 x5 x6 x7 x9 x10 / cp; proc reg; model sex = x2 x3 x4 x5 x6 x7 x9 x10; output out = a p = pred r = resid; run;