##The command
##source("http://parker.ad.siu.edu/Olive/mpack.txt")
##is an easy way to get these functions into R.

##### BETTER robust MLD algorithms for FCH, RFCH, and RMVN
##would use MB if DGK causes an error.

anovasim2<-function(n1=20,n2=20,n3=20,n4=20,m1=0,m2=0,m3=0,m4=0,
sd1=1,sd2=1,sd3=1,sd4=1,type=1,B=400,nruns=100,alpha=0.05){
# Simulates ANOVA F, bootstrap F, modified F, Welch F, modified Welch F,
#rank F statistic, large sample F test for 1 way ANOVA.
# tests Ho: mu1 = mu2 = mu3 = mu4
p <- 4
r <- p-1
n <- n1 + n2 + n3 + n4
up <- min((1 - alpha/2), (1 - alpha + 10*alpha*r/B))
if(alpha > 0.1) up <- min((1 - alpha + 0.05), (1 - alpha + r/B))
qn <- up
if(qn < 1 - alpha + 0.001) up <- 1 - alpha
indx1 <- 1:n1
indx2 <- 1:n2
indx3 <- 1:n3
indx4 <- 1:n4
y <- 0 * 1:n
c1 <- 0 * 1:n
c2 <- c1
c3 <- c1
c4 <- c1
c1[1:n1] <- 1
c2[(n1 + 1):(n1 + n2)] <- 1
c3[(n1 + n2 + 1):(n1 + n2 + n3)] <- 1
c4[(n1 + n2 + n3 + 1):n] <- 1
x <- cbind(c1, c2, c3, c4) #cell means model
zx <- c(rep(1,n1),rep(2,n2),rep(3,n3),rep(4,n4))
y1 <- 1:n1
y2 <- 1:n2
y3 <- 1:n3
y4 <- 1:n4
beta <- c(m1, m2, m3, m4)
bstat <- 1:B
bcut <- 1:nruns
quant <- 1 - alpha
fcut <- qf(quant, p - 1, n - p)
dfmin <- min(n1,n2,n3,n4)
lcut <- qf(quant, p - 1, dfmin)
faov <- 0 * 1:nruns
rf <- faov
lf <- rf
wwf <- rf
mct <- 0
wct <- 0
mwct <- 0
cinv <- diag(x=c(n1,n2,n3)) - (1/n)*as.matrix(c(n1,n2,n3))%*%c(n1,n2,n3)
for(i in 1:nruns) {
#make data
if(type==1){
y1 <- rnorm(n1, mean = m1, sd = sd1)
y2 <- rnorm(n2, mean = m2, sd = sd2)
y3 <- rnorm(n3, mean = m3, sd = sd3)
y4 <- rnorm(n4, mean = m4, sd = sd4) }
if(type==2){
y1 <- rexp(n1, rate = 1/sd1) + m1 - sd1
y2 <- rexp(n2, rate = 1/sd2) + m2 - sd2
y3 <- rexp(n3, rate = 1/sd3) + m3 - sd3
y4 <- rexp(n4, rate = 1/sd4) + m4 - sd4 }
if(type==3){
y1 <- rnorm(n1, mean = m1, sd = sd1)
y2 <- rnorm(n2, mean = m2, sd = sd2)
y3 <- rexp(n3, rate = 1/sd3) + m3 - sd3
y4 <- rexp(n4, rate = 1/sd4) + m4 - sd4 }
y <- c(y1, y2, y3, y4)
mn1 <- mean(y1)
mn2 <- mean(y2)
mn3 <- mean(y3)
mn4 <- mean(y4)
s1sq <- var(y1)
s2sq <- var(y2)
s3sq <- var(y3)
s4sq <- var(y4)
#get ANOVA F statistic
fit <- c(rep(mn1,n1),rep(mn2,n2),rep(mn3,n3),rep(mn4,n4))
res <- y - fit
mse <- t(res) %*% res/(n - p)
sse <- mse * (n - p)
mny <- mean(y)
sstr <- n1 * (mn1 - mny)^2 + n2 * (mn2 - mny)^2 + n3 * (mn3 - mny)^2 +
 n4 * (mn4 - mny)^2
fnum <- sstr/(p - 1)
faov[i] <- fnum/mse
AT <- c(mn1-mn4,mn2-mn4,mn3-mn4)
#faov[i] = AT%*%cinv%*%AT/((p-1)*mse)
#get modified F statistic
num <- (p - 1) * fnum
den <- (1 - n1/n) * s1sq + (1 - n2/n) * s2sq + (1 - n3/n) * s3sq +
 (1 - n4/n) * s4sq
modf <- num/den
c1 <- ((1 - n1/n) * s1sq)/den
c2 <- ((1 - n2/n) * s2sq)/den
c3 <- ((1 - n3/n) * s3sq)/den
c4 <- ((1 - n4/n) * s4sq)/den
finv <- c1^2/(n1 - 1) + c2^2/(n2 - 1) + c3^2/(n3 - 1) + c4^2/(n4 - 1)
md <- ceiling(1/finv)
if(modf > qf(quant, p - 1, md)) mct <- mct + 1
#get Welch F statistic
w1 <- n1/s1sq
w2 <- n2/s2sq
w3 <- n3/s3sq
w4 <- n4/s4sq
u <- w1 + w2 + w3 + w4
xdd <- (w1 * mn1)/u + (w2 * mn2)/u + (w3 * mn3)/u + (w4 * mn4)/u
num <- (w1 * (mn1 - xdd)^2 + w2 * (mn2 - xdd)^2 + w3 * (mn3 - xdd)^2 +
 w4 * (mn4 - xdd)^2)/(p - 1)
tem <- (1 - w1/u)^2/(n1 - 1) + (1 - w2/u)^2/(n2 - 1) +
 (1 - w3/u)^2/(n3 - 1) + (1 - w4/u)^2/(n4 - 1)
den <- 1 + (2 * (p - 2) * tem)/(p^2 - 1)
wf <- num/den
#wwf[i] <- wf
finv <- (3 * tem)/(p^2 - 1)
wd <- ceiling(1/finv)
if(wf > qf(quant, p - 1, wd)) wct <- wct + 1
#get modified Welch F statistic
dfnum <- (1/w1 + 1/w2 + 1/w3 + 1/w4)^2
dfden <- (1/w1)^2/(n1 - 1) + (1/w2)^2/(n2 - 1) + (1/w3)^2/(n3 - 1) +
 (1/w4)^2/(n4 - 1)
wd <- ceiling(dfnum/dfden)
if(wf > qf(quant, p - 1, wd)) mwct <- mwct + 1
#get large sample F test
wt <- c(w1,w2,w3)
dinv <- diag(x=wt) - as.matrix(wt)%*%wt/u
lf[i] <- AT%*%dinv%*%AT/(p-1)
#get rank F statistic
ry <- rank(y)
out <- lsfit(x, ry, intercept = F)
brols <- out$coef
mn1 <- brols[1]
mn2 <- brols[2]
mn3 <- brols[3]
mn4 <- brols[4]
res <- out$resid
mser <- t(res) %*% res/(n - p)
sse <- mser * (n - p)
mny <- mean(ry)
sstr <- n1 * (mn1 - mny)^2 + n2 * (mn2 - mny)^2 + n3 * (mn3 - mny)^2 +
 n4 * (mn4 - mny)^2
fnum <- sstr/(p - 1)
rf[i] <- fnum/mser
#get bootstrap cutoff
bag <- 0
for(j in 1:B){
tem <- sample(indx1,n1,replace=T)
mny1 <- mean(y1[tem])
tem <- sample(indx2,n2,replace=T)
mny2 <- mean(y2[tem])
tem <- sample(indx3,n3,replace=T)
mny3 <- mean(y3[tem])
tem <- sample(indx4,n4,replace=T)
mny4 <- mean(y4[tem])
ATb <- c(mny1-mny4,mny2-mny4,mny3-mny4)
bstat[j] <- (ATb-AT)%*%cinv%*%(ATb-AT)/((p-1)*mse)
}
bcut[i] <- quantile(bstat,up)
}
faovcov <- sum(faov > fcut)/nruns
bootcov <- sum(faov > bcut)/nruns
modfcov <- mct/nruns
mwfcov <- mwct/nruns
wfcov <- wct/nruns
rfcov <- sum(rf > fcut)/nruns
lfcov <- sum(lf > lcut)/nruns
list(faovcov=faovcov,bootcov=bootcov,modfcov=modfcov,wfcov=wfcov,
mwfcov=mwfcov,rfcov=rfcov,lfcov=lfcov,bcut=mean(bcut),fcut=fcut,
lcut=lcut,up=up)
}

bootci<-function(bstat, stat, upc){
#computes the prediction region method and Bickel and Ren CIs for statistic stat.
#bstat is the bootstrap sample T_1^*,...,T_B^* where stat = T_n
cent<-mean(bstat)
adist <- abs(bstat-cent)
phlen <- quantile(adist, upc)
pLn <- cent - phlen
pUn <- cent + phlen
adist <- abs(bstat-stat)
brhlen <- quantile(adist, upc)
brLn <- stat - brhlen
brUn <- stat + brhlen
prcilen<-2*phlen
brcilen<-2*brhlen
list(prci=c(pLn, pUn),prcilen=prcilen,brci=c(brLn, brUn),brcilen=brcilen)
}

cci<-function(x, alpha = 0.05)
{#gets classical 100 (1-alpha)% CI
#defaults are alpha = .05
n <- length(x)
up <- 1 - alpha/2
mn <- mean(x)
v <- var(x)
se <- sqrt(v/n)
val <- qt(up, n - 1) * se
lo <- mn - val
hi <- mn + val
list(int = c(lo, hi), mean = mn, se = se)
}
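## Added usage sketch (not part of the original mpack.txt): cci gives the
## classical t interval, and bootci turns a bootstrap sample of a statistic
## into the prediction region method and Bickel and Ren CIs. The data and
## bootstrap size below are hypothetical.
# z <- rnorm(100)
# cci(z, alpha = 0.05)                       #classical t interval for the mean
# bstat <- replicate(1000, mean(sample(z, replace = TRUE)))
# bootci(bstat, stat = mean(z), upc = 0.95)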
cltv<-function(gam = 0.5)
{# Gets asy var for lts(h) and lta(h) at Cauchy C(0,1)
# where h/n -> gam.
k <- tan((pi * gam)/2)
num <- 2 * k - pi * gam
den <- pi * (gam - (2 * k)/(pi * (1 + k^2)))^2
ltsv <- num/den
num <- gam
den <- 4 * (1/pi - 1/(pi * (1 + k^2)))^2
ltav <- num/den
list(ltsv=ltsv, ltav=ltav)
}

cmba2<-function(x, csteps = 5, ii = 1)
{# gets the covmba estimator using 98, 95, 90, 80, 70, 60 and 50% trimming
#needs p > 1, plots the 7 DD plots
#right click 7 times
n <- dim(x)[1]
p <- dim(x)[2]
mds <- matrix(nrow = n, ncol = 8, 0)
##get the DGK estimator
covs <- var(x)
mns <- apply(x, 2, mean)
cmd <- sqrt(mahalanobis(x, mns, covs))
## concentrate
for(i in 1:csteps) {
md2 <- mahalanobis(x, mns, covs)
medd2 <- median(md2)
mns <- apply(x[md2 <= medd2, ], 2, mean)
covs <- var(x[md2 <= medd2, ])
}
mds[, 8] <- sqrt(mahalanobis(x, mns, covs))
covb <- covs
mnb <- mns
##get the square root of det(covb)
critb <- prod(diag(chol(covb)))
##get the resistant estimator
covv <- diag(p)
med <- apply(x, 2, median)
md2 <- mahalanobis(x, center = med, covv)
smd2 <- sort(md2)
val <- p + 3
tem <- 1:7
tem[1] <- smd2[val + floor(0.02 * n)]
tem[2] <- smd2[val + floor(0.05 * n)]
tem[3] <- smd2[val + floor(0.1 * n)]
tem[4] <- smd2[val + floor(0.2 * n)]
tem[5] <- smd2[val + floor(0.3 * n)]
tem[6] <- smd2[val + floor(0.4 * n)]
tem[7] <- median(md2)
medd2 <- tem[7]
for(j in ii:7) {
## get the start
val2 <- tem[j]
mns <- apply(x[md2 <= val2, ], 2, mean)
covs <- var(x[md2 <= val2, ])
## concentrate
for(i in 1:csteps) {
md2 <- mahalanobis(x, mns, covs)
medd2 <- median(md2)
mns <- apply(x[md2 <= medd2, ], 2, mean)
covs <- var(x[md2 <= medd2, ])
}
mds[, j] <- sqrt(mahalanobis(x, mns, covs))
plot(cmd, mds[, j])
identify(cmd, mds[, j])
crit <- prod(diag(chol(covs)))
if(crit < critb) {
critb <- crit
covb <- covs
mnb <- mns
}
}
pairs(mds)
##scale for better performance at MVN
rd2 <- mahalanobis(x, mnb, covb)
const <- median(rd2)/(qchisq(0.5, p))
covb <- const * covb
list(center = mnb, cov = covb, mds = mds)
}
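## Added usage sketch (not part of the original mpack.txt): the cltv function
## above returns the LTS and LTA asymptotic variances at the Cauchy
## distribution; the gam values below are hypothetical.
# cltv(gam = 0.5)
# cltv(gam = 0.9)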
cmve<-function(x, csteps = 5)
{# gets the cmve, rcmve and mb estimators
zx <- x
x <- as.matrix(x)
p <- dim(x)[2]
##get the DGK estimator
covs <- var(x)
mns <- apply(x, 2, mean)
## concentrate
for(i in 1:csteps) {
md2 <- mahalanobis(x, mns, covs)
medd2 <- median(md2)
if(p > 1) { mns <- apply(x[md2 <= medd2, ], 2, mean) }
if(p == 1) { mns <- mean(zx[md2 <= medd2]) }
covs <- var(x[md2 <= medd2, ])
}
covd <- covs
mnd <- mns
##get the MB estimator
covv <- diag(p)
med <- apply(x, 2, median)
md2 <- mahalanobis(x, center = med, covv)
medd2 <- median(md2)
medd3 <- medd2
## get the start
if(p > 1) { mns <- apply(x[md2 <= medd2, ], 2, mean) }
if(p == 1) { mns <- mean(zx[md2 <= medd2]) }
covs <- var(x[md2 <= medd2, ])
## concentrate
for(i in 1:csteps) {
md2 <- mahalanobis(x, mns, covs)
medd2 <- median(md2)
if(p > 1) { mns <- apply(x[md2 <= medd2, ], 2, mean) }
if(p == 1) { mns <- mean(zx[md2 <= medd2]) }
covs <- var(x[md2 <= medd2, ])
}
covm <- covs
mnm <- mns
##get CMVE attractor
covf <- covm
mnf <- mnm
val <- mahalanobis(t(mnd), med, covv)
if(val < medd3) {
##crit = [med(D)]^p * square root of det(cov)
rd2 <- mahalanobis(x, mnd, covd)
critd <- (sqrt(median(rd2)))^p*prod(diag(chol(covd)))
rd2 <- mahalanobis(x, mnm, covm)
critm <- (sqrt(median(rd2)))^p*prod(diag(chol(covm)))
if(critd < critm) {
covf <- covd
mnf <- mnd
}
}
## get CMVE estimator
chisqm <- qchisq(0.5, p)
rd2 <- mahalanobis(x, mnf, covf)
const <- median(rd2)/chisqm
covf <- const * covf
##reweight the above CMVE estimator (mnf,covf) to get the
##RCMVE estimator (rmnf,rcovf)
rd2 <- mahalanobis(x, mnf, covf)
up <- qchisq(0.975, p)
if(p > 1){ rmnf <- apply(x[rd2 <= up, ], 2, mean) }
if(p == 1){ rmnf <- mean(zx[rd2 <= up]) }
rcovf <- var(x[rd2 <= up, ])
rd2 <- mahalanobis(x, rmnf, rcovf)
const <- median(rd2)/chisqm
rcovf <- const * rcovf
## reweight again
rd2 <- mahalanobis(x, rmnf, rcovf)
if(p > 1){ rmnf <- apply(x[rd2 <= up, ], 2, mean) }
if(p == 1){ rmnf <- mean(zx[rd2 <= up]) }
rcovf <- var(x[rd2 <= up, ])
rd2 <- mahalanobis(x, rmnf, rcovf)
const <- median(rd2)/chisqm
rcovf <- const * rcovf
list(center = mnf, cov = covf, rmnf = rmnf, rcovf = rcovf, mnm = mnm,
covm = covm)
}

concmv<-function(n = 100, csteps = 5, gam = 0.4, outliers = T, start = 3)
{#Shows how concentration works when p = 2.
#right click csteps times
# Use start = 1 for DGK, start = 2 for MBA sphere = FCH sphere = MB estimator,
# start = 3 for MBA coord MED, diag([MAD(X1)]^2,...,[MAD(Xp)]^2) start
p <- 2
#A <- cbind(c(1, 0.9), c(0.9, 1))
x <- matrix(rnorm(n * p), ncol = p, nrow = n)
#A <- diag(sqrt(1:p))
#if(outliers == T) {
#val <- floor(gam * n)
#tem <- 10 + 0 * 1:p
#x[1:val, ] <- x[1:val, ] + tem
#}
#x <- x %*% A
A <- cbind(c(1, 0.4), c(0.4, 1))
B <- cbind(c(0.5, 0), c(0, 0.5))
if(outliers == T) {
val <- floor(gam * n)
x[(val + 1):n, ] <- x[(val + 1):n, ] %*% A
x[1:val, ] <- x[1:val, ] %*% B
x[1:val, 1] <- x[1:val, 1] + 0
x[1:val, 2] <- x[1:val, 2] + 6
}
else { x <- x %*% A }
if(start == 1) {
covs <- var(x)
mns <- apply(x, 2, mean)
}
if(start == 2) {
covv <- diag(p)
med <- apply(x, 2, median)
md2 <- mahalanobis(x, center = med, covv)
medd2 <- median(md2)
## get the start
mns <- apply(x[md2 <= medd2, ], 2, mean)
covs <- var(x[md2 <= medd2, ])
}
if(start > 2) {
tem <- apply(x, 2, mad)^2
covv <- diag(tem)
med <- apply(x, 2, median)
md2 <- mahalanobis(x, center = med, covv)
medd2 <- median(md2)
## get the start
mns <- apply(x[md2 <= medd2, ], 2, mean)
covs <- var(x[md2 <= medd2, ])
}
## concentrate
for(i in 1:csteps) {
md2 <- mahalanobis(x, mns, covs)
medd2 <- median(md2)
mns <- apply(x[md2 <= medd2, ], 2, mean)
covs <- var(x[md2 <= medd2, ])
plot(x[, 1], x[, 2])
points(x[md2 <= medd2, 1], x[md2 <= medd2, 2], pch = 15)
identify(x[, 1], x[, 2])
}
}
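## Added usage sketch (not part of the original mpack.txt): the cmve function
## above on hypothetical clean bivariate normal data; the rmnf and rcovf
## components give the reweighted RCMVE estimator.
# x <- matrix(rnorm(200), ncol = 2)
# out <- cmve(x)
# out$center    #CMVE location
# out$rcovf     #RCMVE scaled dispersion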
concsim<-function(n = 100, p = 2, steps = 5, gam = 0.4, runs = 20)
{# type "library(MASS)" or "library(lqs)."
# Need n > 2p, p > 1. This function is used to determine when the DD
# plot separates outliers from non-outliers for various starts.
A <- sqrt(diag(1:p))
mbact <- 0
fmcdct <- 0
mbct <- 0
madct <- 0
dgkct <- 0
fchct <- 0
for(i in 1:runs) {
x <- matrix(rnorm(n * p), ncol = p, nrow = n)
## outliers have mean (10, 10 sqrt(2), ..., 10 sqrt(p))^T
val <- floor(gam * n)
tem <- 10 + 0 * 1:p
x[1:val, ] <- x[1:val, ] + tem
x <- x %*% A
#MBA
out <- covmba(x, csteps = steps)
center <- out$center
cov <- out$cov
rd2 <- mahalanobis(x, center, cov)
if(min(rd2[1:val]) > max(rd2[(val + 1):n])) mbact <- mbact + 1
#DGK
covs <- var(x)
mns <- apply(x, 2, mean)
## concentrate
for(j in 1:steps) {
md2 <- mahalanobis(x, mns, covs)
medd2 <- median(md2)
mns <- apply(x[md2 <= medd2, ], 2, mean)
covs <- var(x[md2 <= medd2, ])
}
rd2 <- mahalanobis(x, mns, covs)
if(min(rd2[1:val]) > max(rd2[(val + 1):n])) dgkct <- dgkct + 1
#Median Ball start
covv <- diag(p)
med <- apply(x, 2, median)
md2 <- mahalanobis(x, center = med, covv)
medd2 <- median(md2)
## get the start
mns <- apply(x[md2 <= medd2, ], 2, mean)
covs <- var(x[md2 <= medd2, ])
## concentrate
for(j in 1:steps) {
md2 <- mahalanobis(x, mns, covs)
medd2 <- median(md2)
mns <- apply(x[md2 <= medd2, ], 2, mean)
covs <- var(x[md2 <= medd2, ])
}
rd2 <- mahalanobis(x, mns, covs)
if(min(rd2[1:val]) > max(rd2[(val + 1):n])) mbct <- mbct + 1
#MAD start
tem <- apply(x, 2, mad)^2
covv <- diag(tem)
md2 <- mahalanobis(x, center = med, covv)
medd2 <- median(md2)
## get the start
mns <- apply(x[md2 <= medd2, ], 2, mean)
covs <- var(x[md2 <= medd2, ])
## concentrate
for(j in 1:steps) {
md2 <- mahalanobis(x, mns, covs)
medd2 <- median(md2)
mns <- apply(x[md2 <= medd2, ], 2, mean)
covs <- var(x[md2 <= medd2, ])
}
rd2 <- mahalanobis(x, mns, covs)
if(min(rd2[1:val]) > max(rd2[(val + 1):n])) madct <- madct + 1
#FMCD
out <- cov.mcd(x)
center <- out$center
cov <- out$cov
rd2 <- mahalanobis(x, center, cov)
if(min(rd2[1:val]) > max(rd2[(val + 1):n])) fmcdct <- fmcdct + 1
#FCH
out <- covfch(x, csteps = steps)
center <- out$center
cov <- out$cov
rd2 <- mahalanobis(x, center, cov)
if(min(rd2[1:val]) > max(rd2[(val + 1):n])) fchct <- fchct + 1
}
list(mbact = mbact, fmcdct = fmcdct, dgkct = dgkct, mbct = mbct,
madct = madct, fchct = fchct)
}
confreg<-function(x, g = 4, that = 1:4, alpha = 0.05){
# Makes confidence regions for theta from rows of x = Ti* from a bootstrap.
# Use for testing H_0: theta = 0 versus H_1: theta != 0.
# The prediction region method, hybrid, and Bickel and Ren regions are used.
# If g = 1, the shorth interval should work better.
# Also computes the distance for the 0 vector.
# Need g = dim(x)[2] and T = that the g by 1 estimator of theta.
# Often that = A betahat(I_min,0).
# Note that center = Tbar* = bagging estimator and cov = S*_T.
x <- as.matrix(x)
that <- as.vector(that)
n <- dim(x)[1]
zero <- 0*(1:g)
up <- min((1 - alpha/2), (1 - alpha + 10*alpha*g/n))
if(alpha > 0.1) up <- min((1 - alpha + 0.05), (1 - alpha + g/n))
qn <- up
if(qn < 1 - alpha + 0.001) up <- 1 - alpha
center <- apply(x, 2, mean)
cov <- var(x)
md2 <- mahalanobis(x, center, cov)
# MD is the classical distance
MD <- sqrt(md2)
#get prediction region method cutoff
cuplim <- sqrt(quantile(md2, up))
D0 <- sqrt(mahalanobis(zero, center, cov))
#get hybrid region statistic = Bickel and Ren statistic
br0 <- sqrt(mahalanobis(zero, that, cov))
#get the Bickel and Ren cutoff and test statistic
br2 <- mahalanobis(x,that,cov)
brlim <- sqrt(quantile(br2, up))
list(cuplim=cuplim,brlim=brlim,br0=br0,D0=D0,MD=MD,center=center,cov=cov)
}

corboot<-function(x, B = 1000){
#rowwise bootstrap of the correlation matrix
#stacks entries above the diagonal into a vector beta
x <- as.matrix(x)
p <- dim(x)[2]
n <- dim(x)[1]
indx <- 1:n
nc <- p*(p-1)/2
betas <- matrix(0,nrow=B,ncol=nc)
for(i in 1:B){
tem <- sample(indx,n,replace=T)
out <- cor(x[tem,])
temp <- out[1,(2:p)]
for(j in (2:(p-1))) temp <- c(temp,out[j,((j+1):p)])
betas[i,] <- temp
}
list(betas=betas)
}

corbootsim<-function(n = 100, p = 4, BB=1000, nruns = 100, type = 1,
psi = 0.0, dd=1, eps = 0.25, alph = 0.05){
#Simulates bootstrap for correlation matrix.
#Stacks entries above the diagonal into a vector beta.
# Make x where type = 1 for MVN Nq(0,I),
# 2, 3, 4 and 5 (with delta = eps) for (1 - delta) Nq(0,I) + delta Nq(0, 25 I)
# 6, 7, 8 and 9 for multivariate t_d with d = 3, 5, 19 or dd
# 10 for lognormal.
# Multiply x by A: for MVN data this results
# in a covariance matrix with eigenvector c(1, ..., 1)^T
# corresponding to the largest eigenvalue. As psi gets
# close to 1, the data clusters about the line in the
# direction of (1, ..., 1)^T. See Maronna and Zamar (2002).
# cor(X_i,X_j) = [2 psi +(p-2)psi^2]/[1 + (p-1)psi^2], i not = j
# when the correlation exists.
nc <- p*(p-1)/2
ncp1 <- nc + 1
cicov <- 0*(1:ncp1)
avelen <- cicov
rho <- (2*psi + (p-2)*psi^2)/(1 + (p-1)*psi^2)
A <- matrix(psi,nrow=p,ncol=p)
diag(A) <- 1
for(i in 1:nruns) {
#make data
x <- matrix(rnorm(n * p), nrow = n, ncol = p)
if(type == 2) {
zu <- runif(n)
x[zu < 0.4, ] <- x[zu < 0.4, ] * 5 }
if(type == 3) {
zu <- runif(n)
x[zu < 0.6, ] <- x[zu < 0.6, ] * 5 }
if(type == 4) {
zu <- runif(n)
x[zu < 0.1, ] <- x[zu < 0.1, ] * 5 }
if(type == 5) {
zu <- runif(n)
x[zu < eps, ] <- x[zu < eps, ] * 5 }
if(type == 6) {
zu <- sqrt(rchisq(n, 3)/3)
x <- x/zu }
if(type == 7) {
zu <- sqrt(rchisq(n, 5)/5)
x <- x/zu }
if(type == 8) {
zu <- sqrt(rchisq(n, 19)/19)
x <- x/zu }
if(type == 9) {
zu <- sqrt(rchisq(n, dd)/dd)
x <- x/zu }
if(type == 10) x <- exp(x)
x <- x %*% A
out <- corboot(x, B = BB) #rowwise bootstrap
for(j in 1:nc){
tem <- shorth3(out$betas[,j],alpha=alph)
if(rho >= tem$shorth[1] && rho <= tem$shorth[2]) cicov[j] <- cicov[j] + 1
avelen[j] <- avelen[j] + tem$shorth[2] - tem$shorth[1]
}
#test whether the nc values of beta are 0
tem <- predreg(out$betas,alpha=alph)
if(tem$D0 <= tem$cuplim) cicov[ncp1] <- cicov[ncp1] + 1
avelen[ncp1] <- avelen[ncp1] + tem$cuplim
}
cicov <- cicov/nruns
avelen <- avelen/nruns
list(cicov=cicov,avelen=avelen)
}
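## Added usage sketch (not part of the original mpack.txt): bootstrap the
## correlation matrix with corboot, then test Ho: all correlations = 0 with
## confreg. The data is hypothetical, and that is stacked row by row above
## the diagonal to match corboot's ordering.
# x <- matrix(rnorm(400), ncol = 4)
# out <- corboot(x, B = 500)
# R <- cor(x)
# that <- c(R[1,2:4], R[2,3:4], R[3,4])
# confreg(out$betas, g = 6, that = that)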
corrsim<-function(n = 100, p = 3, eps = 0.4, nruns = 100, type = 5)
{#Need n > 2p. For R, type "library(MASS)" before using this
#function. This function generates nruns n by p matrices x.
# The output is the nruns sample correlations between the MDi and RDi.
# RDi uses covmba for type = 1, rmba for type = 2, cov.mcd for
# type = 3, covfch for type = 4, rfch for type = 5, cmve for type = 6,
# rcmve for type = 7, rmvn for type = 8.
# mahalanobis gives squared Maha distances
corrs <- 1:nruns
for(i in 1:nruns) {
wt <- 0 * (1:n)
x <- matrix(rnorm(n * p), ncol = p, nrow = n)
#The following 3 commands make x elliptically contoured.
#zu <- runif(n)
#x[zu < eps,] <- x[zu < eps,]*5
#x <- x^2
# To make marginals of x lognormal, use
#x <- exp(x)
center <- apply(x, 2, mean)
cov <- var(x)
md2 <- mahalanobis(x, center, cov)
if(type == 1) { out <- covmba(x) }
if(type == 2) { out <- rmba(x) }
if(type == 3) { out <- cov.mcd(x) }
if(type == 4) { out <- covfch(x) }
if(type == 5) {
out <- covfch(x)
center <- out$rmnf
cov <- out$rcovf
}
if(type == 6) { out <- cmve(x) }
if(type == 7) {
out <- cmve(x)
center <- out$rmnf
cov <- out$rcovf
}
if(type == 8) { out <- covrmvn(x) }
if(type != 5 && type != 7){
center <- out$center
cov <- out$cov
}
rd2 <- mahalanobis(x, center, cov)
# need square roots for the usual distances
md <- sqrt(md2)
rd <- sqrt(rd2)
const <- sqrt(qchisq(0.5, p))/median(rd)
rd <- const * rd
# wt[rd < sqrt(qchisq(0.975, p))] <- 1
# corrs[i] <- cor(md[wt > 0], rd[wt > 0])
corrs[i] <- cor(md, rd)
}
cmean <- mean(corrs)
cmin <- min(corrs)
clt95 <- sum(corrs < 0.95)
clt80 <- sum(corrs < 0.8)
list(cmean = cmean, cmin = cmin, clt95 = clt95, clt80 = clt80, corrs = corrs)
}
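## Added usage sketch (not part of the original mpack.txt): DD plot
## correlations for the RFCH estimator on clean MVN data; type
## "library(MASS)" first if a cov.mcd type is used. Arguments are
## hypothetical.
# corrsim(n = 100, p = 3, nruns = 20, type = 5)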
corrsim2<-function(n = 100, q = 4, nruns = 100, xtype = 1, type = 1,
dd = 1, eps = 0.25){
# Need n > 2q. R users need to type library(MASS).
# MAY NOT WORK IF q = 1
# Multiply x by A where xtype = 1 for MVN Nq(0,I),
# 2, 3, 4 and 5 (with delta = eps) for (1 - delta) Nq(0,I) + delta Nq(0, 25 I)
# 6, 7, 8 and 9 for multivariate t_d with d = 3, 5, 19 or dd
# 10 for lognormal.
# The output is the nruns sample correlations between the MDi and RDi.
# RDi uses covmba for type = 1, rmba for type = 2, cov.mcd for
# type = 3, covfch for type = 4, rfch for type = 5, cmve for type = 6,
# rcmve for type = 7, rmvn for type = 8.
# mahalanobis gives squared Maha distances
set.seed(974)
corrs <- 1:nruns
k <- q/2
A <- sqrt(diag(1:q))
for(i in 1:nruns) {
#make data
x <- matrix(rnorm(n * q), nrow = n, ncol = q)
if(xtype == 2) {
zu <- runif(n)
x[zu < 0.4, ] <- x[zu < 0.4, ] * 5 }
if(xtype == 3) {
zu <- runif(n)
x[zu < 0.6, ] <- x[zu < 0.6, ] * 5 }
if(xtype == 4) {
zu <- runif(n)
x[zu < 0.1, ] <- x[zu < 0.1, ] * 5 }
if(xtype == 5) {
zu <- runif(n)
x[zu < eps, ] <- x[zu < eps, ] * 5 }
if(xtype == 6) {
zu <- sqrt(rchisq(n, 3)/3)
x <- x/zu }
if(xtype == 7) {
zu <- sqrt(rchisq(n, 5)/5)
x <- x/zu }
if(xtype == 8) {
zu <- sqrt(rchisq(n, 19)/19)
x <- x/zu }
if(xtype == 9) {
zu <- sqrt(rchisq(n, dd)/dd)
x <- x/zu }
if(xtype == 10) x <- exp(x)
x <- x %*% A
center <- apply(x, 2, mean)
cov <- var(x)
md2 <- mahalanobis(x, center, cov)
if(type == 1) { out <- covmba(x) }
if(type == 2) { out <- rmba(x) }
if(type == 3) { out <- cov.mcd(x) }
if(type == 4) { out <- covfch(x) }
if(type < 5) {
center <- out$center
cov <- out$cov
}
if(type == 5) {
out <- covfch(x)
center <- out$rmnf
cov <- out$rcovf
}
if(type == 6) { out <- cmve(x) }
if(type == 7) {
out <- cmve(x)
center <- out$rmnf
cov <- out$rcovf
}
if(type == 8) { out <- covrmvn(x) }
if(type != 5 && type != 7){
center <- out$center
cov <- out$cov
}
rd2 <- mahalanobis(x, center, cov)
# need square roots for the usual distances
md <- sqrt(md2)
rd <- sqrt(rd2)
const <- sqrt(qchisq(0.5, q))/median(rd)
rd <- const * rd
corrs[i] <- cor(md, rd)
}
cmean <- mean(corrs)
cmin <- min(corrs)
clt95 <- sum(corrs < 0.95)
clt80 <- sum(corrs < 0.8)
list(cmean = cmean, cmin = cmin, clt95 = clt95, clt80 = clt80, corrs = corrs)
}

covcheck<-function(n = 100, p = 4, xtype = 1, dd = 3, eps = 0.25, tau = 5)
{# MAY NOT WORK IF p = 1.
# Simulates multivariate data and computes the population
# and sample covariance matrices for several distributions.
# p = number of predictors including intercept
# want n > 10 p
# xtype = 1 for MVN Np(0,I),
# xtype = 2 for (1 - eps) Np(0,I) + eps Np(0, (tau^2) I)
# eps = 0.1, 0.25, 0.4, and 0.6 are interesting
# xtype = 3 for multivariate t_d with d = dd degrees of freedom
# Need dd > 2.
# xtype = 4 for lognormal - E(lognormal).
#make data
x <- matrix(rnorm(n * p), nrow = n, ncol = p)
if(xtype == 1) cov <- diag(p)
if(xtype == 2) {
zu <- runif(n)
x[zu < eps, ] <- x[zu < eps, ] * tau
cov <- (1 - eps + eps*tau^2) * diag(p)
}
if(xtype == 3) {
zu <- sqrt(rchisq(n, dd)/dd)
x <- x/zu
cov <- (dd/(dd-2))*diag(p)
}
if(xtype == 4){
x <- exp(x) - exp(0.5)
cov <- (exp(2) - exp(1)) * diag(p)
}
xbar <- apply(x, 2, mean)
covhat <- var(x)
list(xbar = xbar, covhat = covhat, cov = cov)
}

covdgk<-function(x, csteps = 10)
{#computes the scaled DGK multivariate estimator, need p > 1
p <- dim(x)[2]
covs <- var(x)
mns <- apply(x, 2, mean)
## concentrate
for(i in 1:csteps) {
md2 <- mahalanobis(x, mns, covs)
medd2 <- median(md2)
mns <- apply(x[md2 <= medd2, ], 2, mean)
covs <- var(x[md2 <= medd2, ])
}
##scale for consistency at MVN
rd2 <- mahalanobis(x, mns, covs)
const <- median(rd2)/(qchisq(0.5, p))
covs <- const * covs
list(center = mns, cov = covs)
}
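## Added usage sketch (not part of the original mpack.txt): compare the
## sample covariance matrix to the population covariance matrix for the
## contaminated normal, and get the scaled DGK estimator; arguments are
## hypothetical.
# covcheck(n = 1000, p = 4, xtype = 2, eps = 0.25, tau = 5)
# covdgk(matrix(rnorm(300), ncol = 3))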
covesim<-function(n = 10, p = 2, csteps = 5, gam = 0.4, runs = 20,
outliers = 0, pm = 10){
# This R function compares various ways to robustly estimate the
# covariance matrix. The estimators used are ccov: the classical
# estimator applied to the clean cases, RFCH and RMVN.
# For RMVN want diago approx (1, 2, ...., p)
# For RFCH want diagfch approx (1, 2, ...., p)
# Need p > 1.
# outliers = 0 for no outliers and X~N(0,diag(1,...,p)),
# 1 for outliers a tight cluster at major axis (0,...,0,pm)'
# 2 for outliers a tight cluster at minor axis (pm,0, ...,0)'
# 3 for outliers X~N((pm,...,pm)',diag(1,...,p))
# 4 for outliers X[i,p] = pm
# 5 for outliers X[i,1] = pm
A <- sqrt(diag(1:p))
ccov <- 0 * A
rfche <- ccov
rmvne <- ccov
covv <- diag(p)
val <- floor(gam * n)
up <- qchisq(0.975, p)
qchi <- qchisq(0.5, p)
qtem <- 1:runs
qtem2 <- qtem
diagdiff <- 1:p
for(i in 1:runs) {
x <- matrix(rnorm(n * p), ncol = p, nrow = n)
x <- x %*% A
if(outliers == 1) {
x[1:val, ] <- matrix(rnorm(val * p, sd = 0.01), ncol = p, nrow = val)
x[1:val, p] <- x[1:val, p] + pm
}
if(outliers == 2) {
x[1:val, ] <- matrix(rnorm(val * p, sd = 0.01), ncol = p, nrow = val)
x[1:val, 1] <- x[1:val, 1] + pm
}
if(outliers == 3) {
tem <- pm + 0 * 1:p
x[1:val, ] <- x[1:val, ] + tem
}
if(outliers == 4) { x[1:val, p] <- pm }
if(outliers == 5) { x[1:val, 1] <- pm }
ccov <- ccov + var(x[ - (1:val), ])
##get the DGK estimator
covs <- var(x)
mns <- apply(x, 2, mean)
## concentrate
for(j in 1:csteps) {
md2 <- mahalanobis(x, mns, covs)
medd2 <- median(md2)
mns <- apply(x[md2 <= medd2, ], 2, mean)
covs <- var(x[md2 <= medd2, ])
}
covd <- covs
mnd <- mns
##get the MB estimator
med <- apply(x, 2, median)
md2 <- mahalanobis(x, center = med, covv)
medd2 <- median(md2)
medd3 <- medd2
## get the start
mns <- apply(x[md2 <= medd2, ], 2, mean)
covs <- var(x[md2 <= medd2, ])
## concentrate
for(j in 1:csteps) {
md2 <- mahalanobis(x, mns, covs)
medd2 <- median(md2)
mns <- apply(x[md2 <= medd2, ], 2, mean)
covs <- var(x[md2 <= medd2, ])
}
covm <- covs
mnm <- mns
##get FCH attractor
covf <- covm
mnf <- mnm
val2 <- mahalanobis(t(mnd), med, covv)
if(val2 < medd3) {
##crit = square root of det(cov)
critd <- prod(diag(chol(covd)))
critm <- prod(diag(chol(covm)))
if(critd < critm) {
covf <- covd
mnf <- mnd
}
}
## get FCH estimator
rd2 <- mahalanobis(x, mnf, covf)
const <- median(rd2)/qchi
covf <- const * covf
##reweight the above FCH estimator (mnf,covf) to get the RFCH estimator
## (rmnf,rcovf)
rd2 <- mahalanobis(x, mnf, covf)
rmnf <- apply(x[rd2 <= up, ], 2, mean)
rcovf <- var(x[rd2 <= up, ])
##use the following 3 commands for the covrmvn estimator
rd3 <- rd2
rmnmvn <- rmnf
rcovmvn <- rcovf
##
rd2 <- mahalanobis(x, rmnf, rcovf)
##rd4 is used for covrmvn
rd4 <- rd2
const <- median(rd2)/qchi
rcovf <- const * rcovf
## reweight again
rd2 <- mahalanobis(x, rmnf, rcovf)
rmnf <- apply(x[rd2 <= up, ], 2, mean)
rcovf <- var(x[rd2 <= up, ])
rd2 <- mahalanobis(x, rmnf, rcovf)
const <- median(rd2)/qchi
rcovf <- const * rcovf
rfche <- rfche + rcovf
##reweight the FCH estimator (mnf,covf) to get the covrmvn estimator
## (rmnmvn,rcovmvn) tailored for MVN data
d1 <- sum(rd3 <= up)
qchi2 <- (0.5 * 0.975 * n)/d1
qchi2 <- min(qchi2, 0.995)
qtem[i] <- qchi2
const <- median(rd4)/qchisq(qchi2, p)
rcovmvn <- const * rcovmvn
## reweight again
rd2 <- mahalanobis(x, rmnmvn, rcovmvn)
rmnmvn <- apply(x[rd2 <= up, ], 2, mean)
rcovmvn <- var(x[rd2 <= up, ])
d2 <- sum(rd2 <= up)
rd2 <- mahalanobis(x, rmnmvn, rcovmvn)
qchi2 <- (0.5 * 0.975 * n)/d2
qchi2 <- min(qchi2, 0.995)
qtem2[i] <- qchi2
const <- median(rd2)/qchisq(qchi2, p)
rcovmvn <- const * rcovmvn
rmvne <- rmvne + rcovmvn
}
ccov <- ccov/runs
rfche <- rfche/runs
rmvne <- rmvne/runs
diago <- diag(rmvne)
diagdiff <- diag(ccov) - diag(rmvne)
abssumrmvn <- sum(abs(diagdiff))
diagfch <- diag(rfche)
diagdifch <- diag(ccov) - diag(rfche)
abssumfch <- sum(abs(diagdifch))
list(ccov = ccov, rfche = rfche, rmvne = rmvne, diago = diago,
diagdiff = diagdiff, abssumrmvn = abssumrmvn, diagfch = diagfch,
diagdifch = diagdifch, abssumfch = abssumfch)
}

covfch<-function(x, csteps = 5)
{# gets the FCH and RFCH estimators and the MB attractor
# works for p = 1
zx <- x
x <- as.matrix(x)
p <- dim(x)[2]
##get the DGK estimator
covs <- var(x)
mns <- apply(x, 2, mean)
## concentrate
for(i in 1:csteps) {
md2 <- mahalanobis(x, mns, covs)
medd2 <- median(md2)
if(p > 1) { mns <- apply(x[md2 <= medd2, ], 2, mean) }
if(p == 1) { mns <- mean(zx[md2 <= medd2]) }
covs <- var(x[md2 <= medd2, ])
}
covd <- covs
mnd <- mns
##get the MB estimator
covv <- diag(p)
med <- apply(x, 2, median)
md2 <- mahalanobis(x, center = med, covv)
medd2 <- median(md2)
##get the location criterion
lcut <- medd2
## get the start
if(p > 1) { mns <- apply(x[md2 <= medd2, ], 2, mean) }
if(p == 1) { mns <- mean(zx[md2 <= medd2]) }
covs <- var(x[md2 <= medd2, ])
## concentrate
for(i in 1:csteps) {
md2 <- mahalanobis(x, mns, covs)
medd2 <- median(md2)
if(p > 1) { mns <- apply(x[md2 <= medd2, ], 2, mean) }
if(p == 1) { mns <- mean(zx[md2 <= medd2]) }
covs <- var(x[md2 <= medd2, ])
}
covm <- covs
mnm <- mns
##get FCH attractor
covf <- covm
mnf <- mnm
val <- mahalanobis(t(mnd), med, covv)
if(val < lcut) {
##crit = square root of det(cov)
critd <- prod(diag(chol(covd)))
critm <- prod(diag(chol(covm)))
if(critd < critm) {
covf <- covd
mnf <- mnd
}
}
## get FCH estimator
chisqm <- qchisq(0.5, p)
rd2 <- mahalanobis(x, mnf, covf)
const <- median(rd2)/chisqm
covf <- const * covf
##reweight the above FCH estimator (mnf,covf) to get the RFCH estimator
## (rmnf,rcovf)
rd2 <- mahalanobis(x, mnf, covf)
up <- qchisq(0.975, p)
if(p > 1) { rmnf <- apply(x[rd2 <= up, ], 2, mean) }
if(p == 1){ rmnf <- mean(zx[rd2 <= up]) }
rcovf <- var(x[rd2 <= up, ])
rd2 <- mahalanobis(x, rmnf, rcovf)
const <- median(rd2)/chisqm
rcovf <- const * rcovf
## reweight again
rd2 <- mahalanobis(x, rmnf, rcovf)
if(p > 1){ rmnf <- apply(x[rd2 <= up, ], 2, mean) }
if(p == 1){ rmnf <- mean(zx[rd2 <= up]) }
rcovf <- var(x[rd2 <= up, ])
rd2 <- mahalanobis(x, rmnf, rcovf)
const <- median(rd2)/chisqm
rcovf <- const * rcovf
list(center = mnf, cov = covf, rmnf = rmnf, rcovf = rcovf, mnm=mnm,
covm=covm)
}
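## Added usage sketch (not part of the original mpack.txt): FCH and RFCH
## from covfch above on data with a hypothetical cluster of outliers.
# x <- matrix(rnorm(200), ncol = 2)
# x[1:20, ] <- x[1:20, ] + 10
# out <- covfch(x)
# out$center    #FCH location
# out$rmnf      #RFCH location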
covfch2<-function(x, csteps = 5, locc = 0.5)
{# Gets the FCH and RFCH estimators and the MB attractor.
# Allows the location criterion cutoff to be varied.
zx <- x
x <- as.matrix(x)
p <- dim(x)[2]
##get the DGK estimator
covs <- var(x)
mns <- apply(x, 2, mean)
## concentrate
for(i in 1:csteps) {
md2 <- mahalanobis(x, mns, covs)
medd2 <- median(md2)
if(p > 1) { mns <- apply(x[md2 <= medd2, ], 2, mean) }
if(p == 1) { mns <- mean(zx[md2 <= medd2]) }
covs <- var(x[md2 <= medd2, ])
}
covd <- covs
mnd <- mns
##get the MB estimator
covv <- diag(p)
med <- apply(x, 2, median)
md2 <- mahalanobis(x, center = med, covv)
medd2 <- median(md2)
##get the location criterion cutoff
lcut <- medd2
if(locc != 0.5) lcut <- quantile(md2,locc)
## get the start
if(p > 1) { mns <- apply(x[md2 <= medd2, ], 2, mean) }
if(p == 1) { mns <- mean(zx[md2 <= medd2]) }
covs <- var(x[md2 <= medd2, ])
## concentrate
for(i in 1:csteps) {
md2 <- mahalanobis(x, mns, covs)
medd2 <- median(md2)
if(p > 1) { mns <- apply(x[md2 <= medd2, ], 2, mean) }
if(p == 1) { mns <- mean(zx[md2 <= medd2]) }
covs <- var(x[md2 <= medd2, ])
}
covm <- covs
mnm <- mns
##get FCH attractor
covf <- covm
mnf <- mnm
val <- mahalanobis(t(mnd), med, covv)
if(val < lcut) {
##crit = square root of det(cov)
critd <- prod(diag(chol(covd)))
critm <- prod(diag(chol(covm)))
if(critd < critm) {
covf <- covd
mnf <- mnd
}
}
## get FCH estimator
chisqm <- qchisq(0.5, p)
rd2 <- mahalanobis(x, mnf, covf)
const <- median(rd2)/chisqm
covf <- const * covf
##reweight the above FCH estimator (mnf,covf) to get the RFCH estimator
## (rmnf,rcovf)
rd2 <- mahalanobis(x, mnf, covf)
up <- qchisq(0.975, p)
if(p > 1) { rmnf <- apply(x[rd2 <= up, ], 2, mean) }
if(p == 1){ rmnf <- mean(zx[rd2 <= up]) }
rcovf <- var(x[rd2 <= up, ])
rd2 <- mahalanobis(x, rmnf, rcovf)
const <- median(rd2)/chisqm
rcovf <- const * rcovf
## reweight again
rd2 <- mahalanobis(x, rmnf, rcovf)
if(p > 1){ rmnf <- apply(x[rd2 <= up, ], 2, mean) }
if(p == 1){ rmnf <- mean(zx[rd2 <= up]) }
rcovf <- var(x[rd2 <= up, ])
rd2 <- mahalanobis(x, rmnf, rcovf)
const <- median(rd2)/chisqm
rcovf <- const * rcovf
list(center = mnf, cov = covf, rmnf = rmnf, rcovf = rcovf, mnm=mnm,
covm=covm)
}

covmb<-function(x, steps = 5, scale=F)
{# Computes the median ball estimator.
# Needs p > 1. If scale = T, the plotted points will roughly
# scatter about the identity line if the data is MVN
# and spherical about mu.
p <- dim(x)[2]
#Median Ball start
covv <- diag(p)
med <- apply(x, 2, median)
md2 <- mahalanobis(x, center = med, covv)
medd2 <- median(md2)
## get the half set start
mns <- apply(x[md2 <= medd2, ], 2, mean)
covs <- var(x[md2 <= medd2, ])
## concentrate
for(i in 1:steps) {
md2 <- mahalanobis(x, mns, covs)
medd2 <- median(md2)
mns <- apply(x[md2 <= medd2, ], 2, mean)
covs <- var(x[md2 <= medd2, ])
}
if(scale == T){
rd2 <- mahalanobis(x, mns, covs)
const <- median(rd2)/(qchisq(0.5, p))
covs <- const * covs
}
list(center=mns,cov=covs)
}
covmb2<-function(x, m=0, k=5, msteps=9){
# Computes the covmb2 estimator with concentration type steps. Needs p > 1.
# Use even if p > n. Look at out<-medout(x) to determine how many cases
# m are clean. Use m >= n/2.
# Estimate m if m = 0: use k >= 0, so at least half of the cases are used,
# and do the msteps concentration type steps to get a weighted median.
# The concentration type steps help the most when the outlier proportion
# is high. Try covmb2(x,msteps=0) and covmb2(x,msteps=9).
# Using msteps > 0 does slow down the function some.
p <- dim(x)[2]
#Median Ball start
covv <- diag(p)
med <- apply(x, 2, median)
#Get squared Euclidean distances from coordinatewise median.
md2 <- mahalanobis(x, center = med, covv)
if(m == 0){
if(msteps > 0){#do concentration type steps
for(i in 1:msteps){
medd <- median(md2)
medw <- apply(x[md2<=medd,], 2, median)
md2 <- mahalanobis(x, center = medw, covv)
}
}
md <- sqrt(md2)
mcut <- median(md) + k*mad(md,constant=1)
mns <- apply(x[md <= mcut, ], 2, mean)
covs <- var(x[md <= mcut, ])
}
else{ #Use m cases with the smallest distances.
mcut <- sort(md2)[m]
mns <- apply(x[md2 <= mcut, ], 2, mean)
covs <- var(x[md2 <= mcut, ])
}
list(center=mns,cov=covs)
}

covmba<-function(x, csteps = 5)
{# gets the MBA estimator, works for p = 1
zx <- x
x <- as.matrix(x)
p <- dim(x)[2]
##get the DGK estimator
covs <- var(x)
mns <- apply(x, 2, mean)
## concentrate
for(i in 1:csteps) {
md2 <- mahalanobis(x, mns, covs)
medd2 <- median(md2)
if(p > 1){ mns <- apply(x[md2 <= medd2, ], 2, mean) }
if(p == 1){ mns <- mean(zx[md2 <= medd2]) }
covs <- var(x[md2 <= medd2, ])
}
covb <- covs
mnb <- mns
##get the square root of det(covb)
critb <- prod(diag(chol(covb)))
##get the resistant estimator
covv <- diag(p)
med <- apply(x, 2, median)
md2 <- mahalanobis(x, center = med, covv)
medd2 <- median(md2)
## get the start
if(p > 1){ mns <- apply(x[md2 <= medd2, ], 2, mean) }
if(p == 1){ mns <- mean(zx[md2 <= medd2]) }
covs <- var(x[md2 <= medd2, ])
## concentrate
for(i in 1:csteps) {
md2 <- mahalanobis(x, mns, covs)
medd2 <- median(md2)
if(p > 1){ mns <- apply(x[md2 <= medd2, ], 2, mean) }
if(p == 1){ mns <- mean(zx[md2 <= medd2]) }
covs <- var(x[md2 <= medd2, ])
}
crit <- prod(diag(chol(covs)))
if(crit < critb) {
critb <- crit
covb <- covs
mnb <- mns
}
##scale for better performance at MVN
rd2 <- mahalanobis(x, mnb, covb)
const <- median(rd2)/(qchisq(0.5, p))
covb <- const * covb
list(center = mnb, cov = covb, mbL = mns, mbc = covs)
}

covmba2<-function(x, csteps = 5)
{# gets the MBA estimator, use covmba2 instead of covmba if p > 1
p <- dim(x)[2]
##get the DGK estimator
covs <- var(x)
mns <- apply(x, 2, mean)
## concentrate
for(i in 1:csteps) {
md2 <- mahalanobis(x, mns, covs)
medd2 <- median(md2)
mns <- apply(x[md2 <= medd2, ], 2, mean)
covs <- var(x[md2 <= medd2, ])
}
covb <- covs
mnb <- mns
##get the square root of det(covb)
critb <- prod(diag(chol(covb)))
##get the resistant estimator
covv <- diag(p)
med <- apply(x, 2, median)
md2 <- mahalanobis(x, center = med, covv)
medd2 <- median(md2)
## get the start
mns <- apply(x[md2 <= medd2, ], 2, mean)
covs <- var(x[md2 <= medd2, ])
## concentrate
for(i in 1:csteps) {
md2 <- mahalanobis(x, mns, covs)
medd2 <- median(md2)
mns <- apply(x[md2 <= medd2, ], 2, mean)
covs <- var(x[md2 <= medd2, ])
}
crit <- prod(diag(chol(covs)))
if(crit < critb) {
critb <- crit
covb <- covs
mnb <- mns
}
##scale for better performance at MVN
rd2 <- mahalanobis(x, mnb, covb)
const <- median(rd2)/(qchisq(0.5, p))
covb <- const * covb
list(center = mnb, cov = covb)
}
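## Added usage sketch (not part of the original mpack.txt): covmb2 above can
## be used even when p > n; the dimensions and contamination below are
## hypothetical.
# x <- matrix(rnorm(50 * 100), nrow = 50)   # n = 50 cases, p = 100 variables
# x[1:5, ] <- x[1:5, ] + 10                 # 5 outlying cases
# out <- covmb2(x, msteps = 9)
# out$center[1:5]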
covrmb<-function(x, csteps = 5){
# Need p > 1. Produces the median ball and reweighted median ball (RMB)
# estimators. RMB is tailored to estimate the covariance matrix
# of the bulk of the data if the bulk of the data is MVN
# and the outliers are "not too bad."
# This function is like covrmvn, except only the MB attractor
# is used.
x <- as.matrix(x)
p <- dim(x)[2]
n <- dim(x)[1]
up <- qchisq(0.975, p)
qchi <- qchisq(0.5, p)
##get the MB estimator
covv <- diag(p)
med <- apply(x, 2, median)
md2 <- mahalanobis(x, center = med, covv)
medd2 <- median(md2)
## get the half subset start
mnm <- apply(x[md2 <= medd2, ], 2, mean)
covm <- var(x[md2 <= medd2, ])
## concentrate
for(i in 1:csteps) {
md2 <- mahalanobis(x, mnm, covm)
medd2 <- median(md2)
mnm <- apply(x[md2 <= medd2, ], 2, mean)
covm <- var(x[md2 <= medd2, ])
}
## scale the MB estimator to behave well for MVN data
rd2 <- mahalanobis(x, mnm, covm)
const <- median(rd2)/qchi
covm <- const * covm
## reweight the scaled MB estimator (mnm,covm) twice
## to get the covrmb estimator (mnrmb,covrmb) tailored for MVN data
rd2 <- mahalanobis(x, mnm, covm)
mnrmb <- apply(x[rd2 <= up, ], 2, mean)
covrmb <- var(x[rd2 <= up, ])
d1 <- sum(rd2 <= up)
rd2 <- mahalanobis(x, mnrmb, covrmb)
qchi2 <- (0.5 * 0.975 * n)/d1
qchi2 <- min(qchi2, 0.995)
const <- median(rd2)/qchisq(qchi2, p)
covrmb <- const * covrmb
## reweight again
rd2 <- mahalanobis(x, mnrmb, covrmb)
mnrmb <- apply(x[rd2 <= up, ], 2, mean)
covrmb <- var(x[rd2 <= up, ])
d2 <- sum(rd2 <= up)
rd2 <- mahalanobis(x, mnrmb, covrmb)
qchi2 <- (0.5 * 0.975 * n)/d2
qchi2 <- min(qchi2, 0.995)
const <- median(rd2)/qchisq(qchi2, p)
covrmb <- const * covrmb
list(center = mnrmb, cov = covrmb, mnm = mnm, covm = covm)
}

covrmvn<-function(x, csteps = 5, locc = 0.5)
{# Needs number of predictors p > 1.
# This robust MLD estimator is tailored to estimate the covariance matrix
# of the bulk of the data when the bulk of the data is MVN and the outliers
# are "not too bad." The FCH and MB estimators are also produced.
x <- as.matrix(x)
p <- dim(x)[2]
n <- dim(x)[1]
up <- qchisq(0.975, p)
qchi <- qchisq(0.5, p)
##get the DGK estimator
covs <- var(x)
mns <- apply(x, 2, mean)
## concentrate
for(i in 1:csteps) {
md2 <- mahalanobis(x, mns, covs)
medd2 <- median(md2)
mns <- apply(x[md2 <= medd2, ], 2, mean)
covs <- var(x[md2 <= medd2, ])
}
covd <- covs
mnd <- mns
##get the MB estimator
covv <- diag(p)
med <- apply(x, 2, median)
md2 <- mahalanobis(x, center = med, covv)
medd2 <- median(md2)
##get the location criterion cutoff
lcut <- medd2
if(locc != 0.5) lcut <- quantile(md2,locc)
## get the start
mns <- apply(x[md2 <= medd2, ], 2, mean)
covs <- var(x[md2 <= medd2, ])
## concentrate
for(i in 1:csteps) {
md2 <- mahalanobis(x, mns, covs)
medd2 <- median(md2)
mns <- apply(x[md2 <= medd2, ], 2, mean)
covs <- var(x[md2 <= medd2, ])
}
covm <- covs
mnm <- mns
##get FCH attractor
covf <- covm
mnf <- mnm
val2 <- mahalanobis(t(mnd), med, covv)
if(val2 < lcut) {
##crit = square root of det(cov)
critd <- prod(diag(chol(covd)))
critm <- prod(diag(chol(covm)))
if(critd < critm) {
covf <- covd
mnf <- mnd
}
}
## get the FCH estimator
rd2 <- mahalanobis(x, mnf, covf)
const <- median(rd2)/qchi
covf <- const * covf
##reweight the above FCH estimator (mnf,covf) to get the cov estimator
## (rmnmvn,rcovmvn) tailored for MVN data
rd2 <- mahalanobis(x, mnf, covf)
rmnmvn <- apply(x[rd2 <= up, ], 2, mean)
rcovmvn <- var(x[rd2 <= up, ])
d1 <- sum(rd2 <= up)
rd2 <- mahalanobis(x, rmnmvn, rcovmvn)
qchi2 <- (0.5 * 0.975 * n)/d1
qchi2 <- min(qchi2, 0.995)
const <- median(rd2)/qchisq(qchi2, p)
rcovmvn <- const * rcovmvn
## reweight again
rd2 <- mahalanobis(x, rmnmvn, rcovmvn)
rmnmvn <- apply(x[rd2 <= up, ], 2, mean)
rcovmvn <- var(x[rd2 <= up, ])
d2 <- sum(rd2 <= up)
rd2 <- mahalanobis(x, rmnmvn, rcovmvn)
qchi2 <- (0.5 * 0.975 * n)/d2
qchi2 <- min(qchi2, 0.995)
const <- median(rd2)/qchisq(qchi2, p)
rcovmvn <- const * rcovmvn
list(center = rmnmvn, cov = rcovmvn, mnf = mnf, covf = covf, mnm = mnm,
covm = covm)
}
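## Added usage sketch (not part of the original mpack.txt): for clean MVN
## data with cov = diag(1,...,p), the RMVN dispersion estimate from covrmvn
## should have diagonal near (1,...,p). Data is hypothetical.
# p <- 4
# x <- matrix(rnorm(100 * p), ncol = p) %*% sqrt(diag(1:p))
# diag(covrmvn(x)$cov)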
covrob<-function(x, csteps = 5, locc = 0.5){
# gets the MBA, FCH, RFCH, RMVN and CMVE estimators
# and the MB attractor
zx <- x
x <- as.matrix(x)
p <- dim(x)[2]
n <- dim(x)[1]
##get the DGK estimator
covs <- var(x)
mns <- apply(x, 2, mean)
## concentrate
for(i in 1:csteps) {
md2 <- mahalanobis(x, mns, covs)
medd2 <- median(md2)
if(p > 1) { mns <- apply(x[md2 <= medd2, ], 2, mean) }
if(p == 1) { mns <- mean(zx[md2 <= medd2]) }
covs <- var(x[md2 <= medd2, ])
}
covd <- covs
mnd <- mns
##get the MB estimator
covv <- diag(p)
med <- apply(x, 2, median)
md2 <- mahalanobis(x, center = med, covv)
medd2 <- median(md2)
##get the location criterion cutoff
lcut <- medd2
if(locc != 0.5) lcut <- quantile(md2,locc)
## get the start
if(p > 1) { mns <- apply(x[md2 <= medd2, ], 2, mean) }
if(p == 1) { mns <- mean(zx[md2 <= medd2]) }
covs <- var(x[md2 <= medd2, ])
## concentrate
for(i in 1:csteps) {
md2 <- mahalanobis(x, mns, covs)
medd2 <- median(md2)
if(p > 1) { mns <- apply(x[md2 <= medd2, ], 2, mean) }
if(p == 1) { mns <- mean(zx[md2 <= medd2]) }
covs <- var(x[md2 <= medd2, ])
}
covm <- covs
mnm <- mns
##crit = square root of det(cov)
critd <- prod(diag(chol(covd)))
critm <- prod(diag(chol(covm)))
##get MBA attractor
covb <- covm
mnb <- mnm
if(critd < critm) {
covb <- covd
mnb <- mnd
}
##get FCH and CMVE attractors
covf <- covm
mnf <- mnm
covcmv <- covm
mncmv <- mnm
val <- mahalanobis(t(mnd), med, covv)
if(val < lcut) {
if(critd < critm) {
covf <- covd
mnf <- mnd
}
rd2 <- mahalanobis(x, mnd, covd)
critdm <- (sqrt(median(rd2)))^p * critd
rd2 <- mahalanobis(x, mnm, covm)
critmm <- (sqrt(median(rd2)))^p * critm
if(critdm < critmm) {
covcmv <- covd
mncmv <- mnd
}
}
##scale for better performance at MVN
## get MBA estimator
chisqm <- qchisq(0.5, p)
rd2 <- mahalanobis(x, mnb, covb)
const <- median(rd2)/chisqm
covb <- const * covb
## get FCH estimator
rd2 <- mahalanobis(x, mnf, covf)
const <- median(rd2)/chisqm
covf <- const * covf
## get 1st step of the RFCH estimator
rd2 <- mahalanobis(x, mnf, covf)
up <- qchisq(0.975, p)
if(p > 1) { rmnf <- apply(x[rd2 <= up, ], 2, mean) }
if(p == 1){ rmnf <- mean(zx[rd2 <= up]) }
rcovf <- var(x[rd2 <= up, ])
##use the following 3 commands for the RMVN estimator
rd3 <- rd2
rmnmvn <- rmnf
rcovmvn <- rcovf
## use rd2 for RFCH and rd4 for RMVN
rd2 <- mahalanobis(x, rmnf, rcovf)
rd4 <- rd2
##use the following command for the RFCH estimator
const <- median(rd2)/chisqm
rcovf <- const * rcovf
## reweight again
rd2 <- mahalanobis(x, rmnf, rcovf)
if(p > 1) { rmnf <- apply(x[rd2 <= up, ], 2, mean) }
if(p == 1){ rmnf <- mean(zx[rd2 <= up]) }
rcovf <- var(x[rd2 <= up, ])
rd2 <- mahalanobis(x, rmnf, rcovf)
const <- median(rd2)/chisqm
rcovf <- const * rcovf
## get the RMVN estimator
d1 <- sum(rd3 <= up)
qchi2 <- (0.5 * 0.975 * n)/d1
qchi2 <- min(qchi2, 0.995)
const <- median(rd4)/qchisq(qchi2, p)
rcovmvn <- const * rcovmvn
## reweight again
rd2 <- mahalanobis(x, rmnmvn, rcovmvn)
if(p > 1){ rmnmvn <- apply(x[rd2 <= up, ], 2, mean) }
if(p == 1){ rmnmvn <- mean(zx[rd2 <= up]) }
rcovmvn <- var(x[rd2 <= up, ])
d2 <- sum(rd2 <= up)
rd2 <- mahalanobis(x, rmnmvn, rcovmvn)
qchi2 <- (0.5 * 0.975 * n)/d2
qchi2 <- min(qchi2, 0.995)
const <- median(rd2)/qchisq(qchi2, p)
rcovmvn <- const * rcovmvn
## get CMVE estimator
rd2 <- mahalanobis(x, mncmv, covcmv)
const <- median(rd2)/(qchisq(0.5, p))
covcmv <- const * covcmv
list(center = mnb, cov = covb, mnf = mnf, covf = covf, rmnf = rmnf,
rcovf = rcovf, rmnmvn = rmnmvn, rcovmvn = rcovmvn, mncmv = mncmv,
covcmv = covcmv, mnm = mnm, covm = covm)
}
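## Added usage sketch (not part of the original mpack.txt): covrob above
## returns several of the robust estimators at once; data is hypothetical.
# out <- covrob(matrix(rnorm(300), ncol = 3))
# names(out)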
covsim2<-function(n=100, p = 2, gam = 0.4, runs = 20)
{# Needs p > 1. This function is used to determine when the DD
# plot separates outliers from non-outliers.
# Rather large n and p can be used since covfch is used.
A <- sqrt(diag(1:p))
ct <- 0
for(i in 1:runs) {
x <- matrix(rnorm(n * p), ncol = p, nrow = n)
## outliers have mean (10, 10 sqrt(2), ..., 10 sqrt(p))^T
val <- floor(gam * n)
tem <- 10 + 0 * 1:p
x[1:val, ] <- x[1:val, ] + tem
x <- x %*% A
out <- covfch(x)
#try covmba(x),rmba(x),cov.mcd(x),covOGK(x,sigmamu = scaleTau2)
center <- out$center
cov <- out$cov
rd2 <- mahalanobis(x, center, cov)
if(min(rd2[1:val]) > max(rd2[(val + 1):n])) ct <- ct + 1
}
list(ct = ct)
}

ctrviews<-function(x, Y, ii = 1)
{# Uses classical distances instead of robust distances.
# Trimmed views for 90, 80, ..., 0 percent trimming.
# Allows visualization of m and crude estimation of c beta
# in models of the form y = m(x^T beta) + e.
# Advance the view with the right mouse button,
# and in R, highlight "stop."
# Workstation: activate a graphics
# device with command "X11()" or "motif()."
x <- as.matrix(x)
center <- apply(x, 2, mean)
cov <- var(x)
rd2 <- mahalanobis(x, center, cov)
labs <- c("90%", "80%", "70%", "60%", "50%", "40%", "30%", "20%",
"10%", "0%")
tem <- seq(0.1, 1, 0.1)
for(i in ii:10) {
val <- quantile(rd2, tem[i])
bhat <- lsfit(x[rd2 <= val, ], Y[rd2 <= val])$coef
ESP <- x %*% bhat[-1]
plot(ESP, Y)
title(labs[i])
identify(ESP, Y)
print(bhat)
}
}
ddcomp<-function(x, steps = 5)
{# Need p > 1.
# Makes 4 DD plots using the DGK, FCH, FMCD and MB estimators.
# Click left mouse button to identify points.
# Click right mouse button to end the function.
# Unix systems turn on graphics device eg enter
# command "X11()" or "motif()" before using.
# R users need to type "library(MASS)" or "library(lqs)."
p <- dim(x)[2]
par(mfrow = c(2, 2))
center <- apply(x, 2, mean)
cov <- var(x)
md2 <- mahalanobis(x, center, cov)
# MD is the classical and RD the robust distance
MD <- sqrt(md2)
#DGK start
md2 <- mahalanobis(x, center, cov)
medd2 <- median(md2)
## get the start
mns <- center
covs <- cov
## concentrate
for(i in 1:steps) {
md2 <- mahalanobis(x, mns, covs)
medd2 <- median(md2)
mns <- apply(x[md2 <= medd2, ], 2, mean)
covs <- var(x[md2 <= medd2, ])
}
rd2 <- mahalanobis(x, mns, covs)
rd <- sqrt(rd2)
#Scale the RD so the plot follows the 0-1 line
#if the data is multivariate normal.
const <- sqrt(qchisq(0.5, p))/median(rd)
RDdgk <- const * rd
plot(MD, RDdgk)
abline(0, 1)
identify(MD, RDdgk)
title("DGK DD Plot")
#FCH
out <- covfch(x)
center <- out$center
cov <- out$cov
rd2 <- mahalanobis(x, center, cov)
rd <- sqrt(rd2)
RDfch <- rd
plot(MD, RDfch)
abline(0, 1)
identify(MD, RDfch)
title("FCH DD Plot")
#FMCD
out <- cov.mcd(x)
center <- out$center
cov <- out$cov
rd2 <- mahalanobis(x, center, cov)
rd <- sqrt(rd2)
#Scale the RD so the plot follows the 0-1 line
#if the data is multivariate normal.
const <- sqrt(qchisq(0.5, p))/median(rd)
RDf <- const * rd
plot(MD, RDf)
abline(0, 1)
identify(MD, RDf)
title("FMCD DD Plot")
#Median Ball start
covv <- diag(p)
med <- apply(x, 2, median)
md2 <- mahalanobis(x, center = med, covv)
medd2 <- median(md2)
## get the start
mns <- apply(x[md2 <= medd2, ], 2, mean)
covs <- var(x[md2 <= medd2, ])
## concentrate
for(i in 1:steps) {
md2 <- mahalanobis(x, mns, covs)
medd2 <- median(md2)
mns <- apply(x[md2 <= medd2, ], 2, mean)
covs <- var(x[md2 <= medd2, ])
}
rd2 <- mahalanobis(x, mns, covs)
rd <- sqrt(rd2)
#Scale the RD so the plot follows the 0-1 line
#if the data is multivariate normal.
const <- sqrt(qchisq(0.5, p))/median(rd)
RDmb <- const * rd
plot(MD, RDmb)
abline(0, 1)
identify(MD, RDmb)
title("Med Ball DD Plot")
par(mfrow=c(1,1))
}

ddcomp2<-function(x, steps = 5)
{# Need p > 1.
# Makes 4 DD plots using the MBA, FCH, RFCH and MB estimators.
# Click left mouse button to identify points.
# Click right mouse button to end the function, and in R, highlight "stop."
# Unix systems turn on graphics device eg enter
# command "X11()" or "motif()" before using.
p <- dim(x)[2]
par(mfrow = c(2, 2))
center <- apply(x, 2, mean)
cov <- var(x)
md2 <- mahalanobis(x, center, cov)
# MD is the classical and RD the robust distance
MD <- sqrt(md2)
#MBA
out <- covmba(x)
center <- out$center
cov <- out$cov
rd2 <- mahalanobis(x, center, cov)
RDm <- sqrt(rd2)
plot(MD, RDm)
abline(0, 1)
identify(MD, RDm)
title("MBA DD Plot")
#FCH
out <- covfch(x)
center <- out$center
cov <- out$cov
rd2 <- mahalanobis(x, center, cov)
RDf <- sqrt(rd2)
plot(MD, RDf)
abline(0, 1)
identify(MD, RDf)
title("FCH DD Plot")
#RFCH
center <- out$rmnf
cov <- out$rcovf
rd2 <- mahalanobis(x, center, cov)
RDr <- sqrt(rd2)
plot(MD, RDr)
abline(0, 1)
identify(MD, RDr)
title("RFCH DD Plot")
#MB
center <- out$mnm
cov <- out$covm
rd2 <- mahalanobis(x, center, cov)
RDmb <- sqrt(rd2)
plot(MD, RDmb)
identify(MD, RDmb)
title("MB DD Plot")
par(mfrow=c(1,1))
}
ddmv<-function(n = 100, p = 2, steps = 5, gam = 0.4, outtype = 2, est = 1)
{# Need p > 1.
# This function is used to determine when the DD
# plot separates outliers from non-outliers for various starts.
# Workstation needs to activate a graphics
# device with the command "X11()" or "motif()."
# Advance the view with the right mouse button, and in R, highlight "stop."
## est = 1 for DGK, 2 for median ball, 3 for MAD
A <- sqrt(diag(1:p))
x <- matrix(rnorm(n * p), ncol = p, nrow = n)
val <- floor(gam * n)
tem <- 10 + 0 * 1:p
x[1:val, ] <- x[1:val, ] + tem
#if outtype = 1, outliers are Np(10 1, Ip) nonoutliers Np(0,Ip)
if(outtype == 2) x <- x %*% A
## outliers have mean (10, 10 sqrt(2), ..., 10 sqrt(p))^T
## get the start
if(est == 1) { #DGK classical start
covs <- var(x)
mns <- apply(x, 2, mean)
}
if(est == 2) { #Median Ball high breakdown start
covv <- diag(p)
med <- apply(x, 2, median)
md2 <- mahalanobis(x, center = med, covv)
medd2 <- median(md2)
## get the start
mns <- apply(x[md2 <= medd2, ], 2, mean)
covs <- var(x[md2 <= medd2, ])
}
if(est == 3) { #MAD high breakdown start
tem <- apply(x, 2, mad)^2
covv <- diag(tem)
med <- apply(x, 2, median)
md2 <- mahalanobis(x, center = med, covv)
medd2 <- median(md2)
## get the start
mns <- apply(x[md2 <= medd2, ], 2, mean)
covs <- var(x[md2 <= medd2, ])
}
## concentrate and plot, highlighting outliers
MD <- sqrt(mahalanobis(x, mns, covs))
for(i in 1:steps) {
md <- sqrt(mahalanobis(x, mns, covs))
medd <- median(md)
mns <- apply(x[md <= medd, ], 2, mean)
covs <- var(x[md <= medd, ])
rd <- sqrt(mahalanobis(x, mns, covs))
plot(MD, rd)
points(MD[1:val], rd[1:val], pch = 15)
identify(MD, rd)
}
}

ddplot<-function(x, type = 5){
# Makes a DD plot. Needs p > 1.
# Click left mouse button to identify points.
# Click right mouse button to end the function
# and in R, highlight "stop".
# R users need to type "library(MASS)" or "library(lqs)."
# RDi uses covmba for type = 1, rmba for type = 2, cov.mcd for
# type = 3, covfch for type = 4, rfch for type = 5, cmve for type = 6,
# rcmve for type = 7, rmvn for type = 8.
# Unix systems turn on graphics device eg enter
# command "X11()" or "motif()" before using.
p <- dim(x)[2]
center <- apply(x, 2, mean)
cov <- var(x)
md2 <- mahalanobis(x, center, cov)
if(type == 1){ out <- covmba(x) }
if(type == 2){ out <- rmba(x) }
if(type == 3){ out <- cov.mcd(x) }
if(type == 4) { out <- covfch(x) }
if(type == 5) {
out <- covfch(x)
center <- out$rmnf
cov <- out$rcovf
}
if(type == 6) { out <- cmve(x) }
if(type == 7) {
out <- cmve(x)
center <- out$rmnf
cov <- out$rcovf
}
if(type == 8) { out <- covrmvn(x) }
if(type != 5 && type != 7){
center <- out$center
cov <- out$cov
}
rd2 <- mahalanobis(x, center, cov)
# md is the classical and rd the robust distance
MD <- sqrt(md2)
rd <- sqrt(rd2)
#Scale the RD so the plot follows the 0-1 line
#if the data is multivariate normal.
const <- sqrt(qchisq(0.5, p))/median(rd)
RD <- const * rd
plot(MD, RD)
abline(0, 1)
identify(MD, RD)
# list(MD = MD, RD = RD)
}
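## Added usage sketch (not part of the original mpack.txt): DD plot with
## RFCH distances for hypothetical data; type "library(MASS)" first if a
## cov.mcd type is used.
# x <- matrix(rnorm(300), ncol = 3)
# ddplot(x, type = 5)   #right click "stop" to end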
ddplot2<-function(x, type = 2)
{# Need p > 1. Makes a DD plot with RDi depending on type,
# as well as an MB DD plot.
# Click left mouse button to identify points.
# Click right mouse button to end the function
# and in R, highlight "stop".
# R users need to type "library(MASS)" or "library(lqs)."
# The plot uses covfch for the RDi if type = 1,
# rfch if type = 2 and cov.mcd if type = 3.
# Unix systems turn on graphics device eg enter
# command "X11()" or "motif()" before using.
p <- dim(x)[2]
center <- apply(x, 2, mean)
cov <- var(x)
md2 <- mahalanobis(x, center, cov)
outm <- covfch(x)
if(type == 1){
center <- outm$center
cov <- outm$cov
}
if(type == 2){
center <- outm$rmnf
cov <- outm$rcovf
}
if(type >= 3){
out <- cov.mcd(x)
center <- out$center
cov <- out$cov
}
rd2 <- mahalanobis(x, center, cov)
# md is the classical and rd the robust distance
MD <- sqrt(md2)
rd <- sqrt(rd2)
#Scale the RD so the plot follows the 0-1 line
#if the data is multivariate normal.
const <- sqrt(qchisq(0.5, p))/median(rd)
RD <- const * rd
par(mfrow=c(1,2))
plot(MD, RD)
abline(0, 1)
identify(MD, RD)
#if statement causes a bug
# if(type > 1)
rd2 <- mahalanobis(x, outm$mnm, outm$covm)
rd <- sqrt(rd2)
plot(MD,rd)
title("MB DD Plot")
identify(MD, rd)
# list(MD = MD, RD = RD)
par(mfrow=c(1,1))
}

ddplot4<-function(x, alpha = 0.1){
# Makes a DD plot with covrmvn used for the RDi.
# Need p > 1.
# Semiparametric prediction regions are added.
# Click left mouse button to identify points.
# Click right mouse button to end the function.
# Unix systems turn on graphics device eg enter
# command "X11()" or "motif()" before using.
p <- dim(x)[2]
n <- dim(x)[1]
up <- min((1 - alpha/2), (1 - alpha + 10*alpha*p/n))
if(alpha > 0.1) up <- min((1 - alpha + 0.05), (1 - alpha + p/n))
qn <- up
if(qn < 1 - alpha + 0.001) up <- 1 - alpha
center <- apply(x, 2, mean)
cov <- var(x)
md2 <- mahalanobis(x, center, cov)
out <- covrmvn(x)
center <- out$center
cov <- out$cov
rd2 <- mahalanobis(x, center, cov)
# MD is the classical and RD the robust distance
MD <- sqrt(md2)
RD <- sqrt(rd2)
plot(MD, RD)
abline(0, 1)
#get nonparametric prediction region boundary
cuplim <- sqrt(quantile(md2, up))
a <- min(RD)
b <- max(RD)
lines(c(cuplim, cuplim), c(b, a))
#get semiparametric prediction region boundary
ruplim <- sqrt(quantile(rd2, up))
a <- min(MD)
b <- max(MD)
lines(c(a, b), c(ruplim, ruplim))
#get parametric MVN prediction region boundary
mvnlim <- sqrt(qchisq(up, p))
b <- min(b, mvnlim)
lines(c(a, b), c(mvnlim, mvnlim))
identify(MD, RD)
list(cuplim = cuplim, ruplim = ruplim, mvnlim = mvnlim)
}

ddplot5<-function(x, mm=0, kk=5, steps = 9)
{# Plots Euclidean distances from the coordinatewise median
#vs. those of the covmb2 location estimator with 9 concentration type steps.
#Good plot for outlier detection even if p > n.
#Needs p > 1.
x <- as.matrix(x)
p <- dim(x)[2]
covv <- diag(p)
med <- apply(x, 2, median)
RDMED <- sqrt(mahalanobis(x, center = med, covv))
RDCOVMB2 <- sqrt(mahalanobis(x,
center = covmb2(x, m = mm, k = kk, msteps = steps)$center, covv))
plot(RDMED, RDCOVMB2)
#list(RDMED=RDMED,RDCOVMB2=RDCOVMB2)
}
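## Added usage sketch (not part of the original mpack.txt): ddplot5 above is
## useful for outlier detection even when p > n; the contamination below is
## hypothetical.
# x <- matrix(rnorm(40 * 60), nrow = 40)   # n = 40 < p = 60
# x[1:4, ] <- x[1:4, ] + 10
# ddplot5(x)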
ddiscr<-function(x, w, group, xwflag = F){
#Uses Mahalanobis distances to classify data.
#x contains classified data; group is a vector with group[i] = j
#if the ith row is a case from the jth group, j = 1, ..., k.
#w contains the data to be classified, each row is a case.
#Need each group size nh > p.
#Set xwflag = T if x = w.
#mahalanobis computes squared distances
k <- max(group)
maxd <- 1:k
x <- as.matrix(x)
w <- as.matrix(w)
n <- dim(x)[1]
m <- dim(w)[1]
nh <- 1:k
err <- 1:k
mdx <- matrix(0,nrow=n,ncol=k)
mdw <- matrix(0,nrow=m,ncol=k)
classx <- 1:n
classw <- 1:m
for(i in 1:k){
nh[i] <- sum(group==i)
xi <- as.matrix(x[group==i,])
mns <- apply(xi, 2, mean)
covs <- var(xi)
maxd[i] <- max(sqrt(mahalanobis(xi, mns, covs)))
mdx[,i] <- sqrt(mahalanobis(x, mns, covs))
mdw[,i] <- sqrt(mahalanobis(w, mns, covs))
}
for(i in 1:m){
if(sum(mdw[i,] <= maxd) == 0){
classw[i] <- order(mdw[i,]/maxd)[1]
}
else{
val <- mdw[i,]
val[val > maxd] <- max(val) + 1
classw[i] <- order(val)[1]
}
}
if(xwflag == T){ classx <- classw }
else{
for(i in 1:n){
if(sum(mdx[i,] <= maxd) == 0){
classx[i] <- order(mdx[i,]/maxd)[1]
}
else{
val <- mdx[i,]
val[val > maxd] <- max(val) + 1
classx[i] <- order(val)[1]
}
}
}
for(i in 1:k){
err[i] <- 1 - sum(classx[group==i]==i)/nh[i]
}
toterr <- 1 - sum(classx==group)/n
list(mdx=mdx,mdw=mdw,classw=classw,classx=classx,err=err,toterr=toterr)
}

ddiscr2<-function(x, w, group, xwflag=F){
#Uses Mahalanobis distances. If the covering ellipsoids overlap,
#then a nonparametric density estimator is used to classify data.
#x contains classified data; group is a vector with group[i] = j
#if the ith row is a case from the jth group, j = 1, ..., k.
#w contains the data to be classified, each row is a case.
#Need each group size nh > max(3,p). Want nh > 10p.
#mahalanobis computes squared distances
#Computes fhat for cases where the covering ellipsoids overlap.
#Set xwflag = T if x = w.
#Slow for n = 10000.
k <- max(group)
maxd <- 1:k
x <- as.matrix(x)
w <- as.matrix(w)
n <- dim(x)[1]
p <- dim(x)[2]
m <- dim(w)[1]
covv <- as.matrix(diag(p))
nh <- 1:k
err <- 1:k
radsq <- 1:k
mdx <- matrix(0,nrow=n,ncol=k)
mdw <- matrix(0,nrow=m,ncol=k)
classx <- 1:n
classw <- 1:m
for(i in 1:k){
nh[i] <- sum(group==i)
tem <- ceiling(2*sqrt(nh[i]))
xi <- as.matrix(x[group==i,])
mns <- apply(xi, 2, mean)
covs <- var(xi)
maxd[i] <- max(sqrt(mahalanobis(xi, mns, covs)))
mdx[,i] <- sqrt(mahalanobis(x, mns, covs))
mdw[,i] <- sqrt(mahalanobis(w, mns, covs))
radsq[i] <- sort(mahalanobis(xi,mns,covv))[tem]
}
for(i in 1:m){
if(sum(mdw[i,] <= maxd) == 0){
classw[i] <- order(mdw[i,]/maxd)[1] }
else{ if(sum(mdw[i,] <= maxd) == 1){
classw[i] <- order(rank(mdw[i,]>maxd))[1] }
else{
val <- mdw[i,]
val[val>maxd] <- -1
for(j in 1:k){
if(mdw[i,j] <= maxd[j]) #get estimate of f_j(w[i,])
{xj <- as.matrix(x[group==j,])
val[j] <- sum( mahalanobis(xj,w[i,],covv) <= radsq[j] )
val[j] <- val[j]/( nh[j] * sqrt(radsq[j])^p )
}
}
classw[i] <- order(val)[k]
}
}
}
if(xwflag == T){ classx <- classw }
else{
for(i in 1:n){
if(sum(mdx[i,] <= maxd) == 0){
classx[i] <- order(mdx[i,]/maxd)[1] }
else{ if(sum(mdx[i,] <= maxd) == 1){
classx[i] <- order(rank(mdx[i,]>maxd))[1] }
else{
val <- mdx[i,]
val[val>maxd] <- -1
for(j in 1:k){
if(mdx[i,j] <= maxd[j]) #get estimate of f_j(x[i,])
{xj <- as.matrix(x[group==j,])
val[j] <- sum( mahalanobis(xj,x[i,],covv) <= radsq[j] )
val[j] <- val[j]/( nh[j] * sqrt(radsq[j])^p )
}
}
classx[i] <- order(val)[k]
}
}
}
}
for(i in 1:k){
err[i] <- 1 - sum(classx[group==i]==i)/nh[i]
}
toterr <- 1 - sum(classx==group)/n
list(mdx=mdx,mdw=mdw,classw=classw,classx=classx,err=err,toterr=toterr)
}
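## Added usage sketch (not part of the original mpack.txt): classify two
## hypothetical simulated groups with ddiscr and get the apparent error rate.
# x <- rbind(matrix(rnorm(100), ncol = 2), matrix(rnorm(100, mean = 3), ncol = 2))
# group <- rep(1:2, each = 50)
# out <- ddiscr(x, x, group, xwflag = T)
# out$toterr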
# RDi uses covmba for type = 1, rmba for type = 2, cov.mcd for # type = 3, covfch for type = 4, rfch for type = 5, rmvn type = 6. med <- 1:20 for(i in 1:20) { x <- matrix(rnorm(n * p), ncol = p, nrow = n) ## For elliptically contoured data, use: #zu <- runif(n) #x[zu < eps,] <- x[zu < eps,]*5 #x <- x^2 ##For lognormal marginals, add: #x <- exp(x) center <- apply(x, 2, mean) cov <- var(x) md2 <- mahalanobis(x, center, cov) if(type == 1) { out <- covmba(x) } if(type == 2) { out <- rmba(x) } if(type == 3) { out <- cov.mcd(x) } if(type == 4) { out <- covfch(x) } if(type == 5) { out <- covfch(x) center <- out$rmnf cov <- out$rcovf } if(type == 6) { out <- covrmvn(x) } if(type != 5) { center <- out$center cov <- out$cov } rd2 <- mahalanobis(x, center, cov) md <- sqrt(md2) rd <- sqrt(rd2) #Scale the RDi so the plot follows the 0-1 line #if the data is multivariate normal. const <- sqrt(qchisq(0.5, p))/median(rd) rd <- const * rd plot(md, rd) abline(0, 1) med[i] <- median(md) #The command "identify(md,rd)" can be inserted #to slow down the plots. } list(med = med) } ddsim3<-function(n = 100, p = 4, xtype = 1, dd = 1, eps = 0.25, alpha = 0.1){ # MAY NOT WORK IF p = 1. Calls ddplot4. # Makes DD plots of data used in the function "predsim". Gets coverages # of semiparametric and robust parametric prediction regions. # Multiply x by A where xtype = 1 for MVN Np(0,I), # 2, 3, 4 and 10 (with delta = eps) for (1 - delta) Np(0,I) + delta Np(0, 25 I) # 5 for lognormal, # 6, 7, 8 and 9 for multivariate t_d with d = 3, 5, 19 or dd # mahalanobis gives squared Maha distances up <- min((1 - alpha/2), (1 - alpha + (50 * alpha)/n)) A <- sqrt(diag(1:p)) m <- n + 1 #make data x <- matrix(rnorm(m * p), nrow = m, ncol = p) if(xtype == 2) { zu <- runif(m) x[zu < 0.4, ] <- x[zu < 0.4, ] * 5 } if(xtype == 3) { zu <- runif(m) x[zu < 0.6, ] <- x[zu < 0.6, ] * 5 } if(xtype == 4) { zu <- runif(m) x[zu < 0.1, ] <- x[zu < 0.1, ] * 5 } if(xtype == 5) x <- exp(x) if(xtype == 6) { zu <- sqrt(rchisq(m, 3)/3 ) x <- x/zu } if(xtype == 7) { zu <- sqrt(rchisq(m, 5)/5 ) x <- x/zu } if(xtype == 8) { zu <- sqrt(rchisq(m, 19)/ 19) x <- x/zu } if(xtype == 9) { zu <- sqrt(rchisq(m, dd)/ dd) x <- x/zu } if(xtype == 10) { zu <- runif(m) x[zu < eps, ] <- x[zu < eps, ] * 5 } x <- x %*% A xx <- x[ - m, ] ddplot4(xx) } deltv<-function(gam = 0.5) {# Gets the asymptotic variances for lts(h) and lta(h) at the standard # double exponential distribution, where h/n -> gam. k <- -1 * log(1 - gam) num <- 2 - (2 + 2 * k + k^2) * exp( - k) den <- (gam - k * exp( - k))^2 ltsv <- num/den ltav <- 1/gam list(ltsv=ltsv, ltav=ltav) } drsim5<-function(n=100,q=4,nruns=100,h=2,tr=10,xtype=1,mtype=1,eps=0.4,dd=5) {# MAY NOT WORK IF q = 1 # Needs the dr library so only works in R. Uses FCH. # Finds ave(ESP), scaled SD, ave scaled OLS SE, and ave scaled Chen and Li SE # for the tvreg(M) estimator. # CHANGE A IF q IS NOT EVEN # Change beta, q, and A to change Ho. # Use tr = 1 for 90%, tr = 2 for 80%, ..., tr = 10 for 0% trimming # xtype = 1 for MVN Nq(0,I), # 2, 3, 4 and 10 for (1 - eps) Nq(0,I) + eps Nq(0, 25 I) # 5 for lognormal, # 6, 7, 8 and 9 for multivariate t_d with d = 3, 5, 19 or dd. set.seed(974) k <- q/2 # test Ho beta(q/2 + 1) = ...
= beta(q) = 0 beta <- 0 * 1:q beta[1:q/2] <- 1 bols <- matrix(0, nrow = nruns, ncol = (q + 1)) bsir <- matrix(0, nrow = nruns, ncol = q) bwsir <- bsir sseols <- matrix(0, nrow = nruns, ncol = q) ssecl <- sseols zols <- 0 * 1:nruns fols <- zols zsir <- zols zwsir <- zsir labs <- c("90%", "80%", "70%", "60%", "50%", "40%", "30%", "20%", "10%", "0%") A2 <- diag(q/2) A1 <- 0 * A2 A <- cbind(A1, A2) tem <- seq(0.1, 1, 0.1) for(i in 1:nruns) { #make data x <- matrix(rnorm(n * q), nrow = n, ncol = q) if(xtype == 2){ zu <- runif(n) x[zu < 0.4, ] <- x[zu < 0.4, ] * 5 } if(xtype == 3) { zu <- runif(n) x[zu < 0.6, ] <- x[zu < 0.6, ] * 5 } if(xtype == 4) { zu <- runif(n) x[zu < 0.1, ] <- x[zu < 0.1, ] * 5 } if(xtype == 5) x <- exp(x) if(xtype == 6) { zu <- sqrt(rchisq(n, 3)/3) x <- x/zu } if(xtype == 7) { zu <- sqrt(rchisq(n, 5)/5) x <- x/zu } if(xtype == 8){ zu <- sqrt(rchisq(n, 19)/19) x <- x/zu } if(xtype == 9) { zu <- sqrt(rchisq(n, dd)/dd) x <- x/zu } if(xtype == 10) { zu <- runif(n) x[zu < eps, ] <- x[zu < eps, ] * 5 } sp <- 1 + x %*% beta if(mtype == 1) y <- sp + rnorm(n) if(mtype == 2) y <- sp^2 + rnorm(n) if(mtype == 3) y <- exp(sp) + rnorm(n) if(mtype == 4) y <- (sp)^3 + rnorm(n) if(mtype == 5) y <- sin(sp)/sp + 0.01*rnorm(n) if(mtype == 6) y <- sp + sin(sp) + 0.1*rnorm(n) if(mtype == 7) y <- sqrt(abs(sp)) + 0.1*rnorm(n) #do trimming out <- covfch(x) center <- out$center cov <- out$cov rd2 <- mahalanobis(x, center, cov) val <- quantile(rd2, tem[tr]) #get untrimmed data xm <- x[rd2 <= val, ] ym <- y[rd2 <= val] #get ESP from untrimmed data out <- lsfit(xm, ym) bols[i, ] <- out$coef resm <- out$resid nm <- length(ym) msem <- t(resm) %*% resm/(nm - q - 1) cxm <- var(xm) #get OLS SE which is usually incorrect cxminv <- solve(cxm) sseols[i, ] <- sqrt(n/nm) * sqrt(msem * diag(cxminv)) #get OLS chisquare test statistic bhat <- bols[i, -1] covAb <- A %*% cxminv %*% t(A) covinv <- solve(covAb) cent <- t(A) %*% covinv %*% A num <- t(bhat) %*% cent %*% bhat zols[i] <- (nm * num)/msem #get OLS partial F statistic fols[i] <- ((nm - 1) * zols[i])/(k * nm) #get Chen and Li scaled SE mnxm <- apply(xm, 2, mean) mid <- matrix(0, nrow = q, ncol = q) for(j in 1:nm) mid <- mid + resm[j]^2 * (xm[j, ] - mnxm) %*% t(xm[j, ] - mnxm) mid <- mid/nm cmcl <- cxminv %*% mid %*% cxminv ssecl[i, ] <- sqrt(n/nm) * sqrt(diag(cmcl)) #get Statlib SIR scaled SE and test statistics out <- sir(xm, ym, h) bsir[i, ] <- out$edr[, 1] bhat <- bsir[i, ] lam <- out$evalues[1] val <- (1 - lam)/lam num <- t(bhat) %*% cent %*% bhat zsir[i] <- (nm * num)/val #get Weisberg SIR scaled SE and test statistics out <- dr(ym~xm[,1]+xm[,2]+xm[,3]+xm[,4],nslices=h) bwsir[i,] <- out$evectors[,1] bhat <- bwsir[i, ] lam <- out$evalues[1] val <- (1 - lam)/lam num <- t(bhat) %*% cent %*% bhat zwsir[i] <- (nm * num)/val } print(labs[tr]) print(beta) mnbsir <- apply(bsir, 2, mean) mnwbsir <- apply(bwsir,2,mean) mnbols <- apply(bols, 2, mean) ssdbols <- sqrt(n) * sqrt(apply(bols, 2, var)) mnsseols <- apply(sseols, 2, mean) mnssecl <- apply(ssecl, 2, mean) zolslev <- sum(zols > qchisq(0.95, k))/nruns folslev <- sum(fols > qf(0.95, k, nm - q - 1))/nruns zsirlev <- sum(zsir > qchisq(0.95, k))/nruns zwsirlev <- sum(zwsir > qchisq(0.95, k))/nruns list(mnbsir = mnbsir, mnwbsir= mnwbsir, mnbols = mnbols, ssdbols = ssdbols, mnsseols = mnsseols, mnssecl = mnssecl, zolslev = zolslev, folslev = folslev, zsirlev = zsirlev, zwsirlev=zwsirlev) } drsim6<-function(n = 100,q = 4,nruns = 100,xtype = 1,mtype = 1,eps = 0.4,dd = 5) {# MAY NOT WORK IF q = 1 # Needs dr library so 
only works in R. Uses FCH. # Finds ave(ESP) and rejection proportions for the 0% and adaptive # trimming OLS estimators. # CHANGE A IF q IS NOT EVEN # xtype = 1 for MVN Nq(0,I), # 2, 3, 4 and 10 for (1 - eps) Nq(0,I) + eps Nq(0, 25 I) # 5 for lognormal, # 6, 7, 8 and 9 for multivariate t_d, with d = 3, 5, 19 or dd set.seed(974) k <- q/2 beta <- 0 * 1:q beta[1:(q/2)] <- 1 esp <- matrix(0, nrow = n, ncol = 10) bols <- matrix(0, nrow = nruns, ncol = (q + 1)) baols <- bols fols <- 0 * 1:nruns best <- fols atrp <- best faols <- fols faolslev <- 0 labs <- c("90%", "80%", "70%", "60%", "50%", "40%", "30%", "20%", "10%", "0%") A2 <- diag(q/2) A1 <- 0 * A2 A <- cbind(A1, A2) tem <- seq(0.1, 1, 0.1) for(i in 1:nruns) { #make data x <- matrix(rnorm(n * q), nrow = n, ncol = q) if(xtype == 2) { zu <- runif(n) x[zu < 0.4, ] <- x[zu < 0.4, ] * 5 } if(xtype == 3) { zu <- runif(n) x[zu < 0.6, ] <- x[zu < 0.6, ] * 5 } if(xtype == 4) { zu <- runif(n) x[zu < 0.1, ] <- x[zu < 0.1, ] * 5 } if(xtype == 5) x <- exp(x) if(xtype == 6) { zu <- sqrt(rchisq(n, 3)/3) x <- x/zu } if(xtype == 7) { zu <- sqrt(rchisq(n, 5)/5) x <- x/zu } if(xtype == 8) { zu <- sqrt(rchisq(n, 19)/19) x <- x/zu } if(xtype == 9) { zu <- sqrt(rchisq(n, dd)/dd) x <- x/zu } if(xtype == 10) { zu <- runif(n) x[zu < eps, ] <- x[zu < eps, ] * 5 } sp <- 1 + x %*% beta if(mtype == 1) y <- sp + rnorm(n) if(mtype == 2) y <- sp^2 + rnorm(n) if(mtype == 3) y <- exp(sp) + rnorm(n) if(mtype == 4) y <- (sp)^3 + rnorm(n) if(mtype == 5) y <- sin(sp)/sp + 0.01 * rnorm(n) if(mtype == 6) y <- sp + sin(sp) + 0.1 * rnorm(n) if(mtype == 7) y <- sqrt(abs(sp)) + 0.1 * rnorm(n) #get OLS for 0% trimming xm <- x ym <- y #get ESP out <- lsfit(xm, ym) bols[i, ] <- out$coef resm <- out$resid nm <- length(ym) msem <- t(resm) %*% resm/(nm - q - 1) cxm <- var(xm) cxminv <- solve(cxm) #get OLS chisquare test statistic bhat <- bols[i, -1] covAb <- A %*% cxminv %*% t(A) covinv <- solve(covAb) cent <- t(A) %*% covinv %*% A num <- t(bhat) %*% cent %*% bhat zols <- (nm * num)/msem #get OLS partial F statistic fols[i] <- ((nm - 1) * zols)/(k * nm) #get covfch estimator out <- covfch(x) center <- out$center cov <- out$cov rd2 <- mahalanobis(x, center, cov) #get best trimming proportion btr <- 10 esp[, 10] <- x %*% bhat bcorr <- abs(cor(esp[, 10], sp)) for(j in 1:9) { val <- quantile(rd2, tem[j]) #get (xm,ym) xm <- x[rd2 <= val, ] ym <- y[rd2 <= val] #get ESP from (xm,ym) out <- lsfit(xm, ym)$coef[-1] esp[, j] <- x %*% out tcorr <- abs(cor(esp[, j], sp)) if(tcorr > bcorr) btr <- j } best[i] <- btr #get adaptive trimming proportion atr <- btr if(btr < 10) { for(j in (btr + 1):10) { tval <- abs(cor(esp[, j], esp[, btr])) if(tval > 0.95) atr <- j } } atrp[i] <- atr val <- quantile(rd2, tem[atr]) #get data from adaptive trimming xm <- x[rd2 <= val, ] ym <- y[rd2 <= val] #get ESP from adaptive trimming out <- lsfit(xm, ym) baols[i, ] <- out$coef resm <- out$resid nm <- length(ym) msem <- t(resm) %*% resm/(nm - q - 1) cxm <- var(xm) cxminv <- solve(cxm) #get OLS chisquare test statistic bhat <- baols[i, -1] covAb <- A %*% cxminv %*% t(A) covinv <- solve(covAb) cent <- t(A) %*% covinv %*% A num <- t(bhat) %*% cent %*% bhat zols <- (nm * num)/msem #get OLS partial F statistic faols[i] <- ((nm - 1) * zols)/(k * nm) #check whether adaptive trimming rejects Ho if(faols[i] > qf(0.95, q/2, nm - q - 1)) faolslev <- faolslev + 1 } print(labs[btr]) print(beta) mnbols <- apply(bols, 2, mean) ssdbols <- sqrt(n) * sqrt(apply(bols, 2, var)) folslev <- sum(fols > qf(0.95, q/2, n - q - 1))/nruns mnbaols <-
apply(baols, 2, mean) ssdbaols <- sqrt(n) * sqrt(apply(baols, 2, var)) faolslev <- faolslev/nruns list(mnbols = mnbols, mnbaols = mnbaols, ssdbols = ssdbols, ssdbaols = ssdbaols, folslev = folslev, faolslev = faolslev, best = best, atrp = atrp) } drsim7<-function(n=100,p = 4,h=4,steps = 5,gam=0.49,nruns=100,drtype=1,outliers=F) {# Need to modify dr formulas if p is not equal to 4. # This R function generates outlier data. Uses FCH. # Compares SIR, SAVE, PHD and OLS for 0 and 50% trimming # Use drtype = 1 for sir, 2 for save, 3 for phd, 4 for ols # Needs library dr. set.seed(974) beta1 <- 0 * 1:p beta1[1] <- 1 bwdr1 <- matrix(0, nrow = nruns, ncol = p) b50 <- bwdr1 mbaloc <- 0 * (1:p) mbasig <- 0 * diag(1:p) for(i in 1:nruns) { x <- matrix(rnorm(n * p), nrow = n, ncol = p) sp1 <- 1 + x %*% beta1 y <- (sp1)^3 + rnorm(n) ## code below: x outliers have mean +/- (10, 10, ..., 10)^T ## and y outliers have mean 0 if(outliers == T) { val <- floor(gam * n) val2 <- as.integer(val/2) tem <- 10 + 0 * 1:p x[1:val2, ] <- x[1:val2, ] + tem x[(val2+1):val,] <- x[(val2+1):val,] - tem y[1:val] <- rnorm(val) } out <- covfch(x, csteps = steps) mbaloc <- mbaloc + out$center mbasig <- mbasig + out$cov #do trimming center <- out$center cov <- out$cov rd2 <- mahalanobis(x, center, cov) val <- quantile(rd2, 0.5) #get untrimmed data xm <- x[rd2 <= val, ] ym <- y[rd2 <= val] #get Weisberg SIR, SAVE or PHD if(drtype==1){ out <- dr(y~x[,1]+x[,2]+x[,3]+x[,4],nslices=h,method="sir") outm <- dr(ym~xm[,1]+xm[,2]+xm[,3]+xm[,4],nslices=h,method="sir") } if(drtype==2){ out <- dr(y~x[,1]+x[,2]+x[,3]+x[,4],nslices=h,method="save") outm <- dr(ym~xm[,1]+xm[,2]+xm[,3]+xm[,4],nslices=h,method="save") } if(drtype==3){ out <- dr(y~x[,1]+x[,2]+x[,3]+x[,4],nslices=h,method="phd") outm <- dr(ym~xm[,1]+xm[,2]+xm[,3]+xm[,4],nslices=h,method="phd") } if(drtype==4){ out <- lsfit(x,y) outm <- lsfit(xm,ym) } if(drtype == 4){ bwdr1[i,] <- out$coef[-1] b50[i,] <- outm$coef[-1] } else{ bwdr1[i,] <- out$evectors[,1] b50[i,] <- outm$evectors[,1] } } mbaloc <- mbaloc/nruns mbasig <- mbasig/nruns mnwbdr1 <- apply(bwdr1,2,mean) mnb50 <- apply(b50,2,mean) list(mnwbdr1=mnwbdr1, mnb50 = mnb50, mbaloc = mbaloc, mbasig = mbasig) } ellipse<-function(x, center = apply(x, 2, mean), cov = var(x), alph = 0.95) {# Makes a covering ellipse. The x should have 2 columns. mu1 <- center[1] mu2 <- center[2] w <- solve(cov) w11 <- w[1, 1] w12 <- w[1, 2] w22 <- w[2, 2] tem <- x[, 2] - mu2 y2 <- seq(min(tem), max(tem), length = 100) xc <- qchisq(alph, 2) el <- matrix(0, 2, 2) ind <- 0 for(i in 1:100) { j1 <- (y2[i] * w12)^2 j2 <- w11 * ((y2[i])^2 * w22 - xc) # print(i) # print(j1 - j2) if((j1 - j2) >= 0) { ind <- ind + 2 tem <- (y2[i] * w12)^2 tem <- tem - w11 * ((y2[i])^2 * w22 - xc) tem <- sqrt(tem) term <- ( - y2[i] * w12 + tem)/ w11 el <- rbind(el, c((term + mu1), ( y2[i] + mu2))) term <- ( - y2[i] * w12 - tem)/ w11 el <- rbind(el, c((term + mu1), ( y2[i] + mu2))) } } el <- el[3:ind, ] nn <- dim(x)[1] if((ind - 2) > nn) { tem <- sample((ind - 2), nn) el <- el[tem, ] } xt <- cbind(x[, 1], el[, 1]) yt <- cbind(x[, 2], el[, 2]) matplot(xt, yt) } ellipse2<- function(x, center = apply(x, 2, mean), cov = var(x), cut = 4, alph = 0.95, cutoff = F) {# Makes a covering ellipse. The x should have 2 columns.
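## Example sketch for ellipse2 (a hedged illustration with bivariate data):
## x <- matrix(rnorm(200), ncol = 2)
## ellipse2(x, alph = 0.9) #classical center and covariance by default
## ellipse2(x, cut = qchisq(0.5, 2), cutoff = T) #user supplied cutoff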
mu1 <- center[1] mu2 <- center[2] w <- solve(cov) w11 <- w[1, 1] w12 <- w[1, 2] w22 <- w[2, 2] tem <- x[, 2] - mu2 y2 <- seq(min(tem), max(tem), length = 100) if(cutoff ==F) xc <- qchisq(alph, 2) else xc <- cut el <- matrix(0, 2, 2) ind <- 0 for(i in 1:100) { j1 <- (y2[i] * w12)^2 j2 <- w11 * ((y2[i])^2 * w22 - xc) # print(i) # print(j1 - j2) if((j1 - j2) >= 0) { ind <- ind + 2 tem <- (y2[i] * w12)^2 tem <- tem - w11 * ((y2[i])^2 * w22 - xc) tem <- sqrt(tem) term <- ( - y2[i] * w12 + tem)/ w11 el <- rbind(el, c((term + mu1), ( y2[i] + mu2))) term <- ( - y2[i] * w12 - tem)/ w11 el <- rbind(el, c((term + mu1), ( y2[i] + mu2))) } } el <- el[3:ind, ] nn <- dim(x)[1] if((ind - 2) > nn) { tem <- sample((ind - 2), nn) el <- el[tem, ] } xt <- cbind(x[, 1], el[, 1]) yt <- cbind(x[, 2], el[, 2]) matplot(xt, yt) } essp<- function(x, Y, M = 50, type = 3) {# Trimmed view or ESSP for M percent # trimming. Allows visualization of g # and crude estimation of c beta in models # of the form y = g(x^T beta,e). # Workstation need to activate a graphics # device with command "X11()" or "motif()." # R needs command "library(MASS)" or "library(lqs)." # Click on the right mouse button to finish, and # in R, highlight "stop." x <- as.matrix(x) tval <- M/100 if(type == 1) out <- cov.mcd(x) if(type == 2) out <- covmba(x) if(type == 3) out <- covfch(x) if(type == 4) out <- covrmvn(x) if (type <= 4){ center <- out$center cov <- out$cov} #type = 5 for rfch estimator if (type == 5){ out <- covfch(x) center <- out$rmnf cov <- out$rcovf} rd2 <- mahalanobis(x, center, cov) val <- quantile(rd2, (1 - tval)) bhat <- lsfit(x[rd2 <= val, ], Y[rd2 <= val])$coef ESP <- x %*% bhat[-1] + bhat[1] plot(ESP, Y) identify(ESP, Y) return(bhat[-1]) } FDAboot<-function(x, group, B = 1000){ #Bootstraps FDA betahat = first eigenvector. #Nominal 95% CIs #group labels are 1,2,...,g #needs library(MASS) g <- max(group) x <- as.matrix(x) p <- dim(x)[2] betas <- matrix(0,nrow=B,ncol=p) nis <- 1:g tindx <- 1:dim(x)[1] out<-lda(x,group) for(i in 1:g){ nis[i] <- sum(group==i) } for(i in 1:B){ indx <- 1:2 for(j in 1:g){ tind <- tindx[group==j] tdx<- sample(tind,nis[j],replace=T) indx <- c(indx,tdx) } indx <- indx[-c(1,2)] betas[i,]<-lda(x[indx,],group[indx])$scaling[,1] } shorci <- apply(betas,2,shorth3) list(betas=betas,shorci=shorci,out=out) } ffplot2<-function(x, y, nsamps = 7){ # For Unix, use X11() to turn on the graphics device before # using this function. For R, first type library(MASS). # Makes an FF plot with several resistant estimators. # Needs hbreg, mbareg, mbalata, and rmreg3. 
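## Example sketch for ffplot2 (assumes library(MASS) and that mbareg, hbreg,
## mbalata, and rmreg3 from this file have been sourced):
## x <- matrix(rnorm(300), ncol = 3)
## y <- as.vector(1 + x %*% c(1,1,1) + rnorm(100))
## ffplot2(x, y) #scatterplot matrix of Y and the seven fits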
n <- length(y) x <- as.matrix(x) rmat <- matrix(nrow = n, ncol = 8) olsfit <- y - lsfit(x, y)$residuals print("got OLS") almsfit <- y - lmsreg(x, y)$resid print("got ALMS") altsfit <- y - ltsreg(x, y)$residuals print("got ALTS") mbacoef <- mbareg(x, y, nsamp = nsamps)$coef MBAfit <- mbacoef[1] + x %*% mbacoef[-1] print("got MBA") bbcoef <- hbreg(x, y)$bbcoef BBfit <- bbcoef[1] + x %*% bbcoef[-1] print("got BB") mbalcoef <- mbalata(x, y, nsamp = nsamps)$coef MBALfit <- mbalcoef[1] + x %*% mbalcoef[-1] print("got MBALATA") RMREG2 <- y - rmreg3(x,y)$res rmat[, 1] <- y rmat[, 2] <- olsfit rmat[, 3] <- almsfit rmat[, 4] <- altsfit rmat[, 5] <- MBAfit rmat[, 6] <- BBfit rmat[, 7] <- MBALfit rmat[, 8] <- RMREG2 pairs(rmat, labels = c("Y", "OLS Fit", "ALMS Fit", "ALTS Fit", "MBA Fit", "BB Fit", "MBALATA", "RMREG2")) } frey <- function(alph = 0.05){ #compares the Frey correction with the Olive correction nn <- seq(20,500,by = 1) m <- length(nn) fr <- 1:m ol <- fr for(i in 1:m){ fr[i] <- min(1,1 - alph + 1.12*sqrt(alph/nn[i])) ol[i] <- 1 - alph + 10*alph/nn[i] } plot(ol,fr) abline(0,1) } fselboot<-function(x,y,B = 1000){ #needs library(leaps), n > 5p, p > 2 #bootstraps the minimum Cp model from forward selection regression #It does not make sense to do variable selection if there #is only one nontrivial predictor. x <- as.matrix(x) n <- length(y) p <- 1 + dim(x)[2] vmax <- min(p,as.integer(n/5)) vars <- as.vector(1:(p-1)) #get the full model full <- lsfit(x,y) res <- full$resid fit <- y - res #get the minimum Cp submodel tem<-regsubsets(x,y,nvmax=vmax,method="forward") out<-summary(tem) mincp <- out$which[out$cp==min(out$cp)] #do not need the constant in vin vin <- vars[mincp[-1]] sub <- lsfit(x[,vin],y) betas <- matrix(0,nrow=B,ncol=p) #bootstrap the minimum Cp submodel for(i in 1:B){ yb <- fit + sample(res,n,replace=T) tem<-regsubsets(x,y=yb,method="forward") out<-summary(tem) mincp <- out$which[out$cp==min(out$cp)] vin <- vars[mincp[-1]] indx <- c(1,1+vin) betas[i,indx] <- lsfit(x[,vin],yb)$coef } list(full=full,sub=sub,betas=betas) } gamper<-function(h, k=500) {#estimates the amount of contamination FLTS can tolerate n <- 10000 c <- 5000 gam0 <- min((n - c)/n, (1 - (1 - 0.2^(1/k))^(1/ h))) * 100 print(gam0) } gamper2<-function(p, k = 500) {#estimates the amount of contamination fmcd can tolerate n <- 10000 c <- 5000 h <- p + 1 gam0 <- min((n - c)/n, (1 - (1 - 0.2^(1/k))^(1/h))) * 100 print(gam0) } getB<-function(x, m=0, k= 5, msteps = 0){ # Gets the covmb2 subset B and the index of cases indx. # Best if p > n. # You can estimate the number of clean cases m > n/2 with the plot from: out<-medout(x) x <- as.matrix(x) p <- dim(x)[2] n <- dim(x)[1] index <- 1:n covv <- diag(p) med <- apply(x, 2, median) #Get squared Euclidean distances from the coordinatewise median. md2 <- mahalanobis(x, center = med, covv) if(m==0){ if(msteps > 0){#do concentration type steps for(i in 1:msteps){ medd <- median(md2) medw <- apply(x[md2<=medd,], 2, median) md2 <- mahalanobis(x, center = medw, covv) } } md <- sqrt(md2) mcut <- median(md) + k*mad(md,constant=1) } else{ #Use the m cases with the smallest distances. md <- sqrt(md2) mcut <- sort(md)[m] } B <- x[md <= mcut,] indx <- index[md <= mcut] list(B = B, indx=indx) } getBbig<-function(x, group){ #Gets Bbig for discriminant analysis, binary regression, or one way MANOVA. #Let x contain the data, and let group be a vector with group[i] = j #if the ith row is a case from the jth group, j = 1, ..., g. #Can be used if p > n.
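## Example sketch for getBbig with two simulated groups (covmb2 allows p > nh):
## x <- rbind(matrix(rnorm(1000),ncol=50), matrix(rnorm(1250,mean=1),ncol=50))
## group <- c(rep(1,20),rep(2,25))
## out <- getBbig(x, group) #out$Bbig stacks the covmb2 set B from each group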
g <- max(group) x <- as.matrix(x) indx <- 1:2 tindx <- 1:dim(x)[1] #get the cases used in the covmb2 set B from each group for(i in 1:g){ xi <- as.matrix(x[group==i,]) tind <- tindx[group==i] tem <- getB(xi) tind <- tind[tem$indx] indx <- c(indx,tind) } indx <- indx[-c(1,2)] Bbig <- x[indx,] grp <- group[indx] list(Bbig=Bbig,indx=indx,grp=grp) } getfubig<-function(x, group, type = 1){ # needs library(robustbase) #Gets fUbig for discriminant analysis, binary regression, or one way MANOVA. #Let x contain the data, and let group be a vector with group[i] = j #if the ith row is a case from the jth group, j = 1, ..., g. #Need each group size nh > 2(p+1). Would like nh > 20p. #type = 1: fastMCD, type = 2: DetMCD, type = 3: OGK g <- max(group) x <- as.matrix(x) indx <- 1:2 tindx <- 1:dim(x)[1] #get the cases used in the RMVN set from each group for(i in 1:g){ xi <- as.matrix(x[group==i,]) tind <- tindx[group==i] if(type == 1){ tem <- covMcd(xi) tind <- tind[tem$mcd.wt>0.5] } if(type == 2){ tem <- covMcd(xi,nsamp="deterministic") tind <- tind[tem$mcd.wt>0.5] } if(type == 3){ tem <- covOGK(xi,sigmamu=scaleTau2) #tem <- covOGK(xi,sigmamu=s_IQR) #tem <- covOGK(xi,sigmamu=s_Sn) #tem <- covOGK(xi,sigmamu=s_Qn) tind <- tind[tem$weights>0.5] } indx <- c(indx,tind) } indx <- indx[-c(1,2)] fUbig <- x[indx,] grp <- group[indx] list(fUbig=fUbig,indx=indx,grp=grp) } getu<-function(x, csteps = 5, locc = 0.5){ # Gets the RMVN subset U and the index of cases indx. # Needs the number of predictors > 1. x <- as.matrix(x) p <- dim(x)[2] n <- dim(x)[1] index <- 1:n up <- qchisq(0.975, p) qchi <- qchisq(0.5, p) ##get the DGK estimator covs <- var(x) mns <- apply(x, 2, mean) ## concentrate for(i in 1:csteps) { md2 <- mahalanobis(x, mns, covs) medd2 <- median(md2) mns <- apply(x[md2 <= medd2, ], 2, mean) covs <- var(x[md2 <= medd2, ]) } covd <- covs mnd <- mns ##get the MB estimator covv <- diag(p) med <- apply(x, 2, median) md2 <- mahalanobis(x, center = med, covv) medd2 <- median(md2) ##get the location criterion cutoff lcut <- medd2 if(locc != 0.5) lcut <- quantile(md2,locc) ## get the start mns <- apply(x[md2 <= medd2, ], 2, mean) covs <- var(x[md2 <= medd2, ]) ## concentrate for(i in 1:csteps) { md2 <- mahalanobis(x, mns, covs) medd2 <- median(md2) mns <- apply(x[md2 <= medd2, ], 2, mean) covs <- var(x[md2 <= medd2, ]) } covm <- covs mnm <- mns ##get FCH attractor covf <- covm mnf <- mnm val2 <- mahalanobis(t(mnd), med, covv) if(val2 < lcut) { ##crit = square root of det(cov) critd <- prod(diag(chol(covd))) critm <- prod(diag(chol(covm))) if(critd < critm) { covf <- covd mnf <- mnd } } ## get the FCH estimator rd2 <- mahalanobis(x, mnf, covf) const <- median(rd2)/qchi covf <- const * covf ##reweight the above FCH estimator (mnf,covf) to get the cov estimator ## (rmnmvn,rcovmvn) tailored for MVN data rd2 <- mahalanobis(x, mnf, covf) rmnmvn <- apply(x[rd2 <= up, ], 2, mean) rcovmvn <- var(x[rd2 <= up, ]) d1 <- sum(rd2 <= up) rd2 <- mahalanobis(x, rmnmvn, rcovmvn) qchi2 <- (0.5 * 0.975 * n)/d1 qchi2 <- min(qchi2, 0.995) const <- median(rd2)/qchisq(qchi2, p) rcovmvn <- const * rcovmvn ## get U rd2 <- mahalanobis(x, rmnmvn, rcovmvn) U <- x[rd2 <= up, ] indx <- index[rd2 <= up] list(U = U, indx=indx) } getubig<-function(x, group){ # Gets Ubig for discriminant analysis, binary regression, or one way MANOVA. #Let x contain the data, and let group be a vector with group[i] = j #if the ith row is a case from the jth group, j = 1, ..., g. #Need each group size nh > 2(p+1). Would like nh > 20p.
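## Example sketch for getubig (each group needs nh > 2(p+1)):
## x <- rbind(matrix(rnorm(400),ncol=4), matrix(rnorm(400,mean=2),ncol=4))
## group <- c(rep(1,100),rep(2,100))
## out <- getubig(x, group) #out$Ubig stacks the RMVN set U from each group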
g <- max(group) x <- as.matrix(x) indx <- 1:2 tindx <- 1:dim(x)[1] #get the cases used in the RMVN set from each group for(i in 1:g){ xi <- as.matrix(x[group==i,]) tind <- tindx[group==i] tem <- getu(xi) tind <- tind[tem$indx] indx <- c(indx,tind) } indx <- indx[-c(1,2)] Ubig <- x[indx,] grp <- group[indx] list(Ubig=Ubig,indx=indx,grp=grp) } getuc<-function(x, group){ # Gets Uc for discriminant analysis or one way MANOVA # assuming the g groups or populations only differ in mean. #So Yij - muj are iid where j = 1, ..., g. BIG ASSUMPTION! #Subtracts the coordinatewise median from each group. #Then gets the RMVN set from the 0 median data. #Then gets the corresponding x data. #Let x contain the data, and let group be a vector with group[i] = j #if the ith row is a case from the jth group, j = 1, ..., g. #Need each group size nj > 2(p+1). Would like nj > 20p. g <- max(group) x <- as.matrix(x) Uz <- x[1:2,] #Subtract the group coordinatewise median from the data in each group. #Combine the groups into one data set Uz of nearly iid data #if the medians approximate the means. for(i in 1:g){ xi <- as.matrix(x[group==i,]) med <- apply(xi, 2, median) #one <- 1 + 0*1:dim(xi)[1];xc <- xi - one%*%t(med) xc <- sweep(xi,2,med) Uz <- rbind(Uz,xc) } Uz <- Uz[-c(1,2),] out <- getu(Uz) indx <- out$indx Uc <- x[indx,] grp <- group[indx] list(Uc=Uc,indx=indx,grp=grp) } gsqboot<-function(x, B = 1000){ #bootstraps Fan's squared cor coef of the correlation matrix x <- as.matrix(x) p <- dim(x)[2] n <- dim(x)[1] Ip <- diag(p) indx <- 1:n gsqs <- 1:B for(i in 1:B){ tem <- sample(indx,n,replace=T) out <- cor(x[tem,]) ##get det(R*) detR <- prod(diag(chol(out)))^2 temp <- (out - Ip)%*%(out - Ip) ##get squared Frobenius norm of (R* - Ip)^2 fr <- sum(diag(temp)) gsqs[i] <- fr/(2*detR + fr) } list(gsqs=gsqs) } gsqbootsim<-function(n = 100, p = 4, BB=1000, nruns = 100, type = 1, psi = 0.0, dd=7, eps = 0.25, alph = 0.05){ #Simulates the bootstrap for Fan's squared cor coef of the correlation matrix. # Use 0 <= psi < 1. # Make x where type = 1 for MVN Np(0,I), # 2, 3, 4 and 5 (with delta = eps) for (1 - delta) Np(0,I) + delta Np(0, 25 I) # 6, 7, 8 and 9 for multivariate t_d with d = 3, 5, 19 or dd # 10 for lognormal. # Multiply x by A: for MVN data this results # in a covariance matrix with eigenvector c(1, ..., 1)^T # corresponding to the largest eigenvalue. As psi gets # close to 1, the data clusters about the line in the # direction of (1, ..., 1)^T. See Maronna and Zamar (2002). # cor(X_i,X_j) = [2 psi +(p-2)psi^2]/[1 + (p-1)psi^2], i not = j # when the correlation exists.
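## Example sketch: a short run of gsqbootsim (small BB and nruns keep the run
## time modest; assumes gsqboot and shorthLU from this file have been sourced):
## gsqbootsim(n=100, p=4, BB=200, nruns=50, psi=0.5)
## The three CIs whose coverages and lengths are returned are described below.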
#Makes the shorth CI [Ln,Un], the lower CI [0,U] and the upper CI [L,1] cicov <- 0 avelen <- 0 ucicov <- 0 uavelen <- 0 lcicov <- 0 lavelen <- 0 rho <- (2*psi + (p-2)*psi^2)/(1 + (p-1)*psi^2) gsq <- p*(p-1)*rho^2/( 2*(1-rho)^(p-1) * (1+(p-1)*rho) + p*(p-1)*rho^2 ) A <- matrix(psi,nrow=p,ncol=p) diag(A) <- 1 for(i in 1:nruns) { #make data x <- matrix(rnorm(n * p), nrow = n, ncol = p) if(type == 2) { zu <- runif(n) x[zu < 0.4, ] <- x[zu < 0.4, ] * 5 } if(type == 3) { zu <- runif(n) x[zu < 0.6, ] <- x[zu < 0.6, ] * 5 } if(type == 4) { zu <- runif(n) x[zu < 0.1, ] <- x[zu < 0.1, ] * 5 } if(type == 5) { zu <- runif(n) x[zu < eps, ] <- x[zu < eps, ] * 5 } if(type == 6) { zu <- sqrt(rchisq(n, 3)/3) x <- x/zu } if(type == 7) { zu <- sqrt(rchisq(n, 5)/5) x <- x/zu } if(type == 8) { zu <- sqrt(rchisq(n, 19)/19) x <- x/zu } if(type == 9) { zu <- sqrt(rchisq(n, dd)/dd) x <- x/zu } if(type == 10) x <- exp(x) x <- x %*% A gsqrs <-gsqboot(x, B= BB)$gsqs #nonparametric bootstrap tem <- shorthLU(gsqrs,alpha=alph) if(gsq >= tem$shorth[1] && gsq <= tem$shorth[2]) cicov <- cicov + 1 avelen <- avelen + tem$shorth[2] - tem$shorth[1] if(gsq <= tem$right) lcicov <- lcicov + 1 lavelen <- lavelen + tem$right if(gsq >= tem$left) ucicov <- ucicov + 1 uavelen <- uavelen + 1 - tem$left } cicov <- cicov/nruns avelen <- avelen/nruns lcicov <- lcicov/nruns lavelen <- lavelen/nruns ucicov <- ucicov/nruns uavelen <- uavelen/nruns list(cicov=cicov,avelen=avelen,lcicov=lcicov,lavelen=lavelen, ucicov=ucicov,uavelen=uavelen)} hbplot<-function(x, Y, aa = 1.4, rr = T, typ = 1) {# Makes the response plot for hbreg estimator and attractors. #Click on right mouse button to advance plot, and in R, highlight #"stop." # For R, type "library(MASS)" before using this function. # The hbreg estimator uses MBA if typ = 1, ltsreg if typ = 2, # mbalata if typ = 3. x <- as.matrix(x) out <- hbreg(x, Y, a = aa, rreg = rr, type = typ) bhat <- out$coef HBFIT <- bhat[1] + x %*% bhat[-1] bhat <- out$olscoef OLSFIT <- bhat[1] + x %*% bhat[-1] bhat <- out$arobcoef AROBFIT <- bhat[1] + x %*% bhat[-1] bhat <- out$bbcoef BBFIT <- bhat[1] + x %*% bhat[-1] par(mfrow = c(2, 2)) plot(HBFIT, Y) abline(0, 1) identify(HBFIT, Y) plot(OLSFIT, Y) abline(0, 1) identify(OLSFIT, Y) plot(AROBFIT, Y) abline(0, 1) identify(AROBFIT, Y) plot(BBFIT, Y) abline(0, 1) identify(BBFIT, Y) par(mfrow = c(1, 1)) } #used in rmltreg hbreg<-function(x, y, csteps = 10, a = 1.4, rreg = T, type = 1) {# Gets the hbreg estimator with 10 concentration steps applied to # the HB attractor BB. Uses LTA criterion to screen the 3 attractors. # For R, type "library(MASS)" before using this function. # If rreg = F, arobcoef = olscoef, if T, arobcoef uses # mbareg if type = 1, ltsreg if type = 2, mbalata if type = 3 # rmreg2 if type =4. # Calls mbareg, mbalata, rmreg3 which is rmreg2 without plots. 
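## Example sketch for hbreg on simulated data with gross outliers
## (assumes library(MASS) and mbareg, mbalata from this file):
## x <- matrix(rnorm(300), ncol = 3)
## y <- as.vector(1 + x %*% c(1,1,1) + rnorm(100))
## y[1:10] <- y[1:10] + 100 #gross outliers in the response
## hbreg(x, y)$coef #compare with lsfit(x, y)$coef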
med <- median(y) madd <- mad(y, constant = 1) x <- as.matrix(x) n <- dim(x)[1] nc <- dim(x)[2] + 1 #nc is the number of predictors including the intercept temp <- lsfit(x, y) hbf <- temp$coef absres <- abs(temp$residuals) indx <- (absres <= median(absres)) critf <- sum(absres[indx]) #mbareg or lmsreg may work better than ltsreg if(rreg == T){ if(type == 1){ tem <- mbareg(x, y) temres <- y - tem$coef[1] - x %*% tem$coef[-1] absres <- abs(temres)} if(type == 2){ tem <- ltsreg(x, y) absres <- abs(tem$residuals)} if(type == 3){ tem <- mbalata(x,y) temres <- y - tem$coef[1] - x %*% tem$coef[-1] absres <- abs(temres)} if(type == 4){ tem <- rmreg3(x,y) temres <- y - tem$Bhat[1] - x %*% tem$Bhat[-1] absres <- abs(temres)} indx <- (absres <= median(absres)) crit <- a*sum(absres[indx]) if(crit < critf) { critf <- crit hbf <- tem$coef} } #get y's closest to median y indx <- (y >= (med - madd) & y <= (med + madd)) #get the HB attractor bk = BB bk <- lsfit(x[indx, ], y[indx])$coef res <- y - (x %*% bk[2:nc] + bk[1]) ressq <- res^2 m <- median(ressq) for(i in 1:csteps) { indx <- (ressq <= m) bk <- lsfit(x[indx, ], y[indx])$coef res <- y - (x %*% bk[2:nc] + bk[1]) ressq <- res^2 m <- median(ressq) } bb <- bk res <- y - (x %*% bb[2:nc] + bb[1]) absres <- abs(res) indx <- (absres <= median(absres)) crit <- a*sum(absres[indx]) if(crit < critf) hbf <- bb if(rreg == T && type != 4) arobcoef <- tem$coef else if (rreg == T && type == 4) arobcoef <- as.vector(tem$Bhat) else arobcoef <- temp$coef list(coef = hbf, olscoef = temp$coef, arobcoef = arobcoef, bbcoef = bb) } hbregsim<-function(n = 100, p = 4, csteps = 10, nruns = 10, aa = 1.4, rr = T, typ = 1, sige = 1, sigx = 1, etype = 1){ # Simulates MLR model and gets fits from hbreg. # For R, type "library(MASS)" before using this function. # For MLR, want mnbhat = (1, ..., 1), sd(i) = 1/sqrt(n). # If rr = F, arobcoef = olscoef, if T, arobcoef uses # mbareg if typ = 1, ltsreg if typ = 2, mbalata if typ = 3, rmreg3 if typ = 4. # errors are normal if etype = 1, shifted exponential for etype=2, shifted lognormal if etype=3 # q <- p - 1 beta <- 1 + 0 * 1:q # beta <- 1:q hbhat <- matrix(0, nrow = nruns, ncol = p) olshat <- hbhat arobhat <- hbhat bbhat <- hbhat for(i in 1:nruns) { x <- matrix(rnorm(n * q, sd = sigx), nrow = n, ncol = q) #sigx scales x, sige the errors if (etype == 1) err <- rnorm(n, sd = sige) if (etype == 2) err <- sige*(rexp(n) - 1) if (etype == 3) err <- sige*(exp(rnorm(n)) - exp(0.5)) y <- 1 + x %*% beta + err out <- hbreg(x, y, csteps = csteps, a = aa, rreg = rr, type = typ) hbhat[i, ] <- out$coef olshat[i, ] <- out$olscoef arobhat[i, ] <- out$arobcoef bbhat[i, ] <- out$bbcoef } mnhbhat <- apply(hbhat, 2, mean) sdhbhat <- sqrt(apply(hbhat, 2, var)) mnolshat <- apply(olshat, 2, mean) sdolshat <- sqrt(apply(olshat, 2, var)) mnarobhat <- apply(arobhat, 2, mean) sdarobhat <- sqrt(apply(arobhat, 2, var)) mnbbhat <- apply(bbhat, 2, mean) sdbbhat <- sqrt(apply(bbhat, 2, var)) list(mnhbhat = mnhbhat, sdhbhat = sdhbhat, mnolshat = mnolshat, sdolshat = sdolshat, mnarobhat = mnarobhat, sdarobhat = sdarobhat, mnbbhat = mnbbhat, sdbbhat = sdbbhat) } hdhot1wsim<-function(n=100,p=10,B=100,nruns=100,xtype=1,eps=0.4,dd=4,delta=0, covtyp=1,psi=0.1,alpha=0.1){ # This R function simulates the one sample Hotelling's T^2 test based on the # bootstrap where n/p is small and the identity matrix is the dispersion # matrix. Need p > 1. Here p is the dimension of the mean mu. # In the literature we sometimes use m instead of p. # Calls identrgn.
# xtype = 1 for MVN Np(0,I), # 2 for (1 - eps) Np(0,I) + eps Np(0, 25 I) # 3 for multivariate t_d with d = dd # 4 for lognormal. # Power can be estimated by increasing delta so mu = delta(1,...,1) # and mu0 = (0, ..., 0). # For MVN data, Cov(x) = I for covtyp=1. # Cov(x) = diag(1,...,p) for covtyp=2. # For covtyp = 3, cor(x_i,x_j) = rho for i not= j and # for MVN data this results in a covariance matrix with eigenvector # c(1, ..., 1)^T corresponding to the largest eigenvalue. As psi gets # close to 1, the data clusters about the line in the # direction of (1, ..., 1)^T. # cor(X_i,X_j) = [2 psi +(p-2)psi^2]/[1 + (p-1)psi^2], i not = j # when the correlation exists. A <- sqrt(diag(1:p)) if(covtyp==3){ A <- matrix(psi,nrow=p,ncol=p) diag(A) <- 1} munot <- 0 * (1:p) mu <- delta * (1 + munot) indx <- 1:n mnmus <- matrix(0,nrow=B,ncol=p) medmus <- mnmus prcv <- 0 brcv <- 0 prmedcv <- 0 brmedcv <- 0 chisqcut <- qchisq(p=(1-alpha),df=p) cutoffs <- 1:nruns cutbr <- cutoffs for(i in 1:nruns) { #make data x <- matrix(rnorm(n*p), ncol = p, nrow = n) if(xtype == 2) { zu <- runif(n) x[zu < eps, ] <- x[zu < eps, ] * 5 } if(xtype == 3) { zu <- sqrt(rchisq(n, dd)/dd) x <- x/zu } if(xtype == 4){ x <- exp(x) x <- x-exp(0.5) } if(covtyp>1){ x <- x %*% A} x <- mu + x cent <- apply(x,2,mean) centmed <- apply(x,2,median) #x has mean mu =delta(1,...,1)^T #get bootstrapped xbar1 for(j in 1:B){ tem <- sample(indx,n,replace=T) mnx <- apply(x[tem,],2,mean) mnmus[j,] <- mnx medx <- apply(x[tem,],2,median) medmus[j,] <- medx } outmn <- identrgn(mnmus,stat=cent,alpha=alpha) prcv <- prcv + outmn$inr brcv <- brcv + outmn$inr2 cutoffs[i]<- (outmn$cuplim)^2 cutbr[i] <- (outmn$cuplim2)^2 outmed <- identrgn(medmus,stat=centmed,alpha=alpha) prmedcv <- prmedcv + outmed$inr brmedcv <- brmedcv + outmed$inr2 } #prop of times Ho is rejected prcv <- 1 - prcv/nruns brcv <- 1 - brcv/nruns prmedcv <- 1 - prmedcv/nruns brmedcv <- 1 - brmedcv/nruns mncut <- mean(cutoffs) mncutbr <- mean(cutbr) list(chisqcut=chisqcut,mncut=mncut,mncutbr=mncutbr,prcv=prcv,brcv=brcv, prmedcv=prmedcv,brmedcv=brmedcv) } hdhot2wsim<-function(n1=100,n2=100,p=10,B=100,nruns=100,xtype=1,sig=1, eps=0.4,dd=4,delta=0,covtyp=1,alpha=0.05){ # This R function simulates the two sample Hotelling's T^2 test based on the # bootstrap where n/p is small and the identity matrix is the dispersion #matrix. Need p > 1. #Calls identrgn. # xtype = 1 for MVN Np(0,I), # 2 for (1 - eps) Np(0,I) + eps Np(0, 25 I) # 3 for multivariate t_d with d = dd # 4 for lognormal. # Power can be estimated by increasing delta so mu = delta(1,...,1) # and mu2 = (0, ..., 0). # Cov(x) = I, Cov(x2) = sig^2 Cov(x) for covtyp=1. # Cov(x) = diag(1,2,...,p), Cov(x2) = sig^2 Cov(x) for covtyp=2. 
# Cov(x) = I, Cov(x2) = sig^2 diag(1,2,...,p) for covtyp=3 A <- sqrt(diag(1:p)) munot <- 0 * (1:p) mu <- delta * (1 + munot) indx <- 1:n1 indx2 <- 1:n2 mnmus <- matrix(0,nrow=B,ncol=p) medmus <- mnmus prcv <- 0 brcv <- 0 prmedcv <- 0 brmedcv <- 0 chisqcut <- qchisq(p=(1-alpha),df=p) cutoffs <- 1:nruns cutbr <- cutoffs for(i in 1:nruns) { #make data x <- matrix(rnorm(n1 * p), ncol = p, nrow = n1) x2 <- matrix(rnorm(n2 * p), ncol = p, nrow = n2) if(xtype == 2) { zu <- runif(n1) x[zu < eps, ] <- x[zu < eps, ] * 5 zu <- runif(n2) x2[zu < eps, ] <- x2[zu < eps, ] * 5 } if(xtype == 3) { zu <- sqrt(rchisq(n1, dd)/dd) x <- x/zu zu <- sqrt(rchisq(n2, dd)/dd) x2 <- x2/zu } if(xtype == 4){ x <- exp(x) x <- x-exp(0.5) x2 <- exp(x2) x2 <- x2 - exp(0.5) } if(covtyp==2){ x <- x %*% A x2 <- x2 %*% A} if(covtyp==3){ x2 <- x2 %*% A} x2 <- sig * x2 x <- mu + x cent <- apply(x,2,mean) - apply(x2,2,mean) centmed <- apply(x,2,median) - apply(x2,2,median) #x has mean mu =delta(1,...,1)^T, x2 has mean (0,...,0)^T #get bootstrapped xbar1 - xbar2 for(j in 1:B){ tem <- sample(indx,n1,replace=T) mnx <- apply(x[tem,],2,mean) medx <- apply(x[tem,],2,median) tem <- sample(indx2,n2,replace=T) mnx2 <- apply(x2[tem,],2,mean) medx2 <- apply(x2[tem,],2,median) mnmus[j,] <- mnx-mnx2 medmus[j,] <- medx-medx2 } outmn <- identrgn(mnmus,stat=cent,alpha=alpha) prcv <- prcv + outmn$inr brcv <- brcv + outmn$inr2 cutoffs[i]<-(outmn$cuplim)^2 cutbr[i] <- (outmn$cuplim2)^2 outmed <- identrgn(medmus,stat=centmed,alpha=alpha) prmedcv <- prmedcv + outmed$inr brmedcv <- brmedcv + outmed$inr2 } #prop of times Ho is rejected prcv <- 1 - prcv/nruns brcv <- 1 - brcv/nruns prmedcv <- 1 - prmedcv/nruns brmedcv <- 1 - brmedcv/nruns mncut <- mean(cutoffs) mncutbr <- mean(cutbr) list(chisqcut=chisqcut,mncut=mncut,mncutbr=mncutbr,prcv=prcv,brcv=brcv, prmedcv=prmedcv,brmedcv=brmedcv) } hdmansim<-function(n1=100,n2=100,n3=100,m=2,B=100,nruns=100,ytype=1, sig2=1,sig3=1,eps=0.4,dd=4,delta1=0,delta2=0,delta3=0,cov3I=F,alpha=0.1){ # This function simulates high dimensional one way Manova type tests for # Ho: mu_1 = mu_2 = mu_3 where p = g = 3 = number of groups. # Has the test using C_n = I. Test with standardized data is also given. # yi is m by 1. # Need m > 1. Want m >= ni. # Multiply y by A where ytype = 1 for MVN Nm(0,I), # 2 for (1 - eps) Nm(0,I) + eps Nm(0, 25 I), # 3 for multivariate t_d with d = dd, # 4 for lognormal. # Power can be estimated by using unequal deltai so mu1 = delta1(1,...,1) # and mu2 = delta2(1, ..., 1), mu3 = delta3(1,...,1). # Cov(y1) = diag(1,2,...,m), Cov(y2) = sig^2 Cov(y1) for clean data. # Cov(y3) = sig^3 Cov(y1) for clean data if cov3I = F, # or Cov(y3) = cI_3 if cov3I = T. 
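## Example sketch: a short run under Ho (all deltai = 0), so the rejection
## proportions prcv, brcv, sprcv, sbrcv should be near alpha:
## hdmansim(n1=50, n2=50, n3=50, m=100, B=100, nruns=50)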
A <- sqrt(diag(1:m)) munot <- 0 * (1:m) mu1 <- delta1 * (1 + munot) mu2 <- delta2 * (1 + munot) mu3 <- delta3 * (1 + munot) indx1 <- 1:n1 indx2 <- 1:n2 indx3 <- 1:n3 mnmus <- matrix(0,nrow=B,ncol=2*m) smnmus <- matrix(0,nrow=B,ncol=2*m) cutoffs <- 1:nruns cutbr <- cutoffs scutoffs <- cutoffs scutbr <- cutoffs prcv <- 0 brcv <- 0 sprcv <- 0 sbrcv <- 0 for(i in 1:nruns) { #make data y1 <- matrix(rnorm(n1 * m), ncol = m, nrow = n1) y2 <- matrix(rnorm(n2 * m), ncol = m, nrow = n2) y3 <- matrix(rnorm(n3 * m), ncol = m, nrow = n3) if(ytype == 2) { zu <- runif(n1) y1[zu < eps, ] <- y1[zu < eps, ] * 5 zu <- runif(n2) y2[zu < eps, ] <- y2[zu < eps, ] * 5 zu <- runif(n3) y3[zu < eps, ] <- y3[zu < eps, ] * 5 } if(ytype == 3) { zu <- sqrt(rchisq(n1, dd)/dd) y1 <- y1/zu zu <- sqrt(rchisq(n2, dd)/dd) y2 <- y2/zu zu <- sqrt(rchisq(n3, dd)/dd) y3 <- y3/zu } if(ytype == 4){ y1 <- exp(y1) y1 <- y1 - exp(0.5) y2 <- exp(y2) y2 <- y2 - exp(0.5) y3 <- exp(y3) y3 <- y3 - exp(0.5) } y1 <- y1 %*% A y2 <- y2 %*% A y2 <- sig2 * y2 if( cov3I != T){ y3 <- y3 %*% A y3 <- sig3 * y3} y1 <- mu1 + y1 y2 <- mu2 + y2 y3 <- mu3 + y3 #yi has mean mui = deltai (1,...,1)^T diag1 <- 1/apply(y1,2,sd) D <- diag(diag1) z1 <- y1%*%D diag1 <- 1/apply(y2,2,sd) D <- diag(diag1) z2 <- y2%*%D diag1 <- 1/apply(y3,2,sd) D <- diag(diag1) z3 <- y3%*%D #Get the classical test w T1 <- apply(y1,2,mean) T2 <- apply(y2,2,mean) T3 <- apply(y3,2,mean) w <- c(T1-T3,T2-T3) T1s <- apply(z1,2,mean) T2s <- apply(z2,2,mean) T3s <- apply(z3,2,mean) ws <- c(T1s-T3s,T2s-T3s) #get bootstrapped Ty - Ty3, Ty2 - Ty3 for various statistics T for(j in 1:B){ tem <- sample(indx1,n1,replace=T) mny1 <- apply(y1[tem,],2,mean) mnz1 <- apply(z1[tem,],2,mean) tem <- sample(indx2,n2,replace=T) mny2 <- apply(y2[tem,],2,mean) mnz2 <- apply(z2[tem,],2,mean) tem <- sample(indx3,n3,replace=T) mny3 <- apply(y3[tem,],2,mean) mnz3 <- apply(z3[tem,],2,mean) mnmus[j,] <- c(mny1-mny3,mny2-mny3) smnmus[j,] <- c(mnz1-mnz3,mnz2-mnz3) } outmn <- identrgn(mnmus,stat=w,alpha=alpha) prcv <- prcv + outmn$inr brcv <- brcv + outmn$inr2 cutoffs[i]<-(outmn$cuplim)^2 cutbr[i] <- (outmn$cuplim2)^2 outmn <- identrgn(smnmus,stat=ws,alpha=alpha) sprcv <- sprcv + outmn$inr sbrcv <- sbrcv + outmn$inr2 scutoffs[i]<-(outmn$cuplim)^2 scutbr[i] <- (outmn$cuplim2)^2 } #prop of times Ho is rejected prcv <- 1 - prcv/nruns brcv <- 1 - brcv/nruns sprcv <- 1 - sprcv/nruns sbrcv <- 1 - sbrcv/nruns # list(prcv=prcv,brcv=brcv,sprcv=sprcv,sbrcv=sbrcv) } hdr2<-function(x,xf,alph=0.05){ #see if xf is in the estimated highest density region #scaled nonparametric density estimator, poor for p > 2 #kernel is uniform on a hypersphere x <- as.matrix(x) n <- dim(x)[1] tem <- ceiling(2*sqrt(n)) p <- dim(x)[2] covv <- as.matrix(diag(p)) #identity matrix mns <- apply(x, 2, mean) #columnwise means, not the grand mean radsq <- sort(mahalanobis(x,mns,covv))[tem] fh<-1:n inr<-0 for(i in 1:n){ fh[i] <- sum( mahalanobis(x,x[i,],covv) <= radsq ) fh[i] <- fh[i]/( n * sqrt(radsq)^p ) } falph <- quantile(fh,alph) fhatxf <- sum( mahalanobis(x,xf,covv) <= radsq ) fhatxf <- fhatxf/( n * sqrt(radsq)^p ) #inr <- 1 if xf is in the prediction region if(fhatxf >= falph){ inr <- 1} list(inr=inr) } hot2sampsim<-function(n1=100,n2=100,p=2,B=100,nruns=100,xtype=1,sig=1, eps=0.4,dd=4,delta=0,covtyp=1,alpha=0.05){ # This R function simulates the two sample Hotelling's T^2 test, # a bootstrap test to get a cutoff, and a large sample test. # Two bootstrap tests using C_n = I are also used. # Need p > 1. Want n1 > 20p, n2 > 20p. #Calls identrgn.
# Multiply x by A where xtype = 1 for MVN Np(0,I), # 2, (with delta = eps) for (1 - delta) Np(0,I) + delta Np(0, 25 I) # 3 for multivariate t_d with d = dd # 4 for lognormal. # Power can be estimated by increasing delta so mu = delta(1,...,1) # and mu2 = (0, ..., 0). # Cov(x) = I, Cov(x2) = sig^2 Cov(x) for covtyp=1. # Cov(x) = diag(1,2,...,p), Cov(x2) = sig^2 Cov(x) for covtyp=2. # Cov(x) = I, Cov(x2) = sig^2 diag(1,2,...,p) for covtyp=3 A <- sqrt(diag(1:p)) munot <- 0 * (1:p) mu <- delta * (1 + munot) indx <- 1:n1 indx2 <- 1:n2 n<-n1+n2 mnmus <- matrix(0,nrow=B,ncol=p) prcv <- 0 brcv <- 0 LScov <- 0 Ccov <- 0 bstat <- 1:B cstat <- 0 * 1:nruns bcut <- 1:nruns dn=min(n1-p,n2-p) LScut<- p*qf(p=(1-alpha),df1=p,df2=dn) Ccut <- p*qf(p=(1-alpha),df1=p,df2=(n-2)) up <- min((1 - alpha/2), (1 - alpha + 10*alpha*p/B)) if(alpha > 0.1) up <- min((1 - alpha + 0.05), (1 - alpha + p/B)) qn <- up if(qn < 1 - alpha + 0.001) up <- 1 - alpha for(i in 1:nruns) { #make data x <- matrix(rnorm(n1 * p), ncol = p, nrow = n1) x2 <- matrix(rnorm(n2 * p), ncol = p, nrow = n2) if(xtype == 2) { zu <- runif(n1) x[zu < eps, ] <- x[zu < eps, ] * 5 zu <- runif(n2) x2[zu < eps, ] <- x2[zu < eps, ] * 5 } if(xtype == 3) { zu <- sqrt(rchisq(n1, dd)/dd) x <- x/zu zu <- sqrt(rchisq(n2, dd)/dd) x2 <- x2/zu } if(xtype == 4){ x <- exp(x) x <- x-exp(0.5) x2 <- exp(x2) x2 <- x2 - exp(0.5) } if(covtyp==2){ x <- x %*% A x2 <- x2 %*% A} if(covtyp==3){ x2 <- x2 %*% A} x2 <- sig * x2 x <- mu + x #x has mean mu =delta(1,...,1)^T, x2 has mean (0,...,0)^T cent <- apply(x,2,mean) - apply(x2,2,mean) S1<-cov(x) S2<-cov(x2) Sigma<-(S1/n1 + S2/n2) Sinv <- solve(Sigma) TLsq <- t(cent) %*% Sinv %*% cent if(TLsq <= LScut) LScov <- LScov+1 Sp<-(1/n1 + 1/n2)*( (n1-1)*S1 + (n2-1)*S2 )/(n-2) Spinv <- solve(Sp) #inverse scaled pooled cov matrix TCsq <- t(cent) %*% Spinv %*% cent cstat[i] <- TCsq if(TCsq <= Ccut) Ccov <- Ccov+1 for(j in 1:B){ tem <- sample(indx,n1,replace=T) mnx <- apply(x[tem,],2,mean) tem <- sample(indx2,n2,replace=T) mnx2 <- apply(x2[tem,],2,mean) mnmus[j,] <- mnx-mnx2 diff <- mnx-mnx2 - cent bstat[j]<- t(diff) %*% Spinv %*% diff } bcut[i] <- quantile(bstat,up) outmn <- identrgn(mnmus,stat=cent,alpha=alpha) prcv <- prcv + outmn$inr brcv <- brcv + outmn$inr2 } Ccov<-Ccov/nruns LScov <- LScov/nruns bootcov <- sum(cstat <= bcut)/nruns prcv <- prcv/nruns brcv <- brcv/nruns # list(Ccov=Ccov,LScov=LScov,bootcov=bootcov,prcv=prcv,brcv=brcv) } hot2sim<-function(n = 100, n2 = 100, p = 2, csteps = 5, B= 100, gam = 0.4, nruns = 100, xtype = 1, outliers = 0, pm = 10, sig = 1, eps = 0.4, dd=4, delta = 0){ # Needs library(Hotelling). # Partially written by Hasthika S. Rupasinghe Arachchige Don. # This R function simulates the two sample Hotelling's T^2 test based on the # bootstrap. Here x2 is clean, x = x1 may have outliers. # Uses the coordinatewise mean, median, 25% trimmed mean, # and RMVN location estimators as well as the classical test. # The hotsim2 function does not do the classical test. # Need p > 1. Want n > 20p, n2 > 20p, B > 20p. # Multiply x by A where xtype = 1 for MVN Np(0,I), # 2, (with delta = eps) for (1 - delta) Np(0,I) + delta Np(0, 25 I) # 3 for multivariate t_d with d = dd # 4 for lognormal. 
# outliers = 0 for no outliers and X~N(0,diag(1,...,p)), # 1 for outliers a tight cluster at major axis (0,...,0,pm)' # 2 for outliers a tight cluster at minor axis (pm,0, ...,0)' # 3 for outliers X~N((pm,...,pm)',diag(1,...,p)) # 4 for outliers X[i,p] = pm # 5 for outliers X[i,1] = pm # For the point mass outlier types 4 and 5, need gam much smaller than 0.4. # Power can be estimated by increasing delta so mu = delta(1,...,1) # and mu2 = (0, ..., 0). # Cov(x) = diag(1,2,...,p), Cov(x2) = sig^2 Cov(x) for clean data. # For outliers=0, want hquant and rquant approx 1. A <- sqrt(diag(1:p)) munot <- 0 * (1:p) mu <- delta * (1 + munot) val <- floor(gam * n) indx <- 1:n indx2 <- 1:n2 medmus <- matrix(0,nrow=B,ncol=p) mnmus <- medmus tmnmus <- medmus rmvnmus <- medmus medcv <- 0 mncv <- 0 tmncv <- 0 rmvncv <- 0 HTcv <- 0 HTpval <- 0 HT <- matrix(0, ncol = 1, nrow = nruns) chisqcut <- qchisq(p=0.95,df=p) cutoffs <- matrix(0,nrow=nruns,ncol=4) for(i in 1:nruns) { #make data x <- matrix(rnorm(n * p), ncol = p, nrow = n) x2 <- matrix(rnorm(n2 * p), ncol = p, nrow = n2) if(xtype == 2) { zu <- runif(n) x[zu < eps, ] <- x[zu < eps, ] * 5 zu <- runif(n2) x2[zu < eps, ] <- x2[zu < eps, ] * 5 } if(xtype == 3) { zu <- sqrt(rchisq(n, dd)/dd) x <- x/zu zu <- sqrt(rchisq(n2, dd)/dd) x2 <- x2/zu } if(xtype == 4){ #Want pop coord med(x) = 0. x <- exp(x) x <- x - 1 x2 <- exp(x2) x2 <- x2 - 1 } x <- x %*% A x2 <- x2 %*% A x2 <- sig * x2 if(outliers == 1) { x[1:val, ] <- matrix(rnorm(val * p, sd = 0.01), ncol = p, nrow = val) x[1:val, p] <- x[1:val, p] + pm } if(outliers == 2) { x[1:val, ] <- matrix(rnorm(val * p, sd = 0.01), ncol = p, nrow = val) x[1:val, 1] <- x[1:val, 1] + pm } if(outliers == 3) { tem <- pm + 0 * 1:p x[1:val, ] <- x[1:val, ] + tem } if(outliers == 4) { x[1:val, p] <- pm } if(outliers == 5) { x[1:val, 1] <- pm } x <- mu + x #clean x has mean mu =delta(1,...,1)^T, x2 has mean (0,...,0)^T #get bootstrapped Tx - Tx2 for various statistics T for(j in 1:B){ tem <- sample(indx,n,replace=T) medx <- apply(x[tem,],2,median) mnx <- apply(x[tem,],2,mean) tmnx <- apply(x[tem,],2,tmn) rmvnx <- covrmvn(x[tem,])$center tem <- sample(indx2,n2,replace=T) medx2 <- apply(x2[tem,],2,median) mnx2 <- apply(x2[tem,],2,mean) tmnx2 <- apply(x2[tem,],2,tmn) rmvnx2 <- covrmvn(x2[tem,])$center medmus[j,] <- medx-medx2 mnmus[j,] <- mnx-mnx2 tmnmus[j,] <- tmnx-tmnx2 rmvnmus[j,] <- rmvnx-rmvnx2 } outmed<-predreg(medmus) medcv <- medcv + outmed$inr outmn <- predreg(mnmus) mncv <- mncv + outmn$inr outtmn <- predreg(tmnmus) tmncv <- tmncv + outtmn$inr outrmvn <- predreg(rmvnmus) rmvncv <- rmvncv + outrmvn$inr cutoffs[i,]<-c(outmed$cuplim,outmn$cuplim,outtmn$cuplim,outrmvn$cuplim)^2 HTpval <- hotelling.test(x, x2) if(HTpval$pval < 0.05){ HT[i,1] <- 1} } medcv <- 1 - medcv/nruns #prop of times Ho is rejected mncv <- 1 - mncv/nruns tmncv <- 1 - tmncv/nruns rmvncv <- 1 - rmvncv/nruns HTcv <- sum(HT)/nruns mncut <- apply(cutoffs,2,mean) list(chisqcut = chisqcut, mncut=mncut, medcv = medcv,mncv = mncv,tmncv=tmncv,rmvncv=rmvncv, HTcv = HTcv) } hot2sim2<-function(n = 100, n2 = 100, p = 2, csteps = 5, B= 100, gam = 0.4, nruns = 100, xtype = 1, outliers = 0, pm = 10, sig = 1, eps = 0.4, dd=4, delta = 0){ # This R function simulates the two sample Hotelling's T^2 test based on the #bootstrap. Here x2 is clean, x = x1 may have outliers. # Uses the coordinatewise mean, median, 25% trimmed mean, and RMVN location estimators. #hotsim also simulates the classical estimator # Need p > 1. Want n > 20p, n2 > 20p, B > 20p. 
# Multiply x by A where xtype = 1 for MVN Np(0,I), # 2, (with delta = eps) for (1 - delta) Np(0,I) + delta Np(0, 25 I) # 3 for multivariate t_d with d = dd # 4 for lognormal. # outliers = 0 for no outliers and X~N(0,diag(1,...,p)), # 1 for outliers a tight cluster at major axis (0,...,0,pm)' # 2 for outliers a tight cluster at minor axis (pm,0, ...,0)' # 3 for outliers X~N((pm,...,pm)',diag(1,...,p)) # 4 for outliers X[i,p] = pm # 5 for outliers X[i,1] = pm # For the point mass outlier types 4 and 5, need gam much smaller than 0.4. # Power can be estimated by increasing delta so mu = delta(1,...,1) # and mu2 = (0, ..., 0). # Cov(x) = diag(1,2,...,p), Cov(x2) = sig^2 Cov(x) for clean data. # For outliers=0, want hquant and rquant approx 1. A <- sqrt(diag(1:p)) munot <- 0 * (1:p) mu <- delta * (1 + munot) val <- floor(gam * n) indx <- 1:n indx2 <- 1:n2 medmus <- matrix(0,nrow=B,ncol=p) mnmus <- medmus tmnmus <- medmus rmvnmus <- medmus medcv <- 0 mncv <- 0 tmncv <- 0 rmvncv <- 0 chisqcut <- qchisq(p=0.95,df=p) cutoffs <- matrix(0,nrow=nruns,ncol=4) for(i in 1:nruns) { #make data x <- matrix(rnorm(n * p), ncol = p, nrow = n) x2 <- matrix(rnorm(n2 * p), ncol = p, nrow = n2) if(xtype == 2) { zu <- runif(n) x[zu < eps, ] <- x[zu < eps, ] * 5 zu <- runif(n2) x2[zu < eps, ] <- x2[zu < eps, ] * 5 } if(xtype == 3) { zu <- sqrt(rchisq(n, dd)/dd) x <- x/zu zu <- sqrt(rchisq(n2, dd)/dd) x2 <- x2/zu } if(xtype == 4){ #Want pop coord med(x) = 0. x <- exp(x) x <- x - 1 x2 <- exp(x2) x2 <- x2 - 1 } x <- x %*% A x2 <- x2 %*% A x2 <- sig * x2 if(outliers == 1) { x[1:val, ] <- matrix(rnorm(val * p, sd = 0.01), ncol = p, nrow = val) x[1:val, p] <- x[1:val, p] + pm } if(outliers == 2) { x[1:val, ] <- matrix(rnorm(val * p, sd = 0.01), ncol = p, nrow = val) x[1:val, 1] <- x[1:val, 1] + pm } if(outliers == 3) { tem <- pm + 0 * 1:p x[1:val, ] <- x[1:val, ] + tem } if(outliers == 4) { x[1:val, p] <- pm } if(outliers == 5) { x[1:val, 1] <- pm } x <- mu + x #clean x has mean mu =delta(1,...,1)^T, x2 has mean (0,...,0)^T #get bootstrapped Tx - Tx2 for various statistics T for(j in 1:B){ tem <- sample(indx,n,replace=T) medx <- apply(x[tem,],2,median) mnx <- apply(x[tem,],2,mean) tmnx <- apply(x[tem,],2,tmn) rmvnx <- covrmvn(x[tem,])$center tem <- sample(indx2,n2,replace=T) medx2 <- apply(x2[tem,],2,median) mnx2 <- apply(x2[tem,],2,mean) tmnx2 <- apply(x2[tem,],2,tmn) rmvnx2 <- covrmvn(x2[tem,])$center medmus[j,] <- medx-medx2 mnmus[j,] <- mnx-mnx2 tmnmus[j,] <- tmnx-tmnx2 rmvnmus[j,] <- rmvnx-rmvnx2 } outmed<-predreg(medmus) medcv <- medcv + outmed$inr outmn <- predreg(mnmus) mncv <- mncv + outmn$inr outtmn <- predreg(tmnmus) tmncv <- tmncv + outtmn$inr outrmvn <- predreg(rmvnmus) rmvncv <- rmvncv + outrmvn$inr cutoffs[i,]<-c(outmed$cuplim,outmn$cuplim,outtmn$cuplim,outrmvn$cuplim)^2 } medcv <- 1 - medcv/nruns #prop of times Ho is rejected mncv <- 1 - mncv/nruns tmncv <- 1 - tmncv/nruns rmvncv <- 1 - rmvncv/nruns mncut <- apply(cutoffs,2,mean) list(chisqcut = chisqcut, mncut=mncut, medcv = medcv, mncv = mncv,tmncv=tmncv,rmvncv=rmvncv) } identrgn<-function(x, stat, alpha = 0.05){ # Makes two confidence regions for the rows of x # using I as the dispersion matrix. #Also computes the distance for the 0 vector #to check if the 0 vector is in the confidence region. 
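## Example sketch for identrgn (the rows of x play the role of the B
## bootstrapped statistics):
## x <- matrix(rnorm(600, mean=0.1), ncol = 3)
## identrgn(x, stat = apply(x,2,mean))$inr #1 if the 0 vector is in the region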
x <- as.matrix(x) p <- dim(x)[2] n <- dim(x)[1] #n is actually B C <- diag(p) zero <- 0*1:p inr<-0 inr2<-0 up <- 1 - alpha #quantile for confidence region #prediction region method tbar <- apply(x,2,mean) #md2 <- mahalanobis(x, center=tbar, cov=C) will invert C md2 <- mahalanobis(x, center=tbar, cov=C,inverted=TRUE) # MD is the classical Euclidean distance MD <- sqrt(md2) #get PR confidence region cutoff cuplim <- sqrt(quantile(md2,up)) D0 <- sqrt(mahalanobis(zero,center=tbar, cov=C)) if(D0 <= cuplim) inr <- 1 #Bickel and Ren type method md2 <- mahalanobis(x, center=stat, cov=C,inverted=TRUE) MD <- sqrt(md2) #get modified Bickel and Ren confidence region cutoff cuplim2 <- sqrt(quantile(md2,up)) D0br <- sqrt(mahalanobis(zero, center=stat, cov=C)) if(D0br <= cuplim2) inr2 <- 1 list(MD=MD,cuplim=cuplim,cuplim2=cuplim2,D0=D0,D0br=D0br,inr=inr,inr2=inr2) } lmsviews<-function(x, Y, ii = 1) {# Trimmed views using lmsreg for 90, 80, ..., 0 percent # trimming. Allows visualization of m # and crude estimation of c beta in models # of the form y = m(x^T beta) + e. # Workstation: activate a graphics # device with commands "X11()" or "motif()." # R needs command "library(MASS)" or "library(lqs)." # Advance the view with the right mouse button and # in R, highlight "stop." x <- as.matrix(x) out <- cov.mcd(x) center <- out$center cov <- out$cov rd2 <- mahalanobis(x, center, cov) labs <- c("90%", "80%", "70%", "60%", "50%", "40%", "30%", "20%", "10%", "0%") tem <- seq(0.1, 1, 0.1) for(i in ii:10) { val <- quantile(rd2, tem[i]) b <- lmsreg(x[rd2 <= val, ], Y[rd2 <= val])$coef ESP <- x %*% b[-1] + b[1] plot(ESP, Y) title(labs[i]) identify(ESP, Y) print(b) } } lrdata <- function(n = 200, type = 3) {# Generates data for logistic regression. # If X|y=1 ~ N(mu_1,I) and X|Y=0 ~ N(0,I) then beta = mu_1 and alpha = -0.5 ||mu_1||^2. # # If q is changed, change the formula in the glm statement. q <- 5 y <- 0 * 1:n y[(n/2 + 1):n] <- y[(n/2 + 1):n] + 1 beta <- 0 * 1:q if(type == 1) { beta[1] <- 1 alpha <- -0.5 } if(type == 2) { beta <- beta + 1 alpha <- -q/2 } if(type == 3) { beta[1:3] <- 1 alpha <- -1.5 } x <- matrix(rnorm(n * q), nrow = n, ncol = q) if(type == 1) { x[(n/2 + 1):n, 1] <- x[(n/2 + 1 ):n, 1] + 1 } if(type == 2) { x[(n/2 + 1):n, ] <- x[(n/2 + 1 ):n, ] + 1 } if(type == 3) { x[(n/2 + 1):n, 1:3 ] <- x[(n/2 + 1 ):n, 1:3 ] + 1 } #X|y=0 ~ N(0, I) and X|y=1 ~ N(beta,I) # change formula to x[,1]+ ... + x[,q] with q out <- glm(y ~ x[, 1] + x[, 2] + x[, 3] + x[, 4] + x[,5], family = binomial) list(alpha = alpha, beta = beta, lrcoef = out$coef,x=x,y=y) } lrdata2<-function(n = 200, p = 5, m = 10, type = 1) {# Generates data for binomial logistic regression. q <- p -1 y <- 1:n z <- y mv <- m + 0 * y beta <- 0 * 1:q if(type == 1) { beta <- beta + 2 alpha <- 0 } if(type == 2) { beta[1] <- 2 alpha <- 0 } if(type == 3) { beta[1:3] <- 2 alpha <- 0 } x <- matrix(rnorm(n * q), nrow = n, ncol = q) SP <- alpha + x %*% beta pv <- exp(SP)/(1 + exp(SP)) for(i in 1:n) y[i] <- rbinom(1, size = m, prob = pv[i]) Z <- y/m out<-glm(Z~., family=binomial,weights=mv,data=data.frame(cbind(x,Z))) list(x = x, y = y, mv = mv, alpha = alpha, beta = beta, lrcoef = out$coef) } lrplot2<-function(x, y, slices = 10, ff = 0.99, step = T) {# Makes the ESSP for binary logistic regression. # If step = T use a step function, else use lowess. # Workstation: need to activate a graphics # device with command "X11()" or "motif()." # #ESP <- x %*% out$coef[-1] + out$coef[1] #If q is changed, change the formula in the glm statement.
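## Example sketch for lrplot2 using data from lrdata (sourced from this file):
## out <- lrdata(n = 200)
## lrplot2(out$x, out$y) #response plot with the step function estimate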
#q <- 5 # change formula to x[,1]+ ... + x[,q] with q #out <- glm(y ~ x[, 1] + x[, 2] + x[, 3] + x[, 4] + x[, 5], family = binomial) out <- glm(y~., family=binomial, data=data.frame(cbind(x,y))) ESP <- predict(out) Y <- y plot(ESP, Y) fit <- y fit <- exp(ESP)/(1 + exp(ESP)) # lines(sort(ESP),sort(fit)) indx <- sort.list(ESP) lines(ESP[indx], fit[indx]) if(step == T){ fit2 <- fit n <- length(y) val <- as.integer(n/slices) for(i in 1:(slices - 1)) { fit2[((i - 1) * val + 1):(i * val)] <- mean(y[indx[((i - 1) * val + 1):(i * val)]]) } fit2[((slices - 1) * val + 1):n] <- mean(y[indx[((slices - 1) * val + 1 ):n]]) # fit2 is already sorted in order corresponding to indx lines(ESP[indx], fit2)} else lines(lowess(ESP, Y, f = ff), type = "s") title("Response Plot") } lrplot3<-function(ESP, y, slices = 10, ff = 0.99, step = T) {# Makes the response plot for binary logistic regression. # If out <- glm(y ~ x[, 1] + x[, 2], family = binomial) # use ESP <- predict(out). # If outgam <- gam(STA ~ s(AGE)+SEX+RACE+SER+CAN+CRN+INF+CPR+s(SYS)+s(HRA) # +PRE+TYP+FRA+PO2+PH+PCO+Bic+CRE+LOC,family=binomial,data=icu2) # use ESP <- predict.gam(outgam). # If step = T use a step function, else use lowess. # Y <- y plot(ESP, Y) fit <- y fit <- exp(ESP)/(1 + exp(ESP)) # lines(sort(ESP),sort(fit)) indx <- sort.list(ESP) lines(ESP[indx], fit[indx]) if(step == T){ fit2 <- fit n <- length(y) val <- as.integer(n/slices) for(i in 1:(slices - 1)) { fit2[((i - 1) * val + 1):(i * val)] <- mean(y[indx[((i - 1) * val + 1):(i * val)]]) } fit2[((slices - 1) * val + 1):n] <- mean(y[indx[((slices - 1) * val + 1 ):n]]) # fit2 is already sorted in order corresponding to indx lines(ESP[indx], fit2)} else lines(lowess(ESP, Y, f = ff), type = "s") title("Response Plot") } lrplot4<-function(x, y, mv, slices = 10, ff = 0.97, step = T) {# Makes the response plot and OD plot for binomial logistic regression. # mv = (m1, ..., mn) where yi is bin(mi,p(SP)) # If step = T use a step function, else use lowess. # Workstation: need to activate a graphics # device with command "X11()" or "motif()." # #ESP <- x %*% out$coef[-1] + out$coef[1] # n <- length(y) tdata <- as.data.frame(cbind(x,y)) ny <- mv-y #ny[i] = mv[i]-y[i] = no. of "failures" out <- glm(cbind(y,ny)~., family=binomial, data=tdata) ESP <- predict(out) par(mfrow = c(1, 2),pty="s") Z <- y/mv plot(ESP, Z) abline(weighted.mean(Z, mv), 0) fit <- y fit <- exp(ESP)/(1 + exp(ESP)) indx <- sort.list(ESP) lines(ESP[indx], fit[indx]) if(step == T) { fit2 <- fit val <- as.integer(n/slices) for(i in 1:(slices - 1)) { fit2[((i - 1) * val + 1):(i * val)] <- weighted.mean(Z[ indx[((i - 1) * val + 1):(i * val)]], mv[indx[(( i - 1) * val + 1):(i * val)]]) } fit2[((slices - 1) * val + 1):n] <- weighted.mean(Z[indx[(( slices - 1) * val + 1):n]], mv[indx[((slices - 1) * val + 1):n]]) # fit2 is already sorted in order corresponding to indx lines(ESP[indx], fit2) } else lines(lowess(ESP, Z, f = ff), type = "s") title("a) Response Plot") #get OD plot val <- exp(ESP)/(1 + exp(ESP)) Ehat <- mv * val Vmodhat <- Ehat * (1 - val) Vhat <- (y - Ehat)^2 plot(Vmodhat, Vhat) abline(0, 1) abline(0, 4) abline(lsfit(Vmodhat, Vhat)$coef) title("b) OD Plot") } lsviews<-function(x, Y, ii = 10, type = 1) {# This function is the same as tvreg except that the untrimmed # cases are highlighted. It compares the LS fits for 0, 10, # ..., 90 percent trimming. # Use ii = 10 for 0, ii = 9 for 10 then 0, ..., # ii = 1 for 90 then 80 ..., then 0 percent trimming. # Used to visualize g if y = g(beta^T x,e).
# Workstation: activate a graphics # device with command "X11()" or "motif()." # R needs command "library(MASS)." # Advance the view with the right mouse button. # In R, highlight ``stop." x <- as.matrix(x) if(type == 1) out <- cov.mcd(x) else out <- covmba(x) center <- out$center cov <- out$cov rd2 <- mahalanobis(x, center, cov) labs <- c("90%", "80%", "70%", "60%", "50%", "40%", "30%", "20%", "10%", "0%") tem <- seq(0.1, 1, 0.1) for(i in ii:10) { val <- quantile(rd2, tem[i]) bhat <- lsfit(x[rd2 <= val, ], Y[rd2 <= val])$coef ESP <- bhat[1] + x %*% bhat[-1] plot(ESP, Y) points(ESP[rd2 <= val], Y[rd2 <= val], pch = 15, cex = 1.4) abline(0, 1) title(labs[i]) identify(ESP, Y) print(bhat) } } maha<-function(x) {# Generates the classical mahalanobis distances. Need p > 1 or x a matrix. center <- apply(x, 2, mean) cov <- var(x) return(sqrt(mahalanobis(x, center, cov))) } manova1w<-function(y,p,group){ # Does one way MANOVA model. # Advance the plot by highlighting Stop with the right mouse button. # p is the number of groups group <- as.factor(group) y <- as.matrix(y) n <- dim(y)[1] m <- dim(y)[2] res <- matrix(nrow = n, ncol = m, 0) fit <- res Bhat <- matrix(nrow = p, ncol = m, 0) cmar <- par("mar") par(mfrow = c(2, 1)) par(mar=c(4.0,4.0,2.0,0.5)) for(i in 1:m){ out <- aov(y[,i]~group) res[,i] <- out$res fit[,i] <- out$fit Bhat[,i] <- out$coef } for(i in 1:m){ plot(fit[,i],y[,i]) abline(0, 1) title("Response Plot") identify(fit[,i],y[,i]) plot(fit[,i], res[,i]) title("Residual Plot") identify(fit[,i], res[,i]) } par(mfrow = c(1, 1)) par(mar=cmar) Covhat <- (n-1)/(n-p) * var(res) out <- manova(y~group) list(Bhat = Bhat, fit = fit, res = res, Covhat = Covhat, out=out) } manovabt<-function(y, group, g=2, B = 50, type = 1){ # Bootstraps a one way Manova type test for Ho: mu_1 = ... = mu_g. # There are g = p groups. # Here y is an n by m data matrix, group is an n by 1 vector with entries # 1 to g indicating the group of the ith case. So g>1 is the number of groups. # Type = 1 for sample mean, 2 for sample median, 3 for 25% trimmed mean # Using replace = T results in an error. y <- as.matrix(y) ny <- dim(y)[1] m <- dim(y)[2] indx <- 1:g #get n1, ..., ng for(i in 1:g) indx[i]<-sum(group==i) mus <- matrix(0,nrow=B,ncol=m*(g-1)) zg<-y[group==g,] #get bootstrap sample of the robust statistic T1 - Tg,...,T(g-1)-Tg # take a sample of size ni with replacement from each group if(type == 1){ for(j in 1:B){ temg <- sample(1:indx[g],indx[g],replace=TRUE) Tg <- apply(zg[temg,],2,mean) for(i in 1:(g-1)){ tem <- sample(1:indx[i],indx[i],replace=TRUE) z <- y[group==i,] Ti <- apply(z[tem,],2,mean) mus[j,(m*(i-1)+1):(i*m)] <- Ti-Tg } } } if(type == 2){ for(j in 1:B){ temg <- sample(1:indx[g],indx[g],replace=TRUE) Tg <- apply(zg[temg,],2,median) for(i in 1:(g-1)){ tem <- sample(1:indx[i],indx[i],replace=TRUE) z <- y[group==i,] Ti <- apply(z[tem,],2,median) mus[j,(m*(i-1)+1):(i*m)] <- Ti-Tg } } } if(type == 3){ for(j in 1:B){ temg <- sample(1:indx[g],indx[g],replace=TRUE) Tg <- apply(zg[temg,],2,tmn) for(i in 1:(g-1)){ tem <- sample(1:indx[i],indx[i],replace=TRUE) z <- y[group==i,] Ti <- apply(z[tem,],2,tmn) mus[j,(m*(i-1)+1):(i*m)] <- Ti-Tg } } } list(mus=mus) } manbtsim<-function(n = 100, n2 = 100, n3 = 100, p = 2, csteps = 5, B= 100, gam = 0.4, nruns = 100, xtype = 1, outliers = 0, pm = 10, sig2 = 1, sig3 = 1, eps = 0.4, dd=4, delta = 0){ # This R function simulates one way Manova type bootstrap test for # Ho: mu_1 = ... = mu_g where g = 3. # Here x2 is clean, x = x1 may have outliers. 
# Uses the coordinatewise mean, median, 25% trimmed mean,
# and RMVN location estimators. SO VERY SLOW.
# Need p > 1. Want n > 20p, n2 > 20p, n3 > 20p, B > 20p.
# Multiply x by A where xtype = 1 for MVN Np(0,I),
# 2 (with delta = eps) for (1 - delta) Np(0,I) + delta Np(0, 25 I)
# 3 for multivariate t_d with d = dd
# 4 for lognormal.
# outliers = 0 for no outliers and X~N(0,diag(1,...,p)),
# 1 for outliers a tight cluster at major axis (0,...,0,pm)'
# 2 for outliers a tight cluster at minor axis (pm,0, ...,0)'
# 3 for outliers X~N((pm,...,pm)',diag(1,...,p))
# 4 for outliers X[i,p] = pm
# 5 for outliers X[i,1] = pm
# For the point mass outlier types 4 and 5, need gam much smaller than 0.4.
# Power can be estimated by increasing delta so mu = delta(1,...,1)
# and mu2 = (0, ..., 0).
# Cov(x) = diag(1,2,...,p), Cov(x2) = sig2^2 Cov(x), and
# Cov(x3) = sig3^2 Cov(x) for clean data.
# For outliers=0, want hquant and rquant approx 1.
A <- sqrt(diag(1:p))
munot <- 0 * (1:p)
mu <- delta * (1 + munot)
val <- floor(gam * n)
indx <- 1:n; indx2 <- 1:n2; indx3 <- 1:n3
medmus <- matrix(0,nrow=B,ncol=2*p)
mnmus <- medmus; tmnmus <- medmus; rmvnmus <- medmus
medcv <- 0; mncv <- 0; tmncv <- 0; rmvncv <- 0
chisqcut <- qchisq(p=0.95,df=2*p)
cutoffs <- matrix(0,nrow=nruns,ncol=4)
for(i in 1:nruns) {
#make data
x <- matrix(rnorm(n * p), ncol = p, nrow = n)
x2 <- matrix(rnorm(n2 * p), ncol = p, nrow = n2)
x3 <- matrix(rnorm(n3 * p), ncol = p, nrow = n3)
if(xtype == 2) {
zu <- runif(n)
x[zu < eps, ] <- x[zu < eps, ] * 5
zu <- runif(n2)
x2[zu < eps, ] <- x2[zu < eps, ] * 5
zu <- runif(n3)
x3[zu < eps, ] <- x3[zu < eps, ] * 5
}
if(xtype == 3) {
zu <- sqrt(rchisq(n, dd)/dd)
x <- x/zu
zu <- sqrt(rchisq(n2, dd)/dd)
x2 <- x2/zu
zu <- sqrt(rchisq(n3, dd)/dd)
x3 <- x3/zu
}
if(xtype == 4){ #Want pop coord med(x) = 0.
x <- exp(x)
x <- x - 1
x2 <- exp(x2)
x2 <- x2 - 1
x3 <- exp(x3)
x3 <- x3 - 1
}
x <- x %*% A
x2 <- x2 %*% A
x2 <- sig2 * x2
x3 <- x3 %*% A
x3 <- sig3 * x3
if(outliers == 1) {
x[1:val, ] <- matrix(rnorm(val * p, sd = 0.01), ncol = p, nrow = val)
x[1:val, p] <- x[1:val, p] + pm
}
if(outliers == 2) {
x[1:val, ] <- matrix(rnorm(val * p, sd = 0.01), ncol = p, nrow = val)
x[1:val, 1] <- x[1:val, 1] + pm
}
if(outliers == 3) {
tem <- pm + 0 * 1:p
x[1:val, ] <- x[1:val, ] + tem
}
if(outliers == 4) {
x[1:val, p] <- pm
}
if(outliers == 5) {
x[1:val, 1] <- pm
}
x <- mu + x
#clean x has mean mu = delta(1,...,1)^T, x2 and x3 have mean (0,...,0)^T
#get bootstrapped Tx - Tx3, Tx2 - Tx3 for various statistics T
for(j in 1:B){
tem <- sample(indx,n,replace=T)
medx <- apply(x[tem,],2,median)
mnx <- apply(x[tem,],2,mean)
tmnx <- apply(x[tem,],2,tmn)
rmvnx <- covrmvn(x[tem,])$center
tem <- sample(indx2,n2,replace=T)
medx2 <- apply(x2[tem,],2,median)
mnx2 <- apply(x2[tem,],2,mean)
tmnx2 <- apply(x2[tem,],2,tmn)
rmvnx2 <- covrmvn(x2[tem,])$center
tem <- sample(indx3,n3,replace=T)
medx3 <- apply(x3[tem,],2,median)
mnx3 <- apply(x3[tem,],2,mean)
tmnx3 <- apply(x3[tem,],2,tmn)
rmvnx3 <- covrmvn(x3[tem,])$center
medmus[j,] <- c(medx-medx3,medx2-medx3)
mnmus[j,] <- c(mnx-mnx3,mnx2-mnx3)
tmnmus[j,] <- c(tmnx-tmnx3,tmnx2-tmnx3)
rmvnmus[j,] <- c(rmvnx-rmvnx3,rmvnx2-rmvnx3)
}
outmed <- predreg(medmus)
medcv <- medcv + outmed$inr
outmn <- predreg(mnmus)
mncv <- mncv + outmn$inr
outtmn <- predreg(tmnmus)
tmncv <- tmncv + outtmn$inr
outrmvn <- predreg(rmvnmus)
rmvncv <- rmvncv + outrmvn$inr
cutoffs[i,] <- c(outmed$cuplim,outmn$cuplim,outtmn$cuplim,outrmvn$cuplim)^2
}
medcv <- 1 - medcv/nruns #prop of times Ho is rejected
mncv <- 1 - mncv/nruns
tmncv <- 1 - tmncv/nruns
rmvncv <- 1 - rmvncv/nruns
mncut <- apply(cutoffs,2,mean)
list(chisqcut = chisqcut, mncut=mncut, medcv = medcv, mncv = mncv,
tmncv=tmncv, rmvncv=rmvncv)
}

manbtsim2<-function(n = 100, n2 = 100, n3 = 100, m = 2, csteps = 5, B = 100,
gam = 0.4, nruns = 100, ytype = 1, outliers = 0, pm = 10, sig2 = 1,
sig3 = 1, eps = 0.4, dd=4, delta = 0, delta3 = 0, cov3I = F){
#needs library(MASS)
# This R function simulates a one way Manova type bootstrap test for
# Ho: mu_1 = mu_2 = mu_3 where p = g = 3 = number of groups.
#Here y2 and y3 are clean, y = y1 may have outliers, and y is m by 1.
# Uses the coordinatewise mean, median, and 25% trimmed mean.
#Also does the classical Hotelling Lawley one way MANOVA test.
#Partially written by Hasthika S. Rupasinghe Arachchige Don.
## Does not use the slow RMVN location estimators.
# Need m > 1. Want n > 20m, n2 > 20m, n3 > 20m, B > 20m.
# Multiply y by A where ytype = 1 for MVN Nm(0,I),
# 2 for (1 - eps) Nm(0,I) + eps Nm(0, 25 I),
# 3 for multivariate t_d with d = dd,
# 4 for lognormal.
# outliers = 0 for no outliers and Y~N(0,diag(1,...,m)),
# 1 for outliers a tight cluster at major axis (0,...,0,pm)'
# 2 for outliers a tight cluster at minor axis (pm,0, ...,0)'
# 3 for outliers Y~N((pm,...,pm)',diag(1,...,m))
# 4 for outliers Y[i,m] = pm
# 5 for outliers Y[i,1] = pm
# For the point mass outlier types 4 and 5, need gam much smaller than 0.4.
# Power can be estimated by increasing delta so mu1 = delta(1,...,1)
# and mu2 = (0, ..., 0), mu3 = delta3(1,...,1).
# Cov(y) = diag(1,2,...,m), Cov(y2) = sig2^2 Cov(y) for clean data.
# Cov(y3) = sig3^2 Cov(y) for clean data if cov3I = F,
# or Cov(y3) = c I_m if cov3I = T.
# For outliers=0, want hquant and rquant approx 1.
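## Illustrative call, a sketch (assumes the functions tmn and predreg from
## this file have been sourced and that library(MASS) is attached):
# out <- manbtsim2(n=100, n2=100, n3=100, m=2, B=200, nruns=100, delta=0)
# out$mncv #proportion of runs where Ho is rejected using the coordinatewise mean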
A <- sqrt(diag(1:m)) munot <- 0 * (1:m) mu <- delta * (1 + munot) mu3 <- delta3 * (1 + munot) val <- floor(gam * n) indx <- 1:n indx2 <- 1:n2 indx3 <- 1:n3 medmus <- matrix(0,nrow=B,ncol=2*m) mnmus <- medmus tmnmus <- medmus medcv <- 0 mncv <- 0 tmncv <- 0 crej <- 0 gp <- 0 grp <- 0 pval <- 0 out <- 0 chisqcut <- qchisq(p=0.95,df=2*m) cutoffs <- matrix(0,nrow=nruns,ncol=3) for(i in 1:nruns) { #make data y <- matrix(rnorm(n * m), ncol = m, nrow = n) y2 <- matrix(rnorm(n2 * m), ncol = m, nrow = n2) y3 <- matrix(rnorm(n3 * m), ncol = m, nrow =n3) if(ytype == 2) { zu <- runif(n) y[zu < eps, ] <- y[zu < eps, ] * 5 zu <- runif(n2) y2[zu < eps, ] <- y2[zu < eps, ] * 5 zu <- runif(n3) y3[zu < eps, ] <- y3[zu < eps, ] * 5 } if(ytype == 3) { zu <- sqrt(rchisq(n, dd)/dd) y <- y/zu zu <- sqrt(rchisq(n2, dd)/dd) y2 <- y2/zu zu <- sqrt(rchisq(n3, dd)/dd) y3 <- y3/zu } if(ytype == 4){ #Want pop coord med(y) = 0. y <- exp(y) y <- y - 1 y2 <- exp(y2) y2 <- y2 - 1 y3 <- exp(y3) y3 <- y3 - 1 } y <- y %*% A y2 <- y2 %*% A y2 <- sig2 * y2 if( cov3I != T){ y3 <- y3 %*% A y3 <- sig3 * y3} if(outliers == 1) { y[1:val, ] <- matrix(rnorm(val * m, sd = 0.01), ncol = m, nrow = val) y[1:val, m] <- y[1:val, m] + pm } if(outliers == 2) { y[1:val, ] <- matrix(rnorm(val * m, sd = 0.01), ncol = m, nrow = val) y[1:val, 1] <- y[1:val, 1] + pm } if(outliers == 3) { tem <- pm + 0 * 1:m y[1:val, ] <- y[1:val, ] + tem } if(outliers == 4) { y[1:val, m] <- pm } if(outliers == 5) { y[1:val, 1] <- pm } y <- mu + y y3 <- mu3 + y3 #clean y has mean mu =delta(1,...,1)^T, #y2 has mean (0,..,0)^T and y3 has mean delta3 (1,...,1)^T #get bootstrapped Ty - Ty3, Ty2 - Ty3 for various statistics T for(j in 1:B){ tem <- sample(indx,n,replace=T) medy <- apply(y[tem,],2,median) mny <- apply(y[tem,],2,mean) tmny <- apply(y[tem,],2,tmn) tem <- sample(indx2,n2,replace=T) medy2 <- apply(y2[tem,],2,median) mny2 <- apply(y2[tem,],2,mean) tmny2 <- apply(y2[tem,],2,tmn) tem <- sample(indx3,n3,replace=T) medy3 <- apply(y3[tem,],2,median) mny3 <- apply(y3[tem,],2,mean) tmny3 <- apply(y3[tem,],2,tmn) medmus[j,] <- c(medy-medy3,medy2-medy3) mnmus[j,] <- c(mny-mny3,mny2-mny3) tmnmus[j,] <- c(tmny-tmny3,tmny2-tmny3) } outmed<-predreg(medmus) medcv <- medcv + outmed$inr outmn <- predreg(mnmus) mncv <- mncv + outmn$inr outtmn <- predreg(tmnmus) tmncv <- tmncv + outtmn$inr cutoffs[i,]<-c(outmed$cuplim,outmn$cuplim,outtmn$cuplim)^2 #Get the classical test coverage yall<-rbind(y,y2,y3) yall<-as.matrix(yall) gp <- c(rep(1, n),rep(2, n2),rep(3, n3)) grp<-factor(gp) out<-manova(yall~grp) pval <- summary(out,test="Hotelling-Lawley")$stats[1,6] #pvalue for Hotelling-Lawley's test if(pval < 0.05){ crej <- crej +1 } } medcv <- 1 - medcv/nruns #prop of times Ho is rejected mncv <- 1 - mncv/nruns tmncv <- 1 - tmncv/nruns mncut <- apply(cutoffs,2,mean) ccv <- crej/nruns list(chisqcut = chisqcut, mncut=mncut, medcv = medcv, mncv = mncv,tmncv=tmncv,ccv=ccv) } manbtsim3<-function(n1 = 100, n2 = 100, n3 = 100, m = 2, csteps = 5, B= 100, gam = 0.4, nruns = 100, ytype = 1, outliers = 0, pm = 10, sig2 = 1, sig3 = 1, eps = 0.4, dd=4, delta1 = 0, delta2 = 0, delta3 = 0, cov3I = F){ #needs library(MASS) # This R function simulates a one way Manova type bootstrap test for # Ho: mu_1 = mu_2 = mu_3 where p = g = 3 = number of groups. #Can vary the mean vectors mui better than in manbtsim2. #Here y2 and y3 are clean, y1 may have outliers, and y1 is m by 1. # Uses the coordinatewise mean, median, and 25% trimmed mean. #Also does the classical Hotelling Lawley one way MANOVA test. 
#Partially written by Hasthika S. Rupasinghe Arachchige Don.
## Does not use the slow RMVN location estimators.
# Need m > 1. Want n1 > 20m, n2 > 20m, n3 > 20m, B > 20m.
# Multiply y by A where ytype = 1 for MVN Nm(0,I),
# 2 for (1 - eps) Nm(0,I) + eps Nm(0, 25 I),
# 3 for multivariate t_d with d = dd,
# 4 for lognormal.
# outliers = 0 for no outliers and Y~N(0,diag(1,...,m)),
# 1 for outliers a tight cluster at major axis (0,...,0,pm)'
# 2 for outliers a tight cluster at minor axis (pm,0, ...,0)'
# 3 for outliers Y~N((pm,...,pm)',diag(1,...,m))
# 4 for outliers Y[i,m] = pm
# 5 for outliers Y[i,1] = pm
# For the point mass outlier types 4 and 5, need gam much smaller than 0.4.
# Power can be estimated by using unequal deltai so mu1 = delta1(1,...,1)
# and mu2 = delta2(1, ..., 1), mu3 = delta3(1,...,1).
# Cov(y1) = diag(1,2,...,m), Cov(y2) = sig2^2 Cov(y1) for clean data.
# Cov(y3) = sig3^2 Cov(y1) for clean data if cov3I = F,
# or Cov(y3) = c I_m if cov3I = T.
# For outliers=0, want hquant and rquant approx 1.
A <- sqrt(diag(1:m))
munot <- 0 * (1:m)
mu1 <- delta1 * (1 + munot)
mu2 <- delta2 * (1 + munot)
mu3 <- delta3 * (1 + munot)
val <- floor(gam * n1)
indx1 <- 1:n1; indx2 <- 1:n2; indx3 <- 1:n3
medmus <- matrix(0,nrow=B,ncol=2*m)
mnmus <- medmus; tmnmus <- medmus
medcv <- 0; mncv <- 0; tmncv <- 0; crej <- 0
gp <- 0; grp <- 0; pval <- 0; out <- 0
chisqcut <- qchisq(p=0.95,df=2*m)
cutoffs <- matrix(0,nrow=nruns,ncol=3)
for(i in 1:nruns) {
#make data
y1 <- matrix(rnorm(n1 * m), ncol = m, nrow = n1)
y2 <- matrix(rnorm(n2 * m), ncol = m, nrow = n2)
y3 <- matrix(rnorm(n3 * m), ncol = m, nrow = n3)
if(ytype == 2) {
zu <- runif(n1)
y1[zu < eps, ] <- y1[zu < eps, ] * 5
zu <- runif(n2)
y2[zu < eps, ] <- y2[zu < eps, ] * 5
zu <- runif(n3)
y3[zu < eps, ] <- y3[zu < eps, ] * 5
}
if(ytype == 3) {
zu <- sqrt(rchisq(n1, dd)/dd)
y1 <- y1/zu
zu <- sqrt(rchisq(n2, dd)/dd)
y2 <- y2/zu
zu <- sqrt(rchisq(n3, dd)/dd)
y3 <- y3/zu
}
if(ytype == 4){ #Want pop coord med(y) = 0.
y1 <- exp(y1) y1 <- y1 - 1 y2 <- exp(y2) y2 <- y2 - 1 y3 <- exp(y3) y3 <- y3 - 1 } y1 <- y1 %*% A y2 <- y2 %*% A y2 <- sig2 * y2 if( cov3I != T){ y3 <- y3 %*% A y3 <- sig3 * y3} if(outliers == 1) { y1[1:val, ] <- matrix(rnorm(val * m, sd = 0.01), ncol = m, nrow = val) y1[1:val, m] <- y1[1:val, m] + pm } if(outliers == 2) { y1[1:val, ] <- matrix(rnorm(val * m, sd = 0.01), ncol = m, nrow = val) y1[1:val, 1] <- y1[1:val, 1] + pm } if(outliers == 3) { tem <- pm + 0 * 1:m y1[1:val, ] <- y1[1:val, ] + tem } if(outliers == 4) { y1[1:val, m] <- pm } if(outliers == 5) { y1[1:val, 1] <- pm } y1 <- mu1 + y1 y2 <- mu2 + y2 y3 <- mu3 + y3 #clean y1 has mean mu1 =delta1(1,...,1)^T, #y2 has mean delta2(1,..,1)^T and y3 has mean delta3 (1,...,1)^T #get bootstrapped Ty - Ty3, Ty2 - Ty3 for various statistics T for(j in 1:B){ tem <- sample(indx1,n1,replace=T) medy <- apply(y1[tem,],2,median) mny <- apply(y1[tem,],2,mean) tmny <- apply(y1[tem,],2,tmn) tem <- sample(indx2,n2,replace=T) medy2 <- apply(y2[tem,],2,median) mny2 <- apply(y2[tem,],2,mean) tmny2 <- apply(y2[tem,],2,tmn) tem <- sample(indx3,n3,replace=T) medy3 <- apply(y3[tem,],2,median) mny3 <- apply(y3[tem,],2,mean) tmny3 <- apply(y3[tem,],2,tmn) medmus[j,] <- c(medy-medy3,medy2-medy3) mnmus[j,] <- c(mny-mny3,mny2-mny3) tmnmus[j,] <- c(tmny-tmny3,tmny2-tmny3) } outmed<-predreg(medmus) medcv <- medcv + outmed$inr outmn <- predreg(mnmus) mncv <- mncv + outmn$inr outtmn <- predreg(tmnmus) tmncv <- tmncv + outtmn$inr cutoffs[i,]<-c(outmed$cuplim,outmn$cuplim,outtmn$cuplim)^2 #Get the classical test coverage yall<-rbind(y1,y2,y3) yall<-as.matrix(yall) gp <- c(rep(1, n1),rep(2, n2),rep(3, n3)) grp<-factor(gp) out<-manova(yall~grp) pval <- summary(out,test="Hotelling-Lawley")$stats[1,6] #pvalue for Hotelling-Lawley's test if(pval < 0.05){ crej <- crej +1 } } medcv <- 1 - medcv/nruns #prop of times Ho is rejected mncv <- 1 - mncv/nruns tmncv <- 1 - tmncv/nruns mncut <- apply(cutoffs,2,mean) ccv <- crej/nruns list(chisqcut = chisqcut, mncut=mncut, medcv = medcv, mncv = mncv,tmncv=tmncv,ccv=ccv) } manbtsim4<-function(n1 = 100, n2 = 100, n3 = 100, m = 2, csteps = 5, B= 100, gam = 0.4, nruns = 100, ytype = 1, outliers = 0, pm = 10, sig2 = 1, sig3 = 1, eps = 0.4, dd=4, delta1 = 0, delta2 = 0, delta3 = 0, cov3I = F){ #needs library(MASS) and library(Matrix) # This R function simulates one way Manova type tests for # Ho: mu_1 = mu_2 = mu_3 where p = g = 3 = number of groups. #Can vary the mean vectors mui better than in manbtsim2. #Has one more test than manbtsim3: #the large sample Manova type test based on the sample mean. #Here y2 and y3 are clean, y1 may have outliers, and y1 is m by 1. # Uses the coordinatewise mean, median, and 25% trimmed mean. #Also does the classical Hotelling Lawley one way MANOVA test. #Partially written by Hasthika S. Rupasinghe Arachchige Don. ## Does not use the slow RMVN location estimators. # Need m > 1. Want n > 20m, n2 > 20m, n3 > 20m, B > 20m. # Multiply y by A where ytype = 1 for MVN Nm(0,I), # 2 for (1 - eps) Nm(0,I) + eps Nm(0, 25 I), # 3 for multivariate t_d with d = dd, # 4 for lognormal. # outliers = 0 for no outliers and Y~N(0,diag(1,...,m)), # 1 for outliers a tight cluster at major axis (0,...,0,pm)' # 2 for outliers a tight cluster at minor axis (pm,0, ...,0)' # 3 for outliers Y~N((pm,...,pm)',diag(1,...,m)) # 4 for outliers Y[i,m] = pm # 5 for outliers Y[i,1] = pm # For the point mass outlier types 4 and 5, need gam much smaller than 0.4. 
# Power can be estimated by using unequal deltai so mu1 = delta1(1,...,1)
# and mu2 = delta2(1, ..., 1), mu3 = delta3(1,...,1).
# Cov(y1) = diag(1,2,...,m), Cov(y2) = sig2^2 Cov(y1) for clean data.
# Cov(y3) = sig3^2 Cov(y1) for clean data if cov3I = F,
# or Cov(y3) = c I_m if cov3I = T.
# For outliers=0, want hquant and rquant approx 1.
A <- sqrt(diag(1:m))
munot <- 0 * (1:m)
mu1 <- delta1 * (1 + munot)
mu2 <- delta2 * (1 + munot)
mu3 <- delta3 * (1 + munot)
val <- floor(gam * n1)
indx1 <- 1:n1; indx2 <- 1:n2; indx3 <- 1:n3
medmus <- matrix(0,nrow=B,ncol=2*m)
mnmus <- medmus; tmnmus <- medmus
medcv <- 0; mncv <- 0; tmncv <- 0; mantcov <- 0; crej <- 0
gp <- 0; grp <- 0; pval <- 0; out <- 0
chisqcut <- qchisq(p=0.95,df=2*m)
cutoffs <- matrix(0,nrow=nruns,ncol=3)
for(i in 1:nruns) {
#make data
y1 <- matrix(rnorm(n1 * m), ncol = m, nrow = n1)
y2 <- matrix(rnorm(n2 * m), ncol = m, nrow = n2)
y3 <- matrix(rnorm(n3 * m), ncol = m, nrow = n3)
if(ytype == 2) {
zu <- runif(n1)
y1[zu < eps, ] <- y1[zu < eps, ] * 5
zu <- runif(n2)
y2[zu < eps, ] <- y2[zu < eps, ] * 5
zu <- runif(n3)
y3[zu < eps, ] <- y3[zu < eps, ] * 5
}
if(ytype == 3) {
zu <- sqrt(rchisq(n1, dd)/dd)
y1 <- y1/zu
zu <- sqrt(rchisq(n2, dd)/dd)
y2 <- y2/zu
zu <- sqrt(rchisq(n3, dd)/dd)
y3 <- y3/zu
}
if(ytype == 4){ #Want pop coord med(y) = 0.
y1 <- exp(y1)
y1 <- y1 - 1
y2 <- exp(y2)
y2 <- y2 - 1
y3 <- exp(y3)
y3 <- y3 - 1
}
y1 <- y1 %*% A
y2 <- y2 %*% A
y2 <- sig2 * y2
if( cov3I != T){
y3 <- y3 %*% A
y3 <- sig3 * y3}
if(outliers == 1) {
y1[1:val, ] <- matrix(rnorm(val * m, sd = 0.01), ncol = m, nrow = val)
y1[1:val, m] <- y1[1:val, m] + pm
}
if(outliers == 2) {
y1[1:val, ] <- matrix(rnorm(val * m, sd = 0.01), ncol = m, nrow = val)
y1[1:val, 1] <- y1[1:val, 1] + pm
}
if(outliers == 3) {
tem <- pm + 0 * 1:m
y1[1:val, ] <- y1[1:val, ] + tem
}
if(outliers == 4) {
y1[1:val, m] <- pm
}
if(outliers == 5) {
y1[1:val, 1] <- pm
}
y1 <- mu1 + y1
y2 <- mu2 + y2
y3 <- mu3 + y3
#clean y1 has mean mu1 = delta1(1,...,1)^T,
#y2 has mean delta2(1,...,1)^T and y3 has mean delta3(1,...,1)^T
#get bootstrapped Ty - Ty3, Ty2 - Ty3 for various statistics T
for(j in 1:B){
tem <- sample(indx1,n1,replace=T)
medy <- apply(y1[tem,],2,median)
mny <- apply(y1[tem,],2,mean)
tmny <- apply(y1[tem,],2,tmn)
tem <- sample(indx2,n2,replace=T)
medy2 <- apply(y2[tem,],2,median)
mny2 <- apply(y2[tem,],2,mean)
tmny2 <- apply(y2[tem,],2,tmn)
tem <- sample(indx3,n3,replace=T)
medy3 <- apply(y3[tem,],2,median)
mny3 <- apply(y3[tem,],2,mean)
tmny3 <- apply(y3[tem,],2,tmn)
medmus[j,] <- c(medy-medy3,medy2-medy3)
mnmus[j,] <- c(mny-mny3,mny2-mny3)
tmnmus[j,] <- c(tmny-tmny3,tmny2-tmny3)
}
outmed <- predreg(medmus)
medcv <- medcv + outmed$inr
outmn <- predreg(mnmus)
mncv <- mncv + outmn$inr
outtmn <- predreg(tmnmus)
tmncv <- tmncv + outtmn$inr
cutoffs[i,] <- c(outmed$cuplim,outmn$cuplim,outtmn$cuplim)^2
#Get the classical test coverage
yall <- rbind(y1,y2,y3)
yall <- as.matrix(yall)
gp <- c(rep(1, n1),rep(2, n2),rep(3, n3))
grp <- factor(gp)
out <- manova(yall~grp)
pval <- summary(out,test="Hotelling-Lawley")$stats[1,6]
#pvalue for Hotelling-Lawley's test
if(pval < 0.05){ crej <- crej + 1 }
#large sample Manova type test based on the sample mean
pval <- mantyp(y=yall,p=3,group=grp)$pval
if(pval < 0.05) mantcov <- mantcov + 1
}
medcv <- 1 - medcv/nruns #prop of times Ho is rejected
mncv <- 1 - mncv/nruns
tmncv <- 1 - tmncv/nruns
mncut <- apply(cutoffs,2,mean)
ccv <- crej/nruns
mantcov <- mantcov/nruns
list(chisqcut = chisqcut, mncut=mncut, medcv = medcv, mncv = mncv,
tmncv=tmncv, ccv=ccv, mantcov=mantcov)
}
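## Illustrative call for manbtsim4, a sketch (assumes tmn, predreg, and
## mantyp from this file are sourced, with library(MASS) and
## library(Matrix) attached); with delta1 > 0 the rejection proportions
## estimate power:
# out <- manbtsim4(n1=100, n2=100, n3=100, m=2, B=200, nruns=100, delta1=0.5)
# out$mncv; out$ccv; out$mantcov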
manovasim<-function(n1=100,n2=100,n3=100,m=2,B=100,nruns=100,ytype=1, sig2=1,sig3=1,eps=0.4,dd=4,delta1=0,delta2=0,delta3=0,cov3I=F,alpha=0.05){ #needs library(Matrix) # This function simulates one way Manova type tests for # Ho: mu_1 = mu_2 = mu_3 where p = g = 3 = number of groups. # Has the classical test, bootstrap cutoff test, # large sample test, and the test using C_n = I. # yi is m by 1. # Need m > 1. Want n1 > 20m, n2 > 20m, n3 > 20m, B > 20m. # Multiply y by A where ytype = 1 for MVN Nm(0,I), # 2 for (1 - eps) Nm(0,I) + eps Nm(0, 25 I), # 3 for multivariate t_d with d = dd, # 4 for lognormal. # Power can be estimated by using unequal deltai so mu1 = delta1(1,...,1) # and mu2 = delta2(1, ..., 1), mu3 = delta3(1,...,1). # Cov(y1) = diag(1,2,...,m), Cov(y2) = sig^2 Cov(y1) for clean data. # Cov(y3) = sig^3 Cov(y1) for clean data if cov3I = F, # or Cov(y3) = cI_3 if cov3I = T. A <- sqrt(diag(1:m)) munot <- 0 * (1:m) mu1 <- delta1 * (1 + munot) mu2 <- delta2 * (1 + munot) mu3 <- delta3 * (1 + munot) indx1 <- 1:n1 indx2 <- 1:n2 indx3 <- 1:n3 mnmus <- matrix(0,nrow=B,ncol=2*m) mancov <- 0 mantcov <- 0 prcv <- 0 brcv <- 0 p <- 3 # number of groups q <- p-1 gp <- c(rep(1, n1),rep(2, n2),rep(3, n3)) grp<-factor(gp) num <- m*q n <- n1+n2+n3 dn <- min(n1,n2,n3) C <-matrix(1,nrow=q,ncol=q) cut <- num*qf(1-alpha,df1=num,df2=dn) bstat <- 1:B cstat <- 0 * 1:nruns bcut <- 1:nruns up <- min((1 - alpha/2), (1 - alpha + 20*alpha*m/B)) if(alpha > 0.1) up <- min((1 - alpha + 0.05), (1 - alpha + 2*m/B)) qn <- up if(qn < 1 - alpha + 0.001) up <- 1 - alpha for(i in 1:nruns) { #make data y1 <- matrix(rnorm(n1 * m), ncol = m, nrow = n1) y2 <- matrix(rnorm(n2 * m), ncol = m, nrow = n2) y3 <- matrix(rnorm(n3 * m), ncol = m, nrow = n3) if(ytype == 2) { zu <- runif(n1) y1[zu < eps, ] <- y1[zu < eps, ] * 5 zu <- runif(n2) y2[zu < eps, ] <- y2[zu < eps, ] * 5 zu <- runif(n3) y3[zu < eps, ] <- y3[zu < eps, ] * 5 } if(ytype == 3) { zu <- sqrt(rchisq(n1, dd)/dd) y1 <- y1/zu zu <- sqrt(rchisq(n2, dd)/dd) y2 <- y2/zu zu <- sqrt(rchisq(n3, dd)/dd) y3 <- y3/zu } if(ytype == 4){ y1 <- exp(y1) y1 <- y1 - exp(0.5) y2 <- exp(y2) y2 <- y2 - exp(0.5) y3 <- exp(y3) y3 <- y3 - exp(0.5) } y1 <- y1 %*% A y2 <- y2 %*% A y2 <- sig2 * y2 if( cov3I != T){ y3 <- y3 %*% A y3 <- sig3 * y3} y1 <- mu1 + y1 y2 <- mu2 + y2 y3 <- mu3 + y3 #yi has mean mui = deltai (1,...,1)^T #Get the classical test coverage Sp <- var(y3) Tp <- apply(y3,2,mean) S1 = var(y1) T1 <- apply(y1,2,mean) S2 = var(y2) T2 <- apply(y2,2,mean) w <- c(T1-Tp,T2-Tp) Spool <- ((n1-1)*S1 + (n2-1)*S2 + (n3-1)*Sp)/(n-p) D <- Spool/n3 ssigw <- kronecker(C,D) #still need to get the block diagonal of Sigw/n BD <- bdiag(Spool/n1,Spool/n2) ssigw <- ssigw + BD #ssigw =sigw/n, n = sum(ni) ssigw <- as.matrix(ssigw) sigw <- n*ssigw sigwinv <- solve(sigw) t0 <- n* t(w) %*% sigwinv %*% w if(t0 < cut) mancov <- mancov + 1 cstat[i] <- t0 #get bootstrapped Ty - Ty3, Ty2 - Ty3 for various statistics T for(j in 1:B){ tem <- sample(indx1,n1,replace=T) mny1 <- apply(y1[tem,],2,mean) tem <- sample(indx2,n2,replace=T) mny2 <- apply(y2[tem,],2,mean) tem <- sample(indx3,n3,replace=T) mny3 <- apply(y3[tem,],2,mean) mnmus[j,] <- c(mny1-mny3,mny2-mny3) bw <- mnmus[j,] - w bstat[j] <- n*t(bw)%*%sigwinv%*%bw } bcut[i] <- quantile(bstat,up) outmn <- identrgn(mnmus,stat=w,alpha=alpha) prcv <- prcv + outmn$inr brcv <- brcv + outmn$inr2 #large sample Manova type test based on the sample mean D <- Sp/n3 ssigw <- kronecker(C,D) #still need to get the block diagonal of Sigw/n BD <- bdiag(S1/n1,S2/n2) ssigw <- 
ssigw + BD #ssigw = sigw/n, n = sum(ni)
ssigw <- as.matrix(ssigw)
sigw <- n*ssigw
t0 <- n* t(w) %*% solve(sigw) %*% w
if(t0 < cut) mantcov <- mantcov + 1
}
mancov <- mancov/nruns
mantcov <- mantcov/nruns
bootcov <- sum(cstat < bcut)/nruns
prcv <- prcv/nruns
brcv <- brcv/nruns
list(cut=cut, mancov=mancov, bootcov=bootcov, manLScov=mantcov, prcv=prcv,
brcv=brcv)
}

mantyp<-function(y,p,group,alph=0.05){
#Does a one way MANOVA type test based on large sample theory.
#needs library(Matrix)
#p is the number of groups and 1 < p < 10
#group = i if y is from the ith group
#n = sum(ni)
group <- as.factor(group)
y <- as.matrix(y)
n <- dim(y)[1]
m <- dim(y)[2]
Sp <- var(y[group==p,])
Tp <- apply(y[group==p,],2,mean)
ni <- 1:p
q <- p-1
num <- m*q
ni[p] <- sum(group==p)
for(i in 1:q){
ni[i] <- sum(group==i)
if(i==1) {S1 = var(y[group==1,])
T1 <- apply(y[group==1,],2,mean)}
if(i==2) {S2 = var(y[group==2,])
T2 <- apply(y[group==2,],2,mean)}
if(i==3) {S3 = var(y[group==3,])
T3 <- apply(y[group==3,],2,mean)}
if(i==4) {S4 = var(y[group==4,])
T4 <- apply(y[group==4,],2,mean)}
if(i==5) {S5 = var(y[group==5,])
T5 <- apply(y[group==5,],2,mean)}
if(i==6) {S6 = var(y[group==6,])
T6 <- apply(y[group==6,],2,mean)}
if(i==7) {S7 = var(y[group==7,])
T7 <- apply(y[group==7,],2,mean)}
if(i==8) {S8 = var(y[group==8,])
T8 <- apply(y[group==8,],2,mean)}
}
dn <- min(ni)
A <- matrix(1,nrow=q,ncol=q)
B <- Sp/ni[p]
ssigw <- kronecker(A,B)
#still need to get the block diagonal of Sigw/n
if(p==2) BD <- S1/ni[1]
if(p==3) BD <- bdiag(S1/ni[1],S2/ni[2])
if(p==4) BD <- bdiag(S1/ni[1],S2/ni[2],S3/ni[3])
if(p==5) BD <- bdiag(S1/ni[1],S2/ni[2],S3/ni[3],S4/ni[4])
if(p==6) BD <- bdiag(S1/ni[1],S2/ni[2],S3/ni[3],S4/ni[4],S5/ni[5])
if(p==7) BD <- bdiag(S1/ni[1],S2/ni[2],S3/ni[3],S4/ni[4],S5/ni[5],S6/ni[6])
if(p==8) BD <- bdiag(S1/ni[1],S2/ni[2],S3/ni[3],S4/ni[4],S5/ni[5],S6/ni[6],
S7/ni[7])
if(p==9) BD <- bdiag(S1/ni[1],S2/ni[2],S3/ni[3],S4/ni[4],S5/ni[5],S6/ni[6],
S7/ni[7],S8/ni[8])
ssigw <- ssigw + BD #ssigw = sigw/n, n = sum(ni)
n <- sum(ni)
ssigw <- as.matrix(ssigw)
sigw <- n*ssigw
if(p==2) w <- T1 - Tp
if(p==3) w <- c(T1-Tp,T2-Tp)
if(p==4) w <- c(T1-Tp,T2-Tp,T3-Tp)
if(p==5) w <- c(T1-Tp,T2-Tp,T3-Tp,T4-Tp)
if(p==6) w <- c(T1-Tp,T2-Tp,T3-Tp,T4-Tp,T5-Tp)
if(p==7) w <- c(T1-Tp,T2-Tp,T3-Tp,T4-Tp,T5-Tp,T6-Tp)
if(p==8) w <- c(T1-Tp,T2-Tp,T3-Tp,T4-Tp,T5-Tp,T6-Tp,T7-Tp)
if(p==9) w <- c(T1-Tp,T2-Tp,T3-Tp,T4-Tp,T5-Tp,T6-Tp,T7-Tp,T8-Tp)
t0 <- n* t(w) %*% solve(sigw) %*% w
pval <- 1 - pf((t0/num),df1=num,df2=dn) #approx 1 - pchisq(t0,num)
cut <- num*qf(1-alph,df1=num,df2=dn) #reject Ho if t0 > cut
list(t0=t0, cut=cut, pval=pval)
}

#used in rmltreg
mbalata<-function(x, y, k=6, nsamp = 7)
{#gets the median ball fit with 7 centers, med resid crit, 7 ball sizes
x <- as.matrix(x)
n <- dim(x)[1]
q <- dim(x)[2]
# q + 1 is number of predictors including intercept
vals <- c(q + 3 + floor(n/100), q + 3 + floor(n/40), q + 3 + floor(n/20),
q + 3 + floor(n/10), q + 3 + floor(n/5), q + 3 + floor(n/3),
q + 3 + floor(n/2))
covv <- diag(q)
centers <- sample(n, nsamp)
temp <- lsfit(x, y)
mbaf <- temp$coef
## get LATA criterion
res <- temp$residuals
crit <- k^2*median(res^2)
cn <- sum(res^2 <= crit)
absres <- sort(abs(res))
critf <- sum(absres[1:cn])
##
for(i in 1:nsamp) {
md2 <- mahalanobis(x, center = x[centers[i], ], covv)
smd2 <- sort(md2)
for(j in 1:7) {
temp <- lsfit(x[md2 <= smd2[vals[j]], ], y[md2 <= smd2[vals[j]]])
#Use OLS on rows with md2 <= cutoff = smd2[vals[j]]
res <- y - temp$coef[1] - x %*% temp$coef[-1]
## get LATA criterion
crit <- k^2*median(res^2)
cn <- sum(res^2 <=
crit) absres <- sort(abs(res)) crit <- sum(absres[1:cn]) ## if(crit < critf) { critf <- crit mbaf <- temp$coef } } } list(coef = mbaf, critf = critf) } mbamv<-function(x, y, nsamp = 7) {# This function is for simple linear regression. The # highlighted boxes get weight 1. Click on right # mouse button to advance plot, and in R, highlight "stop." Only uses 50% trimming. x <- as.matrix(x) n <- dim(x)[1] q <- dim(x)[2] covv <- diag(q) centers <- sample(n, nsamp) for(i in 1:nsamp) { md2 <- mahalanobis(x, center = x[centers[i], ], covv) med <- median(md2) plot(x, y) points(x[md2 < med], y[md2 < med], pch = 15) abline(lsfit(x[md2 < med],y[md2 < med])) identify(x, y) } } mbamv2<-function(x, Y, nsamp = 7) { # This function is for multiple linear regression. The # highlighted boxes get weight 1. Click on right # mouse button to advance plot, and in R, highlight "stop." Only uses 50% trimming. x <- as.matrix(x) n <- dim(x)[1] q <- dim(x)[2] covv <- diag(q) centers <- sample(n, nsamp) par(mfrow=c(2,1)) for(i in 1:nsamp) { md2 <- mahalanobis(x, center = x[centers[i], ], covv) med <- median(md2) if(q ==1){out <- lsfit(x[md2 < med],Y[md2 < med])} else{out <- lsfit(x[md2 < med,],Y[md2 < med])} FIT <- out$coef[1] + x%*%out$coef[-1] RES <- Y - FIT plot(FIT,Y) points(FIT[md2 < med], Y[md2 < med], pch = 15) abline(0,1) identify(FIT, Y) plot(FIT,RES) points(FIT[md2 < med], RES[md2 < med], pch = 15) abline(0,0) identify(FIT, RES) } par(mfrow=c(1,1)) } ##used in rmltreg, rrplot2 mbareg<-function(x, y, nsamp = 7) {#gets the mbareg fit with 7 centers, med resid crit, 7 ball sizes x <- as.matrix(x) n <- dim(x)[1] q <- dim(x)[2] # q + 1 is number of predictors including intercept vals <- c(q + 3 + floor(n/100), q + 3 + floor(n/40), q + 3 + floor(n/20 ), q + 3 + floor(n/10), q + 3 + floor(n/5), q + 3 + floor(n/3), q + 3 + floor(n/2)) covv <- diag(q) centers <- sample(n, nsamp) temp <- lsfit(x, y) mbaf <- temp$coef critf <- median(temp$residuals^2) for(i in 1:nsamp) { md2 <- mahalanobis(x, center = x[centers[i], ], covv) smd2 <- sort(md2) for(j in 1:7) { temp <- lsfit(x[md2 <= smd2[vals[j]], ], y[md2 <= smd2[ vals[j]]]) #Use OLS on rows with md2 <= cutoff = smd2[vals[j]] res <- y - temp$coef[1] - x %*% temp$coef[-1] crit <- median(res^2) if(crit < critf) { critf <- crit mbaf <- temp$coef } } } list(coef = mbaf, critf = critf) } #used in ddplot2 mbsim <-function(n = 100, p = 2, csteps = 5, gam = 0.4, runs = 20, outliers = 0, pm = 10, cscale=F){ # Need p > 1. # This R function simulates the biased MB estimator. 
# outliers = 0 for no outliers and X~N(0,diag(1,...,p)), # 1 for outliers a point mass on major axis (0,...,0,pm)' # 2 for outliers a point mass on minor axis (pm,0, ...,0)' # 3 for outliers X~N((pm,...,pm)',diag(1,...,p)) # 4 for outliers X[i,p] = pm # 5 for outliers X[i,1] = pm # 6 for outliers a tight cluster at major axis (0,...,0,pm)' #set.seed(974) A <- sqrt(diag(1:p)) cloc <- 0 * (1:p) csig <- 0 * A mbloc <- cloc mbsig <- csig cct <- 0 mbct <- 0 val <- floor(gam * n) for(i in 1:runs) { x <- matrix(rnorm(n * p), ncol = p, nrow = n) x <- x %*% A if(outliers == 1) { x[1:val, ] <- 0 x[1:val,p] <- pm } if(outliers == 2) { x[1:val, ] <- 0 x[1:val,1] <- pm } if(outliers == 3) { tem <- pm + 0 * 1:p x[1:val, ] <- x[1:val, ] + tem } if(outliers == 4) { x[1:val, p ] <- pm } if(outliers == 5) { x[1:val, 1 ] <- pm } if(outliers == 6) { x[1:val, ] <- matrix(rnorm(val * p, sd = 0.01), ncol = p, nrow = val) x[1:val, p] <- x[1:val, p] + pm } out <- covmb(x, steps = csteps, scale = cscale) mbloc <- mbloc + out$center mbsig <- mbsig + out$cov rd2 <- mahalanobis(x, out$center, out$cov) if(min(rd2[1:val]) > max(rd2[(val + 1):n])) mbct <- mbct + 1 cmn <- apply(x,2,mean) cloc <- cloc + cmn ccov <- var(x) csig <- csig + ccov rd2 <- mahalanobis(x, cmn, ccov) if(min(rd2[1:val]) > max(rd2[(val + 1):n])) cct <- cct + 1 } cloc <- cloc/runs csig <- csig/runs mbloc <- mbloc/runs mbsig <- mbsig/runs list(cloc = cloc, mbloc = mbloc, csig = csig, mbsig=mbsig, cct = cct, mbct=mbct) } medci<-function(x, alpha = 0.05) {#Gets Bloch and Gastwirth SE for sample median and the #corresponding resistant 100(1-alpha)% CI. The default is alpha = .05. n <- length(x) up <- 1 - alpha/2 med <- median(x) ln <- floor(n/2) - ceiling(sqrt(n/4)) un <- n - ln d <- sort(x) rdf <- un - ln - 1 cut <- qt(up, rdf) sebg <- 0.5 * (d[un] - d[ln + 1]) rval <- cut * sebg rlo <- med - rval rhi <- med + rval list(int = c(rlo, rhi), med = med, sebg = sebg) } medhotsim<-function(n = 100, p = 2, csteps = 5, gam = 0.4, nruns = 100, xtype=1, outliers = 0, pm = 10, eps=0.25, dd= 1, delta = 0, BB=1000){ # Simulates coord. median bootstrap Hotelling's T^2 type test. # Need p > 1. Want n > 20p and number of bootstrap samples BB > 20p. # Multiply x by A where xtype = 1 for MVN Np(0,I), # 2, 3, 4 and 5 (with delta = eps) for (1 - delta) Np(0,I) + delta Np(0, 25 I) # 6, 7, 8 and 9 for multivariate t_d with d = 3, 5, 19 or dd # 10 for lognormal. # outliers = 0 for no outliers and X~N(0,diag(1,...,p)), # 1 for outliers a tight cluster at major axis (0,...,0,pm)' # 2 for outliers a tight cluster at minor axis (pm,0, ...,0)' # 3 for outliers X~N((pm,...,pm)',diag(1,...,p)) # 4 for outliers X[i,p] = pm # 5 for outliers X[i,1] = pm # Power can be estimated by increasing delta so mu = delta(1,...,1) # and mu_o = 0*mu. A <- sqrt(diag(1:p)) hotcv <- 0 munot <- 0 * (1:p) mu <- delta * (1 + munot) val <- floor(gam * n) for(i in 1:nruns) { #make data x <- matrix(rnorm(n * p), ncol = p, nrow = n) if(xtype == 2) { zu <- runif(n) x[zu < 0.4, ] <- x[zu < 0.4, ] * 5 } if(xtype == 3) { zu <- runif(n) x[zu < 0.6, ] <- x[zu < 0.6, ] * 5 } if(xtype == 4) { zu <- runif(n) x[zu < 0.1, ] <- x[zu < 0.1, ] * 5 } if(xtype == 5) { zu <- runif(n) x[zu < eps, ] <- x[zu < eps, ] * 5 } if(xtype == 6) { zu <- sqrt(rchisq(n, 3)/3) x <- x/zu } if(xtype == 7) { zu <- sqrt(rchisq(n, 5)/5) x <- x/zu } if(xtype == 8) { zu <- sqrt(rchisq(n, 19)/19) x <- x/zu } if(xtype == 9) { zu <- sqrt(rchisq(n, dd)/dd) x <- x/zu } if(xtype == 10){ #Want pop coord med(x) = 0. 
x <- exp(x)
x <- x - 1
}
x <- x %*% A
if(outliers == 1) {
x[1:val, ] <- matrix(rnorm(val * p, sd = 0.01), ncol = p, nrow = val)
x[1:val, p] <- x[1:val, p] + pm
}
if(outliers == 2) {
x[1:val, ] <- matrix(rnorm(val * p, sd = 0.01), ncol = p, nrow = val)
x[1:val, 1] <- x[1:val, 1] + pm
}
if(outliers == 3) {
tem <- pm + 0 * 1:p
x[1:val, ] <- x[1:val, ] + tem
}
if(outliers == 4) {
x[1:val, p] <- pm
}
if(outliers == 5) {
x[1:val, 1] <- pm
}
x <- mu + x
#get bootstrapped coord. median
out <- rhotboot(x,B=BB)
hotcv <- hotcv + predreg(out$mus)$inr
}
hotcv <- 1 - hotcv/nruns #prop of times Ho is rejected
list(hotcv = hotcv)
}

medout<-function(x)
{# finds squared Euclidean distances from the coordinatewise median
x <- as.matrix(x)
p <- dim(x)[2]
covv <- diag(p)
med <- apply(x, 2, median)
d2 <- mahalanobis(x, center = med, covv)
plot(d2)
list(d2 = d2)
}

mldsim<-function(n = 100, p = 2, steps = 5, gam = 0.4, runs = 20,
outliers = 0, pm = 10, loc = 0.5){
# Need p > 1.
# This R function compares the MBA, FCH, RFCH, RMVN, scaled DGK, classical,
# OGK, FASTMCD, CMVE and biased MB estimators.
# R needs library(robustbase), calls covrob, covdgk
# outliers = 0 for no outliers and X~N(0,diag(1,...,p)),
# 1 for outliers a point mass on major axis (0,...,0,pm)'
# 2 for outliers a point mass on minor axis (pm,0, ...,0)'
# 3 for outliers X~N((pm,...,pm)',diag(1,...,p))
# 4 for outliers X[i,p] = pm
# 5 for outliers X[i,1] = pm
# 6 for outliers a tight cluster at major axis (0,...,0,pm)'
# Also finds n*var(T[p]), which is approx p for the classical estimator,
# and n var(C(p,p)) for the MLD estimator (T,C).
A <- sqrt(diag(1:p))
qchi <- qchisq(0.5, p)
cs <- 1:runs; cm <- cs
cloc <- 0 * (1:p); csig <- 0 * A
mbas <- 1:runs; mbam <- cs; mbaloc <- cloc; mbasig <- csig
fchs <- mbas; fchm <- cs; fchloc <- cloc; fchsig <- csig
rfchs <- mbas; rfchm <- cs; rfchloc <- cloc; rfchsig <- csig
rmvns <- mbas; rmvnm <- cs; rmvnloc <- cloc; rmvnsig <- csig
dgks <- mbas; dgkm <- cs; dgkloc <- cloc; dgksig <- csig
ogks <- mbas; ogkm <- cs; ogkloc <- cloc; ogksig <- csig
fmcds <- mbas; fmcdm <- cs; fmcdloc <- cloc; fmcdsig <- csig
cmveloc <- cloc; cmvesig <- csig
mbs <- mbas; mbm <- cs; mbsig <- csig; mbloc <- cloc
mbact <- 0; fchct <- 0; rfchct <- 0; rmvnct <- 0; dgkct <- 0
cct <- 0; ogkct <- 0; fmcdct <- 0; cmvect <- 0; mbct <- 0
val <- floor(gam * n)
for(i in 1:runs) {
x <- matrix(rnorm(n * p), ncol = p, nrow = n)
x <- x %*% A
if(outliers == 1) {
x[1:val, ] <- 0
x[1:val,p] <- pm
}
if(outliers == 2) {
x[1:val, ] <- 0
x[1:val,1] <- pm
}
if(outliers == 3) {
tem <- pm + 0 * 1:p
x[1:val, ] <- x[1:val, ] + tem
}
if(outliers == 4) {
x[1:val, p ] <- pm
}
if(outliers == 5) {
x[1:val, 1 ] <- pm
}
if(outliers == 6) {
x[1:val, ] <- matrix(rnorm(val * p, sd = 0.01), ncol = p, nrow = val)
x[1:val, p] <- x[1:val, p] + pm
}
out <- covrob(x, csteps = steps, locc = loc)
mbaloc <- mbaloc + out$center
mbasig <- mbasig + out$cov
mbam[i] <- out$center[p]
mbas[i] <- out$cov[p,p]
rd2 <- mahalanobis(x, out$center, out$cov)
if(min(rd2[1:val]) > max(rd2[(val + 1):n])) mbact <- mbact + 1
rd2 <- mahalanobis(x, out$mnm, out$covm)
const <- median(rd2)/qchi
covm <- const * out$covm
mbloc <- mbloc + out$mnm
mbsig <- mbsig + covm
mbm[i] <- out$mnm[p]
mbs[i] <- covm[p,p]
#scaling does not affect the outlier count
if(min(rd2[1:val]) > max(rd2[(val + 1):n])) mbct <- mbct + 1
fchloc <- fchloc + out$mnf
fchsig <- fchsig + out$covf
fchm[i] <- out$mnf[p]
fchs[i] <- out$covf[p,p]
rd2 <- mahalanobis(x, out$mnf, out$covf)
if(min(rd2[1:val]) > max(rd2[(val + 1):n])) fchct <- fchct + 1
rfchloc <- rfchloc + out$rmnf
rfchsig <-
rfchsig + out$rcovf rfchm[i] <- out$rmnf[p] rfchs[i] <- out$rcovf[p,p] rd2 <- mahalanobis(x, out$rmnf, out$rcovf) if(min(rd2[1:val]) > max(rd2[(val + 1):n])) rfchct <- rfchct + 1 rmvnloc <- rmvnloc + out$rmnmvn rmvnsig <- rmvnsig + out$rcovmvn rmvnm[i] <- out$rmnmvn[p] rmvns[i] <- out$rcovmvn[p,p] rd2 <- mahalanobis(x, out$rmnmvn, out$rcovmvn) if(min(rd2[1:val]) > max(rd2[(val + 1):n])) rmvnct <- rmvnct + 1 cmveloc <- cmveloc + out$mncmv cmvesig <- cmvesig + out$covcmv rd2 <- mahalanobis(x, out$mncmv, out$covcmv) if(min(rd2[1:val]) > max(rd2[(val + 1):n])) cmvect <- cmvect + 1 out <- covdgk(x, csteps = 10) dgkloc <- dgkloc + out$center dgksig <- dgksig + out$cov dgkm[i] <- out$center[p] dgks[i] <- out$cov[p,p] rd2 <- mahalanobis(x, out$center, out$cov) if(min(rd2[1:val]) > max(rd2[(val + 1):n])) dgkct <- dgkct + 1 center <- apply(x,2,mean) cloc <- cloc + center ccov <- var(x) csig <- csig + ccov cm[i] <- center[p] cs[i] <- ccov[p,p] rd2 <- mahalanobis(x, center, ccov) if(min(rd2[1:val]) > max(rd2[(val + 1):n])) cct <- cct + 1 out <- covOGK(x,sigmamu = scaleTau2) ogkloc <- ogkloc + out$center ogksig <- ogksig + out$cov ogkm[i] <- out$center[p] ogks[i] <- out$cov[p,p] rd2 <- mahalanobis(x, out$center, out$cov) if(min(rd2[1:val]) > max(rd2[(val + 1):n])) ogkct <- ogkct + 1 out <- covMcd(x) #for Det-MCD, use #out <- covMcd(x,nsamp="deterministic") #Use out <- covmb2(x) for the covmb2 estimator. fmcdloc <- fmcdloc + out$center fmcdsig <- fmcdsig + out$cov fmcdm[i] <- out$center[p] fmcds[i] <- out$cov[p,p] rd2 <- mahalanobis(x, out$center, out$cov) if(min(rd2[1:val]) > max(rd2[(val + 1):n])) fmcdct <- fmcdct + 1 } mbasv <- n*var(mbas) mbam <- n*var(mbam) mbaloc <- mbaloc/runs mbasig <- mbasig/runs fchsv <- n*var(fchs) fchm <- n*var(fchm) fchloc <- fchloc/runs fchsig <- fchsig/runs rfchsv <- n*var(rfchs) rfchm <- n*var(rfchm) rfchloc <- rfchloc/runs rfchsig <- rfchsig/runs rmvnsv <- n*var(rmvns) rmvnm <- n*var(rmvnm) rmvnloc <- rmvnloc/runs rmvnsig <- rmvnsig/runs dgksv <- n*var(dgks) dgkm <- n*var(dgkm) dgkloc <- dgkloc/runs dgksig <- dgksig/runs ogksv <- n*var(ogks) ogkm <- n*var(ogkm) ogkloc <- ogkloc/runs ogksig <- ogksig/runs csv <- n*var(cs) cm <- n*var(cm) cloc <- cloc/runs csig <- csig/runs fmcdsv <- n*var(fmcds) fmcdm <- n*var(fmcdm) fmcdloc <- fmcdloc/runs fmcdsig <- fmcdsig/runs cmveloc <- cmveloc/runs cmvesig <- cmvesig/runs mbsv <- n*var(mbs) mbm <- n*var(mbm) mbloc <- mbloc/runs mbsig <- mbsig/runs list(mbaloc = mbaloc, fchloc = fchloc, rfchloc = rfchloc, rmvnloc = rmvnloc,dgkloc = dgkloc, cloc = cloc, ogkloc = ogkloc, fmcdloc = fmcdloc, cmveloc = cmveloc, mbloc = mbloc, mbasig = mbasig, fchsig = fchsig, rfchsig = rfchsig, rmvnsig = rmvnsig, dgksig = dgksig, csig = csig, ogksig = ogksig, fmcdsig = fmcdsig, cmvesig = cmvesig, mbsig=mbsig, mbasv = mbasv, fchsv=fchsv, rfchsv = rfchsv, rmvnsv=rmvnsv, dgksv=dgksv, ogksv=ogksv, csv=csv, fmcdsv=fmcdsv, mbsv=mbsv, mbam=mbam, fchm=fchm, rfchm=rfchm, rmvnm=rmvnm, dgkm=dgkm, ogkm=ogkm, cm=cm, fmcdm=fmcdm, mbm=mbm, cct = cct, dgkct=dgkct, mbact=mbact, fchct = fchct, rfchct = rfchct, rmvnct=rmvnct, ogkct=ogkct, fmcdct=fmcdct,cmvect=cmvect,mbct=mbct) } mldsim6<-function(n = 100, p = 2, steps = 5, gam = 0.4, runs = 100, outliers = 0, pm = 10, kk=5, osteps = 0){ # This R function compares the # FCH, RFCH, CMVE, RCMVE, RMVN, COVMB2, and MB estimators. 
# outliers = 0 for no outliers and X~N(0,diag(1,...,p)),
# 1 for outliers a tight cluster at major axis (0,...,0,pm)'
# 2 for outliers a tight cluster at minor axis (pm,0, ...,0)'
# 3 for outliers X~N((pm,...,pm)',diag(1,...,p))
# 4 for outliers X[i,p] = pm
# 5 for outliers X[i,1] = pm
# Calls cmve, covfch, covrmvn, covmb2
A <- sqrt(diag(1:p))
fchct <- 0; rfchct <- 0; cmvect <- 0; rcmvect <- 0
rmvnct <- 0; mbct <- 0; covmb2ct <- 0
val <- floor(gam * n)
for(i in 1:runs) {
x <- matrix(rnorm(n * p), ncol = p, nrow = n)
x <- x %*% A
if(outliers == 1) {
x[1:val, ] <- matrix(rnorm(val * p, sd = 0.01), ncol = p, nrow = val)
x[1:val, p] <- x[1:val, p] + pm
}
if(outliers == 2) {
x[1:val, ] <- matrix(rnorm(val * p, sd = 0.01), ncol = p, nrow = val)
x[1:val, 1] <- x[1:val, 1] + pm
}
if(outliers == 3) {
tem <- pm + 0 * 1:p
x[1:val, ] <- x[1:val, ] + tem
}
if(outliers == 4) {
x[1:val, p] <- pm
}
if(outliers == 5) {
x[1:val, 1] <- pm
}
out <- covfch(x, csteps = steps)
rd2 <- mahalanobis(x, out$center, out$cov)
if(min(rd2[1:val]) > max(rd2[(val + 1):n])) fchct <- fchct + 1
rd2 <- mahalanobis(x, out$rmnf, out$rcovf)
if(min(rd2[1:val]) > max(rd2[(val + 1):n])) rfchct <- rfchct + 1
out <- cmve(x, csteps = steps)
rd2 <- mahalanobis(x, out$center, out$cov)
if(min(rd2[1:val]) > max(rd2[(val + 1):n])) cmvect <- cmvect + 1
rd2 <- mahalanobis(x, out$rmnf, out$rcovf)
if(min(rd2[1:val]) > max(rd2[(val + 1):n])) rcmvect <- rcmvect + 1
rd2 <- mahalanobis(x, out$mnm, out$covm)
if(min(rd2[1:val]) > max(rd2[(val + 1):n])) mbct <- mbct + 1
out <- covrmvn(x, csteps = steps)
rd2 <- mahalanobis(x, out$center, out$cov)
if(min(rd2[1:val]) > max(rd2[(val + 1):n])) rmvnct <- rmvnct + 1
out <- covmb2(x, k=kk, msteps=osteps)
rd2 <- mahalanobis(x, out$center, out$cov)
if(min(rd2[1:val]) > max(rd2[(val + 1):n])) covmb2ct <- covmb2ct + 1
}
list(fchct = fchct, rfchct = rfchct, cmvect = cmvect, rcmvect = rcmvect,
rmvnct = rmvnct, covmb2ct = covmb2ct, mbct = mbct)
}

mldsim7<-function(n = 100, p = 2, gam = 0.4, runs = 100, outliers = 0,
pm = 10, kk=5, osteps = 0){
# This R function examines the COVMB2 estimator.
#The function mldsim6 uses the weighted covariance matrix out$cov.
#The function mldsim7 uses squared Euclidean distances based on Ip,
#or the squared Mahalanobis distances using diag(out$cov).
# Counts the number of times all outlier distances > clean distances.
# outliers = 0 for no outliers and X~N(0,diag(1,...,p)),
# 1 for outliers a tight cluster at major axis (0,...,0,pm)'
# 2 for outliers a tight cluster at minor axis (pm,0, ...,0)'
# 3 for outliers X~N((pm,...,pm)',diag(1,...,p))
# 4 for outliers X[i,p] = pm
# 5 for outliers X[i,1] = pm
# Calls covmb2
A <- sqrt(diag(1:p))
covv <- diag(p) #identity matrix Ip
covmb2ct <- 0
diagct <- 0
val <- floor(gam * n)
for(i in 1:runs) {
x <- matrix(rnorm(n * p), ncol = p, nrow = n)
x <- x %*% A
if(outliers == 1) {
x[1:val, ] <- matrix(rnorm(val * p, sd = 0.01), ncol = p, nrow = val)
x[1:val, p] <- x[1:val, p] + pm
}
if(outliers == 2) {
x[1:val, ] <- matrix(rnorm(val * p, sd = 0.01), ncol = p, nrow = val)
x[1:val, 1] <- x[1:val, 1] + pm
}
if(outliers == 3) {
tem <- pm + 0 * 1:p
x[1:val, ] <- x[1:val, ] + tem
}
if(outliers == 4) {
x[1:val, p] <- pm
}
if(outliers == 5) {
x[1:val, 1] <- pm
}
out <- covmb2(x, k=kk, msteps=osteps)
rd2 <- mahalanobis(x, out$center, covv)
if(min(rd2[1:val]) > max(rd2[(val + 1):n])) covmb2ct <- covmb2ct + 1
rd2 <- mahalanobis(x, out$center, diag(diag(out$cov)))
if(min(rd2[1:val]) > max(rd2[(val + 1):n])) diagct <- diagct + 1
}
list( covmb2ct = covmb2ct, diagct = diagct)
}

MLRplot<-function(x, Y){
# Response plot and residual plot.
# Workstation: need to activate a graphics
# device with command "X11()" or "motif()."
# R needs command "library(lqs)" if a robust estimator replaces lsfit.
# Advance the view with the right mouse button.
x <- as.matrix(x)
out <- lsfit(x, Y)
cook <- ls.diag(out)$cooks
n <- dim(x)[1]
p <- dim(x)[2] + 1
tem <- cook > min(0.5, (2 * p)/n)
bhat <- out$coef
FIT <- Y - out$res
cmar <- par("mar")
par(mfrow = c(2, 1))
par(mar=c(4.0,4.0,2.0,0.5))
plot(FIT, Y)
abline(0, 1)
points(FIT[tem], Y[tem], pch = 15)
title("Response Plot")
identify(FIT, Y)
RES <- Y - FIT
plot(FIT, RES)
points(FIT[tem], RES[tem], pch = 15)
title("Residual Plot")
identify(FIT, RES)
par(mfrow = c(1, 1))
par(mar=cmar)
}

mlrplot2 <- function(x, Y)
{# Makes the response plot and residual plot for the MBA and MBALATA estimators.
# Workstation: need to activate a graphics
# device with command "X11()" or "motif()."
# R needs command "library(MASS)" if a robust estimator replaces lsfit.
# Advance the view with the right mouse button, and in R, highlight "stop."
x <- as.matrix(x)
out <- mbareg(x, Y)
bhat <- out$coef
FIT <- bhat[1] + x %*% bhat[-1]
par(mfrow = c(2, 2))
plot(FIT, Y)
abline(0, 1)
identify(FIT, Y)
title("MBA Response Plot")
RES <- Y - FIT
plot(FIT, RES)
identify(FIT, RES)
title("MBA Residual Plot")
#
out <- mbalata(x, Y)
bhat <- out$coef
FIT <- bhat[1] + x %*% bhat[-1]
plot(FIT, Y)
abline(0, 1)
identify(FIT, Y)
title("MBALATA Response Plot")
RES <- Y - FIT
plot(FIT, RES)
identify(FIT, RES)
title("MBALATA Residual Plot")
par(mfrow=c(1,1))
}

mlrplot4<-function(x, Y){
# This function is for R. Use mlrplot2 for R and Splus.
# Makes response plot and residual plot with large
# Cook's distances and Pena's distances highlighted.
# Squares have large Cook's distances
# and crosses have large Pena's distances.
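## Illustrative call for mlrplot4, a sketch with simulated data; right
## click Stop to advance past each identify() call:
# x <- matrix(rnorm(300), ncol=3)
# y <- x %*% c(1,1,1) + rnorm(100)
# mlrplot4(x, y)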
x <- as.matrix(x) out <- lsfit(x, Y) cook <- ls.diag(out)$cooks n <- dim(x)[1] p <- dim(x)[2] + 1 #get Pena's distances s one <- 1 + 0 * 1:n w <- cbind(one, x) cp <- t(w) %*% w h <- w %*% solve(cp) %*% t(w) s <- 0 * 1:n for(i in 1:n) { for(j in 1:n) { s[i] <- s[i] + (cook[j] * h[i, j]^2)/(h[i, i] * h[j, j]) } } tem <- cook > min(0.5, (2 * p)/n) medd <- median(s) madd <- mad(s, constant = 1) tem2 <- abs(s - medd) > 4.5 * madd bhat <- out$coef FIT <- bhat[1] + x %*% bhat[-1] cmar <- par("mar") par(mfrow = c(2, 1)) par(mar=c(4.0,4.0,2.0,0.5)) plot(FIT, Y) abline(0, 1) points(FIT[tem], Y[tem], pch = 0) points(FIT[tem2], Y[tem2], pch = 3) identify(FIT, Y) title("Response Plot") RES <- Y - FIT plot(FIT, RES) points(FIT[tem], RES[tem], pch = 0) points(FIT[tem2], RES[tem2], pch = 3) identify(FIT, RES) title("Residual Plot") par(mfrow = c(1, 1)) par(mar=cmar) } MLRsim<-function(n = 100, q = 7, nruns = 4, eps = 0.1, shift = 9, type = 1){ #Right click Stop for each plot. #Generates response and residual plots for MLR #for a few iid error distributions: # if type = 1 for N(0,1) errors, 2 for t3 errors, 3 for exp(1) - 1 errors # 4 for uniform(-1,1) errors, 5 for (1-eps) N(0,1) + eps N(0,(1+shift)^2) errors. # constant = 1 so there are p = q+1 coefficients b <- 0 * 1:q + 1 for(i in 1:nruns) { x <- matrix(rnorm(n * q), nrow = n, ncol = q) if(type == 1) { y <- 1 + x %*% b + rnorm(n) } if(type == 2) { y <- 1 + x %*% b + rt(n, df = 3) } if(type == 3) { y <- 1 + x %*% b + rexp(n) - 1 } if(type == 4) { y <- 1 + x %*% b + runif(n, min = -1, max = 1) } if(type == 5) { err <- rnorm(n, sd = 1 + rbinom(n, 1, eps) * shift) y <- 1 + x %*% b + err } #make an MLR data set MLRplot(x,y) #get the response and residual plots }} mltreg<-function(x, y, indices = c(1,2)){ ##Need p > 1, m > 1. # Does multivariate linear regression. # Advance the plot by highlighting Stop with the right mouse button. # Want n > mp and n > 10p for Hotelling Lawley pvalues. # The indices are the variables to be left out of the # reduced model for the MANOVA partial F test. x <- as.matrix(x) y <- as.matrix(y) n <- dim(x)[1] q <- dim(x)[2] p <- q+1 m <- dim(y)[2] r <- length(indices) #Get L for the MANOVA partial F test L <- matrix(0,nrow=r,ncol=p) for(i in 1:r) L[i,indices[i]] <- 1 res <- matrix(nrow = n, ncol = m, 0) fit <- res Bhat <- matrix(nrow = p, ncol = m, 0) # q + 1 = p is number of predictors including intercept for(i in 1:m){ out <- lsfit(x,y[,i]) res[,i] <- out$res fit[,i] <- y[,i] - res[,i] Bhat[,i] <- out$coef } for(i in 1:m){ MLRplot(x,y[,i]) } Covhat <- (n-1)/(n-p) * var(res) #Get pvalues for testing whether jth predictor is #needed in the model given the other predictors are in the model. one <- 1 + 0*1:n w <- cbind(one,x) pvals <- 0*1:p Fj <- pvals J <- t(w)%*%w J <- solve(J) Covinv <- solve(Covhat) dendf <- n - m*p if( n <= m*p) dendf <- 1 for(j in 1:p){ T <- t(Bhat[j,])%*%Covinv%*%Bhat[j,]/J[j,j] pvals[j] <- 1 - pf((T/m),m,dendf) Fj[j] <- T/m } #Get pval for MANOVA F test for whether the #nontrivial predictors are needed in the model. D <- as.matrix(Bhat[-1,]) if(p == 2) D <- t(D) tem <- as.matrix(J[-1,-1]) teminv <- solve(tem) Weinv <- Covinv/(n-p) H <- t(D)%*%teminv%*%D C <- Weinv%*%H eig <- eigen(C,symmetric=FALSE,only.values=TRUE)$values eig <- as.double(eig) #as.real(eig) u <- sum(eig) MANOVAF <- (n-p)*u/((p-1)*m) pval <- 1 - pf(MANOVAF,(p-1)*m,dendf) MANOVA <- cbind(MANOVAF, pval) #got MANOVA F test summaries #Get pval for MANOVA partial F test for whether the #predictors given by indices are needed in the model. 
tem <- L%*%J%*%t(L)
teminv <- solve(tem)
H <- t(L%*%Bhat)%*%teminv%*%(L%*%Bhat)
C <- Weinv%*%H
eig <- eigen(C,symmetric=FALSE,only.values=TRUE)$values
eig <- as.double(eig) # as.real(eig)
u <- sum(eig)
partialF <- (n-p)*u/(r*m)
Pval <- 1 - pf(partialF,r*m,dendf)
partial <- cbind(partialF, Pval)
##got partial F test summaries
Ftable <- cbind(Fj,pvals)
list(fit = fit, res = res, Covhat = Covhat, Bhat = Bhat, partial=partial,
Ftable=Ftable, MANOVA=MANOVA)
}

modIboot<-function(x,y,B = 1000){
#needs library(leaps), n > 5p, p > 2
#bootstrap the I_I model for forward selection regression
#Does not make sense to do variable selection if there
#is only one nontrivial predictor.
x <- as.matrix(x)
n <- length(y)
p <- 1 + dim(x)[2]
vmax <- min(p,as.integer(n/5))
vars <- as.vector(1:(p-1))
#get the full model
full <- lsfit(x,y)
res <- full$resid
fit <- y - res
#get the I_I submodel
tem <- regsubsets(x,y,nvmax=vmax,method="forward")
out <- summary(tem)
num <- 1:length(out$cp)
tnum <- num[out$cp <= min(out$cp)+1]
Icp <- out$cp[min(tnum)]
modI <- out$which[out$cp==Icp,]
#do not need the constant in vin
vin <- vars[modI[-1]]
sub <- lsfit(x[,vin],y)
betas <- matrix(0,nrow=B,ncol=p)
#bootstrap the I_I submodel
for(i in 1:B){
yb <- fit + sample(res,n,replace=T)
tem <- regsubsets(x,y=yb,method="forward")
out <- summary(tem)
num <- 1:length(out$cp)
tnum <- num[out$cp <= min(out$cp)+1]
Icp <- out$cp[min(tnum)]
modI <- out$which[out$cp==Icp,]
vin <- vars[modI[-1]]
indx <- c(1,1+vin)
betas[i,indx] <- lsfit(x[,vin],yb)$coef
}
list(full=full,sub=sub,betas=betas)
}

modIpisim<-function(n = 100, p = 4, k = 1, nruns = 100, eps = 0.1, shift = 9,
psi = 0.0, type = 1, alpha = 0.05){
#Needs library(leaps).
#Simulates PIs for forward selection variable selection for model I_I.
# 1 <= k <= p-1, k is the number of nonnoise variables
#Uses five iid error distributions:
# type = 1 for N(0,1) errors, 2 for t3 errors, 3 for exp(1) - 1 errors
# 4 for uniform(-1,1) errors, 5 for (1-eps) N(0,1) + eps N(0,(1+shift)^2)
#errors.
# constant = 1 so there are p = q+1 coefficients
#need p > 1, beta = (1, 1, ..., 1, 0, ..., 0) with k+1 ones, p-k-1 zeroes
# Multiply x by A: for MVN data this results
# in a covariance matrix with eigenvector c(1, ..., 1)^T
# corresponding to the largest eigenvalue. As psi gets
# close to 1, the data clusters about the line in the
# direction of (1, ..., 1)^T. See Maronna and Zamar (2002).
# cor(X_i,X_j) = [2 psi + (q-2)psi^2]/[1 + (q-1)psi^2], i not = j
# when the correlation exists.
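## Illustrative call for modIpisim, a sketch (needs library(leaps)):
# out <- modIpisim(n=100, p=4, k=1, nruns=100, type=1, alpha=0.05)
# out$opicov #PI coverage, should be near 0.95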
set.seed(974)
corfac <- (1 + 15/n) * sqrt( (n+2*p)/(n - p) )
if (alpha > 0.1) {qn <- min(1 - alpha + 0.05, 1 - alpha + p/n)}
if (alpha <= 0.1) {qn <- min(1 - alpha/2, 1 - alpha + 10*alpha*p/n)}
pn <- qn
if(pn < 1 - alpha + 0.001) qn <- 1 - alpha
alphan <- 1 - qn
pilen <- 1:nruns
ps <- pilen
opicov <- 0
q <- p-1
vmax <- min(p,as.integer(n/5))
rho <- (2*psi + (q-2)*psi^2)/(1 + (q-1)*psi^2)
A <- matrix(psi,nrow=q,ncol=q)
diag(A) <- 1
b <- 0 * 1:q
b[1:k] <- 1 #b[1:0] acts like b[1:1] = b[1]
vars <- as.vector(1:(p-1))
for(i in 1:nruns) {
x <- matrix(rnorm(n * q), nrow = n, ncol = q)
x <- x %*% A
xf <- rnorm(q) %*% A
if(type == 1) {
y <- 1 + x %*% b + rnorm(n)
yf <- 1 + xf %*% b + rnorm(1)
}
if(type == 2) {
y <- 1 + x %*% b + rt(n, df = 3)
yf <- 1 + xf %*% b + rt(1, df = 3)
}
if(type == 3) {
y <- 1 + x %*% b + rexp(n) - 1
yf <- 1 + xf %*% b + rexp(1) - 1
}
if(type == 4) {
y <- 1 + x %*% b + runif(n, min = -1, max = 1)
yf <- 1 + xf %*% b + runif(1, min = -1, max = 1)
}
if(type == 5) {
err <- rnorm(n, sd = 1 + rbinom(n, 1, eps) * shift)
y <- 1 + x %*% b + err
ef <- rnorm(1, sd = 1 + rbinom(1, 1, eps) * shift)
yf <- 1 + xf %*% b + ef
}
#make an MLR data set
#find the forward sel model I_I model
tem <- regsubsets(x,y,nvmax=vmax,method="forward")
out <- summary(tem)
num <- 1:length(out$cp)
tnum <- num[out$cp <= min(out$cp)+1]
Icp <- out$cp[min(tnum)]
modI <- out$which[out$cp==Icp,]
#do not need the constant in vin
vin <- vars[modI[-1]]
sub <- lsfit(x[,vin],y)
ps[i] <- length(sub$coef)
yfhat <- sub$coef[1] + xf[vin] %*% sub$coef[-1]
fres <- sub$resid
#get asymptotically optimal PI
sres <- sort(fres)
cc <- ceiling(n * (1 - alphan))
rup <- sres[cc]
rlow <- sres[1]
olen <- rup - rlow
if(cc < n) {
for(j in (cc + 1):n) {
zlen <- sres[j] - sres[j - cc + 1]
if(zlen < olen) {
olen <- zlen
rup <- sres[j]
rlow <- sres[j - cc + 1]
}
}
}
up <- yfhat + corfac*rup
low <- yfhat + corfac*rlow
pilen[i] <- up - low
if(low < yf && up > yf) opicov <- opicov + 1
}
psmn <- mean(ps) - k #near 1 if forward selection picks the optimal submodel
pimnlen <- mean(pilen)
opicov <- opicov/nruns
list(psmn=psmn, opicov=opicov, pimnlen = pimnlen)}

mplot<-function(x)
{# Need p > 1.
# Makes a DD plot only using the MDi, the RDi are not used.
p <- dim(x)[2]
center <- apply(x, 2, mean)
cov <- var(x)
md2 <- mahalanobis(x, center, cov)
md <- sqrt(md2)
rd <- md
const <- sqrt(qchisq(0.5, p))/median(rd)
rd <- const * rd
plot(md, rd)
abline(0, 1)
identify(md, rd)
}

mpredsim<-function(n = 100, m = 2, p = 4, nruns = 10, etype = 1, eps = 0.25,
psi = 0.1, dd = 7, mnull = F, alpha = 0.1){
# Simulates prediction regions for multivariate linear regression
# p = number of predictors including intercept
# m > 1 is the number of response variables
# want n > mp and n > 10 p
# multiply E by A where etype = 1 for MVN Nm(0,I),
# etype = 2 for (1 - eps) Nm(0,I) + eps Nm(0, 25 I)
# eps = 0.1, 0.25, 0.4, and 0.6 are interesting
# etype = 3 for multivariate t_d with d = dd degrees of freedom
# dd = 1, 2, 3, 5, 7 are interesting
# etype = 4 for lognormal - E(lognormal).
# For MVN data multiplying E by A results
# in a covariance matrix with eigenvector c(1, ..., 1)^T
# corresponding to the largest eigenvalue. The diagonal elements
# of the covariance matrix are 1 + (m-1) psi^2, while the off
# diagonal elements are 2 psi + (m-2) psi^2. Hence the correlations
# are (2 psi + (m-2) psi^2)/(1 + (m-1) psi^2). As psi gets
# close to 1, the data clusters about the line in the
# direction of (1, ..., 1)^T.
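## Illustrative call for mpredsim, a sketch (uses covrmvn from this file):
# out <- mpredsim(n=100, m=2, p=4, nruns=10, etype=1, alpha=0.1)
# out$ncvr; out$scvr; out$mcvr #coverages should be near 0.9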
# set.seed(974)
ccvr <- 0; scvr <- 0; rcvr <- 0
volc <- 1:nruns
vols <- volc
volr <- volc
#up <- 1 - alpha
up <- min((1 - alpha/2), (1 - alpha + 10*alpha*m/n))
if(alpha > 0.1) up <- min((1 - alpha + 0.05), (1 - alpha + m/n))
qn <- up
if(qn < 1 - alpha + 0.001) up <- 1 - alpha
np1 <- n + 1
q <- p - 1
A <- matrix(psi,nrow=m,ncol=m)
diag(A) <- 1
res <- matrix(nrow = n, ncol = m, 0)
fit <- res
B <- matrix(nrow = p, ncol = m, 1)
if(p == m){
for(j in 1:(m-1)){
B[p-j+1,j:m] <- 0
}
}
if(p > m){
for(j in 1:m){
B[p-j+1,j:m] <- 0
}
}
if(p < m){
for(j in 1:(p-1)){
B[p-j+1,j:m] <- 0
}
}
if (mnull == T) B[-1,] <- 0
Bhat <- B
one <- 1 + 0*1:np1
onen <- one[-1]
for(i in 1:nruns) {
x <- matrix(rnorm(np1 * q), nrow = np1, ncol = q)
w <- cbind(one,x)
y <- w %*% B
#make error matrix E
E <- matrix(rnorm(np1 * m), nrow = np1, ncol = m)
if(etype == 2) {
zu <- runif(np1)
E[zu < eps, ] <- E[zu < eps, ] * 5
}
if(etype == 3) {
zu <- sqrt(rchisq(np1, dd)/dd)
E <- E/zu
}
if(etype == 4) E <- exp(E) - exp(0.5) #want mean 0 error vectors
E <- E %*% A
# got error matrix E
y <- y + E
yf <- y[np1,]
#want xf to be a column vector with a 1
xf <- rbind(1,as.matrix(x[np1,]))
y <- y[-np1,]
x <- x[-np1,]
for(j in 1:m){
out <- lsfit(x,y[,j])
res[,j] <- out$res
fit[,j] <- y[,j] - res[,j]
Bhat[,j] <- out$coef
}
##Note that ith row of xx is t(hat(E(yf)) + hat(eps)_i)
##for i = 1,...,n.
##Get prediction regions based on xx which has m columns,
##and see if yf is in the prediction regions.
zz <- t(t(Bhat)%*%xf)
xx <- res + onen %*% zz
center <- apply(xx, 2, mean)
cov <- var(xx)
md2 <- mahalanobis(xx, center, cov)
hsq <- quantile(md2, up)
if(mahalanobis(t(yf), center, cov) <= hsq) ccvr <- ccvr + 1
volc[i] <- sqrt(hsq)^m * prod(diag(chol(cov)))
out <- covrmvn(xx)
center <- out$center
cov <- out$cov
md2 <- mahalanobis(xx, center, cov)
hsq <- quantile(md2, up)
dsq <- mahalanobis(t(yf), center, cov)
if(dsq <= hsq) scvr <- scvr + 1
sqrtdet <- prod(diag(chol(cov)))
vols[i] <- sqrt(hsq)^m * sqrtdet
hsq <- qchisq(up, m)
if(dsq <= hsq) rcvr <- rcvr + 1
volr[i] <- sqrt(hsq)^m * sqrtdet
}
ccvr <- ccvr/nruns
scvr <- scvr/nruns
rcvr <- rcvr/nruns
#get a measure of efficiency wrt vols, so eff(vols) = 1
vols <- mean(vols)
volc <- mean(volc)/vols
volr <- mean(volr)/vols
vols <- 1
list(ncvr = ccvr, scvr = scvr, mcvr = rcvr, voln = volc, vols = vols,
volm = volr, up = up)
}

mregddsim<-function(n = 100, m = 2, p = 4, nruns = 10, etype = 1, eps = 0.25,
psi = 0.1, dd = 7, mnull = F, alph=0.1){
## Need p > 1, m > 1.
# Simulates DD plots of residuals for the
# multivariate linear regression model.
# The identity line and lines corresponding to the
# 100(1-alph)% prediction regions are added.
# p = number of predictors including intercept
# m > 1 is the number of response variables
# want n > mp + 10, n > 10m and n > 10 p
# multiply E by A where etype = 1 for MVN Nm(0,I),
# etype = 2 for (1 - eps) Nm(0,I) + eps Nm(0, 25 I)
# eps = 0.1, 0.25, 0.4, and 0.6 are interesting
# etype = 3 for multivariate t_d with d = dd degrees of freedom
# dd = 1, 2, 3, 5, 7 are interesting
# etype = 4 for lognormal - E(lognormal).
# For MVN data multiplying E by A results
# in a covariance matrix with eigenvector c(1, ..., 1)^T
# corresponding to the largest eigenvalue. The diagonal elements
# of the covariance matrix are 1 + (m-1) psi^2, while the off
# diagonal elements are 2 psi + (m-2) psi^2. Hence the correlations
# are (2 psi + (m-2) psi^2)/(1 + (m-1) psi^2). As psi gets
# close to 1, the data clusters about the line in the
# direction of (1, ..., 1)^T.
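## Illustrative call for mregddsim, a sketch (uses ddplot4 from this file;
## advance each DD plot by highlighting Stop with the right mouse button):
# out <- mregddsim(n=100, m=2, p=4, nruns=5, etype=1, alph=0.1)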
q <- p - 1 A <- matrix(psi,nrow=m,ncol=m) diag(A) <- 1 res <- matrix(nrow = n, ncol = m, 0) fit <- res B <- matrix(nrow = p, ncol = m, 1) if(p == m){ for(j in 1:(m-1)){ B[p-j+1,j:m] <- 0 } } if(p > m){ for(j in 1:m){ B[p-j+1,j:m] <- 0 } } if(p < m){ for(j in 1:(p-1)){ B[p-j+1,j:m] <- 0 } } if (mnull == T) B[-1,] <- 0 Bhat <- B one <- 1 + 0*1:n for(i in 1:nruns) { x <- matrix(rnorm(n * q), nrow = n, ncol = q) w <- cbind(one,x) y <- w %*% B #make error matrix E E <- matrix(rnorm(n * m), nrow = n, ncol = m) if(etype == 2) { zu <- runif(n) E[zu < eps, ] <- E[zu < eps, ] * 5 } if(etype == 3) { zu <- sqrt(rchisq(n, dd)/dd) E <- E/zu } if(etype == 4) E <- exp(E) - exp(0.5) #want mean 0 error vectors E <- E %*% A # got error matrix E y <- y + E for(j in 1:m){ out <- lsfit(x,y[,j]) res[,j] <- out$res fit[,j] <- y[,j] + res[,j] Bhat[,j] <- out$coef } ##make DD plot of the errors ddplot4(res, alpha=alph) } Covhat <- (n-1)/(n-p) * var(res) list(Bhat = Bhat, B=B, Covhat = Covhat) } mregsim<-function(n = 100, m = 2, p = 4, nruns = 10, etype = 1, eps = 0.25, psi = 0.1, dd = 7, mnull = T){ ## Need p > 1, m > 1. # Simulates multivariate linear regression model and gets # pvalues for tests for whether the p-1 nontrivial # predictors are needed in the model and # pvalues for tests for whether Xi is needed in the model. # p = number of predictors including intercept # m > 1 is the number of response variables # want n > mp and n > 10 p # multiply E by A where etype = 1 for MVN Nm(0,I), # etype = 2 for (1 - eps) Nm(0,I) + eps Nm(0, 25 I) # eps = 0.1, 0.25, 0.4, and 0.6 are interesting # etype = 3 for multivariate t_d with d = dd degrees of freedom # dd = 1, 2, 3, 5, 7 are interesting # etype = 4 for lognormal - E(lognormal). # For MVN data multiplying E by A results # in a covariance matrix with eigenvector c(1, ..., 1)^T # corresponding to the largest eigenvalue. The diagonal elements # of the covariance matrix are 1 + (m-1) psi^2, while the off # diagonal elements are 2 psi + (m-2) psi^2. Hence the correlations # are (2 rho + (m-2) psi^2)/(1 + (m-1) psi^2). As psi gets # close to 1, the data clusters about the line in the # direction of (1, ..., 1)^T. # Illustrates that Hotelling Lawley statistic = last statistic/(n-p). 
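##Hedged example for mregddsim above (needs ddplot4 loaded; values illustrative):
# mregddsim(n = 100, m = 3, p = 4, nruns = 2, etype = 3, dd = 3)
##each run draws a DD plot of the residual vectors; heavy tailed errors bend
##the plotted points away from the identity line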
q <- p - 1 A <- matrix(psi,nrow=m,ncol=m) diag(A) <- 1 res <- matrix(nrow = n, ncol = m, 0) fit <- res B <- matrix(nrow = p, ncol = m, 1) if(p == m){ for(j in 1:(m-1)){ B[p-j+1,j:m] <- 0 } } if(p > m){ for(j in 1:m){ B[p-j+1,j:m] <- 0 } } if(p < m){ for(j in 1:(p-1)){ B[p-j+1,j:m] <- 0 } } if (mnull == T) B[-1,] <- 0 Bhat <- B one <- 1 + 0*1:n fcov <- 0*1:p wilkcov <- fcov pilcov <- fcov hotlawcov <- fcov roycov <- fcov rden <- n - p - m + 1 rcut <- qf(0.95,m,rden) dendf <- n - m*p wcv <- 0 pcv <- 0 hlcv<-0 rcv<-0 fcv<-0 ndf <- (p-1)*m mancv <- cbind(wcv,pcv,hlcv,rcv,fcv) #L <- cbind(fcov[-1],diag(p-1)) #want n > mp fcut <- qf(0.95, m, dendf) mcut <- qf(0.95,m*(p-1),dendf) h <- max(p-1,m) mrcut <- qf(0.95,h,(n-h-1)) for(i in 1:nruns) { x <- matrix(rnorm(n * q), nrow = n, ncol = q) w <- cbind(one,x) y <- w %*% B #make error matrix E E <- matrix(rnorm(n * m), nrow = n, ncol = m) if(etype == 2) { zu <- runif(n) E[zu < eps, ] <- E[zu < eps, ] * 5 } if(etype == 3) { zu <- sqrt(rchisq(n, dd)/dd) E <- E/zu } if(etype == 4) E <- exp(E) - exp(0.5) #want mean 0 error vectors E <- E %*% A # got error matrix E y <- y + E for(j in 1:m){ out <- lsfit(x,y[,j]) res[,j] <- out$res fit[,j] <- y[,j] - res[,j] Bhat[,j] <- out$coef } Covhat <- (n-1)/(n-p) * var(res) # w is the data matrix with a column of ones J <- t(w)%*%w J <- solve(J) We <- Covhat*(n-p) Covinv <- solve(Covhat) Weinv <- Covinv/(n-p) #get the test statistics for whether Xj is needed in the model for(j in 1:p){ T <- t(Bhat[j,])%*%Covinv%*%Bhat[j,]/J[j,j] if(T/m > fcut) fcov[j] <- fcov[j] + 1 H <- Bhat[j,]%*%t(Bhat[j,])/J[j,j] C <- Weinv%*%H eig <- eigen(C,symmetric=FALSE,only.values=TRUE)$values eig <- as.double(eig) # as.real(eig) lmax <- eig[1] u <- sum(eig) v <- sum(eig/(eig+1)) wilk <- prod(1/(eig+1)) if(-(n-p-1-0.5*m)*log(wilk)/m > fcut) wilkcov[j] <- wilkcov[j] + 1 if((n-p)*v/m > fcut) pilcov[j] <- pilcov[j] + 1 if((n-p)*u/m > fcut) hotlawcov[j] <- hotlawcov[j] + 1 if(rden*lmax/m > rcut) roycov[j] <- roycov[j] + 1 } #get the MANOVA F test statistics for whether nontrivial #predictors are needed in the model D <- as.matrix(Bhat[-1,]) if(p == 2) D <- t(D) vecD <- as.matrix(D[,1]) for(j in 2:m){ vecD <- rbind(vecD,as.matrix(D[,j]))} tem <- as.matrix(J[-1,-1]) teminv <- solve(tem) T <- t(vecD)%*%kronecker(Covinv,teminv)%*%vecD if(T/ndf > mcut) mancv[5] <- mancv[5] + 1 H <- t(D)%*%teminv%*%D C <- Weinv%*%H eig <- eigen(C,symmetric=FALSE,only.values=TRUE)$values eig <- as.double(eig) lmax <- eig[1] u <- sum(eig) v <- sum(eig/(eig+1)) wilk <- prod(1/(eig+1)) if(-(n-0.5*p-0.5*m-2)*log(wilk)/ndf > mcut) mancv[1] <- mancv[1] + 1 if((n-p)*v/ndf > mcut) mancv[2] <- mancv[2] + 1 if((n-p)*u/ndf > mcut) mancv[3] <- mancv[3] + 1 if((n-h-1)*lmax/h > mrcut) mancv[4] <- mancv[4] + 1 } wilkcov <- wilkcov/nruns pilcov <- pilcov/nruns hotlawcov <- hotlawcov/nruns roycov <- roycov/nruns fcov <- fcov/nruns mancv <- mancv/nruns list(Bhat = Bhat, B=B, Covhat = Covhat, wilkcov=wilkcov, pilcov=pilcov, hotlawcov=hotlawcov, roycov=roycov, fcov=fcov, mancv=mancv) } nltv<-function(gam = 0.5) {# Gets asy var for lts(h) and lta(h) at standard normal # where h/n -> gam. k <- qnorm(0.5 + gam/2) den <- gam - 2 * k * dnorm(k) ltsv <- 1/den tem <- (1 - exp( - (k^2)/2))^2 ltav <- (2 * pi * gam)/(4 * tem) list(ltsv=ltsv, ltav=ltav) } pcaboot<-function(x, corr=T, rob=F, B = 1000){ #Bootstraps PCA. Likely only accurate for positive eigenvalues. #If rob = T, bootstrap the robust PCA. 
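##Hedged example for nltv above, giving asymptotic variances at the normal:
# nltv(gam = 0.5) #ltsv about 14.02, so LTS(50%) is very inefficient at N(0,1)
# nltv(gam = 0.9)$ltsv #about 1.78; efficiency improves as gam increases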
#Nominal 95% CIs x <- as.matrix(x) n <- dim(x)[1] p <- dim(x)[2] lsciL <- 1:p lsciU <- 1:p indx <- 1:n lams <- matrix(0,nrow=B,ncol=p) if(rob==F){ for(i in 1:B){ tem <- sample(indx,n,replace=T) lams[i,] <- prcomp(x[tem,], scale = corr)$sd^2 } } else{ for(i in 1:B){ tem <- sample(indx,n,replace=T) lams[i,] <- rprcomp(x[tem,], corr = corr)$out$sd^2 } } shorci <- apply(lams,2,shorth3) covhat <- var(lams) for(i in 1:p){ lsciL[i] <- mean(lams[,i]) - 2*sqrt(covhat[i,i]) lsciU[i] <- mean(lams[,i]) + 2*sqrt(covhat[i,i]) } lscis <- cbind(lsciL,lsciU) list(lams=lams,covhat=covhat,shorci=shorci,lscis=lscis) } pcabootsim<-function(n = 100, p = 4, nruns = 1, xtype = 1, dd = 1, eps = 0.25, corr=F,rob=F,B=1000){ # Generates a simulated data set for the PCA bootstrap CIs. # Need n > 2q. # MAY NOT WORK IF p = 1 # If corr = F, then eigenvalues = c(1,2,...,p). # If corr = T, then eigenvalues = c(1,1,...,1). # Multiply x by A where xtype = 1 for MVN Np(0,I), # 2, 3, 4 and 5 for (1 - eps) Np(0,I) + eps Np(0, 25 I) # 6, 7, 8 and 9 for multivariate t_d with d = 3, 5, 19 or dd # 10 for lognormal. # Can't get accurate counts if xtype = 9. set.seed(974) A <- sqrt(diag(1:p)) lscict <- 0*(1:p) shcict <- lscict sigsq <- 1 for(i in 1:nruns) { #make data x <- matrix(rnorm(n * p), nrow = n, ncol = p) if(xtype == 2) { zu <- runif(n) x[zu < 0.4, ] <- x[zu < 0.4, ] * 5 sigsq <- 10.6 } if(xtype == 3) { zu <- runif(n) x[zu < 0.6, ] <- x[zu < 0.6, ] * 5 sigsq <- 15.4 } if(xtype == 4) { zu <- runif(n) x[zu < 0.1, ] <- x[zu < 0.1, ] * 5 sigsq <- 3.4 } if(xtype == 5) { zu <- runif(n) x[zu < eps, ] <- x[zu < eps, ] * 5 sigsq <- 1 +eps*24 } if(xtype == 6) { zu <- sqrt(rchisq(n, 3)/3) x <- x/zu sigsq <- 3 } if(xtype == 7) { zu <- sqrt(rchisq(n, 5)/5) x <- x/zu sigsq <- 5/3 } if(xtype == 8) { zu <- sqrt(rchisq(n, 19)/19) x <- x/zu sigsq <- 19/17 } if(xtype == 9) { zu <- sqrt(rchisq(n, dd)/dd) x <- x/zu #sigsq <- dd/(dd-2) } if(xtype == 10){ x <- exp(x) sigsq <- exp(1)*(exp(1)-1) } x <- x %*% A out <- pcaboot(x,corr=corr,rob=rob,B=B) lam <- sigsq*(1:p) for(j in 1:p){ if(out$lscis[j,1] <= lam[p+1-j] & lam[p+1-j] <= out$lscis[j,2]) lscict[j] <- lscict[j] + 1 if(out$shorci[[j]]$shorth[1] <= lam[p+1-j] & lam[p+1-j] <= out$shorci[[j]]$shorth[2]) shcict[j] <- shcict[j]+1 } } covh <- cov(x) lscict <- lscict/nruns shcict <- shcict/nruns list(out=out,covh=covh,lscicv=lscict,shcicv=shcict) } pcasim<-function(n = 100, q = 4, nruns = 100, xtype = 1, corr = T, dd = 1, eps = 0.25, rot = T, rho = 0.99) {# Need n > 2q, q > 2. R users need to type library(MASS). # Need q > 2 (want at least 3 principal components). # If rot = F, multiply x by A where xtype = 1 for MVN Nq(0,I), # 2, 3, 4 and 5 (with delta = eps) for (1 - delta) Nq(0,I) + delta Nq(0, 25 I) # 6, 7, 8 and 9 for multivariate t_d with d = 3, 5, 19 or dd # 10 for lognormal. # If rot = T, multiply x by RR: for MVN data this results # in a covariance matrix with eigenvector c(1, ..., 1)^T # corresponding to the largest eigenvalue. As rho gets # close to 1, the data clusters about the line in the # direction of (1, ..., 1)^T. See Maronna and Zamar (2002). # Note: prcomp(x)$sd gives the vector of the square roots of # the eigenvalues of the dispersion or correlation matrix. # Note: prcomp$rot gives the matrix of eigenvectors. # Robust and classical eigenvectors sometimes have # very low absolute correlation. 
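##Hedged example for pcaboot above (uses the built-in USArrests data;
##needs shorth3, and rprcomp if rob = T):
# out <- pcaboot(USArrests, corr = T, rob = F, B = 200)
# out$lscis #rough 95% CIs for the eigenvalues of the correlation matrix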
#set.seed(974) RR <- matrix(rho,nrow=q,ncol=q) diag(RR) <- 1 corrs <- 1:nruns eig1 <- corrs eig2 <- eig1 eig3 <- eig1 pc1 <- eig1 pc2 <- pc1 pc3 <- pc1 ve1 <- eig1 ve2 <- ve1 ve3 <- ve1 rve1 <- ve1 rve2 <- ve1 rve3 <- ve1 vexpl <- 1:3 rvexpl <- 1:3 A <- sqrt(diag(1:q)) for(i in 1:nruns) { #make data x <- matrix(rnorm(n * q), nrow = n, ncol = q) if(xtype == 2) { zu <- runif(n) x[zu < 0.4, ] <- x[zu < 0.4, ] * 5 } if(xtype == 3) { zu <- runif(n) x[zu < 0.6, ] <- x[zu < 0.6, ] * 5 } if(xtype == 4) { zu <- runif(n) x[zu < 0.1, ] <- x[zu < 0.1, ] * 5 } if(xtype == 5) { zu <- runif(n) x[zu < eps, ] <- x[zu < eps, ] * 5 } if(xtype == 6) { zu <- sqrt(rchisq(n, 3)/3) x <- x/zu } if(xtype == 7) { zu <- sqrt(rchisq(n, 5)/5) x <- x/zu } if(xtype == 8) { zu <- sqrt(rchisq(n, 19)/19) x <- x/zu } if(xtype == 9) { zu <- sqrt(rchisq(n, dd)/dd) x <- x/zu } if(xtype == 10) x <- exp(x) if(rot == F) x <- x %*% A else x <- x %*% RR z <- prcomp(x,scale=corr) zz <- rprcomp(x,corr=corr) tem <- z$sd^2 temr <- zz$out$sd^2 corrs[i] <- cor(tem,temr) den <- sum(tem) ve1[i] <- tem[1]/den ve2[i] <- tem[2]/den ve3[i] <- tem[3]/den den <- sum(temr) rve1[i] <- temr[1]/den rve2[i] <- temr[2]/den rve3[i] <- temr[3]/den eig1[i] <- abs(cor(z$rot[,1],zz$out$rot[,1])) eig2[i] <- abs(cor(z$rot[,2],zz$out$rot[,2])) eig3[i] <- abs(cor(z$rot[,3],zz$out$rot[,3])) if(corr==F){ pc1[i] <- abs(cor(x%*%z$rot[,1],x%*%zz$out$rot[,1])) pc2[i] <- abs(cor(x%*%z$rot[,2],x%*%zz$out$rot[,2])) pc3[i] <- abs(cor(x%*%z$rot[,3],x%*%zz$out$rot[,3])) } else { tem <- 1/sqrt(diag(cov(x))) dinv <- diag(tem) w <- x%*%dinv pc1[i] <- abs(cor(w%*%z$rot[,1],w%*%zz$out$rot[,1])) pc2[i] <- abs(cor(w%*%z$rot[,2],w%*%zz$out$rot[,2])) pc3[i] <- abs(cor(w%*%z$rot[,3],w%*%zz$out$rot[,3])) } } mncor <- mean(corrs) vexpl[1] <- mean(ve1) vexpl[2] <- mean(ve2) vexpl[3] <- mean(ve3) rvexpl[1] <- mean(rve1) rvexpl[2] <- mean(rve2) rvexpl[3] <- mean(rve3) m1 <- mean(eig1) m2 <- mean(eig2) m3 <- mean(eig3) v1 <- sqrt(var(ve1)) v2 <- sqrt(var(ve2)) v3 <- sqrt(var(ve3)) sdc <- c(v1,v2,v3) rv1 <- sqrt(var(rve1)) rv2 <- sqrt(var(rve2)) rv3 <- sqrt(var(rve3)) sdr <- c(rv1,rv2,rv3) p1 <- mean(pc1) p2 <- mean(pc2) p3 <- mean(pc3) abscorpc <- c(p1,p2,p3) list(mncor = mncor, vexpl=vexpl, sdc=sdc, rvexpl=rvexpl, sdr=sdr, abscoreigv1=m1, abscoreigv2=m2, abscoreigv3=m3, abscorpc=abscorpc) } pcisim2<-function(nruns=100,n1=10,n2=10,mu1=0,mu2=0,var1=1,var2=1,dist=1,BB=100, alph=0.05){ #gets the pooled, modified pooled, and Welch 100 (1-alpha)% CIs for mu1 - mu2 #defaults are alpha = .05, lengths make the most sense if n1 = n2 #Also gets the bootstrap pooled CI and #three bootstrap CIs: shorth, prediction region method, Bickel and Ren. 
#dist = 1 for x ~ N(mu1, var1), y ~ N(mu2, var2) #dist = 2 for x ~ EXP with mean mu1 variance var1, # y ~ EXP with mean mu2 variance var2 #dist = 3 for x ~ N(mu1, var1), y EXP with mean mu2 variance var2 #calls shorth3 and bootci bup <- min((1 - alph/2), (1 - alph + 10*alph/BB)) if(alph > 0.1) bup <- min((1 - alph + 0.05), (1 - alph + 1/BB)) qB <- bup if(qB < 1 - alph + 0.001) bup <- 1 - alph indx1 <- 1:n1 indx2 <- 1:n2 diff <- mu1 - mu2 up <- 1 - alph/2 pdf <- n1 + n2 - 2 rhohat <- n1/(n1 + n2) pcov <- 0 mpcov <- 0 wcov <- 0 shcov <- 0 shlen <- 0 prcov <- 0 prlen <- 0 brcov <- 0 brlen <- 0 bpcov <- 0 bplen <- 0 plow <- 1:nruns pup <- plow mplow <- plow mpup <- plow wlow <- plow wup <- plow tpcut <- qt(up, pdf) mtpcut <- qt(up, n1 + n2 - 4) bdiff <- 1:BB for(i in 1:nruns) { if(dist == 1) { x <- mu1 + sqrt(var1) * rnorm(n1) y <- mu2 + sqrt(var2) * rnorm(n2) } if(dist == 2) { x <- mu1 - sqrt(var1) + sqrt(var1) * rexp(n1) y <- mu2 - sqrt(var2) + sqrt(var2) * rexp(n2) } if(dist == 3) { x <- mu1 + sqrt(var1) * rnorm(n1) y <- mu2 - sqrt(var2) + sqrt(var2) * rexp(n2) } xbar <- mean(x) s1sq <- var(x) ybar <- mean(y) s2sq <- var(y) xmybar <- xbar - ybar sp <- (n1 - 1) * s1sq + (n2 - 1) * s2sq sp <- sqrt(sp/pdf) val <- sp * sqrt(1/n1 + 1/n2) #get pooled CI den <- val temp <- tpcut * val plow[i] <- xmybar - temp pup[i] <- xmybar + temp if(plow[i] < diff && pup[i] > diff) pcov <- pcov + 1 #get modified pooled CI thetahat <- s2sq/s1sq tauhatsq <- (1 - rhohat + rhohat * thetahat)/(rhohat + (1 - rhohat) * thetahat) tauhat <- sqrt(tauhatsq) tem <- mtpcut * val * tauhat mplow[i] <- xmybar - tem mpup[i] <- xmybar + tem if(mplow[i] < diff && mpup[i] > diff) mpcov <- mpcov + 1 #get Welch CI t1 <- s1sq/n1 t2 <- s2sq/n2 do <- (t1 + t2)^2/(t1^2/(n1 - 1) + t2^2/(n2 - 1)) d <- max(1, floor(do)) wtcut <- qt(up, d) val <- sqrt(s1sq/n1 + s2sq/n2) tem <- wtcut * val wlow[i] <- xmybar - tem wup[i] <- xmybar + tem if(wlow[i] < diff && wup[i] > diff) wcov <- wcov + 1 for(j in 1:BB){ tem <- sample(indx1,n1,replace=T) mnx <- mean(x[tem]) tem <- sample(indx2,n2,replace=T) mny <- mean(y[tem]) bdiff[j] <- mnx - mny } #get shorth CI tem <- shorth3(bdiff,alpha=alph) if(diff >= tem$shorth[1] && diff <= tem$shorth[2]) shcov <- shcov + 1 shlen <- shlen + tem$shorth[2] - tem$shorth[1] #get prediction region CI tem<-bootci(bstat=bdiff,stat=xmybar,upc=bup) if(diff >= tem$prci[1] && diff <= tem$prci[2]) prcov <- prcov + 1 prlen <- prlen + tem$prcilen #get Bickel and Ren CI if(diff >= tem$brci[1] && diff <= tem$brci[2]) brcov <- brcov + 1 brlen <- brlen + tem$brcilen #get bootstrap pooled CI dsq <- ((bdiff - xmybar)/den)^2 Dhat <- sqrt(quantile(dsq,bup)) bplow <- xmybar - Dhat*den bpup <- xmybar + Dhat*den if(bplow < diff && bpup > diff) bpcov <- bpcov + 1 bplen <- bplen + bpup - bplow } pcov <- pcov/nruns plen <- sqrt(n1) * mean(pup - plow) mpcov <- mpcov/nruns mplen <- sqrt(n1) * mean(mpup - mplow) wcov <- wcov/nruns wlen <- sqrt(n1) * mean(wup - wlow) shcov <- shcov/nruns shlen <- sqrt(n1)*shlen/nruns prcov <- prcov/nruns prlen <- sqrt(n1)*prlen/nruns brcov <- brcov/nruns brlen <- sqrt(n1)*brlen/nruns bpcov <- bpcov/nruns bplen <- sqrt(n1)*bplen/nruns #print(mean(x)) #print(var(x)) #print(mean(y)) #print(var(y)) list(pcov=pcov,plen=plen,bpcov=bpcov,bplen=bplen,mpcov=mpcov,mplen=mplen, wcov=wcov,wlen=wlen,shcov=shcov,shlen=shlen,prcov=prcov,prlen=prlen, brcov=brcov,brlen=brlen) } pcrpisim<-function(n = 100, p = 4, k = 1, nruns = 100, eps = 0.1, shift = 9, psi=0.0, type = 1, alpha = 0.05){ #Needs library(pls). 
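##Hedged example for pcisim2 above (needs shorth3 and bootci loaded):
# out <- pcisim2(nruns = 100, n1 = 10, n2 = 10, var1 = 1, var2 = 4, dist = 1,
# BB = 100)
# out$pcov; out$mpcov; out$wcov #coverages should be near 0.95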
#Simulates PIs for principle components regression with 10 fold CV. # 1 <= k <= p-1, k is the number of nonnoise variables #Uses five iid error distributions: # type = 1 for N(0,1) errors, 2 for t3 errors, 3 for exp(1) - 1 errors # 4 for uniform(-1,1) errors, 5 for (1-eps) N(0,1) + eps N(0,(1+shift)^2) #errors. # constant = 1 so there are p = q+1 coefficients #need p > 1, beta = (1, 1, ..., 1, 0, ..., 0) with k+1 ones, p-k-1 zeroes # Multiply x by A: for MVN data this results # in a covariance matrix with eigenvector c(1, ..., 1)^T # corresponding to the largest eigenvalue. As psi gets # close to 1, the data clusters about the line in the # direction of (1, ..., 1)^T. See Maronna and Zamar (2002). # cor(X_i,X_j) = [2 psi +(q-2)psi^2]/[1 + (q-1)psi^2], i not = j # when the correlation exists. set.seed(974) corfac <- (1 + 15/n) * sqrt( (n+2*p)/(n - p) ) if (alpha > 0.1) {qn <- min(1 - alpha + 0.05, 1 - alpha + p/n)} if (alpha <= 0.1) {qn <- min(1 - alpha/2, 1 - alpha + 10*alpha*p/n)} pn <- qn if(pn < 1 - alpha + 0.001) qn <- 1 - alpha alphan <- 1 - qn ncvec<-1:nruns pilen <- 1:nruns opicov <- 0 q <- p-1 rho <- (2*psi + (q-2)*psi^2)/(1 + (q-1)*psi^2) A <- matrix(psi,nrow=q,ncol=q) diag(A) <- 1 b <- 0 * 1:q b[1:k] <- 1 #b[1:0] acts like b[1:1] = b[1] vars <- as.vector(1:(p-1)) for(i in 1:nruns) { x <- matrix(rnorm(n * q), nrow = n, ncol = q) x <- x %*% A xf <- rnorm(q) %*% A if(type == 1) { y <- 1 + x %*% b + rnorm(n) yf <- 1 + xf %*% b + rnorm(1) } if(type == 2) { y <- 1 + x %*% b + rt(n, df = 3) yf <- 1 + xf %*% b + rt(1, df = 3) } if(type == 3) { y <- 1 + x %*% b + rexp(n) - 1 yf <- 1 + xf %*% b + rexp(1) - 1 } if(type == 4) { y <- 1 + x %*% b + runif(n, min = -1, max = 1) yf <- 1 + xf %*% b + runif(1, min = -1, max = 1) } if(type == 5) { err <- rnorm(n, sd = 1 + rbinom(n, 1, eps) * shift) y <- 1 + x %*% b + err ef <- rnorm(1, sd = 1 + rbinom(1, 1, eps) * shift) yf <- 1 + xf %*% b + ef } #make an MLR data set #find the PCR 10 fold CV estimator z <- as.data.frame(cbind(y,x)) zz <- rbind(z,c(0,xf)) out<-pcr(V1~.,data=z,scale=T,validation="CV") #If y is used instead of V1, predict does not work, #and nc tends to equal p, which should be impossible. tem<-MSEP(out) cvmse<-tem$val[,,1:out$ncomp][1,] nc <-which.min(cvmse) #using predict is rather difficult #if xf is used, predict does not work yfhat<-predict(out,zz[(n+1),-1],ncomp=nc)[1,1,1] ncvec[i] <- nc #get the number of components to use #if nc=p-1=q, same as OLS res <- out$residuals[,,nc] #get asymptotically optimal PI sres <- sort(res) cc <- ceiling(n * (1 - alphan)) rup <- sres[cc] rlow <- sres[1] olen <- rup - rlow if(cc < n) { for(j in (cc + 1):n) { zlen <- sres[j] - sres[j - cc + 1] if(zlen < olen) { olen <- zlen rup <- sres[j] rlow <- sres[j - cc + 1] } } } up <- yfhat + corfac*rup low <- yfhat + corfac*rlow pilen[i] <- up - low if(low < yf && up > yf) opicov <- opicov + 1 } pimnlen <- mean(pilen) opicov <- opicov/nruns qminncmn <- q - mean(ncvec) #If qminncmn is 0 then PCR is equivalent to OLS. list(qminncmn, opicov=opicov, pimenlen = pimnlen)} pifclean<-function(k, gam) { p <- floor(log(3/k)/log(1 - gam)) list(p = p) } plspisim<-function(n = 100, p = 4, k = 1, nruns = 100, eps = 0.1, shift = 9, psi=0.0, type = 1, alpha = 0.05){ #Needs library(pls). #Simulates PIs for partial least squares with 10 fold CV. 
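##Hedged reading of pifclean above: if K elemental starts are drawn from data
##with contamination fraction gam, p is roughly the largest subset size for
##which at least one clean subset is likely; the K here plays the role of the
##function's k argument.
# pifclean(k = 1000, gam = 0.5) #p = 8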
# 1 <= k <= p-1, k is the number of nonnoise variables #Uses five iid error distributions: # type = 1 for N(0,1) errors, 2 for t3 errors, 3 for exp(1) - 1 errors # 4 for uniform(-1,1) errors, 5 for (1-eps) N(0,1) + eps N(0,(1+shift)^2) #errors. # constant = 1 so there are p = q+1 coefficients #need p > 1, beta = (1, 1, ..., 1, 0, ..., 0) with k+1 ones, p-k-1 zeroes # Multiply x by A: for MVN data this results # in a covariance matrix with eigenvector c(1, ..., 1)^T # corresponding to the largest eigenvalue. As psi gets # close to 1, the data clusters about the line in the # direction of (1, ..., 1)^T. See Maronna and Zamar (2002). # cor(X_i,X_j) = [2 psi +(q-2)psi^2]/[1 + (q-1)psi^2], i not = j # when the correlation exists. set.seed(974) corfac <- (1 + 15/n) * sqrt( (n+2*p)/(n - p) ) if (alpha > 0.1) {qn <- min(1 - alpha + 0.05, 1 - alpha + p/n)} if (alpha <= 0.1) {qn <- min(1 - alpha/2, 1 - alpha + 10*alpha*p/n)} pn <- qn if(pn < 1 - alpha + 0.001) qn <- 1 - alpha alphan <- 1 - qn ncvec<-1:nruns pilen <- 1:nruns opicov <- 0 q <- p-1 rho <- (2*psi + (q-2)*psi^2)/(1 + (q-1)*psi^2) A <- matrix(psi,nrow=q,ncol=q) diag(A) <- 1 b <- 0 * 1:q b[1:k] <- 1 #b[1:0] acts like b[1:1] = b[1] vars <- as.vector(1:(p-1)) for(i in 1:nruns) { x <- matrix(rnorm(n * q), nrow = n, ncol = q) x <- x %*% A xf <- rnorm(q) %*% A if(type == 1) { y <- 1 + x %*% b + rnorm(n) yf <- 1 + xf %*% b + rnorm(1) } if(type == 2) { y <- 1 + x %*% b + rt(n, df = 3) yf <- 1 + xf %*% b + rt(1, df = 3) } if(type == 3) { y <- 1 + x %*% b + rexp(n) - 1 yf <- 1 + xf %*% b + rexp(1) - 1 } if(type == 4) { y <- 1 + x %*% b + runif(n, min = -1, max = 1) yf <- 1 + xf %*% b + runif(1, min = -1, max = 1) } if(type == 5) { err <- rnorm(n, sd = 1 + rbinom(n, 1, eps) * shift) y <- 1 + x %*% b + err ef <- rnorm(1, sd = 1 + rbinom(1, 1, eps) * shift) yf <- 1 + xf %*% b + ef } #make an MLR data set #find the PLS 10 fold CV estimator z <- as.data.frame(cbind(y,x)) zz <- rbind(z,c(0,xf)) out<-plsr(V1~.,data=z,scale=T,validation="CV") #If y is used instead of V1, predict does not work, #and nc tends to equal p, which should be impossible. tem<-MSEP(out) cvmse<-tem$val[,,1:out$ncomp][1,] nc <-which.min(cvmse) #using predict is rather difficult #if xf is used, predict does not work yfhat<-predict(out,zz[(n+1),-1],ncomp=nc)[1,1,1] ncvec[i] <- nc #get the number of components to use #if nc=p-1=q, same as OLS res <- out$residuals[,,nc] #get asymptotically optimal PI sres <- sort(res) cc <- ceiling(n * (1 - alphan)) rup <- sres[cc] rlow <- sres[1] olen <- rup - rlow if(cc < n) { for(j in (cc + 1):n) { zlen <- sres[j] - sres[j - cc + 1] if(zlen < olen) { olen <- zlen rup <- sres[j] rlow <- sres[j - cc + 1] } } } up <- yfhat + corfac*rup low <- yfhat + corfac*rlow pilen[i] <- up - low if(low < yf && up > yf) opicov <- opicov + 1 } pimnlen <- mean(pilen) opicov <- opicov/nruns qminncmn <- q - mean(ncvec) #If qminncmn is 0 then PLS is equivalent to OLS. list(qminncmn, opicov=opicov, pimenlen = pimnlen)} predreg<-function(x, alpha = 0.05){ # Makes a prediction region for the rows of x. # If p = 1, the shorth interval should work better. #Also computes the distance for the 0 vector #to check if the 0 vector is in the prediction region. 
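##Hedged example for plspisim above (needs library(pls); values illustrative):
# library(pls)
# plspisim(n = 100, p = 10, k = 3, nruns = 100, type = 1, alpha = 0.05)
##opicov should be near 0.95; qminncmn near 0 means PLS used about q
##components, making the fit close to OLS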
x <- as.matrix(x)
p <- dim(x)[2]
n <- dim(x)[1]
inr<-0
zero <- 0*(1:p)
up <- min((1 - alpha/2), (1 - alpha + 10*alpha*p/n))
if(alpha > 0.1) up <- min((1 - alpha + 0.05), (1 - alpha + p/n))
qn <- up
if(qn < 1 - alpha + 0.001) up <- 1 - alpha
center <- apply(x, 2, mean)
cov <- var(x)
md2 <- mahalanobis(x, center, cov)
# MD is the classical distance
MD <- sqrt(md2)
#get nonparametric prediction region boundary
cuplim <- sqrt(quantile(md2, up))
D0 <- sqrt(mahalanobis(zero, center, cov))
if(D0 <= cuplim) inr <- 1
list(MD=MD, center=center,cov=cov,cuplim = cuplim, D0=D0, inr=inr)
}
predrgn <-function(x, xf, alpha = 0.05){
# Makes a prediction region for xf when cases are rows of x.
# If p = 1, the shorth interval should work better.
x <- as.matrix(x)
p <- dim(x)[2]
n <- dim(x)[1]
inr <- 0
up <- min((1 - alpha/2), (1 - alpha + 10*alpha*p/n))
if(alpha > 0.1) up <- min((1 - alpha + 0.05), (1 - alpha + p/n))
qn <- up
if(qn < 1 - alpha + 0.001) up <- 1 - alpha
center <- apply(x, 2, mean)
cov <- var(x)
md2 <- mahalanobis(x, center, cov)
# md2 is the classical squared Mahalanobis distance
#get nonparametric prediction region boundary
cuplim <- quantile(md2, up)
#inr <- 1 if xf is in the prediction region
xfd <- mahalanobis(xf, center, cov)
if(xfd <= cuplim) inr <- 1
list(md2=md2, center=center,cov=cov, cuplim = cuplim, xfd=xfd, inr=inr)
}
predsim<-function(n = 100, p = 4, nruns = 100, xtype = 1, dd = 1, eps = 0.25,
alpha = 0.1)
{# MAY NOT WORK IF p = 1.
# Gets coverages of nonparametric, semiparametric and
# parametric MVN prediction regions.
# Multiply x by A where xtype = 1 for MVN Np(0,I),
# 2, 3, 4 and 10 (with delta = eps) for (1-delta) Np(0,I) + delta Np(0, 25 I)
# 5 for lognormal,
# 6, 7, 8 and 9 for multivariate t_d with d = 3, 5, 19 or dd
# mahalanobis gives squared Maha distances
# set.seed(974)
ccvr <- 0
scvr <- 0
rcvr <- 0
volc <- 1:nruns
vols <- volc
volr <- volc
#up <- 1 - alpha
up <- min((1 - alpha/2), (1 - alpha + 10*alpha*p/n))
if(alpha > 0.1) up <- min((1 - alpha + 0.05), (1 - alpha + p/n))
qn <- up
if(qn < 1 - alpha + 0.001) up <- 1 - alpha
A <- sqrt(diag(1:p))
m <- n + 1
for(i in 1:nruns) {
#make data
x <- matrix(rnorm(m * p), nrow = m, ncol = p)
if(xtype == 2) {
zu <- runif(m)
x[zu < 0.4, ] <- x[zu < 0.4, ] * 5 }
if(xtype == 3) {
zu <- runif(m)
x[zu < 0.6, ] <- x[zu < 0.6, ] * 5 }
if(xtype == 4) {
zu <- runif(m)
x[zu < 0.1, ] <- x[zu < 0.1, ] * 5 }
if(xtype == 5) x <- exp(x)
if(xtype == 6) {
zu <- sqrt(rchisq(m, 3)/3)
x <- x/zu }
if(xtype == 7) {
zu <- sqrt(rchisq(m, 5)/5)
x <- x/zu }
if(xtype == 8) {
zu <- sqrt(rchisq(m, 19)/19)
x <- x/zu }
if(xtype == 9) {
zu <- sqrt(rchisq(m, dd)/dd)
x <- x/zu }
if(xtype == 10) {
zu <- runif(m)
x[zu < eps, ] <- x[zu < eps, ] * 5 }
x <- x %*% A
xx <- x[ - m, ]
center <- apply(xx, 2, mean)
cov <- var(xx)
md2 <- mahalanobis(xx, center, cov)
hsq <- quantile(md2, up)
if(mahalanobis(t(x[m, ]), center, cov) <= hsq) ccvr <- ccvr + 1
volc[i] <- sqrt(hsq)^p * prod(diag(chol(cov)))
out <- covrmvn(xx)
center <- out$center
cov <- out$cov
md2 <- mahalanobis(xx, center, cov)
hsq <- quantile(md2, up)
dsq <- mahalanobis(t(x[m, ]), center, cov)
if(dsq <= hsq) scvr <- scvr + 1
sqrtdet <- prod(diag(chol(cov)))
vols[i] <- sqrt(hsq)^p * sqrtdet
hsq <- qchisq(up, p)
if(dsq <= hsq) rcvr <- rcvr + 1
volr[i] <- sqrt(hsq)^p * sqrtdet
}
ccvr <- ccvr/nruns
scvr <- scvr/nruns
rcvr <- rcvr/nruns
#get a measure of efficiency wrt vols, so eff(vols) = 1
vols <- mean(vols)
volc <- mean(volc)/vols
volr <- mean(volr)/vols
vols <- 1
list(ncvr = ccvr, scvr = scvr, mcvr = rcvr, voln =
volc, vols = vols, volm = volr, up = up) } rcancor<-function(x, y, csteps = 5, locc = 0.5, cent = T) {# This does robust canonical correlations analysis. x <- as.matrix(x) y <- as.matrix(y) px <- dim(x)[2] w <- cbind(x,y) p <- dim(w)[2] n <- dim(w)[1] up <- qchisq(0.975, p) qchi <- qchisq(0.5, p) ##get the DGK estimator covs <- var(w) mns <- apply(w, 2, mean) ## concentrate for(i in 1:csteps) { md2 <- mahalanobis(w, mns, covs) medd2 <- median(md2) mns <- apply(w[md2 <= medd2, ], 2, mean) covs <- var(w[md2 <= medd2, ]) } covd <- covs mnd <- mns ##get the MB estimator covv <- diag(p) med <- apply(w, 2, median) md2 <- mahalanobis(w, center = med, covv) medd2 <- median(md2)##get the location criterion cutoff lcut <- medd2 if(locc != 0.5) lcut <- quantile(md2,locc) ## get the start mns <- apply(w[md2 <= medd2, ], 2, mean) covs <- var(w[md2 <= medd2, ]) ## concentrate for(i in 1:csteps) { md2 <- mahalanobis(w, mns, covs) medd2 <- median(md2) mns <- apply(w[md2 <= medd2, ], 2, mean) covs <- var(w[md2 <= medd2, ]) } covm <- covs mnm <- mns ##get FCH attractor covf <- covm mnf <- mnm val2 <- mahalanobis(t(mnd), med, covv) if(val2 < lcut) { ##crit = square root of det(cov) critd <- prod(diag(chol(covd))) critm <- prod(diag(chol(covm))) if(critd < critm) { covf <- covd mnf <- mnd } } ## get the FCH estimator rd2 <- mahalanobis(w, mnf, covf) const <- median(rd2)/qchi covf <- const * covf ##reweight the above FCH estimator (mnf,covf) rd2 <- mahalanobis(w, mnf, covf) rmnmvn <- apply(w[rd2 <= up, ], 2, mean) rcovmvn <- var(w[rd2 <= up, ]) d1 <- sum(rd2 <= up) rd2 <- mahalanobis(w, rmnmvn, rcovmvn) qchi2 <- (0.5 * 0.975 * n)/d1 qchi2 <- min(qchi2, 0.995) const <- median(rd2)/qchisq(qchi2, p) rcovmvn <- const * rcovmvn ##now get the subset of data z used to construct the RMVN estimator rd2 <- mahalanobis(w, rmnmvn, rcovmvn) z <- w[rd2 <= up, ] zx <- z[,1:px] zy <- z[,(px+1):p] out <- cancor(zx, zy, xcenter = cent, ycenter = cent) list(out = out) } rcovsim<-function(n = 100, p = 2, steps = 5, gam = 0.4, runs = 20, outliers = T) {# Need p > 1. # This function demonstrates that covmba estimates mu # but is slightly biased for sigma if n is small. The function rmba # is better for small n. Similar results hold for covfch and rfch. # R users need to type "library(MASS)." 
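##Hedged example for rcancor above (data illustrative):
# x <- matrix(rnorm(300), ncol = 3)
# y <- x + matrix(rnorm(300), ncol = 3)
# rcancor(x, y)$out$cor #canonical correlations from the RMVN-cleaned cases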
A <- sqrt(diag(1:p)) cloc <- 0 * (1:p) csig <- 0 * A mbaloc <- cloc mbasig <- csig rmbaloc <- cloc rmbasig <- csig fmcdloc <- cloc fmcdsig <- csig fchloc <- cloc fchsig <- csig rfchloc <- cloc rfchsig <- csig for(i in 1:runs) { x <- matrix(rnorm(n * p), ncol = p, nrow = n) # code below would give mean = (10, ..., 10)^T to the outliers # if(outliers == T) { # val <- floor(gam * n) # tem <- 10 + 0 * 1:p # x <- x %*% A # x[1:val, ] <- x[1:val, ] + # tem # } # else { # x <- x %*% A # } ## code below: outliers have mean (10, 10 sqrt(2), ..., 10 sqrt(p))^T if(outliers == T) { val <- floor(gam * n) tem <- 10 + 0 * 1:p x[1:val, ] <- x[1:val, ] + tem } x <- x %*% A out <- covmba(x, csteps = steps) mbaloc <- mbaloc + out$center mbasig <- mbasig + out$cov out <- rmba(x) rmbaloc <- rmbaloc + out$center rmbasig <- rmbasig + out$cov cloc <- cloc + apply(x, 2, mean) csig <- csig + var(x) out <- cov.mcd(x) fmcdloc <- fmcdloc + out$center fmcdsig <- fmcdsig + out$cov out <- covfch(x, csteps = steps) fchloc <- fchloc + out$center fchsig <- fchsig + out$cov rfchloc <- rfchloc + out$rmnf rfchsig <- rfchsig + out$rcovf } mbaloc <- mbaloc/runs mbasig <- mbasig/runs rmbaloc <- rmbaloc/runs rmbasig <- rmbasig/runs cloc <- cloc/runs csig <- csig/runs fmcdloc <- fmcdloc/runs fmcdsig <- fmcdsig/runs fchloc <- fchloc/runs fchsig <- fchsig/runs rfchloc <- rfchloc/runs rfchsig <- rfchsig/runs list(mbaloc = mbaloc, rmbaloc = rmbaloc, fchloc = fchloc, rfchloc = rfchloc, cloc = cloc, fmcdloc = fmcdloc, mbasig = mbasig, rmbasig = rmbasig, fchsig = fchsig, rfchsig = rfchsig, csig = csig, fmcdsig = fmcdsig) } regboot<-function(x,y, B = 1000){ #bootstrap with residuals for MLR #asymptotically the same as MSE (X'X)^(-1), so use to compare vselboot #out <- regboot(belx,bely) #ddplot4(out$betas) #this does not work for vselboot: too many 0s #mplot(out$betas) #cov(out$betas) full <- lsfit(x,y) res <- full$resid fit <- y - res n <- length(y) x <- as.matrix(x) p <- dim(x)[2]+1 betas <- matrix(0,nrow=B,ncol=p) for(i in 1:B){ yb <- fit + sample(res,n,replace=T) betas[i,] <- lsfit(x,yb)$coef } list(betas=betas) } regbootsim<-function(n = 100, p = 4, nruns = 100, eps = 0.1, shift = 9, type = 1, alph = 0.05){ #Simulates residual bootstrap for regression. #Uses five iid error distributions: # type = 1 for N(0,1) errors, 2 for t3 errors, 3 for exp(1) - 1 errors # 4 for uniform(-1,1) errors, 5 for (1-eps) N(0,1) + eps N(0,(1+shift)^2) errors. 
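##Hedged example for regboot above (data illustrative):
# x <- matrix(rnorm(300), ncol = 3)
# y <- 1 + x %*% c(1, 1, 0) + rnorm(100)
# out <- regboot(x, y, B = 200)
# apply(out$betas, 2, sd) #bootstrap SEs, roughly ls.diag(lsfit(x,y))$std.err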
# constant = 1 so there are p = q+1 coefficients #need p > 1, beta = (1, 1, ..., 1, 0, ..., 0) with k ones, p-k zeroes q <- p-1 k <- floor(p/2) b <- 0 * 1:q b[1:(k-1)] <- 1 #b[1:0] acts like b[1:1] = b[1] beta <- c(1,b) pp1 <- p + 1 cicov <- 0*(1:pp1) avelen <- 0*(1:pp1) for(i in 1:nruns) { x <- matrix(rnorm(n * q), nrow = n, ncol = q) if(type == 1) { y <- 1 + x %*% b + rnorm(n) } if(type == 2) { y <- 1 + x %*% b + rt(n, df = 3) } if(type == 3) { y <- 1 + x %*% b + rexp(n) - 1 } if(type == 4) { y <- 1 + x %*% b + runif(n, min = -1, max = 1) } if(type == 5) { err <- rnorm(n, sd = 1 + rbinom(n, 1, eps) * shift) y <- 1 + x %*% b + err } #make an MLR data set out <-regboot(x,y) #residual bootstrap for (j in 1:p){ tem <- shorth3(out$betas[,j],alpha=alph) if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2]) #tem <- locpi2(out$betas[,j],alpha=alph) #if(beta[j] >= tem$LOCPI[1] && beta[j] <= tem$LOCPI[2]) cicov[j] <- cicov[j] + 1 avelen[j] <- avelen[j] + tem$shorth[2] - tem$shorth[1] #avelen[j] <- avelen[j] + tem$LOCPI[2] - tem$LOCPI[1] } #test whether the last p - k values of beta are 0 tem <- predreg(out$betas[,(k+1):p],alpha=alph) if(tem$D0 <= tem$cuplim) cicov[pp1] <- cicov[pp1] + 1 avelen[pp1] <- avelen[pp1] + tem$cuplim } cicov <- cicov/nruns avelen <- avelen/nruns list(cicov=cicov,avelen=avelen,beta=beta,k=k)} rhotboot<-function(x, B=1000, med=T){ #Bootstraps RMVN center (med=F) or coordinatewise median. # Need p > 1. x <- as.matrix(x) n <- dim(x)[1] p <- dim(x)[2] indx <- 1:n munot <- 0 * (1:p) mus <- matrix(0,nrow=B,ncol=p) #get Hotelling's T^2 statistic for munot = 0 #covs <- var(x) #mns <- apply(x, 2, mean) #loc <- mns - munot #hot <- n * t(loc) %*% solve(covs) %*% loc #get robust statistic for munot = 0 #out <- covrmvn(x) #covs <- out$cov #mns <- out$center #loc <- mns - munot #rhot <- n * t(loc) %*% solve(covs) %*% loc #get bootstrap sample of the robust estimator of center if(med==T){ for(j in 1:B){ tem <- sample(indx,n,replace=T) mus[j,] <- apply(x[tem,],2,median) } } else{ for(j in 1:B){ tem <- sample(indx,n,replace=T) mus[j,] <- covrmvn(x[tem,])$center } } list(mus=mus) } rhot2boot<-function(x, y, B=1000, med=T){ #Bootstraps RMVN or coord median 2 sample Hotelling Type T^2 test. # Need p > 1. x <- as.matrix(x) nx <- dim(x)[1] p <- dim(x)[2] y <- as.matrix(y) ny <- dim(y)[1] indx <- 1:nx indy <- 1:ny mus <- matrix(0,nrow=B,ncol=p) #get bootstrap sample of the robust statistic Tx - Ty if(med == T){ for(j in 1:B){ tem <- sample(indx,nx,replace=T) Tx <- apply(x[tem,],2,median) tem <- sample(indy,ny,replace=T) Ty <- apply(y[tem,],2,median) mus[j,] <- Tx-Ty } } else{ for(j in 1:B){ tem <- sample(indx,nx,replace=T) Tx <- covrmvn(x[tem,])$center tem <- sample(indy,ny,replace=T) Ty <- covrmvn(y[tem,])$center mus[j,] <- Tx-Ty } } list(mus=mus) } rhotsim<-function(n = 100,p = 2,csteps = 5,gam = 0.4,runs = 100,outliers = 0,pm = 10,delta = 0) {# Need p > 1. # This R function simulates an ad hod robust Hotelling's T^2 test. # outliers = 0 for no outliers and X~N(0,diag(1,...,p)), # 1 for outliers a tight cluster at major axis (0,...,0,pm)' # 2 for outliers a tight cluster at minor axis (pm,0, ...,0)' # 3 for outliers X~N((pm,...,pm)',diag(1,...,p)) # 4 for outliers X[i,p] = pm # 5 for outliers X[i,1] = pm # Power can be estimated by increasing delta so mu = delta(1,...,1) # and mu_o = 0*mu. # For outliers=0, want hquant and rquant approx 1. 
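##Hedged example for rhot2boot above: test Ho: mu_x = mu_y by checking whether
##0 is in a prediction region for the bootstrapped differences (needs predreg,
##and covrmvn if med = F; data illustrative):
# x <- matrix(rnorm(200), ncol = 2)
# y <- matrix(rnorm(200), ncol = 2) + 1
# out <- rhot2boot(x, y, B = 200, med = T)
# predreg(out$mus, alpha = 0.05)$inr #0 suggests rejecting Ho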
A <- sqrt(diag(1:p)) up <- ((n - 1) * p * qf(0.95, df1 = p, df2 = (n - p)))/(n - p) rup <- (1.04 + 0.12/p + (40+p)/n) * up hot <- 1:runs rhot <- hot munot <- 0 * (1:p) mu <- delta * (1 + munot) val <- floor(gam * n) for(i in 1:runs) { x <- matrix(rnorm(n * p), ncol = p, nrow = n) x <- x %*% A if(outliers == 1) { x[1:val, ] <- matrix(rnorm(val * p, sd = 0.01), ncol = p, nrow = val) x[1:val, p] <- x[1:val, p] + pm } if(outliers == 2) { x[1:val, ] <- matrix(rnorm(val * p, sd = 0.01), ncol = p, nrow = val) x[1:val, 1] <- x[1:val, 1] + pm } if(outliers == 3) { tem <- pm + 0 * 1:p x[1:val, ] <- x[1:val, ] + tem } if(outliers == 4) { x[1:val, p] <- pm } if(outliers == 5) { x[1:val, 1] <- pm } x <- mu + x #get Hotelling's T^2 statistic covs <- var(x) mns <- apply(x, 2, mean) loc <- mns - munot hot[i] <- n * t(loc) %*% solve(covs) %*% loc #get robust statistic out <- covrmvn(x) covs <- out$cov mns <- out$center loc <- mns - munot rhot[i] <- n * t(loc) %*% solve(covs) %*% loc } hcvr <- sum(hot > up)/runs rhcvr <- sum(rhot > rup)/runs hquant <- quantile(hot, probs = 0.95)/up rquant <- quantile(rhot, probs = 0.95)/rup #plot of scaled rhot vs hot should scatter about the identity line #rhot <- rhot/(1.04 + 0.12/p + (40+p)/n) list(hot = hot, rhot = rhot, hcv = hcvr, rhcv = rhcvr, hquant = hquant, rquant = rquant) } rhotsim2<-function(n = 100, p = 2, csteps = 5, B= 1000, gam = 0.4, runs = 100, outliers = 0, pm = 10, delta = 0){ # This R function simulates robust Hotelling's T^2 test based on the #bootstrap. ########### Pretty slow. # Need p > 1. # outliers = 0 for no outliers and X~N(0,diag(1,...,p)), # 1 for outliers a tight cluster at major axis (0,...,0,pm)' # 2 for outliers a tight cluster at minor axis (pm,0, ...,0)' # 3 for outliers X~N((pm,...,pm)',diag(1,...,p)) # 4 for outliers X[i,p] = pm # 5 for outliers X[i,1] = pm # Power can be estimated by increasing delta so mu = delta(1,...,1) # and mu_o = 0*mu. # For outliers=0, want hquant and rquant approx 1. 
A <- sqrt(diag(1:p)) up <- ((n - 1) * p * qf(0.95, df1 = p, df2 = (n - p)))/(n - p) rup <- (1.04 + 0.12/p + (40+p)/n) * up hot <- 1:runs rhot <- hot munot <- 0 * (1:p) mu <- delta * (1 + munot) val <- floor(gam * n) indx <- 1:n mus <- matrix(0,nrow=B,ncol=p) rcv <- 0 avelen <- 0 for(i in 1:runs) { x <- matrix(rnorm(n * p), ncol = p, nrow = n) x <- x %*% A if(outliers == 1) { x[1:val, ] <- matrix(rnorm(val * p, sd = 0.01), ncol = p, nrow = val) x[1:val, p] <- x[1:val, p] + pm } if(outliers == 2) { x[1:val, ] <- matrix(rnorm(val * p, sd = 0.01), ncol = p, nrow = val) x[1:val, 1] <- x[1:val, 1] + pm } if(outliers == 3) { tem <- pm + 0 * 1:p x[1:val, ] <- x[1:val, ] + tem } if(outliers == 4) { x[1:val, p] <- pm } if(outliers == 5) { x[1:val, 1] <- pm } x <- mu + x #get Hotelling's T^2 statistic covs <- var(x) mns <- apply(x, 2, mean) loc <- mns - munot hot[i] <- n * t(loc) %*% solve(covs) %*% loc #get robust statistic out <- covrmvn(x) covs <- out$cov mns <- out$center loc <- mns - munot rhot[i] <- n * t(loc) %*% solve(covs) %*% loc #do the prediction region bootstrap test for(j in 1:B){ tem <- sample(indx,n,replace=T) mus[j,] <- covrmvn(x[tem,])$center } temp <- predreg(mus,alpha=0.05) if(temp$D0 > temp$cuplim) rcv <- rcv + 1 avelen <- avelen + temp$cuplim } hcvr <- sum(hot > up)/runs rhcvr <- sum(rhot > rup)/runs rcv <- rcv/runs avelen <- avelen/runs hquant <- quantile(hot, probs = 0.95)/up rquant <- quantile(rhot, probs = 0.95)/rup #plot of scaled rhot vs hot should scatter about the identity line #rhot <- rhot/(1.04 + 0.12/p + (40+p)/n) list(hot = hot, rhot = rhot, hcv = hcvr, rhcv = rhcvr, hquant = hquant, rquant = rquant, bootcv = rcv, bootlen = avelen) } rmaha<-function(x) {# Need p > 1 or x a matrix. # Produces robust Mahalanobis distances (scaled for normal data). 
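##Hedged example (needs library(MASS) for cov.mcd; data illustrative):
# library(MASS)
# x <- matrix(rnorm(200), ncol = 2)
# rd <- rmaha(x)
# plot(rd); abline(h = sqrt(qchisq(0.975, 2))) #flag cases with large distances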
p <- dim(x)[2]
out <- cov.mcd(x)
center <- out$center
cov <- out$cov
rd <- sqrt(mahalanobis(x, center, cov))
#scale so the median distance matches sqrt(qchisq(0.5, p)) for MVN data,
#as in mplot
const <- sqrt(qchisq(0.5, p))/median(rd)
return(const * rd)
}
rmba<-function(x, csteps = 5)
{# gets the reweighted MBA estimator, works for p = 1
zx <- x
x <- as.matrix(x)
p <- dim(x)[2]
##get the DGK estimator
covs <- var(x)
mns <- apply(x, 2, mean)
## concentrate
for(i in 1:csteps) {
md2 <- mahalanobis(x, mns, covs)
medd2 <- median(md2)
if(p > 1) {
mns <- apply(x[md2 <= medd2, ], 2, mean) }
if(p == 1) {
mns <- mean(zx[md2 <= medd2]) }
covs <- var(x[md2 <= medd2, ])
}
covb <- covs
mnb <- mns
##get the square root of det(covb)
critb <- prod(diag(chol(covb)))
##get the MB estimator
covv <- diag(p)
med <- apply(x, 2, median)
md2 <- mahalanobis(x, center = med, covv)
medd2 <- median(md2)
## get the start
if(p > 1) {
mns <- apply(x[md2 <= medd2, ], 2, mean) }
if(p == 1) {
mns <- mean(zx[md2 <= medd2]) }
covs <- var(x[md2 <= medd2, ])
## concentrate
for(i in 1:csteps) {
md2 <- mahalanobis(x, mns, covs)
medd2 <- median(md2)
if(p > 1) {
mns <- apply(x[md2 <= medd2, ], 2, mean) }
if(p == 1) {
mns <- mean(zx[md2 <= medd2]) }
covs <- var(x[md2 <= medd2, ])
}
crit <- prod(diag(chol(covs)))
if(crit < critb) {
critb <- crit
covb <- covs
mnb <- mns
}
##scale for better performance at MVN
rd2 <- mahalanobis(x, mnb, covb)
const <- median(rd2)/(qchisq(0.5, p))
covb <- const * covb
##reweight the above MBA estimator (mnb,covb) for efficiency
rd2 <- mahalanobis(x, mnb, covb)
up <- qchisq(0.975, p)
if(p > 1) {
rmnb <- apply(x[rd2 <= up, ], 2, mean) }
if(p == 1){
rmnb = mean(zx[rd2 <= up]) }
rcovb <- var(x[rd2 <= up, ])
rd2 <- mahalanobis(x, rmnb, rcovb)
const <- median(rd2)/(qchisq(0.5, p))
rcovb <- const * rcovb
## reweight again
rd2 <- mahalanobis(x, rmnb, rcovb)
up <- qchisq(0.975, p)
if(p > 1){
rmnb <- apply(x[rd2 <= up, ], 2, mean) }
if(p == 1){
rmnb = mean(zx[rd2 <= up]) }
rcovb <- var(x[rd2 <= up, ])
rd2 <- mahalanobis(x, rmnb, rcovb)
const <- median(rd2)/(qchisq(0.5, p))
rcovb <- const * rcovb
list(center = rmnb, cov = rcovb)
}
rmbsim <-function(n = 100, p = 2, csteps = 5, gam = 0.4, runs = 20,
outliers = 0, pm = 10){
# This R function simulates the MB and RMB estimators.
# outliers = 0 for no outliers and X~N(0,diag(1,...,p)), # 1 for outliers a point mass on major axis (0,...,0,pm)' # 2 for outliers a point mass on minor axis (pm,0, ...,0)' # 3 for outliers X~N((pm,...,pm)',diag(1,...,p)) # 4 for outliers X[i,p] = pm # 5 for outliers X[i,1] = pm # 6 for outliers a tight cluster at major axis (0,...,0,pm)' #set.seed(974) A <- sqrt(diag(1:p)) cloc <- 0 * (1:p) csig <- 0 * A mbloc <- cloc mbsig <- csig rmbloc <- cloc rmbsig <- csig cct <- 0 mbct <- 0 rmbct <- 0 val <- floor(gam * n) for(i in 1:runs) { x <- matrix(rnorm(n * p), ncol = p, nrow = n) x <- x %*% A if(outliers == 1) { x[1:val, ] <- 0 x[1:val,p] <- pm } if(outliers == 2) { x[1:val, ] <- 0 x[1:val,1] <- pm } if(outliers == 3) { tem <- pm + 0 * 1:p x[1:val, ] <- x[1:val, ] + tem } if(outliers == 4) { x[1:val, p ] <- pm } if(outliers == 5) { x[1:val, 1 ] <- pm } if(outliers == 6) { x[1:val, ] <- matrix(rnorm(val * p, sd = 0.01), ncol = p, nrow = val) x[1:val, p] <- x[1:val, p] + pm } out <- covrmb(x, csteps = csteps) rmbloc <- rmbloc + out$center rmbsig <- rmbsig + out$cov rd2 <- mahalanobis(x, out$center, out$cov) if(min(rd2[1:val]) > max(rd2[(val + 1):n])) rmbct <- rmbct + 1 mbloc <- mbloc + out$mnm mbsig <- mbsig + out$covm rd2 <- mahalanobis(x, out$mnm, out$covm) if(min(rd2[1:val]) > max(rd2[(val + 1):n])) mbct <- mbct + 1 cmn <- apply(x,2,mean) cloc <- cloc + cmn ccov <- var(x) csig <- csig + ccov rd2 <- mahalanobis(x, cmn, ccov) if(min(rd2[1:val]) > max(rd2[(val + 1):n])) cct <- cct + 1 } cloc <- cloc/runs csig <- csig/runs mbloc <- mbloc/runs mbsig <- mbsig/runs rmbloc <- rmbloc/runs rmbsig <- rmbsig/runs list(cloc = cloc, mbloc = mbloc, rmbloc = rmbloc, csig = csig, mbsig=mbsig, rmbsig=rmbsig, cct = cct, mbct=mbct, rmbct=rmbct) } rmltreg<-function(x, y,indices=c(1,2)){ ##Need p > 1, m > 1. # Does robust multivariate linear regression. # Advance the plot by highlighting Stop with the right mouse button. # Want n > mp and n > 10p for Hotelling Lawley pvalues. # The indices are the variables to be left out of the # reduced model for the MANOVA partial F test. x <- as.matrix(x) y <- as.matrix(y) n <- dim(x)[1] q <- dim(x)[2] p <- q+1 m <- dim(y)[2] r <- length(indices) #Get L for the MANOVA partial F test L <- matrix(0,nrow=r,ncol=p) for(i in 1:r) L[i,indices[i]] <- 1 res <- matrix(nrow = n, ncol = m, 0) fit <- res Bhat <- matrix(nrow = p, ncol = m, 0) # q + 1 = p is number of predictors including intercept cmar <- par("mar") par(mfrow = c(2, 1)) par(mar=c(4.0,4.0,2.0,0.5)) for(i in 1:m){ out <- hbreg(x,y[,i]) res[,i] <- y[,i] - out$coef[1] - x %*% out$coef[-1] fit[,i] <- y[,i] - res[,i] Bhat[,i] <- out$coef } for(i in 1:m){ plot(fit[,i],y[,i]) abline(0, 1) title("Response Plot") identify(fit[,i],y[,i]) plot(fit[,i], res[,i]) title("Residual Plot") identify(fit[,i], res[,i]) } par(mfrow = c(1, 1)) par(mar=cmar) Covhat <- (n-1)/(n-p) * var(res) #Get pvalues for testing whether jth predictor is #needed in the model given the other predictors are in the model. 
one <- 1 + 0*1:n
w <- cbind(one,x)
pvals <- 0*1:p
Fj <- pvals
J <- t(w)%*%w
J <- solve(J)
Covinv <- solve(Covhat)
dendf <- n - m*p
if( n <= m*p) dendf <- 1
for(j in 1:p){
T <- t(Bhat[j,])%*%Covinv%*%Bhat[j,]/J[j,j]
pvals[j] <- 1 - pf((T/m),m,dendf)
Fj[j] <- T/m
}
#get the MANOVA F test statistics for whether nontrivial
#predictors are needed in the model
D <- as.matrix(Bhat[-1,])
if(p == 2) D <- t(D)
tem <- as.matrix(J[-1,-1])
teminv <- solve(tem)
Weinv <- Covinv/(n-p)
H <- t(D)%*%teminv%*%D
C <- Weinv%*%H
eig <- eigen(C,symmetric=FALSE,only.values=TRUE)$values
eig <- as.double(eig)
u <- sum(eig)
MANOVAF <- (n-p)*u/((p-1)*m)
pval <- 1 - pf(MANOVAF,(p-1)*m,dendf)
MANOVA <- cbind(MANOVAF, pval)
#got MANOVA F test summaries
#Get pval for MANOVA partial F test for whether the
#predictors given by indices are needed in the model.
tem <- L%*%J%*%t(L)
teminv <- solve(tem)
H <- t(L%*%Bhat)%*%teminv%*%(L%*%Bhat)
C <- Weinv%*%H
eig <- eigen(C,symmetric=FALSE,only.values=TRUE)$values
eig <- as.double(eig)
u <- sum(eig)
partialF <- (n-p)*u/(r*m)
Pval <- 1 - pf(partialF,r*m,dendf)
partial <- cbind(partialF, Pval)
##got partial F test summaries
Ftable <- cbind(Fj,pvals)
list(fit = fit, res = res, Covhat = Covhat, Bhat = Bhat, partial=partial,
Ftable=Ftable, MANOVA=MANOVA)
}
rmpredsim<-function(n = 100, m = 2, p = 4, nruns = 10, etype = 1, eps = 0.25,
psi = 0.1, dd = 7, mnull = F, alpha=0.1){
# Need p > 1, m > 1.
# Simulates prediction regions for
# robust multivariate linear regression model.
# p = number of predictors including intercept
# m > 1 is the number of response variables
# want n > mp, n > 10m and n > 10 p
# multiply E by A where etype = 1 for MVN Nm(0,I),
# etype = 2 for (1 - eps) Nm(0,I) + eps Nm(0, 25 I)
# eps = 0.1, 0.25, 0.4, and 0.6 are interesting
# etype = 3 for multivariate t_d with d = dd degrees of freedom
# dd = 1, 2, 3, 5, 7 are interesting
# etype = 4 for lognormal - E(lognormal).
# For MVN data multiplying E by A results
# in a covariance matrix with eigenvector c(1, ..., 1)^T
# corresponding to the largest eigenvalue. The diagonal elements
# of the covariance matrix are 1 + (m-1) psi^2, while the off
# diagonal elements are 2 psi + (m-2) psi^2. Hence the correlations
# are (2 psi + (m-2) psi^2)/(1 + (m-1) psi^2). As psi gets
# close to 1, the data clusters about the line in the
# direction of (1, ..., 1)^T.
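##Hedged example for rmltreg above (needs hbreg loaded; data illustrative,
##plots are advanced interactively):
# x <- matrix(rnorm(300), ncol = 3)
# y <- cbind(1 + x %*% c(1, 1, 0), x %*% c(0, 1, 1)) + matrix(rnorm(200), ncol = 2)
# out <- rmltreg(x, y, indices = c(2, 3))
# out$MANOVA; out$partial; out$Ftable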
ccvr <- 0 scvr <- 0 rcvr <- 0 volc <- 1:nruns vols <- volc volr <- volc #up <- 1 - alpha up <- min((1 - alpha/2), (1 - alpha + 10*alpha*m/n)) if(alpha > 0.1) up <- min((1 - alpha + 0.05), (1 - alpha + m/n)) qn <- up if(qn < 1 - alpha + 0.001) up <- 1 - alpha np1 <- n + 1 q <- p - 1 A <- matrix(psi,nrow=m,ncol=m) diag(A) <- 1 res <- matrix(nrow = n, ncol = m, 0) fit <- res B <- matrix(nrow = p, ncol = m, 1) if(p == m){ for(j in 1:(m-1)){ B[p-j+1,j:m] <- 0 } } if(p > m){ for(j in 1:m){ B[p-j+1,j:m] <- 0 } } if(p < m){ for(j in 1:(p-1)){ B[p-j+1,j:m] <- 0 } } if (mnull == T) B[-1,] <- 0 Bhat <- B one <- 1 + 0*1:np1 onen <- one[-1] for(i in 1:nruns){ x <- matrix(rnorm(np1 * q), nrow = np1, ncol = q) w <- cbind(one,x) y <- w %*% B #make error matrix E E <- matrix(rnorm(np1 * m), nrow = np1, ncol = m) if(etype == 2) { zu <- runif(np1) E[zu < eps, ] <- E[zu < eps, ] * 5 } if(etype == 3) { zu <- sqrt(rchisq(np1, dd)/dd) E <- E/zu } if(etype == 4) E <- exp(E) - exp(0.5) #want mean 0 error vectors E <- E %*% A # got error matrix E y <- y + E yf <- y[np1,] #want xf to be a column vector with a 1 xf <- rbind(1,as.matrix(x[np1,])) y <- y[-np1,] x <- x[-np1,] w <- w[-np1,] for(j in 1:m){ out <- hbreg(x,y[,j]) res[,j] <- y[,j] - w %*% out$coef fit[,j] <- y[,j] + res[,j] Bhat[,j] <- out$coef } ##Note that ith row of xx is t(hat(E(yf)) + hat(eps)_i) ##for i = 1,...,n. ##Get prediction regions based on xx which has m columns, ##and see if yf is in the prediction regions. zz <- t(t(Bhat)%*%xf) xx<- res + onen %*% zz center <- apply(xx, 2, mean) cov <- var(xx) md2 <- mahalanobis(xx, center, cov) hsq <- quantile(md2, up) if(mahalanobis(t(yf), center, cov) <= hsq) ccvr <- ccvr + 1 volc[i] <- sqrt(hsq)^m * prod(diag(chol(cov))) out <- covrmvn(xx) center <- out$center cov <- out$cov md2 <- mahalanobis(xx, center, cov) hsq <- quantile(md2, up) dsq <- mahalanobis(t(yf), center, cov) if(dsq <= hsq) scvr <- scvr + 1 sqrtdet <- prod(diag(chol(cov))) vols[i] <- sqrt(hsq)^m * sqrtdet hsq <- qchisq(up, m) if(dsq <= hsq) rcvr <- rcvr + 1 volr[i] <- sqrt(hsq)^m * sqrtdet } ccvr <- ccvr/nruns scvr <- scvr/nruns rcvr <- rcvr/nruns #get a measure of efficiency wrt vols, so eff(vols) = 1 vols <- mean(vols) volc <- mean(volc)/vols volr <- mean(volr)/vols vols <- 1 list(ncvr = ccvr, scvr = scvr, mcvr = rcvr, voln = volc, vols = vols, volm = volr, up = up) } rmregboot<-function(x,y, B = 1000){ #bootstrap with residuals for MLR using rmreg3 #out <- rmregboot(belx,bely) #ddplot4(out$betas); ddplot5(out$betas) #mplot(out$betas) #cov(out$betas) full <- lsfit(x,y) res <- full$resid fit <- y - res n <- length(y) x <- as.matrix(x) p <- dim(x)[2]+1 betas <- matrix(0,nrow=B,ncol=p) for(i in 1:B){ yb <- fit + sample(res,n,replace=T) betas[i,] <- rmreg3(x,yb)$Bhat } list(betas=betas) } rmregbootsim<-function(n = 100, p = 4, nruns = 100, eps = 0.1, shift = 9, type = 1, alph = 0.05){ #Very Slow. #Simulates residual bootstrap for rmreg3 with m = 1. #Uses five iid error distributions: # type = 1 for N(0,1) errors, 2 for t3 errors, 3 for exp(1) - 1 errors # 4 for uniform(-1,1) errors, 5 for (1-eps) N(0,1) + eps N(0,(1+shift)^2) errors. 
# constant = 1 so there are p = q+1 coefficients #need p > 1, beta = (1, 1, ..., 1, 0, ..., 0) with k ones, p - k zeroes q <- p-1 k <- floor(p/2) b <- 0 * 1:q b[1:(k-1)] <- 1 #b[1:0] acts like b[1:1] = b[1] beta <- c(1,b) pp1 <- p + 1 cicov <- 0*(1:pp1) avelen <- 0*(1:pp1) for(i in 1:nruns) { x <- matrix(rnorm(n * q), nrow = n, ncol = q) if(type == 1) { y <- 1 + x %*% b + rnorm(n) } if(type == 2) { y <- 1 + x %*% b + rt(n, df = 3) } if(type == 3) { y <- 1 + x %*% b + rexp(n) - 1 } if(type == 4) { y <- 1 + x %*% b + runif(n, min = -1, max = 1) } if(type == 5) { err <- rnorm(n, sd = 1 + rbinom(n, 1, eps) * shift) y <- 1 + x %*% b + err } #make an MLR data set out <-rmregboot(x,y) #residual bootstrap using rmreg3 for (j in 1:p){ tem <- shorth2(out$betas[,j],alpha=alph) if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2]) #tem <- locpi2(out$betas[,j],alpha=alph) #if(beta[j] >= tem$LOCPI[1] && beta[j] <= tem$LOCPI[2]) cicov[j] <- cicov[j] + 1 avelen[j] <- avelen[j] + tem$shorth[2] - tem$shorth[1] #avelen[j] <- avelen[j] + tem$LOCPI[2] - tem$LOCPI[1] } #test whether the last p - k values of beta are 0 tem <- predreg(out$betas[,(k+1):p],alpha=alph) if(tem$D0 <= tem$cuplim) cicov[pp1] <- cicov[pp1] + 1 avelen[pp1] <- avelen[pp1] + tem$cuplim } cicov <- cicov/nruns avelen <- avelen/nruns list(cicov=cicov,avelen=avelen,beta=beta,k=k)} rmreg2<-function(x, y, csteps = 5, locc = 0.5){ # Does robust multivariate linear regression based on RMVN. # Here x contains the nontrivial predictors. # Advance the plot by highlighting Stop with the right mouse button. x <- as.matrix(x) y <- as.matrix(y) n <- dim(x)[1] q <- dim(x)[2] p <- q+1 m <- dim(y)[2] d <- m + q u <- cbind(x,y) res <- matrix(nrow = n, ncol = m, 0) fit <- res Bhat <- matrix(nrow = p, ncol = m, 0) # q + 1 = p is number of predictors including intercept cmar <- par("mar") par(mfrow = c(2, 1)) par(mar=c(4.0,4.0,2.0,0.5)) #get the RMVN subset of the predictors and response u up <- qchisq(0.975, d) qchi <- qchisq(0.5, d) ##get the DGK estimator covs <- var(u) mns <- apply(u, 2, mean) ## concentrate for(i in 1:csteps) { md2 <- mahalanobis(u, mns, covs) medd2 <- median(md2) mns <- apply(u[md2 <= medd2, ], 2, mean) covs <- var(u[md2 <= medd2, ]) } covd <- covs mnd <- mns ##get the MB estimator covv <- diag(d) med <- apply(u, 2, median) md2 <- mahalanobis(u, center = med, covv) medd2 <- median(md2)##get the location criterion cutoff lcut <- medd2 if(locc != 0.5) lcut <- quantile(md2,locc) ## get the start mns <- apply(u[md2 <= medd2, ], 2, mean) covs <- var(u[md2 <= medd2, ]) ## concentrate for(i in 1:csteps) { md2 <- mahalanobis(u, mns, covs) medd2 <- median(md2) mns <- apply(u[md2 <= medd2, ], 2, mean) covs <- var(u[md2 <= medd2, ]) } covm <- covs mnm <- mns ##get FCH attractor covf <- covm mnf <- mnm val2 <- mahalanobis(t(mnd), med, covv) if(val2 < lcut) { ##crit = square root of det(cov) critd <- prod(diag(chol(covd))) critm <- prod(diag(chol(covm))) if(critd < critm) { covf <- covd mnf <- mnd } } ## get the FCH estimator rd2 <- mahalanobis(u, mnf, covf) const <- median(rd2)/qchi covf <- const * covf ##reweight the above FCH estimator (mnf,covf) rd2 <- mahalanobis(u, mnf, covf) rmnmvn <- apply(u[rd2 <= up, ], 2, mean) rcovmvn <- var(u[rd2 <= up, ]) d1 <- sum(rd2 <= up) rd2 <- mahalanobis(u, rmnmvn, rcovmvn) qchi2 <- (0.5 * 0.975 * n)/d1 qchi2 <- min(qchi2, 0.995) const <- median(rd2)/qchisq(qchi2, d) rcovmvn <- const * rcovmvn ##now get the subset of data w used to construct the RMVN estimator rd2 <- mahalanobis(u, rmnmvn, rcovmvn) w <- u[rd2 <= 
up, ] xw <- as.matrix(w[,1:q]) yw <- as.matrix(w[,(q+1):d]) #get the multivariate regression after ellipsoidal trimming for(i in 1:m){ out <- lsfit(xw,yw[,i]) res[,i] <- y[,i] - out$coef[1] - x %*% out$coef[-1] fit[,i] <- y[,i] - res[,i] Bhat[,i] <- out$coef } for(i in 1:m){ plot(fit[,i],y[,i]) abline(0, 1) title("Response Plot") identify(fit[,i],y[,i]) plot(fit[,i], res[,i]) title("Residual Plot") identify(fit[,i], res[,i]) } par(mfrow = c(1, 1)) par(mar=cmar) list(fit = fit, res = res, Bhat = Bhat) } rmreg3<-function(x, y, csteps = 5, locc = 0.5){ # Does robust multiple linear regression based on RMVN. # Here x contains the nontrivial predictors. # This is rmreg2 except plots are not made. # Used by hbreg. x <- as.matrix(x) y <- as.matrix(y) n <- dim(x)[1] q <- dim(x)[2] p <- q+1 m <- dim(y)[2] d <- m + q u <- cbind(x,y) res <- matrix(nrow = n, ncol = m, 0) fit <- res Bhat <- matrix(nrow = p, ncol = m, 0) # q + 1 = p is number of predictors including intercept #get the RMVN subset of the predictors and response u up <- qchisq(0.975, d) qchi <- qchisq(0.5, d) ##get the DGK estimator covs <- var(u) mns <- apply(u, 2, mean) ## concentrate for(i in 1:csteps) { md2 <- mahalanobis(u, mns, covs) medd2 <- median(md2) mns <- apply(u[md2 <= medd2, ], 2, mean) covs <- var(u[md2 <= medd2, ]) } covd <- covs mnd <- mns ##get the MB estimator covv <- diag(d) med <- apply(u, 2, median) md2 <- mahalanobis(u, center = med, covv) medd2 <- median(md2)##get the location criterion cutoff lcut <- medd2 if(locc != 0.5) lcut <- quantile(md2,locc) ## get the start mns <- apply(u[md2 <= medd2, ], 2, mean) covs <- var(u[md2 <= medd2, ]) ## concentrate for(i in 1:csteps) { md2 <- mahalanobis(u, mns, covs) medd2 <- median(md2) mns <- apply(u[md2 <= medd2, ], 2, mean) covs <- var(u[md2 <= medd2, ]) } covm <- covs mnm <- mns ##get FCH attractor covf <- covm mnf <- mnm val2 <- mahalanobis(t(mnd), med, covv) if(val2 < lcut) { ##crit = square root of det(cov) critd <- prod(diag(chol(covd))) critm <- prod(diag(chol(covm))) if(critd < critm) { covf <- covd mnf <- mnd } } ## get the FCH estimator rd2 <- mahalanobis(u, mnf, covf) const <- median(rd2)/qchi covf <- const * covf ##reweight the above FCH estimator (mnf,covf) rd2 <- mahalanobis(u, mnf, covf) rmnmvn <- apply(u[rd2 <= up, ], 2, mean) rcovmvn <- var(u[rd2 <= up, ]) d1 <- sum(rd2 <= up) rd2 <- mahalanobis(u, rmnmvn, rcovmvn) qchi2 <- (0.5 * 0.975 * n)/d1 qchi2 <- min(qchi2, 0.995) const <- median(rd2)/qchisq(qchi2, d) rcovmvn <- const * rcovmvn ##now get the subset of data w used to construct the RMVN estimator rd2 <- mahalanobis(u, rmnmvn, rcovmvn) w <- u[rd2 <= up, ] xw <- as.matrix(w[,1:q]) yw <- as.matrix(w[,(q+1):d]) #get the multivariate regression after ellipsoidal trimming for(i in 1:m){ out <- lsfit(xw,yw[,i]) res[,i] <- y[,i] - out$coef[1] - x %*% out$coef[-1] fit[,i] <- y[,i] - res[,i] Bhat[,i] <- out$coef } list(fit = fit, res = res, Bhat = Bhat) } rmregddsim<-function(n = 100, m = 2, p = 4, nruns = 10, etype = 1, eps = 0.25, psi = 0.1, dd = 7, mnull = F, alph=0.1){ # Need p > 1, m > 1. # Simulates DD plots robust multivariate linear regression model. # The identity line and lines corresponding to the # 100(1-alph)% prediction regions are added. 
# p = number of predictors including intercept # m > 1 is the number of response variables # want n > mp, n > 10m and n > 10 p # multiply E by A where etype = 1 for MVN Nm(0,I), # etype = 2 for (1 - eps) Nm(0,I) + eps Nm(0, 25 I) # eps = 0.1, 0.25, 0.4, and 0.6 are interesting # etype = 3 for multivariate t_d with d = dd degrees of freedom # dd = 1, 2, 3, 5, 7 are interesting # etype = 4 for lognormal - E(lognormal). # For MVN data multiplying E by A results # in a covariance matrix with eigenvector c(1, ..., 1)^T # corresponding to the largest eigenvalue. The diagonal elements # of the covariance matrix are 1 + (m-1) psi^2, while the off # diagonal elements are 2 psi + (m-2) psi^2. Hence the correlations # are (2 rho + (m-2) psi^2)/(1 + (m-1) psi^2). As psi gets # close to 1, the data clusters about the line in the # direction of (1, ..., 1)^T. q <- p - 1 A <- matrix(psi,nrow=m,ncol=m) diag(A) <- 1 res <- matrix(nrow = n, ncol = m, 0) fit <- res B <- matrix(nrow = p, ncol = m, 1) if(p == m){ for(j in 1:(m-1)){ B[p-j+1,j:m] <- 0 } } if(p > m){ for(j in 1:m){ B[p-j+1,j:m] <- 0 } } if(p < m){ for(j in 1:(p-1)){ B[p-j+1,j:m] <- 0 } } if (mnull == T) B[-1,] <- 0 Bhat <- B one <- 1 + 0*1:n for(i in 1:nruns) { x <- matrix(rnorm(n * q), nrow = n, ncol = q) w <- cbind(one,x) y <- w %*% B #make error matrix E E <- matrix(rnorm(n * m), nrow = n, ncol = m) if(etype == 2) { zu <- runif(n) E[zu < eps, ] <- E[zu < eps, ] * 5 } if(etype == 3) { zu <- sqrt(rchisq(n, dd)/dd) E <- E/zu } if(etype == 4) E <- exp(E) - exp(0.5) #want mean 0 error vectors E <- E %*% A # got error matrix E y <- y + E for(j in 1:m){ out <- hbreg(x,y[,j]) res[,j] <- y[,j] - out$coef[1] - x %*% out$coef[-1] fit[,j] <- y[,j] + res[,j] Bhat[,j] <- out$coef } ddplot4(res, alpha = alph) } Covhat <- (n-1)/(n-p) * var(res) list(Bhat = Bhat, B=B, Covhat = Covhat) } rmregsim<-function(n = 100, m = 2, p = 4, nruns = 10, etype = 1, eps = 0.25, psi = 0.1, dd = 7, mnull = T){ # Need p > 1, m > 1. # Simulates robust multivariate linear regression model and gets # pvalues for tests for whether the p-1 nontrivial # predictors are needed in the model and # pvalues for tests for whether Xi is needed in the model. # p = number of predictors including intercept # m > 1 is the number of response variables # want n > mp and n > 10 p # multiply E by A where etype = 1 for MVN Nm(0,I), # etype = 2 for (1 - eps) Nm(0,I) + eps Nm(0, 25 I) # eps = 0.1, 0.25, 0.4, and 0.6 are interesting # etype = 3 for multivariate t_d with d = dd degrees of freedom # dd = 1, 2, 3, 5, 7 are interesting # etype = 4 for lognormal - E(lognormal). # For MVN data multiplying E by A results # in a covariance matrix with eigenvector c(1, ..., 1)^T # corresponding to the largest eigenvalue. The diagonal elements # of the covariance matrix are 1 + (m-1) psi^2, while the off # diagonal elements are 2 psi + (m-2) psi^2. Hence the correlations # are (2 rho + (m-2) psi^2)/(1 + (m-1) psi^2). As psi gets # close to 1, the data clusters about the line in the # direction of (1, ..., 1)^T. # Illustrates that Hotelling Lawley statistic = last statistic/(n-p). 
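##Hedged example (values illustrative; slow since hbreg is refit for each
##response in each run):
# out <- rmregsim(n = 100, m = 2, p = 4, nruns = 5, etype = 1, mnull = T)
# out$fcov; out$mancv #rejection proportions, near 0.05 under Ho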
rmregsim<-function(n = 100, m = 2, p = 4, nruns = 10, etype = 1,
eps = 0.25, psi = 0.1, dd = 7, mnull = T){
# Need p > 1, m > 1.
# Simulates the robust multivariate linear regression model and gets
# pvalues for tests for whether the p-1 nontrivial
# predictors are needed in the model and
# pvalues for tests for whether Xi is needed in the model.
# p = number of predictors including intercept
# m > 1 is the number of response variables
# want n > mp and n > 10 p
# multiply E by A where etype = 1 for MVN Nm(0,I),
# etype = 2 for (1 - eps) Nm(0,I) + eps Nm(0, 25 I)
# eps = 0.1, 0.25, 0.4, and 0.6 are interesting
# etype = 3 for multivariate t_d with d = dd degrees of freedom
# dd = 1, 2, 3, 5, 7 are interesting
# etype = 4 for lognormal - E(lognormal).
# For MVN data multiplying E by A results
# in a covariance matrix with eigenvector c(1, ..., 1)^T
# corresponding to the largest eigenvalue. The diagonal elements
# of the covariance matrix are 1 + (m-1) psi^2, while the off
# diagonal elements are 2 psi + (m-2) psi^2. Hence the correlations
# are (2 psi + (m-2) psi^2)/(1 + (m-1) psi^2). As psi gets
# close to 1, the data clusters about the line in the
# direction of (1, ..., 1)^T.
# Illustrates that the Hotelling Lawley statistic = last statistic/(n-p).
q <- p - 1
A <- matrix(psi,nrow=m,ncol=m)
diag(A) <- 1
res <- matrix(nrow = n, ncol = m, 0)
fit <- res
B <- matrix(nrow = p, ncol = m, 1)
if(p == m){
  for(j in 1:(m-1)){
    B[p-j+1,j:m] <- 0
  }
}
if(p > m){
  for(j in 1:m){
    B[p-j+1,j:m] <- 0
  }
}
if(p < m){
  for(j in 1:(p-1)){
    B[p-j+1,j:m] <- 0
  }
}
if(mnull == T) B[-1,] <- 0
Bhat <- B
one <- 1 + 0*1:n
fcov <- 0*1:p
wilkcov <- fcov
pilcov <- fcov
hotlawcov <- fcov
roycov <- fcov
rden <- n - p - m + 1
rcut <- qf(0.95,m,rden)
dendf <- n - m*p
wcv <- 0
pcv <- 0
hlcv <- 0
rcv <- 0
fcv <- 0
ndf <- (p-1)*m
mancv <- cbind(wcv,pcv,hlcv,rcv,fcv)
#L <- cbind(fcov[-1],diag(p-1))
#want n > mp
fcut <- qf(0.95, m, dendf)
mcut <- qf(0.95,m*(p-1),dendf)
h <- max(p-1,m)
mrcut <- qf(0.95,h,(n-h-1))
for(i in 1:nruns) {
  x <- matrix(rnorm(n * q), nrow = n, ncol = q)
  w <- cbind(one,x)
  y <- w %*% B
  #make error matrix E
  E <- matrix(rnorm(n * m), nrow = n, ncol = m)
  if(etype == 2) {
    zu <- runif(n)
    E[zu < eps, ] <- E[zu < eps, ] * 5
  }
  if(etype == 3) {
    zu <- sqrt(rchisq(n, dd)/dd)
    E <- E/zu
  }
  if(etype == 4) E <- exp(E) - exp(0.5) #want mean 0 error vectors
  E <- E %*% A # got error matrix E
  y <- y + E
  for(j in 1:m){
    out <- hbreg(x,y[,j])
    res[,j] <- y[,j] - out$coef[1] - x %*% out$coef[-1]
    fit[,j] <- y[,j] - res[,j]
    Bhat[,j] <- out$coef
  }
  Covhat <- (n-1)/(n-p) * var(res)
  # w is the data matrix with a column of ones
  J <- t(w)%*%w
  J <- solve(J)
  We <- Covhat*(n-p)
  Covinv <- solve(Covhat)
  Weinv <- Covinv/(n-p)
  #get the test statistics for whether Xj is needed in the model
  for(j in 1:p){
    T <- t(Bhat[j,])%*%Covinv%*%Bhat[j,]/J[j,j]
    if(T/m > fcut) fcov[j] <- fcov[j] + 1
    H <- Bhat[j,]%*%t(Bhat[j,])/J[j,j]
    C <- Weinv%*%H
    eig <- eigen(C,symmetric=FALSE,only.values=TRUE)$values
    eig <- as.double(eig)
    lmax <- eig[1]
    u <- sum(eig)
    v <- sum(eig/(eig+1))
    wilk <- prod(1/(eig+1))
    if(-(n-p-1-0.5*m)*log(wilk)/m > fcut) wilkcov[j] <- wilkcov[j] + 1
    if((n-p)*v/m > fcut) pilcov[j] <- pilcov[j] + 1
    if((n-p)*u/m > fcut) hotlawcov[j] <- hotlawcov[j] + 1
    if(rden*lmax/m > rcut) roycov[j] <- roycov[j] + 1
  }
  #get the MANOVA F test statistics for whether nontrivial
  #predictors are needed in the model
  D <- as.matrix(Bhat[-1,])
  if(p == 2) D <- t(D)
  vecD <- as.matrix(D[,1])
  for(j in 2:m){
    vecD <- rbind(vecD,as.matrix(D[,j]))
  }
  tem <- as.matrix(J[-1,-1])
  teminv <- solve(tem)
  T <- t(vecD)%*%kronecker(Covinv,teminv)%*%vecD
  if(T/ndf > mcut) mancv[5] <- mancv[5] + 1
  H <- t(D)%*%teminv%*%D
  C <- Weinv%*%H
  eig <- eigen(C,symmetric=FALSE,only.values=TRUE)$values
  eig <- as.double(eig)
  lmax <- eig[1]
  u <- sum(eig)
  v <- sum(eig/(eig+1))
  wilk <- prod(1/(eig+1))
  if(-(n-0.5*p-0.5*m-2)*log(wilk)/ndf > mcut) mancv[1] <- mancv[1] + 1
  if((n-p)*v/ndf > mcut) mancv[2] <- mancv[2] + 1
  if((n-p)*u/ndf > mcut) mancv[3] <- mancv[3] + 1
  if((n-h-1)*lmax/h > mrcut) mancv[4] <- mancv[4] + 1
}
wilkcov <- wilkcov/nruns
pilcov <- pilcov/nruns
hotlawcov <- hotlawcov/nruns
roycov <- roycov/nruns
fcov <- fcov/nruns
mancv <- mancv/nruns
list(Bhat = Bhat, B=B, Covhat = Covhat, wilkcov=wilkcov, pilcov=pilcov,
hotlawcov=hotlawcov, roycov=roycov, fcov=fcov, mancv=mancv)
}
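##Example sketch (needs hbreg): with mnull = T the nontrivial predictors
##are not needed, so the entries of mancv estimate the nominal 0.05 level
##of the MANOVA type tests.
#out <- rmregsim(n = 100, m = 2, p = 4, nruns = 20)
#out$mancv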
rowboot<-function(x, y, B = 1000){
#rowwise nonparametric bootstrap for MLR
#use if the cases (y_i, x_i^T) are iid from some distribution
#out <- rowboot(belx,bely)
#ddplot4(out$betas)
#mplot(out$betas)
#cov(out$betas)
full <- lsfit(x,y)
n <- length(y)
x <- as.matrix(x)
p <- dim(x)[2]+1
indx <- 1:n
betas <- matrix(0,nrow=B,ncol=p)
for(i in 1:B){
  tem <- sample(indx,n,replace=T)
  betas[i,] <- lsfit(x[tem,],y[tem])$coef
}
list(betas=betas,full=full)
}

rprcomp<-function(x, csteps = 5, locc = 0.5, corr = T)
{# Needs number of predictors > 1.
# This does robust principal components analysis.
x <- as.matrix(x)
p <- dim(x)[2]
n <- dim(x)[1]
up <- qchisq(0.975, p)
qchi <- qchisq(0.5, p)
##get the DGK estimator
covs <- var(x)
mns <- apply(x, 2, mean)
## concentrate
for(i in 1:csteps) {
  md2 <- mahalanobis(x, mns, covs)
  medd2 <- median(md2)
  mns <- apply(x[md2 <= medd2, ], 2, mean)
  covs <- var(x[md2 <= medd2, ])
}
covd <- covs
mnd <- mns
##get the MB estimator
covv <- diag(p)
med <- apply(x, 2, median)
md2 <- mahalanobis(x, center = med, covv)
medd2 <- median(md2)
##get the location criterion cutoff
lcut <- medd2
if(locc != 0.5) lcut <- quantile(md2,locc)
## get the start
mns <- apply(x[md2 <= medd2, ], 2, mean)
covs <- var(x[md2 <= medd2, ])
## concentrate
for(i in 1:csteps) {
  md2 <- mahalanobis(x, mns, covs)
  medd2 <- median(md2)
  mns <- apply(x[md2 <= medd2, ], 2, mean)
  covs <- var(x[md2 <= medd2, ])
}
covm <- covs
mnm <- mns
##get FCH attractor
covf <- covm
mnf <- mnm
val2 <- mahalanobis(t(mnd), med, covv)
if(val2 < lcut) { ##crit = square root of det(cov)
  critd <- prod(diag(chol(covd)))
  critm <- prod(diag(chol(covm)))
  if(critd < critm) {
    covf <- covd
    mnf <- mnd
  }
}
## get the FCH estimator
rd2 <- mahalanobis(x, mnf, covf)
const <- median(rd2)/qchi
covf <- const * covf
##reweight the above FCH estimator (mnf,covf)
rd2 <- mahalanobis(x, mnf, covf)
rmnmvn <- apply(x[rd2 <= up, ], 2, mean)
rcovmvn <- var(x[rd2 <= up, ])
d1 <- sum(rd2 <= up)
rd2 <- mahalanobis(x, rmnmvn, rcovmvn)
qchi2 <- (0.5 * 0.975 * n)/d1
qchi2 <- min(qchi2, 0.995)
const <- median(rd2)/qchisq(qchi2, p)
rcovmvn <- const * rcovmvn
##now get the subset of data w used to construct the RMVN estimator
rd2 <- mahalanobis(x, rmnmvn, rcovmvn)
w <- x[rd2 <= up, ]
out <- prcomp(w, scale = corr)
list(out = out)
}
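##Example sketch for rprcomp (hypothetical data): a tight cluster of
##outliers can distort classical PCA but is downweighted by the RMVN subset.
#x <- matrix(rnorm(200), ncol = 2)
#x[1:10, ] <- matrix(c(10, -10), nrow = 10, ncol = 2, byrow = T)
#rprcomp(x)$out$rotation   #compare with prcomp(x, scale. = T)$rotation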
rrplot2<-function(x, y, nsamps = 7)
{# In Unix, use X11() to turn on the graphics device before
# using this function. First type library(MASS).
# Makes an RR plot. Needs hbreg, mbareg, mbalata, and rmreg3.
n <- length(y)
x <- as.matrix(x)
rmat <- matrix(nrow = n, ncol = 7)
lsres <- lsfit(x, y)$residuals
print("got OLS")
almsres <- lmsreg(x, y)$resid
print("got ALMS")
altsres <- ltsreg(x, y)$residuals
print("got ALTS")
mbacoef <- mbareg(x, y, nsamp = nsamps)$coef
MBAres <- y - mbacoef[1] - x %*% mbacoef[-1]
print("got MBA")
bbcoef <- hbreg(x, y)$bbcoef
BBres <- y - bbcoef[1] - x %*% bbcoef[-1]
print("got BB")
mbalcoef <- mbalata(x, y, nsamp = nsamps)$coef
MBALATA <- y - mbalcoef[1] - x %*% mbalcoef[-1]
print("got MBALATA")
RMREG2 <- rmreg3(x,y)$res
print("got RMREG2")
rmat[, 1] <- lsres
rmat[, 2] <- almsres
rmat[, 3] <- altsres
rmat[, 4] <- MBAres
rmat[, 5] <- BBres
rmat[, 6] <- MBALATA
rmat[, 7] <- RMREG2
pairs(rmat, labels = c("OLS resid", "ALMS resid", "ALTS resid",
"MBA resid", "BB resid", "MBALATA", "RMREG2"))
}

sctplt<-function(n = 10, p = 2, steps = 5, gam = 0.4, outliers = 0,
pm = 10, jit = F)
{# Use for 1 < p < 11.
# This R function makes a DD plot and scatterplot matrix
# for a sample data set generated as in function mldsim.
# Click the left mouse button to identify points.
# Click the right mouse button after examining the DD plot
# and in R, highlight "stop".
#
# outliers = 0 for no outliers and X~N(0,diag(1,...,p)),
# 1 for outliers a point mass on the major axis (0,...,0,pm)',
# 2 for outliers a point mass on the minor axis (pm,0,...,0)',
# 3 for outliers X~N((pm,...,pm)',diag(1,...,p)),
# 4 for outliers X[i,p] = pm,
# 5 for outliers X[i,1] = pm
#
#generate data set
A <- sqrt(diag(1:p))
cloc <- 0 * (1:p)
csig <- 0 * A
val <- floor(gam * n)
x <- matrix(rnorm(n * p), ncol = p, nrow = n)
x <- x %*% A
if(outliers == 1) {
  x[1:val, ] <- 0
  x[1:val, p] <- pm
}
if(outliers == 2) {
  x[1:val, ] <- 0
  x[1:val, 1] <- pm
}
if(outliers == 3) {
  val <- floor(gam * n)
  tem <- pm + 0 * 1:p
  x[1:val, ] <- x[1:val, ] + tem
}
if(outliers == 4) {
  x[1:val, p] <- pm
}
if(outliers == 5) {
  x[1:val, 1] <- pm
}
ddplot2(x)
par(mfrow = c(1, 1))
z <- x
if(jit == T){
  z <- x + matrix(runif(n * p, max = 0.4), ncol = p, nrow = n)
}
pairs(z)
}

shorth2<-function(y, alpha = 0.05){
# computes the shorth(c) interval [Ln,Un] for c = ceiling[n(1-alpha)].
n <- length(y)
cc <- ceiling(n * (1 - alpha))
sy <- sort(y)
rup <- sy[cc]
rlow <- sy[1]
olen <- rup - rlow
if(cc < n){
  for(j in (cc + 1):n){
    zlen <- sy[j] - sy[j - cc + 1]
    if(zlen < olen) {
      olen <- zlen
      rup <- sy[j]
      rlow <- sy[j - cc + 1]
    }
  }
}
Ln <- rlow; Un <- rup
list(shorth = c(Ln, Un))
}

shorth3<-function(y, alpha = 0.05){
# computes the shorth(c) interval [Ln,Un] for c = cc.
#shorth lists Ln and Un using Frey's correction
n <- length(y)
cc <- ceiling(n * (1 - alpha + 1.12*sqrt(alpha/n)))
cc <- min(n,cc)
sy <- sort(y)
rup <- sy[cc]
rlow <- sy[1]
olen <- rup - rlow
if(cc < n){
  for(j in (cc + 1):n){
    zlen <- sy[j] - sy[j - cc + 1]
    if(zlen < olen) {
      olen <- zlen
      rup <- sy[j]
      rlow <- sy[j - cc + 1]
    }
  }
}
Ln <- rlow; Un <- rup
list(shorth = c(Ln, Un))
}
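##Example sketch: shorth3 applied to bootstrap replicates gives an
##approximate 95% CI, as in the simulation functions below
##(hypothetical data).
#y <- rnorm(100)
#bmns <- 1:200
#for(i in 1:200) bmns[i] <- mean(sample(y, 100, replace = T))
#shorth3(bmns, alpha = 0.05)$shorth   #approx 95% CI for the mean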
shorthLU<-function(y, alpha = 0.05){
# computes the shorth(c) interval [Ln,Un] for c = cc.
#shorth lists Ln and Un using Frey's correction
#also gets the left and right endpoints for the one sided lower and
#upper CIs [left,infty) and (-infty,right].
#So left is a lower bound and right an upper bound for the parameter theta.
n <- length(y)
cc <- ceiling(n * (1 - alpha + 1.12*sqrt(alpha/n)))
cc <- min(n,cc)
sy <- sort(y)
rup <- sy[cc]
rlow <- sy[1]
olen <- rup - rlow
if(cc < n){
  for(j in (cc + 1):n){
    zlen <- sy[j] - sy[j - cc + 1]
    if(zlen < olen) {
      olen <- zlen
      rup <- sy[j]
      rlow <- sy[j - cc + 1]
    }
  }
}
Ln <- rlow; Un <- rup
left <- sy[n-cc+1]
right <- sy[cc]
list(shorth = c(Ln, Un), left = left, right = right)
}

symviews<-function(x, Y)
{# Need number of predictors > 1.
# Makes trimmed views for 90, 80, ..., 0
# percent trimming and sometimes works even if m
# is symmetric about E(x^T beta) where
# y = m(x^T beta) + e.
# For workstations, activate a graphics
# device with command "X11()" or "motif()."
# Use the rightmost mouse button to advance
# the view, and in R, highlight "stop."
x <- as.matrix(x)
tem <- seq(0.1, 1, 0.1)
bols <- lsfit(x, Y)$coef
fit <- x %*% bols[-1]
temx <- x[fit > median(fit), ]
temy <- Y[fit > median(fit)]
out <- covmba(temx)
center <- out$center
cov <- out$cov
rd2 <- mahalanobis(temx, center, cov)
for(i in 1:10) {
  val <- quantile(rd2, tem[i])
  bhat <- lsfit(temx[rd2 <= val, ], temy[rd2 <= val])$coef
  ESP <- x %*% bhat[-1] + bhat[1]
  plot(ESP, Y)
  identify(ESP, Y)
  print(bhat)
}
}

tmn<-function(x, tp = 0.25)
{# computes the 100tp% trimmed mean
mean(x, trim = tp)
}

trviews<-function(x, Y, ii = 1, type = 3)
{# Trimmed views for 0, 10, ..., 90 percent trimming.
# Use ii = 10 for 0, ii = 9 for 10 then 0, ...,
# ii = 1 for 90 then 80, ..., then 0 percent trimming.
# Allows visualization of m and crude estimation of
# c beta in models of the form y = m(x^T beta) + e.
# Workstation: activate a graphics device
# with commands "X11()" or "motif()."
# R needs command "library(MASS)."
# Advance the view with the right mouse button and
# in R, highlight "stop."
x <- as.matrix(x)
if(type == 1) out <- cov.mcd(x)
if(type == 2) out <- covmba(x)
if(type == 3) out <- covfch(x)
if(type == 4) out <- covrmvn(x)
if(type <= 4){
  center <- out$center
  cov <- out$cov
}
#type = 5 for the rfch estimator
if(type == 5){
  out <- covfch(x)
  center <- out$rmnf
  cov <- out$rcovf
}
rd2 <- mahalanobis(x, center, cov)
labs <- c("90%", "80%", "70%", "60%", "50%", "40%", "30%", "20%",
"10%", "0%")
tem <- seq(0.1, 1, 0.1)
for(i in ii:10) {
  val <- quantile(rd2, tem[i])
  bhat <- lsfit(x[rd2 <= val, ], Y[rd2 <= val])$coef
  ESP <- x %*% bhat[-1] + bhat[1]
  plot(ESP, Y)
  title(labs[i])
  identify(ESP, Y)
  print(bhat)
}
}
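##Example sketch for trimmed views (hypothetical data; interactive):
##in the single index model below, the printed bhat should be roughly
##proportional to beta = (1, 2, 0)^T.
#x <- matrix(rnorm(300), ncol = 3)
#Y <- (x %*% c(1, 2, 0) + 1)^3 + rnorm(100)
#trviews(x, Y, ii = 10, type = 3)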
tvreg<-function(x, Y, ii = 1, type = 4){
# Trimmed views (TV) regression for 0, 10, ..., 90 percent
# trimming. Use ii = 10 for 0, ii = 9 for 10 then 0, ...,
# ii = 1 for 90 then 80, ..., then 0 percent trimming.
# Workstation: activate a graphics device
# with commands "X11()" or "motif()."
# R needs command "library(MASS)."
# Advance the view with the right mouse button and
# in R, highlight "stop."
x <- as.matrix(x)
if(type == 1) out <- cov.mcd(x)
if(type == 2) out <- covmba(x)
if(type == 3) out <- covfch(x)
if(type > 3) out <- covrmvn(x)
center <- out$center
cov <- out$cov
rd2 <- mahalanobis(x, center, cov)
labs <- c("90%", "80%", "70%", "60%", "50%", "40%", "30%", "20%",
"10%", "0%")
tem <- seq(0.1, 1, 0.1)
for(i in ii:10) {
  val <- quantile(rd2, tem[i])
  bhat <- lsfit(x[rd2 <= val, ], Y[rd2 <= val])$coef
  FIT <- x %*% bhat[-1] + bhat[1]
  plot(FIT, Y)
  abline(0, 1)
  title(labs[i])
  identify(FIT, Y)
  print(bhat)
}
}

tvreg2<-function(X, Y, M = 0, type = 4)
{# Trimmed views regression for M percent trimming.
# Workstation: activate a graphics device
# with commands "X11()" or "motif()."
# R needs command "library(MASS)."
X <- as.matrix(X)
if(type == 1) out <- cov.mcd(X)
if(type == 2) out <- covmba(X)
if(type == 3) out <- covfch(X)
if(type > 3) out <- covrmvn(X)
center <- out$center
cov <- out$cov
rd2 <- mahalanobis(X, center, cov)
tem <- (100 - M)/100
val <- quantile(rd2, tem)
bhat <- lsfit(X[rd2 <= val, ], Y[rd2 <= val])$coef
FIT <- X %*% bhat[-1] + bhat[1]
plot(FIT, Y)
abline(0, 1)
identify(FIT, Y)
list(coef = bhat)
}

vecw<-function(z){
#Computes vec(z): stacks the columns of z into a column vector.
z <- as.matrix(z)
vecz <- as.vector(z[,1])
p <- dim(z)[2]
if(p > 1){
  for(i in 2:p) vecz <- c(vecz,as.vector(z[,i]))
}
list(vecz=vecz)
}
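##Example sketch: the vec operator pairs with Kronecker products, as in
##the MANOVA test statistic in rmregsim above.
#z <- matrix(1:6, nrow = 2)   #2 x 3 matrix with columns (1,2), (3,4), (5,6)
#vecw(z)$vecz                 #returns 1 2 3 4 5 6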
vsbootsim<-function(n = 100, p = 4, nruns = 100, eps = 0.1, shift = 9,
type = 1, alph = 0.05){
#needs library(leaps)
#PROGRAM FAILS IF A VARIABLE IS NEVER SELECTED IN THE B BOOTSTRAPS.
#Simulates the bootstrap for variable selection using all subsets
#variable selection. So p needs to be small.
#Uses five iid error distributions:
# type = 1 for N(0,1) errors, 2 for t3 errors, 3 for exp(1) - 1 errors,
# 4 for uniform(-1,1) errors, 5 for (1-eps) N(0,1) + eps N(0,(1+shift)^2)
#errors.
# constant = 1 so there are p = q+1 coefficients
#need p > 1, beta = (1, 1, ..., 1, 0, ..., 0) with k ones and p - k zeroes
q <- p-1
k <- floor(p/2)
b <- 0 * 1:q
b[1:(k-1)] <- 1 #b[1:0] acts like b[1:1] = b[1]
beta <- c(1,b)
pp1 <- p + 1
cicov <- 0*(1:pp1)
avelen <- 0*(1:pp1)
for(i in 1:nruns) {
  x <- matrix(rnorm(n * q), nrow = n, ncol = q)
  if(type == 1) {
    y <- 1 + x %*% b + rnorm(n)
  }
  if(type == 2) {
    y <- 1 + x %*% b + rt(n, df = 3)
  }
  if(type == 3) {
    y <- 1 + x %*% b + rexp(n) - 1
  }
  if(type == 4) {
    y <- 1 + x %*% b + runif(n, min = -1, max = 1)
  }
  if(type == 5) {
    err <- rnorm(n, sd = 1 + rbinom(n, 1, eps) * shift)
    y <- 1 + x %*% b + err
  }
  #make an MLR data set
  out <- vselboot(x,y) #bootstrap the minimum Cp model
  for(j in 1:p){
    tem <- shorth3(out$betas[,j],alpha=alph)
    if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2])
    #tem <- locpi2(out$betas[,j],alpha=alph)
    #if(beta[j] >= tem$LOCPI[1] && beta[j] <= tem$LOCPI[2])
      cicov[j] <- cicov[j] + 1
    avelen[j] <- avelen[j] + tem$shorth[2] - tem$shorth[1]
    #avelen[j] <- avelen[j] + tem$LOCPI[2] - tem$LOCPI[1]
  }
  #test whether the last p - k values of beta are 0
  tem <- predreg(out$betas[,(k+1):p],alpha=alph)
  if(tem$D0 <= tem$cuplim) cicov[pp1] <- cicov[pp1] + 1
  avelen[pp1] <- avelen[pp1] + tem$cuplim
}
cicov <- cicov/nruns
avelen <- avelen/nruns
list(cicov=cicov,avelen=avelen,beta=beta,k=k)
}

vsbootsim2<-function(n = 100, p = 4, nruns = 100, eps = 0.1, shift = 9,
type = 1, alph = 0.05){
#needs library(leaps)
#PROGRAM FAILS IF A VARIABLE IS NEVER SELECTED IN THE B BOOTSTRAPS.
#Simulates the bootstrap for forward selection variable selection.
#Uses five iid error distributions:
# type = 1 for N(0,1) errors, 2 for t3 errors, 3 for exp(1) - 1 errors,
# 4 for uniform(-1,1) errors, 5 for (1-eps) N(0,1) + eps N(0,(1+shift)^2)
#errors.
# constant = 1 so there are p = q+1 coefficients
#need p > 1, beta = (1, 1, ..., 1, 0, ..., 0) with k ones and p - k zeroes
q <- p-1
k <- floor(p/2)
b <- 0 * 1:q
b[1:(k-1)] <- 1 #b[1:0] acts like b[1:1] = b[1]
beta <- c(1,b)
pp1 <- p + 1
cicov <- 0*(1:pp1)
avelen <- 0*(1:pp1)
for(i in 1:nruns) {
  x <- matrix(rnorm(n * q), nrow = n, ncol = q)
  if(type == 1) {
    y <- 1 + x %*% b + rnorm(n)
  }
  if(type == 2) {
    y <- 1 + x %*% b + rt(n, df = 3)
  }
  if(type == 3) {
    y <- 1 + x %*% b + rexp(n) - 1
  }
  if(type == 4) {
    y <- 1 + x %*% b + runif(n, min = -1, max = 1)
  }
  if(type == 5) {
    err <- rnorm(n, sd = 1 + rbinom(n, 1, eps) * shift)
    y <- 1 + x %*% b + err
  }
  #make an MLR data set
  out <- fselboot(x,y) #bootstrap the forward sel minimum Cp model
  for(j in 1:p){
    tem <- shorth3(out$betas[,j],alpha=alph)
    if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2])
    #tem <- locpi2(out$betas[,j],alpha=alph)
    #if(beta[j] >= tem$LOCPI[1] && beta[j] <= tem$LOCPI[2])
      cicov[j] <- cicov[j] + 1
    avelen[j] <- avelen[j] + tem$shorth[2] - tem$shorth[1]
    #avelen[j] <- avelen[j] + tem$LOCPI[2] - tem$LOCPI[1]
  }
  #test whether the last p - k values of beta are 0
  tem <- predreg(out$betas[,(k+1):p],alpha=alph)
  if(tem$D0 <= tem$cuplim) cicov[pp1] <- cicov[pp1] + 1
  avelen[pp1] <- avelen[pp1] + tem$cuplim
}
cicov <- cicov/nruns
avelen <- avelen/nruns
list(cicov=cicov,avelen=avelen,beta=beta,k=k)
}
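##Example sketch (needs library(leaps), vselboot, shorth3, predreg; slow):
##cicov gives the observed shorth CI coverages for the p coefficients and,
##in slot p+1, the coverage of the prediction region test that the last
##p - k coefficients are 0; each entry should be near 0.95 for alph = 0.05.
#library(leaps)
#out <- vsbootsim(n = 100, p = 4, nruns = 20)
#out$cicov; out$avelen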
vselboot<-function(x, y, B = 1000){
#needs library(leaps)
#need n and p small, 2 < p < 30
#all subsets minimum Cp regression
#does not make sense to do variable selection if there
#is only one nontrivial predictor
x <- as.matrix(x)
n <- length(y)
p <- 1 + dim(x)[2]
vars <- as.vector(1:(p-1))
#get the full model
full <- lsfit(x,y)
res <- full$resid
fit <- y - res
#get the minimum Cp submodel
out <- leaps(x,y)
mincp <- out$which[out$Cp == min(out$Cp)]
vin <- vars[mincp]
sub <- lsfit(x[,vin],y)
betas <- matrix(0,nrow=B,ncol=p)
#bootstrap the minimum Cp submodel with the residual bootstrap
for(i in 1:B){
  yb <- fit + sample(res,n,replace=T)
  out <- leaps(x,y=yb)
  mincp <- out$which[out$Cp == min(out$Cp)]
  vin <- vars[mincp]
  indx <- c(1,1+vin)
  betas[i,indx] <- lsfit(x[,vin],yb)$coef
}
list(full=full,sub=sub,betas=betas)
}

vspisim<-function(n = 100, p = 4, k = 1, nruns = 100, eps = 0.1, shift = 9,
psi = 0.0, type = 1, alpha = 0.05){
#Needs library(leaps).
#Simulates PIs for forward selection variable selection.
# 1 <= k <= p-1, k is the number of nonnoise variables
#Uses five iid error distributions:
# type = 1 for N(0,1) errors, 2 for t3 errors, 3 for exp(1) - 1 errors,
# 4 for uniform(-1,1) errors, 5 for (1-eps) N(0,1) + eps N(0,(1+shift)^2)
#errors.
# constant = 1 so there are p = q+1 coefficients
#need p > 1, beta = (1, 1, ..., 1, 0, ..., 0) with k+1 ones, p-k-1 zeroes
# Multiply x by A: for MVN data this results
# in a covariance matrix with eigenvector c(1, ..., 1)^T
# corresponding to the largest eigenvalue. As psi gets
# close to 1, the data clusters about the line in the
# direction of (1, ..., 1)^T. See Maronna and Zamar (2002).
# cor(X_i,X_j) = [2 psi + (q-2) psi^2]/[1 + (q-1) psi^2], i not = j,
# when the correlation exists.
set.seed(974)
corfac <- (1 + 15/n) * sqrt((n + 2*p)/(n - p))
if(alpha > 0.1) {qn <- min(1 - alpha + 0.05, 1 - alpha + p/n)}
if(alpha <= 0.1) {qn <- min(1 - alpha/2, 1 - alpha + 10*alpha*p/n)}
pn <- qn
if(pn < 1 - alpha + 0.001) qn <- 1 - alpha
alphan <- 1 - qn
pilen <- 1:nruns
ps <- pilen
opicov <- 0
q <- p-1
vmax <- min(p, as.integer(n/5))
rho <- (2*psi + (q-2)*psi^2)/(1 + (q-1)*psi^2)
A <- matrix(psi,nrow=q,ncol=q)
diag(A) <- 1
b <- 0 * 1:q
b[1:k] <- 1 #b[1:0] acts like b[1:1] = b[1]
vars <- as.vector(1:(p-1))
for(i in 1:nruns) {
  x <- matrix(rnorm(n * q), nrow = n, ncol = q)
  x <- x %*% A
  xf <- rnorm(q) %*% A
  if(type == 1) {
    y <- 1 + x %*% b + rnorm(n)
    yf <- 1 + xf %*% b + rnorm(1)
  }
  if(type == 2) {
    y <- 1 + x %*% b + rt(n, df = 3)
    yf <- 1 + xf %*% b + rt(1, df = 3)
  }
  if(type == 3) {
    y <- 1 + x %*% b + rexp(n) - 1
    yf <- 1 + xf %*% b + rexp(1) - 1
  }
  if(type == 4) {
    y <- 1 + x %*% b + runif(n, min = -1, max = 1)
    yf <- 1 + xf %*% b + runif(1, min = -1, max = 1)
  }
  if(type == 5) {
    err <- rnorm(n, sd = 1 + rbinom(n, 1, eps) * shift)
    y <- 1 + x %*% b + err
    ef <- rnorm(1, sd = 1 + rbinom(1, 1, eps) * shift)
    yf <- 1 + xf %*% b + ef
  }
  #make an MLR data set
  #find the forward sel minimum Cp model
  tem <- regsubsets(x,y,nvmax=vmax,method="forward")
  out <- summary(tem)
  mincp <- out$which[out$cp == min(out$cp), ]
  #do not need the constant in vin
  vin <- vars[mincp[-1]]
  sub <- lsfit(x[,vin],y)
  ps[i] <- length(sub$coef)
  yfhat <- sub$coef[1] + xf[vin] %*% sub$coef[-1]
  fres <- sub$resid
  #get the asymptotically optimal PI
  sres <- sort(fres)
  cc <- ceiling(n * (1 - alphan))
  rup <- sres[cc]
  rlow <- sres[1]
  olen <- rup - rlow
  if(cc < n) {
    for(j in (cc + 1):n) {
      zlen <- sres[j] - sres[j - cc + 1]
      if(zlen < olen) {
        olen <- zlen
        rup <- sres[j]
        rlow <- sres[j - cc + 1]
      }
    }
  }
  up <- yfhat + corfac*rup
  low <- yfhat + corfac*rlow
  pilen[i] <- up - low
  if(low < yf && up > yf) opicov <- opicov + 1
}
psmn <- mean(ps) - k #near 1 if forward selection picks the optimal submodel
pimnlen <- mean(pilen)
opicov <- opicov/nruns
list(psmn=psmn, opicov=opicov, pimnlen=pimnlen)
}
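##Example sketch (needs library(leaps); slow): opicov is the observed PI
##coverage, which should be near 0.95, and psmn is near 1 when forward
##selection picks the k nonnoise variables.
#library(leaps)
#out <- vspisim(n = 100, p = 4, k = 1, nruns = 20)
#out$opicov; out$psmn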
wddplot<-function(x)
{# Shows the southwest corner of the DD plot. Need p > 1.
n <- dim(x)[1]
wt <- 0 * (1:n)
p <- dim(x)[2]
center <- apply(x, 2, mean)
cov <- var(x)
md2 <- mahalanobis(x, center, cov)
out <- covfch(x)
center <- out$center
cov <- out$cov
rd2 <- mahalanobis(x, center, cov)
md <- sqrt(md2)
rd <- sqrt(rd2)
const <- sqrt(qchisq(0.5, p))/median(rd)
rd <- const * rd
wt[rd < sqrt(qchisq(0.975, p))] <- 1
MD <- md[wt > 0]
RD <- rd[wt > 0]
plot(MD, RD)
}

wildboot<-function(x, y, B = 1000, slices = 7)
{#nonparametric, wild, and parametric bootstrap for WLS
#want n/slices > 13
n <- length(y)
x <- as.matrix(x)
p <- dim(x)[2]+1
full <- lsfit(x,y)
res <- full$resid
fit <- y - res #ESP
indx <- sort.list(fit)
vars <- 1:n
val <- as.integer(n/slices)
for(i in 1:slices){
  lo <- (i-1)*val + 1
  hi <- i*val
  if(i == slices) hi <- n
  vars[indx[lo:hi]] <- var(res[indx[lo:hi]])
}
sds <- sqrt(n*vars/(n-p))
sres <- res * sqrt(n/(n-p))
betas <- matrix(0,nrow=B,ncol=p)
wbetas <- betas
pbetas <- betas
indx <- 1:n
for(i in 1:B){
  tem <- sample(indx,n,replace=T)
  betas[i,] <- lsfit(x[tem,],y[tem])$coef #nonparametric bootstrap
  eps <- 2*rbinom(n,size=1,0.5) - 1 #wild bootstrap
  yb <- fit + eps*sres
  wbetas[i,] <- lsfit(x,yb)$coef
  yb <- fit + rnorm(n,mean=0,sd=sds)
  pbetas[i,] <- lsfit(x,yb)$coef #parametric bootstrap
}
list(betas=betas,wbetas=wbetas,pbetas=pbetas,full=full)
}
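##Example sketch for wildboot (hypothetical heteroscedastic data):
##compare the dispersion of the three bootstrap coefficient samples.
#x <- matrix(rnorm(200), ncol = 2)
#y <- 1 + x %*% c(1, 1) + abs(x[,1]) * rnorm(100)
#out <- wildboot(x, y, B = 200)
#apply(out$betas, 2, sd); apply(out$wbetas, 2, sd); apply(out$pbetas, 2, sd)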
wlsbootsim<-function(n = 100, p = 4, k = 1, nruns = 100, nslices = 7,
eps = 0.1, shift = 9, etype = 1, wtype = 1, psi = 0.0, BB = 200,
alph = 0.05)
{#calls confreg, rowboot, shorth3
#Simulates the WLS nonparametric, wild, and parametric bootstrap.
# Use 0 <= psi < 1. Want n/nslices > 13.
#Uses five iid error distributions:
# etype = 1 for N(0,1) errors, 2 for t3 errors, 3 for exp(1) - 1 errors,
# 4 for uniform(-1,1) errors, 5 for (1-eps) N(0,1) + eps N(0,(1+shift)^2)
#errors.
#wtype = 1 for OLS, 2 if err = abs(SP - 5) * e, 3 if sqrt(1 + x[,1]^2) * e,
#4 for exp[1 + log(|x_2|) + ... + log(|x_p|)] * e,
#5 for [1 + log(|x_2|) + ... + log(|x_p|)] * e,
#6 for exp[(log(|x_2|) + ... + log(|x_p|))/q] * e,
#7 for [(log(|x_2|) + ... + log(|x_p|))/q] * e
# constant = 1 so there are p = q+1 coefficients
#1 <= k <= p-2, k is the number of nonnoise variables
#need p > 2, beta = (1, 1, ..., 1, 0, ..., 0) with k+1 ones, p-k-1 zeroes
# Multiply x by A: for MVN data this results
# in a covariance matrix with eigenvector c(1, ..., 1)^T
# corresponding to the largest eigenvalue. As psi gets
# close to 1, the data clusters about the line in the
# direction of (1, ..., 1)^T. See Maronna and Zamar (2002).
# cor(X_i,X_j) = [2 psi + (p-3) psi^2]/[1 + (p-2) psi^2], i not = j, p > 2,
# when the correlation exists.
q <- p-1
b <- 0 * 1:q
dd <- b + 1
dq <- dd/q
b[1:k] <- 1 #b[1:0] acts like b[1:1] = b[1]
beta <- c(1,b)
pp6 <- p+6; pp5 <- p+5; pp4 <- p+4; pp3 <- p+3; pp1 <- p+1; pp2 <- p+2
cicov <- 0*(1:pp6)
avelen <- 0*(1:pp6)
wcicov <- cicov
wavelen <- avelen
pcicov <- cicov
pavelen <- avelen
rho <- (2*psi + (q-2)*psi^2)/(1 + (q-1)*psi^2)
A <- matrix(psi,nrow=q,ncol=q)
diag(A) <- 1
one <- as.vector(0*1:(k+1) + 1)
for(i in 1:nruns) {
  x <- matrix(rnorm(n * q), nrow = n, ncol = q)
  x <- x %*% A
  SP <- 1 + x %*% b
  if(etype == 1) err <- rnorm(n)
  if(etype == 2) err <- rt(n, df = 3)
  if(etype == 3) err <- rexp(n) - 1
  if(etype == 4) err <- runif(n, min = -1, max = 1)
  if(etype == 5) err <- rnorm(n, sd = 1 + rbinom(n, 1, eps) * shift)
  if(wtype == 2) err <- abs(SP - 5) * err
  if(wtype == 3) err <- sqrt(1 + x[,1]^2) * err
  if(wtype == 4) err <- exp(1 + log(abs(x)) %*% dd) * err
  if(wtype == 5) err <- (1 + log(abs(x)) %*% dd) * err
  if(wtype == 6) err <- exp(log(abs(x)) %*% dq) * err
  if(wtype == 7) err <- (log(abs(x)) %*% dq) * err
  #make an MLR data set
  y <- SP + err
  out <- wildboot(x,y,B=BB,slices=nslices) #nonparametric bootstrap
  for(j in 1:p){
    tem <- shorth3(out$betas[,j],alpha=alph)
    temw <- shorth3(out$wbetas[,j],alpha=alph)
    temp <- shorth3(out$pbetas[,j],alpha=alph)
    if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2])
      cicov[j] <- cicov[j] + 1
    avelen[j] <- avelen[j] + tem$shorth[2] - tem$shorth[1]
    if(beta[j] >= temw$shorth[1] && beta[j] <= temw$shorth[2])
      wcicov[j] <- wcicov[j] + 1
    wavelen[j] <- wavelen[j] + temw$shorth[2] - temw$shorth[1]
    if(beta[j] >= temp$shorth[1] && beta[j] <= temp$shorth[2])
      pcicov[j] <- pcicov[j] + 1
    pavelen[j] <- pavelen[j] + temp$shorth[2] - temp$shorth[1]
  }
  #test whether the last p-k-1 values of beta are 0
  gg <- p - k - 1
  tstat <- out$full$coef[(k+2):p]
  tem <- confreg(out$betas[,(k+2):p],g=gg,that=tstat,alpha=alph)
  if(tem$D0 <= tem$cuplim) #pred. reg. method
    cicov[pp1] <- cicov[pp1] + 1
  avelen[pp1] <- avelen[pp1] + tem$cuplim
  if(tem$br0 <= tem$cuplim) #hybrid method
    cicov[pp2] <- cicov[pp2] + 1
  avelen[pp2] <- avelen[pp1] #same cutoff so same length
  if(tem$br0 <= tem$brlim) #Bickel and Ren method
    cicov[pp3] <- cicov[pp3] + 1
  avelen[pp3] <- avelen[pp3] + tem$brlim
  #
  temw <- confreg(out$wbetas[,(k+2):p],g=gg,that=tstat,alpha=alph)
  if(temw$D0 <= temw$cuplim) #pred. reg. method
    wcicov[pp1] <- wcicov[pp1] + 1
  wavelen[pp1] <- wavelen[pp1] + temw$cuplim
  if(temw$br0 <= temw$cuplim) #hybrid method
    wcicov[pp2] <- wcicov[pp2] + 1
  wavelen[pp2] <- wavelen[pp1] #same cutoff so same length
  if(temw$br0 <= temw$brlim) #Bickel and Ren method
    wcicov[pp3] <- wcicov[pp3] + 1
  wavelen[pp3] <- wavelen[pp3] + temw$brlim
  #
  temp <- confreg(out$pbetas[,(k+2):p],g=gg,that=tstat,alpha=alph)
  if(temp$D0 <= temp$cuplim) #pred. reg. method
    pcicov[pp1] <- pcicov[pp1] + 1
  pavelen[pp1] <- pavelen[pp1] + temp$cuplim
  if(temp$br0 <= temp$cuplim) #hybrid method
    pcicov[pp2] <- pcicov[pp2] + 1
  pavelen[pp2] <- pavelen[pp1] #same cutoff so same length
  if(temp$br0 <= temp$brlim) #Bickel and Ren method
    pcicov[pp3] <- pcicov[pp3] + 1
  pavelen[pp3] <- pavelen[pp3] + temp$brlim
  #test whether the first k+1 values of beta are 1
  gg <- k + 1
  tstat <- out$full$coef[1:(k+1)]
  tem <- confreg(out$betas[,1:(k+1)],g=gg,that=tstat,alpha=alph)
  D0 <- sqrt(mahalanobis(one, tem$center, tem$cov))
  if(D0 <= tem$cuplim) #pred. reg. method
    cicov[pp4] <- cicov[pp4] + 1
  avelen[pp4] <- avelen[pp4] + tem$cuplim
  D1 <- sqrt(mahalanobis(one, tstat, tem$cov))
  if(D1 <= tem$cuplim) #hybrid method
    cicov[pp5] <- cicov[pp5] + 1
  avelen[pp5] <- avelen[pp4] #same cutoff so same length
  if(D1 <= tem$brlim) #Bickel and Ren method
    cicov[pp6] <- cicov[pp6] + 1
  avelen[pp6] <- avelen[pp6] + tem$brlim
  #
  temw <- confreg(out$wbetas[,1:(k+1)],g=gg,that=tstat,alpha=alph)
  D0 <- sqrt(mahalanobis(one, temw$center, temw$cov))
  if(D0 <= temw$cuplim) #pred. reg. method
    wcicov[pp4] <- wcicov[pp4] + 1
  wavelen[pp4] <- wavelen[pp4] + temw$cuplim
  D1 <- sqrt(mahalanobis(one, tstat, temw$cov))
  if(D1 <= temw$cuplim) #hybrid method
    wcicov[pp5] <- wcicov[pp5] + 1
  wavelen[pp5] <- wavelen[pp4] #same cutoff so same length
  if(D1 <= temw$brlim) #Bickel and Ren method
    wcicov[pp6] <- wcicov[pp6] + 1
  wavelen[pp6] <- wavelen[pp6] + temw$brlim
  #
  temp <- confreg(out$pbetas[,1:(k+1)],g=gg,that=tstat,alpha=alph)
  D0 <- sqrt(mahalanobis(one, temp$center, temp$cov))
  if(D0 <= temp$cuplim) #pred. reg. method
    pcicov[pp4] <- pcicov[pp4] + 1
  pavelen[pp4] <- pavelen[pp4] + temp$cuplim
  D1 <- sqrt(mahalanobis(one, tstat, temp$cov))
  if(D1 <= temp$cuplim) #hybrid method
    pcicov[pp5] <- pcicov[pp5] + 1
  pavelen[pp5] <- pavelen[pp4] #same cutoff so same length
  if(D1 <= temp$brlim) #Bickel and Ren method
    pcicov[pp6] <- pcicov[pp6] + 1
  pavelen[pp6] <- pavelen[pp6] + temp$brlim
  #FIT <- y-out$full$res
  #plot(FIT,y)
  #abline(0,1)
}
#FIT <- y-out$full$res
#plot(FIT,y)
#abline(0,1)
cicov <- cicov/nruns
avelen <- avelen/nruns
wcicov <- wcicov/nruns
wavelen <- wavelen/nruns
pcicov <- pcicov/nruns
pavelen <- pavelen/nruns
list(cicov=cicov, avelen=avelen, wcicov=wcicov, wavelen=wavelen,
pcicov=pcicov, pavelen=pavelen, beta=beta, k=k)
}
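##Example sketch (needs confreg, shorth3, wildboot; slow): with wtype = 3
##the errors are heteroscedastic; cicov, wcicov, and pcicov give the
##observed shorth CI and confidence region coverages for the rowwise,
##wild, and parametric bootstraps.
#out <- wlsbootsim(n = 100, p = 4, k = 1, nruns = 20, wtype = 3)
#out$cicov; out$wcicov; out$pcicov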