##The command
##source("http://lagrange.math.siu.edu/Olive/regpack.txt")
##is an easy way to get these functions into R.


anovasim<-function(n1 = 20, n2 = 20, n3 = 20, n4 = 20, m1 = 0, m2 = 0,
m3 = 0, m4 = 0, sd1 = 1, sd2 = 1, sd3 = 1, sd4 = 1, nruns = 1000){
# Simulates ANOVA F, modified F, Welch F, modified Welch F and rank F statistic 
# for 1 way ANOVA. 
# tests Ho: mu1 = mu2 = mu3 = mu4
	p <- 4
	n <- n1 + n2 + n3 + n4
	y <- 0 * 1:n
	c1 <- 0 * 1:n
	c2 <- c1
	c3 <- c1
	c4 <- c1
	c1[1:n1] <- 1
	c2[(n1 + 1):(n1 + n2)] <- 1
	c3[(n1 + n2 + 1):(n1 + n2 + n3)] <- 1
	c4[(n1 + n2 + n3 + 1):n] <- 1
	x <- cbind(c1, c2, c3, c4)
	y1 <- 1:n1
	y2 <- 1:n2
	y3 <- 1:n3
	y4 <- 1:n4
	beta <- c(m1, m2, m3, m4)
	bols <- matrix(0, nrow = nruns, ncol = p)
	faov <- 0 * 1:nruns
	rf <- faov
	mct <- 0
	wct <- 0
	mwct <- 0
	for(i in 1:nruns) {
#make data
		y1 <- rnorm(n1, mean = m1, sd = sd1)
		y2 <- rnorm(n2, mean = m2, sd = sd2)
		y3 <- rnorm(n3, mean = m3, sd = sd3)
		y4 <- rnorm(n4, mean = m4, sd = sd4)
		y <- c(y1, y2, y3, y4)
		out <- lsfit(x, y, intercept = F)
		bols[i,  ] <- out$coef
		mn1 <- bols[i, 1]
		mn2 <- bols[i, 2]
		mn3 <- bols[i, 3]
		mn4 <- bols[i, 4]
		s1sq <- var(y1)
		s2sq <- var(y2)
		s3sq <- var(y3)
		s4sq <- var(y4)	#get ANOVA  F statistic
		res <- out$resid
		mse <- t(res) %*% res/(n - p)
		sse <- mse * (n - p)
		mny <- mean(y)
		sstr <- n1 * (mn1 - mny)^2 + n2 * (mn2 - mny)^2 + 
                        n3 * (mn3 - mny)^2 + n4 * (mn4 - mny)^2
		fnum <- sstr/(p - 1)
		faov[i] <- fnum/mse	#get modified  F statistic
		num <- (p - 1) * fnum
		den <- (1 - n1/n) * s1sq + (1 - n2/n) * s2sq + 
                       (1 - n3/n) * s3sq + (1 - n4/n) * s4sq
		modf <- num/den
		c1 <- ((1 - n1/n) * s1sq)/den
		c2 <- ((1 - n2/n) * s2sq)/den
		c3 <- ((1 - n3/n) * s3sq)/den
		c4 <- ((1 - n4/n) * s4sq)/den
		finv <- c1^2/(n1 - 1) + c2^2/(n2 - 1) + c3^2/(n3 - 1) + 
                        c4^2/(n4 - 1)
		md <- ceiling(1/finv)
		if(modf > qf(0.95, p - 1, md)) mct <- mct + 1	#get Welch  F statistic
		w1 <- n1/s1sq
		w2 <- n2/s2sq
		w3 <- n3/s3sq
		w4 <- n4/s4sq
		u <- w1 + w2 + w3 + w4
		xdd <- (w1 * mn1)/u + (w2 * mn2)/u + (w3 * mn3)/u + (w4 * mn4)/u
		num <- (w1 * (mn1 - xdd)^2 + w2 * (mn2 - xdd)^2 + 
                        w3 * (mn3 - xdd)^2 + w4 * (mn4 - xdd)^2)/(p - 1)
		tem <- (1 - w1/u)^2/(n1 - 1) + (1 - w2/u)^2/(n2 - 1) + 
                       (1 - w3/u)^2/(n3 - 1) + (1 - w4/u)^2/(n4 - 1)
		den <- 1 + (2 * (p - 2) * tem)/(p^2 - 1)
		wf <- num/den
		finv <- (3 * tem)/(p^2 - 1)
		wd <- ceiling(1/finv)
		if(wf > qf(0.95, p - 1, wd)) wct <- wct + 1	
	#get modified Welch  F statistic
		dfnum <- (1/w1 + 1/w2 + 1/w3 + 1/w4)^2
		dfden <- (1/w1)^2/(n1 - 1) + (1/w2)^2/(n2 - 1) + 
                         (1/w3)^2/(n3 - 1) + (1/w4)^2/(n4 - 1)
		wd <- ceiling(dfnum/dfden)
		if(wf > qf(0.95, p - 1, wd)) mwct <- mwct + 1	#get rank F statistic
		y <- rank(y)
		out <- lsfit(x, y, intercept = F)
		brols <- out$coef
		mn1 <- brols[1]
		mn2 <- brols[2]
		mn3 <- brols[3]
		mn4 <- brols[4]
		res <- out$resid
		mse <- t(res) %*% res/(n - p)
		sse <- mse * (n - p)
		mny <- mean(y)
		sstr <- n1 * (mn1 - mny)^2 + n2 * (mn2 - mny)^2 + 
                        n3 * (mn3 - mny)^2 + n4 * (mn4 - mny)^2
		fnum <- sstr/(p - 1)
		rf[i] <- fnum/mse
	}
	mnbols <- apply(bols, 2, mean)
	ssdbols <- sqrt(n) * sqrt(apply(bols, 2, var))
	faovcov <- sum(faov > qf(0.95, p - 1, n - p))/nruns
	modfcov <- mct/nruns
	mwfcov <- mwct/nruns
	wfcov <- wct/nruns
	rfcov <- sum(rf > qf(0.95, p - 1, n - p))/nruns
	list(mnbols = mnbols, ssdbols = ssdbols, faovcov = faovcov, 
        modfcov = modfcov, wfcov = wfcov, mwfcov = mwfcov, rfcov = rfcov)
}


aovtplt<-function(x, y){
# Need x to have the factor levels needed for one way Anova.
# This function plots y^L vs one way Anova fitted values 
# from the one way Anova of y^L on x where 
# L = -1, -0.5, 0, 0.5 or 1 except log(Y) is used if L = 0. 
# If plot is linear for L, use y^L (or log(Y) for L = 0) instead
# of Y. This is a graphical method for a response transform.
lam <- c(-1, -1/2, 0, 1/2, 1)
xl <- c("1/Z", "1/sqrt(Z)", "LOG(Z)", "sqrt(Z)", "Z")
par(mfrow=c(2,3))
x<-factor(x)
for(i in 1:length(lam)) {
	if(lam[i] == 0)
		ytem <- log(y)
	else ytem <- y^lam[i]
	aovfit <- ytem - aov(ytem~x)$resid
		plot(aovfit, ytem, xlab = "TZHAT", ylab = xl[i])
		abline(0, 1)
		#identify(aovfit, ytem)
	}
	par(mfrow=c(1,1))
}


binrplot <- function(x, y){
# Calls the ``sir" function (in rpack but obtained from STATLIB).
# The binary response plot allows visualization of binary data.
# Workstation need to activate a graphics
# device with command "X11()" or "motif()."
#
# x needs at least two columns or the call to sir fails
#
q <- dim(x)[2]
h <- q + 7
out <- sir(x, y, h)
SIRESP <- x %*% out$edr[, 1]
V <- x %*% out$edr[, 2]
plot(SIRESP, V, type = "n")
points(SIRESP[y == 1], V[y == 1], pch = 3)
points(SIRESP[y == 0], V[y == 0], pch = 1)
}


bphgfit<-function(time, status, group){
# May not work in Splus.
#
# For R, first type library(survival). 
#
# Checks goodness of fit of Cox proportional
# hazard model if time is a vector of times, status is 0 for 
# censored times and 1 otherwise and there is a single covariate 
# group = 1 for group (treatment) 1 and 0 else.
#
# Splus Bug: wants zd defined outside the function or it says it 
# can't find zd also it fails if the data has an n larger than that 
# of the zd defined outside of the function
#
	zd <- as.data.frame(cbind(time, status,
		group))
	z0 <- zd[group == 0,  ]
	z1 <- zd[group == 1,  ]
	zk0 <- survfit(Surv(time[group == 0], 
		status[group == 0])~1, conf.type
		 = "none")
	zk1 <- survfit(Surv(time[group == 1], 
		status[group == 1])~1, conf.type
		 = "none")
	par(mfrow = c(2, 1))
	plot(survfit(coxph(Surv(time, status) ~ 
		group, zd), newdata = z0))
	lines(zk0,type="p")
	title("Estimated Cox and KM survival, group 0"
		)
	plot(survfit(coxph(Surv(time, status) ~ 
		group, zd), newdata = z1))
	lines(zk1,type="p")
	title("Estimated Cox and KM survival, group 1"
		)
        par(mfrow=c(1,1))
}


bphsim3<-function(n = 100, k = 1, nruns = 1){
# May not work in Splus.
#
# For R, first type library(survival). 
#
# Gets plot for Cox proportional hazards data
# with 1 indicator variable as a predictor.
# calls bphgfit
# Splus bug: bug once it works for n = 100, 
# it won't work for n =200
	for(i in 1:nruns) {
		beta <- 1
		group <- 0 * 1:n
		group[(n/2 + 1):n] <- 1
		SP <- group * beta
		lambdai <- exp(SP)
		y <- rexp(n, rate = lambdai)
		gy <- y^(1/k)
		cen <- rexp(n, rate = 0.1)
		cengy <- pmin(gy, cen)
		status <- as.numeric(cen >= gy)
		bphgfit(cengy, status, group)
	}
}


ctrviews<-function(x, y, ii = 1){
# Uses classical distances instead of robust distances.
# Trimmed views for 90, 80, ... 0 percent
# trimming. Allows visualization of m
# and crude estimatation of c beta in models
# of the form y = m(x^T beta) + e.
# Workstation: activate a graphics
# device with command "X11()" or "motif()."
# R needs command "library(lqs)."
# Advance the view with the right mouse button.
# In R, highight "stop."
	x <- as.matrix(x)
	center <- apply(x, 2, mean)
	cov <- var(x)
	rd2 <- mahalanobis(x, center, cov)
	labs <- c("90%", "80%", "70%", "60%", "50%", "40%", "30%", "20%", "10%",
		"0%")
	tem <- seq(0.1, 1, 0.1)
	for(i in ii:10) {
		val <- quantile(rd2, tem[i])
		bhat <- lsfit(x[rd2 <= val,  ], y[rd2 <= val])$coef
		fit <- x %*% bhat[-1]
		plot(fit, y)
		title(labs[i])
		identify(fit, y)
		print(bhat)
	}
}


ddcomp<-function(x, steps = 5){
# Makes 4 DD plots using the FMCD and MBA estimators. 
# Click left mouse button to identify points.
# Click right mouse button to end the function.
# Unix systems turn on graphics device eg enter
#  command "X11()" or "motif()" before using.
# R users need to type "library(lqs)" before using.
	p <- dim(x)[2]
	par(mfrow = c(2, 2))
	center <- apply(x, 2, mean)
	cov <- var(x)
	md2 <- mahalanobis(x, center, cov)	
	# MD is the classical and RD the robust distance
	MD <- sqrt(md2)	#DGK start
	md2 <- mahalanobis(x, center, cov)
	medd2 <- median(md2)	## get the start
	mns <- center
	covs <- cov	## concentrate
	for(i in 1:steps) {
		md2 <- mahalanobis(x, mns, covs)
		medd2 <- median(md2)
		mns <- apply(x[md2 <= medd2,  ], 2, mean)
		covs <- var(x[md2 <= medd2,  ])
	}
	rd2 <- mahalanobis(x, mns, covs)
	rd <- sqrt(rd2)	#Scale the RD so the plot follows the 0-1 line
#if the data is multivariate normal.
	const <- sqrt(qchisq(0.5, p))/median(rd)
	RDdgk <- const * rd
	plot(MD, RDdgk)
	abline(0, 1)
	identify(MD, RDdgk)
	title("DGK DD Plot")	#MBA
	out <- covmba(x)
	center <- out$center
	cov <- out$cov
	rd2 <- mahalanobis(x, center, cov)
	rd <- sqrt(rd2)	#Scale the RD so the plot follows the identity line
#if the data is multivariate normal.
	const <- sqrt(qchisq(0.5, p))/median(rd)
	RDm <- const * rd
	plot(MD, RDm)
	abline(0, 1)
	identify(MD, RDm)
	title("MBA DD Plot")	#FMCD
	out <- cov.mcd(x)
	center <- out$center
	cov <- out$cov
	rd2 <- mahalanobis(x, center, cov)
	rd <- sqrt(rd2)	#Scale the RD so the plot follows the 0-1 line
#if the data is multivariate normal.
	const <- sqrt(qchisq(0.5, p))/median(rd)
	RDf <- const * rd
	plot(MD, RDf)
	abline(0, 1)
	identify(MD, RDf)
	title("FMCD DD Plot")	#Median Ball start
	covv <- diag(p)
	med <- apply(x, 2, median)
	md2 <- mahalanobis(x, center = med, covv)
	medd2 <- median(md2)	## get the start
	mns <- apply(x[md2 <= medd2,  ], 2, mean)
	covs <- var(x[md2 <= medd2,  ])	## concentrate
	for(i in 1:steps) {
		md2 <- mahalanobis(x, mns, covs)
		medd2 <- median(md2)
		mns <- apply(x[md2 <= medd2,  ], 2, mean)
		covs <- var(x[md2 <= medd2,  ])
	}
	rd2 <- mahalanobis(x, mns, covs)
	rd <- sqrt(rd2)	#Scale the RD so the plot follows the 0-1 line
#if the data is multivariate normal.
	const <- sqrt(qchisq(0.5, p))/median(rd)
	RDmb <- const * rd
	plot(MD, RDmb)
	abline(0, 1)
	identify(MD, RDmb)
	title("Med Ball DD Plot")
}


edtplt<-function(x, y, int = F){
# Need x to be an appropriate design matrix for the experimental 
# design fit by OLS. This function plots y^L vs OLS fit where 
# L = -1, -0.5, 0, 0.5 or 1 except log(Y) is used if L = 0. 
# If plot is linear for L, use y^L (or log(Y) for L = 0) instead
# of Y. This is a graphical method for a response transform.
lam <- c(-1, -1/2, 0, 1/2, 1)
xl <- c("1/Z", "1/sqrt(Z)", "LOG(Z)", "sqrt(Z)", "Z")
par(mfrow=c(2,3))
for(i in 1:length(lam)) {
	if(lam[i] == 0)
		ytem <- log(y)
	else ytem <- y^lam[i]
	olsfit <- ytem - lsfit(x, ytem, intercept = int)$resid
		plot(olsfit, ytem, xlab = "TZHAT", ylab = xl[i])
		abline(0, 1)
		#identify(olsfit, ytem)
	}
	par(mfrow=c(1,1))
}


essp<- function(x, Y, M = 50, type = 3)
{# Trimmed view or ESSP for M percent
# trimming. Allows visualization of g
# and crude estimation of c beta in models
# of the form y = g(x^T beta,e).
# Workstation need to activate a graphics
# device with command "X11()" or "motif()."
# R needs command "library(MASS)" or "library(lqs)."
# Click on the right mouse button to finish, and
# in R, highlight "stop."
	x <- as.matrix(x)
        tval <- M/100
        if(type == 1)
	out <- cov.mcd(x)
        if(type == 2)
        out <- covmba(x)
        if(type == 3)
        out <- covfch(x)
        center <- out$center
	cov <- out$cov
	rd2 <- mahalanobis(x, center, cov)
	val <- quantile(rd2, (1 - tval))
	bhat <- lsfit(x[rd2 <= val,  ], Y[rd2 <= val])$
		coef
	ESP <- x %*% bhat[-1] + bhat[1]
	plot(ESP, Y)
	identify(ESP, Y)
	return(bhat[-1])
}


fwlssim<-function(n = 100, q = 4, sd = 1, nruns = 1, type = 1){
# Simulates FWLS data Y = SP + |SP - j| e, with SP = alpha + beta' x.
# So w = 1/(SP-j)^2 and what = 1/(ESP - j)^2
# type = 1 or 3 for wls, type = 2 or 4 for fwls
# There are q = p-1 nontrivial predictors as well as a constant
# Need p > 1 since x is n by q.
       	beta <- 2 + 0 * 1:q
	par(mfrow = c(2, 2))
	for(i in 1:nruns) {
#make data
		x <- matrix(rnorm(n * q), nrow = n, ncol = q)
		if(q == 1)
			x <- as.matrix(x)
		u <- x
		if(type == 1) {
			sp <- 25 + x %*% beta
			Y <- sp + abs(sp - 5) * sd * rnorm(n)
			ols <- lsfit(x, Y)
			olsesp <- Y - ols$resid
			what <- 1/(sp - 5)^2
		}
		if(type == 2) {
			sp <- 25 + x %*% beta
			Y <- sp + abs(sp - 5) * sd * rnorm(n)
			ols <- lsfit(x, Y)
			olsesp <- Y - ols$resid
			what <- 1/(olsesp - 5)^2
		}
		if(type == 3) {
			sp <- 25 + x %*% beta
			Y <- sp + abs(sp - 25) * sd * rnorm(n)
			ols <- lsfit(x, Y)
			olsesp <- Y - ols$resid
			what <- 1/(sp - 25)^2
		}
		if(type == 4) {
			sp <- 25 + x %*% beta
			Y <- sp + abs(sp - 25) * sd * rnorm(n)
			ols <- lsfit(x, Y)
			olsesp <- Y - ols$resid
			what <- 1/(olsesp - 25)^2
		}
		uu <- sqrt(what)
		Z <- uu * Y
		for(j in 1:q) {
			u[, j] <- uu * x[, j]
		}
		u <- cbind(uu, u)
		wls <- lsfit(u, Z, intercept = F)
		FIT <- Y - ols$resid
		plot(FIT, Y)
		abline(0, 1)
		title("a) OLS Response Plot")
		RESID <- ols$resid
		plot(FIT, RESID)
		title("b) OLS Residual Plot")
		ZFIT <- Z - wls$resid
		plot(ZFIT, Z)
		abline(0, 1)
		title("c) FWLS  Response Plot")
		ZRESID <- wls$resid
		plot(ZFIT, ZRESID)
		title("d) FWLS Residual Plot")
	}
}


ganova<-function(x, y){
# Let x have the factor levels 1,2, ..., k needed for one way Anova
# and y the corresponding response values.
# This function does graphical anova for the one way model. 
Treatments <- c("A","B","C","D","E","F","G","H","I","J","K")
xx <- as.integer(x)
k <- max(xx)
n <- length(y)
v10 <- 10 + 0*1:n
v20 <- 20 + 0*1:k
gmn <- mean(y)
mn <- 1:k
sdev <- mn
for (i in 1:k){
mn[i] <- mean(y[xx==i])
}
mn <- mn - gmn
sdev <- sqrt((n-k)/(k-1))*mn
w <- as.factor(x)
residuals <- aov(y~w)$resid
graphicalanova <- c(v10,v20)
Residuals <- c(residuals,sdev)
plot(Residuals,graphicalanova)
title("Scaled Treatment Deviations")
Treatments <- Treatments[1:k]
list(sdev=sdev,Treatments=Treatments)
}


ganova2<-function(x, blocks, y){
# Let x and blocks have levels 1,2, ..., k  and y the corresponding
# response values. This function does graphical anova for
# completely randomized block design. 
Treatments <- c("A","B","C","D","E","F","G","H","I","J","K")
xx <- as.integer(x)
blk <- as.integer(blocks)
k <- max(xx)
b <- max(blk)
n <- length(y)
v10 <- 10 + 0*1:n
v20 <- 20 + 0*1:k
v30 <- 30 + 0*1:b
gmn <- mean(y)
mn <- 1:k
smn <- mn
bmn <- 1:b
sbmn <- bmn
for (i in 1:k){
mn[i] <- mean(y[xx==i])
}
for (i in 1:b){
bmn[i] <- mean(y[blk==i])
}
mn <- mn - gmn
smn <- sqrt(b-1)*mn
bmn <- bmn - gmn
sbmn <- sqrt(k-1)*bmn
w <- as.factor(x)
block <- as.factor(blocks)
residuals <- aov(y~block+w)$resid
Treatmentdevs <- c(v10,v20,v30)
Residuals <- c(residuals,smn,sbmn)
plot(Residuals,Treatmentdevs)
title("Scaled Block Deviations")
Treatments <- Treatments[1:k]
Blocks <- 1:b
list(sbdev=sbmn,Blocks,sdev=smn,Treatments=Treatments)
}


kmsim<-function(n = 100, runs = 5){
# simulation of classical, log, log-log and plus four 95% confidence intervals for Kaplan
# Meier estimator
	ccov <- 0 * 1:n
	lcov <- ccov
	llcov <- ccov
	p4cov <- ccov
	np3 <- n + 3
	np4 <- n + 4
	for(i in 1:runs) {
		y <- rexp(n)
		z <- rexp(n, rate = 0.1)	# get censored data t
		t <- pmin(y, z)
		delta <- as.numeric(z >= y)
		out <- survfit(Surv(t, delta)~1, conf.type = "plain")	
	#Get S(t) then see if S(t_i) is in the CI for S(t_i) for i = 1 to n
		st <- exp( - out$time)
		ccov <- ccov + as.integer(st > out$low & st < out$up)
		out <- survfit(Surv(t, delta)~1)
		lcov <- lcov + as.integer(st > out$low & st < out$up)
		out <- survfit(Surv(t, delta)~1, conf.type = "log-log")
		llcov <- llcov + as.integer(st > out$low & st < out$up)	# get plus 4 interval
		tmin <- min(t)
		tmax <- max(t)
		tp4 <- c(tmin/3, tmin/2, t, tmax + 1, tmax + 2)
		dp4 <- c(1, 1, delta, 1, 1)
		out <- survfit(Surv(tp4, dp4)~1, conf.type = "plain")
		outl <- out$low[ - c(1, 2, np3, np4)]
		outu <- out$up[ - c(1, 2, np3, np4)]
		p4cov <- p4cov + as.integer(st > outl & st < outu)
	}
	ccov <- ccov/runs
	lcov <- lcov/runs
	llcov <- llcov/runs
	p4cov <- p4cov/runs
	list(ccov = ccov, lcov = lcov, llcov = llcov, p4cov = p4cov)
}


kmsim2<-function(n = 100, runs = 100){
# simulation of classical, log, log-log and plus four 95% 
# confidence intervals for Kaplan Meier estimator
# library(survival)
	ccov <- 0 * 1:n
	lcov <- ccov
	llcov <- ccov
	p4cov <- ccov
	clen <- ccov
	llen <- ccov
	lllen <- ccov
	p4len <- ccov
	np3 <- n + 3
	np4 <- n + 4
	for(i in 1:runs) {
		y <- rexp(n)
		z <- rexp(n, rate = 0.1)	# get censored data t
		t <- pmin(y, z)
		delta <- as.numeric(z >= y)
		out <- survfit(Surv(t, delta)~1, conf.type = "plain")	
	#Get S(t) then see if S(t_i) is in the CI for S(t_i) for i = 1 to n
		st <- exp( - out$time)
		ccov <- ccov + as.integer(st > out$low & st < out$up)
		clen <- clen + (out$up - out$low)
		out <- survfit(Surv(t, delta)~1)
		lcov <- lcov + as.integer(st > out$low & st < out$up)
		llen <- llen + (out$up - out$low)
		out <- survfit(Surv(t, delta)~1, conf.type = "log-log")
		llcov <- llcov + as.integer(st > out$low & st < out$up)
		lllen <- lllen + (out$up - out$low)	# get plus 4 interval
		tmin <- min(t)
		tmax <- max(t)
		tp4 <- c(tmin/3, tmin/2, t, tmax + 1, tmax + 2)
		dp4 <- c(1, 1, delta, 1, 1)
		out <- survfit(Surv(tp4, dp4)~1, conf.type = "plain")
		outl <- out$low[ - c(1, 2, np3, np4)]
		outu <- out$up[ - c(1, 2, np3, np4)]
		p4cov <- p4cov + as.integer(st > outl & st < outu)
		p4len <- p4len + (outu - outl)
	}
	ccov <- ccov/runs
	lcov <- lcov/runs
	llcov <- llcov/runs
	p4cov <- p4cov/runs
	clen <- (sqrt(n) * clen)/runs
	llen <- (sqrt(n) * llen)/runs
	lllen <- (sqrt(n) * lllen)/runs
        p4len <- (sqrt(n) * p4len)/runs
list(ccov = ccov, lcov = lcov, llcov = llcov, p4cov = p4cov, clen = clen, llen = llen, lllen = lllen, p4len = p4len)
}



lrdata <- function(n = 200,  type = 3){
# Generates data for logistic regression.
# If X|y=1 ~ N(mu_1,I) and X|Y=0 ~ N(0,I) then beta = mu_1 and alpha = -0.5 ||mu_1||^2.
#
#   If q is changed, change the formula in the glm statement.
	q <- 5
	y <- 0 * 1:n
	y[(n/2 + 1):n] <- y[(n/2 + 1):n] + 1
	beta <- 0 * 1:q
        if(type == 1) {
		beta[1] <- 1
                alpha <- -0.5
	}
	if(type == 2) {
		beta <- beta + 1
                alpha <- -q/2
	}
        if(type == 3) {
		beta[1:3] <-  1
                alpha <- -1.5
	}
        x <- matrix(rnorm(n * q), nrow = n, 
			ncol = q)
	if(type == 1) {
		x[(n/2 + 1):n, 1] <- x[(n/2 + 1
				):n, 1] + 1
		}
	if(type == 2) {
		x[(n/2 + 1):n,  ] <- x[(n/2 + 1
				):n,  ] + 1
		}
        if(type == 3) {
		x[(n/2 + 1):n, 1:3 ] <- x[(n/2 + 1
				):n, 1:3 ] + 1
		}
        #X|y=0 ~ N(0, I) and X|y=1 ~ N(beta,I)
	# change formula to x[,1]+ ... + x[,q] with q
	out <- glm(y ~ x[, 1] + x[, 2] + x[, 3] +
			x[, 4] + x[,5], family = binomial)
	list(alpha = alpha, beta = beta, lrcoef = out$coef,x=x,y=y)
}


lrdata2<-function(n = 200, m = 10, type = 1)
{# Generates data for logistic regression.
#   If q is changed, change the formula in the glm statement.
q <- 5
y <- 1:n
z <- y
mv <- m + 0 * y
beta <- 0 * 1:q
if(type == 1) {
	beta <- beta + 2
	alpha <- 0
	}
if(type == 2) {
	beta[1] <- 2
	alpha <- 0
	}
if(type == 3) {
	beta[1:3] <- 2
	alpha <- 0
	}
x <- matrix(rnorm(n * q), nrow = n, ncol = q)
SP <- alpha + x %*% beta
pv <- exp(SP)/(1 + exp(SP))
for(i in 1:n)
   y[i] <- rbinom(1, size = m, prob = pv[i])
z <- y/m	# change formula to x[,1]+ ... + x[,q] with q
out <- glm(z ~ x[, 1] + x[, 2] + x[, 3] + x[, 4] + x[, 5], 
              family = binomial, weights = mv)
list(x = x, y = y, mv = mv, alpha = alpha, beta = beta, 
      lrcoef = out$coef)
}


lressp <- function(x,y,slices=10){
# Makes the response plot for logistic regression.
# If X|y=1 ~ N(mu_1,I) and X|Y=0 ~ N(0,I) then beta = mu_1 and alpha = ||mu_1||^2.
# Workstation need to activate a graphics
# device with command "X11()" or "motif()."
#
#   If q is changed, change the formula in the glm statement.
	q <- 5
# change formula to x[,1]+ ... + x[,q] with q
	out <- glm(y ~ x[, 1] + x[, 2] + x[, 3] +
			x[, 4] + x[,5], family = binomial)
	#ESP <- x %*% out$coef[-1] + out$coef[1]
        ESP <- predict(out)
        Y <- y
        plot(ESP,Y)	
        abline(mean(y),0)
        fit <- y
        fit <- exp(ESP)/(1 + exp(ESP))
      #  lines(sort(ESP),sort(fit))
        indx <- sort.list(ESP)
        lines(ESP[indx],fit[indx])
        fit2 <- fit
        n <- length(y)
        val <- as.integer(n/slices)
        for(i in 1: (slices-1)){
          fit2[((i-1)*val+1):(i*val)] <- mean(y[indx[((i-1)*val+1):(i*val)]])
        }
        fit2[((slices-1)*val+1):n] <- mean(y[indx[((slices-1)*val+1):n]])
# fit2 is already sorted in order corresponding to indx
        lines(ESP[indx],fit2)
#list(fit2=fit2,n=n,slices=slices,val=val)
}


lrplot<-function(x, y, mv, slices = 10, ff = 0.97, step = T){
# Makes response and OD plots for binomial  logistic regression.
# mv = (m1, ..., mn) where yi is bin(mi,p(SP))
# If step = T use step function, else use lowess.
# Workstation: need to activate a graphics
# device with command "X11()" or "motif()."
#
#   If q is changed, change the formula in the glm statement.
	q <- 5	# change formula to x[,1]+ ... + x[,q] with q
	n <- length(y)
	Z <- y/mv
	out <- glm(Z ~ x[, 1] + x[, 2] + x[, 3] + x[, 4] + x[, 5], family = 
		binomial, weights = mv)
	ESP <- predict(out)
	par(mfrow = c(1, 2),pty="s")
	plot(ESP, Z)
	abline(weighted.mean(Z, mv), 0)
	fit <- y
	fit <- exp(ESP)/(1 + exp(ESP))
	indx <- sort.list(ESP)
	lines(ESP[indx], fit[indx])
	if(step == T) {
		fit2 <- fit
		val <- as.integer(n/slices)
		for(i in 1:(slices - 1)) {
			fit2[((i - 1) * val + 1):(i * val)] <- weighted.mean(Z[
				indx[((i - 1) * val + 1):(i * val)]], mv[indx[((
				i - 1) * val + 1):(i * val)]])
		}
		fit2[((slices - 1) * val + 1):n] <- weighted.mean(Z[indx[((
			slices - 1) * val + 1):n]], mv[indx[((slices - 1) * val +
			1):n]])	
	# fit2 is already sorted in order corresponding to indx
		lines(ESP[indx], fit2)
	}
	else lines(lowess(ESP, Z, f = ff), type = "s")
	title("a) ESS Plot")	#get OD plot
	val <- exp(ESP)/(1 + exp(ESP))
	Ehat <- mv * val
	Vmodhat <- Ehat * (1 - val)
	Vhat <- (y - Ehat)^2
	plot(Vmodhat, Vhat)
	abline(0, 1)
	abline(0, 4)
	abline(lsfit(Vmodhat, Vhat)$coef)
	title("b) OD Plot")
        par(mfrow=c(1,1))
}


lrplot2<-function(x, y, slices = 10, ff = 0.99, step = T){
# Makes the response plot for binary logistic regression.
# If step = T use step function, else use lowess.
# Workstation need to activate a graphics
# device with command "X11()" or "motif()."
#
#   If q is changed, change the formula in the glm statement.
        x <- as.matrix(x)
	q <- 5	# change formula to x[,1]+ ... + x[,q] with q
	out <- glm(y ~ x[, 1] + x[, 2] + x[, 3] + x[, 4] + x[, 5], family = binomial)
	ESP <- predict(out)
	Y <- y
	plot(ESP, Y)
	fit <- y
	fit <- exp(ESP)/(1 + exp(ESP))	#  lines(sort(ESP),sort(fit))
	indx <- sort.list(ESP)
	lines(ESP[indx], fit[indx])
        if(step == T){
	fit2 <- fit
	n <- length(y)
	val <- as.integer(n/slices)
	for(i in 1:(slices - 1)) {
		fit2[((i - 1) * val + 1):(i * val)] <- mean(y[indx[((i - 1) * 
			val + 1):(i * val)]])
	}
	fit2[((slices - 1) * val + 1):n] <- mean(y[indx[((slices - 1) * val + 1
		):n]])	# fit2 is already sorted in order corresponding to indx
	lines(ESP[indx], fit2)}
        else lines(lowess(ESP, Y, f = ff), type = "s")
	title("ESS Plot")
}


lmsviews<-function(x, Y, ii = 1){
# Trimmed views using lmsreg for 90, 80, ... 0 percent 
# trimming.   Allows visualization of  m
# and crudely estimation of  c beta in models 
# of the form y = m(x^T beta) + e.
# Workstation: activate a graphics device
# with commands "X11()" or "motif()."
# R needs command "library(lqs)."
# Advance the view with the right mouse button and
# in R, highight "stop."
	x <- as.matrix(x)
	out <- cov.mcd(x)
	center <- out$center
	cov <- out$cov
	rd2 <- mahalanobis(x, center, cov)
	labs <- c("90%", "80%", "70%", "60%", "50%", "40%", "30%", "20%", "10%",
		"0%")
	tem <- seq(0.1, 1, 0.1)
	for(i in ii:10) {
		val <- quantile(rd2, tem[i])
		b <- lmsreg(x[rd2 <= val,  ], Y[rd2 <= val])$coef
		ESP <- x %*% b[-1]
		plot(ESP, Y)
		title(labs[i])
		identify(ESP, Y)
		print(b)
	}
}


maha<-function(x){
# Generates the classical mahalanobis distances.
	center <- apply(x, 2, mean)
	cov <- var(x)
	return(sqrt(mahalanobis(x, center, cov)))
}


mlrplot4<-function(x, Y)
{# This function is for R. Use mlrplot3 for R and Splus.
# Makes response plot and residual plot with large
# Cook's distances and Pena's distances highlighted.
# Squares have large Cook's distances
# and crosses have large Pena's distances.
	x <- as.matrix(x)
	out <- lsfit(x, Y)
	cook <- ls.diag(out)$cooks
	n <- dim(x)[1]
	p <- dim(x)[2] + 1	#get Pena's distances s
	one <- 1 + 0 * 1:n
	w <- cbind(one, x)
	cp <- t(w) %*% w
	h <- w %*% solve(cp) %*% t(w)
	s <- 0 * 1:n
	for(i in 1:n) {
		for(j in 1:n) {
			s[i] <- s[i] + (cook[j] * h[i, j]^2)/(h[i, i] * h[j, j])
		}
	}
	tem <- cook > min(0.5, (2 * p)/n)
	medd <- median(s)
	madd <- mad(s, constant = 1)
	tem2 <- abs(s - medd) > 4.5 * madd
	bhat <- out$coef
	FIT <- bhat[1] + x %*% bhat[-1]
        cmar <- par("mar")
	par(mfrow = c(2, 1))
        par(mar=c(4.0,4.0,2.0,0.5))
        plot(FIT, Y)
	abline(0, 1)
	points(FIT[tem], Y[tem], pch = 0)
	points(FIT[tem2], Y[tem2], pch = 3)	
	identify(FIT, Y)
	title("Response Plot")
	RES <- Y - FIT
	plot(FIT, RES)
	points(FIT[tem], RES[tem], pch = 0)
	points(FIT[tem2], RES[tem2], pch = 3)
	identify(FIT, RES)
	title("Residual Plot")
        par(mfrow = c(1, 1))
        par(mar=cmar)
}


oddata<-function(n = 100, q = 5, k = 1)
{# Generates overdispersion (negative binomial) data 
# for Poisson regression.
	y <- 1:n
	beta <- 0 * 1:q
	beta[1:3] <- 1
	alpha <- -2.5
	x <- matrix(rnorm(n * q), nrow = n, ncol = 
		q)
	x <- 0.5 * x + 1
	SP <- alpha + x %*% beta
	for(i in 1:n) {
		pr <- k/(exp(SP[i]) + k)
		y[i] <- rnbinom(1, size = k, pr)
	}
	list(x = x, y = y)
}


pb12<-function(resp,k=11,MSE=NA){
# Makes the effects and QQ plot for the PB(12) design.
# Need the response resp given in standard order. 
# MSE should be given if k=11. 
# MSe is used for the MS for the E effect
# Assumes number of replicates m = 1.
# Could define c1-c11 as factors and use
# out2<-aov(resp~c1+c2+c3+c4+c5+c6+c7+c8+c9+c10+c11)
c1 <- c(1,1,-1,1,1,1,-1,-1,-1,1,-1,-1)
c2 <- c(c1[11],c1[1:10],-1)
c3 <- c(c2[11],c2[1:10],-1)
c4 <- c(c3[11],c3[1:10],-1)
c5 <- c(c4[11],c4[1:10],-1)
c6 <- c(c5[11],c5[1:10],-1)
c7 <- c(c6[11],c6[1:10],-1)
c8 <- c(c7[11],c7[1:10],-1)
c9 <- c(c8[11],c8[1:10],-1)
c10 <- c(c9[11],c9[1:10],-1)
c11 <- c(c10[11],c10[1:10],-1)
Aeff <- as.real(t(c1)%*%resp/6)
Beff <- as.real(t(c2)%*%resp/6)
Ceff <- as.real(t(c3)%*%resp/6)
Deff <- as.real(t(c4)%*%resp/6)
Eeff <- as.real(t(c5)%*%resp/6)
Feff <- as.real(t(c6)%*%resp/6)
Geff <- as.real(t(c7)%*%resp/6)
Heff <- as.real(t(c8)%*%resp/6)
Jeff <- as.real(t(c9)%*%resp/6)
Keff <- as.real(t(c10)%*%resp/6)
Leff <- as.real(t(c11)%*%resp/6)
MSA <- 3*(Aeff)^2
MSB <- 3*(Beff)^2
MSC <- 3*(Ceff)^2
MSD <- 3*(Deff)^2
MSe <- 3*(Eeff)^2
MSF <- 3*(Feff)^2
MSG <- 3*(Geff)^2
MSH <- 3*(Heff)^2
MSJ <- 3*(Jeff)^2
MSK <- 3*(Keff)^2
MSL <- 3*(Leff)^2
effects <- c(Aeff,Beff,Ceff,Deff,Eeff,Feff,Geff,Heff,
Jeff,Keff=Keff,Leff)
library(MASS)
nquantiles <- qnorm(ppoints(11))
qqplot(nquantiles,effects)
abline(lmsreg(nquantiles,sort(effects))$coef)
title("Normal QQ plot for PB12 Design")
x <- cbind(c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11)
out <- lsfit(x[,1:k],resp)
ls.print(out)
if(k < 11) MSE <- sum(out$resid^2)/(11-k)
SEeff <- sqrt(MSE/3)
list(Aeff=Aeff,Beff=Beff,Ceff=Ceff,Deff=Deff,Eeff=Eeff,Feff=Feff,
Geff=Geff,Heff=Heff,Jeff=Jeff,Keff=Keff,Leff=Leff,MSA=MSA,MSB=MSB,
MSC=MSC,MSD=MSD,MSe=MSe,MSF=MSF,MSG=MSG,MSH=MSH,MSJ=MSJ,MSK=MSK,
MSL=MSL,MSE=MSE,SEeff=SEeff)
}


pcisim<-function(nruns = 1000, n1 = 10, n2 = 10, mu1 = 0, mu2 = 0, 
var1 = 1, var2 = 1, dist = 1, alpha = 0.05){
#gets the pooled, modified pooled, and Welch 100 (1-alpha)% CIs for mu1 - mu2 
#defaults are alpha = .05, lengths make the most sense if n1 = n2 
#dist = 1 for x ~ N(mu1, var1), y ~ N(mu2, var2)
#dist = 2 for x ~ EXP with mean mu1 variance var1, 
#             y ~ EXP with mean mu2 variance var2 
#dist = 3 for x ~ N(mu1, var1), y EXP with mean mu2 variance var2 
	diff <- mu1 - mu2
	up <- 1 - alpha/2
	pdf <- n1 + n2 - 2
	rhohat <- n1/(n1 + n2)
	pcov <- 0
	mpcov <- 0
	wcov <- 0
	plow <- 1:nruns
	pup <- plow
	mplow <- plow
	mpup <- plow
	wlow <- plow
	wup <- plow
	tpcut <- qt(up, pdf)
	mtpcut <- qt(up, n1 + n2 - 4)
	for(i in 1:nruns) {
		if(dist == 1) {
			x <- mu1 + sqrt(var1) * rnorm(n1)
			y <- mu2 + sqrt(var2) * rnorm(n2)
		}
		if(dist == 2) {
			x <- mu1 - sqrt(var1) + sqrt(var1) * rexp(n1)
			y <- mu2 - sqrt(var2) + sqrt(var2) * rexp(n2)
		}
		if(dist == 3) {
			x <- mu1 + sqrt(var1) * rnorm(n1)
			y <- mu2 - sqrt(var2) + sqrt(var2) * rexp(n2)
		}
		xbar <- mean(x)
		s1sq <- var(x)
		ybar <- mean(y)
		s2sq <- var(y)
		xmybar <- xbar - ybar
		sp <- (n1 - 1) * s1sq + (n2 - 1) * s2sq
		sp <- sqrt(sp/pdf)
		val <- sp * sqrt(1/n1 + 1/n2)	#get pooled CI
		tem <- tpcut * val
		plow[i] <- xmybar - tem
		pup[i] <- xmybar + tem
		if(plow[i] < diff && pup[i] > diff) pcov <- pcov + 1	#get modified pooled CI
		thetahat <- s2sq/s1sq
		tauhatsq <- (1 - rhohat + rhohat * thetahat)/(rhohat + (1 - rhohat) * 
			thetahat)
		tauhat <- sqrt(tauhatsq)
		tem <- mtpcut * val * tauhat
		mplow[i] <- xmybar - tem
		mpup[i] <- xmybar + tem
		if(mplow[i] < diff && mpup[i] > diff) mpcov <- mpcov + 1	#get Welch CI
		t1 <- s1sq/n1
		t2 <- s2sq/n2
		do <- (t1 + t2)^2/(t1^2/(n1 - 1) + t2^2/(n2 - 1))
		d <- max(1, floor(do))
		wtcut <- qt(up, d)
		val <- sqrt(s1sq/n1 + s2sq/n2)
		tem <- wtcut * val
		wlow[i] <- xmybar - tem
		wup[i] <- xmybar + tem
		if(wlow[i] < diff && wup[i] > diff)
			wcov <- wcov + 1
	}
	pcov <- pcov/nruns
	plen <- sqrt(n1) * mean(pup - plow)
	mpcov <- mpcov/nruns
	mplen <- sqrt(n1) * mean(mpup - mplow)
	wcov <- wcov/nruns
	wlen <- sqrt(n1) * mean(wup - wlow)	#print(mean(x))
#print(var(x))
#print(mean(y))
#print(var(y))
	list(pcov = pcov, plen = plen, mpcov = mpcov, mplen = mplen, wcov = wcov, wlen = wlen
		)
}


phdata<-function(n = 100, q = 3, k = 1, clam = 0.1){
# Generates data for Cox proportional hazards regression.
# Follows Zhou (2001)
# Get Weibull regression data with g(Y) = Y^(1/k) and
# h_(g(Y))(t) = k t^(k-1) exp(SP).
# Workstation: need to activate a graphics
# device with command "X11()" or "motif()."
#
	beta <- 1 + 0 * 1:q
	x <- matrix(rnorm(n * q), nrow = n, ncol = q)/2
	SP <- x %*% beta
	lambdai <- exp(SP)
	y <- rexp(n, rate = lambdai)
	gy <- y^(1/k)
	cen <- rexp(n, rate = clam)
	cengy <- pmin(gy, cen)
	status <- as.numeric(cen >= gy)	##change formula for coxph with q
#out <- coxph(Surv(cengy, status) ~ x[, 1] + x[, 2] + x[, 3])
#ESP <- x %*% out$coef
	list(x = x, time = cengy, status = status)
}


phgfit2<-function(x, time, status, slices = 9){
# Checks goodness of fit of Cox PH model, time is a vector of times. 
# KM curves should have the same shape if PH model is valid.
# status is 0 for censored times and 1 otherwize.
# Workstation need to activate a graphics
# device with command "X11()" or "motif()."
# In R type library(survival)
#   
q <- 3	# change formula to x[,1]+ ... + x[,q] with q
out <- coxph(Surv(time, status) ~ x[, 1] + x[, 2] + x[, 3])
ESP <- x %*% out$coef
indx <- sort.list(ESP)
n <- length(time)
val <- as.integer(n/slices)
if(slices <= 4) par(mfrow = c(2, 2))
if( (slices == 5) || (slices ==  6)) par(mfrow = c(2, 3))
if(slices >= 7) par(mfrow = c(3, 3))
    for(i in 1:slices){
        lo <- (i-1)*val + 1
        hi <- i*val
        if(i == slices) hi <- n
	tt <- time[indx[lo:hi]]
	ts <- status[indx[lo:hi]]
	zksi <- survfit(Surv(tt, ts)~1, conf.type = "none")
	plot(zksi)
    }
par(mfrow=c(1,1))
}


phsim<-function(n = 100, q = 3, k = 1, nruns = 1){
# Simulates censored response plots for Cox PH data.
# Calls phdata and seyp.
	for(i in 1:nruns) {
		out <- phdata(n, q, k)
		seyp(out$x, out$time, out$status)
	}
}


phsim2 <- function(n = 100, p = 3, k = 1, slices = 9, nruns = 1){
# Simulates KM curves in slices. 
# KM curves should have the same shape if PH model is valid.
# Calls phdata and phgfit2.
for(i in 1:nruns) {
   out <- phdata(n, p, k)
   phgfit2(out$x, out$time, out$status, slices)
	}
}


phsim5 <- function(n = 99,  k = 1, slices = 9){
# I may not have fixed the bug with a new version of R.
# Simulates sliced survival plots for Cox PH regression with 
# pointwise CI bands. Follows Zhou (2001) to
# get Weibull regression data with g(Y) = Y^(1/k) and
# h_(g(Y))(t) = k t^(k-1) exp(SP).
# 
# May not work in Splus.
#
# In R, type library(survival)
      p <- 3 ## change formula with p
      beta <- 1 + 0 * 1:p
	x <- matrix(rnorm(n * p), nrow = n, ncol = p)/2
	SP <- x %*% beta
	lambdai <- exp(SP)
	y <- rexp(n, rate = lambdai)
	gy <- y^(1/k)
	cen <- rexp(n, rate = 0.1)
	time <- pmin(gy, cen)
	status <- as.numeric(cen >= gy)	
	##change formula for coxph with p
x1<- x[,1]; x2 <- x[,2]; x3<-x[,3]
xdata<-data.frame(cbind(time,status,x1,x2,x3))
      out <- coxph(Surv(time, status) ~ x1 + x2 + x3,data=xdata)
      ESP <- x %*% out$coef
indx <- sort.list(ESP)
val <- as.integer(n/slices)
if(slices <= 4) par(mfrow = c(2, 2))
if( (slices == 5) || (slices ==  6)) par(mfrow = c(2, 3))
if(slices >= 7) par(mfrow = c(3, 3))
for(i in 1:slices){
    lo <- (i-1)*val + 1
    hi <- i*val
    if(i == slices) hi <- n
    tt <- time[indx[lo:hi]]
    ts <- status[indx[lo:hi]]
    zksi <- survfit(Surv(tt, ts)~1, conf.type = "none")
    ## change model formula for nv and plot with p 
    midpt <- lo + floor((hi-lo)/2)
    nvt <- c(x[indx[midpt],1],x[indx[midpt],2],x[indx[midpt],3])
nv<-data.frame(time=1,status=1,x1=nvt[1],x2=nvt[3],x3=nvt[3])
plot(survfit(coxph(Surv(time, status) ~ x1 + x2 + x3), newdata = nv))
    lines(zksi,type="p")
}
par(mfrow=c(1,1))
list(coef = out$coef)
}




piplot<-function(x, y, alpha = 0.05){
# For Unix, use X11() to turn on the graphics device before 
# using this function.
# Makes a response plot with prediction limits added.
	x <- as.matrix(x)
	p <- dim(x)[2] + 1
	n <- length(y)
	up <- 1:n
	low <- up
	out <- lsfit(x, y)
	tem <- ls.diag(out)
	lev <- tem$hat
	res <- out$residuals
	FIT <- y - res
	Y <- y
	corfac <- (1 + 15/n)*sqrt(n/(n - p))
	val2 <- quantile(res, c(alpha/2, 1 - alpha/2))	
	#get lower and upper PI limits for each case
	for(i in 1:n) {
		val <- sqrt(1 + lev[i])
		val3 <- as.single(corfac * val2[1] * val)
		val4 <- as.single(corfac * val2[2] * val)
		up[i] <- FIT[i] + val4
		low[i] <- FIT[i] + val3
	}
	zy <- c(min(low), Y, max(up))
	zx <- c(min(FIT), FIT, max(FIT))
	plot(zx, zy, type = "n")
	points(FIT, Y)
	abline(0, 1)
	points(FIT, up, pch = 17)
	points(FIT, low, pch = 17)
}


pisim<-function(n = 100, q = 7, nruns = 100, alpha = 0.05, 
eps = 0.1, shift = 9, type = 1){
# compares new and classical PIs for multiple linear regression 
# if type = 1 for N(0,1) errors, 2 for t3 errors, 3 for exp(1) - 1 errors
# 4 for uniform(-1,1) errors, 5 for (1-eps) N(0,1) + eps N(0,(1+shift)^2) errors
# constant = 1 so there are p = q+1 coefficients
	b <- 0 * 1:q + 1
	cpicov <- 0
	npicov <- 0
	acpicov <- 0
	opicov <- 0
	val3 <- 1:nruns
	val4 <- val3
	val5 <- val3
	pilen <- matrix(0, nrow = nruns, ncol = 4)
	coef <- matrix(0, nrow = nruns, ncol = q + 1)
	corfac <- (1 + 15/n) * sqrt(n/(n - q - 1))
	corfac2 <- sqrt(n/(n - q - 1))
	for(i in 1:nruns) {
		x <- matrix(rnorm(n * q), nrow = n, ncol = q)
		if(type == 1) {
			y <- 1 + x %*% b + rnorm(n)
			xf <- rnorm(q)
			yf <- 1 + xf %*% b + rnorm(1)
		}
		if(type == 2) {
			y <- 1 + x %*% b + rt(n, df = 3)
			xf <- rnorm(q)
			yf <- 1 + xf %*% b + rt(1, df = 3)
		}
		if(type == 3) {
			y <- 1 + x %*% b + rexp(n) - 1
			xf <- rnorm(q)
			yf <- 1 + xf %*% b + rexp(1) - 1
		}
		if(type == 4) {
			y <- 1 + x %*% b + runif(n, min = -1, max = 1)
			xf <- rnorm(q)
			yf <- 1 + xf %*% b + runif(1, min = -1, max = 1)
		}
		if(type == 5) {
			err <- rnorm(n, sd = 1 + rbinom(n, 1, eps) * shift)
			y <- 1 + x %*% b + err
			xf <- rnorm(q)
			yf <- 1 + xf %*% b + rnorm(1, sd = 1 + rbinom(1, 1, eps
				) * shift)
		}
		out <- lsfit(x, y)
		fres <- out$resid
		coef[i,  ] <- out$coef
		yfhat <- out$coef[1] + xf %*% out$coef[-1]
		w <- cbind(1, x)
		xtxinv <- solve(t(w) %*% w)
		xf <- c(1, xf)
		hf <- xf %*% xtxinv
		hf <- hf %*% xf
		val <- sqrt(1 + hf)	#get classical PI
		mse <- sum(fres^2)/(n - q - 1)
		val2 <- qt(1 - alpha/2, n - q - 1) * sqrt(mse) * val
		up <- yfhat + val2
		low <- yfhat - val2
		pilen[i, 1] <- up - low
		if(low < yf && up > yf) cpicov <- cpicov + 1	
	#get semiparametric PI
		val2 <- quantile(fres, c(alpha/2, 1 - alpha/2))
		val3[i] <- as.single(corfac * val2[1] * val)
		val4[i] <- as.single(corfac * val2[2] * val)
		up <- yfhat + val4[i]
		low <- yfhat + val3[i]
		pilen[i, 2] <- up - low
		if(low < yf && up > yf) npicov <- npicov + 1	
	# asymptotically conservative PI
		val6 <- corfac2 * max(abs(val2))
		val5[i] <- val6 * val
		up <- yfhat + val5[i]
		low <- yfhat - val5[i]
		pilen[i, 3] <- up - low
		if(low < yf && up > yf) acpicov <- acpicov + 1	
	# asymptotically optimal PI
		sres <- sort(fres)
		cc <- ceiling(n * (1 - alpha))
		rup <- sres[cc]
		rlow <- sres[1]
		olen <- rup - rlow
		if(cc < n) {
			for(j in (cc + 1):n) {
				zlen <- sres[j] - sres[j - cc + 1]
				if(zlen < olen) {
				  olen <- zlen
				  rup <- sres[j]
				  rlow <- sres[j - cc + 1]
				}
			}
		}
		up <- yfhat + corfac * val * rup
		low <- yfhat + corfac * val * rlow
		pilen[i, 4] <- up - low
		if(low < yf && up > yf)
			opicov <- opicov + 1
	}
	pimnlen <- apply(pilen, 2, mean)
	mnbhat <- apply(coef, 2, mean)
	lcut <- mean(val3)
	hcut <- mean(val4)
	accut <- mean(val5)
	cpicov <- cpicov/nruns
	npicov <- npicov/nruns
	acpicov <- acpicov/nruns
	opicov <- opicov/nruns
	list(mnbhat = mnbhat, pimenlen = pimnlen, cpicov = cpicov, spicov = 
		npicov, acpicov = acpicov, opicov = opicov, lcut = lcut, hcut
		 = hcut, accut = accut)
}


poisontplt<-function(){
# Need the poison data frame from regdata.
# This function plots y^L vs two way Anova fitted values 
# from the two way Anova of y^L on x where 
# L = -1, -0.5, 0, 0.5 or 1 except log(Y) is used if L = 0. 
# If plot is linear for L, use y^L (or log(Y) for L = 0) instead
# of Y. This is a graphical method for a response transform.
lam <- c(-1, -1/2, 0, 1/2, 1)
xl <- c("1/Z", "1/sqrt(Z)", "LOG(Z)", "sqrt(Z)", "Z")
par(mfrow=c(2,3))
attach(poison)
for(i in 1:length(lam)) {
	if(lam[i] == 0)
		ytem <- log(poison$stime)
	else ytem <- poison$stime^lam[i]
	aovfit <- ytem - aov(ytem~ptype*treat,poison)$resid
		plot(aovfit, ytem, xlab = "TZHAT", ylab = xl[i])
		abline(0, 1)
		#identify(aovfit, ytem)
	}
par(mfrow=c(1,1))
detach(poison)
##using aov(ytem~.^2,poison) would fit ytem on
##ptype, treat, stime, the 3 two way interactions
##and the three way interaction
}


prdata <- function(n = 100, q = 5)
{# Generates data for Poisson regression.
y <- 0 * 1:n
beta <- 0 * 1:q
beta[1:3] <- 1
alpha <- -2.5
x <- matrix(rnorm(n * q), nrow = n, ncol = q)
        x <- 0.5*x + 1
        SP <- alpha + x%*%beta
	y <- rpois(n,lambda=exp(SP))
        # change formula to x[,1]+ ... + x[,q] with q
	#out <- glm(y ~ x[, 1] + x[, 2] + x[, 3] +
	#		x[, 4] + x[,5], family = poisson)
	#list(alpha = alpha, beta = beta, prcoef = out$coef,x=x,y=y)
        list(x=x,y=y)
}


pressp <- function(x,y)
{# Makes the response plot for  Poisson regression.
# Workstation: need to activate a graphics
# device with command "X11()" or "motif()."
#
#   If q is changed, change the formula in the glm statement.
	q <- 5
# change formula to x[,1]+ ... + x[,q] with q
	out <- glm(y ~ x[, 1] + x[, 2] + x[, 3] +
			x[, 4] + x[,5], family = poisson)
	#ESP <- x %*% out$coef[-1] + out$coef[1]
        ESP <- predict(out)
        Y <- y
        plot(ESP,Y)	
        abline(mean(y),0)
        fit <- y
        fit <- exp(ESP)
        indx <- sort.list(ESP)
        lines(ESP[indx],fit[indx])
        lines(lowess(ESP,y),type="s")
                        }


prplot<- function(x, y)
{# Makes response plot, OD plot,  weighted forward response 
# and residual plots  for Poisson regression.
# Workstation: need to activate a graphics
# device with command "X11()" or "motif()."
#
#   If q is changed, change the formula in the glm statement.
	q <- 5	# change formula to x[,1]+ ... + x[,q] with q
	out <- glm(y ~ x[, 1] + x[, 2] + x[, 
		3] + x[, 4] + x[, 5], family
		 = poisson)
	ESP <- predict(out)
	par(mfrow = c(2, 2))
        Y <- y
	plot(ESP, Y)
	abline(mean(Y), 0)
	Ehat <- exp(ESP)
	indx <- sort.list(ESP)
	lines(ESP[indx], Ehat[indx])
	lines(lowess(ESP, Y), type = "s")
	title("a) Response Plot")
	Vhat <- (Y - Ehat)^2
	plot(Ehat, Vhat)
	abline(0, 1)
	abline(0, 4)	
	#lines(lowess(Ehat, Vhat)) #abline(lsfit(Ehat, Vhat)$coef)
	title("b) OD Plot")
	Z <- Y
	Z[Y < 1] <- Z[Y < 1] + 0.5
	WRES <- sqrt(Z) * (log(Z) - x %*% 
		out$coef[-1] - out$coef[1])
	WFIT <- sqrt(Z) * log(Z) - WRES
	plot(WFIT, sqrt(Z) * log(Z))
	abline(0, 1)	
	#abline(lsfit(WFIT, sqrt(Z) * log(Z))$coef)
	title("c) WFRP")
	plot(WFIT, WRES)
	title("d) Wtd Residual Plot")
}


prsim<-function(n = 100, nruns = 1, type = 1, k = 1)
{# Runs prplot nruns times on simulated data. 
# Type = 1 for Poisson regression data, Type = 2 for negative binomial regression data
# Calls prdata, oddata, prplot.
	q <- 5
	for(i in 1:nruns) {
		if(type == 1)
			out <- prdata(n, q)
		else out <- oddata(n, q, k)
		x <- out$x
		y <- out$y
		prplot(x, y)	#identify(WFIT, WRES)
	}
}


rand <- function(n = 10, k = 3){
#randomize n units into k treatment groups of nearly equal size
#groups[i] gives the group for the ith unit
	groups <- 1:n
	gsize <- as.integer(n/k)
	z <- sample(n)
	for(i in 1:k) {
		lo <- (i - 1) * gsize + 1
		hi <- i * gsize
		if(i == k)
			hi <- n
		groups[z[lo:hi]] <- i
	}
	list(perm = z, groups = groups)
}


rand1way <- function(y,group,B=1000){
#computes the randomization F test for the 1 way Anova design
#based on Ernst (2009)
#y and group should have the same length
if(length(y) != length(group))
   stop("Response and factor vectors must be the same length")
if(!is.factor(group)) 
   group <- as.factor(group)
Fstat <- NULL
z <- anova(lm(y ~ group))
Fstat[1] <- z$F[1]
Fpval <- z$P[1]
Fdf <- z$Df
n <- length(y)
m <- max(B,floor(n*log(n)))
for(i in 2:m){
   Fstat[i] <- anova(lm(sample(y) ~ group))$F[1]
} 
randpval <- sum(Fstat>=Fstat[1])/m
list(rdist=Fstat,Fpval=Fpval,randpval=randpval)
}


seyp<-function(x, time, status){
# Makes the censored response plot for Cox proportional hazards regression.
# Workstation: need to activate a graphics
# device with command "X11()" or "motif()."
#
#   If q is changed, change the formula in the coxph statement.
	q <- 3	# change formula to x[,1]+ ... + x[,q] with q
	out <- coxph(Surv(time, status) ~ x[, 
		1] + x[, 2] + x[, 3])
	ESP <- x %*% out$coef
	plot(ESP, time, type = "n")
	points(ESP[status == 1], time[status == 
		1], pch = 3)
	points(ESP[status == 0], time[status == 
		0], pch = 1)
}


swhat <- function(stime,bhat,nv,ghat,lhat){
# Finds estimated survival function for Weibull regression
# evaluated at x = nv.
r <- length(stime)
shat <- 1:r
for(i in 1:r){
shat[i] <- exp(-exp(-ghat*bhat%*%nv)*lhat*stime[i]^ghat)
}
list(shat=shat)
}


tplot<-function(x, y){
# Use the rightmost mouse button to advance
# the plot, and in R, highlight ``stop."
# Values of y should be positive.
# Assume the MLR model contains a constant, but
# x is the design matrix without the vector of ones.
# This function makes transformation plots from the MLR 
# of y^L on x where L = -1,  -0.5, -1/3, 0, 1/3, 0.5, or 1 
# except  log(Y) is used if L = 0. 
# If plot is linear for L, use y^L (or log(Y) for L = 0) instead
# of Y. This is a graphical method for a response transform.
lam <- c(-1,  -1/2, -1/3,  0, 1/3, 1/2,  1)
	xl <- c("1/Z",  "1/sqrt(Z))", "Z**(-1/3)",  
		"LOG(Z)",  "Z**(1/3)", "sqrt(Z)",  "Z")
for(i in 1:length(lam)) {
	if(lam[i] == 0)
		ytem <- log(y)
	else ytem <- y^lam[i]
	fit <- ytem - lsfit(x,ytem)$resid
		plot(fit, ytem, xlab = "TZHAT", ylab = xl[i])
		abline(0, 1)
		identify(fit, ytem)
	}
}


trviews<-function(x, Y, ii = 10, type = 3)
{# Trimmed views for 0, 10,  ..., 90 percent trimming.
# Use ii = 10 for 0, ii = 9 for 10 then 0, ...,
# ii = 1 for 90 then 80 ..., then 0 percent trimming.  
# Allows visualization of  m and crudely estimation of   
# c beta in models of the form y = m(x^T beta) + e.
# Workstation: activate a graphics device
# with commands "X11()" or "motif()."
# R needs command "library(MASS)."
# Advance the view with the right mouse button and
# in R, highight "stop."
	x <- as.matrix(x)
        if(type == 1)
	out <- cov.mcd(x)
        if(type == 2)
        out <- covmba(x)
        if(type > 2) out <- covfch(x)
	center <- out$center
	cov <- out$cov
	rd2 <- mahalanobis(x, center, cov)
	labs <- c("90%", "80%", "70%", "60%", "50%", "40%", "30%", 
		"20%", "10%", "0%")
	tem <- seq(0.1, 1, 0.1)
	for(i in ii:10) {
		val <- quantile(rd2, tem[i])
		b <- lsfit(x[rd2 <= val,  ], Y[rd2 <= val])$coef
		ESP <- x %*% b[-1]
		plot(ESP, Y)
		title(labs[i])
		identify(ESP, Y)
		print(b)
	}
}


twocub<-function(means,m=1,MSE=NA){
# Makes the effects and QQ plot for the 2^3 design.
# Need the means of a 2^3 design replicated m times given
# in standard order. MSE should be given if m > 1.
# The commands out <- aov(y~A*B*C) and summary(out)
# can be used to find the MSE.
ca <- c(-1,1,-1,1,-1,1,-1,1)
cb <- c(-1,-1,1,1,-1,-1,1,1)
cc <- c(-1,-1,-1,-1,1,1,1,1)
cab <- c(1,-1,-1,1,1,-1,-1,1)
cac <- c(1,-1,1,-1,-1,1,-1,1)
cbc <- c(1,1,-1,-1,-1,-1,1,1)
cabc <- c(-1,1,1,-1,1,-1,-1,1)
Aeff <- as.real(t(ca)%*%means/4)
Beff <- as.real(t(cb)%*%means/4)
Ceff <- as.real(t(cc)%*%means/4)
ABeff <- as.real(t(cab)%*%means/4)
ACeff <- as.real(t(cac)%*%means/4)
BCeff <- as.real(t(cbc)%*%means/4)
ABCeff <- as.real(t(cabc)%*%means/4)
MSA <- 2*m*(Aeff)^2
MSB <- 2*m*(Beff)^2
MSC <- 2*m*(Ceff)^2
MSAB <- 2*m*(ABeff)^2
MSAC <- 2*m*(ACeff)^2
MSBC <- 2*m*(BCeff)^2
MSABC <- 2*m*(ABCeff)^2
SEeff <- NA
if(m > 1) SEeff <- sqrt(MSE/(2*m))
effects <- c(Aeff,Beff,Ceff,ABeff,ACeff,BCeff,ABCeff)
library(MASS)
nquantiles <- qnorm(ppoints(7))
qqplot(nquantiles,effects)
abline(lmsreg(nquantiles,sort(effects))$coef)
title("Normal QQ plot")
list(Aeff=Aeff,Beff=Beff,Ceff=Ceff,ABeff=ABeff,ACeff=ACeff,
BCeff=BCeff,ABCeff=ABCeff,MSA=MSA,MSB=MSB,MSC=MSC,MSAB=MSAB,
MSAC=MSAC,MSABC=MSABC,MSE=MSE,SEeff=SEeff)
}


twofourth<-function(means,m=1,MSE=NA){
# Makes the effects and QQ plot for the 2^4 design.
# Need the means of a 2^4 design replicated m times given
# in standard order. MSE should be given if m > 1.
# The commands out <- aov(y~A*B*C*D) and summary(out)
# can be used to find the MSE.
ca <- rep(c(-1,1,-1,1,-1,1,-1,1),2)
cb <- rep(c(-1,-1,1,1,-1,-1,1,1),2)
cc <- rep(c(-1,-1,-1,-1,1,1,1,1),2)
cd <- c(-1,-1,-1,-1,-1,-1,-1,-1,1,1,1,1,1,1,1,1)
cab <- rep(c(1,-1,-1,1,1,-1,-1,1),2)
cac <- rep(c(1,-1,1,-1,-1,1,-1,1),2)
cad <- ca*cd
cbc <- rep(c(1,1,-1,-1,-1,-1,1,1),2)
cbd <- cb*cd
ccd <- cc*cd
cabc <- rep(c(-1,1,1,-1,1,-1,-1,1),2)
cabd <- cab*cd
cacd <- cac*cd
cbcd <- cbc*cd
cabcd <- cabc*cd
Aeff <- as.real(t(ca)%*%means/8)
Beff <- as.real(t(cb)%*%means/8)
Ceff <- as.real(t(cc)%*%means/8)
Deff <- as.real(t(cd)%*%means/8)
ABeff <- as.real(t(cab)%*%means/8)
ACeff <- as.real(t(cac)%*%means/8)
ADeff <- as.real(t(cad)%*%means/8)
BCeff <- as.real(t(cbc)%*%means/8)
BDeff <- as.real(t(cbd)%*%means/8)
CDeff <- as.real(t(ccd)%*%means/8)
ABCeff <- as.real(t(cabc)%*%means/8)
ABDeff <- as.real(t(cabd)%*%means/8)
ACDeff <- as.real(t(cacd)%*%means/8)
BCDeff <- as.real(t(cbcd)%*%means/8)
ABCDeff <- as.real(t(cabcd)%*%means/8)
MSA <- 4*m*(Aeff)^2
MSB <- 4*m*(Beff)^2
MSC <- 4*m*(Ceff)^2
MSD <- 4*m*(Deff)^2
MSAB <- 4*m*(ABeff)^2
MSAC <- 4*m*(ACeff)^2
MSAD <- 4*m*(ADeff)^2
MSBC <- 4*m*(BCeff)^2
MSBD <- 4*m*(BDeff)^2
MSCD <- 4*m*(CDeff)^2
MSABC <- 4*m*(ABCeff)^2
MSABD <- 4*m*(ABDeff)^2
MSACD <- 4*m*(ACDeff)^2
MSBCD <- 4*m*(BCDeff)^2
MSABCD <- 4*m*(ABCDeff)^2
SEeff <- NA
if(m > 1) SEeff <- sqrt(MSE/(4*m))
effects <- c(Aeff,Beff,Ceff,Deff,ABeff,ACeff,ADeff,BCeff,
BDeff,CDeff=CDeff,ABCeff,ABDeff,ACDeff,BCDeff,ABCDeff)
if(m == 1) par(mfrow = c(2,2))
library(MASS)
nquantiles <- qnorm(ppoints(15))
qqplot(nquantiles,effects)
abline(lmsreg(nquantiles,sort(effects))$coef)
title("Normal QQ plot")
if(m == 1){
#ca <- as.factor(ca)
#cb <- as.factor(cb)
#cc <- as.factor(cc)
#cd <- as.factor(cd)
#using factors gives the wrong output
out <- aov(means~(ca+cb+cc+cd)^2)
print(summary.lm(out))
Fit <- means - out$resid
Resid <- out$resid
Y <- means
plot(Fit,Y)
abline(0,1)
title("Response Plot")
plot(Fit,Resid)
abline(0,0)
title("Residual Plot")
par(mfrow = c(1,1))
}
list(Aeff=Aeff,Beff=Beff,Ceff=Ceff,Deff=Deff,ABeff=ABeff,ACeff=ACeff,
ADeff=ADeff,BCeff=BCeff,BDeff=BDeff,CDeff=CDeff,ABCeff=ABCeff,ABDeff=ABDeff,
ACDeff=ACDeff,BCDeff=BCDeff,ABCDeff=ABCDeff,MSA=MSA,MSB=MSB,MSC=MSC,MSD=MSD,
MSAB=MSAB,MSAC=MSAC,MSAD=MSAD,MSBC=MSBC,MSBD=MSBD,MSCD=MSCD,MSABC=MSABC,
MSABD=MSABD,MSACD=MSACD,MSBCD=MSBCD,MSABCD=MSABCD,MSE=MSE,SEeff=SEeff)
}


vlung2 <- function(sexv=2){
# uses 4 slices instead of the 9 used by vlung
# Check goodness of fit of Cox PH for R lung data.
# for females use sexv = 2
# for males use sexv = 1, change the 2 to a 1
flung<-lung[lung$sex==sexv,]
nflung<-na.omit(flung[,c("time","status","ph.ecog","ph.karno",
"pat.karno","wt.loss")])
out <- coxph(Surv(time, status) 
~ ph.ecog + ph.karno + pat.karno + wt.loss,data = nflung)
x <- as.matrix(nflung[,c(3,4,5,6)])
time <- as.vector(nflung[,1])
status <- as.vector(nflung[,2])
ESP <- x %*% out$coef
indx <- sort.list(ESP)
n <- 85
val <- as.integer(n/4)
cmar<-par("mar")
par(mfrow = c(2, 2))
#par(mar=c(4.0,4.0,2.0,0.5))
for(i in 1:4){
    lo <- (i-1)*val + 1
    hi <- i*val
    if(i == 4) hi <- n
    tt <- time[indx[lo:hi]]
    ts <- status[indx[lo:hi]]
    zksi <- survfit(Surv(tt, ts)~1, conf.type = "none")
    ## change model formula for nv with p 
    midpt <- lo + floor((hi-lo)/2)
nvt <- c(x[indx[midpt],1],x[indx[midpt],2],x[indx[midpt],3],x[indx[midpt],4])
#take a typical value of time
nv<-data.frame(time=300,status=1,ph.ecog=nvt[1],ph.karno=nvt[2],
pat.karno=nvt[3],wt.loss=nvt[4])
plot(survfit(coxph(Surv(time, status) 
~ ph.ecog + ph.karno + pat.karno + wt.loss,data = nflung), newdata = nv),
xlab="Time",ylab="Estimated S(t)")
lines(zksi,type="p")    
}
par(mfrow=c(1,1))
par(mar=cmar)}




vnwtco <- function(){
#visualizing NWTCO data
##type library(survival)
#>nwtco[1,]
#  seqno instit histol stage study rel edrel age in.subcohort
#     1      2      2     1     3   0  6075  25        FALSE
z<-coxph(Surv(edrel,rel)~histol+instit+age+stage,data=nwtco)
summary(z)
ccex<-par("cex")
par(mfrow=c(2,2))
par(cex=0.3)
zz<- cox.zph(z)
# adding cex=0.1 has no effect
plot(zz, cex=0.1)
print(zz)
par(cex=ccex)
#test seems to contradict plots which have fairly horizontal loess
time <- as.vector(nwtco[,7])
status <- as.vector(nwtco[,6])
out <- coxph(Surv(time, status)~histol+instit+age+stage,data=nwtco) 
x <- as.matrix(nwtco[,c(3,2,8,4)])
ESP <- x %*% out$coef
indx <- sort.list(ESP)
n <- 4028
##change par(mfrow=c(2,3)) with slices
slices<-6
val <- as.integer(n/slices)
cmar<-par("mar")
ccex<-par("cex")
par(mfrow = c(2, 3))
par(mar=c(4.0,4.0,2.0,0.5))
for(i in 1:slices){
    lo <- (i-1)*val + 1
    hi <- i*val
    if(i == slices) hi <- n
    tt <- time[indx[lo:hi]]
    ts <- status[indx[lo:hi]]
    zksi <- survfit(Surv(tt, ts)~1, conf.type = "none")
    ## change model formula for nv with p 
    midpt <- lo + floor((hi-lo)/2)
    nvt <- c(x[indx[midpt],1],x[indx[midpt],2],x[indx[midpt],3],
    x[indx[midpt],4])
#use typical values for seqno,study,time, and in.subcohort
nv<-data.frame(seqno=4089,instit=nvt[2],histol=nvt[1],stage=nvt[4],study=3, status=1,time=300,age=nvt[3],in.subcohort=TRUE)
plot(survfit(coxph(Surv(time, status)~ histol+instit+age+stage,data = nwtco), newdata = nv),
xlab="Time",ylab="Estimated S(t)",cex=0.1)
    lines(zksi,cex=0.1,type="p")
    }
par(mfrow=c(1,1))
par(mar=cmar)
par(cex=ccex)
}






vovar <- function(){
####bug in new version of R for slice survival plot
# uses 4 slices, calls swhat
# check goodness of fit of Cox PH and Weibull PH
# models for the ovarian cancer data

time <- c(156, 1040, 59, 421, 329, 769, 365, 770, 1227, 
268, 475, 1129, 464, 1206, 638, 563, 1106, 431, 855, 
803, 115, 744, 477, 448, 353, 377)

status <- c(1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1,
1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0)

treat <- c(1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1,
0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0)

age <- c(66, 38, 72, 53, 43, 59, 64, 57, 59, 74, 59, 53,
56, 44, 56, 55, 44, 50, 43, 39, 74, 50, 64, 56, 63, 58)

ovar <- cbind(time,status,treat,age)

#paste these commands for the LCR plot
z <- survreg(Surv(ovar[,1],ovar[,2])~ovar[,3]+ovar[,4],dist="weibull")
AFTESP <- 11.548 -.561*ovar[,3] -.079*ovar[,4]
logT <- log(ovar[,1])
plot(AFTESP,logT,pch = ' ')
points(AFTESP[ovar[,2]==0],log(ovar[ovar[,2]==0,1]),pch=1)
points(AFTESP[ovar[,2]==1],log(ovar[ovar[,2]==1,1]),pch=3)
abline(0,1) 

#paste these commands for the Weibull EE plot
z <- survreg(Surv(ovar[,1],ovar[,2])~ovar[,3]+ovar[,4],dist="weibull")
zx <- cbind(ovar[,3],ovar[,4])
zc <- coxph(Surv(ovar[,1],ovar[,2])~ovar[,3]+ovar[,4])
sighat<-z$scale
ESPC <- zx%*%zc$coef
ESPW <- -zx%*%z$coef[-1]/sighat
plot( ESPW, ESPC)
abline(0,1)
title("a) Weibull")
cor( ESPW, ESPC)

#paste these commands for the Exponential EE plot

ze <- survreg(Surv(ovar[,1],ovar[,2])~ovar[,3]+ovar[,4],dist="exponential")
ESPE <- -zx%*%ze$coef[-1]
plot( ESPE, ESPC)
abline(0,1)
title("b) Exponential")

##########bug in plot below for new version of R

#type vovar() to produce the slice survival plot
ghat <- 1/z$scale
ahat <- z$coef[1]
bhat <- z$coef[-1]
lhat <- exp(-ahat*ghat)
stime <- sort(time)
x <- zx
ESP <- ESPC
indx <- sort.list(ESP)
n <- 26
val <- as.integer(n/4)
cmar<-par("mar")
par(mfrow = c(2, 2))
#par(mar=c(4.0,4.0,2.0,0.5))
for(i in 1:4){
    lo <- (i-1)*val + 1
    hi <- i*val
    if(i == 4) hi <- n
    tt <- time[indx[lo:hi]]
    ts <- status[indx[lo:hi]]
    zksi <- survfit(Surv(tt, ts)~1, conf.type = "none")
    ## change model formula for nv with p 
    midpt <- lo + floor((hi-lo)/2)
    nv <- c(x[indx[midpt],1],x[indx[midpt],2])
plot(survfit(coxph(Surv(time, status)~ ovar[,3]+ovar[,4]),
         newdata = nv),xlab="Time",ylab="Estimated S(t)")
    sfit <- swhat(stime,bhat,nv,ghat,lhat)$shat
    points(stime,sfit,pch=3)
    lines(zksi,type="p")
    }
par(mfrow=c(1,1))
par(mar=cmar)
}


weyp<-
function(x, time, status){
# Makes the log censored response plot for Weibull proportional hazards regression.
# Workstation: need to activate a graphics
# device with command "X11()" or "motif()."
#
#   If q is changed, change the formula in the glm statement.
	q <- 3	# change formula to x[,1]+ ... + x[,q] with q
	out <- survreg(Surv(time, status) ~ x[, 1] + x[, 2] + x[, 3])
	ESP <- x %*% out$coef[-1] + out$coef[1]
	plot(ESP, log(time), type = "n")
	points(ESP[status == 1], log(time[status == 1]), pch = 3)
	points(ESP[status == 0], log(time[status == 0]), pch = 1)
	abline(0, 1)
	#abline(lsfit(ESP[status == 1], log(time[status == 1]))$coef)
}


wlsplot<-function(x, y, w){
# Make OLS response and residual plots,
# then transform data with weights and make OLS plots 
# based on weights to check that the weights are reasonable.
# use X11() to turn on the graphics device before using this function
	ols <- lsfit(x, y)
	Y <- y
	Z <- sqrt(w) * y
	u <- sqrt(w) * x
	uu <- sqrt(w)
	u <- cbind(uu, u)
	wls <- lsfit(u, Z, intercept = F)
	par(mfrow = c(2, 2))
	FIT <- Y - ols$resid
	plot(FIT, Y)
	abline(0, 1)
	title("a) OLS Response Plot")
	RESID <- ols$resid
	plot(FIT, RESID)
	title("b) OLS Residual Plot")
	ZFIT <- Z - wls$resid
	plot(ZFIT, Z)
	abline(0, 1)
	title("c) WLS Response Plot")
	ZRESID <- wls$resid
	plot(ZFIT, ZRESID)
	title("d) WLS Residual Plot")
	list(bols = ols$coef, bwls = wls$coef)
        par(mfrow=c(1,1))
}


wphsim <- function(n = 99,  k = 1, slices = 9){
# Generates data for Weibull PH regression using Zhou (2001).
# Makes a slice survival plot for Weibull regression.
# The AFT ESP is used instead of the PH ESP, so survival
# increases as the AFT ESP increases.
# Uses crosses for Weibull and circles for KM.
# Get Weibull regression data with g(Y) = Y^(1/k) and
# h_(g(Y))(t) = k t^(k-1) exp(SP).
# 
# Calls function swhat.
#
# May not work in Splus.
#
# In R, type library(survival)
p <- 3 ## change formula with p
beta <- 1 + 0 * 1:p
x <- matrix(rnorm(n * p), nrow = n, ncol = p)/2
SP <- x %*% beta
lambdai <- exp(SP)
y <- rexp(n, rate = lambdai)
gy <- y^(1/k)
cen <- rexp(n, rate = 0.1)
time <- pmin(gy, cen)
status <- as.numeric(cen >= gy)	
##change formula for survreg with p
out <- survreg(Surv(time, status) ~ x[, 1] + x[, 2] + x[, 3])
ESP <- x %*% out$coef[-1]
ghat <- 1/out$scale
ahat <- out$coef[1]
bhat <- out$coef[-1]
lhat <- exp(-ahat*ghat)
stime <- sort(time)
indx <- sort.list(ESP)
val <- as.integer(n/slices)
if(slices <= 4) par(mfrow = c(2, 2))
if( (slices == 5) || (slices ==  6)) par(mfrow = c(2, 3))
if(slices >= 7) par(mfrow = c(3, 3))
for(i in 1:slices){
    lo <- (i-1)*val + 1
    hi <- i*val
    if(i == slices) hi <- n
    tt <- time[indx[lo:hi]]
    ts <- status[indx[lo:hi]]
    zksi <- survfit(Surv(tt, ts)~1, conf.type = "none")
    ## change model formula for nv with p 
    midpt <- lo + floor((hi-lo)/2)
    nv <- c(x[indx[midpt],1],x[indx[midpt],2],x[indx[midpt],3])
    sfit <- swhat(stime,bhat,nv,ghat,lhat)$shat
    plot(stime,sfit,pch=3)
    lines(zksi,type="p")
    }
par(mfrow=c(1,1))
list(coef = out$coef)
}


wregsim<-function(n = 100, q = 3, k = 1, nruns = 1){
# Simulates log censored response plots for Weibull PH data.
# calls phdata and weyp
	for(i in 1:nruns) {
		out <- phdata(n, q, k)
		weyp(out$x, out$time, out$status)
	}
}


wregsim2<-function(n = 100, k = 1, nruns = 1){
# Simulates EE plots for Weibull proportional hazards AFT data.
# calls phdata 
# R users need to type library(survival) 
        corr <- 1:nruns
	for(i in 1:nruns) {
		p <- 3	# change formula to x[,1]+ ... + x[,p] with p
		out <- phdata(n, p, k)
		x <- out$x
		time <- out$time
		status <- out$status
		outp <- coxph(Surv(time, status) ~ x[, 1] + x[, 2] + x[, 3])
		outw <- survreg(Surv(time, status) ~ x[, 1] + x[, 2] + x[, 3])
                shat <- outw$scale
		ESPC <- x %*% outp$coef
		ESPW <- -(x %*% outw$coef[-1])/shat
		plot( ESPW, ESPC)
                corr[i] <- cor(ESPW, ESPC)
		abline(0, 1)
	}
        mncorr <- mean(corr)
        list(mncorr=mncorr)
}


wregsim3<-function(n = 100, k = 1, lam = 0.01, nruns = 100){
# See if alphahat and betahat are consistent estimators.
# Calls phdata. 
# want mnint = 0, mncoef[i] = -1/k, mnscale = 1/k
	q <- 3
	wout <- matrix(0, nrow = nruns, ncol = 5)
	for(i in 1:nruns) {
		out <- phdata(n, q, k, lam)
		x <- out$x
		time <- out$time
		status <- out$status
		outw <- survreg(Surv(time, status) ~ x[, 1] + x[, 2] + x[, 3])
		wout[i, 1:4] <- outw$coef
		wout[i, 5] <- outw$scale
	}
	wmn <- apply(wout, 2, mean)
	mnint <- wmn[1]
	mncoef <- wmn[2:4]
	mnscale <- wmn[5]
	list(mnint = mnint, mncoef = mncoef, mnscale = mnscale)
}