##The command ##source("http://lagrange.math.siu.edu/Olive/regpack.txt") ##is an easy way to get these functions into R. anovasim<-function(n1 = 20, n2 = 20, n3 = 20, n4 = 20, m1 = 0, m2 = 0, m3 = 0, m4 = 0, sd1 = 1, sd2 = 1, sd3 = 1, sd4 = 1, nruns = 1000){ # Simulates ANOVA F, modified F, Welch F, modified Welch F and rank F statistic # for 1 way ANOVA. # tests Ho: mu1 = mu2 = mu3 = mu4 p <- 4 n <- n1 + n2 + n3 + n4 y <- 0 * 1:n c1 <- 0 * 1:n c2 <- c1 c3 <- c1 c4 <- c1 c1[1:n1] <- 1 c2[(n1 + 1):(n1 + n2)] <- 1 c3[(n1 + n2 + 1):(n1 + n2 + n3)] <- 1 c4[(n1 + n2 + n3 + 1):n] <- 1 x <- cbind(c1, c2, c3, c4) y1 <- 1:n1 y2 <- 1:n2 y3 <- 1:n3 y4 <- 1:n4 beta <- c(m1, m2, m3, m4) bols <- matrix(0, nrow = nruns, ncol = p) faov <- 0 * 1:nruns rf <- faov mct <- 0 wct <- 0 mwct <- 0 for(i in 1:nruns) { #make data y1 <- rnorm(n1, mean = m1, sd = sd1) y2 <- rnorm(n2, mean = m2, sd = sd2) y3 <- rnorm(n3, mean = m3, sd = sd3) y4 <- rnorm(n4, mean = m4, sd = sd4) y <- c(y1, y2, y3, y4) out <- lsfit(x, y, intercept = F) bols[i, ] <- out$coef mn1 <- bols[i, 1] mn2 <- bols[i, 2] mn3 <- bols[i, 3] mn4 <- bols[i, 4] s1sq <- var(y1) s2sq <- var(y2) s3sq <- var(y3) s4sq <- var(y4) #get ANOVA F statistic res <- out$resid mse <- t(res) %*% res/(n - p) sse <- mse * (n - p) mny <- mean(y) sstr <- n1 * (mn1 - mny)^2 + n2 * (mn2 - mny)^2 + n3 * (mn3 - mny)^2 + n4 * (mn4 - mny)^2 fnum <- sstr/(p - 1) faov[i] <- fnum/mse #get modified F statistic num <- (p - 1) * fnum den <- (1 - n1/n) * s1sq + (1 - n2/n) * s2sq + (1 - n3/n) * s3sq + (1 - n4/n) * s4sq modf <- num/den c1 <- ((1 - n1/n) * s1sq)/den c2 <- ((1 - n2/n) * s2sq)/den c3 <- ((1 - n3/n) * s3sq)/den c4 <- ((1 - n4/n) * s4sq)/den finv <- c1^2/(n1 - 1) + c2^2/(n2 - 1) + c3^2/(n3 - 1) + c4^2/(n4 - 1) md <- ceiling(1/finv) if(modf > qf(0.95, p - 1, md)) mct <- mct + 1 #get Welch F statistic w1 <- n1/s1sq w2 <- n2/s2sq w3 <- n3/s3sq w4 <- n4/s4sq u <- w1 + w2 + w3 + w4 xdd <- (w1 * mn1)/u + (w2 * mn2)/u + (w3 * mn3)/u + (w4 * mn4)/u num <- (w1 * (mn1 - xdd)^2 + w2 * (mn2 - xdd)^2 + w3 * (mn3 - xdd)^2 + w4 * (mn4 - xdd)^2)/(p - 1) tem <- (1 - w1/u)^2/(n1 - 1) + (1 - w2/u)^2/(n2 - 1) + (1 - w3/u)^2/(n3 - 1) + (1 - w4/u)^2/(n4 - 1) den <- 1 + (2 * (p - 2) * tem)/(p^2 - 1) wf <- num/den finv <- (3 * tem)/(p^2 - 1) wd <- ceiling(1/finv) if(wf > qf(0.95, p - 1, wd)) wct <- wct + 1 #get modified Welch F statistic dfnum <- (1/w1 + 1/w2 + 1/w3 + 1/w4)^2 dfden <- (1/w1)^2/(n1 - 1) + (1/w2)^2/(n2 - 1) + (1/w3)^2/(n3 - 1) + (1/w4)^2/(n4 - 1) wd <- ceiling(dfnum/dfden) if(wf > qf(0.95, p - 1, wd)) mwct <- mwct + 1 #get rank F statistic y <- rank(y) out <- lsfit(x, y, intercept = F) brols <- out$coef mn1 <- brols[1] mn2 <- brols[2] mn3 <- brols[3] mn4 <- brols[4] res <- out$resid mse <- t(res) %*% res/(n - p) sse <- mse * (n - p) mny <- mean(y) sstr <- n1 * (mn1 - mny)^2 + n2 * (mn2 - mny)^2 + n3 * (mn3 - mny)^2 + n4 * (mn4 - mny)^2 fnum <- sstr/(p - 1) rf[i] <- fnum/mse } mnbols <- apply(bols, 2, mean) ssdbols <- sqrt(n) * sqrt(apply(bols, 2, var)) faovcov <- sum(faov > qf(0.95, p - 1, n - p))/nruns modfcov <- mct/nruns mwfcov <- mwct/nruns wfcov <- wct/nruns rfcov <- sum(rf > qf(0.95, p - 1, n - p))/nruns list(mnbols = mnbols, ssdbols = ssdbols, faovcov = faovcov, modfcov = modfcov, wfcov = wfcov, mwfcov = mwfcov, rfcov = rfcov) } aovtplt<-function(x, y){ # Need x to have the factor levels needed for one way Anova. # This function plots y^L vs one way Anova fitted values # from the one way Anova of y^L on x where # L = -1, -0.5, 0, 0.5 or 1 except log(Y) is used if L = 0. # If plot is linear for L, use y^L (or log(Y) for L = 0) instead # of Y. This is a graphical method for a response transform. lam <- c(-1, -1/2, 0, 1/2, 1) xl <- c("1/Z", "1/sqrt(Z)", "LOG(Z)", "sqrt(Z)", "Z") par(mfrow=c(2,3)) x<-factor(x) for(i in 1:length(lam)) { if(lam[i] == 0) ytem <- log(y) else ytem <- y^lam[i] aovfit <- ytem - aov(ytem~x)$resid plot(aovfit, ytem, xlab = "TZHAT", ylab = xl[i]) abline(0, 1) #identify(aovfit, ytem) } par(mfrow=c(1,1)) } binrplot <- function(x, y){ # Calls the ``sir" function (in rpack but obtained from STATLIB). # The binary response plot allows visualization of binary data. # Workstation need to activate a graphics # device with command "X11()" or "motif()." # # x needs at least two columns or the call to sir fails # q <- dim(x)[2] h <- q + 7 out <- sir(x, y, h) SIRESP <- x %*% out$edr[, 1] V <- x %*% out$edr[, 2] plot(SIRESP, V, type = "n") points(SIRESP[y == 1], V[y == 1], pch = 3) points(SIRESP[y == 0], V[y == 0], pch = 1) } bphgfit<-function(time, status, group){ # May not work in Splus. # # For R, first type library(survival). # # Checks goodness of fit of Cox proportional # hazard model if time is a vector of times, status is 0 for # censored times and 1 otherwise and there is a single covariate # group = 1 for group (treatment) 1 and 0 else. # # Splus Bug: wants zd defined outside the function or it says it # can't find zd also it fails if the data has an n larger than that # of the zd defined outside of the function # zd <- as.data.frame(cbind(time, status, group)) z0 <- zd[group == 0, ] z1 <- zd[group == 1, ] zk0 <- survfit(Surv(time[group == 0], status[group == 0])~1, conf.type = "none") zk1 <- survfit(Surv(time[group == 1], status[group == 1])~1, conf.type = "none") par(mfrow = c(2, 1)) plot(survfit(coxph(Surv(time, status) ~ group, zd), newdata = z0)) lines(zk0,type="p") title("Estimated Cox and KM survival, group 0" ) plot(survfit(coxph(Surv(time, status) ~ group, zd), newdata = z1)) lines(zk1,type="p") title("Estimated Cox and KM survival, group 1" ) par(mfrow=c(1,1)) } bphsim3<-function(n = 100, k = 1, nruns = 1){ # May not work in Splus. # # For R, first type library(survival). # # Gets plot for Cox proportional hazards data # with 1 indicator variable as a predictor. # calls bphgfit # Splus bug: bug once it works for n = 100, # it won't work for n =200 for(i in 1:nruns) { beta <- 1 group <- 0 * 1:n group[(n/2 + 1):n] <- 1 SP <- group * beta lambdai <- exp(SP) y <- rexp(n, rate = lambdai) gy <- y^(1/k) cen <- rexp(n, rate = 0.1) cengy <- pmin(gy, cen) status <- as.numeric(cen >= gy) bphgfit(cengy, status, group) } } ctrviews<-function(x, y, ii = 1){ # Uses classical distances instead of robust distances. # Trimmed views for 90, 80, ... 0 percent # trimming. Allows visualization of m # and crude estimatation of c beta in models # of the form y = m(x^T beta) + e. # Workstation: activate a graphics # device with command "X11()" or "motif()." # R needs command "library(lqs)." # Advance the view with the right mouse button. # In R, highight "stop." x <- as.matrix(x) center <- apply(x, 2, mean) cov <- var(x) rd2 <- mahalanobis(x, center, cov) labs <- c("90%", "80%", "70%", "60%", "50%", "40%", "30%", "20%", "10%", "0%") tem <- seq(0.1, 1, 0.1) for(i in ii:10) { val <- quantile(rd2, tem[i]) bhat <- lsfit(x[rd2 <= val, ], y[rd2 <= val])$coef fit <- x %*% bhat[-1] plot(fit, y) title(labs[i]) identify(fit, y) print(bhat) } } ddcomp<-function(x, steps = 5){ # Makes 4 DD plots using the FMCD and MBA estimators. # Click left mouse button to identify points. # Click right mouse button to end the function. # Unix systems turn on graphics device eg enter # command "X11()" or "motif()" before using. # R users need to type "library(lqs)" before using. p <- dim(x)[2] par(mfrow = c(2, 2)) center <- apply(x, 2, mean) cov <- var(x) md2 <- mahalanobis(x, center, cov) # MD is the classical and RD the robust distance MD <- sqrt(md2) #DGK start md2 <- mahalanobis(x, center, cov) medd2 <- median(md2) ## get the start mns <- center covs <- cov ## concentrate for(i in 1:steps) { md2 <- mahalanobis(x, mns, covs) medd2 <- median(md2) mns <- apply(x[md2 <= medd2, ], 2, mean) covs <- var(x[md2 <= medd2, ]) } rd2 <- mahalanobis(x, mns, covs) rd <- sqrt(rd2) #Scale the RD so the plot follows the 0-1 line #if the data is multivariate normal. const <- sqrt(qchisq(0.5, p))/median(rd) RDdgk <- const * rd plot(MD, RDdgk) abline(0, 1) identify(MD, RDdgk) title("DGK DD Plot") #MBA out <- covmba(x) center <- out$center cov <- out$cov rd2 <- mahalanobis(x, center, cov) rd <- sqrt(rd2) #Scale the RD so the plot follows the identity line #if the data is multivariate normal. const <- sqrt(qchisq(0.5, p))/median(rd) RDm <- const * rd plot(MD, RDm) abline(0, 1) identify(MD, RDm) title("MBA DD Plot") #FMCD out <- cov.mcd(x) center <- out$center cov <- out$cov rd2 <- mahalanobis(x, center, cov) rd <- sqrt(rd2) #Scale the RD so the plot follows the 0-1 line #if the data is multivariate normal. const <- sqrt(qchisq(0.5, p))/median(rd) RDf <- const * rd plot(MD, RDf) abline(0, 1) identify(MD, RDf) title("FMCD DD Plot") #Median Ball start covv <- diag(p) med <- apply(x, 2, median) md2 <- mahalanobis(x, center = med, covv) medd2 <- median(md2) ## get the start mns <- apply(x[md2 <= medd2, ], 2, mean) covs <- var(x[md2 <= medd2, ]) ## concentrate for(i in 1:steps) { md2 <- mahalanobis(x, mns, covs) medd2 <- median(md2) mns <- apply(x[md2 <= medd2, ], 2, mean) covs <- var(x[md2 <= medd2, ]) } rd2 <- mahalanobis(x, mns, covs) rd <- sqrt(rd2) #Scale the RD so the plot follows the 0-1 line #if the data is multivariate normal. const <- sqrt(qchisq(0.5, p))/median(rd) RDmb <- const * rd plot(MD, RDmb) abline(0, 1) identify(MD, RDmb) title("Med Ball DD Plot") } edtplt<-function(x, y, int = F){ # Need x to be an appropriate design matrix for the experimental # design fit by OLS. This function plots y^L vs OLS fit where # L = -1, -0.5, 0, 0.5 or 1 except log(Y) is used if L = 0. # If plot is linear for L, use y^L (or log(Y) for L = 0) instead # of Y. This is a graphical method for a response transform. lam <- c(-1, -1/2, 0, 1/2, 1) xl <- c("1/Z", "1/sqrt(Z)", "LOG(Z)", "sqrt(Z)", "Z") par(mfrow=c(2,3)) for(i in 1:length(lam)) { if(lam[i] == 0) ytem <- log(y) else ytem <- y^lam[i] olsfit <- ytem - lsfit(x, ytem, intercept = int)$resid plot(olsfit, ytem, xlab = "TZHAT", ylab = xl[i]) abline(0, 1) #identify(olsfit, ytem) } par(mfrow=c(1,1)) } essp<- function(x, Y, M = 50, type = 3) {# Trimmed view or ESSP for M percent # trimming. Allows visualization of g # and crude estimation of c beta in models # of the form y = g(x^T beta,e). # Workstation need to activate a graphics # device with command "X11()" or "motif()." # R needs command "library(MASS)" or "library(lqs)." # Click on the right mouse button to finish, and # in R, highlight "stop." x <- as.matrix(x) tval <- M/100 if(type == 1) out <- cov.mcd(x) if(type == 2) out <- covmba(x) if(type == 3) out <- covfch(x) center <- out$center cov <- out$cov rd2 <- mahalanobis(x, center, cov) val <- quantile(rd2, (1 - tval)) bhat <- lsfit(x[rd2 <= val, ], Y[rd2 <= val])$ coef ESP <- x %*% bhat[-1] + bhat[1] plot(ESP, Y) identify(ESP, Y) return(bhat[-1]) } fwlssim<-function(n = 100, q = 4, sd = 1, nruns = 1, type = 1){ # Simulates FWLS data Y = SP + |SP - j| e, with SP = alpha + beta' x. # So w = 1/(SP-j)^2 and what = 1/(ESP - j)^2 # type = 1 or 3 for wls, type = 2 or 4 for fwls # There are q = p-1 nontrivial predictors as well as a constant # Need p > 1 since x is n by q. beta <- 2 + 0 * 1:q par(mfrow = c(2, 2)) for(i in 1:nruns) { #make data x <- matrix(rnorm(n * q), nrow = n, ncol = q) if(q == 1) x <- as.matrix(x) u <- x if(type == 1) { sp <- 25 + x %*% beta Y <- sp + abs(sp - 5) * sd * rnorm(n) ols <- lsfit(x, Y) olsesp <- Y - ols$resid what <- 1/(sp - 5)^2 } if(type == 2) { sp <- 25 + x %*% beta Y <- sp + abs(sp - 5) * sd * rnorm(n) ols <- lsfit(x, Y) olsesp <- Y - ols$resid what <- 1/(olsesp - 5)^2 } if(type == 3) { sp <- 25 + x %*% beta Y <- sp + abs(sp - 25) * sd * rnorm(n) ols <- lsfit(x, Y) olsesp <- Y - ols$resid what <- 1/(sp - 25)^2 } if(type == 4) { sp <- 25 + x %*% beta Y <- sp + abs(sp - 25) * sd * rnorm(n) ols <- lsfit(x, Y) olsesp <- Y - ols$resid what <- 1/(olsesp - 25)^2 } uu <- sqrt(what) Z <- uu * Y for(j in 1:q) { u[, j] <- uu * x[, j] } u <- cbind(uu, u) wls <- lsfit(u, Z, intercept = F) FIT <- Y - ols$resid plot(FIT, Y) abline(0, 1) title("a) OLS Response Plot") RESID <- ols$resid plot(FIT, RESID) title("b) OLS Residual Plot") ZFIT <- Z - wls$resid plot(ZFIT, Z) abline(0, 1) title("c) FWLS Response Plot") ZRESID <- wls$resid plot(ZFIT, ZRESID) title("d) FWLS Residual Plot") } } ganova<-function(x, y){ # Let x have the factor levels 1,2, ..., k needed for one way Anova # and y the corresponding response values. # This function does graphical anova for the one way model. Treatments <- c("A","B","C","D","E","F","G","H","I","J","K") xx <- as.integer(x) k <- max(xx) n <- length(y) v10 <- 10 + 0*1:n v20 <- 20 + 0*1:k gmn <- mean(y) mn <- 1:k sdev <- mn for (i in 1:k){ mn[i] <- mean(y[xx==i]) } mn <- mn - gmn sdev <- sqrt((n-k)/(k-1))*mn w <- as.factor(x) residuals <- aov(y~w)$resid graphicalanova <- c(v10,v20) Residuals <- c(residuals,sdev) plot(Residuals,graphicalanova) title("Scaled Treatment Deviations") Treatments <- Treatments[1:k] list(sdev=sdev,Treatments=Treatments) } ganova2<-function(x, blocks, y){ # Let x and blocks have levels 1,2, ..., k and y the corresponding # response values. This function does graphical anova for # completely randomized block design. Treatments <- c("A","B","C","D","E","F","G","H","I","J","K") xx <- as.integer(x) blk <- as.integer(blocks) k <- max(xx) b <- max(blk) n <- length(y) v10 <- 10 + 0*1:n v20 <- 20 + 0*1:k v30 <- 30 + 0*1:b gmn <- mean(y) mn <- 1:k smn <- mn bmn <- 1:b sbmn <- bmn for (i in 1:k){ mn[i] <- mean(y[xx==i]) } for (i in 1:b){ bmn[i] <- mean(y[blk==i]) } mn <- mn - gmn smn <- sqrt(b-1)*mn bmn <- bmn - gmn sbmn <- sqrt(k-1)*bmn w <- as.factor(x) block <- as.factor(blocks) residuals <- aov(y~block+w)$resid Treatmentdevs <- c(v10,v20,v30) Residuals <- c(residuals,smn,sbmn) plot(Residuals,Treatmentdevs) title("Scaled Block Deviations") Treatments <- Treatments[1:k] Blocks <- 1:b list(sbdev=sbmn,Blocks,sdev=smn,Treatments=Treatments) } kmsim<-function(n = 100, runs = 5){ # simulation of classical, log, log-log and plus four 95% confidence intervals for Kaplan # Meier estimator ccov <- 0 * 1:n lcov <- ccov llcov <- ccov p4cov <- ccov np3 <- n + 3 np4 <- n + 4 for(i in 1:runs) { y <- rexp(n) z <- rexp(n, rate = 0.1) # get censored data t t <- pmin(y, z) delta <- as.numeric(z >= y) out <- survfit(Surv(t, delta)~1, conf.type = "plain") #Get S(t) then see if S(t_i) is in the CI for S(t_i) for i = 1 to n st <- exp( - out$time) ccov <- ccov + as.integer(st > out$low & st < out$up) out <- survfit(Surv(t, delta)~1) lcov <- lcov + as.integer(st > out$low & st < out$up) out <- survfit(Surv(t, delta)~1, conf.type = "log-log") llcov <- llcov + as.integer(st > out$low & st < out$up) # get plus 4 interval tmin <- min(t) tmax <- max(t) tp4 <- c(tmin/3, tmin/2, t, tmax + 1, tmax + 2) dp4 <- c(1, 1, delta, 1, 1) out <- survfit(Surv(tp4, dp4)~1, conf.type = "plain") outl <- out$low[ - c(1, 2, np3, np4)] outu <- out$up[ - c(1, 2, np3, np4)] p4cov <- p4cov + as.integer(st > outl & st < outu) } ccov <- ccov/runs lcov <- lcov/runs llcov <- llcov/runs p4cov <- p4cov/runs list(ccov = ccov, lcov = lcov, llcov = llcov, p4cov = p4cov) } kmsim2<-function(n = 100, runs = 100){ # simulation of classical, log, log-log and plus four 95% # confidence intervals for Kaplan Meier estimator # library(survival) ccov <- 0 * 1:n lcov <- ccov llcov <- ccov p4cov <- ccov clen <- ccov llen <- ccov lllen <- ccov p4len <- ccov np3 <- n + 3 np4 <- n + 4 for(i in 1:runs) { y <- rexp(n) z <- rexp(n, rate = 0.1) # get censored data t t <- pmin(y, z) delta <- as.numeric(z >= y) out <- survfit(Surv(t, delta)~1, conf.type = "plain") #Get S(t) then see if S(t_i) is in the CI for S(t_i) for i = 1 to n st <- exp( - out$time) ccov <- ccov + as.integer(st > out$low & st < out$up) clen <- clen + (out$up - out$low) out <- survfit(Surv(t, delta)~1) lcov <- lcov + as.integer(st > out$low & st < out$up) llen <- llen + (out$up - out$low) out <- survfit(Surv(t, delta)~1, conf.type = "log-log") llcov <- llcov + as.integer(st > out$low & st < out$up) lllen <- lllen + (out$up - out$low) # get plus 4 interval tmin <- min(t) tmax <- max(t) tp4 <- c(tmin/3, tmin/2, t, tmax + 1, tmax + 2) dp4 <- c(1, 1, delta, 1, 1) out <- survfit(Surv(tp4, dp4)~1, conf.type = "plain") outl <- out$low[ - c(1, 2, np3, np4)] outu <- out$up[ - c(1, 2, np3, np4)] p4cov <- p4cov + as.integer(st > outl & st < outu) p4len <- p4len + (outu - outl) } ccov <- ccov/runs lcov <- lcov/runs llcov <- llcov/runs p4cov <- p4cov/runs clen <- (sqrt(n) * clen)/runs llen <- (sqrt(n) * llen)/runs lllen <- (sqrt(n) * lllen)/runs p4len <- (sqrt(n) * p4len)/runs list(ccov = ccov, lcov = lcov, llcov = llcov, p4cov = p4cov, clen = clen, llen = llen, lllen = lllen, p4len = p4len) } lrdata <- function(n = 200, type = 3){ # Generates data for logistic regression. # If X|y=1 ~ N(mu_1,I) and X|Y=0 ~ N(0,I) then beta = mu_1 and alpha = -0.5 ||mu_1||^2. # # If q is changed, change the formula in the glm statement. q <- 5 y <- 0 * 1:n y[(n/2 + 1):n] <- y[(n/2 + 1):n] + 1 beta <- 0 * 1:q if(type == 1) { beta[1] <- 1 alpha <- -0.5 } if(type == 2) { beta <- beta + 1 alpha <- -q/2 } if(type == 3) { beta[1:3] <- 1 alpha <- -1.5 } x <- matrix(rnorm(n * q), nrow = n, ncol = q) if(type == 1) { x[(n/2 + 1):n, 1] <- x[(n/2 + 1 ):n, 1] + 1 } if(type == 2) { x[(n/2 + 1):n, ] <- x[(n/2 + 1 ):n, ] + 1 } if(type == 3) { x[(n/2 + 1):n, 1:3 ] <- x[(n/2 + 1 ):n, 1:3 ] + 1 } #X|y=0 ~ N(0, I) and X|y=1 ~ N(beta,I) # change formula to x[,1]+ ... + x[,q] with q out <- glm(y ~ x[, 1] + x[, 2] + x[, 3] + x[, 4] + x[,5], family = binomial) list(alpha = alpha, beta = beta, lrcoef = out$coef,x=x,y=y) } lrdata2<-function(n = 200, m = 10, type = 1) {# Generates data for logistic regression. # If q is changed, change the formula in the glm statement. q <- 5 y <- 1:n z <- y mv <- m + 0 * y beta <- 0 * 1:q if(type == 1) { beta <- beta + 2 alpha <- 0 } if(type == 2) { beta[1] <- 2 alpha <- 0 } if(type == 3) { beta[1:3] <- 2 alpha <- 0 } x <- matrix(rnorm(n * q), nrow = n, ncol = q) SP <- alpha + x %*% beta pv <- exp(SP)/(1 + exp(SP)) for(i in 1:n) y[i] <- rbinom(1, size = m, prob = pv[i]) z <- y/m # change formula to x[,1]+ ... + x[,q] with q out <- glm(z ~ x[, 1] + x[, 2] + x[, 3] + x[, 4] + x[, 5], family = binomial, weights = mv) list(x = x, y = y, mv = mv, alpha = alpha, beta = beta, lrcoef = out$coef) } lressp <- function(x,y,slices=10){ # Makes the response plot for logistic regression. # If X|y=1 ~ N(mu_1,I) and X|Y=0 ~ N(0,I) then beta = mu_1 and alpha = ||mu_1||^2. # Workstation need to activate a graphics # device with command "X11()" or "motif()." # # If q is changed, change the formula in the glm statement. q <- 5 # change formula to x[,1]+ ... + x[,q] with q out <- glm(y ~ x[, 1] + x[, 2] + x[, 3] + x[, 4] + x[,5], family = binomial) #ESP <- x %*% out$coef[-1] + out$coef[1] ESP <- predict(out) Y <- y plot(ESP,Y) abline(mean(y),0) fit <- y fit <- exp(ESP)/(1 + exp(ESP)) # lines(sort(ESP),sort(fit)) indx <- sort.list(ESP) lines(ESP[indx],fit[indx]) fit2 <- fit n <- length(y) val <- as.integer(n/slices) for(i in 1: (slices-1)){ fit2[((i-1)*val+1):(i*val)] <- mean(y[indx[((i-1)*val+1):(i*val)]]) } fit2[((slices-1)*val+1):n] <- mean(y[indx[((slices-1)*val+1):n]]) # fit2 is already sorted in order corresponding to indx lines(ESP[indx],fit2) #list(fit2=fit2,n=n,slices=slices,val=val) } lrplot<-function(x, y, mv, slices = 10, ff = 0.97, step = T){ # Makes response and OD plots for binomial logistic regression. # mv = (m1, ..., mn) where yi is bin(mi,p(SP)) # If step = T use step function, else use lowess. # Workstation: need to activate a graphics # device with command "X11()" or "motif()." # # If q is changed, change the formula in the glm statement. q <- 5 # change formula to x[,1]+ ... + x[,q] with q n <- length(y) Z <- y/mv out <- glm(Z ~ x[, 1] + x[, 2] + x[, 3] + x[, 4] + x[, 5], family = binomial, weights = mv) ESP <- predict(out) par(mfrow = c(1, 2),pty="s") plot(ESP, Z) abline(weighted.mean(Z, mv), 0) fit <- y fit <- exp(ESP)/(1 + exp(ESP)) indx <- sort.list(ESP) lines(ESP[indx], fit[indx]) if(step == T) { fit2 <- fit val <- as.integer(n/slices) for(i in 1:(slices - 1)) { fit2[((i - 1) * val + 1):(i * val)] <- weighted.mean(Z[ indx[((i - 1) * val + 1):(i * val)]], mv[indx[(( i - 1) * val + 1):(i * val)]]) } fit2[((slices - 1) * val + 1):n] <- weighted.mean(Z[indx[(( slices - 1) * val + 1):n]], mv[indx[((slices - 1) * val + 1):n]]) # fit2 is already sorted in order corresponding to indx lines(ESP[indx], fit2) } else lines(lowess(ESP, Z, f = ff), type = "s") title("a) ESS Plot") #get OD plot val <- exp(ESP)/(1 + exp(ESP)) Ehat <- mv * val Vmodhat <- Ehat * (1 - val) Vhat <- (y - Ehat)^2 plot(Vmodhat, Vhat) abline(0, 1) abline(0, 4) abline(lsfit(Vmodhat, Vhat)$coef) title("b) OD Plot") par(mfrow=c(1,1)) } lrplot2<-function(x, y, slices = 10, ff = 0.99, step = T){ # Makes the response plot for binary logistic regression. # If step = T use step function, else use lowess. # Workstation need to activate a graphics # device with command "X11()" or "motif()." # # If q is changed, change the formula in the glm statement. x <- as.matrix(x) q <- 5 # change formula to x[,1]+ ... + x[,q] with q out <- glm(y ~ x[, 1] + x[, 2] + x[, 3] + x[, 4] + x[, 5], family = binomial) ESP <- predict(out) Y <- y plot(ESP, Y) fit <- y fit <- exp(ESP)/(1 + exp(ESP)) # lines(sort(ESP),sort(fit)) indx <- sort.list(ESP) lines(ESP[indx], fit[indx]) if(step == T){ fit2 <- fit n <- length(y) val <- as.integer(n/slices) for(i in 1:(slices - 1)) { fit2[((i - 1) * val + 1):(i * val)] <- mean(y[indx[((i - 1) * val + 1):(i * val)]]) } fit2[((slices - 1) * val + 1):n] <- mean(y[indx[((slices - 1) * val + 1 ):n]]) # fit2 is already sorted in order corresponding to indx lines(ESP[indx], fit2)} else lines(lowess(ESP, Y, f = ff), type = "s") title("ESS Plot") } lmsviews<-function(x, Y, ii = 1){ # Trimmed views using lmsreg for 90, 80, ... 0 percent # trimming. Allows visualization of m # and crudely estimation of c beta in models # of the form y = m(x^T beta) + e. # Workstation: activate a graphics device # with commands "X11()" or "motif()." # R needs command "library(lqs)." # Advance the view with the right mouse button and # in R, highight "stop." x <- as.matrix(x) out <- cov.mcd(x) center <- out$center cov <- out$cov rd2 <- mahalanobis(x, center, cov) labs <- c("90%", "80%", "70%", "60%", "50%", "40%", "30%", "20%", "10%", "0%") tem <- seq(0.1, 1, 0.1) for(i in ii:10) { val <- quantile(rd2, tem[i]) b <- lmsreg(x[rd2 <= val, ], Y[rd2 <= val])$coef ESP <- x %*% b[-1] plot(ESP, Y) title(labs[i]) identify(ESP, Y) print(b) } } maha<-function(x){ # Generates the classical mahalanobis distances. center <- apply(x, 2, mean) cov <- var(x) return(sqrt(mahalanobis(x, center, cov))) } mlrplot4<-function(x, Y) {# This function is for R. Use mlrplot3 for R and Splus. # Makes response plot and residual plot with large # Cook's distances and Pena's distances highlighted. # Squares have large Cook's distances # and crosses have large Pena's distances. x <- as.matrix(x) out <- lsfit(x, Y) cook <- ls.diag(out)$cooks n <- dim(x)[1] p <- dim(x)[2] + 1 #get Pena's distances s one <- 1 + 0 * 1:n w <- cbind(one, x) cp <- t(w) %*% w h <- w %*% solve(cp) %*% t(w) s <- 0 * 1:n for(i in 1:n) { for(j in 1:n) { s[i] <- s[i] + (cook[j] * h[i, j]^2)/(h[i, i] * h[j, j]) } } tem <- cook > min(0.5, (2 * p)/n) medd <- median(s) madd <- mad(s, constant = 1) tem2 <- abs(s - medd) > 4.5 * madd bhat <- out$coef FIT <- bhat[1] + x %*% bhat[-1] cmar <- par("mar") par(mfrow = c(2, 1)) par(mar=c(4.0,4.0,2.0,0.5)) plot(FIT, Y) abline(0, 1) points(FIT[tem], Y[tem], pch = 0) points(FIT[tem2], Y[tem2], pch = 3) identify(FIT, Y) title("Response Plot") RES <- Y - FIT plot(FIT, RES) points(FIT[tem], RES[tem], pch = 0) points(FIT[tem2], RES[tem2], pch = 3) identify(FIT, RES) title("Residual Plot") par(mfrow = c(1, 1)) par(mar=cmar) } oddata<-function(n = 100, q = 5, k = 1) {# Generates overdispersion (negative binomial) data # for Poisson regression. y <- 1:n beta <- 0 * 1:q beta[1:3] <- 1 alpha <- -2.5 x <- matrix(rnorm(n * q), nrow = n, ncol = q) x <- 0.5 * x + 1 SP <- alpha + x %*% beta for(i in 1:n) { pr <- k/(exp(SP[i]) + k) y[i] <- rnbinom(1, size = k, pr) } list(x = x, y = y) } pb12<-function(resp,k=11,MSE=NA){ # Makes the effects and QQ plot for the PB(12) design. # Need the response resp given in standard order. # MSE should be given if k=11. # MSe is used for the MS for the E effect # Assumes number of replicates m = 1. # Could define c1-c11 as factors and use # out2<-aov(resp~c1+c2+c3+c4+c5+c6+c7+c8+c9+c10+c11) c1 <- c(1,1,-1,1,1,1,-1,-1,-1,1,-1,-1) c2 <- c(c1[11],c1[1:10],-1) c3 <- c(c2[11],c2[1:10],-1) c4 <- c(c3[11],c3[1:10],-1) c5 <- c(c4[11],c4[1:10],-1) c6 <- c(c5[11],c5[1:10],-1) c7 <- c(c6[11],c6[1:10],-1) c8 <- c(c7[11],c7[1:10],-1) c9 <- c(c8[11],c8[1:10],-1) c10 <- c(c9[11],c9[1:10],-1) c11 <- c(c10[11],c10[1:10],-1) Aeff <- as.real(t(c1)%*%resp/6) Beff <- as.real(t(c2)%*%resp/6) Ceff <- as.real(t(c3)%*%resp/6) Deff <- as.real(t(c4)%*%resp/6) Eeff <- as.real(t(c5)%*%resp/6) Feff <- as.real(t(c6)%*%resp/6) Geff <- as.real(t(c7)%*%resp/6) Heff <- as.real(t(c8)%*%resp/6) Jeff <- as.real(t(c9)%*%resp/6) Keff <- as.real(t(c10)%*%resp/6) Leff <- as.real(t(c11)%*%resp/6) MSA <- 3*(Aeff)^2 MSB <- 3*(Beff)^2 MSC <- 3*(Ceff)^2 MSD <- 3*(Deff)^2 MSe <- 3*(Eeff)^2 MSF <- 3*(Feff)^2 MSG <- 3*(Geff)^2 MSH <- 3*(Heff)^2 MSJ <- 3*(Jeff)^2 MSK <- 3*(Keff)^2 MSL <- 3*(Leff)^2 effects <- c(Aeff,Beff,Ceff,Deff,Eeff,Feff,Geff,Heff, Jeff,Keff=Keff,Leff) library(MASS) nquantiles <- qnorm(ppoints(11)) qqplot(nquantiles,effects) abline(lmsreg(nquantiles,sort(effects))$coef) title("Normal QQ plot for PB12 Design") x <- cbind(c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11) out <- lsfit(x[,1:k],resp) ls.print(out) if(k < 11) MSE <- sum(out$resid^2)/(11-k) SEeff <- sqrt(MSE/3) list(Aeff=Aeff,Beff=Beff,Ceff=Ceff,Deff=Deff,Eeff=Eeff,Feff=Feff, Geff=Geff,Heff=Heff,Jeff=Jeff,Keff=Keff,Leff=Leff,MSA=MSA,MSB=MSB, MSC=MSC,MSD=MSD,MSe=MSe,MSF=MSF,MSG=MSG,MSH=MSH,MSJ=MSJ,MSK=MSK, MSL=MSL,MSE=MSE,SEeff=SEeff) } pcisim<-function(nruns = 1000, n1 = 10, n2 = 10, mu1 = 0, mu2 = 0, var1 = 1, var2 = 1, dist = 1, alpha = 0.05){ #gets the pooled, modified pooled, and Welch 100 (1-alpha)% CIs for mu1 - mu2 #defaults are alpha = .05, lengths make the most sense if n1 = n2 #dist = 1 for x ~ N(mu1, var1), y ~ N(mu2, var2) #dist = 2 for x ~ EXP with mean mu1 variance var1, # y ~ EXP with mean mu2 variance var2 #dist = 3 for x ~ N(mu1, var1), y EXP with mean mu2 variance var2 diff <- mu1 - mu2 up <- 1 - alpha/2 pdf <- n1 + n2 - 2 rhohat <- n1/(n1 + n2) pcov <- 0 mpcov <- 0 wcov <- 0 plow <- 1:nruns pup <- plow mplow <- plow mpup <- plow wlow <- plow wup <- plow tpcut <- qt(up, pdf) mtpcut <- qt(up, n1 + n2 - 4) for(i in 1:nruns) { if(dist == 1) { x <- mu1 + sqrt(var1) * rnorm(n1) y <- mu2 + sqrt(var2) * rnorm(n2) } if(dist == 2) { x <- mu1 - sqrt(var1) + sqrt(var1) * rexp(n1) y <- mu2 - sqrt(var2) + sqrt(var2) * rexp(n2) } if(dist == 3) { x <- mu1 + sqrt(var1) * rnorm(n1) y <- mu2 - sqrt(var2) + sqrt(var2) * rexp(n2) } xbar <- mean(x) s1sq <- var(x) ybar <- mean(y) s2sq <- var(y) xmybar <- xbar - ybar sp <- (n1 - 1) * s1sq + (n2 - 1) * s2sq sp <- sqrt(sp/pdf) val <- sp * sqrt(1/n1 + 1/n2) #get pooled CI tem <- tpcut * val plow[i] <- xmybar - tem pup[i] <- xmybar + tem if(plow[i] < diff && pup[i] > diff) pcov <- pcov + 1 #get modified pooled CI thetahat <- s2sq/s1sq tauhatsq <- (1 - rhohat + rhohat * thetahat)/(rhohat + (1 - rhohat) * thetahat) tauhat <- sqrt(tauhatsq) tem <- mtpcut * val * tauhat mplow[i] <- xmybar - tem mpup[i] <- xmybar + tem if(mplow[i] < diff && mpup[i] > diff) mpcov <- mpcov + 1 #get Welch CI t1 <- s1sq/n1 t2 <- s2sq/n2 do <- (t1 + t2)^2/(t1^2/(n1 - 1) + t2^2/(n2 - 1)) d <- max(1, floor(do)) wtcut <- qt(up, d) val <- sqrt(s1sq/n1 + s2sq/n2) tem <- wtcut * val wlow[i] <- xmybar - tem wup[i] <- xmybar + tem if(wlow[i] < diff && wup[i] > diff) wcov <- wcov + 1 } pcov <- pcov/nruns plen <- sqrt(n1) * mean(pup - plow) mpcov <- mpcov/nruns mplen <- sqrt(n1) * mean(mpup - mplow) wcov <- wcov/nruns wlen <- sqrt(n1) * mean(wup - wlow) #print(mean(x)) #print(var(x)) #print(mean(y)) #print(var(y)) list(pcov = pcov, plen = plen, mpcov = mpcov, mplen = mplen, wcov = wcov, wlen = wlen ) } phdata<-function(n = 100, q = 3, k = 1, clam = 0.1){ # Generates data for Cox proportional hazards regression. # Follows Zhou (2001) # Get Weibull regression data with g(Y) = Y^(1/k) and # h_(g(Y))(t) = k t^(k-1) exp(SP). # Workstation: need to activate a graphics # device with command "X11()" or "motif()." # beta <- 1 + 0 * 1:q x <- matrix(rnorm(n * q), nrow = n, ncol = q)/2 SP <- x %*% beta lambdai <- exp(SP) y <- rexp(n, rate = lambdai) gy <- y^(1/k) cen <- rexp(n, rate = clam) cengy <- pmin(gy, cen) status <- as.numeric(cen >= gy) ##change formula for coxph with q #out <- coxph(Surv(cengy, status) ~ x[, 1] + x[, 2] + x[, 3]) #ESP <- x %*% out$coef list(x = x, time = cengy, status = status) } phgfit2<-function(x, time, status, slices = 9){ # Checks goodness of fit of Cox PH model, time is a vector of times. # KM curves should have the same shape if PH model is valid. # status is 0 for censored times and 1 otherwize. # Workstation need to activate a graphics # device with command "X11()" or "motif()." # In R type library(survival) # q <- 3 # change formula to x[,1]+ ... + x[,q] with q out <- coxph(Surv(time, status) ~ x[, 1] + x[, 2] + x[, 3]) ESP <- x %*% out$coef indx <- sort.list(ESP) n <- length(time) val <- as.integer(n/slices) if(slices <= 4) par(mfrow = c(2, 2)) if( (slices == 5) || (slices == 6)) par(mfrow = c(2, 3)) if(slices >= 7) par(mfrow = c(3, 3)) for(i in 1:slices){ lo <- (i-1)*val + 1 hi <- i*val if(i == slices) hi <- n tt <- time[indx[lo:hi]] ts <- status[indx[lo:hi]] zksi <- survfit(Surv(tt, ts)~1, conf.type = "none") plot(zksi) } par(mfrow=c(1,1)) } phsim<-function(n = 100, q = 3, k = 1, nruns = 1){ # Simulates censored response plots for Cox PH data. # Calls phdata and seyp. for(i in 1:nruns) { out <- phdata(n, q, k) seyp(out$x, out$time, out$status) } } phsim2 <- function(n = 100, p = 3, k = 1, slices = 9, nruns = 1){ # Simulates KM curves in slices. # KM curves should have the same shape if PH model is valid. # Calls phdata and phgfit2. for(i in 1:nruns) { out <- phdata(n, p, k) phgfit2(out$x, out$time, out$status, slices) } } phsim5 <- function(n = 99, k = 1, slices = 9){ # I may not have fixed the bug with a new version of R. # Simulates sliced survival plots for Cox PH regression with # pointwise CI bands. Follows Zhou (2001) to # get Weibull regression data with g(Y) = Y^(1/k) and # h_(g(Y))(t) = k t^(k-1) exp(SP). # # May not work in Splus. # # In R, type library(survival) p <- 3 ## change formula with p beta <- 1 + 0 * 1:p x <- matrix(rnorm(n * p), nrow = n, ncol = p)/2 SP <- x %*% beta lambdai <- exp(SP) y <- rexp(n, rate = lambdai) gy <- y^(1/k) cen <- rexp(n, rate = 0.1) time <- pmin(gy, cen) status <- as.numeric(cen >= gy) ##change formula for coxph with p x1<- x[,1]; x2 <- x[,2]; x3<-x[,3] xdata<-data.frame(cbind(time,status,x1,x2,x3)) out <- coxph(Surv(time, status) ~ x1 + x2 + x3,data=xdata) ESP <- x %*% out$coef indx <- sort.list(ESP) val <- as.integer(n/slices) if(slices <= 4) par(mfrow = c(2, 2)) if( (slices == 5) || (slices == 6)) par(mfrow = c(2, 3)) if(slices >= 7) par(mfrow = c(3, 3)) for(i in 1:slices){ lo <- (i-1)*val + 1 hi <- i*val if(i == slices) hi <- n tt <- time[indx[lo:hi]] ts <- status[indx[lo:hi]] zksi <- survfit(Surv(tt, ts)~1, conf.type = "none") ## change model formula for nv and plot with p midpt <- lo + floor((hi-lo)/2) nvt <- c(x[indx[midpt],1],x[indx[midpt],2],x[indx[midpt],3]) nv<-data.frame(time=1,status=1,x1=nvt[1],x2=nvt[3],x3=nvt[3]) plot(survfit(coxph(Surv(time, status) ~ x1 + x2 + x3), newdata = nv)) lines(zksi,type="p") } par(mfrow=c(1,1)) list(coef = out$coef) } piplot<-function(x, y, alpha = 0.05){ # For Unix, use X11() to turn on the graphics device before # using this function. # Makes a response plot with prediction limits added. x <- as.matrix(x) p <- dim(x)[2] + 1 n <- length(y) up <- 1:n low <- up out <- lsfit(x, y) tem <- ls.diag(out) lev <- tem$hat res <- out$residuals FIT <- y - res Y <- y corfac <- (1 + 15/n)*sqrt(n/(n - p)) val2 <- quantile(res, c(alpha/2, 1 - alpha/2)) #get lower and upper PI limits for each case for(i in 1:n) { val <- sqrt(1 + lev[i]) val3 <- as.single(corfac * val2[1] * val) val4 <- as.single(corfac * val2[2] * val) up[i] <- FIT[i] + val4 low[i] <- FIT[i] + val3 } zy <- c(min(low), Y, max(up)) zx <- c(min(FIT), FIT, max(FIT)) plot(zx, zy, type = "n") points(FIT, Y) abline(0, 1) points(FIT, up, pch = 17) points(FIT, low, pch = 17) } pisim<-function(n = 100, q = 7, nruns = 100, alpha = 0.05, eps = 0.1, shift = 9, type = 1){ # compares new and classical PIs for multiple linear regression # if type = 1 for N(0,1) errors, 2 for t3 errors, 3 for exp(1) - 1 errors # 4 for uniform(-1,1) errors, 5 for (1-eps) N(0,1) + eps N(0,(1+shift)^2) errors # constant = 1 so there are p = q+1 coefficients b <- 0 * 1:q + 1 cpicov <- 0 npicov <- 0 acpicov <- 0 opicov <- 0 val3 <- 1:nruns val4 <- val3 val5 <- val3 pilen <- matrix(0, nrow = nruns, ncol = 4) coef <- matrix(0, nrow = nruns, ncol = q + 1) corfac <- (1 + 15/n) * sqrt(n/(n - q - 1)) corfac2 <- sqrt(n/(n - q - 1)) for(i in 1:nruns) { x <- matrix(rnorm(n * q), nrow = n, ncol = q) if(type == 1) { y <- 1 + x %*% b + rnorm(n) xf <- rnorm(q) yf <- 1 + xf %*% b + rnorm(1) } if(type == 2) { y <- 1 + x %*% b + rt(n, df = 3) xf <- rnorm(q) yf <- 1 + xf %*% b + rt(1, df = 3) } if(type == 3) { y <- 1 + x %*% b + rexp(n) - 1 xf <- rnorm(q) yf <- 1 + xf %*% b + rexp(1) - 1 } if(type == 4) { y <- 1 + x %*% b + runif(n, min = -1, max = 1) xf <- rnorm(q) yf <- 1 + xf %*% b + runif(1, min = -1, max = 1) } if(type == 5) { err <- rnorm(n, sd = 1 + rbinom(n, 1, eps) * shift) y <- 1 + x %*% b + err xf <- rnorm(q) yf <- 1 + xf %*% b + rnorm(1, sd = 1 + rbinom(1, 1, eps ) * shift) } out <- lsfit(x, y) fres <- out$resid coef[i, ] <- out$coef yfhat <- out$coef[1] + xf %*% out$coef[-1] w <- cbind(1, x) xtxinv <- solve(t(w) %*% w) xf <- c(1, xf) hf <- xf %*% xtxinv hf <- hf %*% xf val <- sqrt(1 + hf) #get classical PI mse <- sum(fres^2)/(n - q - 1) val2 <- qt(1 - alpha/2, n - q - 1) * sqrt(mse) * val up <- yfhat + val2 low <- yfhat - val2 pilen[i, 1] <- up - low if(low < yf && up > yf) cpicov <- cpicov + 1 #get semiparametric PI val2 <- quantile(fres, c(alpha/2, 1 - alpha/2)) val3[i] <- as.single(corfac * val2[1] * val) val4[i] <- as.single(corfac * val2[2] * val) up <- yfhat + val4[i] low <- yfhat + val3[i] pilen[i, 2] <- up - low if(low < yf && up > yf) npicov <- npicov + 1 # asymptotically conservative PI val6 <- corfac2 * max(abs(val2)) val5[i] <- val6 * val up <- yfhat + val5[i] low <- yfhat - val5[i] pilen[i, 3] <- up - low if(low < yf && up > yf) acpicov <- acpicov + 1 # asymptotically optimal PI sres <- sort(fres) cc <- ceiling(n * (1 - alpha)) rup <- sres[cc] rlow <- sres[1] olen <- rup - rlow if(cc < n) { for(j in (cc + 1):n) { zlen <- sres[j] - sres[j - cc + 1] if(zlen < olen) { olen <- zlen rup <- sres[j] rlow <- sres[j - cc + 1] } } } up <- yfhat + corfac * val * rup low <- yfhat + corfac * val * rlow pilen[i, 4] <- up - low if(low < yf && up > yf) opicov <- opicov + 1 } pimnlen <- apply(pilen, 2, mean) mnbhat <- apply(coef, 2, mean) lcut <- mean(val3) hcut <- mean(val4) accut <- mean(val5) cpicov <- cpicov/nruns npicov <- npicov/nruns acpicov <- acpicov/nruns opicov <- opicov/nruns list(mnbhat = mnbhat, pimenlen = pimnlen, cpicov = cpicov, spicov = npicov, acpicov = acpicov, opicov = opicov, lcut = lcut, hcut = hcut, accut = accut) } poisontplt<-function(){ # Need the poison data frame from regdata. # This function plots y^L vs two way Anova fitted values # from the two way Anova of y^L on x where # L = -1, -0.5, 0, 0.5 or 1 except log(Y) is used if L = 0. # If plot is linear for L, use y^L (or log(Y) for L = 0) instead # of Y. This is a graphical method for a response transform. lam <- c(-1, -1/2, 0, 1/2, 1) xl <- c("1/Z", "1/sqrt(Z)", "LOG(Z)", "sqrt(Z)", "Z") par(mfrow=c(2,3)) attach(poison) for(i in 1:length(lam)) { if(lam[i] == 0) ytem <- log(poison$stime) else ytem <- poison$stime^lam[i] aovfit <- ytem - aov(ytem~ptype*treat,poison)$resid plot(aovfit, ytem, xlab = "TZHAT", ylab = xl[i]) abline(0, 1) #identify(aovfit, ytem) } par(mfrow=c(1,1)) detach(poison) ##using aov(ytem~.^2,poison) would fit ytem on ##ptype, treat, stime, the 3 two way interactions ##and the three way interaction } prdata <- function(n = 100, q = 5) {# Generates data for Poisson regression. y <- 0 * 1:n beta <- 0 * 1:q beta[1:3] <- 1 alpha <- -2.5 x <- matrix(rnorm(n * q), nrow = n, ncol = q) x <- 0.5*x + 1 SP <- alpha + x%*%beta y <- rpois(n,lambda=exp(SP)) # change formula to x[,1]+ ... + x[,q] with q #out <- glm(y ~ x[, 1] + x[, 2] + x[, 3] + # x[, 4] + x[,5], family = poisson) #list(alpha = alpha, beta = beta, prcoef = out$coef,x=x,y=y) list(x=x,y=y) } pressp <- function(x,y) {# Makes the response plot for Poisson regression. # Workstation: need to activate a graphics # device with command "X11()" or "motif()." # # If q is changed, change the formula in the glm statement. q <- 5 # change formula to x[,1]+ ... + x[,q] with q out <- glm(y ~ x[, 1] + x[, 2] + x[, 3] + x[, 4] + x[,5], family = poisson) #ESP <- x %*% out$coef[-1] + out$coef[1] ESP <- predict(out) Y <- y plot(ESP,Y) abline(mean(y),0) fit <- y fit <- exp(ESP) indx <- sort.list(ESP) lines(ESP[indx],fit[indx]) lines(lowess(ESP,y),type="s") } prplot<- function(x, y) {# Makes response plot, OD plot, weighted forward response # and residual plots for Poisson regression. # Workstation: need to activate a graphics # device with command "X11()" or "motif()." # # If q is changed, change the formula in the glm statement. q <- 5 # change formula to x[,1]+ ... + x[,q] with q out <- glm(y ~ x[, 1] + x[, 2] + x[, 3] + x[, 4] + x[, 5], family = poisson) ESP <- predict(out) par(mfrow = c(2, 2)) Y <- y plot(ESP, Y) abline(mean(Y), 0) Ehat <- exp(ESP) indx <- sort.list(ESP) lines(ESP[indx], Ehat[indx]) lines(lowess(ESP, Y), type = "s") title("a) Response Plot") Vhat <- (Y - Ehat)^2 plot(Ehat, Vhat) abline(0, 1) abline(0, 4) #lines(lowess(Ehat, Vhat)) #abline(lsfit(Ehat, Vhat)$coef) title("b) OD Plot") Z <- Y Z[Y < 1] <- Z[Y < 1] + 0.5 WRES <- sqrt(Z) * (log(Z) - x %*% out$coef[-1] - out$coef[1]) WFIT <- sqrt(Z) * log(Z) - WRES plot(WFIT, sqrt(Z) * log(Z)) abline(0, 1) #abline(lsfit(WFIT, sqrt(Z) * log(Z))$coef) title("c) WFRP") plot(WFIT, WRES) title("d) Wtd Residual Plot") } prsim<-function(n = 100, nruns = 1, type = 1, k = 1) {# Runs prplot nruns times on simulated data. # Type = 1 for Poisson regression data, Type = 2 for negative binomial regression data # Calls prdata, oddata, prplot. q <- 5 for(i in 1:nruns) { if(type == 1) out <- prdata(n, q) else out <- oddata(n, q, k) x <- out$x y <- out$y prplot(x, y) #identify(WFIT, WRES) } } rand <- function(n = 10, k = 3){ #randomize n units into k treatment groups of nearly equal size #groups[i] gives the group for the ith unit groups <- 1:n gsize <- as.integer(n/k) z <- sample(n) for(i in 1:k) { lo <- (i - 1) * gsize + 1 hi <- i * gsize if(i == k) hi <- n groups[z[lo:hi]] <- i } list(perm = z, groups = groups) } rand1way <- function(y,group,B=1000){ #computes the randomization F test for the 1 way Anova design #based on Ernst (2009) #y and group should have the same length if(length(y) != length(group)) stop("Response and factor vectors must be the same length") if(!is.factor(group)) group <- as.factor(group) Fstat <- NULL z <- anova(lm(y ~ group)) Fstat[1] <- z$F[1] Fpval <- z$P[1] Fdf <- z$Df n <- length(y) m <- max(B,floor(n*log(n))) for(i in 2:m){ Fstat[i] <- anova(lm(sample(y) ~ group))$F[1] } randpval <- sum(Fstat>=Fstat[1])/m list(rdist=Fstat,Fpval=Fpval,randpval=randpval) } seyp<-function(x, time, status){ # Makes the censored response plot for Cox proportional hazards regression. # Workstation: need to activate a graphics # device with command "X11()" or "motif()." # # If q is changed, change the formula in the coxph statement. q <- 3 # change formula to x[,1]+ ... + x[,q] with q out <- coxph(Surv(time, status) ~ x[, 1] + x[, 2] + x[, 3]) ESP <- x %*% out$coef plot(ESP, time, type = "n") points(ESP[status == 1], time[status == 1], pch = 3) points(ESP[status == 0], time[status == 0], pch = 1) } swhat <- function(stime,bhat,nv,ghat,lhat){ # Finds estimated survival function for Weibull regression # evaluated at x = nv. r <- length(stime) shat <- 1:r for(i in 1:r){ shat[i] <- exp(-exp(-ghat*bhat%*%nv)*lhat*stime[i]^ghat) } list(shat=shat) } tplot<-function(x, y){ # Use the rightmost mouse button to advance # the plot, and in R, highlight ``stop." # Values of y should be positive. # Assume the MLR model contains a constant, but # x is the design matrix without the vector of ones. # This function makes transformation plots from the MLR # of y^L on x where L = -1, -0.5, -1/3, 0, 1/3, 0.5, or 1 # except log(Y) is used if L = 0. # If plot is linear for L, use y^L (or log(Y) for L = 0) instead # of Y. This is a graphical method for a response transform. lam <- c(-1, -1/2, -1/3, 0, 1/3, 1/2, 1) xl <- c("1/Z", "1/sqrt(Z))", "Z**(-1/3)", "LOG(Z)", "Z**(1/3)", "sqrt(Z)", "Z") for(i in 1:length(lam)) { if(lam[i] == 0) ytem <- log(y) else ytem <- y^lam[i] fit <- ytem - lsfit(x,ytem)$resid plot(fit, ytem, xlab = "TZHAT", ylab = xl[i]) abline(0, 1) identify(fit, ytem) } } trviews<-function(x, Y, ii = 10, type = 3) {# Trimmed views for 0, 10, ..., 90 percent trimming. # Use ii = 10 for 0, ii = 9 for 10 then 0, ..., # ii = 1 for 90 then 80 ..., then 0 percent trimming. # Allows visualization of m and crudely estimation of # c beta in models of the form y = m(x^T beta) + e. # Workstation: activate a graphics device # with commands "X11()" or "motif()." # R needs command "library(MASS)." # Advance the view with the right mouse button and # in R, highight "stop." x <- as.matrix(x) if(type == 1) out <- cov.mcd(x) if(type == 2) out <- covmba(x) if(type > 2) out <- covfch(x) center <- out$center cov <- out$cov rd2 <- mahalanobis(x, center, cov) labs <- c("90%", "80%", "70%", "60%", "50%", "40%", "30%", "20%", "10%", "0%") tem <- seq(0.1, 1, 0.1) for(i in ii:10) { val <- quantile(rd2, tem[i]) b <- lsfit(x[rd2 <= val, ], Y[rd2 <= val])$coef ESP <- x %*% b[-1] plot(ESP, Y) title(labs[i]) identify(ESP, Y) print(b) } } twocub<-function(means,m=1,MSE=NA){ # Makes the effects and QQ plot for the 2^3 design. # Need the means of a 2^3 design replicated m times given # in standard order. MSE should be given if m > 1. # The commands out <- aov(y~A*B*C) and summary(out) # can be used to find the MSE. ca <- c(-1,1,-1,1,-1,1,-1,1) cb <- c(-1,-1,1,1,-1,-1,1,1) cc <- c(-1,-1,-1,-1,1,1,1,1) cab <- c(1,-1,-1,1,1,-1,-1,1) cac <- c(1,-1,1,-1,-1,1,-1,1) cbc <- c(1,1,-1,-1,-1,-1,1,1) cabc <- c(-1,1,1,-1,1,-1,-1,1) Aeff <- as.real(t(ca)%*%means/4) Beff <- as.real(t(cb)%*%means/4) Ceff <- as.real(t(cc)%*%means/4) ABeff <- as.real(t(cab)%*%means/4) ACeff <- as.real(t(cac)%*%means/4) BCeff <- as.real(t(cbc)%*%means/4) ABCeff <- as.real(t(cabc)%*%means/4) MSA <- 2*m*(Aeff)^2 MSB <- 2*m*(Beff)^2 MSC <- 2*m*(Ceff)^2 MSAB <- 2*m*(ABeff)^2 MSAC <- 2*m*(ACeff)^2 MSBC <- 2*m*(BCeff)^2 MSABC <- 2*m*(ABCeff)^2 SEeff <- NA if(m > 1) SEeff <- sqrt(MSE/(2*m)) effects <- c(Aeff,Beff,Ceff,ABeff,ACeff,BCeff,ABCeff) library(MASS) nquantiles <- qnorm(ppoints(7)) qqplot(nquantiles,effects) abline(lmsreg(nquantiles,sort(effects))$coef) title("Normal QQ plot") list(Aeff=Aeff,Beff=Beff,Ceff=Ceff,ABeff=ABeff,ACeff=ACeff, BCeff=BCeff,ABCeff=ABCeff,MSA=MSA,MSB=MSB,MSC=MSC,MSAB=MSAB, MSAC=MSAC,MSABC=MSABC,MSE=MSE,SEeff=SEeff) } twofourth<-function(means,m=1,MSE=NA){ # Makes the effects and QQ plot for the 2^4 design. # Need the means of a 2^4 design replicated m times given # in standard order. MSE should be given if m > 1. # The commands out <- aov(y~A*B*C*D) and summary(out) # can be used to find the MSE. ca <- rep(c(-1,1,-1,1,-1,1,-1,1),2) cb <- rep(c(-1,-1,1,1,-1,-1,1,1),2) cc <- rep(c(-1,-1,-1,-1,1,1,1,1),2) cd <- c(-1,-1,-1,-1,-1,-1,-1,-1,1,1,1,1,1,1,1,1) cab <- rep(c(1,-1,-1,1,1,-1,-1,1),2) cac <- rep(c(1,-1,1,-1,-1,1,-1,1),2) cad <- ca*cd cbc <- rep(c(1,1,-1,-1,-1,-1,1,1),2) cbd <- cb*cd ccd <- cc*cd cabc <- rep(c(-1,1,1,-1,1,-1,-1,1),2) cabd <- cab*cd cacd <- cac*cd cbcd <- cbc*cd cabcd <- cabc*cd Aeff <- as.real(t(ca)%*%means/8) Beff <- as.real(t(cb)%*%means/8) Ceff <- as.real(t(cc)%*%means/8) Deff <- as.real(t(cd)%*%means/8) ABeff <- as.real(t(cab)%*%means/8) ACeff <- as.real(t(cac)%*%means/8) ADeff <- as.real(t(cad)%*%means/8) BCeff <- as.real(t(cbc)%*%means/8) BDeff <- as.real(t(cbd)%*%means/8) CDeff <- as.real(t(ccd)%*%means/8) ABCeff <- as.real(t(cabc)%*%means/8) ABDeff <- as.real(t(cabd)%*%means/8) ACDeff <- as.real(t(cacd)%*%means/8) BCDeff <- as.real(t(cbcd)%*%means/8) ABCDeff <- as.real(t(cabcd)%*%means/8) MSA <- 4*m*(Aeff)^2 MSB <- 4*m*(Beff)^2 MSC <- 4*m*(Ceff)^2 MSD <- 4*m*(Deff)^2 MSAB <- 4*m*(ABeff)^2 MSAC <- 4*m*(ACeff)^2 MSAD <- 4*m*(ADeff)^2 MSBC <- 4*m*(BCeff)^2 MSBD <- 4*m*(BDeff)^2 MSCD <- 4*m*(CDeff)^2 MSABC <- 4*m*(ABCeff)^2 MSABD <- 4*m*(ABDeff)^2 MSACD <- 4*m*(ACDeff)^2 MSBCD <- 4*m*(BCDeff)^2 MSABCD <- 4*m*(ABCDeff)^2 SEeff <- NA if(m > 1) SEeff <- sqrt(MSE/(4*m)) effects <- c(Aeff,Beff,Ceff,Deff,ABeff,ACeff,ADeff,BCeff, BDeff,CDeff=CDeff,ABCeff,ABDeff,ACDeff,BCDeff,ABCDeff) if(m == 1) par(mfrow = c(2,2)) library(MASS) nquantiles <- qnorm(ppoints(15)) qqplot(nquantiles,effects) abline(lmsreg(nquantiles,sort(effects))$coef) title("Normal QQ plot") if(m == 1){ #ca <- as.factor(ca) #cb <- as.factor(cb) #cc <- as.factor(cc) #cd <- as.factor(cd) #using factors gives the wrong output out <- aov(means~(ca+cb+cc+cd)^2) print(summary.lm(out)) Fit <- means - out$resid Resid <- out$resid Y <- means plot(Fit,Y) abline(0,1) title("Response Plot") plot(Fit,Resid) abline(0,0) title("Residual Plot") par(mfrow = c(1,1)) } list(Aeff=Aeff,Beff=Beff,Ceff=Ceff,Deff=Deff,ABeff=ABeff,ACeff=ACeff, ADeff=ADeff,BCeff=BCeff,BDeff=BDeff,CDeff=CDeff,ABCeff=ABCeff,ABDeff=ABDeff, ACDeff=ACDeff,BCDeff=BCDeff,ABCDeff=ABCDeff,MSA=MSA,MSB=MSB,MSC=MSC,MSD=MSD, MSAB=MSAB,MSAC=MSAC,MSAD=MSAD,MSBC=MSBC,MSBD=MSBD,MSCD=MSCD,MSABC=MSABC, MSABD=MSABD,MSACD=MSACD,MSBCD=MSBCD,MSABCD=MSABCD,MSE=MSE,SEeff=SEeff) } vlung2 <- function(sexv=2){ # uses 4 slices instead of the 9 used by vlung # Check goodness of fit of Cox PH for R lung data. # for females use sexv = 2 # for males use sexv = 1, change the 2 to a 1 flung<-lung[lung$sex==sexv,] nflung<-na.omit(flung[,c("time","status","ph.ecog","ph.karno", "pat.karno","wt.loss")]) out <- coxph(Surv(time, status) ~ ph.ecog + ph.karno + pat.karno + wt.loss,data = nflung) x <- as.matrix(nflung[,c(3,4,5,6)]) time <- as.vector(nflung[,1]) status <- as.vector(nflung[,2]) ESP <- x %*% out$coef indx <- sort.list(ESP) n <- 85 val <- as.integer(n/4) cmar<-par("mar") par(mfrow = c(2, 2)) #par(mar=c(4.0,4.0,2.0,0.5)) for(i in 1:4){ lo <- (i-1)*val + 1 hi <- i*val if(i == 4) hi <- n tt <- time[indx[lo:hi]] ts <- status[indx[lo:hi]] zksi <- survfit(Surv(tt, ts)~1, conf.type = "none") ## change model formula for nv with p midpt <- lo + floor((hi-lo)/2) nvt <- c(x[indx[midpt],1],x[indx[midpt],2],x[indx[midpt],3],x[indx[midpt],4]) #take a typical value of time nv<-data.frame(time=300,status=1,ph.ecog=nvt[1],ph.karno=nvt[2], pat.karno=nvt[3],wt.loss=nvt[4]) plot(survfit(coxph(Surv(time, status) ~ ph.ecog + ph.karno + pat.karno + wt.loss,data = nflung), newdata = nv), xlab="Time",ylab="Estimated S(t)") lines(zksi,type="p") } par(mfrow=c(1,1)) par(mar=cmar)} vnwtco <- function(){ #visualizing NWTCO data ##type library(survival) #>nwtco[1,] # seqno instit histol stage study rel edrel age in.subcohort # 1 2 2 1 3 0 6075 25 FALSE z<-coxph(Surv(edrel,rel)~histol+instit+age+stage,data=nwtco) summary(z) ccex<-par("cex") par(mfrow=c(2,2)) par(cex=0.3) zz<- cox.zph(z) # adding cex=0.1 has no effect plot(zz, cex=0.1) print(zz) par(cex=ccex) #test seems to contradict plots which have fairly horizontal loess time <- as.vector(nwtco[,7]) status <- as.vector(nwtco[,6]) out <- coxph(Surv(time, status)~histol+instit+age+stage,data=nwtco) x <- as.matrix(nwtco[,c(3,2,8,4)]) ESP <- x %*% out$coef indx <- sort.list(ESP) n <- 4028 ##change par(mfrow=c(2,3)) with slices slices<-6 val <- as.integer(n/slices) cmar<-par("mar") ccex<-par("cex") par(mfrow = c(2, 3)) par(mar=c(4.0,4.0,2.0,0.5)) for(i in 1:slices){ lo <- (i-1)*val + 1 hi <- i*val if(i == slices) hi <- n tt <- time[indx[lo:hi]] ts <- status[indx[lo:hi]] zksi <- survfit(Surv(tt, ts)~1, conf.type = "none") ## change model formula for nv with p midpt <- lo + floor((hi-lo)/2) nvt <- c(x[indx[midpt],1],x[indx[midpt],2],x[indx[midpt],3], x[indx[midpt],4]) #use typical values for seqno,study,time, and in.subcohort nv<-data.frame(seqno=4089,instit=nvt[2],histol=nvt[1],stage=nvt[4],study=3, status=1,time=300,age=nvt[3],in.subcohort=TRUE) plot(survfit(coxph(Surv(time, status)~ histol+instit+age+stage,data = nwtco), newdata = nv), xlab="Time",ylab="Estimated S(t)",cex=0.1) lines(zksi,cex=0.1,type="p") } par(mfrow=c(1,1)) par(mar=cmar) par(cex=ccex) } vovar <- function(){ ####bug in new version of R for slice survival plot # uses 4 slices, calls swhat # check goodness of fit of Cox PH and Weibull PH # models for the ovarian cancer data time <- c(156, 1040, 59, 421, 329, 769, 365, 770, 1227, 268, 475, 1129, 464, 1206, 638, 563, 1106, 431, 855, 803, 115, 744, 477, 448, 353, 377) status <- c(1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0) treat <- c(1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0) age <- c(66, 38, 72, 53, 43, 59, 64, 57, 59, 74, 59, 53, 56, 44, 56, 55, 44, 50, 43, 39, 74, 50, 64, 56, 63, 58) ovar <- cbind(time,status,treat,age) #paste these commands for the LCR plot z <- survreg(Surv(ovar[,1],ovar[,2])~ovar[,3]+ovar[,4],dist="weibull") AFTESP <- 11.548 -.561*ovar[,3] -.079*ovar[,4] logT <- log(ovar[,1]) plot(AFTESP,logT,pch = ' ') points(AFTESP[ovar[,2]==0],log(ovar[ovar[,2]==0,1]),pch=1) points(AFTESP[ovar[,2]==1],log(ovar[ovar[,2]==1,1]),pch=3) abline(0,1) #paste these commands for the Weibull EE plot z <- survreg(Surv(ovar[,1],ovar[,2])~ovar[,3]+ovar[,4],dist="weibull") zx <- cbind(ovar[,3],ovar[,4]) zc <- coxph(Surv(ovar[,1],ovar[,2])~ovar[,3]+ovar[,4]) sighat<-z$scale ESPC <- zx%*%zc$coef ESPW <- -zx%*%z$coef[-1]/sighat plot( ESPW, ESPC) abline(0,1) title("a) Weibull") cor( ESPW, ESPC) #paste these commands for the Exponential EE plot ze <- survreg(Surv(ovar[,1],ovar[,2])~ovar[,3]+ovar[,4],dist="exponential") ESPE <- -zx%*%ze$coef[-1] plot( ESPE, ESPC) abline(0,1) title("b) Exponential") ##########bug in plot below for new version of R #type vovar() to produce the slice survival plot ghat <- 1/z$scale ahat <- z$coef[1] bhat <- z$coef[-1] lhat <- exp(-ahat*ghat) stime <- sort(time) x <- zx ESP <- ESPC indx <- sort.list(ESP) n <- 26 val <- as.integer(n/4) cmar<-par("mar") par(mfrow = c(2, 2)) #par(mar=c(4.0,4.0,2.0,0.5)) for(i in 1:4){ lo <- (i-1)*val + 1 hi <- i*val if(i == 4) hi <- n tt <- time[indx[lo:hi]] ts <- status[indx[lo:hi]] zksi <- survfit(Surv(tt, ts)~1, conf.type = "none") ## change model formula for nv with p midpt <- lo + floor((hi-lo)/2) nv <- c(x[indx[midpt],1],x[indx[midpt],2]) plot(survfit(coxph(Surv(time, status)~ ovar[,3]+ovar[,4]), newdata = nv),xlab="Time",ylab="Estimated S(t)") sfit <- swhat(stime,bhat,nv,ghat,lhat)$shat points(stime,sfit,pch=3) lines(zksi,type="p") } par(mfrow=c(1,1)) par(mar=cmar) } weyp<- function(x, time, status){ # Makes the log censored response plot for Weibull proportional hazards regression. # Workstation: need to activate a graphics # device with command "X11()" or "motif()." # # If q is changed, change the formula in the glm statement. q <- 3 # change formula to x[,1]+ ... + x[,q] with q out <- survreg(Surv(time, status) ~ x[, 1] + x[, 2] + x[, 3]) ESP <- x %*% out$coef[-1] + out$coef[1] plot(ESP, log(time), type = "n") points(ESP[status == 1], log(time[status == 1]), pch = 3) points(ESP[status == 0], log(time[status == 0]), pch = 1) abline(0, 1) #abline(lsfit(ESP[status == 1], log(time[status == 1]))$coef) } wlsplot<-function(x, y, w){ # Make OLS response and residual plots, # then transform data with weights and make OLS plots # based on weights to check that the weights are reasonable. # use X11() to turn on the graphics device before using this function ols <- lsfit(x, y) Y <- y Z <- sqrt(w) * y u <- sqrt(w) * x uu <- sqrt(w) u <- cbind(uu, u) wls <- lsfit(u, Z, intercept = F) par(mfrow = c(2, 2)) FIT <- Y - ols$resid plot(FIT, Y) abline(0, 1) title("a) OLS Response Plot") RESID <- ols$resid plot(FIT, RESID) title("b) OLS Residual Plot") ZFIT <- Z - wls$resid plot(ZFIT, Z) abline(0, 1) title("c) WLS Response Plot") ZRESID <- wls$resid plot(ZFIT, ZRESID) title("d) WLS Residual Plot") list(bols = ols$coef, bwls = wls$coef) par(mfrow=c(1,1)) } wphsim <- function(n = 99, k = 1, slices = 9){ # Generates data for Weibull PH regression using Zhou (2001). # Makes a slice survival plot for Weibull regression. # The AFT ESP is used instead of the PH ESP, so survival # increases as the AFT ESP increases. # Uses crosses for Weibull and circles for KM. # Get Weibull regression data with g(Y) = Y^(1/k) and # h_(g(Y))(t) = k t^(k-1) exp(SP). # # Calls function swhat. # # May not work in Splus. # # In R, type library(survival) p <- 3 ## change formula with p beta <- 1 + 0 * 1:p x <- matrix(rnorm(n * p), nrow = n, ncol = p)/2 SP <- x %*% beta lambdai <- exp(SP) y <- rexp(n, rate = lambdai) gy <- y^(1/k) cen <- rexp(n, rate = 0.1) time <- pmin(gy, cen) status <- as.numeric(cen >= gy) ##change formula for survreg with p out <- survreg(Surv(time, status) ~ x[, 1] + x[, 2] + x[, 3]) ESP <- x %*% out$coef[-1] ghat <- 1/out$scale ahat <- out$coef[1] bhat <- out$coef[-1] lhat <- exp(-ahat*ghat) stime <- sort(time) indx <- sort.list(ESP) val <- as.integer(n/slices) if(slices <= 4) par(mfrow = c(2, 2)) if( (slices == 5) || (slices == 6)) par(mfrow = c(2, 3)) if(slices >= 7) par(mfrow = c(3, 3)) for(i in 1:slices){ lo <- (i-1)*val + 1 hi <- i*val if(i == slices) hi <- n tt <- time[indx[lo:hi]] ts <- status[indx[lo:hi]] zksi <- survfit(Surv(tt, ts)~1, conf.type = "none") ## change model formula for nv with p midpt <- lo + floor((hi-lo)/2) nv <- c(x[indx[midpt],1],x[indx[midpt],2],x[indx[midpt],3]) sfit <- swhat(stime,bhat,nv,ghat,lhat)$shat plot(stime,sfit,pch=3) lines(zksi,type="p") } par(mfrow=c(1,1)) list(coef = out$coef) } wregsim<-function(n = 100, q = 3, k = 1, nruns = 1){ # Simulates log censored response plots for Weibull PH data. # calls phdata and weyp for(i in 1:nruns) { out <- phdata(n, q, k) weyp(out$x, out$time, out$status) } } wregsim2<-function(n = 100, k = 1, nruns = 1){ # Simulates EE plots for Weibull proportional hazards AFT data. # calls phdata # R users need to type library(survival) corr <- 1:nruns for(i in 1:nruns) { p <- 3 # change formula to x[,1]+ ... + x[,p] with p out <- phdata(n, p, k) x <- out$x time <- out$time status <- out$status outp <- coxph(Surv(time, status) ~ x[, 1] + x[, 2] + x[, 3]) outw <- survreg(Surv(time, status) ~ x[, 1] + x[, 2] + x[, 3]) shat <- outw$scale ESPC <- x %*% outp$coef ESPW <- -(x %*% outw$coef[-1])/shat plot( ESPW, ESPC) corr[i] <- cor(ESPW, ESPC) abline(0, 1) } mncorr <- mean(corr) list(mncorr=mncorr) } wregsim3<-function(n = 100, k = 1, lam = 0.01, nruns = 100){ # See if alphahat and betahat are consistent estimators. # Calls phdata. # want mnint = 0, mncoef[i] = -1/k, mnscale = 1/k q <- 3 wout <- matrix(0, nrow = nruns, ncol = 5) for(i in 1:nruns) { out <- phdata(n, q, k, lam) x <- out$x time <- out$time status <- out$status outw <- survreg(Surv(time, status) ~ x[, 1] + x[, 2] + x[, 3]) wout[i, 1:4] <- outw$coef wout[i, 5] <- outw$scale } wmn <- apply(wout, 2, mean) mnint <- wmn[1] mncoef <- wmn[2:4] mnscale <- wmn[5] list(mnint = mnint, mncoef = mncoef, mnscale = mnscale) }