# tspack is a collection of R functions
#for Math 474 Time Series, Fall 2013.

##  source("http://parker.ad.siu.edu/Olive/tspack.txt")
##  source("http://parker.ad.siu.edu/Olive/tsdata.txt")
##a few functions need the forecast library
##library(forecast)

##install.packages(forecast)

acfpacf<-function(Y){
#makes the acf and pacf of a time series Y
#Note that the ACF starts at lag 0.
W <- as.vector(Y)
par(mfrow=c(2,1))
acf(W)
pacf(W)
par(mfrow=c(1,1))
}


acormat<-function(zz){
#Let zz be the output of the arima function, eg zz <- arima(lh,c(1,0,0)).
#Makes the asymptotic correlation matrix of the estimated coefficients. 
#Want these correlations to be small.
se <- sqrt(diag(zz$var.coef))
Dinv <- diag(1/se)
cormat <- Dinv %*% zz$var.coef %*% Dinv
return(cormat)
}


aicmat <- function(Y,dd=0,pmax=5,qmax=5,pbig=15,qbig=15){
#makes a matrix of AICs - min(AIC) for ARIMA(p,dd,q) models for time series Y
#uses method = "ML" to reduce errors
#reduce pmax by 1 if an error message occurs, may need to reduce pbig or qmax
#minaic is the min aic for the arima(p,dd,q) models, the ari and ima models
#could have lower aic
Y <- as.vector(Y)
aics <- matrix(, (pmax+1), (qmax+1), dimnames=list(p=0:pmax, q=0:qmax))
ariaics <- 1:pbig
imaaics <- 1:qbig
for(p in 0:pmax)
   for(q in 0:qmax) aics[1+p, 1+q] <- arima(Y, c(p,d=dd,q), method = "ML",
        optim.control = list(maxit = 500))$aic
minaic <- min(aics, na.rm = TRUE)
aics <- round(aics - minaic, 2)
for(p in 1:pbig) ariaics[p] <- arima(Y,c(p,d=dd,0), method = "ML",
             optim.control = list(maxit = 500))$aic
for(q in 1:qbig) imaaics[q] <- arima(Y,c(0,d=dd,q), method = "ML",
             optim.control = list(maxit = 500))$aic
ariaics <- round(ariaics - minaic, 2)
imaaics <- round(imaaics - minaic, 2)
list(aics=aics, ariaics=ariaics, imaaics=imaaics)
}


arboot<-function(Y,B=100,pmax=10,c=0.01){
#Bootstraps AR(p) model selection using the parametric bootstrap.
#assumes n > 10 pmax and adds cB full model bootstrap samples.
#The full model and betahatmix are also bootstrapped.
out <- ar.yw(Y,aic=FALSE,order.max=pmax)
phihat = out$ar
n<-length(Y)
nmpmax <- n-pmax
sig<-sqrt(out$var.pred)
d <- ceiling(c*B)
bpd <- B + d
bp1 <- B + 1
betas <- matrix(0,nrow=bpd,ncol=pmax)
btmix <- betas
btfull <- betas
bhatvs <- 0*1:pmax
outvs<-ar.yw(Y,order.max=pmax)
ord<-outvs$order
if(ord>0)
bhatvs[1:ord]<-outvs$ar
for(i in 1:B) {
   val <- sample(1:nmpmax,1)
   ysel <- Y[val:(val+pmax)]
   yb <- arima.sim(n=n, list(ar=phihat), innov = rnorm(n,sd=sig),n.start=pmax,
   start.innov=ysel) #might be using ysel as errors instead of Ys
   #yb <- arima.sim(n=n, list(ar=phihat),sd=sig) 
   tem<-ar.yw(yb,order.max=pmax)
   ord<-tem$order
   if(ord>0)
   betas[i,1:ord]<-tem$ar
   tem <- ar.yw(yb,aic=FALSE,order.max=pmax)
   btfull[i,] <- tem$ar
#mix
if(ord>0){
   val <- sample(1:nmpmax,1)
   ysel <- Y[val:(val+pmax)]
   yb <- arima.sim(n=n, list(ar=phihat), innov = rnorm(n,sd=sig),n.start=pmax,
   start.innov=ysel)
   # yb <- arima.sim(n=n, list(ar=phihat),sd=sig) 
   tem<-ar.yw(yb,aic=FALSE,order.max=ord)
   btmix[i,1:ord]<-tem$ar}
}
for(i in bp1:bpd){
   val <- sample(1:nmpmax,1)
   ysel <- Y[val:(val+pmax)]
   yb<-arima.sim(n=n, list(ar=phihat), innov = rnorm(n,sd=sig),n.start=pmax,
   start.innov=ysel)
   # yb <- arima.sim(n=n, list(ar=phihat),sd=sig) 
   tem <- ar.yw(yb,aic=FALSE,order.max=pmax)
   betas[i,] <- tem$ar
}
btmix[bp1:bpd,] <- betas[bp1:bpd,]
btfull[bp1:bpd,] <- betas[bp1:bpd,]
list(btfull=btfull,betas=betas,btmix=btmix,bhatvs=bhatvs)
}


arboot2<-function(Y,B=100,pmax=10,c=0.01){
#Bootstraps AR(p) model selection using the residual bootstrap.
#assumes n > 10 pmax and adds cB full model bootstrap samples.
#The full model and betahatmix are also bootstrapped.
out <- ar.yw(Y,aic=FALSE,order.max=pmax)
phihat = out$ar
n<-length(Y)
sig<-sqrt(out$var.pred)
res<-out$res[(pmax+1):n] #scale the residuals to have 0 mean
m<-length(res) #want var(res) near sigmahat^2
res <- (res-mean(res))*sqrt(m/(m-pmax))
d <- ceiling(c*B)
bpd <- B + d
bp1 <- B + 1
nmpmax <- n-pmax
bhatvs <- 0*1:pmax
outvs<-ar.yw(Y,order.max=pmax)
ord<-outvs$order
if(ord>0)
bhatvs[1:ord]<-outvs$ar
betas <- matrix(0,nrow=bpd,ncol=pmax)
btmix <- betas
btfull <- betas
for(i in 1:B) {
   resb <- sample(res,size=n,replace=TRUE)
   val <- sample(1:nmpmax,1)
   ysel <- Y[val:(val+pmax)]
   yb <- arima.sim(n=n, list(ar=phihat), innov = resb,n.start=pmax,
   start.innov=ysel)
   tem<-ar.yw(yb,order.max=pmax)
   ord<-tem$order
   if(ord>0)
   betas[i,1:ord]<-tem$ar
   tem <- ar.yw(yb,aic=FALSE,order.max=pmax)
   btfull[i,] <- tem$ar
#mix
if(ord>0){
   resb <- sample(res,size=n,replace=TRUE)
   val <- sample(1:nmpmax,1)
   ysel <- Y[val:(val+pmax)]
   yb <- arima.sim(n=n, list(ar=phihat), innov = resb,n.start=pmax,
   start.innov=ysel)
   tem<-ar.yw(yb,aic=FALSE,order.max=ord)
   btmix[i,1:ord]<-tem$ar}
}
for(i in bp1:bpd){ #full model bootstrap
   resb <- sample(res,size=n,replace=TRUE)
   val <- sample(1:nmpmax,1)
   ysel <- Y[val:(val+pmax)]
   yb <- arima.sim(n=n, list(ar=phihat), innov = resb,n.start=pmax,
   start.innov=ysel)
   tem <- ar.yw(yb,aic=FALSE,order.max=pmax)
   betas[i,] <- tem$ar
}
btmix[bp1:bpd,] <- betas[bp1:bpd,]
btfull[bp1:bpd,] <- betas[bp1:bpd,]
list(btfull=btfull,betas=betas,btmix=btmix,bhatvs=bhatvs)
}


arboottest<-function(Y,pmax=10,BB=1000,alph=0.05){
#gets CIs for phi_k for k = 1, ..., pmax using parametric and residual bootstrap
#test for whether values k:pmax of phi are zeroes with parametric bootstrap
#want n > 10 pmax
#if X is ARIMA(p,1,0), use Y <- diff(X)
#calls arboot, arboot2
cipboot <- matrix(0,nrow=pmax,ncol=2)
cipmix <- cipboot
cirboot <- cipboot
cirmix <- cipboot
cifull <- cipboot
tstat <- 1:pmax
tcut <- 1:pmax
zcut<- qnorm((1-alph/2))
zfull <- arima(Y,c(pmax,0,0))
secut <- zcut*sqrt(diag(zfull$var.coef))[-(pmax+1)]
cifull[,1] <- zfull$coef[-(pmax+1)]-secut
cifull[,2] <- zfull$coef[-(pmax+1)]+secut
outp <- arboot(Y,B=BB,pmax=pmax)
outr <- arboot2(Y,B=BB,pmax=pmax)
for(j in 1:pmax){
cipboot[j,] <- shorth3(outp$betas[,j],alpha=alph)$shorth
cipmix[j,] <- shorth3(outp$btmix[,j],alpha=alph)$shorth
cirboot[j,] <- shorth3(outr$betas[,j],alpha=alph)$shorth
cirmix[j,] <- shorth3(outr$btmix[,j],alpha=alph)$shorth
}
#test whether the values k:pmax of phi are 0 with pred reg method
for(k in 1:pmax){
    gg <- pmax - k + 1
    tem <- confreg2(outp$betas[,k:pmax],g=gg,alpha=alph)
    tstat[k] <- tem$D0
    tcut[k] <- tem$cuplim
}                
list(cipboot=cipboot,cipmix=cipmix,cirboot=cirboot,cirmix=cirmix,cifull=cifull,
tstat=tstat,tcut=tcut)
}


ardata<-function(n = 200)
{#simulates AR(p=2) data
	y <- 1:(2 * n)
	y[1] <- sum(rnorm(3))
	y[2] <- y[1] + sum(rnorm(2))
	for(i in 3:(2 * n)) {
		y[i] <- y[i - 1] - 0.89 * y[i - 2] + rnorm(1)
	}
	list(arts = y[(n + 1):(2 * n)])
}


arimapisim<-function(n=100,nruns=50,kmax=5,etype=1,tstype=1,
tdf=5,phi=c(0.7,0.1,-0.4),theta=c(0.1),pen=2,dd=0,burn=100,alph=0.05){
#Needs auto.arima from library(forecast) and calls armamsel2.
#for an arima(ps,dd,qs) true model, estimates r = max(ps,qs)
#assumes n >= 20 (kmax),  full model beta = (phi1,...,phikmax,theta1,...,thetakmax)
#dd is the amount of difference, dd=0,1,or 2, assumed known
#Fits several 1 step ahead PIs after model selection with auto.arima and method
#that uses Potscher (1990).
#There are models where nruns need to be near 50 or 100 or the program fails,
#especially if dd > 0. See armapisim.
#Simulates tstype time series of length n, and finds proportion
#of time Y_n+1 is in its 95% PI.
#The PI may be asymptotically optimal.
#Average PI length is also given. 
#Gets the 1  step ahead PIs based on forecast residuals.
#etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom
#etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors
#calls locpi, locpi2
#Finally gets the normal PIs, although the z cutoff is replaced
#by a t cutoff to increase coverage.
kp1 <- kmax+1
rhat<-1:nruns
beta<-0*1:(2*kmax)
tem<- 1:nruns   
ord <- cbind(tem,tem,tem,tem) #order
if(tstype==1){#tstype=1 for AR(1) model
phi<-c(0.5)
beta[1] <- 0.5
ps<-1 
qs <- 0
rr=1
}
if(tstype==2) { #tstype=2 for AR(2) model
phi=c(0.5,0.33)  
beta[c(1,2)]<-phi
ps<-2
qs<-0
rr=2
}
if(tstype==3){#tstype=3 for MA(1) model
theta<-c(-0.5)
beta[(kmax+1)] <- -0.5
ps<-0 
qs <- 1
rr=1
}
if(tstype==4) { #tstype=4 for MA(2) model
theta=c(-0.5,0.5)  
beta[c(kmax+1,kmax+2)]<-theta
ps<-0
qs<-2
rr=2
}
if(tstype==5){#need kmax > 2
#tstype=5 for ARMA(3,1) model
phi <- c(0.7,0.1,-0.4)
theta <- 0.1
beta[c(1:3,kmax+1)] <- c(phi,0.1)
ps<-3
qs<-1
rr=3
}
if(tstype==6){#need kmax >= rr
#tstype=6 for ARMA model selected by user
phi <- as.vector(phi)
theta <- as.vector(theta)
ps<-length(phi)
qs<-length(theta)
rr=max(ps,qs)
beta[c(1:ps,(kmax+1):(kmax+qs))] <- c(phi,theta)
}
np1 <- n + 1
len <- 1:nruns
opilen <- len 
opilen2 <- len 
npilen <- len 
h95len <- len
h95len2 <- len
coverage <- 0
opicov <- 0
opicov2 <- 0
npicov <- 0
h95cov <- 0
h95cov2 <- 0
#use n-p-q in general
tcut <- qt((1-alph/2),n-2)
for(i in 1:nruns){

 if(tstype<3){
  if (etype==1) 
   Y <- arima.sim(n=np1, list(order = c(ps,dd,qs), ar =c(phi)), innov = rnorm(np1))
  if(etype==2)
   Y <- arima.sim(n=np1, list(order = c(ps,dd,qs), ar =c(phi)), innov = rt(np1,tdf))
  if(etype==3)
   Y <- arima.sim(n=np1, list(order = c(ps,dd,qs), ar =c(phi)), innov =          
                  runif(np1,min=-1,max=1))   
  if(etype==4)
   Y <- arima.sim(n=np1, list(order = c(ps,dd,qs), ar =c(phi)), innov = (rexp(np1)-1))
 }
 if(tstype==3 || tstype==4){
 if (etype==1) 
   Y <- arima.sim(n=np1, list(order = c(ps,dd,qs), ma=c(theta)), innov = rnorm(np1))
  if(etype==2)
   Y <- arima.sim(n=np1, list(order = c(ps,dd,qs), ma=c(theta)), innov = rt(np1,tdf))
  if(etype==3)
   Y <- arima.sim(n=np1, list(order = c(ps,dd,qs), ma=c(theta)), innov =          
                  runif(np1,min=-1,max=1))   
  if(etype==4)
   Y <- arima.sim(n=np1, list(order = c(ps,dd,qs), ma=c(theta)), innov = (rexp(np1)-1))
 }
    if(tstype==5){
  if (etype==1) 
 Y <- arima.sim(n=np1,list(order = c(ps,dd,qs),ar=c(phi),ma=c(theta)),innov = rnorm(np1),n.start=burn)
  if(etype==2)
 Y <- arima.sim(n=np1,list(order = c(ps,dd,qs),ar=c(phi),ma=c(theta)),innov = rt(np1,tdf),n.start=burn)
  if(etype==3)
   Y <- arima.sim(n=np1, list(order = c(ps,dd,qs),ar=c(phi),ma=c(theta)), innov =          
                  runif(np1,min=-1,max=1),n.start=burn)   
  if(etype==4)
 Y <- arima.sim(n=np1,list(order = c(ps,dd,qs), ar=c(phi),ma=c(theta)),innov=(rexp(np1)-1),n.start=burn)
 }
    if(tstype==6){  #arma model selected by the user
  if (etype==1) 
 Y <- arima.sim(n=np1,list(order = c(ps,dd,qs), ar=c(phi),ma=c(theta)),innov = rnorm(np1),n.start=burn)
  if(etype==2)
 Y <- arima.sim(n=np1,list(order = c(ps,dd,qs), ar=c(phi),ma=c(theta)),innov = rt(np1,tdf),n.start=burn)
  if(etype==3)
   Y <- arima.sim(n=np1, list(order = c(ps,dd,qs), ar=c(phi),ma=c(theta)), innov =          
                  runif(np1,min=-1,max=1),n.start=burn)   
  if(etype==4)
  Y <- arima.sim(n=np1,list(order = c(ps,dd,qs), ar=c(phi),ma=c(theta)),innov=(rexp(np1)-1),n.start=burn)
  }

   y <- Y[1:n]
   yf <- Y[np1]
   tem <- locpi(y,alpha=alph) #PI ignore TS structure
  LPI <- tem$LPI
  UPI <- tem$UPI
  len[i] <- UPI - LPI
  if(LPI < yf && yf < UPI)
   coverage <- coverage + 1
#ARMA(p,q) model selection

#get the l-step ahead forecasts for l = 1
if(dd==0){
out<-auto.arima(y,d=dd,max.p=kmax,max.q=kmax,seasonal=FALSE,ic="aicc",
stationary=TRUE)
temp <- out %>% forecast(h=1)
outt <- out
}
if(dd==1){
out<-auto.arima(y,d=dd,max.p=kmax,max.q=kmax,seasonal=FALSE,ic="aicc",
stationary=FALSE)
pI<-out$arma[1]
qI<-out$arma[2]  
outt<-arima(y,order=c(pI,dd,qI),method="ML")
temp <- outt %>% forecast(h=1)
}
if(dd==2){
out<-auto.arima(y,d=dd,max.p=kmax,max.q=kmax,seasonal=FALSE,ic="aicc",
stationary=FALSE)
pI<-out$arma[1]
qI<-out$arma[2]  
outt<-arima(y,order=c(pI,dd,qI),method="ML")
temp <- outt %>% forecast(h=1)
}
yfhat<-temp$mean[1] #1 step ahead forecast
#get the 1 step ahead PI
res <- outt$resid
m <- length(res)
kk<-length(outt$coef)
am <- (1+15/m)*sqrt(m/(m-kk))
tem <- locpi2(res,k=kk,alph=alph)
low <- yfhat+am*tem$LPI
up <- yfhat+am*tem$UPI
if(low < yf && yf < up)
   opicov <- opicov + 1
opilen[i] <- up-low
#get PI from forecast package
up<-temp$upper[2]
low <- temp$lower[2]
if(low < yf && yf < up)
   h95cov <- h95cov + 1
h95len[i] <- up-low
#get the normal PI
se <- sqrt(out$sigma)
tcut <- qt((1-alph/2),n-kk)
  LPI <- yfhat - tcut*se
  UPI <- yfhat + tcut*se
  npilen[i] <- UPI-LPI
  if(LPI < yf && yf < UPI)
   npicov <- npicov + 1
#use a better model selection method
if(dd==0){
out2<-armamsel2(y,kmax=kmax,pen=pen)
tem<-arima(y,order=c(out2$pI,dd,out2$qI),method="ML")
}
if(dd==1){
Z <- as.vector(y)
Z[1] <- NA 
Z[-1] <- diff(y)
out2<-armamsel2(Z,kmax=kmax,pen=pen)
tem<-arima(y,order=c(out2$pI,dd,out2$qI),method="ML")
}
if(dd==2){
Z <- as.vector(y)
Z[1:2] <- NA 
Z[-c(1,2)] <- diff(Y,differences=2)
out2<-armamsel2(Z,kmax=kmax,pen=pen)
tem<-arima(y,order=c(out2$pI,dd,out2$qI),method="ML")
}
temp <- tem %>% forecast(h=1)
yfhat<-temp$mean[1] #1 step ahead forecast
#get the 1 step ahead PI
res <- tem$resid
m <- length(res)
kk<-length(tem$coef)
am <- (1+15/m)*sqrt(m/(m-kk))
tem2 <- locpi2(res,k=kk,alph=alph)
low <- yfhat+am*tem2$LPI
up <- yfhat+am*tem2$UPI
if(low < yf && yf < up)
   opicov2 <- opicov2 + 1
opilen2[i] <- up-low
#get PI from forecast package
up<-temp$upper[2]
low <- temp$lower[2]
if(low < yf && yf < up)
   h95cov2 <- h95cov2 + 1
h95len2[i] <- up-low
}
mnpilen <- mean(len)
coverage <- coverage/nruns #locPI ignores ARMA order
opilen <- mean(opilen)
opicov <- opicov/nruns
opilen2 <- mean(opilen2)
opicov2 <- opicov2/nruns
h95cov <- h95cov/nruns
h95len <- mean(h95len)
h95cov2 <- h95cov2/nruns
h95len2 <- mean(h95len2)
npicov <- npicov/nruns
npilen <- mean(npilen)
list(coverage=coverage,mnpilen=mnpilen,opicov=opicov,opilen=opilen,
opicov2=opicov2,opilen2=opilen2,h95cov=h95cov,h95len=h95len,
h95cov2=h95cov2,h95len2=h95len2, npicov=npicov, npilen=npilen)
}


armaboot<-function(Y,B=100,pmax=3,qmax=3,c=0.05){
#Bootstraps ARMA(p,q) model selection using the parametric bootstrap.
#Assumes n > 10 (pmax+qmax) and adds cB full model bootstrap samples.
#Uses the auto.arima function from the forecast package with aic and
#stationary = FALSE, armaboot2 uses a selected burnin period
#The full model and betahatmix are also bootstrapped.
#Want pmax > 0 and qmax > 0.
#Uses library forecast.
#If the true model is ARMA(ps,qs), then both pmax > ps and qmax > qs
#causes lots of convergence problems.
out <- arima(Y,order=c(pmax,0,qmax),method="ML")
kmax <- pmax+qmax
phihat <- out$coef[1:pmax]  #from full model
thetahat <- out$coef[(pmax+1):kmax] #from full model
n<-length(Y)
sig<-sqrt(out$sigma2)
d <- ceiling(c*B)
bpd <- B + d
bp1 <- B + 1
betas <- matrix(0,nrow=bpd,ncol=kmax)
btmix <- betas
btfull <- betas
bhatvs <- 0*1:kmax
outvs<-auto.arima(Y,d=0,max.p=pmax,max.q=qmax,seasonal=FALSE,ic="aic",
stationary=FALSE,method="ML")
#outvs<-auto.arima(Y,d=0,max.p=pmax,max.q=qmax,seasonal=FALSE,ic="aicc",
#$stationary=TRUE,method="ML")
arord<-outvs$arma[1]
maord<-outvs$arma[2]
if(arord>0)
bhatvs[1:arord]<-outvs$coef[1:arord]
if(maord>0)
bhatvs[(pmax+1):(pmax+maord)]<-outvs$coef[(arord+1):(arord+maord)]
for(i in 1:B) {
   yb <- arima.sim(n=n,list(ar=phihat,ma=thetahat),sd=sig)
   tem<-auto.arima(yb,d=0,max.p=pmax,max.q=qmax,seasonal=FALSE,ic="aic",
stationary=FALSE,method="ML")
# tem<-auto.arima(yb,d=0,max.p=pmax,max.q=qmax,seasonal=FALSE,ic="aicc",
#stationary=TRUE,method="ML")
   arord<-tem$arma[1]
   maord<-tem$arma[2]
   if(arord>0)
   betas[i,1:arord]<-tem$coef[1:arord]
   if(maord>0)
   betas[i,(pmax+1):(pmax+maord)]<-tem$coef[(arord+1):(arord+maord)]
   tem<-arima(yb,order=c(pmax,0,qmax),method="ML")
   btfull[i,1:pmax] <- tem$coef[1:pmax]
   btfull[i,(pmax+1):kmax] <- tem$coef[(pmax+1):kmax]
#mix
if((arord+maord)>0){
   yb <- arima.sim(n=n,list(ar=phihat,ma=thetahat),sd=sig)
   tem <- arima(yb,order=c(arord,0,maord),method="ML")
   if(arord>0)
   btmix[i,1:arord]<-tem$coef[1:arord]
   if(maord>0)
   btmix[i,(pmax+1):(pmax+maord)] <- tem$coef[(arord+1):(arord+maord)]
   }
}
for(i in bp1:bpd){
   yb <- arima.sim(n=n,list(ar=phihat,ma=thetahat),sd=sig)
   tem<-arima(yb,order=c(pmax,0,qmax),method="ML")
   betas[i,1:pmax] <- tem$coef[1:pmax]
   betas[i,(pmax+1):kmax] <- tem$coef[(pmax+1):kmax]
}
btmix[bp1:bpd,] <- betas[bp1:bpd,]
btfull[bp1:bpd,] <- betas[bp1:bpd,]
list(btfull=btfull,betas=betas,btmix=btmix,bhatvs=bhatvs)
}


armaboot2<-function(Y,B=100,pmax=3,qmax=3,c=0.05,burn=100){
#Bootstraps ARMA(p,q) model selection using the parametric bootstrap.
#Assumes n > 10 (pmax+qmax) and adds cB full model bootstrap samples.
#Uses the auto.arima function from the forecast package with aic and
#stationary = FALSE, burn is the burn in period, need burn>pmax+qmax
#The full model and betahatmix are also bootstrapped.
#Want pmax > 0 and qmax > 0.
#Uses library forecast.
#If the true model is ARMA(ps,qs), then both pmax > ps and qmax > qs
#causes lots of convergence problems.
out <- arima(Y,order=c(pmax,0,qmax),method="ML")
kmax <- pmax+qmax
phihat <- out$coef[1:pmax]  #from full model
thetahat <- out$coef[(pmax+1):kmax] #from full model
n<-length(Y)
sig<-sqrt(out$sigma2)
d <- ceiling(c*B)
bpd <- B + d
bp1 <- B + 1
betas <- matrix(0,nrow=bpd,ncol=kmax)
btmix <- betas
btfull <- betas
bhatvs <- 0*1:kmax
outvs<-auto.arima(Y,d=0,max.p=pmax,max.q=qmax,seasonal=FALSE,ic="aic",
stationary=FALSE,method="ML")
#outvs<-auto.arima(Y,d=0,max.p=pmax,max.q=qmax,seasonal=FALSE,ic="aicc",
#$stationary=TRUE,method="ML")
arord<-outvs$arma[1]
maord<-outvs$arma[2]
if(arord>0)
bhatvs[1:arord]<-outvs$coef[1:arord]
if(maord>0)
bhatvs[(pmax+1):(pmax+maord)]<-outvs$coef[(arord+1):(arord+maord)]
for(i in 1:B) {
   yb <- arima.sim(n=n,list(ar=phihat,ma=thetahat),sd=sig,n.start=burn)
   tem<-auto.arima(yb,d=0,max.p=pmax,max.q=qmax,seasonal=FALSE,ic="aic",
stationary=FALSE,method="ML")
# tem<-auto.arima(yb,d=0,max.p=pmax,max.q=qmax,seasonal=FALSE,ic="aicc",
#stationary=TRUE,method="ML")
   arord<-tem$arma[1]
   maord<-tem$arma[2]
   if(arord>0)
   betas[i,1:arord]<-tem$coef[1:arord]
   if(maord>0)
   betas[i,(pmax+1):(pmax+maord)]<-tem$coef[(arord+1):(arord+maord)]
   tem<-arima(yb,order=c(pmax,0,qmax),method="ML")
   btfull[i,1:pmax] <- tem$coef[1:pmax]
   btfull[i,(pmax+1):kmax] <- tem$coef[(pmax+1):kmax]
#mix
if((arord+maord)>0){
   yb <- arima.sim(n=n,list(ar=phihat,ma=thetahat),sd=sig,n.start=burn)
   tem <- arima(yb,order=c(arord,0,maord),method="ML")
   if(arord>0)
   btmix[i,1:arord]<-tem$coef[1:arord]
   if(maord>0)
   btmix[i,(pmax+1):(pmax+maord)] <- tem$coef[(arord+1):(arord+maord)]
   }
}
for(i in bp1:bpd){
   yb <- arima.sim(n=n,list(ar=phihat,ma=thetahat),sd=sig,n.start=burn)
   tem<-arima(yb,order=c(pmax,0,qmax),method="ML")
   betas[i,1:pmax] <- tem$coef[1:pmax]
   betas[i,(pmax+1):kmax] <- tem$coef[(pmax+1):kmax]
}
btmix[bp1:bpd,] <- betas[bp1:bpd,]
btfull[bp1:bpd,] <- betas[bp1:bpd,]
list(btfull=btfull,betas=betas,btmix=btmix,bhatvs=bhatvs)
}


armapisim<-function(n=100,nruns=50,kmax=5,etype=1,tstype=1,
tdf=5,phi=c(0.7,0.1,-0.4),theta=c(0.1),pen=2,burn=100,alph=0.05){
#Needs auto.arima from library(forecast) and calls armamsel2.
#for an arma(ps,qs) true model, estimates r = max(ps,qs)
#assumes n >= 20 (kmax),  full model beta = (phi1,...,phikmax,theta1,...,thetakmax)
#Fits several 1 step ahead PIs after model selection with auto.arima and method
#that uses Potscher (1990).
#There are models where nruns need to be near 50 or 100 or the program fails.
#Simulates tstype time series of length n, and finds proportion
#of time Y_n+1 is in its 95% PI.
#tstype = 1 for MA(2), 
#The PI may be asymptotically optimal.
#Average PI length is also given. 
#Gets the 1  step ahead PIs based on forecast residuals.
#etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom
#etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors
#calls locpi, locpi2
#Finally gets the normal PIs, although the z cutoff is replaced
#by a t cutoff to increase coverage.
kp1 <- kmax+1
rhat<-1:nruns
beta<-0*1:(2*kmax)
tem<- 1:nruns   
ord <- cbind(tem,tem,tem,tem) #order
if(tstype==1){#tstype=1 for AR(1) model
phi<-c(0.5)
beta[1] <- 0.5
ps<-1 
qs <- 0
rr=1
}
if(tstype==2) { #tstype=2 for AR(2) model
phi=c(0.5,0.33)  
beta[c(1,2)]<-phi
ps<-2
qs<-0
rr=2
}
if(tstype==3){#tstype=3 for MA(1) model
theta<-c(-0.5)
beta[(kmax+1)] <- -0.5
ps<-0 
qs <- 1
rr=1
}
if(tstype==4) { #tstype=4 for MA(2) model
theta=c(-0.5,0.5)  
beta[c(kmax+1,kmax+2)]<-theta
ps<-0
qs<-2
rr=2
}
if(tstype==5){#need kmax > 2
#tstype=5 for ARMA(3,1) model
phi <- c(0.7,0.1,-0.4)
theta <- 0.1
beta[c(1:3,kmax+1)] <- c(phi,0.1)
ps<-3
qs<-1
rr=3
}
if(tstype==6){#need kmax >= rr
#tstype=6 for ARMA model selected by user
phi <- as.vector(phi)
theta <- as.vector(theta)
ps<-length(phi)
qs<-length(theta)
rr=max(ps,qs)
beta[c(1:ps,(kmax+1):(kmax+qs))] <- c(phi,theta)
}
np1 <- n + 1
len <- 1:nruns
opilen <- len 
opilen2 <- len 
npilen <- len 
h95len <- len
h95len2 <- len
coverage <- 0
opicov <- 0
opicov2 <- 0
npicov <- 0
h95cov <- 0
h95cov2 <- 0
#use n-p-q in general
tcut <- qt((1-alph/2),n-2)
for(i in 1:nruns){

 if(tstype<3){
  if (etype==1) 
   Y <- arima.sim(n=np1, list(ar=c(phi)), innov = rnorm(np1))
  if(etype==2)
   Y <- arima.sim(n=np1, list(ar=c(phi)), innov = rt(np1,tdf))
  if(etype==3)
   Y <- arima.sim(n=np1, list(ar=c(phi)), innov =          
                  runif(np1,min=-1,max=1))   
  if(etype==4)
   Y <- arima.sim(n=np1, list(ar=c(phi)), innov = (rexp(np1)-1))
 }
 if(tstype==3 || tstype==4){
 if (etype==1) 
   Y <- arima.sim(n=np1, list(ma=c(theta)), innov = rnorm(np1))
  if(etype==2)
   Y <- arima.sim(n=np1, list(ma=c(theta)), innov = rt(np1,tdf))
  if(etype==3)
   Y <- arima.sim(n=np1, list(ma=c(theta)), innov =          
                  runif(np1,min=-1,max=1))   
  if(etype==4)
   Y <- arima.sim(n=np1, list(ma=c(theta)), innov = (rexp(np1)-1))
 }
    if(tstype==5){
  if (etype==1) 
 Y <- arima.sim(n=np1,list(ar=c(phi),ma=c(theta)),innov = rnorm(np1),n.start=burn)
  if(etype==2)
 Y <- arima.sim(n=np1,list(ar=c(phi),ma=c(theta)),innov = rt(np1,tdf),n.start=burn)
  if(etype==3)
   Y <- arima.sim(n=np1, list(ar=c(phi),ma=c(theta)), innov =          
                  runif(np1,min=-1,max=1),n.start=burn)   
  if(etype==4)
 Y <- arima.sim(n=np1,list(ar=c(phi),ma=c(theta)),innov=(rexp(np1)-1),n.start=burn)
 }
    if(tstype==6){  #arma model selected by the user
  if (etype==1) 
 Y <- arima.sim(n=np1,list(ar=c(phi),ma=c(theta)),innov = rnorm(np1),n.start=burn)
  if(etype==2)
 Y <- arima.sim(n=np1,list(ar=c(phi),ma=c(theta)),innov = rt(np1,tdf),n.start=burn)
  if(etype==3)
   Y <- arima.sim(n=np1, list(ar=c(phi),ma=c(theta)), innov =          
                  runif(np1,min=-1,max=1),n.start=burn)   
  if(etype==4)
  Y <- arima.sim(n=np1,list(ar=c(phi),ma=c(theta)),innov=(rexp(np1)-1),n.start=burn)
  }

   y <- Y[1:n]
   yf <- Y[np1]
   tem <- locpi(y,alpha=alph) #PI ignore TS structure
  LPI <- tem$LPI
  UPI <- tem$UPI
  len[i] <- UPI - LPI
  if(LPI < yf && yf < UPI)
   coverage <- coverage + 1
#ARMA(p,q) model selection
out<-auto.arima(y,d=0,max.p=kmax,max.q=kmax,seasonal=FALSE,ic="aicc",
stationary=TRUE)
#get the l-step ahead forecasts for l = 1
temp <- out %>% forecast(h=1)
yfhat<-temp$mean[1] #1 step ahead forecast
#get the 1 step ahead PI
res <- out$resid
m <- length(res)
kk<-length(out$coef)
am <- (1+15/m)*sqrt(m/(m-kk))
tem <- locpi2(res,k=kk,alph=alph)
low <- yfhat+am*tem$LPI
up <- yfhat+am*tem$UPI
if(low < yf && yf < up)
   opicov <- opicov + 1
opilen[i] <- up-low
#get PI from forecast package
up<-temp$upper[2]
low <- temp$lower[2]
if(low < yf && yf < up)
   h95cov <- h95cov + 1
h95len[i] <- up-low
#get the normal PI
se <- sqrt(out$sigma)
tcut <- qt((1-alph/2),n-kk)
  LPI <- yfhat - tcut*se
  UPI <- yfhat + tcut*se
  npilen[i] <- UPI-LPI
  if(LPI < yf && yf < UPI)
   npicov <- npicov + 1
#use a better model selection method
out2<-armamsel2(y,kmax=kmax,pen=pen)
tem<-arima(y,order=c(out2$pI,0,out2$qI),method="ML")
#get the 1 step ahead PI
res <- tem$resid
m <- length(res)
kk<-length(tem$coef)
am <- (1+15/m)*sqrt(m/(m-kk))
tem2 <- locpi2(res,k=kk,alph=alph)
low <- yfhat+am*tem2$LPI
up <- yfhat+am*tem2$UPI
if(low < yf && yf < up)
   opicov2 <- opicov2 + 1
opilen2[i] <- up-low
#get PI from forecast package
temp <- tem %>% forecast(h=1)
up<-temp$upper[2]
low <- temp$lower[2]
if(low < yf && yf < up)
   h95cov2 <- h95cov2 + 1
h95len2[i] <- up-low
}
mnpilen <- mean(len)
coverage <- coverage/nruns #locPI ignores ARMA order
opilen <- mean(opilen)
opicov <- opicov/nruns
opilen2 <- mean(opilen2)
opicov2 <- opicov2/nruns
h95cov <- h95cov/nruns
h95len <- mean(h95len)
h95cov2 <- h95cov2/nruns
h95len2 <- mean(h95len2)
npicov <- npicov/nruns
npilen <- mean(npilen)
list(coverage=coverage,mnpilen=mnpilen,opicov=opicov,opilen=opilen,
opicov2=opicov2,opilen2=opilen2,h95cov=h95cov,h95len=h95len,
h95cov2=h95cov2,h95len2=h95len2, npicov=npicov, npilen=npilen)
}


armamsel1<-function(y,kmax=5){
#For an arma(ps,qs) true model, estimates r = max(ps,qs)
#using Potscher (1990). So use an ARMA(rhat,rhat) model.
#Often works for n>=80, but underfitting is sometimes severe for n < 1000. 
n <- length(y)
kp1 <- kmax+1
tem <- 1:kp1
z <- 1:kp1
vars<-z
for(i in 1:kp1){
out <- arima(y,order=c((i-1),0,(i-1)),method="ML")
vars[i]<-out$sigma2
z[i] <- log(vars[i]) +2*(i-1)*log(n)/n
}
#get the first local min of z
zz <- z
zz[1:(kp1-1)] <- z[2:kp1]
zz[kp1]<-z[kp1]+1
tem2 <- tem[z<zz]
rhat <- min(tem2)-1
list(rhat=rhat)
}


armamsel2<-function(y,kmax=5,pen=2){
#For an arma(ps,qs) true model, estimates r = max(ps,qs)
#using Potscher (1990). Then does model selection. 
#Often works for n>=80, but underfitting is sometimes severe for n < 1000. 
n <- length(y)
kp1 <- kmax+1
tem <- 1:kp1
z <- 1:kp1
vars<-z
for(i in 1:kp1){
out <- arima(y,order=c((i-1),0,(i-1)),method="ML")
vars[i]<-out$sigma2
z[i] <- log(vars[i]) +2*(i-1)*log(n)/n
}
#get the first local min of z
zz <- z
zz[1:(kp1-1)] <- z[2:kp1]
zz[kp1]<-z[kp1]+1
tem2 <- tem[z<zz]
rhat <- min(tem2)-1
critr <- n*log(vars[(rhat+1)]) +2*(rhat+rhat) - pen #AIC type criterion - pen
pI<-rhat
qI<-rhat
if(rhat > 0){
for(i in 1:rhat){
out <- arima(y,order=c(rhat,0,(rhat-i)),method="ML")
var<-out$sigma2
ctem <- n*log(var) +2*(rhat + rhat-i)
if(ctem < critr){
critr <- ctem - pen
pI <- rhat
qI <- (rhat-i)}
out <- arima(y,order=c((rhat-i),0,rhat),method="ML")
var<-out$sigma2
ctem <- n*log(var) +2*(rhat + rhat-i)
if(ctem < critr){
critr <- ctem - pen
qI <- rhat
pI <- (rhat-i)}
}}
list(rhat=rhat,pI=pI,qI=qI)
}


armasim0<-function(n=100,nruns=20,kmax=5,etype=1,tstype=1,tdf=5,burn=100){
#ARMA(p,q) model selection.
#assumes n >= 20 (kmax),  full model beta = (phi1,...,phikmax,theta1,...,thetakmax)
#current program uses AIC 
#program fails with tstype = 5 (ARMA models) for both AIC and BIC even with n=18000
#undfit is the proportion of runs where model selection underfit
#ovfit is the proportion of runs where p>ps and q>qs so model is inconsistent
#cfit is proportion of runs a consistent model was selected = 1-undfit-ovfit
#want sfit near 1, ovfit and undfit near 0
#etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom
#etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors
#uses library(forecast)
kp1 <- kmax+1
beta<-0*1:(2*kmax)
if(tstype==1){#tstype=1 for AR(1) model
phi<-c(0.5)
beta[1] <- 0.5
ps<-1 
qs <- 0
}
if(tstype==2) { #tstype=2 for AR(2) model
phi=c(0.5,0.33)  
beta[c(1,2)]<-phi
ps<-2
qs<-0
}
if(tstype==3){#tstype=3 for MA(1) model
theta<-c(-0.5)
beta[(kmax+1)] <- -0.5
ps<-0 
qs <- 1
}
if(tstype==4) { #tstype=4 for MA(2) model
theta=c(-0.5,0.5)  
beta[c(kmax+1,kmax+2)]<-theta
ps<-0
qs<-2
}
if(tstype==5){#need pmax > 2
#tstype=5 for ARMA(3,1) model
phi <- c(0.7,0.1,-0.4)
theta <- 0.1
beta[c(1:3,kmax+1)] <- c(phi,0.1)
ps<-3
qs<-1
}
if(tstype==6){#need kmax >= rr
#tstype=6 for ARMA model selected by user
phi <- as.vector(phi)
theta <- as.vector(theta)
ps<-length(phi)
qs<-length(theta)
beta[c(1:ps,(kmax+1):(kmax+qs))] <- c(phi,theta)
}
tem<- 1:nruns   
ord <- cbind(tem,tem) #order 
undfit<-0
ovfit <- 0
cfit<-0
for(i in 1:nruns) {

if(tstype<3){
 if (etype==1) 
   Y <- arima.sim(n=n, list(ar=c(phi)), innov = rnorm(n))
  if(etype==2)
   Y <- arima.sim(n=n, list(ar=c(phi)), innov = rt(n,tdf))
  if(etype==3)
   Y <- arima.sim(n=n, list(ar=c(phi)), innov =          
                  runif(n,min=-1,max=1))   
  if(etype==4)
   Y <- arima.sim(n=n, list(ar=c(phi)), innov = (rexp(n)-1))
}
if(tstype==3 || tstype==4){
if (etype==1) 
   Y <- arima.sim(n=n, list(ma=c(theta)), innov = rnorm(n))
  if(etype==2)
   Y <- arima.sim(n=n, list(ma=c(theta)), innov = rt(n,tdf))
  if(etype==3)
   Y <- arima.sim(n=n, list(ma=c(theta)), innov =          
                  runif(n,min=-1,max=1))   
  if(etype==4)
   Y <- arima.sim(n=n, list(ma=c(theta)), innov = (rexp(n)-1))
}


    if(tstype==5){
  if (etype==1) 
 Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov = rnorm(n),n.start=burn)
  if(etype==2)
 Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov = rt(n,tdf),n.start=burn)
  if(etype==3)
   Y <- arima.sim(n=n, list(ar=c(phi),ma=c(theta)), innov =          
                  runif(n,min=-1,max=1),n.start=burn)   
  if(etype==4)
Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov=(rexp(n)-1),n.start=burn)
}
    if(tstype==6){  #arma model selected by the user
  if (etype==1) 
 Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov = rnorm(n),n.start=burn)
  if(etype==2)
 Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov = rt(n,tdf),n.start=burn)
  if(etype==3)
   Y <- arima.sim(n=n, list(ar=c(phi),ma=c(theta)), innov =          
                  runif(n,min=-1,max=1),n.start=burn)   
  if(etype==4)
Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov=(rexp(n)-1),n.start=burn)
}
   
    outvs<-auto.arima(Y,d=0,max.p=kmax,max.q=kmax,seasonal=FALSE,ic="aic",
           stationary=TRUE,method="ML") #not much change if bic is used instead
arord<-outvs$arma[1]
maord<-outvs$arma[2]    
    ord[i,1]<-arord 
    ord[i,2]<-maord 
    if(arord < ps || maord < qs)  #|| is the or operator
    undfit <- undfit+1
if(arord > ps && maord > qs)  #&& is the and operator
    ovfit <- ovfit+1   #model is inconsistent if ovfit occurs
if(arord == ps && maord == qs)  
   cfit <- cfit+1 
if(arord == ps && maord > qs)  
   cfit <- cfit+1 
if(arord > ps && maord == qs)  #the three if statements are disjoint
   cfit <- cfit+1   #cfit <- 1-ovfit-undfit is better
    
}
undfit<-undfit/nruns
ovfit<-ovfit/nruns
cfit <- cfit/nruns
list(ord=ord,beta=beta,ps=ps,qs=qs,undfit=undfit,ovfit=ovfit,cfit=cfit)
}



armasim1<-function(n=100,nruns=20,kmax=5,etype=1,tstype=5,tdf=5,
phi=c(0.7,0.1,-0.4),theta=c(0.1),burn=100){
#for an arma(ps,qs) true model, estimates r = max(ps,qs)
#using Potscher (1990), calls armamsel1
#assumes n >= 20 (kmax),  full model beta = (phi1,...,phikmax,theta1,...,thetakmax)
#undfit is the proportion of runs where model selection underfit
#etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom
#etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors
kp1 <- kmax+1
rhat<-1:nruns
beta<-0*1:(2*kmax)
if(tstype==1){#tstype=1 for AR(1) model
phi<-c(0.5)
beta[1] <- 0.5
ps<-1 
qs <- 0
rr=1
}
if(tstype==2) { #tstype=2 for AR(2) model
phi=c(0.5,0.33)  
beta[c(1,2)]<-phi
ps<-2
qs<-0
rr=2
}
if(tstype==3){#tstype=3 for MA(1) model
theta<-c(-0.5)
beta[(kmax+1)] <- -0.5
ps<-0 
qs <- 1
rr=1
}
if(tstype==4) { #tstype=4 for MA(2) model
theta=c(-0.5,0.5)  
beta[c(kmax+1,kmax+2)]<-theta
ps<-0
qs<-2
rr=2
}
if(tstype==5){#need kmax > 2
#tstype=5 for ARMA(3,1) model
phi <- c(0.7,0.1,-0.4)
theta <- 0.1
beta[c(1:3,kmax+1)] <- c(phi,0.1)
ps<-3
qs<-1
rr=3
}
if(tstype==6){#need kmax >= rr
#tstype=6 for ARMA model selected by user
phi <- as.vector(phi)
theta <- as.vector(theta)
ps<-length(phi)
qs<-length(theta)
rr=max(ps,qs)
beta[c(1:ps,(kmax+1):(kmax+qs))] <- c(phi,theta)
}
undfit<-0
rtrue<-0
for(i in 1:nruns) {

if(tstype<3){
 if (etype==1) 
   Y <- arima.sim(n=n, list(ar=c(phi)), innov = rnorm(n))
  if(etype==2)
   Y <- arima.sim(n=n, list(ar=c(phi)), innov = rt(n,tdf))
  if(etype==3)
   Y <- arima.sim(n=n, list(ar=c(phi)), innov =          
                  runif(n,min=-1,max=1))   
  if(etype==4)
   Y <- arima.sim(n=n, list(ar=c(phi)), innov = (rexp(n)-1))
}
if(tstype==3 || tstype==4){
if (etype==1) 
   Y <- arima.sim(n=n, list(ma=c(theta)), innov = rnorm(n))
  if(etype==2)
   Y <- arima.sim(n=n, list(ma=c(theta)), innov = rt(n,tdf))
  if(etype==3)
   Y <- arima.sim(n=n, list(ma=c(theta)), innov =          
                  runif(n,min=-1,max=1))   
  if(etype==4)
   Y <- arima.sim(n=n, list(ma=c(theta)), innov = (rexp(n)-1))
}
    if(tstype==5){
  if (etype==1) 
 Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov = rnorm(n),n.start=burn)
  if(etype==2)
 Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov = rt(n,tdf),n.start=burn)
  if(etype==3)
   Y <- arima.sim(n=n, list(ar=c(phi),ma=c(theta)), innov =          
                  runif(n,min=-1,max=1),n.start=burn)   
  if(etype==4)
Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov=(rexp(n)-1),n.start=burn)
}
    if(tstype==6){  #arma model selected by the user
  if (etype==1) 
 Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov = rnorm(n),n.start=burn)
  if(etype==2)
 Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov = rt(n,tdf),n.start=burn)
  if(etype==3)
   Y <- arima.sim(n=n, list(ar=c(phi),ma=c(theta)), innov =          
                  runif(n,min=-1,max=1),n.start=burn)   
  if(etype==4)
Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov=(rexp(n)-1),n.start=burn)
}
rhat[i] <- armamsel1(Y,kmax=kmax)$rhat
if(rhat[i] < rr)
undfit<-undfit+1
if(rhat[i] == rr)
rtrue <- rtrue+1   
}
undfit<-undfit/nruns
rtrue <- rtrue/nruns
list(rhat=rhat,rtrue=rtrue,undfit=undfit)
}

armasim2<-function(n=100,nruns=20,kmax=5,etype=1,tstype=5,tdf=5,
phi=c(0.7,0.1,-0.4),theta=c(0.1),pen=2,burn=100){
#for an arma(ps,qs) true model, estimates r = max(ps,qs)
#using Potscher (1990), calls armamsel2, 
#assumes n >= 20 (kmax),  full model beta = (phi1,...,phikmax,theta1,...,thetakmax)
#undfit is the proportion of runs where model selection underfit
#ovfit is the proportion of runs where p>ps and q>qs so model is inconsistent
#cfit is proportion of runs a consistent model was selected = 1-undfit-ovfit
#want sfit near 1, ovfit and undfit near 0
#rtrue is the proportion of times rhat=r, want rtrue near 1
#etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom
#etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors
kp1 <- kmax+1
rhat<-1:nruns
beta<-0*1:(2*kmax)
tem<- 1:nruns   
ord <- cbind(tem,tem) #order
if(tstype==1){#tstype=1 for AR(1) model
phi<-c(0.5)
beta[1] <- 0.5
ps<-1 
qs <- 0
rr=1
}
if(tstype==2) { #tstype=2 for AR(2) model
phi=c(0.5,0.33)  
beta[c(1,2)]<-phi
ps<-2
qs<-0
rr=2
}
if(tstype==3){#tstype=3 for MA(1) model
theta<-c(-0.5)
beta[(kmax+1)] <- -0.5
ps<-0 
qs <- 1
rr=1
}
if(tstype==4) { #tstype=4 for MA(2) model
theta=c(-0.5,0.5)  
beta[c(kmax+1,kmax+2)]<-theta
ps<-0
qs<-2
rr=2
}
if(tstype==5){#need kmax > 2
#tstype=5 for ARMA(3,1) model
phi <- c(0.7,0.1,-0.4)
theta <- 0.1
beta[c(1:3,kmax+1)] <- c(phi,0.1)
ps<-3
qs<-1
rr=3
}
if(tstype==6){#need kmax >= rr
#tstype=6 for ARMA model selected by user
phi <- as.vector(phi)
theta <- as.vector(theta)
ps<-length(phi)
qs<-length(theta)
rr=max(ps,qs)
beta[c(1:ps,(kmax+1):(kmax+qs))] <- c(phi,theta)
}
rtrue<-0
undfit<-0
ovfit <- 0
cfit<-0
for(i in 1:nruns) {

if(tstype<3){
 if (etype==1) 
   Y <- arima.sim(n=n, list(ar=c(phi)), innov = rnorm(n))
  if(etype==2)
   Y <- arima.sim(n=n, list(ar=c(phi)), innov = rt(n,tdf))
  if(etype==3)
   Y <- arima.sim(n=n, list(ar=c(phi)), innov =          
                  runif(n,min=-1,max=1))   
  if(etype==4)
   Y <- arima.sim(n=n, list(ar=c(phi)), innov = (rexp(n)-1))
}
if(tstype==3 || tstype==4){
if (etype==1) 
   Y <- arima.sim(n=n, list(ma=c(theta)), innov = rnorm(n))
  if(etype==2)
   Y <- arima.sim(n=n, list(ma=c(theta)), innov = rt(n,tdf))
  if(etype==3)
   Y <- arima.sim(n=n, list(ma=c(theta)), innov =          
                  runif(n,min=-1,max=1))   
  if(etype==4)
   Y <- arima.sim(n=n, list(ma=c(theta)), innov = (rexp(n)-1))
}
    if(tstype==5){
  if (etype==1) 
 Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov = rnorm(n),n.start=burn)
  if(etype==2)
 Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov = rt(n,tdf),n.start=burn)
  if(etype==3)
   Y <- arima.sim(n=n, list(ar=c(phi),ma=c(theta)), innov =          
                  runif(n,min=-1,max=1),n.start=burn)   
  if(etype==4)
Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov=(rexp(n)-1),n.start=burn)
}
    if(tstype==6){  #arma model selected by the user
  if (etype==1) 
 Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov = rnorm(n),n.start=burn)
  if(etype==2)
 Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov = rt(n,tdf),n.start=burn)
  if(etype==3)
   Y <- arima.sim(n=n, list(ar=c(phi),ma=c(theta)), innov =          
                  runif(n,min=-1,max=1),n.start=burn)   
  if(etype==4)
Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov=(rexp(n)-1),n.start=burn)
}
out <- armamsel2(Y,kmax=kmax,pen=pen)
rhat[i]<-out$rhat
if(rhat[i] == rr)
rtrue <- rtrue+1

arord<-out$pI
maord<-out$qI   
    ord[i,1]<-arord 
    ord[i,2]<-maord 
    if(arord < ps || maord < qs)  #|| is the or operator
    undfit <- undfit+1
if(arord > ps && maord > qs)  #&& is the and operator
    ovfit <- ovfit+1   #model is inconsistent if ovfit occurs
if(arord == ps && maord == qs)  
   cfit <- cfit+1 
if(arord == ps && maord > qs)  
   cfit <- cfit+1 
if(arord > ps && maord == qs)  #the three if statements are disjoint
   cfit <- cfit+1   #cfit <- 1-ovfit-undfit is better        
}
undfit<-undfit/nruns
rtrue <- rtrue/nruns
ovfit<-ovfit/nruns
cfit <- cfit/nruns
list(ord=ord,beta=beta,rhat=rhat,rtrue=rtrue,undfit=undfit,ovfit=ovfit,cfit=cfit)
}


armasim3<-function(n=100,nruns=20,kmax=5,etype=1,tstype=5,tdf=5,
phi=c(0.7,0.1,-0.4),theta=c(0.1),pen=2,burn=100){
#for an arma(ps,qs) true model, estimates r = max(ps,qs)
#using Potscher (1990), calls armamsel2, 
#also uses auto.arima from the forecast package
#assumes n >= 20 (kmax),  full model beta = (phi1,...,phikmax,theta1,...,thetakmax)
#undfit is the proportion of runs where model selection underfit
#ovfit is the proportion of runs where p>ps and q>qs so model is inconsistent
#cfit is proportion of runs a consistent model was selected = 1-undfit-ovfit
#want sfit near 1, ovfit and undfit near 0
#rtrue is the proportion of times rhat=r, want rtrue near 1
#etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom
#etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors
kp1 <- kmax+1
rhat<-1:nruns
beta<-0*1:(2*kmax)
tem<- 1:nruns   
ord <- cbind(tem,tem,tem,tem) #order
if(tstype==1){#tstype=1 for AR(1) model
phi<-c(0.5)
beta[1] <- 0.5
ps<-1 
qs <- 0
rr=1
}
if(tstype==2) { #tstype=2 for AR(2) model
phi=c(0.5,0.33)  
beta[c(1,2)]<-phi
ps<-2
qs<-0
rr=2
}
if(tstype==3){#tstype=3 for MA(1) model
theta<-c(-0.5)
beta[(kmax+1)] <- -0.5
ps<-0 
qs <- 1
rr=1
}
if(tstype==4) { #tstype=4 for MA(2) model
theta=c(-0.5,0.5)  
beta[c(kmax+1,kmax+2)]<-theta
ps<-0
qs<-2
rr=2
}
if(tstype==5){#need kmax > 2
#tstype=5 for ARMA(3,1) model
phi <- c(0.7,0.1,-0.4)
theta <- 0.1
beta[c(1:3,kmax+1)] <- c(phi,0.1)
ps<-3
qs<-1
rr=3
}
if(tstype==6){#need kmax >= rr
#tstype=6 for ARMA model selected by user
phi <- as.vector(phi)
theta <- as.vector(theta)
ps<-length(phi)
qs<-length(theta)
rr=max(ps,qs)
beta[c(1:ps,(kmax+1):(kmax+qs))] <- c(phi,theta)
}
rtrue<-0
undfit<-0
ovfit <- 0
cfit<-0
aundfit<-0
aovfit <- 0
acfit<-0
for(i in 1:nruns) {

if(tstype<3){
 if (etype==1) 
   Y <- arima.sim(n=n, list(ar=c(phi)), innov = rnorm(n))
  if(etype==2)
   Y <- arima.sim(n=n, list(ar=c(phi)), innov = rt(n,tdf))
  if(etype==3)
   Y <- arima.sim(n=n, list(ar=c(phi)), innov =          
                  runif(n,min=-1,max=1))   
  if(etype==4)
   Y <- arima.sim(n=n, list(ar=c(phi)), innov = (rexp(n)-1))
}
if(tstype==3 || tstype==4){
if (etype==1) 
   Y <- arima.sim(n=n, list(ma=c(theta)), innov = rnorm(n))
  if(etype==2)
   Y <- arima.sim(n=n, list(ma=c(theta)), innov = rt(n,tdf))
  if(etype==3)
   Y <- arima.sim(n=n, list(ma=c(theta)), innov =          
                  runif(n,min=-1,max=1))   
  if(etype==4)
   Y <- arima.sim(n=n, list(ma=c(theta)), innov = (rexp(n)-1))
}
    if(tstype==5){
  if (etype==1) 
 Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov = rnorm(n),n.start=burn)
  if(etype==2)
 Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov = rt(n,tdf),n.start=burn)
  if(etype==3)
   Y <- arima.sim(n=n, list(ar=c(phi),ma=c(theta)), innov =          
                  runif(n,min=-1,max=1),n.start=burn)   
  if(etype==4)
Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov=(rexp(n)-1),n.start=burn)
}
    if(tstype==6){  #arma model selected by the user
  if (etype==1) 
 Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov = rnorm(n),n.start=burn)
  if(etype==2)
 Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov = rt(n,tdf),n.start=burn)
  if(etype==3)
   Y <- arima.sim(n=n, list(ar=c(phi),ma=c(theta)), innov =          
                  runif(n,min=-1,max=1),n.start=burn)   
  if(etype==4)
Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov=(rexp(n)-1),n.start=burn)
}
out <- armamsel2(Y,kmax=kmax,pen=pen)
rhat[i]<-out$rhat
if(rhat[i] == rr)
rtrue <- rtrue+1

arord<-out$pI
maord<-out$qI   
    ord[i,1]<-arord 
    ord[i,2]<-maord 
    if(arord < ps || maord < qs)  #|| is the or operator
    undfit <- undfit+1
if(arord > ps && maord > qs)  #&& is the and operator
    ovfit <- ovfit+1   #model is inconsistent if ovfit occurs
if(arord == ps && maord == qs)  
   cfit <- cfit+1 
if(arord == ps && maord > qs)  
   cfit <- cfit+1 
if(arord > ps && maord == qs)  #the three if statements are disjoint
   cfit <- cfit+1   #cfit <- 1-ovfit-undfit is better    
##auto.arima    
    outvs<-auto.arima(Y,d=0,max.p=kmax,max.q=kmax,seasonal=FALSE,ic="aic",
           stationary=TRUE,method="ML") #not much change if bic is used instead
arord<-outvs$arma[1]
maord<-outvs$arma[2]    
    ord[i,3]<-arord 
    ord[i,4]<-maord 
    if(arord < ps || maord < qs)  #|| is the or operator
    aundfit <- aundfit+1
if(arord > ps && maord > qs)  #&& is the and operator
    aovfit <- aovfit+1   #model is inconsistent if ovfit occurs
if(arord == ps && maord == qs)  
   acfit <- acfit+1 
if(arord == ps && maord > qs)  
   acfit <- acfit+1 
if(arord > ps && maord == qs)  #the three if statements are disjoint
   acfit <- acfit+1   #cfit <- 1-ovfit-undfit is better
    
}
undfit<-undfit/nruns
rtrue <- rtrue/nruns
ovfit<-ovfit/nruns
cfit <- cfit/nruns
aundfit<-aundfit/nruns
aovfit<-aovfit/nruns
acfit <- acfit/nruns
list(ord=ord,beta=beta,rhat=rhat,rtrue=rtrue,undfit=undfit,ovfit=ovfit,cfit=cfit,
aundfit=aundfit,aovfit=aovfit,acfit=acfit)
}




arp<-function(Y,p=1){
#Fits an AR(p) model to time series Y using OLS.
#assumes p < n - p
#Makes a response and residual plot.
# Advance the plot by highlighting Stop with the right mouse button.
#phihat = phihat_0 = intercept, phihat_1, ..., phihat_p
cmar <- par("mar")
	par(mfrow = c(2, 1))
        par(mar=c(4.0,4.0,2.0,0.5))
Y <- as.vector(Y)
n <- length(Y)
Xar <- matrix(0,nrow=n-p,ncol=p)
Yar <- Y[(p+1):n]
for(i in 1:p){
Xar[,i] <- Y[(p-i+1):(n-i)]
}
ardat <- as.data.frame(cbind(Yar,Xar))
tem <- lm(Yar~.,data=ardat)
phihat <- tem$coef
fit <- Y
resid <- Y
fit[(p+1):n] <- tem$fit
fit[1:p] <- NA
resid[(p+1):n] <- tem$res
resid[1:p] <- NA
plot(fit,Y)
	abline(0, 1)
	title("Response Plot")
        identify(fit,Y)
	plot(fit, resid)
	title("Residual Plot")
        identify(fit, resid)
par(mfrow = c(1, 1))
par(mar=cmar)
list(phihat=phihat,fit=fit,resid=resid,tem=tem)
}


bicmat <- function(Y,dd=0,pmax=5,qmax=5){
#makes a matrix of BICs - min(BIC) for ARIMA(p,dd,q) models for time series Y
#uses method = "ML" to reduce errors
#reduce pmax by 1 if an error message occurs, may need to reduce qmax
#minbic is the min bic for the arima(p,dd,q) models, 
Y <- as.vector(Y)
bics <- matrix(, (pmax+1), (qmax+1), dimnames=list(p=0:pmax, q=0:qmax))
for(p in 0:pmax)
   for(q in 0:qmax){  z <- arima(Y, c(p,d=dd,q), method = "ML",
        optim.control = list(maxit = 500))
        bics[1+p, 1+q] <- BIC(z)}
minbic <- min(bics, na.rm = TRUE)
bics <- round(bics - minbic, 2)
list(bics=bics)
}


confreg<-function(x, g = 4, that = 1:4, alpha = 0.05){
# Makes confidence regions for theta from rows of x = Ti* from a bootstrap.
# Use for testing H_0: theta = 0 versus H_1: theta != 0.
# The prediction region method, hybrid, and Bickel and Ren regions are used.
# If g = 1, the shorth interval should work better.
# Also computes the distance for the 0 vector.
# Need g = dim(x)[2] and T = that the g by 1 estimator of theta.
# Often that = A betahat(I_min,0). 
# Note that center= Tbar* = bagging estimator and cov = S*_T.
x <- as.matrix(x)
that <- as.vector(that)
n <- dim(x)[1]
zero <- 0*(1:g)
up <- min((1 - alpha/2), (1 - alpha + 10*alpha*g/n))
if(alpha > 0.1)
	up <- min((1 - alpha + 0.05), (1 - alpha + g/n))
qn <- up
if(qn < 1 - alpha + 0.001)
        up <- 1 - alpha
center <- apply(x, 2, mean)
cov <- var(x)
md2 <- mahalanobis(x, center, cov)
# MD is the classical  distance
MD <- sqrt(md2)
#get prediction region method cutoff
cuplim <- sqrt(quantile(md2, up))
D0 <- sqrt(mahalanobis(zero, center, cov))
#get hybrid region statistic = Bickel and Ren statistic
br0 <- sqrt(mahalanobis(zero, that, cov))
#get the Bickel and Ren cutoff and test statistic
br2 <- mahalanobis(x,that,cov)
brlim <- sqrt(quantile(br2, up))
list(cuplim=cuplim,brlim=brlim,br0=br0,D0=D0,MD=MD,center=center,cov=cov)
}


confreg2<-function(x, g = 4,  alpha = 0.05){
# Makes confidence regions for theta from rows of x = Ti* from a bootstrap.
# Use for testing H_0: theta = 0 versus H_1: theta != 0.
# The prediction region method is used.
# If g = 1, the shorth interval should work better.
# Also computes the distance for the 0 vector.
# Need g = dim(x)[2] and T = that the g by 1 estimator of theta.
# Often that = A betahat(I_min,0). 
# Note that center= Tbar* = bagging estimator and cov = S*_T.
x <- as.matrix(x)
n <- dim(x)[1]
zero <- 0*(1:g)
up <- min((1 - alpha/2), (1 - alpha + 10*alpha*g/n))
if(alpha > 0.1)
	up <- min((1 - alpha + 0.05), (1 - alpha + g/n))
qn <- up
if(qn < 1 - alpha + 0.001)
        up <- 1 - alpha
center <- apply(x, 2, mean)
cov <- var(x)
md2 <- mahalanobis(x, center, cov)
#get prediction region method cutoff
cuplim <- sqrt(quantile(md2, up))
D0 <- sqrt(mahalanobis(zero, center, cov))
list(cuplim=cuplim,D0=D0,center=center)
}




dsarmasim<-function(n=100,nruns=20,pmax=5,qmax=5,etype=1,tstype=1,tdf=5,burn=100){
#datasplitting with ARMA(p,q) model selection.
#assumes n >= 10 (pmax+qmax),  full model beta = (phi1,...,phipmax,theta1,...,thetaqmax)
#need pmax+3 <=pmax+qmax
#current program uses AIC with H using the first floor(n/2) cases
#undfit is the proportion of runs where model selection underfit
#etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom
#etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors
#uses library(forecast)
kmax<-pmax+qmax   #p=kmax
beta<-0*1:kmax
if(tstype==1){#tstype=1 for AR(1) model
phi<-c(0.5)
beta[1] <- 0.5
ps<-1 
qs <- 0
}
if(tstype==2) { #tstype=2 for AR(2) model
phi=c(0.5,0.33)  
beta[c(1,2)]<-phi
ps<-2
qs<-0
}
if(tstype==3){#tstype=3 for MA(1) model
theta<-c(-0.5)
beta[(pmax+1)] <- -0.5
ps<-0 
qs <- 1
}
if(tstype==4) { #tstype=4 for MA(2) model
theta=c(-0.5,0.5)  
beta[c(pmax+1,pmax+2)]<-theta
ps<-0
qs<-2
}
if(tstype==5){#need pmax > 2
#tstype=5 for ARMA(3,1) model
phi <- c(0.7,0.1,-0.4)
theta <- 0.1
beta[c(1:3,pmax+1)] <- c(phi,0.1)
ps<-3
qs<-1
}
tem<- 1:nruns   
ordno2 <- cbind(tem,tem) #order for n over 2 n/2 data spitting
no2 <- floor(n/2)
undfit<-0
for(i in 1:nruns) {
if(tstype<3){
 if (etype==1) 
   Y <- arima.sim(n=n, list(ar=c(phi)), innov = rnorm(n))
  if(etype==2)
   Y <- arima.sim(n=n, list(ar=c(phi)), innov = rt(n,tdf))
  if(etype==3)
   Y <- arima.sim(n=n, list(ar=c(phi)), innov =          
                  runif(n,min=-1,max=1))   
  if(etype==4)
   Y <- arima.sim(n=n, list(ar=c(phi)), innov = (rexp(n)-1))
}
if(tstype==3 || tstype==4){
if (etype==1) 
   Y <- arima.sim(n=n, list(ma=c(theta)), innov = rnorm(n))
  if(etype==2)
   Y <- arima.sim(n=n, list(ma=c(theta)), innov = rt(n,tdf))
  if(etype==3)
   Y <- arima.sim(n=n, list(ma=c(theta)), innov =          
                  runif(n,min=-1,max=1))   
  if(etype==4)
   Y <- arima.sim(n=n, list(ma=c(theta)), innov = (rexp(n)-1))
}
if(tstype==5){
  if (etype==1) 
 Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov = rnorm(n),n.start=burn)
  if(etype==2)
 Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov = rt(n,tdf),n.start=burn)
  if(etype==3)
   Y <- arima.sim(n=n, list(ar=c(phi),ma=c(theta)), innov =          
                  runif(n,min=-1,max=1),n.start=burn)   
  if(etype==4)
Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov=(rexp(n)-1),n.start=burn)
}
   Z <-Y[1:no2]
    outvs<-auto.arima(Z,d=0,max.p=pmax,max.q=qmax,seasonal=FALSE,ic="aic",
           stationary=TRUE,method="ML")
arord<-outvs$arma[1]
maord<-outvs$arma[2]    
    ordno2[i,1]<-arord 
    ordno2[i,2]<-maord 
    if(arord < ps || maord < qs)  #|| is the or operator
    undfit <- undfit+1
    
}
undfit<-undfit/nruns
list(ordno2=ordno2,beta=beta,ps=ps,qs=qs,undfit=undfit)
}


dsarsim<-function(n=100,nruns=20,pmax=5,etype=1,tstype=1,tdf=5){
#datasplitting with AR(p) model selection.
#assumes n > 10 pmax,  full model beta = (phi1,...,phipmax)
#current program uses AIC with H using the first floor(n/2) cases
#undfit is the proportion of runs where model selection underfit
#etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom
#etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors
#tstype = k for an AR(k) model, k=1,2
#for small n, underfitting is severe, dsarsim2 is much better
beta<-0*1:pmax
phi=0.5  #tstype=1 for AR(1) model
beta[1] <- 0.5
k<-tstype
if(tstype==2) {
phi=c(0.5,0.33)  #tstype=2 for AR(2) model
beta[2]<-0.33}
ordno2<- 1:nruns   #order for n over 2 n/2 data spitting
no2 <- floor(n/2)
undfit<-0
for(i in 1:nruns) {
  if (etype==1) 
   Y <- arima.sim(n=n, list(ar=c(phi)), innov = rnorm(n))
  if(etype==2)
   Y <- arima.sim(n=n, list(ar=c(phi)), innov = rt(n,tdf))
  if(etype==3)
   Y <- arima.sim(n=n, list(ar=c(phi)), innov =          
                  runif(n,min=-1,max=1))   
  if(etype==4)
   Y <- arima.sim(n=n, list(ar=c(phi)), innov = (rexp(n)-1))
   Z <-Y[1:no2]
    outvs<-ar.yw(Z,order.max=pmax)
    ordno2[i]<-outvs$order  
    if(outvs$order < k)
    undfit <- undfit+1
    
}
undfit<-undfit/nruns
list(beta=beta,order=k,ordno2=ordno2,undfit=undfit)
}


dsarsim2<-function(n=100,nruns=20,pmax=13,etype=1,tstype=1,tdf=5,phi=c(0.5),nbic=14,pen=2){
#program fails if nruns > 100 and sometimes fails if n is too small
#uses the GMLE  (Yule-Walker underfits too much and needs order.max > 0)
#datasplitting with AR(p) model selection.
#assumes n > 7 pmax,  full model beta = (phi1,...,phipmax)
#data splitting uses the first floor(n/2) cases
#uses AIC if n < nbic*pmax, and BIC if n >= nbic*pmax
#undfit is the proportion of runs where model selection underfit
#etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom
#etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors
beta<-0*1:pmax
phi=0.5  #tstype=1 for AR(1) model
beta[1] <- 0.5
ps<-1
nval=nbic*pmax
if(tstype==2) {
phi=c(0.5,0.33)  #tstype=2 for AR(2) model
beta[2]<-0.33
ps<-2}
if(tstype==3) {
#tstype=3 for phi supplied by user
phi <- as.vector(phi)
ps<-length(phi)
beta[1:ps]<-phi
}
ordno2<- 1:nruns  #order for n/2 (n over 2) data splitting
no2 <- floor(n/2)
undfit<-0
for(i in 1:nruns) {
  if (etype==1) 
   Y <- arima.sim(n=n, list(ar=c(phi)), innov = rnorm(n))
  if(etype==2)
   Y <- arima.sim(n=n, list(ar=c(phi)), innov = rt(n,tdf))
  if(etype==3)
   Y <- arima.sim(n=n, list(ar=c(phi)), innov =          
                  runif(n,min=-1,max=1))   
  if(etype==4)
   Y <- arima.sim(n=n, list(ar=c(phi)), innov = (rexp(n)-1))
   Z <-Y[1:no2]
    out<-ar(Z,aic=FALSE,order.max=pmax,method="mle")
if(n < nval){
critr <- n*log(out$var.pred) +2*pmax - pen #AIC type criterion - pen
pI <- pmax
 for(j in 1:pmax){
   out<-ar(Z,aic=FALSE,order.max=(pmax-j),method="mle")
   ctem <- n*log(out$var.pred) +2*(pmax-j)
   if(ctem < critr){
     critr <- ctem - pen
     pI <- (pmax-j)}
}
}
else{
critr <- n*log(out$var.pred) +log(n)*pmax  #BIC type criterion 
pI <- pmax
 for(j in 1:pmax){
   out<-ar(Z,aic=FALSE,order.max=(pmax-j),method="mle")
   ctem <- n*log(out$var.pred) +log(n)*(pmax-j)
   if(ctem < critr){
     critr <- ctem 
     pI <- (pmax-j)}
}
}
    ordno2[i]<-pI  
    if(pI < ps)
    undfit <- undfit+1    
}
undfit<-undfit/nruns
list(beta=beta,order=ps,ordno2=ordno2,undfit=undfit)
}


dsmasim<-function(n=100,nruns=20,qmax=5,etype=1,tstype=1,tdf=5){
#datasplitting with MA(q) model selection.
#assumes n > 10 qmax,  full model beta = (theta1,...,thetaqmax)
#current program uses AIC with H using the first floor(n/2) cases
#undfit is the proportion of runs where model selection underfit
#etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom
#etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors
#tstype = k for an MA(k) model, k=1,2
#uses library(forecast)
beta<-0*1:qmax
theta=-0.5  #tstype=1 for MA(1) model
beta[1] <- -0.5
k<-tstype
if(tstype==2) {
theta=c(-0.5,0.5)  #tstype=2 for MA(2) model
beta[2]<-0.5}
ordno2<- 1:nruns   #order for n over 2 n/2 data spitting
no2 <- floor(n/2)
undfit<-0
for(i in 1:nruns) {
  if (etype==1) 
   Y <- arima.sim(n=n, list(ma=c(theta)), innov = rnorm(n))
  if(etype==2)
   Y <- arima.sim(n=n, list(ma=c(theta)), innov = rt(n,tdf))
  if(etype==3)
   Y <- arima.sim(n=n, list(ma=c(theta)), innov =          
                  runif(n,min=-1,max=1))   
  if(etype==4)
   Y <- arima.sim(n=n, list(ma=c(theta)), innov = (rexp(n)-1))
   Z <-Y[1:no2]
    outvs<-auto.arima(Z,d=0,max.p=0,max.q=qmax,seasonal=FALSE,ic="aic",
           stationary=TRUE)
    ord<-outvs$arma[2]    
    ordno2[i]<-ord 
    if(ord < k)
    undfit <- undfit+1
    
}
undfit<-undfit/nruns
list(beta=beta,order=k,ordno2=ordno2,undfit=undfit)
}


dsmasim2<-function(n=100,nruns=20,qmax=13,etype=1,tstype=1,tdf=5,theta=c(0.5),
nbic=0.5,pen=2){
#program sometimes fails if n is too small
#uses the GMLE   (take nbic large to see AIC, nbic=0.5 for BIC)  
#datasplitting with MA(q) model selection.
#assumes n > 7 qmax,  full model beta = (theta1,...,thetaqmax)
#data splitting uses the first floor(n/2) cases
#uses AIC if n < nbic*qmax, and BIC if n >= nbic*qmax, default uses BIC
#undfit is the proportion of runs where model selection underfit
#etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom
#etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors
beta<-0*1:qmax
theta=-0.5  #tstype=1 for MA(1) model
beta[1] <- -0.5
qs<-1
nval=nbic*qmax
if(tstype==2) {
theta=c(-0.5,0.5)  #tstype=2 for MA(2) model
beta[2]<-0.5
qs<-2}
if(tstype==3) {
#tstype=3 for theta supplied by user
theta <- as.vector(theta)
qs<-length(theta)
beta[1:qs]<-theta
}
ordno2<- 1:nruns  #order for n/2 (n over 2) data splitting
no2 <- floor(n/2)
undfit<-0
for(i in 1:nruns) {
  if (etype==1) 
   Y <- arima.sim(n=n, list(ma=c(theta)), innov = rnorm(n))
  if(etype==2)
   Y <- arima.sim(n=n, list(ma=c(theta)), innov = rt(n,tdf))
  if(etype==3)
   Y <- arima.sim(n=n, list(ma=c(theta)), innov =          
                  runif(n,min=-1,max=1))   
  if(etype==4)
   Y <- arima.sim(n=n, list(ma=c(theta)), innov = (rexp(n)-1))
   Z <-Y[1:no2]
    out <- arima(Z,order=c(0,0,qmax),method="ML")
if(n < nval){
critr <- n*log(out$sigma2) +2*qmax - pen #AIC type criterion - pen
qI <- qmax
 for(j in 1:qmax){
   out <- arima(Z,order=c(0,0,(qmax-j)),method="ML")   
   ctem <- n*log(out$sigma2) +2*(qmax-j)
   if(ctem < critr){
     critr <- ctem - pen
     qI <- (qmax-j)}
}
}
else{
critr <- n*log(out$sigma2) +log(n)*qmax  #BIC type criterion 
qI <- qmax
 for(j in 1:qmax){
   out <- arima(Z,order=c(0,0,(qmax-j)),method="ML")
   ctem <- n*log(out$sigma2) +log(n)*(qmax-j)
   if(ctem < critr){
     critr <- ctem 
     qI <- (qmax-j)}
}
}
    ordno2[i]<-qI  
    if(qI < qs)
    undfit <- undfit+1    
}
undfit<-undfit/nruns
list(beta=beta,order=qs,ordno2=ordno2,undfit=undfit)
}


dspisim<-function(n=50,nv=19,nruns=100,ytype=1,ttype=1,eps=0.1,shift=9,tdf=3,alpha=0.05)
{#Gets mean coverages and lengths of the data splitting  prediction interval.
#ytype = 1 for  N(0,1), 2 for t_df, 3 for EXP(1), 4 for U(-1,1), 5 for mixture,
#6 for lognormal
#ttype = 1 if T=median dtype = 2 if T=mean, =3 forth shorth midpoint
#want n/2 > nv
#calls shorth 3
q <- n+1
cov <- 0
nv <- min(nv, floor(n/2))
nH <- n - nv
uv <- min(nv, ceiling( (1 - alpha)*(1+nv) ) ) 
up <- uv/(nv+1) #quantile for prediction region        
indx <- 1:n 
len <- 0*1:nruns      
for(i in 1:nruns) {
                #make data
   if(ytype == 1) 
     y <- rnorm(q)
   if(ytype == 2) 
     y <- rt(q, df = tdf)
   if(ytype == 3) 
     y <- rexp(q)
   if(ytype == 4)
     y <- runif(q, min = -1, max = 1)
   if(ytype == 5) 
     y <- rnorm(q, sd = 1 + rbinom(n, 1, eps) * shift)
   if(ytype == 6) {
     y<- rnorm(q)
     y<- exp(y)}
     yf <- y[q]
     y <- y[-q]		
  #find sets H and V
  perm <- sample(indx,n)
  H <- perm[1:nH]
  yH <- y[H]
  yV <- y[-H]
  if(ttype==2)
   T <- mean(yH)
  if(ttype==1)
   T <- median(yH)
  if(ttype==3){
 out<-shorth3(yH)
  T <- (out$shorth[1]+out$shorth[2])/2}
  #find PI
   Dsq <- (yV-T)^2
   dsq <- quantile(Dsq, up, type=1)
   cut <- sqrt(dsq)
   low <- T-cut
   hi <-  T+cut	
   len[i] <- 2*cut
   if(low <= yf && hi >= yf)
   cov <- cov+1
	}
	cov <- cov/nruns
        mnlen <- mean(len)	
	list(up=up,cov=cov,mnlen=mnlen)
}


fflynx<-function()
{#  type  data(lynx)
Y <- log10(lynx) 
FAR2 <- 1:114
FAR11 <- 1:114
FAR12 <- 1:114
SETAR272 <- 1:114
SETAR252 <- 1:114
for(i in 3:114){
FAR2[i ] <- 1.05 + 1.41*Y[i-1] -0.77*Y[i-2]}
for(i in 12:114){
FAR11[i ] <-  1.13*Y[i-1] -0.51*Y[i-2] + .23*Y[i-3] -0.29*Y[i-4]
 + .14*Y[i-5] -0.14*Y[i-6] + 0.08*Y[i-7] -0.04*Y[i-8]
 + .13*Y[i-9] + 0.19*Y[i-10] - .31*Y[i-11] }
for(i in 13:114){
FAR12[i ] <-  1.123 + 1.084*Y[i-1] -0.477*Y[i-2] + .265*Y[i-3] -0.218*Y[i-4]
 + .180*Y[i-9]  - .224*Y[i-12] }
for(i in 13:114){
if( Y[i-2] <= 3.116){
SETAR272[i ] <-  0.546  + 1.032*Y[i-1] -0.173*Y[i-2] + .171*Y[i-3] -0.431*Y[i-4]
 + .332*Y[i-5]  - .284*Y[i-6] + .210*Y[i-7]}
else {SETAR272[i ] <-  2.632  + 1.492*Y[i-1] -1.324*Y[i-2]}
}
for(i in 13:114){
if( Y[i-2] <= 3.05){
SETAR252[i ] <-  0.768  + 1.064*Y[i-1] -0.200*Y[i-2] + .164*Y[i-3] -0.428*Y[i-4]
 + .181*Y[i-5] }
else {SETAR252[i ] <-  2.254  + 1.474*Y[i-1] -1.202*Y[i-2]}
}
x <- cbind(Y,FAR2,FAR11,FAR12,SETAR272,SETAR252)
x <- x[13:114,]
print(cor(x))
pairs(x)
}


FFRRplots<-function(W,zz1,zz2){
#Let zz1 be the output of the arima function, eg zz <- arima(lh,c(1,0,0)),
#let zz2 be the output of a competing arima model, eg 
#zz <- arima(lh,c(2,0,0)) and let W be the time series, eg W <- lh.
#Makes the FF plot of the fitted values from the two models
#and the RR plot of the residuals from the two models.
#Use for ARIMA(p,d,q)x(P,D,Q)s models. 
#Fitted value = W - residual.
Y <- as.vector(W)
Resid1 <- as.vector(zz1$resid)
FIT1 <- as.vector(Y-Resid1)
Resid2 <- as.vector(zz2$resid)
FIT2 <- as.vector(Y-Resid2)
par(mfrow=c(2,1))
plot(FIT1,FIT2)
abline(0,1)
title("FF plot")
plot(Resid1,Resid2)
title("RR plot")
abline(0,1)
par(mfrow=c(1,1))
}


fysim<-function( runs = 20)
{# 20 FY response plots for simulated AR(2) time series data
fycorr <- 1:runs
for(i in 1: runs){
Y <- ardata()$arts
out <- ar.yw(Y)
Yts <- Y[10:200]
FIT <- Yts - out$resid[10:200]
plot(FIT,Yts)
abline(0,1)
fycorr[i] <- cor(FIT,Yts)
}
list(fycorr=fycorr)
}


intercept<-function(zz, p = 1, q = 1){
#The R function arima intercept is actually an estimate of the mean.
#Let zz be the output of the arima function, eg zz <- arima(lh,c(1,0,1)),
#and let p and q be the values used in the fit.
#Assume d = 0. Then this function prints the intercept.
mn <- zz$coef[(p+q+1)]
intercept <- mn
if (p > 0) {
intercept <- mn * (1 - sum(zz$coef[1:p]))}
return(intercept)
}


locpi<-function(Y, alpha = 0.05){
# Makes the PI (L_n,U_n) for the MA(q) time series, 
#for Y_{t+j} when j > q.
#Uses ybar instead of the estimated mean from the MA(q) model.
#Then the simulation won't bomb.
sy <- as.vector(Y)
n <- length(sy)
cc <- ceiling(n * (1 - alpha))
corfac <- (1 + 15/n) * sqrt((n+1)/(n - 1))
yfhat <- mean(sy)
#get the PI
  sy <- sort(sy)
  rup <- sy[cc]
  rlow <- sy[1]
  olen <- rup - rlow
  if(cc < n){
    for(j in (cc + 1):n){
	zlen <- sy[j] - sy[j - cc + 1]
	if(zlen < olen) {
	   olen <- zlen
	   rup <- sy[j]
	   rlow <- sy[j - cc + 1]
	}
    }
  }
UPI  <- (1-corfac) * yfhat + corfac * rup
LPI <- (1-corfac) * yfhat + corfac * rlow
list(LPI = LPI, UPI=UPI)
}


locpi2<-function(fres, k=2, alph = 0.05){
#Makes the PI [L_n(h),U_n(h)] for a future forecast residual
#for the ARMA(p,q) time series with, e.g., k = p+q 
sy <- as.vector(fres)
n <- length(sy)  #n is n_h
  if (alph > 0.1)
    {qn <- min(1 - alph + 0.05, 1 - alph + k/n)}
  if (alph <= 0.1)
    {qn <- min(1 - alph/2, 1 - alph + 10*alph*k/n)}
#qn = 1 - deltan, alpha = delta = alph
cc <- ceiling(n * qn)
#get the PI
  sy <- sort(sy)
  rup <- sy[cc]
  rlow <- sy[1]
  olen <- rup - rlow
  if(cc < n){
    for(j in (cc + 1):n){
	zlen <- sy[j] - sy[j - cc + 1]
	if(zlen < olen) {
	   olen <- zlen
	   rup <- sy[j]
	   rlow <- sy[j - cc + 1]
	}
    }
  }
UPI  <-  rup
LPI <-  rlow
list(LPI = LPI, UPI=UPI)
}


maboot<-function(Y,B=100,qmax=10,c=0.05){
#Bootstraps MA(q) model selection using the parametric bootstrap.
#assumes n > 10 qmax and adds cB full model bootstrap samples.
#uses the auto.arima from the forecast package
#The full model and betahatmix are also bootstrapped.
#uses library(forecast)
out <- arima(Y,order=c(0,0,qmax))
thetahat = out$coef[1:qmax] #omit the constant, if any
n<-length(Y)
nmqmax <- n-qmax
sig<-sqrt(out$sigma2)
d <- ceiling(c*B)
bpd <- B + d
bp1 <- B + 1
betas <- matrix(0,nrow=bpd,ncol=qmax)
btmix <- betas
btfull <- betas
bhatvs <- 0*1:qmax
outvs<-auto.arima(Y,d=0,max.p=0,max.q=qmax,seasonal=FALSE,ic="aicc",
stationary=TRUE)
ord<-outvs$arma[2]
if(ord>0)
bhatvs[1:ord]<-outvs$coef[1:ord]
for(i in 1:B) {
   yb <- arima.sim(n=n,list(ma=thetahat),sd=sig)
  tem<-auto.arima(yb,d=0,max.p=0,max.q=qmax,seasonal=FALSE,ic="aicc",
stationary=TRUE)
   ord<-tem$arma[2]
   if(ord>0)
   betas[i,1:ord]<-tem$coef[1:ord]
   tem<-arima(yb,order=c(0,0,qmax))
   btfull[i,] <- tem$coef[1:qmax]
#mix
if(ord>0){
   yb <- arima.sim(n=n,list(ma=thetahat),sd=sig)
   tem <- arima(yb,order=c(0,0,ord))
   btmix[i,1:ord]<-tem$coef[1:ord]}
}
for(i in bp1:bpd){
   yb <- arima.sim(n=n,list(ma=thetahat),sd=sig)
   tem<-arima(yb,order=c(0,0,qmax))
   betas[i,] <- tem$coef[1:qmax]
}
btmix[bp1:bpd,] <- betas[bp1:bpd,]
btfull[bp1:bpd,] <- betas[bp1:bpd,]
list(btfull=btfull,betas=betas,btmix=btmix,bhatvs=bhatvs)
}


maboot2<-function(Y,B=100,qmax=10,c=0.05){
#Bootstraps MA(q) model selection using the residual bootstrap.
#assumes n > 10 qmax and adds cB full model bootstrap samples.
#uses the auto.arima from the forecast package
#The full model and betahatmix are also bootstrapped.
#uses library(forecast)
out <- arima(Y,order=c(0,0,qmax))
thetahat = out$coef[1:qmax] #omit the constant, if any
n<-length(Y)
nmqmax <- n-qmax
sig<-sqrt(out$sigma2)
res<-out$res #scale the residuals to have 0 mean
res <- (res-mean(res))*sqrt(n/(n-qmax)) #want var(res) near sigmahat^2
d <- ceiling(c*B)
bpd <- B + d
bp1 <- B + 1
betas <- matrix(0,nrow=bpd,ncol=qmax)
btmix <- betas
btfull <- betas
bhatvs <- 0*1:qmax
outvs<-auto.arima(Y,d=0,max.p=0,max.q=qmax,seasonal=FALSE,ic="aicc",
stationary=TRUE)
ord<-outvs$arma[2]
if(ord>0)
bhatvs[1:ord]<-outvs$coef[1:ord]
for(i in 1:B) {
   resb <- sample(res,size=n,replace=TRUE)
   yb <- arima.sim(n=n,list(ma=thetahat),innov = resb,sd=sig)
  tem<-auto.arima(yb,d=0,max.p=0,max.q=qmax,seasonal=FALSE,ic="aicc",
stationary=TRUE)
   ord<-tem$arma[2]
   if(ord>0)
   betas[i,1:ord]<-tem$coef[1:ord]
   tem<-arima(yb,order=c(0,0,qmax))
   btfull[i,] <- tem$coef[1:qmax]
#mix
if(ord>0){
   resb <- sample(res,size=n,replace=TRUE)
   yb <- arima.sim(n=n,list(ma=thetahat),innov = resb,sd=sig)
   tem <- arima(yb,order=c(0,0,ord))
   btmix[i,1:ord]<-tem$coef[1:ord]}
}
for(i in bp1:bpd){
   resb <- sample(res,size=n,replace=TRUE)
   yb <- arima.sim(n=n,list(ma=thetahat),innov = resb,sd=sig)
   tem<-arima(yb,order=c(0,0,qmax))
   betas[i,] <- tem$coef[1:qmax]
}
btmix[bp1:bpd,] <- betas[bp1:bpd,]
btfull[bp1:bpd,] <- betas[bp1:bpd,]
list(btfull=btfull,betas=betas,btmix=btmix,bhatvs=bhatvs)
}


macisim<-function(N=100,nruns=100,etype=1,tdf=5){
#Simulates MA(2) time series of length n, and finds proportion
#of time theta1 and theta2 are in their 95% CI.
#average sqrt(n) CI length is also given. 
#etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom
#etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors
ma1cov <- 0
ma2cov <- 0
ma1low <- 1:nruns
ma1up <- ma1low
ma2low<-ma1low
ma2up<-ma1low
for(i in 1:nruns){
  thet1 <- runif(1,min=-0.5,max=0.5)
  thet2 <- runif(1,min=-0.4,max=0.4)
  if (etype==1) 
   y <- arima.sim(n=N, list(ma=c(thet1,thet2)), innov = rnorm(N))
  if(etype==2)
   y <- arima.sim(n=N, list(ma=c(thet1,thet2)), innov = rt(N,tdf))
  if(etype==3)
   y <- arima.sim(n=N, list(ma=c(thet1,thet2)), innov = runif(N,min=-1,max=1))
  if(etype==4)
   y <- arima.sim(n=N, list(ma=c(thet1,thet2)), innov = (rexp(N)-1))
  zz<-arima(y,c(0,0,2))
  coef <- zz$coef
  se <- sqrt(diag(zz$var.coef))
  LCI <- coef - 2*se
  UCI <- coef + 2*se
  ma1low[i] <- LCI[1]
  ma1up[i] <- UCI[1]
  ma2low[i] <- LCI[2]
  ma2up[i] <- UCI[2]
  if(ma1low[i] < thet1 && ma1up[i] > thet1)
   ma1cov <- ma1cov + 1
  if(ma2low[i] < thet2 && ma2up[i] > thet2)
   ma2cov <- ma2cov + 1
}
ma1cov <- ma1cov/nruns
ma1len <- sqrt(N) * mean(ma1up - ma1low)
ma2cov <- ma2cov/nruns
ma2len <- sqrt(N) * mean(ma2up - ma2low)
list(ma1cov = ma1cov, ma1len = ma1len, ma2cov = ma2cov, ma2len = ma2len)
}


maq<-function(Y,q=1){
#Fits an MA(q) model to time series Y using OLS.
#assumes q < n - q
#Makes a response and residual plot.
# Advance the plot by highlighting Stop with the right mouse button.
#phihat = phihat_0 = intercept, phihat_1, ..., phihat_p
cmar <- par("mar")
	par(mfrow = c(2, 1))
        par(mar=c(4.0,4.0,2.0,0.5))
Y <- as.vector(Y)
n <- length(Y)
zz<-arima(Y,c(0,0,q))
macoef <- zz$coef
res<-zz$resid
Xma <- matrix(0,nrow=n-q,ncol=q)
Yma <- Y[(q+1):n]
for(i in 1:q){
Xma[,i] <- res[(q-i+1):(n-i)]
}
madat <- as.data.frame(cbind(Yma,Xma))
tem <- lm(Yma~.,data=madat)
thetahat <- tem$coef
fit <- Y
resid <- Y
fit[(q+1):n] <- tem$fit
fit[1:q] <- NA
resid[(q+1):n] <- tem$res
resid[1:q] <- NA
plot(fit,Y)
	abline(0, 1)
	title("Response Plot")
        identify(fit,Y)
	plot(fit, resid)
	title("Residual Plot")
        identify(fit, resid)
par(mfrow = c(1, 1))
par(mar=cmar)
list(fit=fit,resid=resid,tem=tem,macoef=macoef,thetahat=thetahat)
}



msarsim<-function(n=100,nruns=20,pmax=5,BB=100,cc=0.01,etype=1,tstype=1,
tdf=5,btype=1,alph=0.05){
#Bootstraps AR(p) model selection, and full model, and betahatmix.
#assumes n > 10 pmax,   beta = (phi1,...,phipmax)
#btype = 1 for parametric bootstrap, 2 for residual bootstrap
#calls arboot or arboot2
#etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom
#etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors
#tstype = k for an AR(k) model, k=1,2
beta<-0*1:pmax
phi=0.5  #tstype=1 for AR(1) model
beta[1] <- 0.5
pp6<-pmax+6; pp5<-pmax+5; pp4<-pmax+4; pp3<-pmax+3; pp1<-pmax+1; pp2<-pmax+2
cicov <- 0*(1:pp6)
avelen <- 0*(1:pp6)
cicovmix <- cicov
avelenmix <- avelen
cicovfull <- cicov
avelenfull <- avelen
k<-tstype
if(tstype==2) {
phi=c(0.5,0.33)  #tstype=2 for AR(2) model
beta[2]<-0.33}
tvec<-beta[1:k]
for(i in 1: nruns) {
  if (etype==1) 
   Y <- arima.sim(n=n, list(ar=c(phi)), innov = rnorm(n))
  if(etype==2)
   Y <- arima.sim(n=n, list(ar=c(phi)), innov = rt(n,tdf))
  if(etype==3)
   Y <- arima.sim(n=n, list(ar=c(phi)), innov =          
                  runif(n,min=-1,max=1))   
  if(etype==4)
   Y <- arima.sim(n=n, list(ar=c(phi)), innov = (rexp(n)-1))
  if(btype==1)
    out<-arboot(Y=Y,B=BB,pmax=pmax,c=cc)
  else
    out<-arboot2(Y=Y,B=BB,pmax=pmax,c=cc)
  for(j in 1:pmax){
    tem <- shorth3(out$betas[,j],alpha=alph)
    if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2])
    cicov[j] <- cicov[j] + 1
    avelen[j] <- avelen[j] + tem$shorth[2] - tem$shorth[1]
    tem <- shorth3(out$btmix[,j],alpha=alph)
    if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2])
    cicovmix[j] <- cicovmix[j] + 1
    avelenmix[j] <- avelenmix[j] + tem$shorth[2] - tem$shorth[1]
   tem <- shorth3(out$btfull[,j],alpha=alph)
    if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2])
    cicovfull[j] <- cicovfull[j] + 1
    avelenfull[j] <- avelenfull[j] + tem$shorth[2] - tem$shorth[1]
   } 
    #test whether the last pmax-k values of beta are 0
                gg <- pmax - k 
                tstat <- out$bhatvs[(k+1):pmax]
           tem <- confreg(out$betas[,(k+1):pmax],g=gg,that=tstat,alpha=alph)
                if(tem$D0 <= tem$cuplim) #pred. reg. method
                cicov[pp1] <- cicov[pp1] + 1
                avelen[pp1] <- avelen[pp1] + tem$cuplim
                if(tem$br0 <= tem$cuplim) #hybrid method
                cicov[pp2] <- cicov[pp2] + 1
                avelen[pp2] <- avelen[pp1] #same cutoff so same length
                if(tem$br0 <= tem$brlim) #Bickel and Ren method
                cicov[pp3] <- cicov[pp3] + 1
                avelen[pp3] <- avelen[pp3] + tem$brlim
       tem <- confreg(out$btmix[,(k+1):pmax],g=gg,that=tstat,alpha=alph)
                if(tem$D0 <= tem$cuplim) #pred. reg. method
                cicovmix[pp1] <- cicovmix[pp1] + 1
                avelenmix[pp1] <- avelenmix[pp1] + tem$cuplim
                if(tem$br0 <= tem$cuplim) #hybrid method
                cicovmix[pp2] <- cicovmix[pp2] + 1
                avelenmix[pp2] <- avelenmix[pp1] #same cutoff so same length
                if(tem$br0 <= tem$brlim) #Bickel and Ren method
                cicovmix[pp3] <- cicovmix[pp3] + 1
                avelenmix[pp3] <- avelenmix[pp3] + tem$brlim
 tem <- confreg(out$btfull[,(k+1):pmax],g=gg,that=tstat,alpha=alph)
                if(tem$D0 <= tem$cuplim) #pred. reg. method
                cicovfull[pp1] <- cicovfull[pp1] + 1
                avelenfull[pp1] <- avelenfull[pp1] + tem$cuplim
                if(tem$br0 <= tem$cuplim) #hybrid method
                cicovfull[pp2] <- cicovfull[pp2] + 1
                avelenfull[pp2] <- avelenfull[pp1] #same cutoff so same length
                if(tem$br0 <= tem$brlim) #Bickel and Ren method
                cicovfull[pp3] <- cicovfull[pp3] + 1
                avelenfull[pp3] <- avelenfull[pp3] + tem$brlim
#test whether the first k values of beta are (beta_1,...,beta_k)
                gg <- k 
                tstat <- out$bhatvs[1:k]       
           tem <- confreg(out$betas[,1:k],g=gg,that=tstat,alpha=alph)
                D0 <- sqrt(mahalanobis(tvec, tem$center, tem$cov))
                if(D0 <= tem$cuplim) #pred. reg. method
                cicov[pp4] <- cicov[pp4] + 1
                avelen[pp4] <- avelen[pp4] + tem$cuplim 
                D1 <- sqrt(mahalanobis(tvec, tstat, tem$cov))
                if(D1 <= tem$cuplim) #hybrid method
                cicov[pp5] <- cicov[pp5] + 1
                avelen[pp5] <- avelen[pp4] #same cutoff so same length
                if(D1 <= tem$brlim) #Bickel and Ren method
                cicov[pp6] <- cicov[pp6] + 1
                avelen[pp6] <-  avelen[pp6] + tem$brlim 
           tem <- confreg(out$btmix[,1:k],g=gg,that=tstat,alpha=alph)
                D0 <- sqrt(mahalanobis(tvec, tem$center, tem$cov))
                if(D0 <= tem$cuplim) #pred. reg. method
                cicovmix[pp4] <- cicovmix[pp4] + 1
                avelenmix[pp4] <- avelenmix[pp4] + tem$cuplim 
                D1 <- sqrt(mahalanobis(tvec, tstat, tem$cov))
                if(D1 <= tem$cuplim) #hybrid method
                cicovmix[pp5] <- cicovmix[pp5] + 1
                avelenmix[pp5] <- avelenmix[pp4] #same cutoff so same length
                if(D1 <= tem$brlim) #Bickel and Ren method
                cicovmix[pp6] <- cicovmix[pp6] + 1
                avelenmix[pp6] <-  avelenmix[pp6] + tem$brlim 
    tem <- confreg(out$btfull[,1:k],g=gg,that=tstat,alpha=alph)
                D0 <- sqrt(mahalanobis(tvec, tem$center, tem$cov))
                if(D0 <= tem$cuplim) #pred. reg. method
                cicovfull[pp4] <- cicovfull[pp4] + 1
                avelenfull[pp4] <- avelenfull[pp4] + tem$cuplim 
                D1 <- sqrt(mahalanobis(tvec, tstat, tem$cov))
                if(D1 <= tem$cuplim) #hybrid method
                cicovfull[pp5] <- cicovfull[pp5] + 1
                avelenfull[pp5] <- avelenfull[pp4] #same cutoff so same length
                if(D1 <= tem$brlim) #Bickel and Ren method
                cicovfull[pp6] <- cicovfull[pp6] + 1
                avelenfull[pp6] <-  avelenfull[pp6] + tem$brlim 
}
cicov <- cicov/nruns
avelen <- avelen/nruns
cicovmix <- cicovmix/nruns
avelenmix <- avelenmix/nruns
cicovfull <- cicovfull/nruns
avelenfull <- avelenfull/nruns
list(cicovfull=cicovfull,avelenfull=avelenfull,cicov=cicov,avelen=avelen,
cicovmix=cicovmix,avelenmix=avelenmix,beta=beta)
}


msarmasim<-function(n=100,nruns=20,pmax=3,qmax=3,BB=100,cc=0.05,
etype=1,tstype=1,tdf=5,alph=0.05){
#Bootstraps ARMA(p,q) model selection.
#assumes n > 10 (pmax+qmax),   beta = (phi1,...,phipmax,theta1,...,thetaqmax)
#uses parametric bootstrap,
#calls armaboot, aic might be better than aicc
#need pmax+3 <=pmax+qmax
#uses library(forecast)
#If the true model is ARMA(ps,qs), then both pmax > ps and qmax > qs
#causes lots of convergence problems.
#etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom
#etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors
kmax<-pmax+qmax   #p=kmax
beta<-0*1:kmax
pp6<-kmax+6; pp5<-kmax+5; pp4<-kmax+4; pp3<-kmax+3; pp1<-kmax+1; pp2<-kmax+2
cicov <- 0*(1:pp6)
avelen <- 0*(1:pp6)
cicovmix <- cicov
avelenmix <- avelen
cicovfull <- cicov
avelenfull <- avelen
if(tstype==1) {
theta=c(-0.5,0.5)  #tstype=1 for MA(2) model
beta[(pmax+1):(pmax+2)]<-theta
valS<-c((pmax+1),(pmax+2))
valO <- c(1:pmax,(pmax+3):kmax)
}
if(tstype==2) {
phi=c(0.5,0.33)  #tstype=2 for AR(2) model
beta[1:2]<-phi
valS <- c(1,2)
valO <- c(3:kmax)
}
if(tstype==3){
#tstype=3 for ARMA(1,1) model
phi <- 0.5
theta <- -0.5
beta[c(1,pmax+1)] <- c(0.5,-0.5)
valS <- c(1,(pmax+1))
valO <- c(2:pmax,(pmax+2):kmax)
}
k<-2
tvec<-beta[valS]
for(i in 1: nruns) {
  if(tstype==1){
  if (etype==1) 
   Y <- arima.sim(n=n, list(ma=c(theta)), innov = rnorm(n))
  if(etype==2)
   Y <- arima.sim(n=n, list(ma=c(theta)), innov = rt(n,tdf))
  if(etype==3)
   Y <- arima.sim(n=n, list(ma=c(theta)), innov =          
                  runif(n,min=-1,max=1))   
  if(etype==4)
   Y <- arima.sim(n=n, list(ma=c(theta)), innov = (rexp(n)-1))
}
  if(tstype==2){
  if (etype==1) 
   Y <- arima.sim(n=n, list(ar=c(phi)), innov = rnorm(n))
  if(etype==2)
   Y <- arima.sim(n=n, list(ar=c(phi)), innov = rt(n,tdf))
  if(etype==3)
   Y <- arima.sim(n=n, list(ar=c(phi)), innov =          
                  runif(n,min=-1,max=1))   
  if(etype==4)
   Y <- arima.sim(n=n, list(ar=c(phi)), innov = (rexp(n)-1))
}
  if(tstype==3){
  if (etype==1) 
   Y <- arima.sim(n=n, list(ar=c(phi),ma=c(theta)), innov = rnorm(n))
  if(etype==2)
   Y <- arima.sim(n=n, list(ar=c(phi),ma=c(theta)), innov = rt(n,tdf))
  if(etype==3)
   Y <- arima.sim(n=n, list(ar=c(phi),ma=c(theta)), innov =          
                  runif(n,min=-1,max=1))   
  if(etype==4)
   Y <- arima.sim(n=n, list(ar=c(phi),ma=c(theta)), innov = (rexp(n)-1))
}
      out<-armaboot(Y=Y,B=BB,pmax=pmax,qmax=qmax,c=cc)
    for(j in 1:kmax){
    tem <- shorth3(out$betas[,j],alpha=alph)
    if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2])
    cicov[j] <- cicov[j] + 1
    avelen[j] <- avelen[j] + tem$shorth[2] - tem$shorth[1]
    tem <- shorth3(out$btmix[,j],alpha=alph)
    if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2])
    cicovmix[j] <- cicovmix[j] + 1
    avelenmix[j] <- avelenmix[j] + tem$shorth[2] - tem$shorth[1]
    tem <- shorth3(out$btfull[,j],alpha=alph)
    if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2])
    cicovfull[j] <- cicovfull[j] + 1
    avelenfull[j] <- avelenfull[j] + tem$shorth[2] - tem$shorth[1]
   } 
    #test whether the  kmax-k values of betaO are 0
                gg <- kmax - k 
                tstat <- out$bhatvs[valO]
           tem <- confreg(out$betas[,valO],g=gg,that=tstat,alpha=alph)
                if(tem$D0 <= tem$cuplim) #pred. reg. method
                cicov[pp1] <- cicov[pp1] + 1
                avelen[pp1] <- avelen[pp1] + tem$cuplim
                if(tem$br0 <= tem$cuplim) #hybrid method
                cicov[pp2] <- cicov[pp2] + 1
                avelen[pp2] <- avelen[pp1] #same cutoff so same length
                if(tem$br0 <= tem$brlim) #Bickel and Ren method
                cicov[pp3] <- cicov[pp3] + 1
                avelen[pp3] <- avelen[pp3] + tem$brlim
       tem <- confreg(out$btmix[,valO],g=gg,that=tstat,alpha=alph)
                if(tem$D0 <= tem$cuplim) #pred. reg. method
                cicovmix[pp1] <- cicovmix[pp1] + 1
                avelenmix[pp1] <- avelenmix[pp1] + tem$cuplim
                if(tem$br0 <= tem$cuplim) #hybrid method
                cicovmix[pp2] <- cicovmix[pp2] + 1
                avelenmix[pp2] <- avelenmix[pp1] #same cutoff so same length
                if(tem$br0 <= tem$brlim) #Bickel and Ren method
                cicovmix[pp3] <- cicovmix[pp3] + 1
                avelenmix[pp3] <- avelenmix[pp3] + tem$brlim
       tem <- confreg(out$btfull[,valO],g=gg,that=tstat,alpha=alph)
                if(tem$D0 <= tem$cuplim) #pred. reg. method
                cicovfull[pp1] <- cicovfull[pp1] + 1
                avelenfull[pp1] <- avelenfull[pp1] + tem$cuplim
                if(tem$br0 <= tem$cuplim) #hybrid method
                cicovfull[pp2] <- cicovfull[pp2] + 1
                avelenfull[pp2] <- avelenfull[pp1] #same cutoff so same length
                if(tem$br0 <= tem$brlim) #Bickel and Ren method
                cicovfull[pp3] <- cicovfull[pp3] + 1
                avelenfull[pp3] <- avelenfull[pp3] + tem$brlim
#test whether the first  betaS = betaS 
                gg <- k 
                tstat <- out$bhatvs[valS]       
           tem <- confreg(out$betas[,valS],g=gg,that=tstat,alpha=alph)
                D0 <- sqrt(mahalanobis(tvec, tem$center, tem$cov))
                if(D0 <= tem$cuplim) #pred. reg. method
                cicov[pp4] <- cicov[pp4] + 1
                avelen[pp4] <- avelen[pp4] + tem$cuplim 
                D1 <- sqrt(mahalanobis(tvec, tstat, tem$cov))
                if(D1 <= tem$cuplim) #hybrid method
                cicov[pp5] <- cicov[pp5] + 1
                avelen[pp5] <- avelen[pp4] #same cutoff so same length
                if(D1 <= tem$brlim) #Bickel and Ren method
                cicov[pp6] <- cicov[pp6] + 1
                avelen[pp6] <-  avelen[pp6] + tem$brlim 
           tem <- confreg(out$btmix[,valS],g=gg,that=tstat,alpha=alph)
                D0 <- sqrt(mahalanobis(tvec, tem$center, tem$cov))
                if(D0 <= tem$cuplim) #pred. reg. method
                cicovmix[pp4] <- cicovmix[pp4] + 1
                avelenmix[pp4] <- avelenmix[pp4] + tem$cuplim 
                D1 <- sqrt(mahalanobis(tvec, tstat, tem$cov))
                if(D1 <= tem$cuplim) #hybrid method
                cicovmix[pp5] <- cicovmix[pp5] + 1
                avelenmix[pp5] <- avelenmix[pp4] #same cutoff so same length
                if(D1 <= tem$brlim) #Bickel and Ren method
                cicovmix[pp6] <- cicovmix[pp6] + 1
                avelenmix[pp6] <-  avelenmix[pp6] + tem$brlim 
           tem <- confreg(out$btfull[,valS],g=gg,that=tstat,alpha=alph)
                D0 <- sqrt(mahalanobis(tvec, tem$center, tem$cov))
                if(D0 <= tem$cuplim) #pred. reg. method
                cicovfull[pp4] <- cicovfull[pp4] + 1
                avelenfull[pp4] <- avelenfull[pp4] + tem$cuplim 
                D1 <- sqrt(mahalanobis(tvec, tstat, tem$cov))
                if(D1 <= tem$cuplim) #hybrid method
                cicovfull[pp5] <- cicovfull[pp5] + 1
                avelenfull[pp5] <- avelenfull[pp4] #same cutoff so same length
                if(D1 <= tem$brlim) #Bickel and Ren method
                cicovfull[pp6] <- cicovfull[pp6] + 1
                avelenfull[pp6] <-  avelenfull[pp6] + tem$brlim 
}
cicov <- cicov/nruns
avelen <- avelen/nruns
cicovmix <- cicovmix/nruns
avelenmix <- avelenmix/nruns
cicovfull <- cicovfull/nruns
avelenfull <- avelenfull/nruns
list(cicovfull=cicovfull,avelenfull=avelenfull,cicov=cicov,avelen=avelen,
cicovmix=cicovmix,avelenmix=avelenmix,beta=beta)
}


msarmasim2<-function(n=100,nruns=20,pmax=3,qmax=3,BB=100,cc=0.05,
etype=1,tstype=1,tdf=5,burn=100,alph=0.05){
#Bootstraps ARMA(p,q) model selection.
#assumes n > 10 (pmax+qmax),   beta = (phi1,...,phipmax,theta1,...,thetaqmax)
#uses parametric bootstrap,
#calls armaboot2, aic might be better than aicc
#need pmax+3 <=pmax+qmax
#uses library(forecast), burn is the burn in period, need burn>pmax+qmax 
#If the true model is ARMA(ps,qs), then both pmax > ps and qmax > qs
#causes lots of convergence problems.
#etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom
#etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors
kmax<-pmax+qmax   #p=kmax
beta<-0*1:kmax
pp6<-kmax+6; pp5<-kmax+5; pp4<-kmax+4; pp3<-kmax+3; pp1<-kmax+1; pp2<-kmax+2
cicov <- 0*(1:pp6)
avelen <- 0*(1:pp6)
cicovmix <- cicov
avelenmix <- avelen
cicovfull <- cicov
avelenfull <- avelen
if(tstype==1) {
theta=c(-0.5,0.5)  #tstype=1 for MA(2) model
beta[(pmax+1):(pmax+2)]<-theta
valS<-c((pmax+1),(pmax+2))
valO <- c(1:pmax,(pmax+3):kmax)
}
if(tstype==2) {
phi=c(0.5,0.33)  #tstype=2 for AR(2) model
beta[1:2]<-phi
valS <- c(1,2)
valO <- c(3:kmax)
}
if(tstype==3){
#tstype=3 for ARMA(1,1) model
phi <- 0.5
theta <- -0.5
beta[c(1,pmax+1)] <- c(0.5,-0.5)
valS <- c(1,(pmax+1))
valO <- c(2:pmax,(pmax+2):kmax)
}
k<-2
tvec<-beta[valS]
for(i in 1: nruns) {
  if(tstype==1){
  if (etype==1) 
   Y <- arima.sim(n=n, list(ma=c(theta)), innov = rnorm(n),n.start=burn)
  if(etype==2)
   Y <- arima.sim(n=n, list(ma=c(theta)), innov = rt(n,tdf),n.start=burn)
  if(etype==3)
   Y <- arima.sim(n=n, list(ma=c(theta)), innov =          
                  runif(n,min=-1,max=1),n.start=burn)   
  if(etype==4)
   Y <- arima.sim(n=n, list(ma=c(theta)), innov = (rexp(n)-1),n.start=burn)
}
  if(tstype==2){
  if (etype==1) 
   Y <- arima.sim(n=n, list(ar=c(phi)), innov = rnorm(n),n.start=burn)
  if(etype==2)
   Y <- arima.sim(n=n, list(ar=c(phi)), innov = rt(n,tdf),n.start=burn)
  if(etype==3)
   Y <- arima.sim(n=n, list(ar=c(phi)), innov =          
                  runif(n,min=-1,max=1),n.start=burn)   
  if(etype==4)
   Y <- arima.sim(n=n, list(ar=c(phi)), innov = (rexp(n)-1),n.start=burn)
}
  if(tstype==3){
  if (etype==1) 
   Y <- arima.sim(n=n, list(ar=c(phi),ma=c(theta)), innov = rnorm(n), n.start=burn)
  if(etype==2)
   Y <- arima.sim(n=n, list(ar=c(phi),ma=c(theta)), innov = rt(n,tdf), n.start=burn)
  if(etype==3)
   Y <- arima.sim(n=n, list(ar=c(phi),ma=c(theta)), innov =          
                  runif(n,min=-1,max=1),n.start=burn)   
  if(etype==4)
   Y <- arima.sim(n=n, list(ar=c(phi),ma=c(theta)), innov = (rexp(n)-1),
n.start=burn)
}
      out<-armaboot2(Y=Y,B=BB,pmax=pmax,qmax=qmax,c=cc,burn=burn)
    for(j in 1:kmax){
    tem <- shorth3(out$betas[,j],alpha=alph)
    if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2])
    cicov[j] <- cicov[j] + 1
    avelen[j] <- avelen[j] + tem$shorth[2] - tem$shorth[1]
    tem <- shorth3(out$btmix[,j],alpha=alph)
    if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2])
    cicovmix[j] <- cicovmix[j] + 1
    avelenmix[j] <- avelenmix[j] + tem$shorth[2] - tem$shorth[1]
    tem <- shorth3(out$btfull[,j],alpha=alph)
    if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2])
    cicovfull[j] <- cicovfull[j] + 1
    avelenfull[j] <- avelenfull[j] + tem$shorth[2] - tem$shorth[1]
   } 
    #test whether the  kmax-k values of betaO are 0
                gg <- kmax - k 
                tstat <- out$bhatvs[valO]
           tem <- confreg(out$betas[,valO],g=gg,that=tstat,alpha=alph)
                if(tem$D0 <= tem$cuplim) #pred. reg. method
                cicov[pp1] <- cicov[pp1] + 1
                avelen[pp1] <- avelen[pp1] + tem$cuplim
                if(tem$br0 <= tem$cuplim) #hybrid method
                cicov[pp2] <- cicov[pp2] + 1
                avelen[pp2] <- avelen[pp1] #same cutoff so same length
                if(tem$br0 <= tem$brlim) #Bickel and Ren method
                cicov[pp3] <- cicov[pp3] + 1
                avelen[pp3] <- avelen[pp3] + tem$brlim
       tem <- confreg(out$btmix[,valO],g=gg,that=tstat,alpha=alph)
                if(tem$D0 <= tem$cuplim) #pred. reg. method
                cicovmix[pp1] <- cicovmix[pp1] + 1
                avelenmix[pp1] <- avelenmix[pp1] + tem$cuplim
                if(tem$br0 <= tem$cuplim) #hybrid method
                cicovmix[pp2] <- cicovmix[pp2] + 1
                avelenmix[pp2] <- avelenmix[pp1] #same cutoff so same length
                if(tem$br0 <= tem$brlim) #Bickel and Ren method
                cicovmix[pp3] <- cicovmix[pp3] + 1
                avelenmix[pp3] <- avelenmix[pp3] + tem$brlim
       tem <- confreg(out$btfull[,valO],g=gg,that=tstat,alpha=alph)
                if(tem$D0 <= tem$cuplim) #pred. reg. method
                cicovfull[pp1] <- cicovfull[pp1] + 1
                avelenfull[pp1] <- avelenfull[pp1] + tem$cuplim
                if(tem$br0 <= tem$cuplim) #hybrid method
                cicovfull[pp2] <- cicovfull[pp2] + 1
                avelenfull[pp2] <- avelenfull[pp1] #same cutoff so same length
                if(tem$br0 <= tem$brlim) #Bickel and Ren method
                cicovfull[pp3] <- cicovfull[pp3] + 1
                avelenfull[pp3] <- avelenfull[pp3] + tem$brlim
#test whether the first  betaS = betaS 
                gg <- k 
                tstat <- out$bhatvs[valS]       
           tem <- confreg(out$betas[,valS],g=gg,that=tstat,alpha=alph)
                D0 <- sqrt(mahalanobis(tvec, tem$center, tem$cov))
                if(D0 <= tem$cuplim) #pred. reg. method
                cicov[pp4] <- cicov[pp4] + 1
                avelen[pp4] <- avelen[pp4] + tem$cuplim 
                D1 <- sqrt(mahalanobis(tvec, tstat, tem$cov))
                if(D1 <= tem$cuplim) #hybrid method
                cicov[pp5] <- cicov[pp5] + 1
                avelen[pp5] <- avelen[pp4] #same cutoff so same length
                if(D1 <= tem$brlim) #Bickel and Ren method
                cicov[pp6] <- cicov[pp6] + 1
                avelen[pp6] <-  avelen[pp6] + tem$brlim 
           tem <- confreg(out$btmix[,valS],g=gg,that=tstat,alpha=alph)
                D0 <- sqrt(mahalanobis(tvec, tem$center, tem$cov))
                if(D0 <= tem$cuplim) #pred. reg. method
                cicovmix[pp4] <- cicovmix[pp4] + 1
                avelenmix[pp4] <- avelenmix[pp4] + tem$cuplim 
                D1 <- sqrt(mahalanobis(tvec, tstat, tem$cov))
                if(D1 <= tem$cuplim) #hybrid method
                cicovmix[pp5] <- cicovmix[pp5] + 1
                avelenmix[pp5] <- avelenmix[pp4] #same cutoff so same length
                if(D1 <= tem$brlim) #Bickel and Ren method
                cicovmix[pp6] <- cicovmix[pp6] + 1
                avelenmix[pp6] <-  avelenmix[pp6] + tem$brlim 
           tem <- confreg(out$btfull[,valS],g=gg,that=tstat,alpha=alph)
                D0 <- sqrt(mahalanobis(tvec, tem$center, tem$cov))
                if(D0 <= tem$cuplim) #pred. reg. method
                cicovfull[pp4] <- cicovfull[pp4] + 1
                avelenfull[pp4] <- avelenfull[pp4] + tem$cuplim 
                D1 <- sqrt(mahalanobis(tvec, tstat, tem$cov))
                if(D1 <= tem$cuplim) #hybrid method
                cicovfull[pp5] <- cicovfull[pp5] + 1
                avelenfull[pp5] <- avelenfull[pp4] #same cutoff so same length
                if(D1 <= tem$brlim) #Bickel and Ren method
                cicovfull[pp6] <- cicovfull[pp6] + 1
                avelenfull[pp6] <-  avelenfull[pp6] + tem$brlim 
}
cicov <- cicov/nruns
avelen <- avelen/nruns
cicovmix <- cicovmix/nruns
avelenmix <- avelenmix/nruns
cicovfull <- cicovfull/nruns
avelenfull <- avelenfull/nruns
list(cicovfull=cicovfull,avelenfull=avelenfull,cicov=cicov,avelen=avelen,
cicovmix=cicovmix,avelenmix=avelenmix,beta=beta)
}


msarmasim3<-function(n=100,nruns=20,BB=100,cc=0.05,
etype=1,tstype=4,tdf=5,alph=0.05){
#Bootstraps ARMA(p,q) model selection.  Seems to work for nruns=500.
#assumes n > 10 (pmax+qmax),   beta = (phi1,...,phipmax,theta1,...,thetaqmax)
#uses parametric bootstrap,
#calls armaboot, aic might be better than aicc
#need pmax+3 <=pmax+qmax
#uses library(forecast)
#If the true model is ARMA(ps,qs), then both pmax > ps and qmax > qs
#causes lots of convergence problems.
#etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom
#etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors
pmax=3
qmax=3
kmax<-pmax+qmax   #p=kmax
beta<-0*1:kmax
pp6<-kmax+6; pp5<-kmax+5; pp4<-kmax+4; pp3<-kmax+3; pp1<-kmax+1; pp2<-kmax+2
cicov <- 0*(1:pp6)
avelen <- 0*(1:pp6)
cicovmix <- cicov
avelenmix <- avelen
cicovfull <- cicov
avelenfull <- avelen
if(tstype==4){#need pmax > 2
#tstype=4 for ARMA(3,1) model
phi <- c(0.7,0.1,-0.4)
theta <- 0.1
beta[c(1:3,pmax+1)] <- c(phi,0.1)
valS <- c(1:3,(pmax+1))
if(pmax==3)
valO <- c((pmax+2):kmax)
if(pmax>3)
valO <- c(3:pmax,(pmax+2):kmax)
}
k<-2
tvec<-beta[valS]
for(i in 1: nruns) {
print(i)
    if(tstype==4){
  if (etype==1) 
   Y <- arima.sim(n=n, list(ar=c(phi),ma=c(theta)), innov = rnorm(n))
  if(etype==2)
   Y <- arima.sim(n=n, list(ar=c(phi),ma=c(theta)), innov = rt(n,tdf))
  if(etype==3)
   Y <- arima.sim(n=n, list(ar=c(phi),ma=c(theta)), innov =          
                  runif(n,min=-1,max=1))   
  if(etype==4)
   Y <- arima.sim(n=n, list(ar=c(phi),ma=c(theta)), innov = (rexp(n)-1))
}
      out<-armaboot(Y=Y,B=BB,pmax=pmax,qmax=qmax,c=cc)
    for(j in 1:kmax){
    tem <- shorth3(out$betas[,j],alpha=alph)
    if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2])
    cicov[j] <- cicov[j] + 1
    avelen[j] <- avelen[j] + tem$shorth[2] - tem$shorth[1]
    tem <- shorth3(out$btmix[,j],alpha=alph)
    if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2])
    cicovmix[j] <- cicovmix[j] + 1
    avelenmix[j] <- avelenmix[j] + tem$shorth[2] - tem$shorth[1]
    tem <- shorth3(out$btfull[,j],alpha=alph)
    if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2])
    cicovfull[j] <- cicovfull[j] + 1
    avelenfull[j] <- avelenfull[j] + tem$shorth[2] - tem$shorth[1]
   } 
#test whether the last 2 values of beta are 0
                gg <- 2 
                tstat <- out$bhatvs[c(5,6)]
           tem <- confreg(out$betas[,c(5,6)],g=gg,that=tstat,alpha=alph)
                if(tem$D0 <= tem$cuplim) #pred. reg. method
                cicov[pp1] <- cicov[pp1] + 1
                avelen[pp1] <- avelen[pp1] + tem$cuplim
                if(tem$br0 <= tem$cuplim) #hybrid method
                cicov[pp2] <- cicov[pp2] + 1
                avelen[pp2] <- avelen[pp1] #same cutoff so same length
                if(tem$br0 <= tem$brlim) #Bickel and Ren method
                cicov[pp3] <- cicov[pp3] + 1
                avelen[pp3] <- avelen[pp3] + tem$brlim
       tem <- confreg(out$btmix[,c(5,6)],g=gg,that=tstat,alpha=alph)
                if(tem$D0 <= tem$cuplim) #pred. reg. method
                cicovmix[pp1] <- cicovmix[pp1] + 1
                avelenmix[pp1] <- avelenmix[pp1] + tem$cuplim
                if(tem$br0 <= tem$cuplim) #hybrid method
                cicovmix[pp2] <- cicovmix[pp2] + 1
                avelenmix[pp2] <- avelenmix[pp1] #same cutoff so same length
                if(tem$br0 <= tem$brlim) #Bickel and Ren method
                cicovmix[pp3] <- cicovmix[pp3] + 1
                avelenmix[pp3] <- avelenmix[pp3] + tem$brlim
       tem <- confreg(out$btfull[,c(5,6)],g=gg,that=tstat,alpha=alph)
                if(tem$D0 <= tem$cuplim) #pred. reg. method
                cicovfull[pp1] <- cicovfull[pp1] + 1
                avelenfull[pp1] <- avelenfull[pp1] + tem$cuplim
                if(tem$br0 <= tem$cuplim) #hybrid method
                cicovfull[pp2] <- cicovfull[pp2] + 1
                avelenfull[pp2] <- avelenfull[pp1] #same cutoff so same length
                if(tem$br0 <= tem$brlim) #Bickel and Ren method
                cicovfull[pp3] <- cicovfull[pp3] + 1
                avelenfull[pp3] <- avelenfull[pp3] + tem$brlim   
#test whether betaS = betaS 
                gg <- 4
                tstat <- out$bhatvs[1:4]       
           tem <- confreg(out$betas[,1:4],g=gg,that=tstat,alpha=alph)
                D0 <- sqrt(mahalanobis(tvec, tem$center, tem$cov))
                if(D0 <= tem$cuplim) #pred. reg. method
                cicov[pp4] <- cicov[pp4] + 1
                avelen[pp4] <- avelen[pp4] + tem$cuplim 
                D1 <- sqrt(mahalanobis(tvec, tstat, tem$cov))
                if(D1 <= tem$cuplim) #hybrid method
                cicov[pp5] <- cicov[pp5] + 1
                avelen[pp5] <- avelen[pp4] #same cutoff so same length
                if(D1 <= tem$brlim) #Bickel and Ren method
                cicov[pp6] <- cicov[pp6] + 1
                avelen[pp6] <-  avelen[pp6] + tem$brlim 
           tem <- confreg(out$btmix[,1:4],g=gg,that=tstat,alpha=alph)
                D0 <- sqrt(mahalanobis(tvec, tem$center, tem$cov))
                if(D0 <= tem$cuplim) #pred. reg. method
                cicovmix[pp4] <- cicovmix[pp4] + 1
                avelenmix[pp4] <- avelenmix[pp4] + tem$cuplim 
                D1 <- sqrt(mahalanobis(tvec, tstat, tem$cov))
                if(D1 <= tem$cuplim) #hybrid method
                cicovmix[pp5] <- cicovmix[pp5] + 1
                avelenmix[pp5] <- avelenmix[pp4] #same cutoff so same length
                if(D1 <= tem$brlim) #Bickel and Ren method
                cicovmix[pp6] <- cicovmix[pp6] + 1
                avelenmix[pp6] <-  avelenmix[pp6] + tem$brlim 
           tem <- confreg(out$btfull[,1:4],g=gg,that=tstat,alpha=alph)
                D0 <- sqrt(mahalanobis(tvec, tem$center, tem$cov))
                if(D0 <= tem$cuplim) #pred. reg. method
                cicovfull[pp4] <- cicovfull[pp4] + 1
                avelenfull[pp4] <- avelenfull[pp4] + tem$cuplim 
                D1 <- sqrt(mahalanobis(tvec, tstat, tem$cov))
                if(D1 <= tem$cuplim) #hybrid method
                cicovfull[pp5] <- cicovfull[pp5] + 1
                avelenfull[pp5] <- avelenfull[pp4] #same cutoff so same length
                if(D1 <= tem$brlim) #Bickel and Ren method
                cicovfull[pp6] <- cicovfull[pp6] + 1
                avelenfull[pp6] <-  avelenfull[pp6] + tem$brlim  
}
cicov <- cicov/nruns
avelen <- avelen/nruns
cicovmix <- cicovmix/nruns
avelenmix <- avelenmix/nruns
cicovfull <- cicovfull/nruns
avelenfull <- avelenfull/nruns
list(cicovfull=cicovfull,avelenfull=avelenfull,cicov=cicov,avelen=avelen,
cicovmix=cicovmix,avelenmix=avelenmix,beta=beta)
}


msarmasim4<-function(n=100,nruns=20,BB=100,cc=0.05,
etype=1,tstype=4,tdf=5,alph=0.05,burn=100){
#Bootstraps ARMA(p,q) model selection.
#assumes n > 10 (pmax+qmax),   beta = (phi1,...,phipmax,theta1,...,thetaqmax)
#uses parametric bootstrap, burn is the burn in period, need burn>pmax+qmax
#calls armaboot2, aic might be better than aicc
#need pmax+3 <=pmax+qmax
#uses library(forecast)
#If the true model is ARMA(ps,qs), then both pmax > ps and qmax > qs
#cause lots of convergence problems.
#etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom
#etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors
pmax=3
qmax=3
kmax<-pmax+qmax   #p=kmax
beta<-0*1:kmax
pp6<-kmax+6; pp5<-kmax+5; pp4<-kmax+4; pp3<-kmax+3; pp1<-kmax+1; pp2<-kmax+2
cicov <- 0*(1:pp6)
avelen <- 0*(1:pp6)
cicovmix <- cicov
avelenmix <- avelen
cicovfull <- cicov
avelenfull <- avelen
if(tstype==4){#need pmax > 2
#tstype=4 for ARMA(3,1) model
phi <- c(0.7,0.1,-0.4)
theta <- 0.1
beta[c(1:3,pmax+1)] <- c(phi,0.1)
valS <- c(1:3,(pmax+1))
if(pmax==3)
valO <- c((pmax+2):kmax)
if(pmax>3)
valO <- c(3:pmax,(pmax+2):kmax)
}
k<-2
tvec<-beta[valS]
for(i in 1: nruns) {
print(i)
    if(tstype==4){
  if (etype==1) 
 Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov = rnorm(n),n.start=burn)
  if(etype==2)
 Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov = rt(n,tdf),n.start=burn)
  if(etype==3)
   Y <- arima.sim(n=n, list(ar=c(phi),ma=c(theta)), innov =          
                  runif(n,min=-1,max=1),n.start=burn)   
  if(etype==4)
Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov=(rexp(n)-1),n.start=burn)
}
      out<-armaboot2(Y=Y,B=BB,pmax=pmax,qmax=qmax,c=cc,burn=burn)
    for(j in 1:kmax){
    tem <- shorth3(out$betas[,j],alpha=alph)
    if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2])
    cicov[j] <- cicov[j] + 1
    avelen[j] <- avelen[j] + tem$shorth[2] - tem$shorth[1]
    tem <- shorth3(out$btmix[,j],alpha=alph)
    if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2])
    cicovmix[j] <- cicovmix[j] + 1
    avelenmix[j] <- avelenmix[j] + tem$shorth[2] - tem$shorth[1]
    tem <- shorth3(out$btfull[,j],alpha=alph)
    if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2])
    cicovfull[j] <- cicovfull[j] + 1
    avelenfull[j] <- avelenfull[j] + tem$shorth[2] - tem$shorth[1]
   } 
#test whether the last 2 values of beta are 0
                gg <- 2 
                tstat <- out$bhatvs[c(5,6)]
           tem <- confreg(out$betas[,c(5,6)],g=gg,that=tstat,alpha=alph)
                if(tem$D0 <= tem$cuplim) #pred. reg. method
                cicov[pp1] <- cicov[pp1] + 1
                avelen[pp1] <- avelen[pp1] + tem$cuplim
                if(tem$br0 <= tem$cuplim) #hybrid method
                cicov[pp2] <- cicov[pp2] + 1
                avelen[pp2] <- avelen[pp1] #same cutoff so same length
                if(tem$br0 <= tem$brlim) #Bickel and Ren method
                cicov[pp3] <- cicov[pp3] + 1
                avelen[pp3] <- avelen[pp3] + tem$brlim
       tem <- confreg(out$btmix[,c(5,6)],g=gg,that=tstat,alpha=alph)
                if(tem$D0 <= tem$cuplim) #pred. reg. method
                cicovmix[pp1] <- cicovmix[pp1] + 1
                avelenmix[pp1] <- avelenmix[pp1] + tem$cuplim
                if(tem$br0 <= tem$cuplim) #hybrid method
                cicovmix[pp2] <- cicovmix[pp2] + 1
                avelenmix[pp2] <- avelenmix[pp1] #same cutoff so same length
                if(tem$br0 <= tem$brlim) #Bickel and Ren method
                cicovmix[pp3] <- cicovmix[pp3] + 1
                avelenmix[pp3] <- avelenmix[pp3] + tem$brlim
       tem <- confreg(out$btfull[,c(5,6)],g=gg,that=tstat,alpha=alph)
                if(tem$D0 <= tem$cuplim) #pred. reg. method
                cicovfull[pp1] <- cicovfull[pp1] + 1
                avelenfull[pp1] <- avelenfull[pp1] + tem$cuplim
                if(tem$br0 <= tem$cuplim) #hybrid method
                cicovfull[pp2] <- cicovfull[pp2] + 1
                avelenfull[pp2] <- avelenfull[pp1] #same cutoff so same length
                if(tem$br0 <= tem$brlim) #Bickel and Ren method
                cicovfull[pp3] <- cicovfull[pp3] + 1
                avelenfull[pp3] <- avelenfull[pp3] + tem$brlim   
#test whether betaS = betaS 
                gg <- 4
                tstat <- out$bhatvs[1:4]       
           tem <- confreg(out$betas[,1:4],g=gg,that=tstat,alpha=alph)
                D0 <- sqrt(mahalanobis(tvec, tem$center, tem$cov))
                if(D0 <= tem$cuplim) #pred. reg. method
                cicov[pp4] <- cicov[pp4] + 1
                avelen[pp4] <- avelen[pp4] + tem$cuplim 
                D1 <- sqrt(mahalanobis(tvec, tstat, tem$cov))
                if(D1 <= tem$cuplim) #hybrid method
                cicov[pp5] <- cicov[pp5] + 1
                avelen[pp5] <- avelen[pp4] #same cutoff so same length
                if(D1 <= tem$brlim) #Bickel and Ren method
                cicov[pp6] <- cicov[pp6] + 1
                avelen[pp6] <-  avelen[pp6] + tem$brlim 
           tem <- confreg(out$btmix[,1:4],g=gg,that=tstat,alpha=alph)
                D0 <- sqrt(mahalanobis(tvec, tem$center, tem$cov))
                if(D0 <= tem$cuplim) #pred. reg. method
                cicovmix[pp4] <- cicovmix[pp4] + 1
                avelenmix[pp4] <- avelenmix[pp4] + tem$cuplim 
                D1 <- sqrt(mahalanobis(tvec, tstat, tem$cov))
                if(D1 <= tem$cuplim) #hybrid method
                cicovmix[pp5] <- cicovmix[pp5] + 1
                avelenmix[pp5] <- avelenmix[pp4] #same cutoff so same length
                if(D1 <= tem$brlim) #Bickel and Ren method
                cicovmix[pp6] <- cicovmix[pp6] + 1
                avelenmix[pp6] <-  avelenmix[pp6] + tem$brlim 
           tem <- confreg(out$btfull[,1:4],g=gg,that=tstat,alpha=alph)
                D0 <- sqrt(mahalanobis(tvec, tem$center, tem$cov))
                if(D0 <= tem$cuplim) #pred. reg. method
                cicovfull[pp4] <- cicovfull[pp4] + 1
                avelenfull[pp4] <- avelenfull[pp4] + tem$cuplim 
                D1 <- sqrt(mahalanobis(tvec, tstat, tem$cov))
                if(D1 <= tem$cuplim) #hybrid method
                cicovfull[pp5] <- cicovfull[pp5] + 1
                avelenfull[pp5] <- avelenfull[pp4] #same cutoff so same length
                if(D1 <= tem$brlim) #Bickel and Ren method
                cicovfull[pp6] <- cicovfull[pp6] + 1
                avelenfull[pp6] <-  avelenfull[pp6] + tem$brlim  
}
cicov <- cicov/nruns
avelen <- avelen/nruns
cicovmix <- cicovmix/nruns
avelenmix <- avelenmix/nruns
cicovfull <- cicovfull/nruns
avelenfull <- avelenfull/nruns
list(cicovfull=cicovfull,avelenfull=avelenfull,cicov=cicov,avelen=avelen,
cicovmix=cicovmix,avelenmix=avelenmix,beta=beta)
}


msmasim<-function(n=100,nruns=20,qmax=5,BB=100,cc=0.05,etype=1,tstype=1,
tdf=5,btype=1,alph=0.05){
#Bootstraps MA(q) model selection.
#assumes n > 10 qmax,   beta = (theta1,...,thetaqmax)
#btype = 1 for parametric bootstrap, 2 for residual bootstrap
#calls maboot or maboot2, aic might be better than aicc, uses forecast package
#etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom
#etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors
#tstype = k for an MA(k) model, k=1,2
#uses library(forecast)
beta<-0*1:qmax
theta=-0.5  #tstype=1 for MA(1) model
beta[1] <- -0.5
pp6<-qmax+6; pp5<-qmax+5; pp4<-qmax+4; pp3<-qmax+3; pp1<-qmax+1; pp2<-qmax+2
cicov <- 0*(1:pp6)
avelen <- 0*(1:pp6)
cicovmix <- cicov
avelenmix <- avelen
cicovfull <- cicov
avelenfull <- avelen
k<-tstype
if(tstype==2) {
theta=c(-0.5,0.5)  #tstype=2 for MA(2) model
beta[2]<-0.5}
tvec<-beta[1:k]
for(i in 1: nruns) {
  if (etype==1) 
   Y <- arima.sim(n=n, list(ma=c(theta)), innov = rnorm(n))
  if(etype==2)
   Y <- arima.sim(n=n, list(ma=c(theta)), innov = rt(n,tdf))
  if(etype==3)
   Y <- arima.sim(n=n, list(ma=c(theta)), innov =          
                  runif(n,min=-1,max=1))   
  if(etype==4)
   Y <- arima.sim(n=n, list(ma=c(theta)), innov = (rexp(n)-1))
  if(btype==1)
    out<-maboot(Y=Y,B=BB,qmax=qmax,c=cc)
  else
    out<-maboot2(Y=Y,B=BB,qmax=qmax,c=cc)
  for(j in 1:qmax){
    tem <- shorth3(out$betas[,j],alpha=alph)
    if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2])
    cicov[j] <- cicov[j] + 1
    avelen[j] <- avelen[j] + tem$shorth[2] - tem$shorth[1]
    tem <- shorth3(out$btmix[,j],alpha=alph)
    if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2])
    cicovmix[j] <- cicovmix[j] + 1
    avelenmix[j] <- avelenmix[j] + tem$shorth[2] - tem$shorth[1]
    tem <- shorth3(out$btfull[,j],alpha=alph)
    if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2])
    cicovfull[j] <- cicovfull[j] + 1
    avelenfull[j] <- avelenfull[j] + tem$shorth[2] - tem$shorth[1]
   } 
    #test whether the last qmax-k values of beta are 0
                gg <- qmax - k 
                tstat <- out$bhatvs[(k+1):qmax]
           tem <- confreg(out$betas[,(k+1):qmax],g=gg,that=tstat,alpha=alph)
                if(tem$D0 <= tem$cuplim) #pred. reg. method
                cicov[pp1] <- cicov[pp1] + 1
                avelen[pp1] <- avelen[pp1] + tem$cuplim
                if(tem$br0 <= tem$cuplim) #hybrid method
                cicov[pp2] <- cicov[pp2] + 1
                avelen[pp2] <- avelen[pp1] #same cutoff so same length
                if(tem$br0 <= tem$brlim) #Bickel and Ren method
                cicov[pp3] <- cicov[pp3] + 1
                avelen[pp3] <- avelen[pp3] + tem$brlim
       tem <- confreg(out$btmix[,(k+1):qmax],g=gg,that=tstat,alpha=alph)
                if(tem$D0 <= tem$cuplim) #pred. reg. method
                cicovmix[pp1] <- cicovmix[pp1] + 1
                avelenmix[pp1] <- avelenmix[pp1] + tem$cuplim
                if(tem$br0 <= tem$cuplim) #hybrid method
                cicovmix[pp2] <- cicovmix[pp2] + 1
                avelenmix[pp2] <- avelenmix[pp1] #same cutoff so same length
                if(tem$br0 <= tem$brlim) #Bickel and Ren method
                cicovmix[pp3] <- cicovmix[pp3] + 1
                avelenmix[pp3] <- avelenmix[pp3] + tem$brlim
       tem <- confreg(out$btfull[,(k+1):qmax],g=gg,that=tstat,alpha=alph)
                if(tem$D0 <= tem$cuplim) #pred. reg. method
                cicovfull[pp1] <- cicovfull[pp1] + 1
                avelenfull[pp1] <- avelenfull[pp1] + tem$cuplim
                if(tem$br0 <= tem$cuplim) #hybrid method
                cicovfull[pp2] <- cicovfull[pp2] + 1
                avelenfull[pp2] <- avelenfull[pp1] #same cutoff so same length
                if(tem$br0 <= tem$brlim) #Bickel and Ren method
                cicovfull[pp3] <- cicovfull[pp3] + 1
                avelenfull[pp3] <- avelenfull[pp3] + tem$brlim
#test whether the first k values of beta are (beta_1,...,beta_k)
                gg <- k 
                tstat <- out$bhatvs[1:k]       
           tem <- confreg(out$betas[,1:k],g=gg,that=tstat,alpha=alph)
                D0 <- sqrt(mahalanobis(tvec, tem$center, tem$cov))
                if(D0 <= tem$cuplim) #pred. reg. method
                cicov[pp4] <- cicov[pp4] + 1
                avelen[pp4] <- avelen[pp4] + tem$cuplim 
                D1 <- sqrt(mahalanobis(tvec, tstat, tem$cov))
                if(D1 <= tem$cuplim) #hybrid method
                cicov[pp5] <- cicov[pp5] + 1
                avelen[pp5] <- avelen[pp4] #same cutoff so same length
                if(D1 <= tem$brlim) #Bickel and Ren method
                cicov[pp6] <- cicov[pp6] + 1
                avelen[pp6] <-  avelen[pp6] + tem$brlim 
           tem <- confreg(out$btmix[,1:k],g=gg,that=tstat,alpha=alph)
                D0 <- sqrt(mahalanobis(tvec, tem$center, tem$cov))
                if(D0 <= tem$cuplim) #pred. reg. method
                cicovmix[pp4] <- cicovmix[pp4] + 1
                avelenmix[pp4] <- avelenmix[pp4] + tem$cuplim 
                D1 <- sqrt(mahalanobis(tvec, tstat, tem$cov))
                if(D1 <= tem$cuplim) #hybrid method
                cicovmix[pp5] <- cicovmix[pp5] + 1
                avelenmix[pp5] <- avelenmix[pp4] #same cutoff so same length
                if(D1 <= tem$brlim) #Bickel and Ren method
                cicovmix[pp6] <- cicovmix[pp6] + 1
                avelenmix[pp6] <-  avelenmix[pp6] + tem$brlim 
           tem <- confreg(out$btfull[,1:k],g=gg,that=tstat,alpha=alph)
                D0 <- sqrt(mahalanobis(tvec, tem$center, tem$cov))
                if(D0 <= tem$cuplim) #pred. reg. method
                cicovfull[pp4] <- cicovfull[pp4] + 1
                avelenfull[pp4] <- avelenfull[pp4] + tem$cuplim 
                D1 <- sqrt(mahalanobis(tvec, tstat, tem$cov))
                if(D1 <= tem$cuplim) #hybrid method
                cicovfull[pp5] <- cicovfull[pp5] + 1
                avelenfull[pp5] <- avelenfull[pp4] #same cutoff so same length
                if(D1 <= tem$brlim) #Bickel and Ren method
                cicovfull[pp6] <- cicovfull[pp6] + 1
                avelenfull[pp6] <-  avelenfull[pp6] + tem$brlim 
}
cicov <- cicov/nruns
avelen <- avelen/nruns
cicovmix <- cicovmix/nruns
avelenmix <- avelenmix/nruns
cicovfull <- cicovfull/nruns
avelenfull <- avelenfull/nruns
list(cicovfull=cicovfull,avelenfull=avelenfull,cicov=cicov,avelen=avelen,
cicovmix=cicovmix,avelenmix=avelenmix,beta=beta)
}


nonpisim<-function(n=100,nruns=50,etype=1,tstype=1,
tdf=5,phi=c(0.7,0.1,-0.4),theta=c(0.1),pen=2,dd=0,burn=100,alph=0.05){
#Simulates time series of length n, and finds proportion
#of time Y_n+1 is in its one step nonparametric PI that is free of model selected
#Average  PI length is also given. 
#etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom
#etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors
#dd is the amount of difference, dd=0,1,or 2, assumed known
np1 <- n+1
cov <- 0
len <- 0:nruns
#assume n - q - p > 30
if(tstype==1){#tstype=1 for AR(1) model
phi<-c(0.5)
ps<-1 
qs <- 0
}
if(tstype==2) { #tstype=2 for AR(2) model
phi=c(0.5,0.33) 
ps<-2
qs<-0
}
if(tstype==3){#tstype=3 for MA(1) model
theta<-c(-0.5)
ps<-0 
qs <- 1
}
if(tstype==4) { #tstype=4 for MA(2) model
theta=c(-0.5,0.5)  
ps<-0
qs<-2
}
if(tstype==5){#need kmax > 2
#tstype=5 for ARMA(3,1) model
phi <- c(0.7,0.1,-0.4)
theta <- 0.1
ps<-3
qs<-1
}
if(tstype==6){#need kmax >= rr
#tstype=6 for ARMA model selected by user
phi <- as.vector(phi)
theta <- as.vector(theta)
ps<-length(phi)
qs<-length(theta)
}

for(i in 1:nruns){

 if(tstype<3){
  if (etype==1) 
   Y <- arima.sim(n=np1, list(order = c(ps,dd,qs), ar =c(phi)), innov = rnorm(np1))
  if(etype==2)
   Y <- arima.sim(n=np1, list(order = c(ps,dd,qs), ar =c(phi)), innov = rt(np1,tdf))
  if(etype==3)
   Y <- arima.sim(n=np1, list(order = c(ps,dd,qs), ar =c(phi)), innov =          
                  runif(np1,min=-1,max=1))   
  if(etype==4)
   Y <- arima.sim(n=np1, list(order = c(ps,dd,qs), ar =c(phi)), innov = (rexp(np1)-1))
 }
 if(tstype==3 || tstype==4){
 if (etype==1) 
   Y <- arima.sim(n=np1, list(order = c(ps,dd,qs), ma=c(theta)), innov = rnorm(np1))
  if(etype==2)
   Y <- arima.sim(n=np1, list(order = c(ps,dd,qs), ma=c(theta)), innov = rt(np1,tdf))
  if(etype==3)
   Y <- arima.sim(n=np1, list(order = c(ps,dd,qs), ma=c(theta)), innov =          
                  runif(np1,min=-1,max=1))   
  if(etype==4)
   Y <- arima.sim(n=np1, list(order = c(ps,dd,qs), ma=c(theta)), innov = (rexp(np1)-1))
 }
    if(tstype==5){
  if (etype==1) 
 Y <- arima.sim(n=np1,list(order = c(ps,dd,qs),ar=c(phi),ma=c(theta)),innov = rnorm(np1),n.start=burn)
  if(etype==2)
 Y <- arima.sim(n=np1,list(order = c(ps,dd,qs),ar=c(phi),ma=c(theta)),innov = rt(np1,tdf),n.start=burn)
  if(etype==3)
   Y <- arima.sim(n=np1, list(order = c(ps,dd,qs),ar=c(phi),ma=c(theta)), innov =          
                  runif(np1,min=-1,max=1),n.start=burn)   
  if(etype==4)
 Y <- arima.sim(n=np1,list(order = c(ps,dd,qs), ar=c(phi),ma=c(theta)),innov=(rexp(np1)-1),n.start=burn)
 }
    if(tstype==6){  #arma model selected by the user
  if (etype==1) 
 Y <- arima.sim(n=np1,list(order = c(ps,dd,qs), ar=c(phi),ma=c(theta)),innov = rnorm(np1),n.start=burn)
  if(etype==2)
 Y <- arima.sim(n=np1,list(order = c(ps,dd,qs), ar=c(phi),ma=c(theta)),innov = rt(np1,tdf),n.start=burn)
  if(etype==3)
   Y <- arima.sim(n=np1, list(order = c(ps,dd,qs), ar=c(phi),ma=c(theta)), innov =          
                  runif(np1,min=-1,max=1),n.start=burn)   
  if(etype==4)
  Y <- arima.sim(n=np1,list(order = c(ps,dd,qs), ar=c(phi),ma=c(theta)),innov=(rexp(np1)-1),n.start=burn)
  }

   y <- Y[1:n]
   yf <- Y[np1]

 
 if(dd==0){
  tem <- locpi(y,alpha=alph) #PI ignore TS structure
  LPI <- tem$LPI
  UPI <- tem$UPI
  len[i] <- UPI - LPI
  if(LPI < yf && yf < UPI)
   cov <- cov + 1
 
 }
 if(dd==1){
 Z <- as.vector(y)
 Z[1] <- NA 
 Z[-1] <- diff(y)
 tem <- locpi(Z[-1],alpha=alph)
 LPI <- tem$LPI
  UPI <- tem$UPI
  len[i] <- UPI - LPI
 yfhat=y[n]
  if((yfhat + LPI) < yf && yf < (yfhat+UPI))
  cov <- cov + 1
 }
 if(dd==2){
 Z <- as.vector(y)
 Z[1:2] <- NA 
 Z[-c(1,2)] <- diff(y,differences=2)
 tem <- locpi(Z[-c(1,2)],alpha=alph)
 LPI <- tem$LPI
  UPI <- tem$UPI
  len[i] <- UPI - LPI
 yfhat= 2*y[n] - y[(n-1)]
  if((yfhat + LPI) < yf && yf < (yfhat+UPI))
  cov <- cov + 1
 }

}
cov <- cov/nruns
len <- mean(len)

list( cov=cov, len=len)
}



onesteppi<-function(Y, p=0,d=0,q=1,P=0,D=0,Q=0,per=4,period=F,alpha = 0.05){
#Makes the one step ahead PI [L(n),U(n)] for a
#seasonal Arima(p,d,q)x(P,D,Q) model with seasonal period = S = per 
#calls locpi2, and can handle missing values NA
#which is 4 for quarterly data and 12 for monthly data
#yhat1 is the one step ahead forecast, chebSE is the SE assuming normality
#95% Chebychev PI yhat11 +- 2 chebSE tends to be too short
#onesteppi(presidents,p=1,d=1,q=0)
#onesteppi(presidents,p=1,d=1,q=0,P=1,period=T)  ##per=4
y <- as.vector(Y)
if(period==F)
out<-arima(y,c(p,d,q),seasonal=list(order=c(P,D,Q)))
else
out<-arima(y,c(p,d,q),seasonal=list(order=c(P,D,Q),period=per))
#can handle missing values but produces NAs
res <- out$res[!is.na(out$res)]
km <- p+q+P+Q
tem <- locpi2(res,k=km,alph=alpha)
temp <- predict(out,n.ahead=1)
yhat1 <- temp$pred[1]
chebSE <- temp$se[1]
low <- yhat1+tem$LPI
up <- yhat1+tem$UPI
list(chebSE=chebSE,yhat1=yhat1,low,up)
}


pimasim<-function(N=100,nruns=50,Lmq=1,etype=1,tdf=5,alph=0.05){
#old, uses locpi to get h-step ahead PI for the forecast residuals
#Simulates MA(2) time series of length n, and finds proportion
#of time Y_N+1, ..., Y_N+L are in their 95% PI base on the shorth.
#The PI may be asymptotically optimal for Y_n+j with q< j <=L.
#Average  PI length is also given. 
#Gets the asymptotically optimal 1 and 2 step ahead PIs based on forecast residuals.
#etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom
#etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors
#calls locpi
#Finally gets the normal PIs, although  the z cutoff is replaced
#by a t cutoff to increase coverage.
L <- 2 + Lmq
npL <- N + L
np1 <- N + 1
len <- 1:nruns
opilen <- matrix(0,nrow=nruns,ncol=2)
npilen <- matrix(0,nrow=nruns,ncol=L)
cov <- c(0,0)
coverage <- as.vector((0*1:L))
npicov <- coverage
#use N-p-q in general
tcut <- qt((1-alph/2),N-2)
for(i in 1:nruns){
  thet1 <- runif(1,min=-0.5,max=0.5)
  thet2 <- runif(1,min=-0.4,max=0.4)
  if (etype==1) 
   x <- arima.sim(n=npL, list(ma=c(thet1,thet2)), innov = rnorm(npL))
  if(etype==2)
   x <- arima.sim(n=npL, list(ma=c(thet1,thet2)), innov = rt(npL,tdf))
  if(etype==3)
   x <- arima.sim(n=npL, list(ma=c(thet1,thet2)), innov =          
                  runif(npL,min=-1,max=1))   
  if(etype==4)
   x <- arima.sim(n=npL, list(ma=c(thet1,thet2)), innov = (rexp(npL)-1))
   y <- x[1:N]
   yf <- x[np1:npL]
   tem <- locpi(y,alpha=alph)
  LPI <- tem$LPI
  UPI <- tem$UPI
  len[i] <- UPI - LPI
  for(j in 1:L){   
  if(LPI < yf[j] && yf[j] < UPI)
   coverage[j] <- coverage[j] + 1}
# get the time series from the N cases
out <- arima(y,c(0,0,2))
#get the l-step ahead forecasts for l = 1, ..., L
temp <- predict(out,n.ahead=L)
#get the 1 step ahead PI
res <- out$resid
#yfhat <- out$coef[3] + out$coef[1]*res[N] + out$coef[2]*res[(N-1)]
yfhat <- temp$pred[1]
tem <- locpi(res,alpha=alph)
low <- yfhat+tem$LPI
up <- yfhat+tem$UPI
if(low < yf[1] && yf[1] < up)
   cov[1] <- cov[1] + 1
opilen[i,1] <- up-low
#get the 2 step ahead PI
#yfhat <- out$coef[3] + out$coef[2]*res[N] 
yfhat <- temp$pred[2]
#get the 2 step ahead forecasts for the data
yf2 <- (out$coef[3] + out$coef[2]*res)[-c(N-1,N)]
#get the 2 step ahead forecast residuals for t = 3,...,N-2
res2 <- y[-c(1,2)] - yf2
tem <- locpi(res2,alpha=alph)
low <- yfhat+tem$LPI
up <- yfhat+tem$UPI
if(low < yf[2] && yf[2] < up)
   cov[2] <- cov[2] + 1
opilen[i,2] <- up-low
#get the normal PI
for(j in 1:L){   
  LPI <- temp$pred[j] - tcut*temp$se[j]
  UPI <- temp$pred[j] + tcut*temp$se[j]
  npilen[i,j] <- UPI-LPI
  if(LPI < yf[j] && yf[j] < UPI)
   npicov[j] <- npicov[j] + 1}
}
plot(x,type="l")
abline(a=LPI,b=0)
abline(a=UPI,b=0)
avelen <- mean(len)
coverage <- coverage/nruns
cov <- cov/nruns
opilen <- apply(opilen,2,mean)
npicov <- npicov/nruns
npilen <- apply(npilen,2,mean)
list(coverage = coverage, avelen = avelen, opicov = cov, 
opilen = opilen, npicov=npicov, npilen=npilen)
}


pimasim2<-function(N=100,nruns=50,Lmq=1,etype=1,tdf=5,alph=0.05){
#Simulates MA(2) time series of length N, and finds proportion
#of time Y_N+1, ..., Y_N+L are in their 95% PI based on the shorth.
#The PI may be asymptotically optimal for Y_N+j with q< j <=L.
#Average  PI length is also given. 
#Gets the asymptotically optimal 1 and 2 step ahead PIs based on forecast residuals.
#etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom
#etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors
#calls locpi, locpi2
#Finally gets the normal PIs, although  the z cutoff is replaced
#by a t cutoff to increase coverage.
L <- 2 + Lmq
npL <- N + L
np1 <- N + 1
len <- 1:nruns
opilen <- matrix(0,nrow=nruns,ncol=2)
npilen <- matrix(0,nrow=nruns,ncol=L)
cov <- c(0,0)
coverage <- as.vector((0*1:L))
npicov <- coverage
#use N-p-q in general
tcut <- qt((1-alph/2),N-2)
for(i in 1:nruns){
  thet1 <- runif(1,min=-0.5,max=0.5)
  thet2 <- runif(1,min=-0.4,max=0.4)
  if (etype==1) 
   x <- arima.sim(n=npL, list(ma=c(thet1,thet2)), innov = rnorm(npL))
  if(etype==2)
   x <- arima.sim(n=npL, list(ma=c(thet1,thet2)), innov = rt(npL,tdf))
  if(etype==3)
   x <- arima.sim(n=npL, list(ma=c(thet1,thet2)), innov =          
                  runif(npL,min=-1,max=1))   
  if(etype==4)
   x <- arima.sim(n=npL, list(ma=c(thet1,thet2)), innov = (rexp(npL)-1))
   y <- x[1:N]
   yf <- x[np1:npL]
   tem <- locpi(y,alpha=alph) #PI ignore TS structure, correct for L>=3
  LPI <- tem$LPI
  UPI <- tem$UPI
  len[i] <- UPI - LPI
  for(j in 1:L){   
  if(LPI < yf[j] && yf[j] < UPI)
   coverage[j] <- coverage[j] + 1}
# get the time series from the N cases
out <- arima(y,c(0,0,2))
#get the l-step ahead forecasts for l = 1, ..., L
temp <- predict(out,n.ahead=L)
#get the 1 step ahead PI
res <- out$resid
m <- length(res)
am <- (1+15/m)*sqrt(m/(m-2))
#yfhat <- out$coef[3] + out$coef[1]*res[N] + out$coef[2]*res[(N-1)]
yfhat <- temp$pred[1]
tem <- locpi2(res,k=2,alph=alph)
low <- yfhat+am*tem$LPI
up <- yfhat+am*tem$UPI
if(low < yf[1] && yf[1] < up)
   cov[1] <- cov[1] + 1
opilen[i,1] <- up-low
#get the 2 step ahead PI
#yfhat <- out$coef[3] + out$coef[2]*res[N] 
yfhat <- temp$pred[2]
#get the 2 step ahead forecasts for the data
yf2 <- (out$coef[3] + out$coef[2]*res)[-c(N-1,N)]
#get the 2 step ahead forecast residuals for t = 3,...,N-2
res2 <- y[-c(1,2)] - yf2
m <- length(res2)
am <- (1+15/m)*sqrt(m/(m-2))
tem <- locpi2(res2,alph=alph)
low <- yfhat+am*tem$LPI
up <- yfhat+am*tem$UPI
if(low < yf[2] && yf[2] < up)
   cov[2] <- cov[2] + 1
opilen[i,2] <- up-low
#get the normal PI
for(j in 1:L){   
  LPI <- temp$pred[j] - tcut*temp$se[j]
  UPI <- temp$pred[j] + tcut*temp$se[j]
  npilen[i,j] <- UPI-LPI
  if(LPI < yf[j] && yf[j] < UPI)
   npicov[j] <- npicov[j] + 1}
}
#plot(x,type="l")
#abline(a=LPI,b=0)
#abline(a=UPI,b=0)
avelen <- mean(len)
coverage <- coverage/nruns
cov <- cov/nruns
opilen <- apply(opilen,2,mean)
npicov <- npicov/nruns
npilen <- apply(npilen,2,mean)
list(coverage = coverage, avelen = avelen, opicov = cov, 
opilen = opilen, npicov=npicov, npilen=npilen)
}

pitsvssim<-function(N=100,nruns=50,pmax=5,qmax=5,tstype=1,etype=1,burn=100,
tdf=5,alph=0.05){
#Needs auto.arima from library(forecast)
#Fits ARMA(pmax,qmax) model and does model selection. Fits 4 PIs.
#Simulates tstype time series of length N, and finds proportion
#of time Y_N+1 is in its 95% PI based on the shorth.
#tstype = 1 for MA(2), 2 for AR(1), 3 for ARMA(1,1)
#The 2nd PI may be asymptotically optimal.
#Average PI length is also given. 
#Gets the 1  step ahead PIs based on forecast residuals.
#etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom
#etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors
#calls locpi, locpi2
#Finally gets the normal PIs, although the z cutoff is replaced
#by a t cutoff to increase coverage.
##armapisim is better
np1 <- N + 1
len <- 1:nruns
opilen <- len 
npilen <- len 
h95len <- len
coverage <- 0
opicov <- 0
npicov <- coverage
h95cov <- 0
#use N-p-q in general
tcut <- qt((1-alph/2),N-2)
for(i in 1:nruns){
if(tstype==1){  #MA(2) model is the true model
  thet1 <- runif(1,min=-0.5,max=0.5)
  thet2 <- runif(1,min=-0.4,max=0.4)
  if (etype==1) 
   x <- arima.sim(n=np1, list(ma=c(thet1,thet2)), innov = rnorm(np1))
  if(etype==2)
   x <- arima.sim(n=np1, list(ma=c(thet1,thet2)), innov = rt(np1,tdf))
  if(etype==3)
   x <- arima.sim(n=np1, list(ma=c(thet1,thet2)), innov =          
                  runif(np1,min=-1,max=1))   
  if(etype==4)
   x <- arima.sim(n=np1, list(ma=c(thet1,thet2)), innov = (rexp(np1)-1))
}
if(tstype==2){  #AR(1) model is the true model
  phi1 <- runif(1,min=-0.5,max=0.5)
  if (etype==1) 
   x <- arima.sim(n=np1, list(ar=c(phi1)), innov = rnorm(np1))
  if(etype==2)
   x <- arima.sim(n=np1, list(ar=c(phi1)), innov = rt(np1,tdf))
  if(etype==3)
   x <- arima.sim(n=np1, list(ar=c(phi1)), innov =          
                  runif(np1,min=-1,max=1))   
  if(etype==4)
   x <- arima.sim(n=np1, list(ar=c(phi1)), innov = (rexp(np1)-1))
}
if(tstype==3){  #ARMA(1,1) model is the true model
  phi<-0.4
  theta <- -0.7
  phi <- as.vector(phi)
theta <- as.vector(theta)
  if (etype==1) 
 x <- arima.sim(n=np1,list(ar=c(phi),ma=c(theta)),innov = rnorm(np1),n.start=burn)
  if(etype==2)
 x <- arima.sim(n=np1,list(ar=c(phi),ma=c(theta)),innov = rt(np1,tdf),n.start=burn)
  if(etype==3)
   x <- arima.sim(n=np1, list(ar=c(phi),ma=c(theta)), innov =          
                  runif(np1,min=-1,max=1),n.start=burn)   
  if(etype==4)
x <- arima.sim(n=np1,list(ar=c(phi),ma=c(theta)),innov=(rexp(np1)-1),n.start=burn)  
}
   y <- x[1:N]
   yf <- x[np1]
   tem <- locpi(y,alpha=alph) #PI ignore TS structure
  LPI <- tem$LPI
  UPI <- tem$UPI
  len[i] <- UPI - LPI
  if(LPI < yf && yf < UPI)
   coverage <- coverage + 1
#ARMA(p,q) model selection
out<-auto.arima(y,d=0,max.p=pmax,max.q=qmax,seasonal=FALSE,ic="aicc",
stationary=TRUE)
#get the l-step ahead forecasts for l = 1
temp <- out %>% forecast(h=1)
yfhat<-temp$mean[1] #1 step ahead forecast
#get the 1 step ahead PI
res <- out$resid
m <- length(res)
kk<-length(out$coef)
am <- (1+15/m)*sqrt(m/(m-kk))
tem <- locpi2(res,k=kk,alph=alph)
low <- yfhat+am*tem$LPI
up <- yfhat+am*tem$UPI
if(low < yf && yf < up)
   opicov <- opicov + 1
opilen[i] <- up-low
#get PI from forecast package
up<-temp$upper[2]
low <- temp$lower[2]
if(low < yf && yf < up)
   h95cov <- h95cov + 1
h95len[i] <- up-low
#get the normal PI
se <- sqrt(out$sigma)
tcut <- qt((1-alph/2),N-kk)
  LPI <- yfhat - tcut*se
  UPI <- yfhat + tcut*se
  npilen[i] <- UPI-LPI
  if(LPI < yf && yf < UPI)
   npicov <- npicov + 1
}
mnpilen <- mean(len)    #PI that ignores TS structure locpi
coverage <- coverage/nruns
opilen <- mean(opilen)  #asymptotically optimal PI locpi2
opicov <- opicov/nruns
h95cov <- h95cov/nruns  #normal Cheb PI from forecast package
h95len <- mean(h95len)
npicov <- npicov/nruns  #normal Cheb PI
npilen <- mean(npilen)
list(coverage = coverage, mnpilen = mnpilen, opicov = opicov, 
opilen = opilen, h95cov=h95cov, h95len=h95len, npicov=npicov, npilen=npilen)
}



predrgn <-function(x, xf, alpha = 0.05){
# Makes a prediction region for xf when cases are rows of x.
# If p = 1, the shorth interval should work better.
x <- as.matrix(x)
p <- dim(x)[2]
n <- dim(x)[1]
inr <- 0
up <- min((1 - alpha/2), (1 - alpha + 10*alpha*p/n))
if(alpha > 0.1)
up <- min((1 - alpha + 0.05), (1 - alpha + p/n))
qn <- up
if(qn < 1 - alpha + 0.001)
        up <- 1 - alpha
center <- apply(x, 2, mean)
cov <- var(x)
md2 <- mahalanobis(x, center, cov)
# md2 is the classical squared Mahalanobis distance
#get nonparametric prediction region boundary
cuplim <- quantile(md2, up)
#inr <- 1 if xf is in the prediction region
xfd <- mahalanobis(xf, center, cov)
if(xfd <= cuplim)
       inr <- 1
list(md2=md2, center=center,cov=cov, cuplim = cuplim, xfd, inr=inr)
}


predrgn2 <-function(x, xf, nv=10, alpha = 0.05){
# Makes a prediction region for xf when cases are rows of x.
#Uses data splitting prediction region with (T,C) = (coord median, Ip)
# If p = 1, the shorth interval should work better.
# MAY NOT WORK IF p = 1. 
x <- as.matrix(x)
p <- dim(x)[2]
n <- dim(x)[1]
nv <- min(nv, floor(n/2))
nH <- n - nv
uv <- min(nv, ceiling( (1 - alpha)*(1+nv) ) ) 
up <- uv/(nv+1) #quantile for prediction region
inr <- 0
C <- diag(p)
indx <- 1:n
#get the data splitting prediction region 
perm <- sample(indx,n)
H <- perm[1:nH]
xH <- x[H,]
xV <- x[-H,]
center <- apply(xH, 2, median)
md2 <- mahalanobis(xV, center, cov=C)
hsq <- quantile(md2, up, type=1)
#inr <- 1 if xf is in the prediction region
xfd <- mahalanobis(xf, center, cov=C)
if(xfd <= hsq)
       inr <- 1
list(md2=md2, center=center,hsq = hsq, xfd, inr=inr)
}



predsim2<-function(n = 50, p = 50, nv=19, nruns = 100, xtype = 1, dtype = 1, alpha = 0.05)
{# MAY NOT WORK IF p = 1. 
# Gets coverages of the data splitting  prediction region.
# Multiply x by A where xtype = 1 for MVN Nq(0,I),  2 for lognormal
#dtype = 1 if (T,C) = (xbar, Ip ), dtype = 2 if (T,C) = (coord median, Ip)
# mahalanobis gives squared Maha distances 
#want n/2 > nv
#coverage is very sensitive to quantile type. Use type = 1: inverse quantile
#function so percentile = Y(ceiling(nv*(1-alpha))
	cvr <- 0
        nv <- min(nv, floor(n/2))
        nH <- n - nv
	uv <- min(nv, ceiling( (1 - alpha)*(1+nv) ) ) 
        up <- uv/(nv+1) #quantile for prediction region
	A <- sqrt(diag(1:p))
	m <- n + 1
        C <- diag(p)
        indx <- 1:n        
	for(i in 1:nruns) {
                #make data
		x <- matrix(rnorm(m * p), nrow = m, ncol = p)
		if(xtype == 2)  
			x <- exp(x)
		x <- x %*% A
		xx <- x[ - m,  ]
                #find sets H and V
                perm <- sample(indx,n)
                H <- perm[1:nH]
                xH <- xx[H,]
                xV <- xx[-H,]
                if(dtype==1){
		center <- apply(xH, 2, mean)
		disp = C}
                if(dtype==2){
		center <- apply(xH, 2, median)
		disp = C}
		md2 <- mahalanobis(xV, center, cov=disp)
		hsq <- quantile(md2, up, type=1)
		if(mahalanobis(t(x[m,  ]), center, cov=disp) <= hsq)
			cvr <- cvr + 1		
	}
	cvr <- cvr/nruns	
	list(cvr = cvr, up=up)
}



renewalpi<-function(times,alph=0.05){
#Let times=(e1, ..., en) be the iid data for time until event. 
#Get shorth PIs for time until next event, time until next 2 events,
#time until next 3 events, and time until next 4 events.
#Reliable for time until next kth event if n/k > 99. 
#default is 95% PI, calls shpi
n<- length(times)
step2 <- seq(from = 1, to = (n-2), by = 2)
step3 <- seq(from = 1, to = (n-3), by = 3)
step4 <- seq(from = 1, to = (n-4), by = 4) 
eps1 <- times
tem <- shpi(yf=0,ydat=eps1,alph)
onepi <- c(tem$low, tem$up)         
#get 2 step ahead PI
shift <- c(eps1,0)[-1]  #length is n-1
ee <- (eps1+shift)[-(n-1)] #length is n-2, has e_i + e_i+1
eps2 <- ee[step2]  #length is about (n-2)/2
tem <- shpi(yf=0,ydat=eps2,alph)
twopi <- c(tem$low, tem$up)
#get 3 step ahead PI
shift<-shift[-(n-1)]
shift <- c(shift,0)[-1]  #length is n-2
eee <- (ee+shift)[-(n-2)] #length is n-3, has e_i + e_{i+1} + e_{i+2}
eps3 <- eee[step3]  #length is about (n-3)/3
tem <- shpi(yf=0,ydat=eps3,alph)
threepi  <- c(tem$low, tem$up)
#get 4 step ahead PI
shift<-shift[-(n-2)]
shift <- c(shift,0)[-1]  #length is n-3
eeee <- (eee+shift)[-(n-3)] #length is n-4, has e_i+e_{i+1}+e_{i+2}+e_{i+3}
eps4 <- eeee[step4]  #length is about (n-4)/4
tem <- shpi(yf=0,ydat=eps4,alph)
fourpi <- c(tem$low, tem$up)
list(onepi=onepi,twopi=twopi,threepi=threepi,fourpi=fourpi)
}


resacfs<-function(zz){
#Let zz be the output of the arima function, eg zz <- arima(lh,c(1,0,0)),
#Makes the ACF and PACF of the residuals.
#Works for ARIMA(p,d,q)x(P,D=0,Q)s models.
#Also prints out the Z test statistics for the residual ACF and residual PACF.
#|Z| > 1.25 for lags 1,2,3 and |Z| > 1.6 for all other lags
par(mfrow=c(2,1))
tema <- acf(zz$resid,na.action=na.omit)
temp <- pacf(zz$resid,na.action=na.omit)
par(mfrow=c(1,1))
Zstat <- sqrt(length(zz$resid))*rbind(tema$acf[-1],temp$acf)
return(Zstat)
}

resplots<-function(W,zz){
#Let zz be the output of the arima function, eg zz <- arima(lh,c(1,0,0)),
#and let W be the time series, eg W <- lh.
#Works for ARIMA(p,d,q)x(P,D=0,Q)s models.
#Makes the response plot and the residual plot of the fitted values  
#(W-residual) vs the residuals.
#Also  prints out the coefficients, se, 95% CI's, test statistics and pvalues.
Y <- as.vector(W)
Resid <- zz$resid
FIT <- Y-Resid
FIT <- as.vector(FIT)
par(mfrow=c(2,1))
plot(FIT,Y)
abline(0,1)
title("response plot")
plot(FIT,Resid)
title("residual plot")
par(mfrow=c(1,1))
coef <- zz$coef
se <- sqrt(diag(zz$var.coef))
Z <- as.vector(coef/se)
pval <- 2 * (1-pnorm(abs(Z)))
LCI <- coef - 2*se
UCI <- coef + 2*se
tem <- cbind(coef,se,LCI,UCI,Z,pval)
return(tem)
}


resplots2<-function(W,zz,d = 1){
#Let zz be the output of the arima function, eg zz <- arima(lh,c(1,0,0)),
#and let W be the time series, eg W <- lh.
#Uses the differenced time series as the response in the FF plot for d =1,2.
#Makes the response plot and the residual plot of the fitted values vs the 
#residuals.
#Also  prints out the coefficients, se, 95% CI's, test statistics and pvalues.
#changed FIT<-as.real(FIT) to FIT<-as.vector(FIT)
Y <- as.vector(W)
if(d==1) {Y[1] <- NA; Y[-1] <- diff(Y)}
if(d==2) {Y[1:2] <- NA; Y[-c(1,2)] <- diff(Y,differences=2)}
Resid <- zz$resid
FIT <- Y-Resid
FIT <- as.vector(FIT)
par(mfrow=c(2,1))
plot(FIT,Y)
abline(0,1)
title("response plot")
plot(FIT,Resid)
title("residual plot")
par(mfrow=c(1,1))
coef <- zz$coef
se <- sqrt(diag(zz$var.coef))
Z <- as.vector(coef/se)
pval <- 2 * (1-pnorm(abs(Z)))
LCI <- coef - 2*se
UCI <- coef + 2*se
tem <- cbind(coef,se,LCI,UCI,Z,pval)
return(tem)
}



rmreg2<-function(x, y, csteps = 5, locc = 0.5){
# Does robust multivariate linear regression based on RMVN.
# Here x contains the nontrivial predictors. 
# Advance the plot by highlighting Stop with the right mouse button.
x <- as.matrix(x)
y <- as.matrix(y)
n <- dim(x)[1]
q <- dim(x)[2]	
p <- q+1
m <- dim(y)[2]
d <- m + q
u <- cbind(x,y)
res <- matrix(nrow = n, ncol = m, 0)
fit <- res
Bhat <- matrix(nrow = p, ncol = m, 0)
# q + 1 = p is number of predictors including intercept
cmar <- par("mar")
	par(mfrow = c(2, 1))
        par(mar=c(4.0,4.0,2.0,0.5))
#get the RMVN subset of the predictors and response u
up <- qchisq(0.975, d)
        qchi <- qchisq(0.5, d)  ##get the DGK estimator
        covs <- var(u)
        mns <- apply(u, 2, mean)        ## concentrate
        for(i in 1:csteps) {
                md2 <- mahalanobis(u, mns, covs)
                medd2 <- median(md2)
                mns <- apply(u[md2 <= medd2,  ], 2, mean)
                covs <- var(u[md2 <= medd2,  ])
        }
        covd <- covs
        mnd <- mns      ##get the MB estimator
        covv <- diag(d)
        med <- apply(u, 2, median)
        md2 <- mahalanobis(u, center = med, covv)
        medd2 <- median(md2)##get the location criterion cutoff
        lcut <- medd2 
        if(locc != 0.5) lcut <- quantile(md2,locc) ## get the start
        mns <- apply(u[md2 <= medd2,  ], 2, mean)
        covs <- var(u[md2 <= medd2,  ]) ## concentrate
        for(i in 1:csteps) {
                md2 <- mahalanobis(u, mns, covs)
                medd2 <- median(md2)
                mns <- apply(u[md2 <= medd2,  ], 2, mean)
                covs <- var(u[md2 <= medd2,  ])
        }
        covm <- covs
        mnm <- mns      ##get FCH attractor
        covf <- covm
        mnf <- mnm
        val2 <- mahalanobis(t(mnd), med, covv)
        if(val2 < lcut) {
##crit = square root of det(cov)
                critd <- prod(diag(chol(covd)))
                critm <- prod(diag(chol(covm)))
                if(critd < critm) {
                        covf <- covd
                        mnf <- mnd
                }
        }
## get the FCH estimator
        rd2 <- mahalanobis(u, mnf, covf)
        const <- median(rd2)/qchi
        covf <- const * covf    
##reweight the above FCH estimator (mnf,covf) 
        rd2 <- mahalanobis(u, mnf, covf)
        rmnmvn <- apply(u[rd2 <= up,  ], 2, mean)
        rcovmvn <- var(u[rd2 <= up,  ])
        d1 <- sum(rd2 <= up)
        rd2 <- mahalanobis(u, rmnmvn, rcovmvn)
        qchi2 <- (0.5 * 0.975 * n)/d1
        qchi2 <- min(qchi2, 0.995)
        const <- median(rd2)/qchisq(qchi2, d)
        rcovmvn <- const * rcovmvn      
##now get the subset of data w used to construct the RMVN estimator
        rd2 <- mahalanobis(u, rmnmvn, rcovmvn)
        w <- u[rd2 <= up,  ]
xw <- as.matrix(w[,1:q])
yw <- as.matrix(w[,(q+1):d])
#get the multivariate regression after ellipsoidal trimming
for(i in 1:m){
  out <- lsfit(xw,yw[,i])
  res[,i] <- y[,i] - out$coef[1] - x %*% out$coef[-1]
  fit[,i] <- y[,i] - res[,i]
  Bhat[,i] <- out$coef	
}
for(i in 1:m){
	plot(fit[,i],y[,i])
	abline(0, 1)
	title("Response Plot")
        identify(fit[,i],y[,i])
	plot(fit[,i], res[,i])
	title("Residual Plot")
        identify(fit[,i], res[,i])
}
par(mfrow = c(1, 1))
par(mar=cmar)
list(fit = fit, res = res, Bhat = Bhat)
}


robar<-function(Y,p){
#Fits a robust AR(p) model to time series Y.
#assumes p < n - p
#Makes a response and residual plot.
# Advance the plot by highlighting Stop with the right mouse button.
#phihat = phihat_0 = intercept, phihat_1, ..., phihat_p
# Calls rmreg2 to get the robust estimator.
n <- length(Y)
Xar <- matrix(0,nrow=n-p,ncol=p)
Yar <- Y[(p+1):n]
for(i in 1:p){
Xar[,i] <- Y[(p-i+1):(n-i)]
}
tem <- rmreg2(Xar,Yar)
phihat <- tem$Bhat
fit <- Y
resid <- Y
fit[(p+1):n] <- tem$fit
fit[1:p] <- NA
resid[(p+1):n] <- tem$res
resid[1:p] <- NA
list(phihat=phihat,fit=fit,resid=resid)
}


rwpisim<-function(n=100,nruns=50,type=1,tdf=5,alph=0.05){
#Simulates random walk time series of length n, and finds proportion
#of time Y_n+1, ..., Y_n+4 are in their 95% PI based on the shorth.
#The PI is asymptotically optimal. Average PI length is also given. 
#type = 1 for N(1,1), 2 for 1 + t distr with tdf=degrees of freedom,
#3 for EXP(1), 4 for U(0,2). Calls shpi. Want n >= 25(4) = 100.
y0 <-1
val<-n+4
step2 <- seq(from = 1, to = (n-2), by = 2)
step3 <- seq(from = 1, to = (n-3), by = 3)
step4 <- seq(from = 1, to = (n-4), by = 4)
onepilen <- 1:nruns
twopilen <- 1:nruns
threepilen <- 1:nruns
fourpilen <- 1:nruns
onecov <- 0
twocov <- 0
threecov <- 0
fourcov <- 0
for(i in 1:nruns){
if(type==1)
tem<-rnorm(val,mean=1,sd=1)
if(type==2) 
tem <-1 + rt(val,tdf) #rcauchy generator might be bad
if(type==3) 
tem<-rexp(val)
if(type==4) 
tem<-runif(val,min=0,max=2)
tem<-c(y0,tem)
yrw<-cumsum(tem)[-1]
#get 1 step ahead PI, could use eps1 <- diff(yrw)[1:(n-1)]
eps1<-tem[3:(n+1)]
ydat <- yrw[n] + eps1
yf <- yrw[n+1]
           tem <- shpi(yf=yf,ydat=ydat,alph)
           onepilen[i] <- tem$up - tem$low
           onecov <- onecov + tem$inr 
#get 2 step ahead PI
shift <- c(eps1,0)[-1]  #length is n-1
ee <- (eps1+shift)[-(n-1)] #length is n-2, has e_i + e_i+1
eps2 <- ee[step2]  #length is about (n-2)/2
ydat <- yrw[n] + eps2
yf <- yrw[n+2]
           tem <- shpi(yf=yf,ydat=ydat,alph)
           twopilen[i] <- tem$up - tem$low
           twocov <- twocov + tem$inr 
#get 3 step ahead PI
shift<-shift[-(n-1)]
shift <- c(shift,0)[-1]  #length is n-2
eee <- (ee+shift)[-(n-2)] #length is n-3, has e_i + e_{i+1} + e_{i+2}
eps3 <- eee[step3]  #length is about (n-3)/3
ydat <- yrw[n] + eps3
yf <- yrw[n+3]
           tem <- shpi(yf=yf,ydat=ydat,alph)
           threepilen[i] <- tem$up - tem$low
           threecov <- threecov + tem$inr 
#get 4 step ahead PI
shift<-shift[-(n-2)]
shift <- c(shift,0)[-1]  #length is n-3
eeee <- (eee+shift)[-(n-3)] #length is n-4, has e_i+e_{i+1}+e_{i+2}+e_{i+3}
eps4 <- eeee[step4]  #length is about (n-4)/4
ydat <- yrw[n] + eps4
yf <- yrw[n+4]
           tem <- shpi(yf=yf,ydat=ydat,alph)
           fourpilen[i] <- tem$up - tem$low
           fourcov <- fourcov + tem$inr 
}
lens<-cbind(onepilen,twopilen,threepilen,fourpilen)
mnlen<-apply(lens,2,mean)
sdlen<-apply(lens,2,sd)
onecov <- onecov/nruns
twocov <- twocov/nruns
threecov <- threecov/nruns
fourcov <- fourcov/nruns
list(onecov=onecov,twocov=twocov,threecov=threecov,
fourcov=fourcov,mnlen=mnlen,sdlen=sdlen)
}


rwprsim<-function(n=100,nruns=50,p=2,type=1,tdf=5,psi=0.0,alph=0.05){
#Simulates multivariate random walk time series of length n, and finds 
#proportion of time Y_n+1, ..., Y_n+4 are in their 95% prediction region PR.
# Z_t is a p times 1 vector, Z_t = A x_t, where x_t entries are iid 
#type = 1 for N(1,1), 2 for 1 + t distr with tdf=degrees of freedom,
#3 for EXP(1), 4 for U(0,2). Calls predrgn. Want n >= 25(4)p = 100p.
#Need the distribution to have a nonsingular pop. covariance matrix.
#Want integer p > 1. 
# Multiply x by A: for MVN data this results
# in a covariance matrix with eigenvector c(1, ..., 1)^T
# corresponding to the largest eigenvalue. As psi gets
# close to 1, the data clusters about the line in the
# direction of (1, ..., 1)^T. See Maronna and Zamar (2002).
# cor(X_i,X_j) = [2 psi +(q-2)psi^2]/[1 + (q-1)psi^2], i not = j
# when the correlation exists.
zeroes <- 0*1:p
A <- matrix(psi,nrow=p,ncol=p)
diag(A) <- 1 
val<-n+4
step2 <- seq(from = 1, to = (n-2), by = 2)
step3 <- seq(from = 1, to = (n-3), by = 3)
step4 <- seq(from = 1, to = (n-4), by = 4)
onecov <- 0
twocov <- 0
threecov <- 0
fourcov <- 0
for(i in 1:nruns){
if(type==1)
x <- matrix(rnorm(val*p,mean=1,sd=1), nrow = val, ncol = p)
if(type==2) 
x <-1 + matrix(rt(val*p,tdf), nrow = val, ncol = p)
if(type==3)
x <- matrix(rexp(val*p), nrow = val, ncol = p) 
if(type==4) 
x<-matrix(runif(val*p,min=0,max=2),nrow=val, ncol=p)
z <- x%*%A
#z0 is the p times 1 zero vector
yrw<- apply(z,2,cumsum)
#get 1 step ahead PR, 
eps1<-z[2:n,]
ydat <- t(yrw[n,] + t(eps1))  #odd code seems to work
yf <- yrw[n+1,]
           tem <- predrgn(x=ydat,xf=yf,alph)
           onecov <- onecov + tem$inr 
##get 2 step ahead PR
shift <- rbind(eps1,zeroes)[-1,]  #length is n-1
ee <- (eps1+shift)[-(n-1),] #length is n-2, has e_i + e_i+1
eps2 <- ee[step2,]  #length is about (n-2)/2
ydat <- t(yrw[n,] + t(eps2))
yf <- yrw[n+2,]
          tem <- predrgn(x=ydat,xf=yf,alph)
          twocov <- twocov + tem$inr 
##get 3 step ahead PI
shift<-shift[-(n-1),]
shift <- rbind(shift,zeroes)[-1,]  #length is n-2
eee <- (ee+shift)[-(n-2),] #length is n-3, has e_i + e_{i+1} + e_{i+2}
eps3 <- eee[step3,]  #length is about (n-3)/3
ydat <- t(yrw[n,] + t(eps3))
yf <- yrw[n+3,]
          tem <- predrgn(x=ydat,xf=yf,alph)          
          threecov <- threecov + tem$inr 
#get 4 step ahead PI
shift<-shift[-(n-2),]
shift <- rbind(shift,zeroes)[-1,]  #length is n-3
eeee <- (eee+shift)[-(n-3),] #length is n-4, has e_i+e_{i+1}+e_{i+2}+e_{i+3}
eps4 <- eeee[step4,]  #length is about (n-4)/4
ydat <- t(yrw[n,] + t(eps4))
yf <- yrw[n+4,]
          tem <- predrgn(x=ydat,xf=yf,alph)
          fourcov <- fourcov + tem$inr 
}
onecov <- onecov/nruns
twocov <- twocov/nruns
threecov <- threecov/nruns
fourcov <- fourcov/nruns
list(onecov=onecov,twocov=twocov,threecov=threecov,fourcov=fourcov)
}


saics <- function(Y,dd=0,pmax=5,qmax=5,pbig=10,qbig=10,P=0,D=0,Q=0,perd=12){
#Makes a matrix of AICs - min(AIC) for ARIMA(p,dd,q)x(P,D,Q)s seasonal models 
#with period perd for time series Y where the seasonal values P,D,Q are fixed.
#often bombs, but uses method = "ML" to reduce errors
#Reduce pmax by 1 if an error message occurs, may need to reduce 
#pbig, qmax and qbig. May need to use pmax = 1, qmax = 1, pbig=1, qbig =1.
#Then increase values.
# May need to hit the Esc key to kill program if P,D,Q are bad.
#minaic is the min aic for the arima(p,dd,q)(P,D,Q) models, 
#the ari and ima models could have lower aic
Y <- as.vector(Y)
aics <- matrix(, (pmax+1), (qmax+1), dimnames=list(p=0:pmax, q=0:qmax))
ariaics <- 1:pbig
imaaics <- 1:qbig
for(p in 0:pmax)
   for(q in 0:qmax) aics[1+p, 1+q] <- arima(Y, order=c(p,d=dd,q), method = "ML",
       seasonal = list(order = c(P, D, Q), period = perd), 
       optim.control = list(maxit = 500))$aic
minaic <- min(aics, na.rm = TRUE)
aics <- round(aics - minaic, 2)
for(p in 1:pbig) ariaics[p] <- arima(Y,order=c(p,d=dd,0), method="ML",
             seasonal = list(order = c(P, D, Q), period = perd), 
             optim.control = list(maxit = 500))$aic
for(q in 1:qbig) imaaics[q] <- arima(Y,order=c(0,d=dd,q), method = "ML",
             seasonal = list(order = c(P, D, Q), period = perd),
             optim.control = list(maxit = 500))$aic
ariaics <- round(ariaics - minaic, 2)
imaaics <- round(imaaics - minaic, 2)
list(aics=aics, ariaics=ariaics, imaaics=imaaics)
}


shorth3<-function(y, alpha = 0.05){
# computes the shorth(c) interval [Ln,Un] for c = cc.
#shorth lists Ln and Un using Frey's correction
n <- length(y)
cc <- ceiling(n * (1 - alpha + 1.12*sqrt(alpha/n)))
cc <- min(n,cc)
  sy <- sort(y)
  rup <- sy[cc]
  rlow <- sy[1]
  olen <- rup - rlow
  if(cc < n){
    for(j in (cc + 1):n){
	zlen <- sy[j] - sy[j - cc + 1]
	if(zlen < olen) {
	   olen <- zlen
	   rup <- sy[j]
	   rlow <- sy[j - cc + 1]
	}
    }
} 
Ln <- rlow; Un <- rup
list(shorth=c(Ln, Un))
}


shpi<-function(yf=0, ydat, alph = 0.05){
#Gets the Frey (2013) PI for Yf given ydata.
ydat<-as.vector(ydat)
B<-length(ydat)  
inr <- 0
cc <- ceiling(B * (1 - alph + 1.12*sqrt(alph/B)))
cc <- min(B,cc)
  sy <- sort(ydat)
  up <- sy[cc]
  low <- sy[1]
  olen <- up - low
if(cc < B){
    for(j in (cc + 1):B){
	zlen <- sy[j] - sy[j - cc + 1]
	if(zlen < olen) {
	   olen <- zlen
	   up <- sy[j]
	   low <- sy[j - cc + 1]
	}
    }
} 
if(low <= yf && up >= yf)
inr <- inr + 1
list(low=low, up=up, inr = inr)}


tscisim<-function(N=100,nruns=100,etype=1,tdf=5){
#Simulates ARMA(1,1) time series of length n, and finds proportion
#of time theta and phi are in their 95% CI.
#average sqrt(n) CI length is also given. 
#etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom
#etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors
#use method = "ML" or method bombs often
#using n.start = 100 or n.start=400 did not seem to help
arcov <- 0
macov <- 0
arlow <- 1:nruns
arup <- arlow
malow<-arlow
maup<-arlow
for(i in 1:nruns){
  theta <- runif(1,min=-0.5,max=0.5)
  phi <- runif(1,min=-0.2,max=0.3)
  if (etype==1) 
   y <- arima.sim(n=N, list(ar=c(phi), ma=c(theta)), innov = rnorm(N))
  if(etype==2)
   y <- arima.sim(n=N, list(ar=phi, ma=theta), innov = rt(N,tdf))
  if(etype==3)
   y <- arima.sim(n=N, list(ar=phi, ma=theta), innov = runif(N,min=-1,max=1))
  if(etype==4)
   y <- arima.sim(n=N, list(ar=phi, ma=theta), innov = (rexp(N)-1))
  zz<-arima(y,c(1,0,1),method="ML")
  coef <- zz$coef
  se <- sqrt(diag(zz$var.coef))
  LCI <- coef - 2*se
  UCI <- coef + 2*se
  arlow[i] <- LCI[1]
  arup[i] <- UCI[1]
  malow[i] <- LCI[2]
  maup[i] <- UCI[2]
  if(arlow[i] < phi && arup[i] > phi)
   arcov <- arcov + 1
  if(malow[i] < theta && maup[i] > theta)
   macov <- macov + 1
}
arcov <- arcov/nruns
arlen <- sqrt(N) * mean(arup - arlow)
macov <- macov/nruns
malen <- sqrt(N) * mean(maup - malow)
list(arcov = arcov, arlen = arlen, macov = macov, malen = malen)
}



tsmsel <- function(Y,dd=0,pmax=5,qmax=5){
#makes a matrix of BICs - min(BIC) for ARIMA(p,dd,q) models for time series Y
#uses method = "ML" to reduce errors
#reduce pmax by 1 if an error message occurs, may need to reduce  qmax
#minbic is the min bic for the arima(p,dd,q) models, 
Y <- as.vector(Y)
bics <- matrix(, (pmax+1), (qmax+1), dimnames=list(p=0:pmax, q=0:qmax))
aics<-bics
mbic<-10^100
maic<-mbic
aord <- c(0,0)
bord <- aord
for(p in 0:pmax)
  for(q in 0:qmax){  z <- arima(Y, c(p,d=dd,q), method = "ML",
       optim.control = list(maxit = 500))
       aicz <- AIC(z)
       aics[1+p, 1+q] <- aicz
       if(aicz < maic){
       maic<-aicz
       zaic <- z
       aord <- c(p,q)}
       bicz <- BIC(z)
       bics[1+p, 1+q] <- bicz
       if(bicz < mbic){
       mbic<-bicz
       zbic <- z
       bord <- c(p,q)}       
   }
minaic <- min(aics, na.rm = TRUE)
minbic <- min(bics, na.rm = TRUE)
aics <- round(aics - minaic, 2)
bics <- round(bics - minbic, 2)
list(zaic=zaic,zbic=zbic,aics=aics,bics=bics,aord=aord,bord=bord)
}


tsNA<-function(Y,k=6){
#Y is a time series (possibly differenced)
#that is assumed to be weakly stationary, MA(oo), and AR(oo)
#Y gets changed to NA if Y > MED + k MAD or if Y < MED - k MAD 
kmadd <- k*mad(Y,constant=1)
medd <- median(Y)
up <- medd + kmadd
low<- medd - kmadd
maxy <- max(Y[Y<up])
miny <- min(Y[Y>low])
W <- Y
W[Y>up] <- NA
W[Y<low] <- NA
list(W=W)
}


tspisim<-function(n=100,nruns=50,L=4,kmax=5,etype=1,tstype=1,
tdf=5,phi=c(0.7,0.1,-0.4),theta=c(0.1),pen=2,dd=0,burn=100,alph=0.05){
#Simulates time series of length n, and finds proportion
#of time Y_n+1, ..., Y_n+L are in their 95% normal PIs after variable selection.
#auto.arima and armasel2 (a variant of Potscher) are used for variable selection 
#Average  PI length is also given. 
#There are models where nruns need to be near 50 or 100 or the program fails,
#especially if dd > 0.
#etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom
#etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors
#Needs auto.arima from library(forecast) and calls armamsel2.
#for an arima(ps,dd,qs) true model, estimates r = max(ps,qs)
#assumes n >= 20 (kmax),  full model beta = (phi1,...,phikmax,theta1,...,thetakmax)
#dd is the amount of difference, dd=0,1,or 2, assumed known
npL <- n + L
np1 <- n+1
npilen <- matrix(0,nrow=nruns,ncol=L)
ppilen <- npilen
ppicov <- as.vector((0*1:L))
npicov <- ppicov
#assume n - q - p > 30
tcut <- qnorm((1-alph/2))
kp1 <- kmax+1
if(tstype==1){#tstype=1 for AR(1) model
phi<-c(0.5)
ps<-1 
qs <- 0
}
if(tstype==2) { #tstype=2 for AR(2) model
phi=c(0.5,0.33) 
ps<-2
qs<-0
}
if(tstype==3){#tstype=3 for MA(1) model
theta<-c(-0.5)
ps<-0 
qs <- 1
}
if(tstype==4) { #tstype=4 for MA(2) model
theta=c(-0.5,0.5)  
ps<-0
qs<-2
}
if(tstype==5){#need kmax > 2
#tstype=5 for ARMA(3,1) model
phi <- c(0.7,0.1,-0.4)
theta <- 0.1
ps<-3
qs<-1
}
if(tstype==6){#need kmax >= rr
#tstype=6 for ARMA model selected by user
phi <- as.vector(phi)
theta <- as.vector(theta)
ps<-length(phi)
qs<-length(theta)
}

for(i in 1:nruns){

 if(tstype<3){
  if (etype==1) 
   Y <- arima.sim(n=npL, list(order = c(ps,dd,qs), ar =c(phi)), innov = rnorm(npL))
  if(etype==2)
   Y <- arima.sim(n=npL, list(order = c(ps,dd,qs), ar =c(phi)), innov = rt(npL,tdf))
  if(etype==3)
   Y <- arima.sim(n=npL, list(order = c(ps,dd,qs), ar =c(phi)), innov =          
                  runif(npL,min=-1,max=1))   
  if(etype==4)
   Y <- arima.sim(n=npL, list(order = c(ps,dd,qs), ar =c(phi)), innov = (rexp(npL)-1))
 }
 if(tstype==3 || tstype==4){
 if (etype==1) 
   Y <- arima.sim(n=npL, list(order = c(ps,dd,qs), ma=c(theta)), innov = rnorm(npL))
  if(etype==2)
   Y <- arima.sim(n=npL, list(order = c(ps,dd,qs), ma=c(theta)), innov = rt(npL,tdf))
  if(etype==3)
   Y <- arima.sim(n=npL, list(order = c(ps,dd,qs), ma=c(theta)), innov =          
                  runif(npL,min=-1,max=1))   
  if(etype==4)
   Y <- arima.sim(n=npL, list(order = c(ps,dd,qs), ma=c(theta)), innov = (rexp(npL)-1))
 }
    if(tstype==5){
  if (etype==1) 
 Y <- arima.sim(n=npL,list(order = c(ps,dd,qs),ar=c(phi),ma=c(theta)),innov = rnorm(npL),n.start=burn)
  if(etype==2)
 Y <- arima.sim(n=npL,list(order = c(ps,dd,qs),ar=c(phi),ma=c(theta)),innov = rt(npL,tdf),n.start=burn)
  if(etype==3)
   Y <- arima.sim(n=npL, list(order = c(ps,dd,qs),ar=c(phi),ma=c(theta)), innov =          
                  runif(npL,min=-1,max=1),n.start=burn)   
  if(etype==4)
 Y <- arima.sim(n=npL,list(order = c(ps,dd,qs), ar=c(phi),ma=c(theta)),innov=(rexp(npL)-1),n.start=burn)
 }
    if(tstype==6){  #arma model selected by the user
  if (etype==1) 
 Y <- arima.sim(n=npL,list(order = c(ps,dd,qs), ar=c(phi),ma=c(theta)),innov = rnorm(npL),n.start=burn)
  if(etype==2)
 Y <- arima.sim(n=npL,list(order = c(ps,dd,qs), ar=c(phi),ma=c(theta)),innov = rt(npL,tdf),n.start=burn)
  if(etype==3)
   Y <- arima.sim(n=npL, list(order = c(ps,dd,qs), ar=c(phi),ma=c(theta)), innov =          
                  runif(npL,min=-1,max=1),n.start=burn)   
  if(etype==4)
  Y <- arima.sim(n=npL,list(order = c(ps,dd,qs), ar=c(phi),ma=c(theta)),innov=(rexp(npL)-1),n.start=burn)
  }

   y <- Y[1:n]
   yf <- Y[np1:npL]

 #ARMA(p,q) model selection 
 if(dd==0){
 out<-auto.arima(y,d=dd,max.p=kmax,max.q=kmax,seasonal=FALSE,ic="aicc",
 stationary=TRUE)
 outt <- out
 }
 if(dd==1){
 out<-auto.arima(y,d=dd,max.p=kmax,max.q=kmax,seasonal=FALSE,ic="aicc",
 stationary=FALSE)
 pI<-out$arma[1]
 qI<-out$arma[2]  
 outt<-arima(y,order=c(pI,dd,qI),method="ML")
 }
 if(dd==2){
 out<-auto.arima(y,d=dd,max.p=kmax,max.q=kmax,seasonal=FALSE,ic="aicc",
 stationary=FALSE)
 pI<-out$arma[1]
 qI<-out$arma[2]  
 outt<-arima(y,order=c(pI,dd,qI),method="ML")
 }

 #get the h-step ahead forecasts for h = 1, ..., L
 temp <- predict(outt,n.ahead=L)
 #get the normal PI
 for(j in 1:L){   
  LPI <- temp$pred[j] - tcut*temp$se[j]
  UPI <- temp$pred[j] + tcut*temp$se[j]
  npilen[i,j] <- UPI-LPI
  if(LPI < yf[j] && yf[j] < UPI)
   npicov[j] <- npicov[j] + 1
 }

 #use a better model selection method
 if(dd==0){
 out2<-armamsel2(y,kmax=kmax,pen=pen)
 outt<-arima(y,order=c(out2$pI,dd,out2$qI),method="ML")
 }
 if(dd==1){
 Z <- as.vector(y)
 Z[1] <- NA 
 Z[-1] <- diff(y)
 out2<-armamsel2(Z,kmax=kmax,pen=pen)
 outt<-arima(y,order=c(out2$pI,dd,out2$qI),method="ML")
 }
 if(dd==2){
 Z <- as.vector(y)
 Z[1:2] <- NA 
 Z[-c(1,2)] <- diff(Y,differences=2)
 out2<-armamsel2(Z,kmax=kmax,pen=pen)
 outt<-arima(y,order=c(out2$pI,dd,out2$qI),method="ML")
 }

 #get the h-step ahead forecasts for h = 1, ..., L
 temp <- predict(outt,n.ahead=L)
 #get the normal PI
 for(j in 1:L){   
  LPI <- temp$pred[j] - tcut*temp$se[j]
  UPI <- temp$pred[j] + tcut*temp$se[j]
  ppilen[i,j] <- UPI-LPI
  if(LPI < yf[j] && yf[j] < UPI)
   ppicov[j] <- ppicov[j] + 1
 }
}
npicov <- npicov/nruns
npilen <- apply(npilen,2,mean)
ppicov <- ppicov/nruns
ppilen <- apply(ppilen,2,mean)
list( npicov=npicov, npilen=npilen, ppicov = ppicov, ppilen = ppilen)
}



tsrob<-function(Y,k=6){
#Y is a time series (possibly differenced)
#that is assumed to be weakly stationary, MA(oo), and AR(oo)
#W gets  NA if Y > MED + k MAD or if Y < MED - k MAD 
#the corresponding residuals rres also get NA
#the fitted values rfit corresponding to NA get MED
#uses AIC to select ARMA model order with W as the time series
kmadd <- k*mad(Y,constant=1)
medd <- median(Y)
up <- medd + kmadd
low<- medd - kmadd
maxy <- max(Y[Y<up])
miny <- min(Y[Y>low])
W <- as.vector(Y)
W[Y>up] <- NA
W[Y<low] <- NA
outr<-arima(W,method="ML")
rres<-as.vector(outr$res)
rfit <- W-rres #can be bad if white noise model is selected
rfit[is.na(rfit)]<-medd
plot(rfit,Y)
list(W=W,rfit=rfit,rres=rres)
}


tsrobplot<-function(Y,k=6,ord=c(2,0,0)){
#makes response and residual plots give the order of the ARMA model
#Y is a time series (possibly differenced)
#that is assumed to be weakly stationary, MA(oo), and AR(oo)
#W gets  NA if Y > MED + k MAD or if Y < MED - k MAD 
#the corresponding residuals rres also get NA
#the fitted values rfit corresponding to NA get MED
kmadd <- k*mad(Y,constant=1)
medd <- median(Y)
up <- medd + kmadd
low<- medd - kmadd
maxy <- max(Y[Y<up])
miny <- min(Y[Y>low])
W <- as.vector(Y)
W[Y>up] <- NA
W[Y<low] <- NA
outr<-arima(W,order=ord,method="ML")
rres<-as.vector(outr$res)
rfit <- W-rres #can be bad if white noise model is selected
rfit[is.na(rfit)]<-medd
par(mfrow=c(2,1))
plot(rfit,Y)
abline(0,1)
title("response plot")
plot(rfit,rres)
title("residual plot")
par(mfrow=c(1,1))
list(W=W,rfit=rfit,rres=rres)
}



tsrobwin<-function(Y,k=6){
#Robustly Winsorizes the time series, assumed to be weakly stationary. 
#tsNA is better
med <- median(Y)
madd <- mad(Y,constant=1)
lo <- med - k*madd
up <- med + k*madd
wup <- max(Y[Y <= up])
wlo <- min(Y[Y >= lo])
W <- Y 
W[W>up] <- wup
W[W<lo] <- wlo
list(W=W)
}