# tspack is a collection of R functions #for Math 474 Time Series, Fall 2013. ## source("http://parker.ad.siu.edu/Olive/tspack.txt") ## source("http://parker.ad.siu.edu/Olive/tsdata.txt") ##a few functions need the forecast library ##library(forecast) ##install.packages(forecast) acfpacf<-function(Y){ #makes the acf and pacf of a time series Y #Note that the ACF starts at lag 0. W <- as.vector(Y) par(mfrow=c(2,1)) acf(W) pacf(W) par(mfrow=c(1,1)) } acormat<-function(zz){ #Let zz be the output of the arima function, eg zz <- arima(lh,c(1,0,0)). #Makes the asymptotic correlation matrix of the estimated coefficients. #Want these correlations to be small. se <- sqrt(diag(zz$var.coef)) Dinv <- diag(1/se) cormat <- Dinv %*% zz$var.coef %*% Dinv return(cormat) } aicmat <- function(Y,dd=0,pmax=5,qmax=5,pbig=15,qbig=15){ #makes a matrix of AICs - min(AIC) for ARIMA(p,dd,q) models for time series Y #uses method = "ML" to reduce errors #reduce pmax by 1 if an error message occurs, may need to reduce pbig or qmax #minaic is the min aic for the arima(p,dd,q) models, the ari and ima models #could have lower aic Y <- as.vector(Y) aics <- matrix(, (pmax+1), (qmax+1), dimnames=list(p=0:pmax, q=0:qmax)) ariaics <- 1:pbig imaaics <- 1:qbig for(p in 0:pmax) for(q in 0:qmax) aics[1+p, 1+q] <- arima(Y, c(p,d=dd,q), method = "ML", optim.control = list(maxit = 500))$aic minaic <- min(aics, na.rm = TRUE) aics <- round(aics - minaic, 2) for(p in 1:pbig) ariaics[p] <- arima(Y,c(p,d=dd,0), method = "ML", optim.control = list(maxit = 500))$aic for(q in 1:qbig) imaaics[q] <- arima(Y,c(0,d=dd,q), method = "ML", optim.control = list(maxit = 500))$aic ariaics <- round(ariaics - minaic, 2) imaaics <- round(imaaics - minaic, 2) list(aics=aics, ariaics=ariaics, imaaics=imaaics) } arboot<-function(Y,B=100,pmax=10,c=0.01){ #Bootstraps AR(p) model selection using the parametric bootstrap. #assumes n > 10 pmax and adds cB full model bootstrap samples. #The full model and betahatmix are also bootstrapped. out <- ar.yw(Y,aic=FALSE,order.max=pmax) phihat = out$ar n<-length(Y) nmpmax <- n-pmax sig<-sqrt(out$var.pred) d <- ceiling(c*B) bpd <- B + d bp1 <- B + 1 betas <- matrix(0,nrow=bpd,ncol=pmax) btmix <- betas btfull <- betas bhatvs <- 0*1:pmax outvs<-ar.yw(Y,order.max=pmax) ord<-outvs$order if(ord>0) bhatvs[1:ord]<-outvs$ar for(i in 1:B) { val <- sample(1:nmpmax,1) ysel <- Y[val:(val+pmax)] yb <- arima.sim(n=n, list(ar=phihat), innov = rnorm(n,sd=sig),n.start=pmax, start.innov=ysel) #might be using ysel as errors instead of Ys #yb <- arima.sim(n=n, list(ar=phihat),sd=sig) tem<-ar.yw(yb,order.max=pmax) ord<-tem$order if(ord>0) betas[i,1:ord]<-tem$ar tem <- ar.yw(yb,aic=FALSE,order.max=pmax) btfull[i,] <- tem$ar #mix if(ord>0){ val <- sample(1:nmpmax,1) ysel <- Y[val:(val+pmax)] yb <- arima.sim(n=n, list(ar=phihat), innov = rnorm(n,sd=sig),n.start=pmax, start.innov=ysel) # yb <- arima.sim(n=n, list(ar=phihat),sd=sig) tem<-ar.yw(yb,aic=FALSE,order.max=ord) btmix[i,1:ord]<-tem$ar} } for(i in bp1:bpd){ val <- sample(1:nmpmax,1) ysel <- Y[val:(val+pmax)] yb<-arima.sim(n=n, list(ar=phihat), innov = rnorm(n,sd=sig),n.start=pmax, start.innov=ysel) # yb <- arima.sim(n=n, list(ar=phihat),sd=sig) tem <- ar.yw(yb,aic=FALSE,order.max=pmax) betas[i,] <- tem$ar } btmix[bp1:bpd,] <- betas[bp1:bpd,] btfull[bp1:bpd,] <- betas[bp1:bpd,] list(btfull=btfull,betas=betas,btmix=btmix,bhatvs=bhatvs) } arboot2<-function(Y,B=100,pmax=10,c=0.01){ #Bootstraps AR(p) model selection using the residual bootstrap. #assumes n > 10 pmax and adds cB full model bootstrap samples. #The full model and betahatmix are also bootstrapped. out <- ar.yw(Y,aic=FALSE,order.max=pmax) phihat = out$ar n<-length(Y) sig<-sqrt(out$var.pred) res<-out$res[(pmax+1):n] #scale the residuals to have 0 mean m<-length(res) #want var(res) near sigmahat^2 res <- (res-mean(res))*sqrt(m/(m-pmax)) d <- ceiling(c*B) bpd <- B + d bp1 <- B + 1 nmpmax <- n-pmax bhatvs <- 0*1:pmax outvs<-ar.yw(Y,order.max=pmax) ord<-outvs$order if(ord>0) bhatvs[1:ord]<-outvs$ar betas <- matrix(0,nrow=bpd,ncol=pmax) btmix <- betas btfull <- betas for(i in 1:B) { resb <- sample(res,size=n,replace=TRUE) val <- sample(1:nmpmax,1) ysel <- Y[val:(val+pmax)] yb <- arima.sim(n=n, list(ar=phihat), innov = resb,n.start=pmax, start.innov=ysel) tem<-ar.yw(yb,order.max=pmax) ord<-tem$order if(ord>0) betas[i,1:ord]<-tem$ar tem <- ar.yw(yb,aic=FALSE,order.max=pmax) btfull[i,] <- tem$ar #mix if(ord>0){ resb <- sample(res,size=n,replace=TRUE) val <- sample(1:nmpmax,1) ysel <- Y[val:(val+pmax)] yb <- arima.sim(n=n, list(ar=phihat), innov = resb,n.start=pmax, start.innov=ysel) tem<-ar.yw(yb,aic=FALSE,order.max=ord) btmix[i,1:ord]<-tem$ar} } for(i in bp1:bpd){ #full model bootstrap resb <- sample(res,size=n,replace=TRUE) val <- sample(1:nmpmax,1) ysel <- Y[val:(val+pmax)] yb <- arima.sim(n=n, list(ar=phihat), innov = resb,n.start=pmax, start.innov=ysel) tem <- ar.yw(yb,aic=FALSE,order.max=pmax) betas[i,] <- tem$ar } btmix[bp1:bpd,] <- betas[bp1:bpd,] btfull[bp1:bpd,] <- betas[bp1:bpd,] list(btfull=btfull,betas=betas,btmix=btmix,bhatvs=bhatvs) } arboottest<-function(Y,pmax=10,BB=1000,alph=0.05){ #gets CIs for phi_k for k = 1, ..., pmax using parametric and residual bootstrap #test for whether values k:pmax of phi are zeroes with parametric bootstrap #want n > 10 pmax #if X is ARIMA(p,1,0), use Y <- diff(X) #calls arboot, arboot2 cipboot <- matrix(0,nrow=pmax,ncol=2) cipmix <- cipboot cirboot <- cipboot cirmix <- cipboot cifull <- cipboot tstat <- 1:pmax tcut <- 1:pmax zcut<- qnorm((1-alph/2)) zfull <- arima(Y,c(pmax,0,0)) secut <- zcut*sqrt(diag(zfull$var.coef))[-(pmax+1)] cifull[,1] <- zfull$coef[-(pmax+1)]-secut cifull[,2] <- zfull$coef[-(pmax+1)]+secut outp <- arboot(Y,B=BB,pmax=pmax) outr <- arboot2(Y,B=BB,pmax=pmax) for(j in 1:pmax){ cipboot[j,] <- shorth3(outp$betas[,j],alpha=alph)$shorth cipmix[j,] <- shorth3(outp$btmix[,j],alpha=alph)$shorth cirboot[j,] <- shorth3(outr$betas[,j],alpha=alph)$shorth cirmix[j,] <- shorth3(outr$btmix[,j],alpha=alph)$shorth } #test whether the values k:pmax of phi are 0 with pred reg method for(k in 1:pmax){ gg <- pmax - k + 1 tem <- confreg2(outp$betas[,k:pmax],g=gg,alpha=alph) tstat[k] <- tem$D0 tcut[k] <- tem$cuplim } list(cipboot=cipboot,cipmix=cipmix,cirboot=cirboot,cirmix=cirmix,cifull=cifull, tstat=tstat,tcut=tcut) } ardata<-function(n = 200) {#simulates AR(p=2) data y <- 1:(2 * n) y[1] <- sum(rnorm(3)) y[2] <- y[1] + sum(rnorm(2)) for(i in 3:(2 * n)) { y[i] <- y[i - 1] - 0.89 * y[i - 2] + rnorm(1) } list(arts = y[(n + 1):(2 * n)]) } arimapisim<-function(n=100,nruns=50,kmax=5,etype=1,tstype=1, tdf=5,phi=c(0.7,0.1,-0.4),theta=c(0.1),pen=2,dd=0,burn=100,alph=0.05){ #Needs auto.arima from library(forecast) and calls armamsel2. #for an arima(ps,dd,qs) true model, estimates r = max(ps,qs) #assumes n >= 20 (kmax), full model beta = (phi1,...,phikmax,theta1,...,thetakmax) #dd is the amount of difference, dd=0,1,or 2, assumed known #Fits several 1 step ahead PIs after model selection with auto.arima and method #that uses Potscher (1990). #There are models where nruns need to be near 50 or 100 or the program fails, #especially if dd > 0. See armapisim. #Simulates tstype time series of length n, and finds proportion #of time Y_n+1 is in its 95% PI. #The PI may be asymptotically optimal. #Average PI length is also given. #Gets the 1 step ahead PIs based on forecast residuals. #etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom #etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors #calls locpi, locpi2 #Finally gets the normal PIs, although the z cutoff is replaced #by a t cutoff to increase coverage. kp1 <- kmax+1 rhat<-1:nruns beta<-0*1:(2*kmax) tem<- 1:nruns ord <- cbind(tem,tem,tem,tem) #order if(tstype==1){#tstype=1 for AR(1) model phi<-c(0.5) beta[1] <- 0.5 ps<-1 qs <- 0 rr=1 } if(tstype==2) { #tstype=2 for AR(2) model phi=c(0.5,0.33) beta[c(1,2)]<-phi ps<-2 qs<-0 rr=2 } if(tstype==3){#tstype=3 for MA(1) model theta<-c(-0.5) beta[(kmax+1)] <- -0.5 ps<-0 qs <- 1 rr=1 } if(tstype==4) { #tstype=4 for MA(2) model theta=c(-0.5,0.5) beta[c(kmax+1,kmax+2)]<-theta ps<-0 qs<-2 rr=2 } if(tstype==5){#need kmax > 2 #tstype=5 for ARMA(3,1) model phi <- c(0.7,0.1,-0.4) theta <- 0.1 beta[c(1:3,kmax+1)] <- c(phi,0.1) ps<-3 qs<-1 rr=3 } if(tstype==6){#need kmax >= rr #tstype=6 for ARMA model selected by user phi <- as.vector(phi) theta <- as.vector(theta) ps<-length(phi) qs<-length(theta) rr=max(ps,qs) beta[c(1:ps,(kmax+1):(kmax+qs))] <- c(phi,theta) } np1 <- n + 1 len <- 1:nruns opilen <- len opilen2 <- len npilen <- len h95len <- len h95len2 <- len coverage <- 0 opicov <- 0 opicov2 <- 0 npicov <- 0 h95cov <- 0 h95cov2 <- 0 #use n-p-q in general tcut <- qt((1-alph/2),n-2) for(i in 1:nruns){ if(tstype<3){ if (etype==1) Y <- arima.sim(n=np1, list(order = c(ps,dd,qs), ar =c(phi)), innov = rnorm(np1)) if(etype==2) Y <- arima.sim(n=np1, list(order = c(ps,dd,qs), ar =c(phi)), innov = rt(np1,tdf)) if(etype==3) Y <- arima.sim(n=np1, list(order = c(ps,dd,qs), ar =c(phi)), innov = runif(np1,min=-1,max=1)) if(etype==4) Y <- arima.sim(n=np1, list(order = c(ps,dd,qs), ar =c(phi)), innov = (rexp(np1)-1)) } if(tstype==3 || tstype==4){ if (etype==1) Y <- arima.sim(n=np1, list(order = c(ps,dd,qs), ma=c(theta)), innov = rnorm(np1)) if(etype==2) Y <- arima.sim(n=np1, list(order = c(ps,dd,qs), ma=c(theta)), innov = rt(np1,tdf)) if(etype==3) Y <- arima.sim(n=np1, list(order = c(ps,dd,qs), ma=c(theta)), innov = runif(np1,min=-1,max=1)) if(etype==4) Y <- arima.sim(n=np1, list(order = c(ps,dd,qs), ma=c(theta)), innov = (rexp(np1)-1)) } if(tstype==5){ if (etype==1) Y <- arima.sim(n=np1,list(order = c(ps,dd,qs),ar=c(phi),ma=c(theta)),innov = rnorm(np1),n.start=burn) if(etype==2) Y <- arima.sim(n=np1,list(order = c(ps,dd,qs),ar=c(phi),ma=c(theta)),innov = rt(np1,tdf),n.start=burn) if(etype==3) Y <- arima.sim(n=np1, list(order = c(ps,dd,qs),ar=c(phi),ma=c(theta)), innov = runif(np1,min=-1,max=1),n.start=burn) if(etype==4) Y <- arima.sim(n=np1,list(order = c(ps,dd,qs), ar=c(phi),ma=c(theta)),innov=(rexp(np1)-1),n.start=burn) } if(tstype==6){ #arma model selected by the user if (etype==1) Y <- arima.sim(n=np1,list(order = c(ps,dd,qs), ar=c(phi),ma=c(theta)),innov = rnorm(np1),n.start=burn) if(etype==2) Y <- arima.sim(n=np1,list(order = c(ps,dd,qs), ar=c(phi),ma=c(theta)),innov = rt(np1,tdf),n.start=burn) if(etype==3) Y <- arima.sim(n=np1, list(order = c(ps,dd,qs), ar=c(phi),ma=c(theta)), innov = runif(np1,min=-1,max=1),n.start=burn) if(etype==4) Y <- arima.sim(n=np1,list(order = c(ps,dd,qs), ar=c(phi),ma=c(theta)),innov=(rexp(np1)-1),n.start=burn) } y <- Y[1:n] yf <- Y[np1] tem <- locpi(y,alpha=alph) #PI ignore TS structure LPI <- tem$LPI UPI <- tem$UPI len[i] <- UPI - LPI if(LPI < yf && yf < UPI) coverage <- coverage + 1 #ARMA(p,q) model selection #get the l-step ahead forecasts for l = 1 if(dd==0){ out<-auto.arima(y,d=dd,max.p=kmax,max.q=kmax,seasonal=FALSE,ic="aicc", stationary=TRUE) temp <- out %>% forecast(h=1) outt <- out } if(dd==1){ out<-auto.arima(y,d=dd,max.p=kmax,max.q=kmax,seasonal=FALSE,ic="aicc", stationary=FALSE) pI<-out$arma[1] qI<-out$arma[2] outt<-arima(y,order=c(pI,dd,qI),method="ML") temp <- outt %>% forecast(h=1) } if(dd==2){ out<-auto.arima(y,d=dd,max.p=kmax,max.q=kmax,seasonal=FALSE,ic="aicc", stationary=FALSE) pI<-out$arma[1] qI<-out$arma[2] outt<-arima(y,order=c(pI,dd,qI),method="ML") temp <- outt %>% forecast(h=1) } yfhat<-temp$mean[1] #1 step ahead forecast #get the 1 step ahead PI res <- outt$resid m <- length(res) kk<-length(outt$coef) am <- (1+15/m)*sqrt(m/(m-kk)) tem <- locpi2(res,k=kk,alph=alph) low <- yfhat+am*tem$LPI up <- yfhat+am*tem$UPI if(low < yf && yf < up) opicov <- opicov + 1 opilen[i] <- up-low #get PI from forecast package up<-temp$upper[2] low <- temp$lower[2] if(low < yf && yf < up) h95cov <- h95cov + 1 h95len[i] <- up-low #get the normal PI se <- sqrt(out$sigma) tcut <- qt((1-alph/2),n-kk) LPI <- yfhat - tcut*se UPI <- yfhat + tcut*se npilen[i] <- UPI-LPI if(LPI < yf && yf < UPI) npicov <- npicov + 1 #use a better model selection method if(dd==0){ out2<-armamsel2(y,kmax=kmax,pen=pen) tem<-arima(y,order=c(out2$pI,dd,out2$qI),method="ML") } if(dd==1){ Z <- as.vector(y) Z[1] <- NA Z[-1] <- diff(y) out2<-armamsel2(Z,kmax=kmax,pen=pen) tem<-arima(y,order=c(out2$pI,dd,out2$qI),method="ML") } if(dd==2){ Z <- as.vector(y) Z[1:2] <- NA Z[-c(1,2)] <- diff(Y,differences=2) out2<-armamsel2(Z,kmax=kmax,pen=pen) tem<-arima(y,order=c(out2$pI,dd,out2$qI),method="ML") } temp <- tem %>% forecast(h=1) yfhat<-temp$mean[1] #1 step ahead forecast #get the 1 step ahead PI res <- tem$resid m <- length(res) kk<-length(tem$coef) am <- (1+15/m)*sqrt(m/(m-kk)) tem2 <- locpi2(res,k=kk,alph=alph) low <- yfhat+am*tem2$LPI up <- yfhat+am*tem2$UPI if(low < yf && yf < up) opicov2 <- opicov2 + 1 opilen2[i] <- up-low #get PI from forecast package up<-temp$upper[2] low <- temp$lower[2] if(low < yf && yf < up) h95cov2 <- h95cov2 + 1 h95len2[i] <- up-low } mnpilen <- mean(len) coverage <- coverage/nruns #locPI ignores ARMA order opilen <- mean(opilen) opicov <- opicov/nruns opilen2 <- mean(opilen2) opicov2 <- opicov2/nruns h95cov <- h95cov/nruns h95len <- mean(h95len) h95cov2 <- h95cov2/nruns h95len2 <- mean(h95len2) npicov <- npicov/nruns npilen <- mean(npilen) list(coverage=coverage,mnpilen=mnpilen,opicov=opicov,opilen=opilen, opicov2=opicov2,opilen2=opilen2,h95cov=h95cov,h95len=h95len, h95cov2=h95cov2,h95len2=h95len2, npicov=npicov, npilen=npilen) } armaboot<-function(Y,B=100,pmax=3,qmax=3,c=0.05){ #Bootstraps ARMA(p,q) model selection using the parametric bootstrap. #Assumes n > 10 (pmax+qmax) and adds cB full model bootstrap samples. #Uses the auto.arima function from the forecast package with aic and #stationary = FALSE, armaboot2 uses a selected burnin period #The full model and betahatmix are also bootstrapped. #Want pmax > 0 and qmax > 0. #Uses library forecast. #If the true model is ARMA(ps,qs), then both pmax > ps and qmax > qs #causes lots of convergence problems. out <- arima(Y,order=c(pmax,0,qmax),method="ML") kmax <- pmax+qmax phihat <- out$coef[1:pmax] #from full model thetahat <- out$coef[(pmax+1):kmax] #from full model n<-length(Y) sig<-sqrt(out$sigma2) d <- ceiling(c*B) bpd <- B + d bp1 <- B + 1 betas <- matrix(0,nrow=bpd,ncol=kmax) btmix <- betas btfull <- betas bhatvs <- 0*1:kmax outvs<-auto.arima(Y,d=0,max.p=pmax,max.q=qmax,seasonal=FALSE,ic="aic", stationary=FALSE,method="ML") #outvs<-auto.arima(Y,d=0,max.p=pmax,max.q=qmax,seasonal=FALSE,ic="aicc", #$stationary=TRUE,method="ML") arord<-outvs$arma[1] maord<-outvs$arma[2] if(arord>0) bhatvs[1:arord]<-outvs$coef[1:arord] if(maord>0) bhatvs[(pmax+1):(pmax+maord)]<-outvs$coef[(arord+1):(arord+maord)] for(i in 1:B) { yb <- arima.sim(n=n,list(ar=phihat,ma=thetahat),sd=sig) tem<-auto.arima(yb,d=0,max.p=pmax,max.q=qmax,seasonal=FALSE,ic="aic", stationary=FALSE,method="ML") # tem<-auto.arima(yb,d=0,max.p=pmax,max.q=qmax,seasonal=FALSE,ic="aicc", #stationary=TRUE,method="ML") arord<-tem$arma[1] maord<-tem$arma[2] if(arord>0) betas[i,1:arord]<-tem$coef[1:arord] if(maord>0) betas[i,(pmax+1):(pmax+maord)]<-tem$coef[(arord+1):(arord+maord)] tem<-arima(yb,order=c(pmax,0,qmax),method="ML") btfull[i,1:pmax] <- tem$coef[1:pmax] btfull[i,(pmax+1):kmax] <- tem$coef[(pmax+1):kmax] #mix if((arord+maord)>0){ yb <- arima.sim(n=n,list(ar=phihat,ma=thetahat),sd=sig) tem <- arima(yb,order=c(arord,0,maord),method="ML") if(arord>0) btmix[i,1:arord]<-tem$coef[1:arord] if(maord>0) btmix[i,(pmax+1):(pmax+maord)] <- tem$coef[(arord+1):(arord+maord)] } } for(i in bp1:bpd){ yb <- arima.sim(n=n,list(ar=phihat,ma=thetahat),sd=sig) tem<-arima(yb,order=c(pmax,0,qmax),method="ML") betas[i,1:pmax] <- tem$coef[1:pmax] betas[i,(pmax+1):kmax] <- tem$coef[(pmax+1):kmax] } btmix[bp1:bpd,] <- betas[bp1:bpd,] btfull[bp1:bpd,] <- betas[bp1:bpd,] list(btfull=btfull,betas=betas,btmix=btmix,bhatvs=bhatvs) } armaboot2<-function(Y,B=100,pmax=3,qmax=3,c=0.05,burn=100){ #Bootstraps ARMA(p,q) model selection using the parametric bootstrap. #Assumes n > 10 (pmax+qmax) and adds cB full model bootstrap samples. #Uses the auto.arima function from the forecast package with aic and #stationary = FALSE, burn is the burn in period, need burn>pmax+qmax #The full model and betahatmix are also bootstrapped. #Want pmax > 0 and qmax > 0. #Uses library forecast. #If the true model is ARMA(ps,qs), then both pmax > ps and qmax > qs #causes lots of convergence problems. out <- arima(Y,order=c(pmax,0,qmax),method="ML") kmax <- pmax+qmax phihat <- out$coef[1:pmax] #from full model thetahat <- out$coef[(pmax+1):kmax] #from full model n<-length(Y) sig<-sqrt(out$sigma2) d <- ceiling(c*B) bpd <- B + d bp1 <- B + 1 betas <- matrix(0,nrow=bpd,ncol=kmax) btmix <- betas btfull <- betas bhatvs <- 0*1:kmax outvs<-auto.arima(Y,d=0,max.p=pmax,max.q=qmax,seasonal=FALSE,ic="aic", stationary=FALSE,method="ML") #outvs<-auto.arima(Y,d=0,max.p=pmax,max.q=qmax,seasonal=FALSE,ic="aicc", #$stationary=TRUE,method="ML") arord<-outvs$arma[1] maord<-outvs$arma[2] if(arord>0) bhatvs[1:arord]<-outvs$coef[1:arord] if(maord>0) bhatvs[(pmax+1):(pmax+maord)]<-outvs$coef[(arord+1):(arord+maord)] for(i in 1:B) { yb <- arima.sim(n=n,list(ar=phihat,ma=thetahat),sd=sig,n.start=burn) tem<-auto.arima(yb,d=0,max.p=pmax,max.q=qmax,seasonal=FALSE,ic="aic", stationary=FALSE,method="ML") # tem<-auto.arima(yb,d=0,max.p=pmax,max.q=qmax,seasonal=FALSE,ic="aicc", #stationary=TRUE,method="ML") arord<-tem$arma[1] maord<-tem$arma[2] if(arord>0) betas[i,1:arord]<-tem$coef[1:arord] if(maord>0) betas[i,(pmax+1):(pmax+maord)]<-tem$coef[(arord+1):(arord+maord)] tem<-arima(yb,order=c(pmax,0,qmax),method="ML") btfull[i,1:pmax] <- tem$coef[1:pmax] btfull[i,(pmax+1):kmax] <- tem$coef[(pmax+1):kmax] #mix if((arord+maord)>0){ yb <- arima.sim(n=n,list(ar=phihat,ma=thetahat),sd=sig,n.start=burn) tem <- arima(yb,order=c(arord,0,maord),method="ML") if(arord>0) btmix[i,1:arord]<-tem$coef[1:arord] if(maord>0) btmix[i,(pmax+1):(pmax+maord)] <- tem$coef[(arord+1):(arord+maord)] } } for(i in bp1:bpd){ yb <- arima.sim(n=n,list(ar=phihat,ma=thetahat),sd=sig,n.start=burn) tem<-arima(yb,order=c(pmax,0,qmax),method="ML") betas[i,1:pmax] <- tem$coef[1:pmax] betas[i,(pmax+1):kmax] <- tem$coef[(pmax+1):kmax] } btmix[bp1:bpd,] <- betas[bp1:bpd,] btfull[bp1:bpd,] <- betas[bp1:bpd,] list(btfull=btfull,betas=betas,btmix=btmix,bhatvs=bhatvs) } armapisim<-function(n=100,nruns=50,kmax=5,etype=1,tstype=1, tdf=5,phi=c(0.7,0.1,-0.4),theta=c(0.1),pen=2,burn=100,alph=0.05){ #Needs auto.arima from library(forecast) and calls armamsel2. #for an arma(ps,qs) true model, estimates r = max(ps,qs) #assumes n >= 20 (kmax), full model beta = (phi1,...,phikmax,theta1,...,thetakmax) #Fits several 1 step ahead PIs after model selection with auto.arima and method #that uses Potscher (1990). #There are models where nruns need to be near 50 or 100 or the program fails. #Simulates tstype time series of length n, and finds proportion #of time Y_n+1 is in its 95% PI. #tstype = 1 for MA(2), #The PI may be asymptotically optimal. #Average PI length is also given. #Gets the 1 step ahead PIs based on forecast residuals. #etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom #etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors #calls locpi, locpi2 #Finally gets the normal PIs, although the z cutoff is replaced #by a t cutoff to increase coverage. kp1 <- kmax+1 rhat<-1:nruns beta<-0*1:(2*kmax) tem<- 1:nruns ord <- cbind(tem,tem,tem,tem) #order if(tstype==1){#tstype=1 for AR(1) model phi<-c(0.5) beta[1] <- 0.5 ps<-1 qs <- 0 rr=1 } if(tstype==2) { #tstype=2 for AR(2) model phi=c(0.5,0.33) beta[c(1,2)]<-phi ps<-2 qs<-0 rr=2 } if(tstype==3){#tstype=3 for MA(1) model theta<-c(-0.5) beta[(kmax+1)] <- -0.5 ps<-0 qs <- 1 rr=1 } if(tstype==4) { #tstype=4 for MA(2) model theta=c(-0.5,0.5) beta[c(kmax+1,kmax+2)]<-theta ps<-0 qs<-2 rr=2 } if(tstype==5){#need kmax > 2 #tstype=5 for ARMA(3,1) model phi <- c(0.7,0.1,-0.4) theta <- 0.1 beta[c(1:3,kmax+1)] <- c(phi,0.1) ps<-3 qs<-1 rr=3 } if(tstype==6){#need kmax >= rr #tstype=6 for ARMA model selected by user phi <- as.vector(phi) theta <- as.vector(theta) ps<-length(phi) qs<-length(theta) rr=max(ps,qs) beta[c(1:ps,(kmax+1):(kmax+qs))] <- c(phi,theta) } np1 <- n + 1 len <- 1:nruns opilen <- len opilen2 <- len npilen <- len h95len <- len h95len2 <- len coverage <- 0 opicov <- 0 opicov2 <- 0 npicov <- 0 h95cov <- 0 h95cov2 <- 0 #use n-p-q in general tcut <- qt((1-alph/2),n-2) for(i in 1:nruns){ if(tstype<3){ if (etype==1) Y <- arima.sim(n=np1, list(ar=c(phi)), innov = rnorm(np1)) if(etype==2) Y <- arima.sim(n=np1, list(ar=c(phi)), innov = rt(np1,tdf)) if(etype==3) Y <- arima.sim(n=np1, list(ar=c(phi)), innov = runif(np1,min=-1,max=1)) if(etype==4) Y <- arima.sim(n=np1, list(ar=c(phi)), innov = (rexp(np1)-1)) } if(tstype==3 || tstype==4){ if (etype==1) Y <- arima.sim(n=np1, list(ma=c(theta)), innov = rnorm(np1)) if(etype==2) Y <- arima.sim(n=np1, list(ma=c(theta)), innov = rt(np1,tdf)) if(etype==3) Y <- arima.sim(n=np1, list(ma=c(theta)), innov = runif(np1,min=-1,max=1)) if(etype==4) Y <- arima.sim(n=np1, list(ma=c(theta)), innov = (rexp(np1)-1)) } if(tstype==5){ if (etype==1) Y <- arima.sim(n=np1,list(ar=c(phi),ma=c(theta)),innov = rnorm(np1),n.start=burn) if(etype==2) Y <- arima.sim(n=np1,list(ar=c(phi),ma=c(theta)),innov = rt(np1,tdf),n.start=burn) if(etype==3) Y <- arima.sim(n=np1, list(ar=c(phi),ma=c(theta)), innov = runif(np1,min=-1,max=1),n.start=burn) if(etype==4) Y <- arima.sim(n=np1,list(ar=c(phi),ma=c(theta)),innov=(rexp(np1)-1),n.start=burn) } if(tstype==6){ #arma model selected by the user if (etype==1) Y <- arima.sim(n=np1,list(ar=c(phi),ma=c(theta)),innov = rnorm(np1),n.start=burn) if(etype==2) Y <- arima.sim(n=np1,list(ar=c(phi),ma=c(theta)),innov = rt(np1,tdf),n.start=burn) if(etype==3) Y <- arima.sim(n=np1, list(ar=c(phi),ma=c(theta)), innov = runif(np1,min=-1,max=1),n.start=burn) if(etype==4) Y <- arima.sim(n=np1,list(ar=c(phi),ma=c(theta)),innov=(rexp(np1)-1),n.start=burn) } y <- Y[1:n] yf <- Y[np1] tem <- locpi(y,alpha=alph) #PI ignore TS structure LPI <- tem$LPI UPI <- tem$UPI len[i] <- UPI - LPI if(LPI < yf && yf < UPI) coverage <- coverage + 1 #ARMA(p,q) model selection out<-auto.arima(y,d=0,max.p=kmax,max.q=kmax,seasonal=FALSE,ic="aicc", stationary=TRUE) #get the l-step ahead forecasts for l = 1 temp <- out %>% forecast(h=1) yfhat<-temp$mean[1] #1 step ahead forecast #get the 1 step ahead PI res <- out$resid m <- length(res) kk<-length(out$coef) am <- (1+15/m)*sqrt(m/(m-kk)) tem <- locpi2(res,k=kk,alph=alph) low <- yfhat+am*tem$LPI up <- yfhat+am*tem$UPI if(low < yf && yf < up) opicov <- opicov + 1 opilen[i] <- up-low #get PI from forecast package up<-temp$upper[2] low <- temp$lower[2] if(low < yf && yf < up) h95cov <- h95cov + 1 h95len[i] <- up-low #get the normal PI se <- sqrt(out$sigma) tcut <- qt((1-alph/2),n-kk) LPI <- yfhat - tcut*se UPI <- yfhat + tcut*se npilen[i] <- UPI-LPI if(LPI < yf && yf < UPI) npicov <- npicov + 1 #use a better model selection method out2<-armamsel2(y,kmax=kmax,pen=pen) tem<-arima(y,order=c(out2$pI,0,out2$qI),method="ML") #get the 1 step ahead PI res <- tem$resid m <- length(res) kk<-length(tem$coef) am <- (1+15/m)*sqrt(m/(m-kk)) tem2 <- locpi2(res,k=kk,alph=alph) low <- yfhat+am*tem2$LPI up <- yfhat+am*tem2$UPI if(low < yf && yf < up) opicov2 <- opicov2 + 1 opilen2[i] <- up-low #get PI from forecast package temp <- tem %>% forecast(h=1) up<-temp$upper[2] low <- temp$lower[2] if(low < yf && yf < up) h95cov2 <- h95cov2 + 1 h95len2[i] <- up-low } mnpilen <- mean(len) coverage <- coverage/nruns #locPI ignores ARMA order opilen <- mean(opilen) opicov <- opicov/nruns opilen2 <- mean(opilen2) opicov2 <- opicov2/nruns h95cov <- h95cov/nruns h95len <- mean(h95len) h95cov2 <- h95cov2/nruns h95len2 <- mean(h95len2) npicov <- npicov/nruns npilen <- mean(npilen) list(coverage=coverage,mnpilen=mnpilen,opicov=opicov,opilen=opilen, opicov2=opicov2,opilen2=opilen2,h95cov=h95cov,h95len=h95len, h95cov2=h95cov2,h95len2=h95len2, npicov=npicov, npilen=npilen) } armamsel1<-function(y,kmax=5){ #For an arma(ps,qs) true model, estimates r = max(ps,qs) #using Potscher (1990). So use an ARMA(rhat,rhat) model. #Often works for n>=80, but underfitting is sometimes severe for n < 1000. n <- length(y) kp1 <- kmax+1 tem <- 1:kp1 z <- 1:kp1 vars<-z for(i in 1:kp1){ out <- arima(y,order=c((i-1),0,(i-1)),method="ML") vars[i]<-out$sigma2 z[i] <- log(vars[i]) +2*(i-1)*log(n)/n } #get the first local min of z zz <- z zz[1:(kp1-1)] <- z[2:kp1] zz[kp1]<-z[kp1]+1 tem2 <- tem[z=80, but underfitting is sometimes severe for n < 1000. n <- length(y) kp1 <- kmax+1 tem <- 1:kp1 z <- 1:kp1 vars<-z for(i in 1:kp1){ out <- arima(y,order=c((i-1),0,(i-1)),method="ML") vars[i]<-out$sigma2 z[i] <- log(vars[i]) +2*(i-1)*log(n)/n } #get the first local min of z zz <- z zz[1:(kp1-1)] <- z[2:kp1] zz[kp1]<-z[kp1]+1 tem2 <- tem[z 0){ for(i in 1:rhat){ out <- arima(y,order=c(rhat,0,(rhat-i)),method="ML") var<-out$sigma2 ctem <- n*log(var) +2*(rhat + rhat-i) if(ctem < critr){ critr <- ctem - pen pI <- rhat qI <- (rhat-i)} out <- arima(y,order=c((rhat-i),0,rhat),method="ML") var<-out$sigma2 ctem <- n*log(var) +2*(rhat + rhat-i) if(ctem < critr){ critr <- ctem - pen qI <- rhat pI <- (rhat-i)} }} list(rhat=rhat,pI=pI,qI=qI) } armasim0<-function(n=100,nruns=20,kmax=5,etype=1,tstype=1,tdf=5,burn=100){ #ARMA(p,q) model selection. #assumes n >= 20 (kmax), full model beta = (phi1,...,phikmax,theta1,...,thetakmax) #current program uses AIC #program fails with tstype = 5 (ARMA models) for both AIC and BIC even with n=18000 #undfit is the proportion of runs where model selection underfit #ovfit is the proportion of runs where p>ps and q>qs so model is inconsistent #cfit is proportion of runs a consistent model was selected = 1-undfit-ovfit #want sfit near 1, ovfit and undfit near 0 #etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom #etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors #uses library(forecast) kp1 <- kmax+1 beta<-0*1:(2*kmax) if(tstype==1){#tstype=1 for AR(1) model phi<-c(0.5) beta[1] <- 0.5 ps<-1 qs <- 0 } if(tstype==2) { #tstype=2 for AR(2) model phi=c(0.5,0.33) beta[c(1,2)]<-phi ps<-2 qs<-0 } if(tstype==3){#tstype=3 for MA(1) model theta<-c(-0.5) beta[(kmax+1)] <- -0.5 ps<-0 qs <- 1 } if(tstype==4) { #tstype=4 for MA(2) model theta=c(-0.5,0.5) beta[c(kmax+1,kmax+2)]<-theta ps<-0 qs<-2 } if(tstype==5){#need pmax > 2 #tstype=5 for ARMA(3,1) model phi <- c(0.7,0.1,-0.4) theta <- 0.1 beta[c(1:3,kmax+1)] <- c(phi,0.1) ps<-3 qs<-1 } if(tstype==6){#need kmax >= rr #tstype=6 for ARMA model selected by user phi <- as.vector(phi) theta <- as.vector(theta) ps<-length(phi) qs<-length(theta) beta[c(1:ps,(kmax+1):(kmax+qs))] <- c(phi,theta) } tem<- 1:nruns ord <- cbind(tem,tem) #order undfit<-0 ovfit <- 0 cfit<-0 for(i in 1:nruns) { if(tstype<3){ if (etype==1) Y <- arima.sim(n=n, list(ar=c(phi)), innov = rnorm(n)) if(etype==2) Y <- arima.sim(n=n, list(ar=c(phi)), innov = rt(n,tdf)) if(etype==3) Y <- arima.sim(n=n, list(ar=c(phi)), innov = runif(n,min=-1,max=1)) if(etype==4) Y <- arima.sim(n=n, list(ar=c(phi)), innov = (rexp(n)-1)) } if(tstype==3 || tstype==4){ if (etype==1) Y <- arima.sim(n=n, list(ma=c(theta)), innov = rnorm(n)) if(etype==2) Y <- arima.sim(n=n, list(ma=c(theta)), innov = rt(n,tdf)) if(etype==3) Y <- arima.sim(n=n, list(ma=c(theta)), innov = runif(n,min=-1,max=1)) if(etype==4) Y <- arima.sim(n=n, list(ma=c(theta)), innov = (rexp(n)-1)) } if(tstype==5){ if (etype==1) Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov = rnorm(n),n.start=burn) if(etype==2) Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov = rt(n,tdf),n.start=burn) if(etype==3) Y <- arima.sim(n=n, list(ar=c(phi),ma=c(theta)), innov = runif(n,min=-1,max=1),n.start=burn) if(etype==4) Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov=(rexp(n)-1),n.start=burn) } if(tstype==6){ #arma model selected by the user if (etype==1) Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov = rnorm(n),n.start=burn) if(etype==2) Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov = rt(n,tdf),n.start=burn) if(etype==3) Y <- arima.sim(n=n, list(ar=c(phi),ma=c(theta)), innov = runif(n,min=-1,max=1),n.start=burn) if(etype==4) Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov=(rexp(n)-1),n.start=burn) } outvs<-auto.arima(Y,d=0,max.p=kmax,max.q=kmax,seasonal=FALSE,ic="aic", stationary=TRUE,method="ML") #not much change if bic is used instead arord<-outvs$arma[1] maord<-outvs$arma[2] ord[i,1]<-arord ord[i,2]<-maord if(arord < ps || maord < qs) #|| is the or operator undfit <- undfit+1 if(arord > ps && maord > qs) #&& is the and operator ovfit <- ovfit+1 #model is inconsistent if ovfit occurs if(arord == ps && maord == qs) cfit <- cfit+1 if(arord == ps && maord > qs) cfit <- cfit+1 if(arord > ps && maord == qs) #the three if statements are disjoint cfit <- cfit+1 #cfit <- 1-ovfit-undfit is better } undfit<-undfit/nruns ovfit<-ovfit/nruns cfit <- cfit/nruns list(ord=ord,beta=beta,ps=ps,qs=qs,undfit=undfit,ovfit=ovfit,cfit=cfit) } armasim1<-function(n=100,nruns=20,kmax=5,etype=1,tstype=5,tdf=5, phi=c(0.7,0.1,-0.4),theta=c(0.1),burn=100){ #for an arma(ps,qs) true model, estimates r = max(ps,qs) #using Potscher (1990), calls armamsel1 #assumes n >= 20 (kmax), full model beta = (phi1,...,phikmax,theta1,...,thetakmax) #undfit is the proportion of runs where model selection underfit #etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom #etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors kp1 <- kmax+1 rhat<-1:nruns beta<-0*1:(2*kmax) if(tstype==1){#tstype=1 for AR(1) model phi<-c(0.5) beta[1] <- 0.5 ps<-1 qs <- 0 rr=1 } if(tstype==2) { #tstype=2 for AR(2) model phi=c(0.5,0.33) beta[c(1,2)]<-phi ps<-2 qs<-0 rr=2 } if(tstype==3){#tstype=3 for MA(1) model theta<-c(-0.5) beta[(kmax+1)] <- -0.5 ps<-0 qs <- 1 rr=1 } if(tstype==4) { #tstype=4 for MA(2) model theta=c(-0.5,0.5) beta[c(kmax+1,kmax+2)]<-theta ps<-0 qs<-2 rr=2 } if(tstype==5){#need kmax > 2 #tstype=5 for ARMA(3,1) model phi <- c(0.7,0.1,-0.4) theta <- 0.1 beta[c(1:3,kmax+1)] <- c(phi,0.1) ps<-3 qs<-1 rr=3 } if(tstype==6){#need kmax >= rr #tstype=6 for ARMA model selected by user phi <- as.vector(phi) theta <- as.vector(theta) ps<-length(phi) qs<-length(theta) rr=max(ps,qs) beta[c(1:ps,(kmax+1):(kmax+qs))] <- c(phi,theta) } undfit<-0 rtrue<-0 for(i in 1:nruns) { if(tstype<3){ if (etype==1) Y <- arima.sim(n=n, list(ar=c(phi)), innov = rnorm(n)) if(etype==2) Y <- arima.sim(n=n, list(ar=c(phi)), innov = rt(n,tdf)) if(etype==3) Y <- arima.sim(n=n, list(ar=c(phi)), innov = runif(n,min=-1,max=1)) if(etype==4) Y <- arima.sim(n=n, list(ar=c(phi)), innov = (rexp(n)-1)) } if(tstype==3 || tstype==4){ if (etype==1) Y <- arima.sim(n=n, list(ma=c(theta)), innov = rnorm(n)) if(etype==2) Y <- arima.sim(n=n, list(ma=c(theta)), innov = rt(n,tdf)) if(etype==3) Y <- arima.sim(n=n, list(ma=c(theta)), innov = runif(n,min=-1,max=1)) if(etype==4) Y <- arima.sim(n=n, list(ma=c(theta)), innov = (rexp(n)-1)) } if(tstype==5){ if (etype==1) Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov = rnorm(n),n.start=burn) if(etype==2) Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov = rt(n,tdf),n.start=burn) if(etype==3) Y <- arima.sim(n=n, list(ar=c(phi),ma=c(theta)), innov = runif(n,min=-1,max=1),n.start=burn) if(etype==4) Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov=(rexp(n)-1),n.start=burn) } if(tstype==6){ #arma model selected by the user if (etype==1) Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov = rnorm(n),n.start=burn) if(etype==2) Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov = rt(n,tdf),n.start=burn) if(etype==3) Y <- arima.sim(n=n, list(ar=c(phi),ma=c(theta)), innov = runif(n,min=-1,max=1),n.start=burn) if(etype==4) Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov=(rexp(n)-1),n.start=burn) } rhat[i] <- armamsel1(Y,kmax=kmax)$rhat if(rhat[i] < rr) undfit<-undfit+1 if(rhat[i] == rr) rtrue <- rtrue+1 } undfit<-undfit/nruns rtrue <- rtrue/nruns list(rhat=rhat,rtrue=rtrue,undfit=undfit) } armasim2<-function(n=100,nruns=20,kmax=5,etype=1,tstype=5,tdf=5, phi=c(0.7,0.1,-0.4),theta=c(0.1),pen=2,burn=100){ #for an arma(ps,qs) true model, estimates r = max(ps,qs) #using Potscher (1990), calls armamsel2, #assumes n >= 20 (kmax), full model beta = (phi1,...,phikmax,theta1,...,thetakmax) #undfit is the proportion of runs where model selection underfit #ovfit is the proportion of runs where p>ps and q>qs so model is inconsistent #cfit is proportion of runs a consistent model was selected = 1-undfit-ovfit #want sfit near 1, ovfit and undfit near 0 #rtrue is the proportion of times rhat=r, want rtrue near 1 #etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom #etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors kp1 <- kmax+1 rhat<-1:nruns beta<-0*1:(2*kmax) tem<- 1:nruns ord <- cbind(tem,tem) #order if(tstype==1){#tstype=1 for AR(1) model phi<-c(0.5) beta[1] <- 0.5 ps<-1 qs <- 0 rr=1 } if(tstype==2) { #tstype=2 for AR(2) model phi=c(0.5,0.33) beta[c(1,2)]<-phi ps<-2 qs<-0 rr=2 } if(tstype==3){#tstype=3 for MA(1) model theta<-c(-0.5) beta[(kmax+1)] <- -0.5 ps<-0 qs <- 1 rr=1 } if(tstype==4) { #tstype=4 for MA(2) model theta=c(-0.5,0.5) beta[c(kmax+1,kmax+2)]<-theta ps<-0 qs<-2 rr=2 } if(tstype==5){#need kmax > 2 #tstype=5 for ARMA(3,1) model phi <- c(0.7,0.1,-0.4) theta <- 0.1 beta[c(1:3,kmax+1)] <- c(phi,0.1) ps<-3 qs<-1 rr=3 } if(tstype==6){#need kmax >= rr #tstype=6 for ARMA model selected by user phi <- as.vector(phi) theta <- as.vector(theta) ps<-length(phi) qs<-length(theta) rr=max(ps,qs) beta[c(1:ps,(kmax+1):(kmax+qs))] <- c(phi,theta) } rtrue<-0 undfit<-0 ovfit <- 0 cfit<-0 for(i in 1:nruns) { if(tstype<3){ if (etype==1) Y <- arima.sim(n=n, list(ar=c(phi)), innov = rnorm(n)) if(etype==2) Y <- arima.sim(n=n, list(ar=c(phi)), innov = rt(n,tdf)) if(etype==3) Y <- arima.sim(n=n, list(ar=c(phi)), innov = runif(n,min=-1,max=1)) if(etype==4) Y <- arima.sim(n=n, list(ar=c(phi)), innov = (rexp(n)-1)) } if(tstype==3 || tstype==4){ if (etype==1) Y <- arima.sim(n=n, list(ma=c(theta)), innov = rnorm(n)) if(etype==2) Y <- arima.sim(n=n, list(ma=c(theta)), innov = rt(n,tdf)) if(etype==3) Y <- arima.sim(n=n, list(ma=c(theta)), innov = runif(n,min=-1,max=1)) if(etype==4) Y <- arima.sim(n=n, list(ma=c(theta)), innov = (rexp(n)-1)) } if(tstype==5){ if (etype==1) Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov = rnorm(n),n.start=burn) if(etype==2) Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov = rt(n,tdf),n.start=burn) if(etype==3) Y <- arima.sim(n=n, list(ar=c(phi),ma=c(theta)), innov = runif(n,min=-1,max=1),n.start=burn) if(etype==4) Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov=(rexp(n)-1),n.start=burn) } if(tstype==6){ #arma model selected by the user if (etype==1) Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov = rnorm(n),n.start=burn) if(etype==2) Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov = rt(n,tdf),n.start=burn) if(etype==3) Y <- arima.sim(n=n, list(ar=c(phi),ma=c(theta)), innov = runif(n,min=-1,max=1),n.start=burn) if(etype==4) Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov=(rexp(n)-1),n.start=burn) } out <- armamsel2(Y,kmax=kmax,pen=pen) rhat[i]<-out$rhat if(rhat[i] == rr) rtrue <- rtrue+1 arord<-out$pI maord<-out$qI ord[i,1]<-arord ord[i,2]<-maord if(arord < ps || maord < qs) #|| is the or operator undfit <- undfit+1 if(arord > ps && maord > qs) #&& is the and operator ovfit <- ovfit+1 #model is inconsistent if ovfit occurs if(arord == ps && maord == qs) cfit <- cfit+1 if(arord == ps && maord > qs) cfit <- cfit+1 if(arord > ps && maord == qs) #the three if statements are disjoint cfit <- cfit+1 #cfit <- 1-ovfit-undfit is better } undfit<-undfit/nruns rtrue <- rtrue/nruns ovfit<-ovfit/nruns cfit <- cfit/nruns list(ord=ord,beta=beta,rhat=rhat,rtrue=rtrue,undfit=undfit,ovfit=ovfit,cfit=cfit) } armasim3<-function(n=100,nruns=20,kmax=5,etype=1,tstype=5,tdf=5, phi=c(0.7,0.1,-0.4),theta=c(0.1),pen=2,burn=100){ #for an arma(ps,qs) true model, estimates r = max(ps,qs) #using Potscher (1990), calls armamsel2, #also uses auto.arima from the forecast package #assumes n >= 20 (kmax), full model beta = (phi1,...,phikmax,theta1,...,thetakmax) #undfit is the proportion of runs where model selection underfit #ovfit is the proportion of runs where p>ps and q>qs so model is inconsistent #cfit is proportion of runs a consistent model was selected = 1-undfit-ovfit #want sfit near 1, ovfit and undfit near 0 #rtrue is the proportion of times rhat=r, want rtrue near 1 #etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom #etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors kp1 <- kmax+1 rhat<-1:nruns beta<-0*1:(2*kmax) tem<- 1:nruns ord <- cbind(tem,tem,tem,tem) #order if(tstype==1){#tstype=1 for AR(1) model phi<-c(0.5) beta[1] <- 0.5 ps<-1 qs <- 0 rr=1 } if(tstype==2) { #tstype=2 for AR(2) model phi=c(0.5,0.33) beta[c(1,2)]<-phi ps<-2 qs<-0 rr=2 } if(tstype==3){#tstype=3 for MA(1) model theta<-c(-0.5) beta[(kmax+1)] <- -0.5 ps<-0 qs <- 1 rr=1 } if(tstype==4) { #tstype=4 for MA(2) model theta=c(-0.5,0.5) beta[c(kmax+1,kmax+2)]<-theta ps<-0 qs<-2 rr=2 } if(tstype==5){#need kmax > 2 #tstype=5 for ARMA(3,1) model phi <- c(0.7,0.1,-0.4) theta <- 0.1 beta[c(1:3,kmax+1)] <- c(phi,0.1) ps<-3 qs<-1 rr=3 } if(tstype==6){#need kmax >= rr #tstype=6 for ARMA model selected by user phi <- as.vector(phi) theta <- as.vector(theta) ps<-length(phi) qs<-length(theta) rr=max(ps,qs) beta[c(1:ps,(kmax+1):(kmax+qs))] <- c(phi,theta) } rtrue<-0 undfit<-0 ovfit <- 0 cfit<-0 aundfit<-0 aovfit <- 0 acfit<-0 for(i in 1:nruns) { if(tstype<3){ if (etype==1) Y <- arima.sim(n=n, list(ar=c(phi)), innov = rnorm(n)) if(etype==2) Y <- arima.sim(n=n, list(ar=c(phi)), innov = rt(n,tdf)) if(etype==3) Y <- arima.sim(n=n, list(ar=c(phi)), innov = runif(n,min=-1,max=1)) if(etype==4) Y <- arima.sim(n=n, list(ar=c(phi)), innov = (rexp(n)-1)) } if(tstype==3 || tstype==4){ if (etype==1) Y <- arima.sim(n=n, list(ma=c(theta)), innov = rnorm(n)) if(etype==2) Y <- arima.sim(n=n, list(ma=c(theta)), innov = rt(n,tdf)) if(etype==3) Y <- arima.sim(n=n, list(ma=c(theta)), innov = runif(n,min=-1,max=1)) if(etype==4) Y <- arima.sim(n=n, list(ma=c(theta)), innov = (rexp(n)-1)) } if(tstype==5){ if (etype==1) Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov = rnorm(n),n.start=burn) if(etype==2) Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov = rt(n,tdf),n.start=burn) if(etype==3) Y <- arima.sim(n=n, list(ar=c(phi),ma=c(theta)), innov = runif(n,min=-1,max=1),n.start=burn) if(etype==4) Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov=(rexp(n)-1),n.start=burn) } if(tstype==6){ #arma model selected by the user if (etype==1) Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov = rnorm(n),n.start=burn) if(etype==2) Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov = rt(n,tdf),n.start=burn) if(etype==3) Y <- arima.sim(n=n, list(ar=c(phi),ma=c(theta)), innov = runif(n,min=-1,max=1),n.start=burn) if(etype==4) Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov=(rexp(n)-1),n.start=burn) } out <- armamsel2(Y,kmax=kmax,pen=pen) rhat[i]<-out$rhat if(rhat[i] == rr) rtrue <- rtrue+1 arord<-out$pI maord<-out$qI ord[i,1]<-arord ord[i,2]<-maord if(arord < ps || maord < qs) #|| is the or operator undfit <- undfit+1 if(arord > ps && maord > qs) #&& is the and operator ovfit <- ovfit+1 #model is inconsistent if ovfit occurs if(arord == ps && maord == qs) cfit <- cfit+1 if(arord == ps && maord > qs) cfit <- cfit+1 if(arord > ps && maord == qs) #the three if statements are disjoint cfit <- cfit+1 #cfit <- 1-ovfit-undfit is better ##auto.arima outvs<-auto.arima(Y,d=0,max.p=kmax,max.q=kmax,seasonal=FALSE,ic="aic", stationary=TRUE,method="ML") #not much change if bic is used instead arord<-outvs$arma[1] maord<-outvs$arma[2] ord[i,3]<-arord ord[i,4]<-maord if(arord < ps || maord < qs) #|| is the or operator aundfit <- aundfit+1 if(arord > ps && maord > qs) #&& is the and operator aovfit <- aovfit+1 #model is inconsistent if ovfit occurs if(arord == ps && maord == qs) acfit <- acfit+1 if(arord == ps && maord > qs) acfit <- acfit+1 if(arord > ps && maord == qs) #the three if statements are disjoint acfit <- acfit+1 #cfit <- 1-ovfit-undfit is better } undfit<-undfit/nruns rtrue <- rtrue/nruns ovfit<-ovfit/nruns cfit <- cfit/nruns aundfit<-aundfit/nruns aovfit<-aovfit/nruns acfit <- acfit/nruns list(ord=ord,beta=beta,rhat=rhat,rtrue=rtrue,undfit=undfit,ovfit=ovfit,cfit=cfit, aundfit=aundfit,aovfit=aovfit,acfit=acfit) } arp<-function(Y,p=1){ #Fits an AR(p) model to time series Y using OLS. #assumes p < n - p #Makes a response and residual plot. # Advance the plot by highlighting Stop with the right mouse button. #phihat = phihat_0 = intercept, phihat_1, ..., phihat_p cmar <- par("mar") par(mfrow = c(2, 1)) par(mar=c(4.0,4.0,2.0,0.5)) Y <- as.vector(Y) n <- length(Y) Xar <- matrix(0,nrow=n-p,ncol=p) Yar <- Y[(p+1):n] for(i in 1:p){ Xar[,i] <- Y[(p-i+1):(n-i)] } ardat <- as.data.frame(cbind(Yar,Xar)) tem <- lm(Yar~.,data=ardat) phihat <- tem$coef fit <- Y resid <- Y fit[(p+1):n] <- tem$fit fit[1:p] <- NA resid[(p+1):n] <- tem$res resid[1:p] <- NA plot(fit,Y) abline(0, 1) title("Response Plot") identify(fit,Y) plot(fit, resid) title("Residual Plot") identify(fit, resid) par(mfrow = c(1, 1)) par(mar=cmar) list(phihat=phihat,fit=fit,resid=resid,tem=tem) } bicmat <- function(Y,dd=0,pmax=5,qmax=5){ #makes a matrix of BICs - min(BIC) for ARIMA(p,dd,q) models for time series Y #uses method = "ML" to reduce errors #reduce pmax by 1 if an error message occurs, may need to reduce qmax #minbic is the min bic for the arima(p,dd,q) models, Y <- as.vector(Y) bics <- matrix(, (pmax+1), (qmax+1), dimnames=list(p=0:pmax, q=0:qmax)) for(p in 0:pmax) for(q in 0:qmax){ z <- arima(Y, c(p,d=dd,q), method = "ML", optim.control = list(maxit = 500)) bics[1+p, 1+q] <- BIC(z)} minbic <- min(bics, na.rm = TRUE) bics <- round(bics - minbic, 2) list(bics=bics) } confreg<-function(x, g = 4, that = 1:4, alpha = 0.05){ # Makes confidence regions for theta from rows of x = Ti* from a bootstrap. # Use for testing H_0: theta = 0 versus H_1: theta != 0. # The prediction region method, hybrid, and Bickel and Ren regions are used. # If g = 1, the shorth interval should work better. # Also computes the distance for the 0 vector. # Need g = dim(x)[2] and T = that the g by 1 estimator of theta. # Often that = A betahat(I_min,0). # Note that center= Tbar* = bagging estimator and cov = S*_T. x <- as.matrix(x) that <- as.vector(that) n <- dim(x)[1] zero <- 0*(1:g) up <- min((1 - alpha/2), (1 - alpha + 10*alpha*g/n)) if(alpha > 0.1) up <- min((1 - alpha + 0.05), (1 - alpha + g/n)) qn <- up if(qn < 1 - alpha + 0.001) up <- 1 - alpha center <- apply(x, 2, mean) cov <- var(x) md2 <- mahalanobis(x, center, cov) # MD is the classical distance MD <- sqrt(md2) #get prediction region method cutoff cuplim <- sqrt(quantile(md2, up)) D0 <- sqrt(mahalanobis(zero, center, cov)) #get hybrid region statistic = Bickel and Ren statistic br0 <- sqrt(mahalanobis(zero, that, cov)) #get the Bickel and Ren cutoff and test statistic br2 <- mahalanobis(x,that,cov) brlim <- sqrt(quantile(br2, up)) list(cuplim=cuplim,brlim=brlim,br0=br0,D0=D0,MD=MD,center=center,cov=cov) } confreg2<-function(x, g = 4, alpha = 0.05){ # Makes confidence regions for theta from rows of x = Ti* from a bootstrap. # Use for testing H_0: theta = 0 versus H_1: theta != 0. # The prediction region method is used. # If g = 1, the shorth interval should work better. # Also computes the distance for the 0 vector. # Need g = dim(x)[2] and T = that the g by 1 estimator of theta. # Often that = A betahat(I_min,0). # Note that center= Tbar* = bagging estimator and cov = S*_T. x <- as.matrix(x) n <- dim(x)[1] zero <- 0*(1:g) up <- min((1 - alpha/2), (1 - alpha + 10*alpha*g/n)) if(alpha > 0.1) up <- min((1 - alpha + 0.05), (1 - alpha + g/n)) qn <- up if(qn < 1 - alpha + 0.001) up <- 1 - alpha center <- apply(x, 2, mean) cov <- var(x) md2 <- mahalanobis(x, center, cov) #get prediction region method cutoff cuplim <- sqrt(quantile(md2, up)) D0 <- sqrt(mahalanobis(zero, center, cov)) list(cuplim=cuplim,D0=D0,center=center) } dsarmasim<-function(n=100,nruns=20,pmax=5,qmax=5,etype=1,tstype=1,tdf=5,burn=100){ #datasplitting with ARMA(p,q) model selection. #assumes n >= 10 (pmax+qmax), full model beta = (phi1,...,phipmax,theta1,...,thetaqmax) #need pmax+3 <=pmax+qmax #current program uses AIC with H using the first floor(n/2) cases #undfit is the proportion of runs where model selection underfit #etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom #etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors #uses library(forecast) kmax<-pmax+qmax #p=kmax beta<-0*1:kmax if(tstype==1){#tstype=1 for AR(1) model phi<-c(0.5) beta[1] <- 0.5 ps<-1 qs <- 0 } if(tstype==2) { #tstype=2 for AR(2) model phi=c(0.5,0.33) beta[c(1,2)]<-phi ps<-2 qs<-0 } if(tstype==3){#tstype=3 for MA(1) model theta<-c(-0.5) beta[(pmax+1)] <- -0.5 ps<-0 qs <- 1 } if(tstype==4) { #tstype=4 for MA(2) model theta=c(-0.5,0.5) beta[c(pmax+1,pmax+2)]<-theta ps<-0 qs<-2 } if(tstype==5){#need pmax > 2 #tstype=5 for ARMA(3,1) model phi <- c(0.7,0.1,-0.4) theta <- 0.1 beta[c(1:3,pmax+1)] <- c(phi,0.1) ps<-3 qs<-1 } tem<- 1:nruns ordno2 <- cbind(tem,tem) #order for n over 2 n/2 data spitting no2 <- floor(n/2) undfit<-0 for(i in 1:nruns) { if(tstype<3){ if (etype==1) Y <- arima.sim(n=n, list(ar=c(phi)), innov = rnorm(n)) if(etype==2) Y <- arima.sim(n=n, list(ar=c(phi)), innov = rt(n,tdf)) if(etype==3) Y <- arima.sim(n=n, list(ar=c(phi)), innov = runif(n,min=-1,max=1)) if(etype==4) Y <- arima.sim(n=n, list(ar=c(phi)), innov = (rexp(n)-1)) } if(tstype==3 || tstype==4){ if (etype==1) Y <- arima.sim(n=n, list(ma=c(theta)), innov = rnorm(n)) if(etype==2) Y <- arima.sim(n=n, list(ma=c(theta)), innov = rt(n,tdf)) if(etype==3) Y <- arima.sim(n=n, list(ma=c(theta)), innov = runif(n,min=-1,max=1)) if(etype==4) Y <- arima.sim(n=n, list(ma=c(theta)), innov = (rexp(n)-1)) } if(tstype==5){ if (etype==1) Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov = rnorm(n),n.start=burn) if(etype==2) Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov = rt(n,tdf),n.start=burn) if(etype==3) Y <- arima.sim(n=n, list(ar=c(phi),ma=c(theta)), innov = runif(n,min=-1,max=1),n.start=burn) if(etype==4) Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov=(rexp(n)-1),n.start=burn) } Z <-Y[1:no2] outvs<-auto.arima(Z,d=0,max.p=pmax,max.q=qmax,seasonal=FALSE,ic="aic", stationary=TRUE,method="ML") arord<-outvs$arma[1] maord<-outvs$arma[2] ordno2[i,1]<-arord ordno2[i,2]<-maord if(arord < ps || maord < qs) #|| is the or operator undfit <- undfit+1 } undfit<-undfit/nruns list(ordno2=ordno2,beta=beta,ps=ps,qs=qs,undfit=undfit) } dsarsim<-function(n=100,nruns=20,pmax=5,etype=1,tstype=1,tdf=5){ #datasplitting with AR(p) model selection. #assumes n > 10 pmax, full model beta = (phi1,...,phipmax) #current program uses AIC with H using the first floor(n/2) cases #undfit is the proportion of runs where model selection underfit #etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom #etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors #tstype = k for an AR(k) model, k=1,2 #for small n, underfitting is severe, dsarsim2 is much better beta<-0*1:pmax phi=0.5 #tstype=1 for AR(1) model beta[1] <- 0.5 k<-tstype if(tstype==2) { phi=c(0.5,0.33) #tstype=2 for AR(2) model beta[2]<-0.33} ordno2<- 1:nruns #order for n over 2 n/2 data spitting no2 <- floor(n/2) undfit<-0 for(i in 1:nruns) { if (etype==1) Y <- arima.sim(n=n, list(ar=c(phi)), innov = rnorm(n)) if(etype==2) Y <- arima.sim(n=n, list(ar=c(phi)), innov = rt(n,tdf)) if(etype==3) Y <- arima.sim(n=n, list(ar=c(phi)), innov = runif(n,min=-1,max=1)) if(etype==4) Y <- arima.sim(n=n, list(ar=c(phi)), innov = (rexp(n)-1)) Z <-Y[1:no2] outvs<-ar.yw(Z,order.max=pmax) ordno2[i]<-outvs$order if(outvs$order < k) undfit <- undfit+1 } undfit<-undfit/nruns list(beta=beta,order=k,ordno2=ordno2,undfit=undfit) } dsarsim2<-function(n=100,nruns=20,pmax=13,etype=1,tstype=1,tdf=5,phi=c(0.5),nbic=14,pen=2){ #program fails if nruns > 100 and sometimes fails if n is too small #uses the GMLE (Yule-Walker underfits too much and needs order.max > 0) #datasplitting with AR(p) model selection. #assumes n > 7 pmax, full model beta = (phi1,...,phipmax) #data splitting uses the first floor(n/2) cases #uses AIC if n < nbic*pmax, and BIC if n >= nbic*pmax #undfit is the proportion of runs where model selection underfit #etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom #etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors beta<-0*1:pmax phi=0.5 #tstype=1 for AR(1) model beta[1] <- 0.5 ps<-1 nval=nbic*pmax if(tstype==2) { phi=c(0.5,0.33) #tstype=2 for AR(2) model beta[2]<-0.33 ps<-2} if(tstype==3) { #tstype=3 for phi supplied by user phi <- as.vector(phi) ps<-length(phi) beta[1:ps]<-phi } ordno2<- 1:nruns #order for n/2 (n over 2) data splitting no2 <- floor(n/2) undfit<-0 for(i in 1:nruns) { if (etype==1) Y <- arima.sim(n=n, list(ar=c(phi)), innov = rnorm(n)) if(etype==2) Y <- arima.sim(n=n, list(ar=c(phi)), innov = rt(n,tdf)) if(etype==3) Y <- arima.sim(n=n, list(ar=c(phi)), innov = runif(n,min=-1,max=1)) if(etype==4) Y <- arima.sim(n=n, list(ar=c(phi)), innov = (rexp(n)-1)) Z <-Y[1:no2] out<-ar(Z,aic=FALSE,order.max=pmax,method="mle") if(n < nval){ critr <- n*log(out$var.pred) +2*pmax - pen #AIC type criterion - pen pI <- pmax for(j in 1:pmax){ out<-ar(Z,aic=FALSE,order.max=(pmax-j),method="mle") ctem <- n*log(out$var.pred) +2*(pmax-j) if(ctem < critr){ critr <- ctem - pen pI <- (pmax-j)} } } else{ critr <- n*log(out$var.pred) +log(n)*pmax #BIC type criterion pI <- pmax for(j in 1:pmax){ out<-ar(Z,aic=FALSE,order.max=(pmax-j),method="mle") ctem <- n*log(out$var.pred) +log(n)*(pmax-j) if(ctem < critr){ critr <- ctem pI <- (pmax-j)} } } ordno2[i]<-pI if(pI < ps) undfit <- undfit+1 } undfit<-undfit/nruns list(beta=beta,order=ps,ordno2=ordno2,undfit=undfit) } dsmasim<-function(n=100,nruns=20,qmax=5,etype=1,tstype=1,tdf=5){ #datasplitting with MA(q) model selection. #assumes n > 10 qmax, full model beta = (theta1,...,thetaqmax) #current program uses AIC with H using the first floor(n/2) cases #undfit is the proportion of runs where model selection underfit #etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom #etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors #tstype = k for an MA(k) model, k=1,2 #uses library(forecast) beta<-0*1:qmax theta=-0.5 #tstype=1 for MA(1) model beta[1] <- -0.5 k<-tstype if(tstype==2) { theta=c(-0.5,0.5) #tstype=2 for MA(2) model beta[2]<-0.5} ordno2<- 1:nruns #order for n over 2 n/2 data spitting no2 <- floor(n/2) undfit<-0 for(i in 1:nruns) { if (etype==1) Y <- arima.sim(n=n, list(ma=c(theta)), innov = rnorm(n)) if(etype==2) Y <- arima.sim(n=n, list(ma=c(theta)), innov = rt(n,tdf)) if(etype==3) Y <- arima.sim(n=n, list(ma=c(theta)), innov = runif(n,min=-1,max=1)) if(etype==4) Y <- arima.sim(n=n, list(ma=c(theta)), innov = (rexp(n)-1)) Z <-Y[1:no2] outvs<-auto.arima(Z,d=0,max.p=0,max.q=qmax,seasonal=FALSE,ic="aic", stationary=TRUE) ord<-outvs$arma[2] ordno2[i]<-ord if(ord < k) undfit <- undfit+1 } undfit<-undfit/nruns list(beta=beta,order=k,ordno2=ordno2,undfit=undfit) } dsmasim2<-function(n=100,nruns=20,qmax=13,etype=1,tstype=1,tdf=5,theta=c(0.5), nbic=0.5,pen=2){ #program sometimes fails if n is too small #uses the GMLE (take nbic large to see AIC, nbic=0.5 for BIC) #datasplitting with MA(q) model selection. #assumes n > 7 qmax, full model beta = (theta1,...,thetaqmax) #data splitting uses the first floor(n/2) cases #uses AIC if n < nbic*qmax, and BIC if n >= nbic*qmax, default uses BIC #undfit is the proportion of runs where model selection underfit #etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom #etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors beta<-0*1:qmax theta=-0.5 #tstype=1 for MA(1) model beta[1] <- -0.5 qs<-1 nval=nbic*qmax if(tstype==2) { theta=c(-0.5,0.5) #tstype=2 for MA(2) model beta[2]<-0.5 qs<-2} if(tstype==3) { #tstype=3 for theta supplied by user theta <- as.vector(theta) qs<-length(theta) beta[1:qs]<-theta } ordno2<- 1:nruns #order for n/2 (n over 2) data splitting no2 <- floor(n/2) undfit<-0 for(i in 1:nruns) { if (etype==1) Y <- arima.sim(n=n, list(ma=c(theta)), innov = rnorm(n)) if(etype==2) Y <- arima.sim(n=n, list(ma=c(theta)), innov = rt(n,tdf)) if(etype==3) Y <- arima.sim(n=n, list(ma=c(theta)), innov = runif(n,min=-1,max=1)) if(etype==4) Y <- arima.sim(n=n, list(ma=c(theta)), innov = (rexp(n)-1)) Z <-Y[1:no2] out <- arima(Z,order=c(0,0,qmax),method="ML") if(n < nval){ critr <- n*log(out$sigma2) +2*qmax - pen #AIC type criterion - pen qI <- qmax for(j in 1:qmax){ out <- arima(Z,order=c(0,0,(qmax-j)),method="ML") ctem <- n*log(out$sigma2) +2*(qmax-j) if(ctem < critr){ critr <- ctem - pen qI <- (qmax-j)} } } else{ critr <- n*log(out$sigma2) +log(n)*qmax #BIC type criterion qI <- qmax for(j in 1:qmax){ out <- arima(Z,order=c(0,0,(qmax-j)),method="ML") ctem <- n*log(out$sigma2) +log(n)*(qmax-j) if(ctem < critr){ critr <- ctem qI <- (qmax-j)} } } ordno2[i]<-qI if(qI < qs) undfit <- undfit+1 } undfit<-undfit/nruns list(beta=beta,order=qs,ordno2=ordno2,undfit=undfit) } dspisim<-function(n=50,nv=19,nruns=100,ytype=1,ttype=1,eps=0.1,shift=9,tdf=3,alpha=0.05) {#Gets mean coverages and lengths of the data splitting prediction interval. #ytype = 1 for N(0,1), 2 for t_df, 3 for EXP(1), 4 for U(-1,1), 5 for mixture, #6 for lognormal #ttype = 1 if T=median dtype = 2 if T=mean, =3 forth shorth midpoint #want n/2 > nv #calls shorth 3 q <- n+1 cov <- 0 nv <- min(nv, floor(n/2)) nH <- n - nv uv <- min(nv, ceiling( (1 - alpha)*(1+nv) ) ) up <- uv/(nv+1) #quantile for prediction region indx <- 1:n len <- 0*1:nruns for(i in 1:nruns) { #make data if(ytype == 1) y <- rnorm(q) if(ytype == 2) y <- rt(q, df = tdf) if(ytype == 3) y <- rexp(q) if(ytype == 4) y <- runif(q, min = -1, max = 1) if(ytype == 5) y <- rnorm(q, sd = 1 + rbinom(n, 1, eps) * shift) if(ytype == 6) { y<- rnorm(q) y<- exp(y)} yf <- y[q] y <- y[-q] #find sets H and V perm <- sample(indx,n) H <- perm[1:nH] yH <- y[H] yV <- y[-H] if(ttype==2) T <- mean(yH) if(ttype==1) T <- median(yH) if(ttype==3){ out<-shorth3(yH) T <- (out$shorth[1]+out$shorth[2])/2} #find PI Dsq <- (yV-T)^2 dsq <- quantile(Dsq, up, type=1) cut <- sqrt(dsq) low <- T-cut hi <- T+cut len[i] <- 2*cut if(low <= yf && hi >= yf) cov <- cov+1 } cov <- cov/nruns mnlen <- mean(len) list(up=up,cov=cov,mnlen=mnlen) } fflynx<-function() {# type data(lynx) Y <- log10(lynx) FAR2 <- 1:114 FAR11 <- 1:114 FAR12 <- 1:114 SETAR272 <- 1:114 SETAR252 <- 1:114 for(i in 3:114){ FAR2[i ] <- 1.05 + 1.41*Y[i-1] -0.77*Y[i-2]} for(i in 12:114){ FAR11[i ] <- 1.13*Y[i-1] -0.51*Y[i-2] + .23*Y[i-3] -0.29*Y[i-4] + .14*Y[i-5] -0.14*Y[i-6] + 0.08*Y[i-7] -0.04*Y[i-8] + .13*Y[i-9] + 0.19*Y[i-10] - .31*Y[i-11] } for(i in 13:114){ FAR12[i ] <- 1.123 + 1.084*Y[i-1] -0.477*Y[i-2] + .265*Y[i-3] -0.218*Y[i-4] + .180*Y[i-9] - .224*Y[i-12] } for(i in 13:114){ if( Y[i-2] <= 3.116){ SETAR272[i ] <- 0.546 + 1.032*Y[i-1] -0.173*Y[i-2] + .171*Y[i-3] -0.431*Y[i-4] + .332*Y[i-5] - .284*Y[i-6] + .210*Y[i-7]} else {SETAR272[i ] <- 2.632 + 1.492*Y[i-1] -1.324*Y[i-2]} } for(i in 13:114){ if( Y[i-2] <= 3.05){ SETAR252[i ] <- 0.768 + 1.064*Y[i-1] -0.200*Y[i-2] + .164*Y[i-3] -0.428*Y[i-4] + .181*Y[i-5] } else {SETAR252[i ] <- 2.254 + 1.474*Y[i-1] -1.202*Y[i-2]} } x <- cbind(Y,FAR2,FAR11,FAR12,SETAR272,SETAR252) x <- x[13:114,] print(cor(x)) pairs(x) } FFRRplots<-function(W,zz1,zz2){ #Let zz1 be the output of the arima function, eg zz <- arima(lh,c(1,0,0)), #let zz2 be the output of a competing arima model, eg #zz <- arima(lh,c(2,0,0)) and let W be the time series, eg W <- lh. #Makes the FF plot of the fitted values from the two models #and the RR plot of the residuals from the two models. #Use for ARIMA(p,d,q)x(P,D,Q)s models. #Fitted value = W - residual. Y <- as.vector(W) Resid1 <- as.vector(zz1$resid) FIT1 <- as.vector(Y-Resid1) Resid2 <- as.vector(zz2$resid) FIT2 <- as.vector(Y-Resid2) par(mfrow=c(2,1)) plot(FIT1,FIT2) abline(0,1) title("FF plot") plot(Resid1,Resid2) title("RR plot") abline(0,1) par(mfrow=c(1,1)) } fysim<-function( runs = 20) {# 20 FY response plots for simulated AR(2) time series data fycorr <- 1:runs for(i in 1: runs){ Y <- ardata()$arts out <- ar.yw(Y) Yts <- Y[10:200] FIT <- Yts - out$resid[10:200] plot(FIT,Yts) abline(0,1) fycorr[i] <- cor(FIT,Yts) } list(fycorr=fycorr) } intercept<-function(zz, p = 1, q = 1){ #The R function arima intercept is actually an estimate of the mean. #Let zz be the output of the arima function, eg zz <- arima(lh,c(1,0,1)), #and let p and q be the values used in the fit. #Assume d = 0. Then this function prints the intercept. mn <- zz$coef[(p+q+1)] intercept <- mn if (p > 0) { intercept <- mn * (1 - sum(zz$coef[1:p]))} return(intercept) } locpi<-function(Y, alpha = 0.05){ # Makes the PI (L_n,U_n) for the MA(q) time series, #for Y_{t+j} when j > q. #Uses ybar instead of the estimated mean from the MA(q) model. #Then the simulation won't bomb. sy <- as.vector(Y) n <- length(sy) cc <- ceiling(n * (1 - alpha)) corfac <- (1 + 15/n) * sqrt((n+1)/(n - 1)) yfhat <- mean(sy) #get the PI sy <- sort(sy) rup <- sy[cc] rlow <- sy[1] olen <- rup - rlow if(cc < n){ for(j in (cc + 1):n){ zlen <- sy[j] - sy[j - cc + 1] if(zlen < olen) { olen <- zlen rup <- sy[j] rlow <- sy[j - cc + 1] } } } UPI <- (1-corfac) * yfhat + corfac * rup LPI <- (1-corfac) * yfhat + corfac * rlow list(LPI = LPI, UPI=UPI) } locpi2<-function(fres, k=2, alph = 0.05){ #Makes the PI [L_n(h),U_n(h)] for a future forecast residual #for the ARMA(p,q) time series with, e.g., k = p+q sy <- as.vector(fres) n <- length(sy) #n is n_h if (alph > 0.1) {qn <- min(1 - alph + 0.05, 1 - alph + k/n)} if (alph <= 0.1) {qn <- min(1 - alph/2, 1 - alph + 10*alph*k/n)} #qn = 1 - deltan, alpha = delta = alph cc <- ceiling(n * qn) #get the PI sy <- sort(sy) rup <- sy[cc] rlow <- sy[1] olen <- rup - rlow if(cc < n){ for(j in (cc + 1):n){ zlen <- sy[j] - sy[j - cc + 1] if(zlen < olen) { olen <- zlen rup <- sy[j] rlow <- sy[j - cc + 1] } } } UPI <- rup LPI <- rlow list(LPI = LPI, UPI=UPI) } maboot<-function(Y,B=100,qmax=10,c=0.05){ #Bootstraps MA(q) model selection using the parametric bootstrap. #assumes n > 10 qmax and adds cB full model bootstrap samples. #uses the auto.arima from the forecast package #The full model and betahatmix are also bootstrapped. #uses library(forecast) out <- arima(Y,order=c(0,0,qmax)) thetahat = out$coef[1:qmax] #omit the constant, if any n<-length(Y) nmqmax <- n-qmax sig<-sqrt(out$sigma2) d <- ceiling(c*B) bpd <- B + d bp1 <- B + 1 betas <- matrix(0,nrow=bpd,ncol=qmax) btmix <- betas btfull <- betas bhatvs <- 0*1:qmax outvs<-auto.arima(Y,d=0,max.p=0,max.q=qmax,seasonal=FALSE,ic="aicc", stationary=TRUE) ord<-outvs$arma[2] if(ord>0) bhatvs[1:ord]<-outvs$coef[1:ord] for(i in 1:B) { yb <- arima.sim(n=n,list(ma=thetahat),sd=sig) tem<-auto.arima(yb,d=0,max.p=0,max.q=qmax,seasonal=FALSE,ic="aicc", stationary=TRUE) ord<-tem$arma[2] if(ord>0) betas[i,1:ord]<-tem$coef[1:ord] tem<-arima(yb,order=c(0,0,qmax)) btfull[i,] <- tem$coef[1:qmax] #mix if(ord>0){ yb <- arima.sim(n=n,list(ma=thetahat),sd=sig) tem <- arima(yb,order=c(0,0,ord)) btmix[i,1:ord]<-tem$coef[1:ord]} } for(i in bp1:bpd){ yb <- arima.sim(n=n,list(ma=thetahat),sd=sig) tem<-arima(yb,order=c(0,0,qmax)) betas[i,] <- tem$coef[1:qmax] } btmix[bp1:bpd,] <- betas[bp1:bpd,] btfull[bp1:bpd,] <- betas[bp1:bpd,] list(btfull=btfull,betas=betas,btmix=btmix,bhatvs=bhatvs) } maboot2<-function(Y,B=100,qmax=10,c=0.05){ #Bootstraps MA(q) model selection using the residual bootstrap. #assumes n > 10 qmax and adds cB full model bootstrap samples. #uses the auto.arima from the forecast package #The full model and betahatmix are also bootstrapped. #uses library(forecast) out <- arima(Y,order=c(0,0,qmax)) thetahat = out$coef[1:qmax] #omit the constant, if any n<-length(Y) nmqmax <- n-qmax sig<-sqrt(out$sigma2) res<-out$res #scale the residuals to have 0 mean res <- (res-mean(res))*sqrt(n/(n-qmax)) #want var(res) near sigmahat^2 d <- ceiling(c*B) bpd <- B + d bp1 <- B + 1 betas <- matrix(0,nrow=bpd,ncol=qmax) btmix <- betas btfull <- betas bhatvs <- 0*1:qmax outvs<-auto.arima(Y,d=0,max.p=0,max.q=qmax,seasonal=FALSE,ic="aicc", stationary=TRUE) ord<-outvs$arma[2] if(ord>0) bhatvs[1:ord]<-outvs$coef[1:ord] for(i in 1:B) { resb <- sample(res,size=n,replace=TRUE) yb <- arima.sim(n=n,list(ma=thetahat),innov = resb,sd=sig) tem<-auto.arima(yb,d=0,max.p=0,max.q=qmax,seasonal=FALSE,ic="aicc", stationary=TRUE) ord<-tem$arma[2] if(ord>0) betas[i,1:ord]<-tem$coef[1:ord] tem<-arima(yb,order=c(0,0,qmax)) btfull[i,] <- tem$coef[1:qmax] #mix if(ord>0){ resb <- sample(res,size=n,replace=TRUE) yb <- arima.sim(n=n,list(ma=thetahat),innov = resb,sd=sig) tem <- arima(yb,order=c(0,0,ord)) btmix[i,1:ord]<-tem$coef[1:ord]} } for(i in bp1:bpd){ resb <- sample(res,size=n,replace=TRUE) yb <- arima.sim(n=n,list(ma=thetahat),innov = resb,sd=sig) tem<-arima(yb,order=c(0,0,qmax)) betas[i,] <- tem$coef[1:qmax] } btmix[bp1:bpd,] <- betas[bp1:bpd,] btfull[bp1:bpd,] <- betas[bp1:bpd,] list(btfull=btfull,betas=betas,btmix=btmix,bhatvs=bhatvs) } macisim<-function(N=100,nruns=100,etype=1,tdf=5){ #Simulates MA(2) time series of length n, and finds proportion #of time theta1 and theta2 are in their 95% CI. #average sqrt(n) CI length is also given. #etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom #etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors ma1cov <- 0 ma2cov <- 0 ma1low <- 1:nruns ma1up <- ma1low ma2low<-ma1low ma2up<-ma1low for(i in 1:nruns){ thet1 <- runif(1,min=-0.5,max=0.5) thet2 <- runif(1,min=-0.4,max=0.4) if (etype==1) y <- arima.sim(n=N, list(ma=c(thet1,thet2)), innov = rnorm(N)) if(etype==2) y <- arima.sim(n=N, list(ma=c(thet1,thet2)), innov = rt(N,tdf)) if(etype==3) y <- arima.sim(n=N, list(ma=c(thet1,thet2)), innov = runif(N,min=-1,max=1)) if(etype==4) y <- arima.sim(n=N, list(ma=c(thet1,thet2)), innov = (rexp(N)-1)) zz<-arima(y,c(0,0,2)) coef <- zz$coef se <- sqrt(diag(zz$var.coef)) LCI <- coef - 2*se UCI <- coef + 2*se ma1low[i] <- LCI[1] ma1up[i] <- UCI[1] ma2low[i] <- LCI[2] ma2up[i] <- UCI[2] if(ma1low[i] < thet1 && ma1up[i] > thet1) ma1cov <- ma1cov + 1 if(ma2low[i] < thet2 && ma2up[i] > thet2) ma2cov <- ma2cov + 1 } ma1cov <- ma1cov/nruns ma1len <- sqrt(N) * mean(ma1up - ma1low) ma2cov <- ma2cov/nruns ma2len <- sqrt(N) * mean(ma2up - ma2low) list(ma1cov = ma1cov, ma1len = ma1len, ma2cov = ma2cov, ma2len = ma2len) } maq<-function(Y,q=1){ #Fits an MA(q) model to time series Y using OLS. #assumes q < n - q #Makes a response and residual plot. # Advance the plot by highlighting Stop with the right mouse button. #phihat = phihat_0 = intercept, phihat_1, ..., phihat_p cmar <- par("mar") par(mfrow = c(2, 1)) par(mar=c(4.0,4.0,2.0,0.5)) Y <- as.vector(Y) n <- length(Y) zz<-arima(Y,c(0,0,q)) macoef <- zz$coef res<-zz$resid Xma <- matrix(0,nrow=n-q,ncol=q) Yma <- Y[(q+1):n] for(i in 1:q){ Xma[,i] <- res[(q-i+1):(n-i)] } madat <- as.data.frame(cbind(Yma,Xma)) tem <- lm(Yma~.,data=madat) thetahat <- tem$coef fit <- Y resid <- Y fit[(q+1):n] <- tem$fit fit[1:q] <- NA resid[(q+1):n] <- tem$res resid[1:q] <- NA plot(fit,Y) abline(0, 1) title("Response Plot") identify(fit,Y) plot(fit, resid) title("Residual Plot") identify(fit, resid) par(mfrow = c(1, 1)) par(mar=cmar) list(fit=fit,resid=resid,tem=tem,macoef=macoef,thetahat=thetahat) } msarsim<-function(n=100,nruns=20,pmax=5,BB=100,cc=0.01,etype=1,tstype=1, tdf=5,btype=1,alph=0.05){ #Bootstraps AR(p) model selection, and full model, and betahatmix. #assumes n > 10 pmax, beta = (phi1,...,phipmax) #btype = 1 for parametric bootstrap, 2 for residual bootstrap #calls arboot or arboot2 #etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom #etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors #tstype = k for an AR(k) model, k=1,2 beta<-0*1:pmax phi=0.5 #tstype=1 for AR(1) model beta[1] <- 0.5 pp6<-pmax+6; pp5<-pmax+5; pp4<-pmax+4; pp3<-pmax+3; pp1<-pmax+1; pp2<-pmax+2 cicov <- 0*(1:pp6) avelen <- 0*(1:pp6) cicovmix <- cicov avelenmix <- avelen cicovfull <- cicov avelenfull <- avelen k<-tstype if(tstype==2) { phi=c(0.5,0.33) #tstype=2 for AR(2) model beta[2]<-0.33} tvec<-beta[1:k] for(i in 1: nruns) { if (etype==1) Y <- arima.sim(n=n, list(ar=c(phi)), innov = rnorm(n)) if(etype==2) Y <- arima.sim(n=n, list(ar=c(phi)), innov = rt(n,tdf)) if(etype==3) Y <- arima.sim(n=n, list(ar=c(phi)), innov = runif(n,min=-1,max=1)) if(etype==4) Y <- arima.sim(n=n, list(ar=c(phi)), innov = (rexp(n)-1)) if(btype==1) out<-arboot(Y=Y,B=BB,pmax=pmax,c=cc) else out<-arboot2(Y=Y,B=BB,pmax=pmax,c=cc) for(j in 1:pmax){ tem <- shorth3(out$betas[,j],alpha=alph) if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2]) cicov[j] <- cicov[j] + 1 avelen[j] <- avelen[j] + tem$shorth[2] - tem$shorth[1] tem <- shorth3(out$btmix[,j],alpha=alph) if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2]) cicovmix[j] <- cicovmix[j] + 1 avelenmix[j] <- avelenmix[j] + tem$shorth[2] - tem$shorth[1] tem <- shorth3(out$btfull[,j],alpha=alph) if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2]) cicovfull[j] <- cicovfull[j] + 1 avelenfull[j] <- avelenfull[j] + tem$shorth[2] - tem$shorth[1] } #test whether the last pmax-k values of beta are 0 gg <- pmax - k tstat <- out$bhatvs[(k+1):pmax] tem <- confreg(out$betas[,(k+1):pmax],g=gg,that=tstat,alpha=alph) if(tem$D0 <= tem$cuplim) #pred. reg. method cicov[pp1] <- cicov[pp1] + 1 avelen[pp1] <- avelen[pp1] + tem$cuplim if(tem$br0 <= tem$cuplim) #hybrid method cicov[pp2] <- cicov[pp2] + 1 avelen[pp2] <- avelen[pp1] #same cutoff so same length if(tem$br0 <= tem$brlim) #Bickel and Ren method cicov[pp3] <- cicov[pp3] + 1 avelen[pp3] <- avelen[pp3] + tem$brlim tem <- confreg(out$btmix[,(k+1):pmax],g=gg,that=tstat,alpha=alph) if(tem$D0 <= tem$cuplim) #pred. reg. method cicovmix[pp1] <- cicovmix[pp1] + 1 avelenmix[pp1] <- avelenmix[pp1] + tem$cuplim if(tem$br0 <= tem$cuplim) #hybrid method cicovmix[pp2] <- cicovmix[pp2] + 1 avelenmix[pp2] <- avelenmix[pp1] #same cutoff so same length if(tem$br0 <= tem$brlim) #Bickel and Ren method cicovmix[pp3] <- cicovmix[pp3] + 1 avelenmix[pp3] <- avelenmix[pp3] + tem$brlim tem <- confreg(out$btfull[,(k+1):pmax],g=gg,that=tstat,alpha=alph) if(tem$D0 <= tem$cuplim) #pred. reg. method cicovfull[pp1] <- cicovfull[pp1] + 1 avelenfull[pp1] <- avelenfull[pp1] + tem$cuplim if(tem$br0 <= tem$cuplim) #hybrid method cicovfull[pp2] <- cicovfull[pp2] + 1 avelenfull[pp2] <- avelenfull[pp1] #same cutoff so same length if(tem$br0 <= tem$brlim) #Bickel and Ren method cicovfull[pp3] <- cicovfull[pp3] + 1 avelenfull[pp3] <- avelenfull[pp3] + tem$brlim #test whether the first k values of beta are (beta_1,...,beta_k) gg <- k tstat <- out$bhatvs[1:k] tem <- confreg(out$betas[,1:k],g=gg,that=tstat,alpha=alph) D0 <- sqrt(mahalanobis(tvec, tem$center, tem$cov)) if(D0 <= tem$cuplim) #pred. reg. method cicov[pp4] <- cicov[pp4] + 1 avelen[pp4] <- avelen[pp4] + tem$cuplim D1 <- sqrt(mahalanobis(tvec, tstat, tem$cov)) if(D1 <= tem$cuplim) #hybrid method cicov[pp5] <- cicov[pp5] + 1 avelen[pp5] <- avelen[pp4] #same cutoff so same length if(D1 <= tem$brlim) #Bickel and Ren method cicov[pp6] <- cicov[pp6] + 1 avelen[pp6] <- avelen[pp6] + tem$brlim tem <- confreg(out$btmix[,1:k],g=gg,that=tstat,alpha=alph) D0 <- sqrt(mahalanobis(tvec, tem$center, tem$cov)) if(D0 <= tem$cuplim) #pred. reg. method cicovmix[pp4] <- cicovmix[pp4] + 1 avelenmix[pp4] <- avelenmix[pp4] + tem$cuplim D1 <- sqrt(mahalanobis(tvec, tstat, tem$cov)) if(D1 <= tem$cuplim) #hybrid method cicovmix[pp5] <- cicovmix[pp5] + 1 avelenmix[pp5] <- avelenmix[pp4] #same cutoff so same length if(D1 <= tem$brlim) #Bickel and Ren method cicovmix[pp6] <- cicovmix[pp6] + 1 avelenmix[pp6] <- avelenmix[pp6] + tem$brlim tem <- confreg(out$btfull[,1:k],g=gg,that=tstat,alpha=alph) D0 <- sqrt(mahalanobis(tvec, tem$center, tem$cov)) if(D0 <= tem$cuplim) #pred. reg. method cicovfull[pp4] <- cicovfull[pp4] + 1 avelenfull[pp4] <- avelenfull[pp4] + tem$cuplim D1 <- sqrt(mahalanobis(tvec, tstat, tem$cov)) if(D1 <= tem$cuplim) #hybrid method cicovfull[pp5] <- cicovfull[pp5] + 1 avelenfull[pp5] <- avelenfull[pp4] #same cutoff so same length if(D1 <= tem$brlim) #Bickel and Ren method cicovfull[pp6] <- cicovfull[pp6] + 1 avelenfull[pp6] <- avelenfull[pp6] + tem$brlim } cicov <- cicov/nruns avelen <- avelen/nruns cicovmix <- cicovmix/nruns avelenmix <- avelenmix/nruns cicovfull <- cicovfull/nruns avelenfull <- avelenfull/nruns list(cicovfull=cicovfull,avelenfull=avelenfull,cicov=cicov,avelen=avelen, cicovmix=cicovmix,avelenmix=avelenmix,beta=beta) } msarmasim<-function(n=100,nruns=20,pmax=3,qmax=3,BB=100,cc=0.05, etype=1,tstype=1,tdf=5,alph=0.05){ #Bootstraps ARMA(p,q) model selection. #assumes n > 10 (pmax+qmax), beta = (phi1,...,phipmax,theta1,...,thetaqmax) #uses parametric bootstrap, #calls armaboot, aic might be better than aicc #need pmax+3 <=pmax+qmax #uses library(forecast) #If the true model is ARMA(ps,qs), then both pmax > ps and qmax > qs #causes lots of convergence problems. #etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom #etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors kmax<-pmax+qmax #p=kmax beta<-0*1:kmax pp6<-kmax+6; pp5<-kmax+5; pp4<-kmax+4; pp3<-kmax+3; pp1<-kmax+1; pp2<-kmax+2 cicov <- 0*(1:pp6) avelen <- 0*(1:pp6) cicovmix <- cicov avelenmix <- avelen cicovfull <- cicov avelenfull <- avelen if(tstype==1) { theta=c(-0.5,0.5) #tstype=1 for MA(2) model beta[(pmax+1):(pmax+2)]<-theta valS<-c((pmax+1),(pmax+2)) valO <- c(1:pmax,(pmax+3):kmax) } if(tstype==2) { phi=c(0.5,0.33) #tstype=2 for AR(2) model beta[1:2]<-phi valS <- c(1,2) valO <- c(3:kmax) } if(tstype==3){ #tstype=3 for ARMA(1,1) model phi <- 0.5 theta <- -0.5 beta[c(1,pmax+1)] <- c(0.5,-0.5) valS <- c(1,(pmax+1)) valO <- c(2:pmax,(pmax+2):kmax) } k<-2 tvec<-beta[valS] for(i in 1: nruns) { if(tstype==1){ if (etype==1) Y <- arima.sim(n=n, list(ma=c(theta)), innov = rnorm(n)) if(etype==2) Y <- arima.sim(n=n, list(ma=c(theta)), innov = rt(n,tdf)) if(etype==3) Y <- arima.sim(n=n, list(ma=c(theta)), innov = runif(n,min=-1,max=1)) if(etype==4) Y <- arima.sim(n=n, list(ma=c(theta)), innov = (rexp(n)-1)) } if(tstype==2){ if (etype==1) Y <- arima.sim(n=n, list(ar=c(phi)), innov = rnorm(n)) if(etype==2) Y <- arima.sim(n=n, list(ar=c(phi)), innov = rt(n,tdf)) if(etype==3) Y <- arima.sim(n=n, list(ar=c(phi)), innov = runif(n,min=-1,max=1)) if(etype==4) Y <- arima.sim(n=n, list(ar=c(phi)), innov = (rexp(n)-1)) } if(tstype==3){ if (etype==1) Y <- arima.sim(n=n, list(ar=c(phi),ma=c(theta)), innov = rnorm(n)) if(etype==2) Y <- arima.sim(n=n, list(ar=c(phi),ma=c(theta)), innov = rt(n,tdf)) if(etype==3) Y <- arima.sim(n=n, list(ar=c(phi),ma=c(theta)), innov = runif(n,min=-1,max=1)) if(etype==4) Y <- arima.sim(n=n, list(ar=c(phi),ma=c(theta)), innov = (rexp(n)-1)) } out<-armaboot(Y=Y,B=BB,pmax=pmax,qmax=qmax,c=cc) for(j in 1:kmax){ tem <- shorth3(out$betas[,j],alpha=alph) if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2]) cicov[j] <- cicov[j] + 1 avelen[j] <- avelen[j] + tem$shorth[2] - tem$shorth[1] tem <- shorth3(out$btmix[,j],alpha=alph) if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2]) cicovmix[j] <- cicovmix[j] + 1 avelenmix[j] <- avelenmix[j] + tem$shorth[2] - tem$shorth[1] tem <- shorth3(out$btfull[,j],alpha=alph) if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2]) cicovfull[j] <- cicovfull[j] + 1 avelenfull[j] <- avelenfull[j] + tem$shorth[2] - tem$shorth[1] } #test whether the kmax-k values of betaO are 0 gg <- kmax - k tstat <- out$bhatvs[valO] tem <- confreg(out$betas[,valO],g=gg,that=tstat,alpha=alph) if(tem$D0 <= tem$cuplim) #pred. reg. method cicov[pp1] <- cicov[pp1] + 1 avelen[pp1] <- avelen[pp1] + tem$cuplim if(tem$br0 <= tem$cuplim) #hybrid method cicov[pp2] <- cicov[pp2] + 1 avelen[pp2] <- avelen[pp1] #same cutoff so same length if(tem$br0 <= tem$brlim) #Bickel and Ren method cicov[pp3] <- cicov[pp3] + 1 avelen[pp3] <- avelen[pp3] + tem$brlim tem <- confreg(out$btmix[,valO],g=gg,that=tstat,alpha=alph) if(tem$D0 <= tem$cuplim) #pred. reg. method cicovmix[pp1] <- cicovmix[pp1] + 1 avelenmix[pp1] <- avelenmix[pp1] + tem$cuplim if(tem$br0 <= tem$cuplim) #hybrid method cicovmix[pp2] <- cicovmix[pp2] + 1 avelenmix[pp2] <- avelenmix[pp1] #same cutoff so same length if(tem$br0 <= tem$brlim) #Bickel and Ren method cicovmix[pp3] <- cicovmix[pp3] + 1 avelenmix[pp3] <- avelenmix[pp3] + tem$brlim tem <- confreg(out$btfull[,valO],g=gg,that=tstat,alpha=alph) if(tem$D0 <= tem$cuplim) #pred. reg. method cicovfull[pp1] <- cicovfull[pp1] + 1 avelenfull[pp1] <- avelenfull[pp1] + tem$cuplim if(tem$br0 <= tem$cuplim) #hybrid method cicovfull[pp2] <- cicovfull[pp2] + 1 avelenfull[pp2] <- avelenfull[pp1] #same cutoff so same length if(tem$br0 <= tem$brlim) #Bickel and Ren method cicovfull[pp3] <- cicovfull[pp3] + 1 avelenfull[pp3] <- avelenfull[pp3] + tem$brlim #test whether the first betaS = betaS gg <- k tstat <- out$bhatvs[valS] tem <- confreg(out$betas[,valS],g=gg,that=tstat,alpha=alph) D0 <- sqrt(mahalanobis(tvec, tem$center, tem$cov)) if(D0 <= tem$cuplim) #pred. reg. method cicov[pp4] <- cicov[pp4] + 1 avelen[pp4] <- avelen[pp4] + tem$cuplim D1 <- sqrt(mahalanobis(tvec, tstat, tem$cov)) if(D1 <= tem$cuplim) #hybrid method cicov[pp5] <- cicov[pp5] + 1 avelen[pp5] <- avelen[pp4] #same cutoff so same length if(D1 <= tem$brlim) #Bickel and Ren method cicov[pp6] <- cicov[pp6] + 1 avelen[pp6] <- avelen[pp6] + tem$brlim tem <- confreg(out$btmix[,valS],g=gg,that=tstat,alpha=alph) D0 <- sqrt(mahalanobis(tvec, tem$center, tem$cov)) if(D0 <= tem$cuplim) #pred. reg. method cicovmix[pp4] <- cicovmix[pp4] + 1 avelenmix[pp4] <- avelenmix[pp4] + tem$cuplim D1 <- sqrt(mahalanobis(tvec, tstat, tem$cov)) if(D1 <= tem$cuplim) #hybrid method cicovmix[pp5] <- cicovmix[pp5] + 1 avelenmix[pp5] <- avelenmix[pp4] #same cutoff so same length if(D1 <= tem$brlim) #Bickel and Ren method cicovmix[pp6] <- cicovmix[pp6] + 1 avelenmix[pp6] <- avelenmix[pp6] + tem$brlim tem <- confreg(out$btfull[,valS],g=gg,that=tstat,alpha=alph) D0 <- sqrt(mahalanobis(tvec, tem$center, tem$cov)) if(D0 <= tem$cuplim) #pred. reg. method cicovfull[pp4] <- cicovfull[pp4] + 1 avelenfull[pp4] <- avelenfull[pp4] + tem$cuplim D1 <- sqrt(mahalanobis(tvec, tstat, tem$cov)) if(D1 <= tem$cuplim) #hybrid method cicovfull[pp5] <- cicovfull[pp5] + 1 avelenfull[pp5] <- avelenfull[pp4] #same cutoff so same length if(D1 <= tem$brlim) #Bickel and Ren method cicovfull[pp6] <- cicovfull[pp6] + 1 avelenfull[pp6] <- avelenfull[pp6] + tem$brlim } cicov <- cicov/nruns avelen <- avelen/nruns cicovmix <- cicovmix/nruns avelenmix <- avelenmix/nruns cicovfull <- cicovfull/nruns avelenfull <- avelenfull/nruns list(cicovfull=cicovfull,avelenfull=avelenfull,cicov=cicov,avelen=avelen, cicovmix=cicovmix,avelenmix=avelenmix,beta=beta) } msarmasim2<-function(n=100,nruns=20,pmax=3,qmax=3,BB=100,cc=0.05, etype=1,tstype=1,tdf=5,burn=100,alph=0.05){ #Bootstraps ARMA(p,q) model selection. #assumes n > 10 (pmax+qmax), beta = (phi1,...,phipmax,theta1,...,thetaqmax) #uses parametric bootstrap, #calls armaboot2, aic might be better than aicc #need pmax+3 <=pmax+qmax #uses library(forecast), burn is the burn in period, need burn>pmax+qmax #If the true model is ARMA(ps,qs), then both pmax > ps and qmax > qs #causes lots of convergence problems. #etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom #etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors kmax<-pmax+qmax #p=kmax beta<-0*1:kmax pp6<-kmax+6; pp5<-kmax+5; pp4<-kmax+4; pp3<-kmax+3; pp1<-kmax+1; pp2<-kmax+2 cicov <- 0*(1:pp6) avelen <- 0*(1:pp6) cicovmix <- cicov avelenmix <- avelen cicovfull <- cicov avelenfull <- avelen if(tstype==1) { theta=c(-0.5,0.5) #tstype=1 for MA(2) model beta[(pmax+1):(pmax+2)]<-theta valS<-c((pmax+1),(pmax+2)) valO <- c(1:pmax,(pmax+3):kmax) } if(tstype==2) { phi=c(0.5,0.33) #tstype=2 for AR(2) model beta[1:2]<-phi valS <- c(1,2) valO <- c(3:kmax) } if(tstype==3){ #tstype=3 for ARMA(1,1) model phi <- 0.5 theta <- -0.5 beta[c(1,pmax+1)] <- c(0.5,-0.5) valS <- c(1,(pmax+1)) valO <- c(2:pmax,(pmax+2):kmax) } k<-2 tvec<-beta[valS] for(i in 1: nruns) { if(tstype==1){ if (etype==1) Y <- arima.sim(n=n, list(ma=c(theta)), innov = rnorm(n),n.start=burn) if(etype==2) Y <- arima.sim(n=n, list(ma=c(theta)), innov = rt(n,tdf),n.start=burn) if(etype==3) Y <- arima.sim(n=n, list(ma=c(theta)), innov = runif(n,min=-1,max=1),n.start=burn) if(etype==4) Y <- arima.sim(n=n, list(ma=c(theta)), innov = (rexp(n)-1),n.start=burn) } if(tstype==2){ if (etype==1) Y <- arima.sim(n=n, list(ar=c(phi)), innov = rnorm(n),n.start=burn) if(etype==2) Y <- arima.sim(n=n, list(ar=c(phi)), innov = rt(n,tdf),n.start=burn) if(etype==3) Y <- arima.sim(n=n, list(ar=c(phi)), innov = runif(n,min=-1,max=1),n.start=burn) if(etype==4) Y <- arima.sim(n=n, list(ar=c(phi)), innov = (rexp(n)-1),n.start=burn) } if(tstype==3){ if (etype==1) Y <- arima.sim(n=n, list(ar=c(phi),ma=c(theta)), innov = rnorm(n), n.start=burn) if(etype==2) Y <- arima.sim(n=n, list(ar=c(phi),ma=c(theta)), innov = rt(n,tdf), n.start=burn) if(etype==3) Y <- arima.sim(n=n, list(ar=c(phi),ma=c(theta)), innov = runif(n,min=-1,max=1),n.start=burn) if(etype==4) Y <- arima.sim(n=n, list(ar=c(phi),ma=c(theta)), innov = (rexp(n)-1), n.start=burn) } out<-armaboot2(Y=Y,B=BB,pmax=pmax,qmax=qmax,c=cc,burn=burn) for(j in 1:kmax){ tem <- shorth3(out$betas[,j],alpha=alph) if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2]) cicov[j] <- cicov[j] + 1 avelen[j] <- avelen[j] + tem$shorth[2] - tem$shorth[1] tem <- shorth3(out$btmix[,j],alpha=alph) if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2]) cicovmix[j] <- cicovmix[j] + 1 avelenmix[j] <- avelenmix[j] + tem$shorth[2] - tem$shorth[1] tem <- shorth3(out$btfull[,j],alpha=alph) if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2]) cicovfull[j] <- cicovfull[j] + 1 avelenfull[j] <- avelenfull[j] + tem$shorth[2] - tem$shorth[1] } #test whether the kmax-k values of betaO are 0 gg <- kmax - k tstat <- out$bhatvs[valO] tem <- confreg(out$betas[,valO],g=gg,that=tstat,alpha=alph) if(tem$D0 <= tem$cuplim) #pred. reg. method cicov[pp1] <- cicov[pp1] + 1 avelen[pp1] <- avelen[pp1] + tem$cuplim if(tem$br0 <= tem$cuplim) #hybrid method cicov[pp2] <- cicov[pp2] + 1 avelen[pp2] <- avelen[pp1] #same cutoff so same length if(tem$br0 <= tem$brlim) #Bickel and Ren method cicov[pp3] <- cicov[pp3] + 1 avelen[pp3] <- avelen[pp3] + tem$brlim tem <- confreg(out$btmix[,valO],g=gg,that=tstat,alpha=alph) if(tem$D0 <= tem$cuplim) #pred. reg. method cicovmix[pp1] <- cicovmix[pp1] + 1 avelenmix[pp1] <- avelenmix[pp1] + tem$cuplim if(tem$br0 <= tem$cuplim) #hybrid method cicovmix[pp2] <- cicovmix[pp2] + 1 avelenmix[pp2] <- avelenmix[pp1] #same cutoff so same length if(tem$br0 <= tem$brlim) #Bickel and Ren method cicovmix[pp3] <- cicovmix[pp3] + 1 avelenmix[pp3] <- avelenmix[pp3] + tem$brlim tem <- confreg(out$btfull[,valO],g=gg,that=tstat,alpha=alph) if(tem$D0 <= tem$cuplim) #pred. reg. method cicovfull[pp1] <- cicovfull[pp1] + 1 avelenfull[pp1] <- avelenfull[pp1] + tem$cuplim if(tem$br0 <= tem$cuplim) #hybrid method cicovfull[pp2] <- cicovfull[pp2] + 1 avelenfull[pp2] <- avelenfull[pp1] #same cutoff so same length if(tem$br0 <= tem$brlim) #Bickel and Ren method cicovfull[pp3] <- cicovfull[pp3] + 1 avelenfull[pp3] <- avelenfull[pp3] + tem$brlim #test whether the first betaS = betaS gg <- k tstat <- out$bhatvs[valS] tem <- confreg(out$betas[,valS],g=gg,that=tstat,alpha=alph) D0 <- sqrt(mahalanobis(tvec, tem$center, tem$cov)) if(D0 <= tem$cuplim) #pred. reg. method cicov[pp4] <- cicov[pp4] + 1 avelen[pp4] <- avelen[pp4] + tem$cuplim D1 <- sqrt(mahalanobis(tvec, tstat, tem$cov)) if(D1 <= tem$cuplim) #hybrid method cicov[pp5] <- cicov[pp5] + 1 avelen[pp5] <- avelen[pp4] #same cutoff so same length if(D1 <= tem$brlim) #Bickel and Ren method cicov[pp6] <- cicov[pp6] + 1 avelen[pp6] <- avelen[pp6] + tem$brlim tem <- confreg(out$btmix[,valS],g=gg,that=tstat,alpha=alph) D0 <- sqrt(mahalanobis(tvec, tem$center, tem$cov)) if(D0 <= tem$cuplim) #pred. reg. method cicovmix[pp4] <- cicovmix[pp4] + 1 avelenmix[pp4] <- avelenmix[pp4] + tem$cuplim D1 <- sqrt(mahalanobis(tvec, tstat, tem$cov)) if(D1 <= tem$cuplim) #hybrid method cicovmix[pp5] <- cicovmix[pp5] + 1 avelenmix[pp5] <- avelenmix[pp4] #same cutoff so same length if(D1 <= tem$brlim) #Bickel and Ren method cicovmix[pp6] <- cicovmix[pp6] + 1 avelenmix[pp6] <- avelenmix[pp6] + tem$brlim tem <- confreg(out$btfull[,valS],g=gg,that=tstat,alpha=alph) D0 <- sqrt(mahalanobis(tvec, tem$center, tem$cov)) if(D0 <= tem$cuplim) #pred. reg. method cicovfull[pp4] <- cicovfull[pp4] + 1 avelenfull[pp4] <- avelenfull[pp4] + tem$cuplim D1 <- sqrt(mahalanobis(tvec, tstat, tem$cov)) if(D1 <= tem$cuplim) #hybrid method cicovfull[pp5] <- cicovfull[pp5] + 1 avelenfull[pp5] <- avelenfull[pp4] #same cutoff so same length if(D1 <= tem$brlim) #Bickel and Ren method cicovfull[pp6] <- cicovfull[pp6] + 1 avelenfull[pp6] <- avelenfull[pp6] + tem$brlim } cicov <- cicov/nruns avelen <- avelen/nruns cicovmix <- cicovmix/nruns avelenmix <- avelenmix/nruns cicovfull <- cicovfull/nruns avelenfull <- avelenfull/nruns list(cicovfull=cicovfull,avelenfull=avelenfull,cicov=cicov,avelen=avelen, cicovmix=cicovmix,avelenmix=avelenmix,beta=beta) } msarmasim3<-function(n=100,nruns=20,BB=100,cc=0.05, etype=1,tstype=4,tdf=5,alph=0.05){ #Bootstraps ARMA(p,q) model selection. Seems to work for nruns=500. #assumes n > 10 (pmax+qmax), beta = (phi1,...,phipmax,theta1,...,thetaqmax) #uses parametric bootstrap, #calls armaboot, aic might be better than aicc #need pmax+3 <=pmax+qmax #uses library(forecast) #If the true model is ARMA(ps,qs), then both pmax > ps and qmax > qs #causes lots of convergence problems. #etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom #etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors pmax=3 qmax=3 kmax<-pmax+qmax #p=kmax beta<-0*1:kmax pp6<-kmax+6; pp5<-kmax+5; pp4<-kmax+4; pp3<-kmax+3; pp1<-kmax+1; pp2<-kmax+2 cicov <- 0*(1:pp6) avelen <- 0*(1:pp6) cicovmix <- cicov avelenmix <- avelen cicovfull <- cicov avelenfull <- avelen if(tstype==4){#need pmax > 2 #tstype=4 for ARMA(3,1) model phi <- c(0.7,0.1,-0.4) theta <- 0.1 beta[c(1:3,pmax+1)] <- c(phi,0.1) valS <- c(1:3,(pmax+1)) if(pmax==3) valO <- c((pmax+2):kmax) if(pmax>3) valO <- c(3:pmax,(pmax+2):kmax) } k<-2 tvec<-beta[valS] for(i in 1: nruns) { print(i) if(tstype==4){ if (etype==1) Y <- arima.sim(n=n, list(ar=c(phi),ma=c(theta)), innov = rnorm(n)) if(etype==2) Y <- arima.sim(n=n, list(ar=c(phi),ma=c(theta)), innov = rt(n,tdf)) if(etype==3) Y <- arima.sim(n=n, list(ar=c(phi),ma=c(theta)), innov = runif(n,min=-1,max=1)) if(etype==4) Y <- arima.sim(n=n, list(ar=c(phi),ma=c(theta)), innov = (rexp(n)-1)) } out<-armaboot(Y=Y,B=BB,pmax=pmax,qmax=qmax,c=cc) for(j in 1:kmax){ tem <- shorth3(out$betas[,j],alpha=alph) if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2]) cicov[j] <- cicov[j] + 1 avelen[j] <- avelen[j] + tem$shorth[2] - tem$shorth[1] tem <- shorth3(out$btmix[,j],alpha=alph) if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2]) cicovmix[j] <- cicovmix[j] + 1 avelenmix[j] <- avelenmix[j] + tem$shorth[2] - tem$shorth[1] tem <- shorth3(out$btfull[,j],alpha=alph) if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2]) cicovfull[j] <- cicovfull[j] + 1 avelenfull[j] <- avelenfull[j] + tem$shorth[2] - tem$shorth[1] } #test whether the last 2 values of beta are 0 gg <- 2 tstat <- out$bhatvs[c(5,6)] tem <- confreg(out$betas[,c(5,6)],g=gg,that=tstat,alpha=alph) if(tem$D0 <= tem$cuplim) #pred. reg. method cicov[pp1] <- cicov[pp1] + 1 avelen[pp1] <- avelen[pp1] + tem$cuplim if(tem$br0 <= tem$cuplim) #hybrid method cicov[pp2] <- cicov[pp2] + 1 avelen[pp2] <- avelen[pp1] #same cutoff so same length if(tem$br0 <= tem$brlim) #Bickel and Ren method cicov[pp3] <- cicov[pp3] + 1 avelen[pp3] <- avelen[pp3] + tem$brlim tem <- confreg(out$btmix[,c(5,6)],g=gg,that=tstat,alpha=alph) if(tem$D0 <= tem$cuplim) #pred. reg. method cicovmix[pp1] <- cicovmix[pp1] + 1 avelenmix[pp1] <- avelenmix[pp1] + tem$cuplim if(tem$br0 <= tem$cuplim) #hybrid method cicovmix[pp2] <- cicovmix[pp2] + 1 avelenmix[pp2] <- avelenmix[pp1] #same cutoff so same length if(tem$br0 <= tem$brlim) #Bickel and Ren method cicovmix[pp3] <- cicovmix[pp3] + 1 avelenmix[pp3] <- avelenmix[pp3] + tem$brlim tem <- confreg(out$btfull[,c(5,6)],g=gg,that=tstat,alpha=alph) if(tem$D0 <= tem$cuplim) #pred. reg. method cicovfull[pp1] <- cicovfull[pp1] + 1 avelenfull[pp1] <- avelenfull[pp1] + tem$cuplim if(tem$br0 <= tem$cuplim) #hybrid method cicovfull[pp2] <- cicovfull[pp2] + 1 avelenfull[pp2] <- avelenfull[pp1] #same cutoff so same length if(tem$br0 <= tem$brlim) #Bickel and Ren method cicovfull[pp3] <- cicovfull[pp3] + 1 avelenfull[pp3] <- avelenfull[pp3] + tem$brlim #test whether betaS = betaS gg <- 4 tstat <- out$bhatvs[1:4] tem <- confreg(out$betas[,1:4],g=gg,that=tstat,alpha=alph) D0 <- sqrt(mahalanobis(tvec, tem$center, tem$cov)) if(D0 <= tem$cuplim) #pred. reg. method cicov[pp4] <- cicov[pp4] + 1 avelen[pp4] <- avelen[pp4] + tem$cuplim D1 <- sqrt(mahalanobis(tvec, tstat, tem$cov)) if(D1 <= tem$cuplim) #hybrid method cicov[pp5] <- cicov[pp5] + 1 avelen[pp5] <- avelen[pp4] #same cutoff so same length if(D1 <= tem$brlim) #Bickel and Ren method cicov[pp6] <- cicov[pp6] + 1 avelen[pp6] <- avelen[pp6] + tem$brlim tem <- confreg(out$btmix[,1:4],g=gg,that=tstat,alpha=alph) D0 <- sqrt(mahalanobis(tvec, tem$center, tem$cov)) if(D0 <= tem$cuplim) #pred. reg. method cicovmix[pp4] <- cicovmix[pp4] + 1 avelenmix[pp4] <- avelenmix[pp4] + tem$cuplim D1 <- sqrt(mahalanobis(tvec, tstat, tem$cov)) if(D1 <= tem$cuplim) #hybrid method cicovmix[pp5] <- cicovmix[pp5] + 1 avelenmix[pp5] <- avelenmix[pp4] #same cutoff so same length if(D1 <= tem$brlim) #Bickel and Ren method cicovmix[pp6] <- cicovmix[pp6] + 1 avelenmix[pp6] <- avelenmix[pp6] + tem$brlim tem <- confreg(out$btfull[,1:4],g=gg,that=tstat,alpha=alph) D0 <- sqrt(mahalanobis(tvec, tem$center, tem$cov)) if(D0 <= tem$cuplim) #pred. reg. method cicovfull[pp4] <- cicovfull[pp4] + 1 avelenfull[pp4] <- avelenfull[pp4] + tem$cuplim D1 <- sqrt(mahalanobis(tvec, tstat, tem$cov)) if(D1 <= tem$cuplim) #hybrid method cicovfull[pp5] <- cicovfull[pp5] + 1 avelenfull[pp5] <- avelenfull[pp4] #same cutoff so same length if(D1 <= tem$brlim) #Bickel and Ren method cicovfull[pp6] <- cicovfull[pp6] + 1 avelenfull[pp6] <- avelenfull[pp6] + tem$brlim } cicov <- cicov/nruns avelen <- avelen/nruns cicovmix <- cicovmix/nruns avelenmix <- avelenmix/nruns cicovfull <- cicovfull/nruns avelenfull <- avelenfull/nruns list(cicovfull=cicovfull,avelenfull=avelenfull,cicov=cicov,avelen=avelen, cicovmix=cicovmix,avelenmix=avelenmix,beta=beta) } msarmasim4<-function(n=100,nruns=20,BB=100,cc=0.05, etype=1,tstype=4,tdf=5,alph=0.05,burn=100){ #Bootstraps ARMA(p,q) model selection. #assumes n > 10 (pmax+qmax), beta = (phi1,...,phipmax,theta1,...,thetaqmax) #uses parametric bootstrap, burn is the burn in period, need burn>pmax+qmax #calls armaboot2, aic might be better than aicc #need pmax+3 <=pmax+qmax #uses library(forecast) #If the true model is ARMA(ps,qs), then both pmax > ps and qmax > qs #cause lots of convergence problems. #etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom #etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors pmax=3 qmax=3 kmax<-pmax+qmax #p=kmax beta<-0*1:kmax pp6<-kmax+6; pp5<-kmax+5; pp4<-kmax+4; pp3<-kmax+3; pp1<-kmax+1; pp2<-kmax+2 cicov <- 0*(1:pp6) avelen <- 0*(1:pp6) cicovmix <- cicov avelenmix <- avelen cicovfull <- cicov avelenfull <- avelen if(tstype==4){#need pmax > 2 #tstype=4 for ARMA(3,1) model phi <- c(0.7,0.1,-0.4) theta <- 0.1 beta[c(1:3,pmax+1)] <- c(phi,0.1) valS <- c(1:3,(pmax+1)) if(pmax==3) valO <- c((pmax+2):kmax) if(pmax>3) valO <- c(3:pmax,(pmax+2):kmax) } k<-2 tvec<-beta[valS] for(i in 1: nruns) { print(i) if(tstype==4){ if (etype==1) Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov = rnorm(n),n.start=burn) if(etype==2) Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov = rt(n,tdf),n.start=burn) if(etype==3) Y <- arima.sim(n=n, list(ar=c(phi),ma=c(theta)), innov = runif(n,min=-1,max=1),n.start=burn) if(etype==4) Y <- arima.sim(n=n,list(ar=c(phi),ma=c(theta)),innov=(rexp(n)-1),n.start=burn) } out<-armaboot2(Y=Y,B=BB,pmax=pmax,qmax=qmax,c=cc,burn=burn) for(j in 1:kmax){ tem <- shorth3(out$betas[,j],alpha=alph) if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2]) cicov[j] <- cicov[j] + 1 avelen[j] <- avelen[j] + tem$shorth[2] - tem$shorth[1] tem <- shorth3(out$btmix[,j],alpha=alph) if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2]) cicovmix[j] <- cicovmix[j] + 1 avelenmix[j] <- avelenmix[j] + tem$shorth[2] - tem$shorth[1] tem <- shorth3(out$btfull[,j],alpha=alph) if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2]) cicovfull[j] <- cicovfull[j] + 1 avelenfull[j] <- avelenfull[j] + tem$shorth[2] - tem$shorth[1] } #test whether the last 2 values of beta are 0 gg <- 2 tstat <- out$bhatvs[c(5,6)] tem <- confreg(out$betas[,c(5,6)],g=gg,that=tstat,alpha=alph) if(tem$D0 <= tem$cuplim) #pred. reg. method cicov[pp1] <- cicov[pp1] + 1 avelen[pp1] <- avelen[pp1] + tem$cuplim if(tem$br0 <= tem$cuplim) #hybrid method cicov[pp2] <- cicov[pp2] + 1 avelen[pp2] <- avelen[pp1] #same cutoff so same length if(tem$br0 <= tem$brlim) #Bickel and Ren method cicov[pp3] <- cicov[pp3] + 1 avelen[pp3] <- avelen[pp3] + tem$brlim tem <- confreg(out$btmix[,c(5,6)],g=gg,that=tstat,alpha=alph) if(tem$D0 <= tem$cuplim) #pred. reg. method cicovmix[pp1] <- cicovmix[pp1] + 1 avelenmix[pp1] <- avelenmix[pp1] + tem$cuplim if(tem$br0 <= tem$cuplim) #hybrid method cicovmix[pp2] <- cicovmix[pp2] + 1 avelenmix[pp2] <- avelenmix[pp1] #same cutoff so same length if(tem$br0 <= tem$brlim) #Bickel and Ren method cicovmix[pp3] <- cicovmix[pp3] + 1 avelenmix[pp3] <- avelenmix[pp3] + tem$brlim tem <- confreg(out$btfull[,c(5,6)],g=gg,that=tstat,alpha=alph) if(tem$D0 <= tem$cuplim) #pred. reg. method cicovfull[pp1] <- cicovfull[pp1] + 1 avelenfull[pp1] <- avelenfull[pp1] + tem$cuplim if(tem$br0 <= tem$cuplim) #hybrid method cicovfull[pp2] <- cicovfull[pp2] + 1 avelenfull[pp2] <- avelenfull[pp1] #same cutoff so same length if(tem$br0 <= tem$brlim) #Bickel and Ren method cicovfull[pp3] <- cicovfull[pp3] + 1 avelenfull[pp3] <- avelenfull[pp3] + tem$brlim #test whether betaS = betaS gg <- 4 tstat <- out$bhatvs[1:4] tem <- confreg(out$betas[,1:4],g=gg,that=tstat,alpha=alph) D0 <- sqrt(mahalanobis(tvec, tem$center, tem$cov)) if(D0 <= tem$cuplim) #pred. reg. method cicov[pp4] <- cicov[pp4] + 1 avelen[pp4] <- avelen[pp4] + tem$cuplim D1 <- sqrt(mahalanobis(tvec, tstat, tem$cov)) if(D1 <= tem$cuplim) #hybrid method cicov[pp5] <- cicov[pp5] + 1 avelen[pp5] <- avelen[pp4] #same cutoff so same length if(D1 <= tem$brlim) #Bickel and Ren method cicov[pp6] <- cicov[pp6] + 1 avelen[pp6] <- avelen[pp6] + tem$brlim tem <- confreg(out$btmix[,1:4],g=gg,that=tstat,alpha=alph) D0 <- sqrt(mahalanobis(tvec, tem$center, tem$cov)) if(D0 <= tem$cuplim) #pred. reg. method cicovmix[pp4] <- cicovmix[pp4] + 1 avelenmix[pp4] <- avelenmix[pp4] + tem$cuplim D1 <- sqrt(mahalanobis(tvec, tstat, tem$cov)) if(D1 <= tem$cuplim) #hybrid method cicovmix[pp5] <- cicovmix[pp5] + 1 avelenmix[pp5] <- avelenmix[pp4] #same cutoff so same length if(D1 <= tem$brlim) #Bickel and Ren method cicovmix[pp6] <- cicovmix[pp6] + 1 avelenmix[pp6] <- avelenmix[pp6] + tem$brlim tem <- confreg(out$btfull[,1:4],g=gg,that=tstat,alpha=alph) D0 <- sqrt(mahalanobis(tvec, tem$center, tem$cov)) if(D0 <= tem$cuplim) #pred. reg. method cicovfull[pp4] <- cicovfull[pp4] + 1 avelenfull[pp4] <- avelenfull[pp4] + tem$cuplim D1 <- sqrt(mahalanobis(tvec, tstat, tem$cov)) if(D1 <= tem$cuplim) #hybrid method cicovfull[pp5] <- cicovfull[pp5] + 1 avelenfull[pp5] <- avelenfull[pp4] #same cutoff so same length if(D1 <= tem$brlim) #Bickel and Ren method cicovfull[pp6] <- cicovfull[pp6] + 1 avelenfull[pp6] <- avelenfull[pp6] + tem$brlim } cicov <- cicov/nruns avelen <- avelen/nruns cicovmix <- cicovmix/nruns avelenmix <- avelenmix/nruns cicovfull <- cicovfull/nruns avelenfull <- avelenfull/nruns list(cicovfull=cicovfull,avelenfull=avelenfull,cicov=cicov,avelen=avelen, cicovmix=cicovmix,avelenmix=avelenmix,beta=beta) } msmasim<-function(n=100,nruns=20,qmax=5,BB=100,cc=0.05,etype=1,tstype=1, tdf=5,btype=1,alph=0.05){ #Bootstraps MA(q) model selection. #assumes n > 10 qmax, beta = (theta1,...,thetaqmax) #btype = 1 for parametric bootstrap, 2 for residual bootstrap #calls maboot or maboot2, aic might be better than aicc, uses forecast package #etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom #etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors #tstype = k for an MA(k) model, k=1,2 #uses library(forecast) beta<-0*1:qmax theta=-0.5 #tstype=1 for MA(1) model beta[1] <- -0.5 pp6<-qmax+6; pp5<-qmax+5; pp4<-qmax+4; pp3<-qmax+3; pp1<-qmax+1; pp2<-qmax+2 cicov <- 0*(1:pp6) avelen <- 0*(1:pp6) cicovmix <- cicov avelenmix <- avelen cicovfull <- cicov avelenfull <- avelen k<-tstype if(tstype==2) { theta=c(-0.5,0.5) #tstype=2 for MA(2) model beta[2]<-0.5} tvec<-beta[1:k] for(i in 1: nruns) { if (etype==1) Y <- arima.sim(n=n, list(ma=c(theta)), innov = rnorm(n)) if(etype==2) Y <- arima.sim(n=n, list(ma=c(theta)), innov = rt(n,tdf)) if(etype==3) Y <- arima.sim(n=n, list(ma=c(theta)), innov = runif(n,min=-1,max=1)) if(etype==4) Y <- arima.sim(n=n, list(ma=c(theta)), innov = (rexp(n)-1)) if(btype==1) out<-maboot(Y=Y,B=BB,qmax=qmax,c=cc) else out<-maboot2(Y=Y,B=BB,qmax=qmax,c=cc) for(j in 1:qmax){ tem <- shorth3(out$betas[,j],alpha=alph) if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2]) cicov[j] <- cicov[j] + 1 avelen[j] <- avelen[j] + tem$shorth[2] - tem$shorth[1] tem <- shorth3(out$btmix[,j],alpha=alph) if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2]) cicovmix[j] <- cicovmix[j] + 1 avelenmix[j] <- avelenmix[j] + tem$shorth[2] - tem$shorth[1] tem <- shorth3(out$btfull[,j],alpha=alph) if(beta[j] >= tem$shorth[1] && beta[j] <= tem$shorth[2]) cicovfull[j] <- cicovfull[j] + 1 avelenfull[j] <- avelenfull[j] + tem$shorth[2] - tem$shorth[1] } #test whether the last qmax-k values of beta are 0 gg <- qmax - k tstat <- out$bhatvs[(k+1):qmax] tem <- confreg(out$betas[,(k+1):qmax],g=gg,that=tstat,alpha=alph) if(tem$D0 <= tem$cuplim) #pred. reg. method cicov[pp1] <- cicov[pp1] + 1 avelen[pp1] <- avelen[pp1] + tem$cuplim if(tem$br0 <= tem$cuplim) #hybrid method cicov[pp2] <- cicov[pp2] + 1 avelen[pp2] <- avelen[pp1] #same cutoff so same length if(tem$br0 <= tem$brlim) #Bickel and Ren method cicov[pp3] <- cicov[pp3] + 1 avelen[pp3] <- avelen[pp3] + tem$brlim tem <- confreg(out$btmix[,(k+1):qmax],g=gg,that=tstat,alpha=alph) if(tem$D0 <= tem$cuplim) #pred. reg. method cicovmix[pp1] <- cicovmix[pp1] + 1 avelenmix[pp1] <- avelenmix[pp1] + tem$cuplim if(tem$br0 <= tem$cuplim) #hybrid method cicovmix[pp2] <- cicovmix[pp2] + 1 avelenmix[pp2] <- avelenmix[pp1] #same cutoff so same length if(tem$br0 <= tem$brlim) #Bickel and Ren method cicovmix[pp3] <- cicovmix[pp3] + 1 avelenmix[pp3] <- avelenmix[pp3] + tem$brlim tem <- confreg(out$btfull[,(k+1):qmax],g=gg,that=tstat,alpha=alph) if(tem$D0 <= tem$cuplim) #pred. reg. method cicovfull[pp1] <- cicovfull[pp1] + 1 avelenfull[pp1] <- avelenfull[pp1] + tem$cuplim if(tem$br0 <= tem$cuplim) #hybrid method cicovfull[pp2] <- cicovfull[pp2] + 1 avelenfull[pp2] <- avelenfull[pp1] #same cutoff so same length if(tem$br0 <= tem$brlim) #Bickel and Ren method cicovfull[pp3] <- cicovfull[pp3] + 1 avelenfull[pp3] <- avelenfull[pp3] + tem$brlim #test whether the first k values of beta are (beta_1,...,beta_k) gg <- k tstat <- out$bhatvs[1:k] tem <- confreg(out$betas[,1:k],g=gg,that=tstat,alpha=alph) D0 <- sqrt(mahalanobis(tvec, tem$center, tem$cov)) if(D0 <= tem$cuplim) #pred. reg. method cicov[pp4] <- cicov[pp4] + 1 avelen[pp4] <- avelen[pp4] + tem$cuplim D1 <- sqrt(mahalanobis(tvec, tstat, tem$cov)) if(D1 <= tem$cuplim) #hybrid method cicov[pp5] <- cicov[pp5] + 1 avelen[pp5] <- avelen[pp4] #same cutoff so same length if(D1 <= tem$brlim) #Bickel and Ren method cicov[pp6] <- cicov[pp6] + 1 avelen[pp6] <- avelen[pp6] + tem$brlim tem <- confreg(out$btmix[,1:k],g=gg,that=tstat,alpha=alph) D0 <- sqrt(mahalanobis(tvec, tem$center, tem$cov)) if(D0 <= tem$cuplim) #pred. reg. method cicovmix[pp4] <- cicovmix[pp4] + 1 avelenmix[pp4] <- avelenmix[pp4] + tem$cuplim D1 <- sqrt(mahalanobis(tvec, tstat, tem$cov)) if(D1 <= tem$cuplim) #hybrid method cicovmix[pp5] <- cicovmix[pp5] + 1 avelenmix[pp5] <- avelenmix[pp4] #same cutoff so same length if(D1 <= tem$brlim) #Bickel and Ren method cicovmix[pp6] <- cicovmix[pp6] + 1 avelenmix[pp6] <- avelenmix[pp6] + tem$brlim tem <- confreg(out$btfull[,1:k],g=gg,that=tstat,alpha=alph) D0 <- sqrt(mahalanobis(tvec, tem$center, tem$cov)) if(D0 <= tem$cuplim) #pred. reg. method cicovfull[pp4] <- cicovfull[pp4] + 1 avelenfull[pp4] <- avelenfull[pp4] + tem$cuplim D1 <- sqrt(mahalanobis(tvec, tstat, tem$cov)) if(D1 <= tem$cuplim) #hybrid method cicovfull[pp5] <- cicovfull[pp5] + 1 avelenfull[pp5] <- avelenfull[pp4] #same cutoff so same length if(D1 <= tem$brlim) #Bickel and Ren method cicovfull[pp6] <- cicovfull[pp6] + 1 avelenfull[pp6] <- avelenfull[pp6] + tem$brlim } cicov <- cicov/nruns avelen <- avelen/nruns cicovmix <- cicovmix/nruns avelenmix <- avelenmix/nruns cicovfull <- cicovfull/nruns avelenfull <- avelenfull/nruns list(cicovfull=cicovfull,avelenfull=avelenfull,cicov=cicov,avelen=avelen, cicovmix=cicovmix,avelenmix=avelenmix,beta=beta) } nonpisim<-function(n=100,nruns=50,etype=1,tstype=1, tdf=5,phi=c(0.7,0.1,-0.4),theta=c(0.1),pen=2,dd=0,burn=100,alph=0.05){ #Simulates time series of length n, and finds proportion #of time Y_n+1 is in its one step nonparametric PI that is free of model selected #Average PI length is also given. #etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom #etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors #dd is the amount of difference, dd=0,1,or 2, assumed known np1 <- n+1 cov <- 0 len <- 0:nruns #assume n - q - p > 30 if(tstype==1){#tstype=1 for AR(1) model phi<-c(0.5) ps<-1 qs <- 0 } if(tstype==2) { #tstype=2 for AR(2) model phi=c(0.5,0.33) ps<-2 qs<-0 } if(tstype==3){#tstype=3 for MA(1) model theta<-c(-0.5) ps<-0 qs <- 1 } if(tstype==4) { #tstype=4 for MA(2) model theta=c(-0.5,0.5) ps<-0 qs<-2 } if(tstype==5){#need kmax > 2 #tstype=5 for ARMA(3,1) model phi <- c(0.7,0.1,-0.4) theta <- 0.1 ps<-3 qs<-1 } if(tstype==6){#need kmax >= rr #tstype=6 for ARMA model selected by user phi <- as.vector(phi) theta <- as.vector(theta) ps<-length(phi) qs<-length(theta) } for(i in 1:nruns){ if(tstype<3){ if (etype==1) Y <- arima.sim(n=np1, list(order = c(ps,dd,qs), ar =c(phi)), innov = rnorm(np1)) if(etype==2) Y <- arima.sim(n=np1, list(order = c(ps,dd,qs), ar =c(phi)), innov = rt(np1,tdf)) if(etype==3) Y <- arima.sim(n=np1, list(order = c(ps,dd,qs), ar =c(phi)), innov = runif(np1,min=-1,max=1)) if(etype==4) Y <- arima.sim(n=np1, list(order = c(ps,dd,qs), ar =c(phi)), innov = (rexp(np1)-1)) } if(tstype==3 || tstype==4){ if (etype==1) Y <- arima.sim(n=np1, list(order = c(ps,dd,qs), ma=c(theta)), innov = rnorm(np1)) if(etype==2) Y <- arima.sim(n=np1, list(order = c(ps,dd,qs), ma=c(theta)), innov = rt(np1,tdf)) if(etype==3) Y <- arima.sim(n=np1, list(order = c(ps,dd,qs), ma=c(theta)), innov = runif(np1,min=-1,max=1)) if(etype==4) Y <- arima.sim(n=np1, list(order = c(ps,dd,qs), ma=c(theta)), innov = (rexp(np1)-1)) } if(tstype==5){ if (etype==1) Y <- arima.sim(n=np1,list(order = c(ps,dd,qs),ar=c(phi),ma=c(theta)),innov = rnorm(np1),n.start=burn) if(etype==2) Y <- arima.sim(n=np1,list(order = c(ps,dd,qs),ar=c(phi),ma=c(theta)),innov = rt(np1,tdf),n.start=burn) if(etype==3) Y <- arima.sim(n=np1, list(order = c(ps,dd,qs),ar=c(phi),ma=c(theta)), innov = runif(np1,min=-1,max=1),n.start=burn) if(etype==4) Y <- arima.sim(n=np1,list(order = c(ps,dd,qs), ar=c(phi),ma=c(theta)),innov=(rexp(np1)-1),n.start=burn) } if(tstype==6){ #arma model selected by the user if (etype==1) Y <- arima.sim(n=np1,list(order = c(ps,dd,qs), ar=c(phi),ma=c(theta)),innov = rnorm(np1),n.start=burn) if(etype==2) Y <- arima.sim(n=np1,list(order = c(ps,dd,qs), ar=c(phi),ma=c(theta)),innov = rt(np1,tdf),n.start=burn) if(etype==3) Y <- arima.sim(n=np1, list(order = c(ps,dd,qs), ar=c(phi),ma=c(theta)), innov = runif(np1,min=-1,max=1),n.start=burn) if(etype==4) Y <- arima.sim(n=np1,list(order = c(ps,dd,qs), ar=c(phi),ma=c(theta)),innov=(rexp(np1)-1),n.start=burn) } y <- Y[1:n] yf <- Y[np1] if(dd==0){ tem <- locpi(y,alpha=alph) #PI ignore TS structure LPI <- tem$LPI UPI <- tem$UPI len[i] <- UPI - LPI if(LPI < yf && yf < UPI) cov <- cov + 1 } if(dd==1){ Z <- as.vector(y) Z[1] <- NA Z[-1] <- diff(y) tem <- locpi(Z[-1],alpha=alph) LPI <- tem$LPI UPI <- tem$UPI len[i] <- UPI - LPI yfhat=y[n] if((yfhat + LPI) < yf && yf < (yfhat+UPI)) cov <- cov + 1 } if(dd==2){ Z <- as.vector(y) Z[1:2] <- NA Z[-c(1,2)] <- diff(y,differences=2) tem <- locpi(Z[-c(1,2)],alpha=alph) LPI <- tem$LPI UPI <- tem$UPI len[i] <- UPI - LPI yfhat= 2*y[n] - y[(n-1)] if((yfhat + LPI) < yf && yf < (yfhat+UPI)) cov <- cov + 1 } } cov <- cov/nruns len <- mean(len) list( cov=cov, len=len) } onesteppi<-function(Y, p=0,d=0,q=1,P=0,D=0,Q=0,per=4,period=F,alpha = 0.05){ #Makes the one step ahead PI [L(n),U(n)] for a #seasonal Arima(p,d,q)x(P,D,Q) model with seasonal period = S = per #calls locpi2, and can handle missing values NA #which is 4 for quarterly data and 12 for monthly data #yhat1 is the one step ahead forecast, chebSE is the SE assuming normality #95% Chebychev PI yhat11 +- 2 chebSE tends to be too short #onesteppi(presidents,p=1,d=1,q=0) #onesteppi(presidents,p=1,d=1,q=0,P=1,period=T) ##per=4 y <- as.vector(Y) if(period==F) out<-arima(y,c(p,d,q),seasonal=list(order=c(P,D,Q))) else out<-arima(y,c(p,d,q),seasonal=list(order=c(P,D,Q),period=per)) #can handle missing values but produces NAs res <- out$res[!is.na(out$res)] km <- p+q+P+Q tem <- locpi2(res,k=km,alph=alpha) temp <- predict(out,n.ahead=1) yhat1 <- temp$pred[1] chebSE <- temp$se[1] low <- yhat1+tem$LPI up <- yhat1+tem$UPI list(chebSE=chebSE,yhat1=yhat1,low,up) } pimasim<-function(N=100,nruns=50,Lmq=1,etype=1,tdf=5,alph=0.05){ #old, uses locpi to get h-step ahead PI for the forecast residuals #Simulates MA(2) time series of length n, and finds proportion #of time Y_N+1, ..., Y_N+L are in their 95% PI base on the shorth. #The PI may be asymptotically optimal for Y_n+j with q< j <=L. #Average PI length is also given. #Gets the asymptotically optimal 1 and 2 step ahead PIs based on forecast residuals. #etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom #etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors #calls locpi #Finally gets the normal PIs, although the z cutoff is replaced #by a t cutoff to increase coverage. L <- 2 + Lmq npL <- N + L np1 <- N + 1 len <- 1:nruns opilen <- matrix(0,nrow=nruns,ncol=2) npilen <- matrix(0,nrow=nruns,ncol=L) cov <- c(0,0) coverage <- as.vector((0*1:L)) npicov <- coverage #use N-p-q in general tcut <- qt((1-alph/2),N-2) for(i in 1:nruns){ thet1 <- runif(1,min=-0.5,max=0.5) thet2 <- runif(1,min=-0.4,max=0.4) if (etype==1) x <- arima.sim(n=npL, list(ma=c(thet1,thet2)), innov = rnorm(npL)) if(etype==2) x <- arima.sim(n=npL, list(ma=c(thet1,thet2)), innov = rt(npL,tdf)) if(etype==3) x <- arima.sim(n=npL, list(ma=c(thet1,thet2)), innov = runif(npL,min=-1,max=1)) if(etype==4) x <- arima.sim(n=npL, list(ma=c(thet1,thet2)), innov = (rexp(npL)-1)) y <- x[1:N] yf <- x[np1:npL] tem <- locpi(y,alpha=alph) LPI <- tem$LPI UPI <- tem$UPI len[i] <- UPI - LPI for(j in 1:L){ if(LPI < yf[j] && yf[j] < UPI) coverage[j] <- coverage[j] + 1} # get the time series from the N cases out <- arima(y,c(0,0,2)) #get the l-step ahead forecasts for l = 1, ..., L temp <- predict(out,n.ahead=L) #get the 1 step ahead PI res <- out$resid #yfhat <- out$coef[3] + out$coef[1]*res[N] + out$coef[2]*res[(N-1)] yfhat <- temp$pred[1] tem <- locpi(res,alpha=alph) low <- yfhat+tem$LPI up <- yfhat+tem$UPI if(low < yf[1] && yf[1] < up) cov[1] <- cov[1] + 1 opilen[i,1] <- up-low #get the 2 step ahead PI #yfhat <- out$coef[3] + out$coef[2]*res[N] yfhat <- temp$pred[2] #get the 2 step ahead forecasts for the data yf2 <- (out$coef[3] + out$coef[2]*res)[-c(N-1,N)] #get the 2 step ahead forecast residuals for t = 3,...,N-2 res2 <- y[-c(1,2)] - yf2 tem <- locpi(res2,alpha=alph) low <- yfhat+tem$LPI up <- yfhat+tem$UPI if(low < yf[2] && yf[2] < up) cov[2] <- cov[2] + 1 opilen[i,2] <- up-low #get the normal PI for(j in 1:L){ LPI <- temp$pred[j] - tcut*temp$se[j] UPI <- temp$pred[j] + tcut*temp$se[j] npilen[i,j] <- UPI-LPI if(LPI < yf[j] && yf[j] < UPI) npicov[j] <- npicov[j] + 1} } plot(x,type="l") abline(a=LPI,b=0) abline(a=UPI,b=0) avelen <- mean(len) coverage <- coverage/nruns cov <- cov/nruns opilen <- apply(opilen,2,mean) npicov <- npicov/nruns npilen <- apply(npilen,2,mean) list(coverage = coverage, avelen = avelen, opicov = cov, opilen = opilen, npicov=npicov, npilen=npilen) } pimasim2<-function(N=100,nruns=50,Lmq=1,etype=1,tdf=5,alph=0.05){ #Simulates MA(2) time series of length N, and finds proportion #of time Y_N+1, ..., Y_N+L are in their 95% PI based on the shorth. #The PI may be asymptotically optimal for Y_N+j with q< j <=L. #Average PI length is also given. #Gets the asymptotically optimal 1 and 2 step ahead PIs based on forecast residuals. #etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom #etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors #calls locpi, locpi2 #Finally gets the normal PIs, although the z cutoff is replaced #by a t cutoff to increase coverage. L <- 2 + Lmq npL <- N + L np1 <- N + 1 len <- 1:nruns opilen <- matrix(0,nrow=nruns,ncol=2) npilen <- matrix(0,nrow=nruns,ncol=L) cov <- c(0,0) coverage <- as.vector((0*1:L)) npicov <- coverage #use N-p-q in general tcut <- qt((1-alph/2),N-2) for(i in 1:nruns){ thet1 <- runif(1,min=-0.5,max=0.5) thet2 <- runif(1,min=-0.4,max=0.4) if (etype==1) x <- arima.sim(n=npL, list(ma=c(thet1,thet2)), innov = rnorm(npL)) if(etype==2) x <- arima.sim(n=npL, list(ma=c(thet1,thet2)), innov = rt(npL,tdf)) if(etype==3) x <- arima.sim(n=npL, list(ma=c(thet1,thet2)), innov = runif(npL,min=-1,max=1)) if(etype==4) x <- arima.sim(n=npL, list(ma=c(thet1,thet2)), innov = (rexp(npL)-1)) y <- x[1:N] yf <- x[np1:npL] tem <- locpi(y,alpha=alph) #PI ignore TS structure, correct for L>=3 LPI <- tem$LPI UPI <- tem$UPI len[i] <- UPI - LPI for(j in 1:L){ if(LPI < yf[j] && yf[j] < UPI) coverage[j] <- coverage[j] + 1} # get the time series from the N cases out <- arima(y,c(0,0,2)) #get the l-step ahead forecasts for l = 1, ..., L temp <- predict(out,n.ahead=L) #get the 1 step ahead PI res <- out$resid m <- length(res) am <- (1+15/m)*sqrt(m/(m-2)) #yfhat <- out$coef[3] + out$coef[1]*res[N] + out$coef[2]*res[(N-1)] yfhat <- temp$pred[1] tem <- locpi2(res,k=2,alph=alph) low <- yfhat+am*tem$LPI up <- yfhat+am*tem$UPI if(low < yf[1] && yf[1] < up) cov[1] <- cov[1] + 1 opilen[i,1] <- up-low #get the 2 step ahead PI #yfhat <- out$coef[3] + out$coef[2]*res[N] yfhat <- temp$pred[2] #get the 2 step ahead forecasts for the data yf2 <- (out$coef[3] + out$coef[2]*res)[-c(N-1,N)] #get the 2 step ahead forecast residuals for t = 3,...,N-2 res2 <- y[-c(1,2)] - yf2 m <- length(res2) am <- (1+15/m)*sqrt(m/(m-2)) tem <- locpi2(res2,alph=alph) low <- yfhat+am*tem$LPI up <- yfhat+am*tem$UPI if(low < yf[2] && yf[2] < up) cov[2] <- cov[2] + 1 opilen[i,2] <- up-low #get the normal PI for(j in 1:L){ LPI <- temp$pred[j] - tcut*temp$se[j] UPI <- temp$pred[j] + tcut*temp$se[j] npilen[i,j] <- UPI-LPI if(LPI < yf[j] && yf[j] < UPI) npicov[j] <- npicov[j] + 1} } #plot(x,type="l") #abline(a=LPI,b=0) #abline(a=UPI,b=0) avelen <- mean(len) coverage <- coverage/nruns cov <- cov/nruns opilen <- apply(opilen,2,mean) npicov <- npicov/nruns npilen <- apply(npilen,2,mean) list(coverage = coverage, avelen = avelen, opicov = cov, opilen = opilen, npicov=npicov, npilen=npilen) } pitsvssim<-function(N=100,nruns=50,pmax=5,qmax=5,tstype=1,etype=1,burn=100, tdf=5,alph=0.05){ #Needs auto.arima from library(forecast) #Fits ARMA(pmax,qmax) model and does model selection. Fits 4 PIs. #Simulates tstype time series of length N, and finds proportion #of time Y_N+1 is in its 95% PI based on the shorth. #tstype = 1 for MA(2), 2 for AR(1), 3 for ARMA(1,1) #The 2nd PI may be asymptotically optimal. #Average PI length is also given. #Gets the 1 step ahead PIs based on forecast residuals. #etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom #etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors #calls locpi, locpi2 #Finally gets the normal PIs, although the z cutoff is replaced #by a t cutoff to increase coverage. ##armapisim is better np1 <- N + 1 len <- 1:nruns opilen <- len npilen <- len h95len <- len coverage <- 0 opicov <- 0 npicov <- coverage h95cov <- 0 #use N-p-q in general tcut <- qt((1-alph/2),N-2) for(i in 1:nruns){ if(tstype==1){ #MA(2) model is the true model thet1 <- runif(1,min=-0.5,max=0.5) thet2 <- runif(1,min=-0.4,max=0.4) if (etype==1) x <- arima.sim(n=np1, list(ma=c(thet1,thet2)), innov = rnorm(np1)) if(etype==2) x <- arima.sim(n=np1, list(ma=c(thet1,thet2)), innov = rt(np1,tdf)) if(etype==3) x <- arima.sim(n=np1, list(ma=c(thet1,thet2)), innov = runif(np1,min=-1,max=1)) if(etype==4) x <- arima.sim(n=np1, list(ma=c(thet1,thet2)), innov = (rexp(np1)-1)) } if(tstype==2){ #AR(1) model is the true model phi1 <- runif(1,min=-0.5,max=0.5) if (etype==1) x <- arima.sim(n=np1, list(ar=c(phi1)), innov = rnorm(np1)) if(etype==2) x <- arima.sim(n=np1, list(ar=c(phi1)), innov = rt(np1,tdf)) if(etype==3) x <- arima.sim(n=np1, list(ar=c(phi1)), innov = runif(np1,min=-1,max=1)) if(etype==4) x <- arima.sim(n=np1, list(ar=c(phi1)), innov = (rexp(np1)-1)) } if(tstype==3){ #ARMA(1,1) model is the true model phi<-0.4 theta <- -0.7 phi <- as.vector(phi) theta <- as.vector(theta) if (etype==1) x <- arima.sim(n=np1,list(ar=c(phi),ma=c(theta)),innov = rnorm(np1),n.start=burn) if(etype==2) x <- arima.sim(n=np1,list(ar=c(phi),ma=c(theta)),innov = rt(np1,tdf),n.start=burn) if(etype==3) x <- arima.sim(n=np1, list(ar=c(phi),ma=c(theta)), innov = runif(np1,min=-1,max=1),n.start=burn) if(etype==4) x <- arima.sim(n=np1,list(ar=c(phi),ma=c(theta)),innov=(rexp(np1)-1),n.start=burn) } y <- x[1:N] yf <- x[np1] tem <- locpi(y,alpha=alph) #PI ignore TS structure LPI <- tem$LPI UPI <- tem$UPI len[i] <- UPI - LPI if(LPI < yf && yf < UPI) coverage <- coverage + 1 #ARMA(p,q) model selection out<-auto.arima(y,d=0,max.p=pmax,max.q=qmax,seasonal=FALSE,ic="aicc", stationary=TRUE) #get the l-step ahead forecasts for l = 1 temp <- out %>% forecast(h=1) yfhat<-temp$mean[1] #1 step ahead forecast #get the 1 step ahead PI res <- out$resid m <- length(res) kk<-length(out$coef) am <- (1+15/m)*sqrt(m/(m-kk)) tem <- locpi2(res,k=kk,alph=alph) low <- yfhat+am*tem$LPI up <- yfhat+am*tem$UPI if(low < yf && yf < up) opicov <- opicov + 1 opilen[i] <- up-low #get PI from forecast package up<-temp$upper[2] low <- temp$lower[2] if(low < yf && yf < up) h95cov <- h95cov + 1 h95len[i] <- up-low #get the normal PI se <- sqrt(out$sigma) tcut <- qt((1-alph/2),N-kk) LPI <- yfhat - tcut*se UPI <- yfhat + tcut*se npilen[i] <- UPI-LPI if(LPI < yf && yf < UPI) npicov <- npicov + 1 } mnpilen <- mean(len) #PI that ignores TS structure locpi coverage <- coverage/nruns opilen <- mean(opilen) #asymptotically optimal PI locpi2 opicov <- opicov/nruns h95cov <- h95cov/nruns #normal Cheb PI from forecast package h95len <- mean(h95len) npicov <- npicov/nruns #normal Cheb PI npilen <- mean(npilen) list(coverage = coverage, mnpilen = mnpilen, opicov = opicov, opilen = opilen, h95cov=h95cov, h95len=h95len, npicov=npicov, npilen=npilen) } predrgn <-function(x, xf, alpha = 0.05){ # Makes a prediction region for xf when cases are rows of x. # If p = 1, the shorth interval should work better. x <- as.matrix(x) p <- dim(x)[2] n <- dim(x)[1] inr <- 0 up <- min((1 - alpha/2), (1 - alpha + 10*alpha*p/n)) if(alpha > 0.1) up <- min((1 - alpha + 0.05), (1 - alpha + p/n)) qn <- up if(qn < 1 - alpha + 0.001) up <- 1 - alpha center <- apply(x, 2, mean) cov <- var(x) md2 <- mahalanobis(x, center, cov) # md2 is the classical squared Mahalanobis distance #get nonparametric prediction region boundary cuplim <- quantile(md2, up) #inr <- 1 if xf is in the prediction region xfd <- mahalanobis(xf, center, cov) if(xfd <= cuplim) inr <- 1 list(md2=md2, center=center,cov=cov, cuplim = cuplim, xfd, inr=inr) } predrgn2 <-function(x, xf, nv=10, alpha = 0.05){ # Makes a prediction region for xf when cases are rows of x. #Uses data splitting prediction region with (T,C) = (coord median, Ip) # If p = 1, the shorth interval should work better. # MAY NOT WORK IF p = 1. x <- as.matrix(x) p <- dim(x)[2] n <- dim(x)[1] nv <- min(nv, floor(n/2)) nH <- n - nv uv <- min(nv, ceiling( (1 - alpha)*(1+nv) ) ) up <- uv/(nv+1) #quantile for prediction region inr <- 0 C <- diag(p) indx <- 1:n #get the data splitting prediction region perm <- sample(indx,n) H <- perm[1:nH] xH <- x[H,] xV <- x[-H,] center <- apply(xH, 2, median) md2 <- mahalanobis(xV, center, cov=C) hsq <- quantile(md2, up, type=1) #inr <- 1 if xf is in the prediction region xfd <- mahalanobis(xf, center, cov=C) if(xfd <= hsq) inr <- 1 list(md2=md2, center=center,hsq = hsq, xfd, inr=inr) } predsim2<-function(n = 50, p = 50, nv=19, nruns = 100, xtype = 1, dtype = 1, alpha = 0.05) {# MAY NOT WORK IF p = 1. # Gets coverages of the data splitting prediction region. # Multiply x by A where xtype = 1 for MVN Nq(0,I), 2 for lognormal #dtype = 1 if (T,C) = (xbar, Ip ), dtype = 2 if (T,C) = (coord median, Ip) # mahalanobis gives squared Maha distances #want n/2 > nv #coverage is very sensitive to quantile type. Use type = 1: inverse quantile #function so percentile = Y(ceiling(nv*(1-alpha)) cvr <- 0 nv <- min(nv, floor(n/2)) nH <- n - nv uv <- min(nv, ceiling( (1 - alpha)*(1+nv) ) ) up <- uv/(nv+1) #quantile for prediction region A <- sqrt(diag(1:p)) m <- n + 1 C <- diag(p) indx <- 1:n for(i in 1:nruns) { #make data x <- matrix(rnorm(m * p), nrow = m, ncol = p) if(xtype == 2) x <- exp(x) x <- x %*% A xx <- x[ - m, ] #find sets H and V perm <- sample(indx,n) H <- perm[1:nH] xH <- xx[H,] xV <- xx[-H,] if(dtype==1){ center <- apply(xH, 2, mean) disp = C} if(dtype==2){ center <- apply(xH, 2, median) disp = C} md2 <- mahalanobis(xV, center, cov=disp) hsq <- quantile(md2, up, type=1) if(mahalanobis(t(x[m, ]), center, cov=disp) <= hsq) cvr <- cvr + 1 } cvr <- cvr/nruns list(cvr = cvr, up=up) } renewalpi<-function(times,alph=0.05){ #Let times=(e1, ..., en) be the iid data for time until event. #Get shorth PIs for time until next event, time until next 2 events, #time until next 3 events, and time until next 4 events. #Reliable for time until next kth event if n/k > 99. #default is 95% PI, calls shpi n<- length(times) step2 <- seq(from = 1, to = (n-2), by = 2) step3 <- seq(from = 1, to = (n-3), by = 3) step4 <- seq(from = 1, to = (n-4), by = 4) eps1 <- times tem <- shpi(yf=0,ydat=eps1,alph) onepi <- c(tem$low, tem$up) #get 2 step ahead PI shift <- c(eps1,0)[-1] #length is n-1 ee <- (eps1+shift)[-(n-1)] #length is n-2, has e_i + e_i+1 eps2 <- ee[step2] #length is about (n-2)/2 tem <- shpi(yf=0,ydat=eps2,alph) twopi <- c(tem$low, tem$up) #get 3 step ahead PI shift<-shift[-(n-1)] shift <- c(shift,0)[-1] #length is n-2 eee <- (ee+shift)[-(n-2)] #length is n-3, has e_i + e_{i+1} + e_{i+2} eps3 <- eee[step3] #length is about (n-3)/3 tem <- shpi(yf=0,ydat=eps3,alph) threepi <- c(tem$low, tem$up) #get 4 step ahead PI shift<-shift[-(n-2)] shift <- c(shift,0)[-1] #length is n-3 eeee <- (eee+shift)[-(n-3)] #length is n-4, has e_i+e_{i+1}+e_{i+2}+e_{i+3} eps4 <- eeee[step4] #length is about (n-4)/4 tem <- shpi(yf=0,ydat=eps4,alph) fourpi <- c(tem$low, tem$up) list(onepi=onepi,twopi=twopi,threepi=threepi,fourpi=fourpi) } resacfs<-function(zz){ #Let zz be the output of the arima function, eg zz <- arima(lh,c(1,0,0)), #Makes the ACF and PACF of the residuals. #Works for ARIMA(p,d,q)x(P,D=0,Q)s models. #Also prints out the Z test statistics for the residual ACF and residual PACF. #|Z| > 1.25 for lags 1,2,3 and |Z| > 1.6 for all other lags par(mfrow=c(2,1)) tema <- acf(zz$resid,na.action=na.omit) temp <- pacf(zz$resid,na.action=na.omit) par(mfrow=c(1,1)) Zstat <- sqrt(length(zz$resid))*rbind(tema$acf[-1],temp$acf) return(Zstat) } resplots<-function(W,zz){ #Let zz be the output of the arima function, eg zz <- arima(lh,c(1,0,0)), #and let W be the time series, eg W <- lh. #Works for ARIMA(p,d,q)x(P,D=0,Q)s models. #Makes the response plot and the residual plot of the fitted values #(W-residual) vs the residuals. #Also prints out the coefficients, se, 95% CI's, test statistics and pvalues. Y <- as.vector(W) Resid <- zz$resid FIT <- Y-Resid FIT <- as.vector(FIT) par(mfrow=c(2,1)) plot(FIT,Y) abline(0,1) title("response plot") plot(FIT,Resid) title("residual plot") par(mfrow=c(1,1)) coef <- zz$coef se <- sqrt(diag(zz$var.coef)) Z <- as.vector(coef/se) pval <- 2 * (1-pnorm(abs(Z))) LCI <- coef - 2*se UCI <- coef + 2*se tem <- cbind(coef,se,LCI,UCI,Z,pval) return(tem) } resplots2<-function(W,zz,d = 1){ #Let zz be the output of the arima function, eg zz <- arima(lh,c(1,0,0)), #and let W be the time series, eg W <- lh. #Uses the differenced time series as the response in the FF plot for d =1,2. #Makes the response plot and the residual plot of the fitted values vs the #residuals. #Also prints out the coefficients, se, 95% CI's, test statistics and pvalues. #changed FIT<-as.real(FIT) to FIT<-as.vector(FIT) Y <- as.vector(W) if(d==1) {Y[1] <- NA; Y[-1] <- diff(Y)} if(d==2) {Y[1:2] <- NA; Y[-c(1,2)] <- diff(Y,differences=2)} Resid <- zz$resid FIT <- Y-Resid FIT <- as.vector(FIT) par(mfrow=c(2,1)) plot(FIT,Y) abline(0,1) title("response plot") plot(FIT,Resid) title("residual plot") par(mfrow=c(1,1)) coef <- zz$coef se <- sqrt(diag(zz$var.coef)) Z <- as.vector(coef/se) pval <- 2 * (1-pnorm(abs(Z))) LCI <- coef - 2*se UCI <- coef + 2*se tem <- cbind(coef,se,LCI,UCI,Z,pval) return(tem) } rmreg2<-function(x, y, csteps = 5, locc = 0.5){ # Does robust multivariate linear regression based on RMVN. # Here x contains the nontrivial predictors. # Advance the plot by highlighting Stop with the right mouse button. x <- as.matrix(x) y <- as.matrix(y) n <- dim(x)[1] q <- dim(x)[2] p <- q+1 m <- dim(y)[2] d <- m + q u <- cbind(x,y) res <- matrix(nrow = n, ncol = m, 0) fit <- res Bhat <- matrix(nrow = p, ncol = m, 0) # q + 1 = p is number of predictors including intercept cmar <- par("mar") par(mfrow = c(2, 1)) par(mar=c(4.0,4.0,2.0,0.5)) #get the RMVN subset of the predictors and response u up <- qchisq(0.975, d) qchi <- qchisq(0.5, d) ##get the DGK estimator covs <- var(u) mns <- apply(u, 2, mean) ## concentrate for(i in 1:csteps) { md2 <- mahalanobis(u, mns, covs) medd2 <- median(md2) mns <- apply(u[md2 <= medd2, ], 2, mean) covs <- var(u[md2 <= medd2, ]) } covd <- covs mnd <- mns ##get the MB estimator covv <- diag(d) med <- apply(u, 2, median) md2 <- mahalanobis(u, center = med, covv) medd2 <- median(md2)##get the location criterion cutoff lcut <- medd2 if(locc != 0.5) lcut <- quantile(md2,locc) ## get the start mns <- apply(u[md2 <= medd2, ], 2, mean) covs <- var(u[md2 <= medd2, ]) ## concentrate for(i in 1:csteps) { md2 <- mahalanobis(u, mns, covs) medd2 <- median(md2) mns <- apply(u[md2 <= medd2, ], 2, mean) covs <- var(u[md2 <= medd2, ]) } covm <- covs mnm <- mns ##get FCH attractor covf <- covm mnf <- mnm val2 <- mahalanobis(t(mnd), med, covv) if(val2 < lcut) { ##crit = square root of det(cov) critd <- prod(diag(chol(covd))) critm <- prod(diag(chol(covm))) if(critd < critm) { covf <- covd mnf <- mnd } } ## get the FCH estimator rd2 <- mahalanobis(u, mnf, covf) const <- median(rd2)/qchi covf <- const * covf ##reweight the above FCH estimator (mnf,covf) rd2 <- mahalanobis(u, mnf, covf) rmnmvn <- apply(u[rd2 <= up, ], 2, mean) rcovmvn <- var(u[rd2 <= up, ]) d1 <- sum(rd2 <= up) rd2 <- mahalanobis(u, rmnmvn, rcovmvn) qchi2 <- (0.5 * 0.975 * n)/d1 qchi2 <- min(qchi2, 0.995) const <- median(rd2)/qchisq(qchi2, d) rcovmvn <- const * rcovmvn ##now get the subset of data w used to construct the RMVN estimator rd2 <- mahalanobis(u, rmnmvn, rcovmvn) w <- u[rd2 <= up, ] xw <- as.matrix(w[,1:q]) yw <- as.matrix(w[,(q+1):d]) #get the multivariate regression after ellipsoidal trimming for(i in 1:m){ out <- lsfit(xw,yw[,i]) res[,i] <- y[,i] - out$coef[1] - x %*% out$coef[-1] fit[,i] <- y[,i] - res[,i] Bhat[,i] <- out$coef } for(i in 1:m){ plot(fit[,i],y[,i]) abline(0, 1) title("Response Plot") identify(fit[,i],y[,i]) plot(fit[,i], res[,i]) title("Residual Plot") identify(fit[,i], res[,i]) } par(mfrow = c(1, 1)) par(mar=cmar) list(fit = fit, res = res, Bhat = Bhat) } robar<-function(Y,p){ #Fits a robust AR(p) model to time series Y. #assumes p < n - p #Makes a response and residual plot. # Advance the plot by highlighting Stop with the right mouse button. #phihat = phihat_0 = intercept, phihat_1, ..., phihat_p # Calls rmreg2 to get the robust estimator. n <- length(Y) Xar <- matrix(0,nrow=n-p,ncol=p) Yar <- Y[(p+1):n] for(i in 1:p){ Xar[,i] <- Y[(p-i+1):(n-i)] } tem <- rmreg2(Xar,Yar) phihat <- tem$Bhat fit <- Y resid <- Y fit[(p+1):n] <- tem$fit fit[1:p] <- NA resid[(p+1):n] <- tem$res resid[1:p] <- NA list(phihat=phihat,fit=fit,resid=resid) } rwpisim<-function(n=100,nruns=50,type=1,tdf=5,alph=0.05){ #Simulates random walk time series of length n, and finds proportion #of time Y_n+1, ..., Y_n+4 are in their 95% PI based on the shorth. #The PI is asymptotically optimal. Average PI length is also given. #type = 1 for N(1,1), 2 for 1 + t distr with tdf=degrees of freedom, #3 for EXP(1), 4 for U(0,2). Calls shpi. Want n >= 25(4) = 100. y0 <-1 val<-n+4 step2 <- seq(from = 1, to = (n-2), by = 2) step3 <- seq(from = 1, to = (n-3), by = 3) step4 <- seq(from = 1, to = (n-4), by = 4) onepilen <- 1:nruns twopilen <- 1:nruns threepilen <- 1:nruns fourpilen <- 1:nruns onecov <- 0 twocov <- 0 threecov <- 0 fourcov <- 0 for(i in 1:nruns){ if(type==1) tem<-rnorm(val,mean=1,sd=1) if(type==2) tem <-1 + rt(val,tdf) #rcauchy generator might be bad if(type==3) tem<-rexp(val) if(type==4) tem<-runif(val,min=0,max=2) tem<-c(y0,tem) yrw<-cumsum(tem)[-1] #get 1 step ahead PI, could use eps1 <- diff(yrw)[1:(n-1)] eps1<-tem[3:(n+1)] ydat <- yrw[n] + eps1 yf <- yrw[n+1] tem <- shpi(yf=yf,ydat=ydat,alph) onepilen[i] <- tem$up - tem$low onecov <- onecov + tem$inr #get 2 step ahead PI shift <- c(eps1,0)[-1] #length is n-1 ee <- (eps1+shift)[-(n-1)] #length is n-2, has e_i + e_i+1 eps2 <- ee[step2] #length is about (n-2)/2 ydat <- yrw[n] + eps2 yf <- yrw[n+2] tem <- shpi(yf=yf,ydat=ydat,alph) twopilen[i] <- tem$up - tem$low twocov <- twocov + tem$inr #get 3 step ahead PI shift<-shift[-(n-1)] shift <- c(shift,0)[-1] #length is n-2 eee <- (ee+shift)[-(n-2)] #length is n-3, has e_i + e_{i+1} + e_{i+2} eps3 <- eee[step3] #length is about (n-3)/3 ydat <- yrw[n] + eps3 yf <- yrw[n+3] tem <- shpi(yf=yf,ydat=ydat,alph) threepilen[i] <- tem$up - tem$low threecov <- threecov + tem$inr #get 4 step ahead PI shift<-shift[-(n-2)] shift <- c(shift,0)[-1] #length is n-3 eeee <- (eee+shift)[-(n-3)] #length is n-4, has e_i+e_{i+1}+e_{i+2}+e_{i+3} eps4 <- eeee[step4] #length is about (n-4)/4 ydat <- yrw[n] + eps4 yf <- yrw[n+4] tem <- shpi(yf=yf,ydat=ydat,alph) fourpilen[i] <- tem$up - tem$low fourcov <- fourcov + tem$inr } lens<-cbind(onepilen,twopilen,threepilen,fourpilen) mnlen<-apply(lens,2,mean) sdlen<-apply(lens,2,sd) onecov <- onecov/nruns twocov <- twocov/nruns threecov <- threecov/nruns fourcov <- fourcov/nruns list(onecov=onecov,twocov=twocov,threecov=threecov, fourcov=fourcov,mnlen=mnlen,sdlen=sdlen) } rwprsim<-function(n=100,nruns=50,p=2,type=1,tdf=5,psi=0.0,alph=0.05){ #Simulates multivariate random walk time series of length n, and finds #proportion of time Y_n+1, ..., Y_n+4 are in their 95% prediction region PR. # Z_t is a p times 1 vector, Z_t = A x_t, where x_t entries are iid #type = 1 for N(1,1), 2 for 1 + t distr with tdf=degrees of freedom, #3 for EXP(1), 4 for U(0,2). Calls predrgn. Want n >= 25(4)p = 100p. #Need the distribution to have a nonsingular pop. covariance matrix. #Want integer p > 1. # Multiply x by A: for MVN data this results # in a covariance matrix with eigenvector c(1, ..., 1)^T # corresponding to the largest eigenvalue. As psi gets # close to 1, the data clusters about the line in the # direction of (1, ..., 1)^T. See Maronna and Zamar (2002). # cor(X_i,X_j) = [2 psi +(q-2)psi^2]/[1 + (q-1)psi^2], i not = j # when the correlation exists. zeroes <- 0*1:p A <- matrix(psi,nrow=p,ncol=p) diag(A) <- 1 val<-n+4 step2 <- seq(from = 1, to = (n-2), by = 2) step3 <- seq(from = 1, to = (n-3), by = 3) step4 <- seq(from = 1, to = (n-4), by = 4) onecov <- 0 twocov <- 0 threecov <- 0 fourcov <- 0 for(i in 1:nruns){ if(type==1) x <- matrix(rnorm(val*p,mean=1,sd=1), nrow = val, ncol = p) if(type==2) x <-1 + matrix(rt(val*p,tdf), nrow = val, ncol = p) if(type==3) x <- matrix(rexp(val*p), nrow = val, ncol = p) if(type==4) x<-matrix(runif(val*p,min=0,max=2),nrow=val, ncol=p) z <- x%*%A #z0 is the p times 1 zero vector yrw<- apply(z,2,cumsum) #get 1 step ahead PR, eps1<-z[2:n,] ydat <- t(yrw[n,] + t(eps1)) #odd code seems to work yf <- yrw[n+1,] tem <- predrgn(x=ydat,xf=yf,alph) onecov <- onecov + tem$inr ##get 2 step ahead PR shift <- rbind(eps1,zeroes)[-1,] #length is n-1 ee <- (eps1+shift)[-(n-1),] #length is n-2, has e_i + e_i+1 eps2 <- ee[step2,] #length is about (n-2)/2 ydat <- t(yrw[n,] + t(eps2)) yf <- yrw[n+2,] tem <- predrgn(x=ydat,xf=yf,alph) twocov <- twocov + tem$inr ##get 3 step ahead PI shift<-shift[-(n-1),] shift <- rbind(shift,zeroes)[-1,] #length is n-2 eee <- (ee+shift)[-(n-2),] #length is n-3, has e_i + e_{i+1} + e_{i+2} eps3 <- eee[step3,] #length is about (n-3)/3 ydat <- t(yrw[n,] + t(eps3)) yf <- yrw[n+3,] tem <- predrgn(x=ydat,xf=yf,alph) threecov <- threecov + tem$inr #get 4 step ahead PI shift<-shift[-(n-2),] shift <- rbind(shift,zeroes)[-1,] #length is n-3 eeee <- (eee+shift)[-(n-3),] #length is n-4, has e_i+e_{i+1}+e_{i+2}+e_{i+3} eps4 <- eeee[step4,] #length is about (n-4)/4 ydat <- t(yrw[n,] + t(eps4)) yf <- yrw[n+4,] tem <- predrgn(x=ydat,xf=yf,alph) fourcov <- fourcov + tem$inr } onecov <- onecov/nruns twocov <- twocov/nruns threecov <- threecov/nruns fourcov <- fourcov/nruns list(onecov=onecov,twocov=twocov,threecov=threecov,fourcov=fourcov) } saics <- function(Y,dd=0,pmax=5,qmax=5,pbig=10,qbig=10,P=0,D=0,Q=0,perd=12){ #Makes a matrix of AICs - min(AIC) for ARIMA(p,dd,q)x(P,D,Q)s seasonal models #with period perd for time series Y where the seasonal values P,D,Q are fixed. #often bombs, but uses method = "ML" to reduce errors #Reduce pmax by 1 if an error message occurs, may need to reduce #pbig, qmax and qbig. May need to use pmax = 1, qmax = 1, pbig=1, qbig =1. #Then increase values. # May need to hit the Esc key to kill program if P,D,Q are bad. #minaic is the min aic for the arima(p,dd,q)(P,D,Q) models, #the ari and ima models could have lower aic Y <- as.vector(Y) aics <- matrix(, (pmax+1), (qmax+1), dimnames=list(p=0:pmax, q=0:qmax)) ariaics <- 1:pbig imaaics <- 1:qbig for(p in 0:pmax) for(q in 0:qmax) aics[1+p, 1+q] <- arima(Y, order=c(p,d=dd,q), method = "ML", seasonal = list(order = c(P, D, Q), period = perd), optim.control = list(maxit = 500))$aic minaic <- min(aics, na.rm = TRUE) aics <- round(aics - minaic, 2) for(p in 1:pbig) ariaics[p] <- arima(Y,order=c(p,d=dd,0), method="ML", seasonal = list(order = c(P, D, Q), period = perd), optim.control = list(maxit = 500))$aic for(q in 1:qbig) imaaics[q] <- arima(Y,order=c(0,d=dd,q), method = "ML", seasonal = list(order = c(P, D, Q), period = perd), optim.control = list(maxit = 500))$aic ariaics <- round(ariaics - minaic, 2) imaaics <- round(imaaics - minaic, 2) list(aics=aics, ariaics=ariaics, imaaics=imaaics) } shorth3<-function(y, alpha = 0.05){ # computes the shorth(c) interval [Ln,Un] for c = cc. #shorth lists Ln and Un using Frey's correction n <- length(y) cc <- ceiling(n * (1 - alpha + 1.12*sqrt(alpha/n))) cc <- min(n,cc) sy <- sort(y) rup <- sy[cc] rlow <- sy[1] olen <- rup - rlow if(cc < n){ for(j in (cc + 1):n){ zlen <- sy[j] - sy[j - cc + 1] if(zlen < olen) { olen <- zlen rup <- sy[j] rlow <- sy[j - cc + 1] } } } Ln <- rlow; Un <- rup list(shorth=c(Ln, Un)) } shpi<-function(yf=0, ydat, alph = 0.05){ #Gets the Frey (2013) PI for Yf given ydata. ydat<-as.vector(ydat) B<-length(ydat) inr <- 0 cc <- ceiling(B * (1 - alph + 1.12*sqrt(alph/B))) cc <- min(B,cc) sy <- sort(ydat) up <- sy[cc] low <- sy[1] olen <- up - low if(cc < B){ for(j in (cc + 1):B){ zlen <- sy[j] - sy[j - cc + 1] if(zlen < olen) { olen <- zlen up <- sy[j] low <- sy[j - cc + 1] } } } if(low <= yf && up >= yf) inr <- inr + 1 list(low=low, up=up, inr = inr)} tscisim<-function(N=100,nruns=100,etype=1,tdf=5){ #Simulates ARMA(1,1) time series of length n, and finds proportion #of time theta and phi are in their 95% CI. #average sqrt(n) CI length is also given. #etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom #etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors #use method = "ML" or method bombs often #using n.start = 100 or n.start=400 did not seem to help arcov <- 0 macov <- 0 arlow <- 1:nruns arup <- arlow malow<-arlow maup<-arlow for(i in 1:nruns){ theta <- runif(1,min=-0.5,max=0.5) phi <- runif(1,min=-0.2,max=0.3) if (etype==1) y <- arima.sim(n=N, list(ar=c(phi), ma=c(theta)), innov = rnorm(N)) if(etype==2) y <- arima.sim(n=N, list(ar=phi, ma=theta), innov = rt(N,tdf)) if(etype==3) y <- arima.sim(n=N, list(ar=phi, ma=theta), innov = runif(N,min=-1,max=1)) if(etype==4) y <- arima.sim(n=N, list(ar=phi, ma=theta), innov = (rexp(N)-1)) zz<-arima(y,c(1,0,1),method="ML") coef <- zz$coef se <- sqrt(diag(zz$var.coef)) LCI <- coef - 2*se UCI <- coef + 2*se arlow[i] <- LCI[1] arup[i] <- UCI[1] malow[i] <- LCI[2] maup[i] <- UCI[2] if(arlow[i] < phi && arup[i] > phi) arcov <- arcov + 1 if(malow[i] < theta && maup[i] > theta) macov <- macov + 1 } arcov <- arcov/nruns arlen <- sqrt(N) * mean(arup - arlow) macov <- macov/nruns malen <- sqrt(N) * mean(maup - malow) list(arcov = arcov, arlen = arlen, macov = macov, malen = malen) } tsmsel <- function(Y,dd=0,pmax=5,qmax=5){ #makes a matrix of BICs - min(BIC) for ARIMA(p,dd,q) models for time series Y #uses method = "ML" to reduce errors #reduce pmax by 1 if an error message occurs, may need to reduce qmax #minbic is the min bic for the arima(p,dd,q) models, Y <- as.vector(Y) bics <- matrix(, (pmax+1), (qmax+1), dimnames=list(p=0:pmax, q=0:qmax)) aics<-bics mbic<-10^100 maic<-mbic aord <- c(0,0) bord <- aord for(p in 0:pmax) for(q in 0:qmax){ z <- arima(Y, c(p,d=dd,q), method = "ML", optim.control = list(maxit = 500)) aicz <- AIC(z) aics[1+p, 1+q] <- aicz if(aicz < maic){ maic<-aicz zaic <- z aord <- c(p,q)} bicz <- BIC(z) bics[1+p, 1+q] <- bicz if(bicz < mbic){ mbic<-bicz zbic <- z bord <- c(p,q)} } minaic <- min(aics, na.rm = TRUE) minbic <- min(bics, na.rm = TRUE) aics <- round(aics - minaic, 2) bics <- round(bics - minbic, 2) list(zaic=zaic,zbic=zbic,aics=aics,bics=bics,aord=aord,bord=bord) } tsNA<-function(Y,k=6){ #Y is a time series (possibly differenced) #that is assumed to be weakly stationary, MA(oo), and AR(oo) #Y gets changed to NA if Y > MED + k MAD or if Y < MED - k MAD kmadd <- k*mad(Y,constant=1) medd <- median(Y) up <- medd + kmadd low<- medd - kmadd maxy <- max(Y[Ylow]) W <- Y W[Y>up] <- NA W[Y 0. #etype = 1 for N(0,1) errors, etype = 2 for t distr with tdf=degrees of freedom #etype = 3 for U(-1,1) errors, etype=4 for EXP(1)-1 errors #Needs auto.arima from library(forecast) and calls armamsel2. #for an arima(ps,dd,qs) true model, estimates r = max(ps,qs) #assumes n >= 20 (kmax), full model beta = (phi1,...,phikmax,theta1,...,thetakmax) #dd is the amount of difference, dd=0,1,or 2, assumed known npL <- n + L np1 <- n+1 npilen <- matrix(0,nrow=nruns,ncol=L) ppilen <- npilen ppicov <- as.vector((0*1:L)) npicov <- ppicov #assume n - q - p > 30 tcut <- qnorm((1-alph/2)) kp1 <- kmax+1 if(tstype==1){#tstype=1 for AR(1) model phi<-c(0.5) ps<-1 qs <- 0 } if(tstype==2) { #tstype=2 for AR(2) model phi=c(0.5,0.33) ps<-2 qs<-0 } if(tstype==3){#tstype=3 for MA(1) model theta<-c(-0.5) ps<-0 qs <- 1 } if(tstype==4) { #tstype=4 for MA(2) model theta=c(-0.5,0.5) ps<-0 qs<-2 } if(tstype==5){#need kmax > 2 #tstype=5 for ARMA(3,1) model phi <- c(0.7,0.1,-0.4) theta <- 0.1 ps<-3 qs<-1 } if(tstype==6){#need kmax >= rr #tstype=6 for ARMA model selected by user phi <- as.vector(phi) theta <- as.vector(theta) ps<-length(phi) qs<-length(theta) } for(i in 1:nruns){ if(tstype<3){ if (etype==1) Y <- arima.sim(n=npL, list(order = c(ps,dd,qs), ar =c(phi)), innov = rnorm(npL)) if(etype==2) Y <- arima.sim(n=npL, list(order = c(ps,dd,qs), ar =c(phi)), innov = rt(npL,tdf)) if(etype==3) Y <- arima.sim(n=npL, list(order = c(ps,dd,qs), ar =c(phi)), innov = runif(npL,min=-1,max=1)) if(etype==4) Y <- arima.sim(n=npL, list(order = c(ps,dd,qs), ar =c(phi)), innov = (rexp(npL)-1)) } if(tstype==3 || tstype==4){ if (etype==1) Y <- arima.sim(n=npL, list(order = c(ps,dd,qs), ma=c(theta)), innov = rnorm(npL)) if(etype==2) Y <- arima.sim(n=npL, list(order = c(ps,dd,qs), ma=c(theta)), innov = rt(npL,tdf)) if(etype==3) Y <- arima.sim(n=npL, list(order = c(ps,dd,qs), ma=c(theta)), innov = runif(npL,min=-1,max=1)) if(etype==4) Y <- arima.sim(n=npL, list(order = c(ps,dd,qs), ma=c(theta)), innov = (rexp(npL)-1)) } if(tstype==5){ if (etype==1) Y <- arima.sim(n=npL,list(order = c(ps,dd,qs),ar=c(phi),ma=c(theta)),innov = rnorm(npL),n.start=burn) if(etype==2) Y <- arima.sim(n=npL,list(order = c(ps,dd,qs),ar=c(phi),ma=c(theta)),innov = rt(npL,tdf),n.start=burn) if(etype==3) Y <- arima.sim(n=npL, list(order = c(ps,dd,qs),ar=c(phi),ma=c(theta)), innov = runif(npL,min=-1,max=1),n.start=burn) if(etype==4) Y <- arima.sim(n=npL,list(order = c(ps,dd,qs), ar=c(phi),ma=c(theta)),innov=(rexp(npL)-1),n.start=burn) } if(tstype==6){ #arma model selected by the user if (etype==1) Y <- arima.sim(n=npL,list(order = c(ps,dd,qs), ar=c(phi),ma=c(theta)),innov = rnorm(npL),n.start=burn) if(etype==2) Y <- arima.sim(n=npL,list(order = c(ps,dd,qs), ar=c(phi),ma=c(theta)),innov = rt(npL,tdf),n.start=burn) if(etype==3) Y <- arima.sim(n=npL, list(order = c(ps,dd,qs), ar=c(phi),ma=c(theta)), innov = runif(npL,min=-1,max=1),n.start=burn) if(etype==4) Y <- arima.sim(n=npL,list(order = c(ps,dd,qs), ar=c(phi),ma=c(theta)),innov=(rexp(npL)-1),n.start=burn) } y <- Y[1:n] yf <- Y[np1:npL] #ARMA(p,q) model selection if(dd==0){ out<-auto.arima(y,d=dd,max.p=kmax,max.q=kmax,seasonal=FALSE,ic="aicc", stationary=TRUE) outt <- out } if(dd==1){ out<-auto.arima(y,d=dd,max.p=kmax,max.q=kmax,seasonal=FALSE,ic="aicc", stationary=FALSE) pI<-out$arma[1] qI<-out$arma[2] outt<-arima(y,order=c(pI,dd,qI),method="ML") } if(dd==2){ out<-auto.arima(y,d=dd,max.p=kmax,max.q=kmax,seasonal=FALSE,ic="aicc", stationary=FALSE) pI<-out$arma[1] qI<-out$arma[2] outt<-arima(y,order=c(pI,dd,qI),method="ML") } #get the h-step ahead forecasts for h = 1, ..., L temp <- predict(outt,n.ahead=L) #get the normal PI for(j in 1:L){ LPI <- temp$pred[j] - tcut*temp$se[j] UPI <- temp$pred[j] + tcut*temp$se[j] npilen[i,j] <- UPI-LPI if(LPI < yf[j] && yf[j] < UPI) npicov[j] <- npicov[j] + 1 } #use a better model selection method if(dd==0){ out2<-armamsel2(y,kmax=kmax,pen=pen) outt<-arima(y,order=c(out2$pI,dd,out2$qI),method="ML") } if(dd==1){ Z <- as.vector(y) Z[1] <- NA Z[-1] <- diff(y) out2<-armamsel2(Z,kmax=kmax,pen=pen) outt<-arima(y,order=c(out2$pI,dd,out2$qI),method="ML") } if(dd==2){ Z <- as.vector(y) Z[1:2] <- NA Z[-c(1,2)] <- diff(Y,differences=2) out2<-armamsel2(Z,kmax=kmax,pen=pen) outt<-arima(y,order=c(out2$pI,dd,out2$qI),method="ML") } #get the h-step ahead forecasts for h = 1, ..., L temp <- predict(outt,n.ahead=L) #get the normal PI for(j in 1:L){ LPI <- temp$pred[j] - tcut*temp$se[j] UPI <- temp$pred[j] + tcut*temp$se[j] ppilen[i,j] <- UPI-LPI if(LPI < yf[j] && yf[j] < UPI) ppicov[j] <- ppicov[j] + 1 } } npicov <- npicov/nruns npilen <- apply(npilen,2,mean) ppicov <- ppicov/nruns ppilen <- apply(ppilen,2,mean) list( npicov=npicov, npilen=npilen, ppicov = ppicov, ppilen = ppilen) } tsrob<-function(Y,k=6){ #Y is a time series (possibly differenced) #that is assumed to be weakly stationary, MA(oo), and AR(oo) #W gets NA if Y > MED + k MAD or if Y < MED - k MAD #the corresponding residuals rres also get NA #the fitted values rfit corresponding to NA get MED #uses AIC to select ARMA model order with W as the time series kmadd <- k*mad(Y,constant=1) medd <- median(Y) up <- medd + kmadd low<- medd - kmadd maxy <- max(Y[Ylow]) W <- as.vector(Y) W[Y>up] <- NA W[Y MED + k MAD or if Y < MED - k MAD #the corresponding residuals rres also get NA #the fitted values rfit corresponding to NA get MED kmadd <- k*mad(Y,constant=1) medd <- median(Y) up <- medd + kmadd low<- medd - kmadd maxy <- max(Y[Ylow]) W <- as.vector(Y) W[Y>up] <- NA W[Y= lo]) W <- Y W[W>up] <- wup W[W