#This file gives some R code useful for some homework #problems for Math 474 Time Series, Fall 2013. #The easiest way to get tspack.txt is to copy and paste the following command into R. source("http://parker.ad.siu.edu/Olive/tspack.txt") #Here is a method to attach the text's TSA package. chooseCRANmirror() #select USA(IA) install.packages("TSA") #should say stuff was installed after several minutes #type the following command to start using the TSA functions and data library("TSA") #can also click packages, set CRAN mirror #choose US(IA) #click on packages, install.packages #and choose TSA #These commands can fail if your computer has a firewall. #Then an administrator may be needed to install the packages. #Shumway and Stoffer http://www.stat.pitt.edu/stoffer/tsa3/ #Quandl 7 million time series data sets http://www.quandl.com/about/overview/academics #search key words time series http://lib.stat.cmu.edu/datasets/ # Some data sets here (http://datamarket.com/data/list/?q=provider:tsdl). #time series data library ##Using the ar function in R. Here lh is a time series. help(lh) help(ar) ?ar # plot the time series plot(lh) #plot the ACF and PACF with tspack function acfpacf acfpacf(lh) #Could be an AR(1) or an MA(1) model #WARNING: the ACF in R starts at 0 with a rho_0 = 1, ignore this spike. #The default is AR(p) where p minimized the AIC, in this case p = 3. #The default often picks p too large. #To fit an AR(1) use the following command. out1 <- ar(lh,AIC=FALSE,order.max=1) #residual ACF and residual PACF, looks like a white noise, ignoring lag 0 #Z statistics for auto and partial autocorrelations of the residuals. #Look closely at value of |Z| > 1.25 for lags 1, 2 and 3 #and |Z| > 1.6 for the other lags. The third lag has a big |Z| for both #the ACF and PACF. #uses tspack function resacfs resacfs(out1) out<-ar(lh) out summary(out) #fitted coefficients out$ar [1] 0.65340168 -0.06362084 -0.22694020 #AIC for AR(1), AR(2), ..., AR(k) models out$aic #first p are NA out$resid #response plot Y <- as.vector(lh) FIT <- Y-out$resid FIT <- as.real(FIT) plot(FIT,Y) abline(0,1) #residual plot plot(FIT,out$resid) #residual plot vs time plot(1:(length(Y)),out$resid,type="l") #variance covariance matrix of the estimated coefficients out$asy.var.coef #Z statistics and pvalues for the AR coefficients Z <- as.vector(out$ar/sqrt(diag(out$asy.var.coef))) pval <- 2 * (1-pnorm(abs(Z))) Z pval ####end using R function ar ##Using the R function arima, out <- arima(Y,c(p,d,q)) #Y is the time series, c(p,d,q) gives the orders of the ARIMA(p,d,q) model. # plot the time series plot(lh) #plot the ACF and PACF with tspack function acfpacf acfpacf(lh) #Could be an AR(1) or an MA(1) model #WARNING: the ACF in R starts at 0 with a rho_0 = 1, ignore this spike. #fit an AR(1) model out <- arima(lh,c(1,0,0)) #Get ACF of residuals, residual plot of time vs standardized residuals, # and Ljung Box statistics. tsdiag(out) ##Using tspack function resacfs, getthe ACF of the residuals, #PACF of residuals, and Z test statistics. #Look closely at value of |Z| > 1.25 for lags 1, 2 and 3 #and |Z| > 1.6 for the other lags. The |Z| for the 3rd lag #is large for both the residual ACF and residual PACF. resacfs(out) ##Using tspack function resplots, get response plot, residual plot #and table of coefficients, standard errors, Z statistics and pvals. resplots(lh,out) ##Warning, MA coefficients tau_i = -theta_i of those given in the book, ##so they differ in sign. ####end using R function arima help(lynx) plot(lynx) #HW3A) lynx data #a) lnlynx <- log(lynx) plot(lnlynx) #b) lnlynx1 <- 1:114 lnlynx2 <- 1:114 lnlynx1[1:113] <- lnlynx[2:114] lnlynx2[1:112] <- lnlynx[3:114] x <- cbind(lnlynx,lnlynx1,lnlynx2) x<-x[1:112,] pairs(x) #c) acfpacf(lnlynx) #d) out <- arima(lnlynx,c(2,0,0)) tsdiag(out) #e) resplots(lnlynx,out) #f) resacfs(out) #HW3B) AR(1) simulation ar1 <- arima.sim(list(order = c(1,0,0), ar = 0.7), n = 200) acfpacf(ar1) outp1 <- arima(ar1,c(1,0,0)) plot(ar1) points((ar1-outp1$resid)) tsdiag(outp1) #a) resplots(ar1,outp1) resacfs(outp1) #incorrectly fit an MA(1) model outq1 <- arima(ar1,c(0,0,1)) plot(ar1) points((ar1-outq1$resid)) tsdiag(outq1) resplots(ar1,outq1) resacfs(outq1) #b) outp1q1 <- arima(ar1,c(1,0,1)) resplots(ar1,outp1q1) #HW3C) MA(1) simulation ma1 <- arima.sim(list(order = c(0,0,1), ma = 0.7), n = 200) acfpacf(ma1) outq1 <- arima(ma1,c(0,0,1)) plot(ma1) points((ma1-outq1$resid)) tsdiag(outq1) #a) resplots(ma1,outq1) resacfs(outq1) #incorrectly fit an AR(1) model outp1 <- arima(ma1,c(1,0,0)) plot(ma1) points((ma1-outp1$resid)) tsdiag(outp1) resplots(ma1,outp1) resacfs(outp1) #check with a bigger model #b) outp1q1 <- arima(ma1,c(1,0,1)) resplots(ma1,outp1q1) #HW3D) ARMA(1,1) simulation armap1q1 <- arima.sim(list(order = c(1,0,1), ar = -0.5, ma = 0.7), n = 200) plot(armap1q1) acfpacf(armap1q1) outp1q1 <- arima(armap1q1,c(1,0,1)) tsdiag(outp1q1) resplots(armap1q1,outp1q1) resacfs(outp1q1) #HW3E) armap2q2 <- arima.sim( list( ar=c(0.8897, -0.4858), ma=c(-0.2279, 0.2488) ), n = 200) plot(armap2q2) acfpacf(armap2q2) outp2q2 <- arima(armap2q2,c(2,0,2)) tsdiag(outp2q2) resplots(armap2q2,outp2q2) resacfs(outp2q2) #HW4 A) #a) get modulus of roots of hatphi(B) = 0 y <- arima.sim(n=100, list(ar=c(0.4,-0.2)), innov = rnorm(100)) out <- arima(y,c(2,0,0)) p<-2 Mod(polyroot(c(1, -out$coef[-(p+1)]))) #b) Get modulus of roots of hattheta(B) = 0 #R uses -theta so no - sign y <- arima.sim(n=100, list(ma=c(0.4,-0.2)), innov = rnorm(100)) out <- arima(y,c(0,0,2)) q<-2 Mod(polyroot(c(1, out$coef[-(q+1)]))) #c) Get modulus of roots of hatphi(B) = 0 and of hattheta(B) = 0 #i) y <- arima.sim(n=100, list(ar=c(-0.1,0.3), ma=c(0.4,-0.2)), innov = rnorm(100)) out <- arima(y,c(2,0,2)) p<-2 q<-2 #AR hatphi(B) = 0 Mod(polyroot(c(1, -out$coef[1:p]))) #ii) #MA hattheta(B) = 0 Mod(polyroot(c(1, out$coef[(p+1):(p+q)]))) #HW4 B) #a) par(mfrow=c(2,1)) plot(WWWusage) w <- diff(WWWusage) plot(w) par(mfrow=c(1,1)) #b) acfpacf(WWWusage) #c) acfpacf(w) #d) out <- arima(WWWusage,c(3,1,0)) resplots(WWWusage,out,d=1) #e) resacfs(out) #f) tsdiag(out) #g) #w <- diff(WWWusage) #w <- c(NA,w) #w - out$resid #fitted values hat{W}_t ###### #HW5 B) http://www.stat.pitt.edu/stoffer/tsa3/ #b) recruit <- scan() #Copy and paste the recruit data below this command, then hit enter. #c) out<-arima(recruit,c(1,0,1)) resplots(recruit,out) intercept(out,p=1,q=1) #d) aicmat(recruit) #HW5 C) par(mfrow=c(2,1)) plot(UKgas) plot(log(UKgas)) par(mfrow=c(1,1)) #HW5 D) #a) Y <- log(lynx) out<-arima(Y,c(2,0,0)) resplots(Y,out) intercept(out,p=2,q=0) #b) click the right mouse button and stop twice out<-arp(Y,p=2) summary(out$tem) #HW5 E) #a) Y <- WWWusage out<-arima(Y,c(3,1,0)) resplots(Y,out,d=1) #b) W <- diff(Y) out<-arima(W,c(3,0,0)) resplots(W,out) #c) out<-arima(W,c(3,0,0),include.mean=FALSE) resplots(W,out) ####HW6 #HW6 A) #a) macisim(N=100,nruns=1000,etype=1) #b) macisim(N=100,nruns=1000,etype=2) #c) macisim(N=100,nruns=1000,etype=3) #d) macisim(N=100,nruns=1000,etype=4) #HW6 B) #a) tscisim(N=1000,nruns=100,etype=1) #b) tscisim(N=1000,nruns=100,etype=2) #c) tscisim(N=1000,nruns=100,etype=3) #d) tscisim(N=1000,nruns=100,etype=4) #HW6 C) #a) par(mfrow=c(2,1)) plot(WWWusage) W<- diff(WWWusage) plot(W) par(mfrow=c(1,1)) #b) acfpacf(W) #c) aicmat(WWWusage,dd=1) #d) #i) outp3d1 <- arima(WWWusage,c(3,1,0)) resplots(WWWusage,outp3d1,d=1) #ii) resacfs(outp3d1) #iii) tsdiag(outp3d1) #e) #i) outp1d1q1 <- arima(WWWusage,c(1,1,1)) resplots(WWWusage,outp1d1q1,d=1) #ii) resacfs(outp1d1q1) #iii) tsdiag(outp1d1q1) #f) FFRRplots(WWWusage,outp3d1,outp1d1q1) #HW6 D) #a) plot(discoveries) #b) acfpacf(discoveries) #c) aicmat(discoveries) #d) Y <- discoveries outp1q1 <- arima(Y,c(1,0,1)) resplots(Y,outp1q1) resacfs(outp1q1) tsdiag(outp1q1) #e) outp2 <- arima(Y,c(2,0,0)) resplots(Y,outp2) resacfs(outp2) tsdiag(outp2) FFRRplots(Y,outp2,outp1q1) #f) outq3 <- arima(Y,c(0,0,3)) resplots(Y,outq3) resacfs(outq3) tsdiag(outq3) FFRRplots(Y,outq3,outp1q1) #g) intercept(outp1q1,p=1,q=1) intercept(outp2,p=2,q=0) intercept(outq3,p=0,q=3) ####HW7 #HW7 B) #a) pimasim(N=100,nruns=5000,Lmq=6,etype=1,alph=.05) #b) pimasim(N=100,nruns=5000,Lmq=6,etype=1,alph=.5) #c) pimasim(N=100,nruns=5000,Lmq=6,etype=2,alph=.05) #d) pimasim(N=100,nruns=5000,Lmq=6,etype=2,alph=.5) #e) pimasim(N=100,nruns=5000,Lmq=6,etype=3,alph=.05) #f) pimasim(N=100,nruns=5000,Lmq=6,etype=3,alph=.5) #g) pimasim(N=100,nruns=5000,Lmq=6,etype=4,alph=.05) #h) pimasim(N=100,nruns=5000,Lmq=6,etype=4,alph=.5) #HW 7 C) #a) http://www.stat.pitt.edu/stoffer/tsa3/ varve <- scan() #b) Y <- log(varve) plot(Y,type="l") #c) acfpacf(Y) #d) If d = 0, use the commands in b) and c). #If d = 1, use the following commands. W<-diff(Y) plot(W,type="l") acfpacf(W) #e) Use the command with dd=d where d=0 or d=1. aicmat(Y,dd=0) aicmat(Y,dd=1) #####HW 8 #HW8 A) Y <- log(lynx) plot(Y) acfpacf(Y) #If d = 0, use the above commands. #If d = 1, use the following commands. W<-diff(Y) plot(W) acfpacf(W) #Use the command with dd=d where d=0 or d=1. aicmat(Y,dd=0) aicmat(Y,dd=1) #HW8 C) #a) X <- AirPassengers plot(X) #X starts in January mnth <- as.vector(c("J","F","M","A","Y","U","L","G","S","O","N","D")) points(y=X,x=time(X),pch=mnth) #b) Y <- log(AirPassengers) W <- diff(Y) plot(W) # W starts at F mnth <- as.vector(c("F","M","A","Y","U","L","G","S","O","N","D","J")) points(y=W,x=time(Y)[-1],pch=mnth) #c) acfpacf(W) #d) out <- arima(Y,c(0,1,0),seasonal = list(order=c(1, 0 ,1), period=12)) out #SAR(1) and SMA(1) seem significant saics(Y,dd=1,pmax=3,qmax=3,pbig=3,qbig=3,P=1,Q=1) #e) out <- arima(Y,c(2,1,3),seasonal = list(order=c(1, 0 ,1), period=12),method="ML") resplots(Y,out) #f) resacfs(out) #g) tsdiag(out) #h) out2 <- arima(Y,c(0, 1, 1),seasonal = list(order=c(0, 1 ,1), period=12)) resplots(Y,out2) #i) AIC(out,out2) #j) FFRRplots(Y,out,out2) #m) out3 <- arima(Y,c(0, 1, 0),seasonal = list(order=c(1, 1 ,1), period=12)) out3 out4 <- arima(Y,c(0, 1, 0),seasonal = list(order=c(0, 1 ,1), period=12)) out4 out5 <- arima(Y,c(0, 1, 0),seasonal = list(order=c(1, 1 ,0), period=12)) out5 saics(Y,dd=1,pmax=4,qmax=4,pbig=4,qbig=4,P=0,D=1,Q=1) ####HW 9 ##HW9 A) #a) get the differenced time series when d=1 and D=1, then plot it Y <- log(AirPassengers) X <- diff(diff(Y),lag=12) plot(X) #b) acfpacf(X) #looks a bit like an ARMA(0,1,1)x(0,1,1)_12 model ##HW9 B) #a) Y <- log(lynx) out <- arima(Y,order=c(3,0,3)) out #b) force MA(2) coef = 0, last NA is for the mean (intercept line) #first 3 NA's are for the AR parameters out2 <- arima(Y,order=c(3,0,3),fixed=c(NA,NA,NA,NA,0,NA,NA)) out2 #c) FFRRplots(Y,out,out2) ##HW9 C) Y <- discoveries plot(Y) acfpacf(Y) #If d = 0, use the above commands. #If d = 1, use the following commands. W<-diff(Y) plot(W) acfpacf(W) #Use the command with dd=d where d=0 or d=1. aicmat(Y,dd=0) aicmat(Y,dd=1,pmax=1) #can't make pmax larger and get output ##HW9 D) #a) plot(co2) mnth <- as.vector(c("F","M","A","Y","U","L","G","S","O","N","D","J")) points(y=co2,x=time(co2),pch=mnth) #b) Y <- co2 W <- diff(Y) plot(W) acfpacf(W) out <- arima(Y,c(0,1,0),seasonal=list(order=c(1,0,1),period=12),method="ML") out #both significant, sar1 = 1 so D = 1 #c) X <- diff(diff(Y),lag=12) plot(X) #d) acfpacf(X) #e) outQ1 <- arima(Y,c(0,1,0),seasonal=list(order=c(0,1,1),period=12),method="ML") outQ1 outQ2 <- arima(Y,c(0,1,0),seasonal=list(order=c(0,1,2),period=12),method="ML") outQ2 #do not need SMA2 outP1Q1 <- arima(Y,c(0,1,0),seasonal=list(order=c(1,1,1),period=12),method="ML") outP1Q1 #do not need SAR1 #f) saics(Y,dd=1,P=0,D=1,Q=1) #too a long time to run, might want to hit the Esc key saics(Y,dd=1,pmax=5,qmax=5,pbig=5,qbig=5,P=0,D=1,Q=1) #took a long time to run #g) #i) out <- arima(Y,c(0,1,1),seasonal=list(order=c(0,1,1),period=12),method="ML") resplots(Y,out) #ii) resacfs(out) #iii) tsdiag(out) #h) #i) out3 <- arima(Y,c(0,1,3),seasonal=list(order=c(0,1,1),period=12),method="ML") resplots(Y,out3) #ii) resacfs(out3) #iii) tsdiag(out3) #iv) FFRRplots(Y,out,out3) #####HW10 ##HW10 A #a) library("TSA") data(CREF) r.cref <- diff(log(CREF))*100 acfpacf(r.cref) #b) acfpacf( (r.cref^2) ) #c) # test whether ARCH is present McLeod.Li.test(y=r.cref) #d) # test whether r_t is a normal white noise #i) shapiro.test(r.cref) #ii) jarque.bera.test(r.cref) #iii) qqnorm(r.cref) qqline(r.cref) ##HW10 B #a) aicmat(r.cref^2) #b) eacf(r.cref^2) #c) aicmat(abs(r.cref)) #d) eacf(abs(r.cref)) #e) out11 <- garch(x=r.cref,order=c(1,1)) summary(out11) #f) out12 <- garch(x=r.cref,order=c(1,2)) summary(out12) #g) out21 <- garch(x=r.cref,order=c(2,1)) summary(out21) ##HW10 C) #library("TSA") #a) data(deere1) plot(deere1) #b) out2 <- arima(deere1,c(2,0,0)) resplots(deere1,out2) #c) outr1 <- robar(deere1,2) outr1$phihat #For each Y outlier in an AR(p) model, #there will be p outliers in the X matrix #which could cause up to p outliers in the fitted values. #So p+1 outliers in the XY matrix used to compute the robust estimator. #d) deerem2 <- deere1 deerem2[27] <- 8 out2m <- arima(deerem2,c(2,0,0)) resplots(deerem2,out2m) #e) outr2 <- robar(deerem2,2) outr2$phihat #f) cleanfit <- as.vector(deere1) - as.vector(out2m$resid) plot(as.vector(outr1$fit,),cleanfit) abline(0,1) #g) deerem3 <- deere1 deerem3[c(7,76)] <- c(25,26) outr3 <- robar(deerem3,2) plot(as.vector(outr3$fit),outr2$fit) abline(0,1) #identify(as.vector(outr3$fit),outr2$fit) #NAs mean the identified points are off by 2 #74, 25, 5 instead of 76,27,7 #h) deerem4 <- deere1 deerem4[c(7,76)] <- c(250,260) outr4 <- robar(deerem4,2) plot(as.vector(outr4$fit),outr2$fit) abline(0,1) #identify(as.vector(outr4$fit),outr2$fit #works better with massive outliers #outliers are easy to spot with response plot #since there Y values are outlying ##HW10 D) #library("TSA") data(deere3) Y <- deere3 plot(Y) acfpacf(Y) aicmat(Y) #or aicmat(Y,dd=1) ##HW10 E) #library("TSA") data(JJ) Y <-log(JJ) plot(Y) quatr <- as.vector(c("A","B","C","D")) points(y=Y,x=time(Y),pch=quatr) W <- diff(Y) plot(W) acfpacf(W) out <- arima(Y,c(0,1,0),seasonal=list(order=c(1,0,1),period=4),method="ML") out #sar1 approx 1 so D = 1 X <- diff(diff(Y),lag=4) plot(X) acfpacf(X) outQ1 <- arima(Y,c(0,1,0),seasonal=list(order=c(0,1,1),period=4),method="ML") outQ1 outQ2 <- arima(Y,c(0,1,0),seasonal=list(order=c(0,1,2),period=4),method="ML") outQ2 #do not need SMA2 outP1Q1 <- arima(Y,c(0,1,0),seasonal=list(order=c(1,1,1),period=4),method="ML") outP1Q1 #do not need SAR1 #period = 4 so need perd = 4 saics(Y,dd=1,P=0,D=1,Q=1,perd=4) ####HW12 ##HW12 A) Y <- nhtemp plot(Y) acfpacf(Y) #if differencing is needed then W <- diff(Y) plot(W) acfpacf(W) aicmat(Y) #or aicmat(Y,dd=1) ##HW12 B, modified 13.5 #a) t=1:96 set.seed(124) integer=sample(48,2) freq1=integer[1]/96; freq2=integer[2]/96 A1=rnorm(1,0,2); B1=rnorm(1,0,2) # sample normal "amplitudes" A2=rnorm(1,0,3); B2=rnorm(1,0,3) w=2*pi*t y=A1*cos(w*freq1)+B1*sin(w*freq1)+A2*cos(w*freq2)+B2*sin(w*freq2)+rnorm(96,0,1) plot(t,y,type="o",ylab=expression(y[t])) #b) library(TSA) spec(y,log="no",type="h",lwd=2) ##HW12 C) #library(TSA) #help(nottem) #a) nott <- window(nottem, end=c(1936,12)) spec(nott,log="no",type="h") #b) t=1:204 cos1=cos(2*pi*t*17/204) sin1=sin(2*pi*t*17/204) out<-lm(nott~cos1+sin1) summary(out) #c) Y<-nott RESID <- out$resid FIT <- out$fit plot(nott) points(t/12+1920,FIT) #d) resplots(Y,out) #e) acfpacf(RESID) #sinusoidal pattern in the acf #f) McLeod.Li.test(y=RESID) ##HW12 D) #a) fflynx() ##HW12 E) #a) #library(TSA) Y <- log10(lynx) out2 <- arima(Y,c(2,0,0)) resplots(Y,out2) intercept(out2,p=2,q=0) #b) out11 <- arima(Y,c(11,0,0)) resplots(Y,out11) intercept(out11,p=11,q=0) #c) out12 <- arima(Y,c(12,0,0)) resplots(Y,out12) intercept(out12,p=12,q=0) #d) setar272 <- tar(Y,p1=7,p2=2,d=2,estimate.thd = FALSE, threshold=3.116,print=TRUE) #e) setar252 <- tar(Y,p1=5,p2=2,d=2,estimate.thd = FALSE, threshold=3.05,print=TRUE)