#This file gives some R code useful for some homework
#problems for Math 474 Time Series, Fall 2013. 


#The easiest way to get tspack.txt is to copy and paste the following command into R.

source("http://parker.ad.siu.edu/Olive/tspack.txt")

#Here is a method to attach the text's TSA package.

chooseCRANmirror()
#select USA(IA)

install.packages("TSA")
#should say stuff was installed after several minutes

#type the following command to start using the TSA functions and data
library("TSA")

#can also click packages, set CRAN mirror
#choose US(IA)
#click on packages, install.packages
#and choose TSA

#These commands can fail if your computer has a firewall.
#Then an administrator may be needed to install the packages.

#Shumway and Stoffer    http://www.stat.pitt.edu/stoffer/tsa3/

#Quandl 7 million time series data sets   http://www.quandl.com/about/overview/academics

#search key words time series   http://lib.stat.cmu.edu/datasets/

# Some data sets here (http://datamarket.com/data/list/?q=provider:tsdl).
#time series data library



##Using the ar function in R. Here lh is a time series.

help(lh)
help(ar)
?ar

# plot the time series
plot(lh)

#plot the ACF and PACF with tspack function acfpacf

acfpacf(lh)

#Could be an AR(1) or an MA(1) model

#WARNING: the ACF in R starts at 0 with a rho_0 = 1, ignore this spike.

#The default is AR(p) where p minimized the AIC, in this case p = 3.
#The default often picks p too large.
#To fit an AR(1) use the following command.

out1 <- ar(lh,AIC=FALSE,order.max=1)

#residual ACF and residual PACF, looks like a white noise, ignoring lag 0
#Z statistics for auto and partial autocorrelations of the residuals.
#Look closely at value of |Z| > 1.25 for lags 1, 2 and 3
#and |Z| > 1.6 for the other lags. The third lag has a big |Z| for both
#the ACF and PACF.
#uses tspack function resacfs

resacfs(out1)

out<-ar(lh)
out
summary(out)

#fitted coefficients
out$ar
[1]  0.65340168 -0.06362084 -0.22694020

#AIC for AR(1), AR(2), ..., AR(k) models
out$aic

#first p are NA
out$resid

#response plot
Y <- as.vector(lh)
FIT <- Y-out$resid
FIT <- as.real(FIT)
plot(FIT,Y)
abline(0,1)

#residual plot
plot(FIT,out$resid)

#residual plot vs time
plot(1:(length(Y)),out$resid,type="l")

#variance covariance matrix of the estimated coefficients
out$asy.var.coef

#Z statistics and pvalues for the AR coefficients
Z <- as.vector(out$ar/sqrt(diag(out$asy.var.coef)))
pval <- 2 * (1-pnorm(abs(Z)))

Z
pval

####end using R function ar



##Using the R function arima, out <- arima(Y,c(p,d,q))
#Y is the time series, c(p,d,q) gives the orders of the ARIMA(p,d,q) model.

# plot the time series
plot(lh)

#plot the ACF and PACF with tspack function acfpacf

acfpacf(lh)

#Could be an AR(1) or an MA(1) model

#WARNING: the ACF in R starts at 0 with a rho_0 = 1, ignore this spike.

#fit an AR(1) model

out <- arima(lh,c(1,0,0))

#Get ACF of residuals,  residual plot of  time vs standardized residuals,
# and Ljung Box statistics.

tsdiag(out)

##Using tspack function resacfs, getthe ACF of the residuals,
#PACF of residuals, and Z test statistics.
#Look closely at value of |Z| > 1.25 for lags 1, 2 and 3
#and |Z| > 1.6 for the other lags. The |Z| for the 3rd lag
#is large for both the residual ACF and residual PACF.


resacfs(out)

##Using tspack function resplots, get response plot, residual plot
#and table of coefficients, standard errors, Z statistics and pvals.

resplots(lh,out)


##Warning, MA coefficients tau_i = -theta_i of those given in the book,
##so they differ in sign. 


####end using R function arima




help(lynx)
plot(lynx)

#HW3A) lynx data

#a)

lnlynx <- log(lynx)
plot(lnlynx)

#b)

lnlynx1 <- 1:114
lnlynx2 <- 1:114
lnlynx1[1:113] <- lnlynx[2:114]
lnlynx2[1:112] <- lnlynx[3:114]
x <- cbind(lnlynx,lnlynx1,lnlynx2)
x<-x[1:112,]
pairs(x)

#c)

acfpacf(lnlynx)

#d)

out <- arima(lnlynx,c(2,0,0))
tsdiag(out)

#e)

resplots(lnlynx,out)

#f) 

resacfs(out)



#HW3B) AR(1) simulation

ar1 <- arima.sim(list(order = c(1,0,0), ar = 0.7), n = 200)


acfpacf(ar1)

outp1 <- arima(ar1,c(1,0,0))
plot(ar1)
points((ar1-outp1$resid))


tsdiag(outp1)

#a)

resplots(ar1,outp1)

resacfs(outp1)

#incorrectly fit an MA(1) model 

outq1 <- arima(ar1,c(0,0,1))
plot(ar1)
points((ar1-outq1$resid))

tsdiag(outq1)


resplots(ar1,outq1)

resacfs(outq1)


#b)
outp1q1 <- arima(ar1,c(1,0,1))
resplots(ar1,outp1q1)


#HW3C) MA(1) simulation

ma1 <- arima.sim(list(order = c(0,0,1), ma = 0.7), n = 200)


acfpacf(ma1)

outq1 <- arima(ma1,c(0,0,1))
plot(ma1)
points((ma1-outq1$resid))

tsdiag(outq1)

#a)
resplots(ma1,outq1)

resacfs(outq1)

#incorrectly fit an AR(1) model 

outp1 <- arima(ma1,c(1,0,0))
plot(ma1)
points((ma1-outp1$resid))

tsdiag(outp1)

resplots(ma1,outp1)

resacfs(outp1)

#check with a bigger model
#b)

outp1q1 <- arima(ma1,c(1,0,1))
resplots(ma1,outp1q1)


#HW3D) ARMA(1,1) simulation

armap1q1 <- arima.sim(list(order = c(1,0,1), ar = -0.5, ma = 0.7), n = 200)

plot(armap1q1)

acfpacf(armap1q1)

outp1q1 <- arima(armap1q1,c(1,0,1))

tsdiag(outp1q1)

resplots(armap1q1,outp1q1)

resacfs(outp1q1)


#HW3E)

armap2q2 <- arima.sim( list( ar=c(0.8897, -0.4858), ma=c(-0.2279, 0.2488) ), n = 200)

plot(armap2q2)

acfpacf(armap2q2)

outp2q2 <- arima(armap2q2,c(2,0,2))

tsdiag(outp2q2)

resplots(armap2q2,outp2q2)

resacfs(outp2q2)

#HW4 A)

#a) get modulus of roots of hatphi(B) = 0

y <- arima.sim(n=100, list(ar=c(0.4,-0.2)), innov = rnorm(100))
out <- arima(y,c(2,0,0))
p<-2
Mod(polyroot(c(1, -out$coef[-(p+1)])))

#b) Get modulus of roots of hattheta(B) = 0
#R uses -theta so no - sign

y <- arima.sim(n=100, list(ma=c(0.4,-0.2)), innov = rnorm(100))
out <- arima(y,c(0,0,2))
q<-2
Mod(polyroot(c(1, out$coef[-(q+1)])))


#c) Get modulus of roots of hatphi(B) = 0 and of hattheta(B) = 0

#i) 
y <- arima.sim(n=100, list(ar=c(-0.1,0.3), ma=c(0.4,-0.2)), innov = rnorm(100))
out <- arima(y,c(2,0,2))
p<-2
q<-2
#AR  hatphi(B) = 0
Mod(polyroot(c(1, -out$coef[1:p])))

#ii)

#MA hattheta(B) = 0
Mod(polyroot(c(1, out$coef[(p+1):(p+q)])))




#HW4 B)

#a)

par(mfrow=c(2,1))
plot(WWWusage)
w <- diff(WWWusage)
plot(w)
par(mfrow=c(1,1)) 

#b)

acfpacf(WWWusage)

#c)

acfpacf(w)

#d)

out <- arima(WWWusage,c(3,1,0))
resplots(WWWusage,out,d=1)



#e)

resacfs(out)

#f)

tsdiag(out)


#g)
#w <- diff(WWWusage)
#w <- c(NA,w)
#w - out$resid #fitted values hat{W}_t



######



#HW5 B)   http://www.stat.pitt.edu/stoffer/tsa3/

#b)

recruit <- scan()

#Copy and paste the recruit data below this command, then hit enter.


#c)

out<-arima(recruit,c(1,0,1))
resplots(recruit,out)
intercept(out,p=1,q=1)

#d)

aicmat(recruit)

#HW5 C)


par(mfrow=c(2,1))
plot(UKgas)
plot(log(UKgas))
par(mfrow=c(1,1))


#HW5 D)

#a)

Y <- log(lynx)
out<-arima(Y,c(2,0,0))
resplots(Y,out)
intercept(out,p=2,q=0)


#b) click the right mouse button and stop twice 

out<-arp(Y,p=2)
summary(out$tem)

#HW5 E)

#a)

Y <- WWWusage
out<-arima(Y,c(3,1,0))
resplots(Y,out,d=1)

#b)

W <- diff(Y)
out<-arima(W,c(3,0,0))
resplots(W,out)

#c)

out<-arima(W,c(3,0,0),include.mean=FALSE)
resplots(W,out)


####HW6

#HW6 A)

#a)

macisim(N=100,nruns=1000,etype=1)

#b)

macisim(N=100,nruns=1000,etype=2)

#c)

macisim(N=100,nruns=1000,etype=3)

#d)

macisim(N=100,nruns=1000,etype=4)


#HW6 B) 

#a)

tscisim(N=1000,nruns=100,etype=1)

#b) 

tscisim(N=1000,nruns=100,etype=2)

#c)

tscisim(N=1000,nruns=100,etype=3)

#d)

tscisim(N=1000,nruns=100,etype=4)

#HW6 C)

#a)

par(mfrow=c(2,1))   
plot(WWWusage)      
W<- diff(WWWusage) 
plot(W)             
par(mfrow=c(1,1))

#b)

acfpacf(W)

#c)

aicmat(WWWusage,dd=1)

#d) 

#i)
outp3d1 <- arima(WWWusage,c(3,1,0))
resplots(WWWusage,outp3d1,d=1)

#ii)
resacfs(outp3d1)

#iii)
tsdiag(outp3d1)

#e) 

#i)

outp1d1q1 <- arima(WWWusage,c(1,1,1))
resplots(WWWusage,outp1d1q1,d=1)

#ii)
resacfs(outp1d1q1)

#iii)
tsdiag(outp1d1q1)

#f)

FFRRplots(WWWusage,outp3d1,outp1d1q1)

#HW6 D)


#a)

plot(discoveries)

#b)

acfpacf(discoveries)

#c)

aicmat(discoveries)

#d)

Y <- discoveries
outp1q1 <- arima(Y,c(1,0,1))
resplots(Y,outp1q1)

resacfs(outp1q1)

tsdiag(outp1q1)

#e)

outp2 <- arima(Y,c(2,0,0))
resplots(Y,outp2)

resacfs(outp2)

tsdiag(outp2)

FFRRplots(Y,outp2,outp1q1)


#f)
outq3 <- arima(Y,c(0,0,3))
resplots(Y,outq3)

resacfs(outq3)

tsdiag(outq3)

FFRRplots(Y,outq3,outp1q1)

#g)

intercept(outp1q1,p=1,q=1)

intercept(outp2,p=2,q=0)

intercept(outq3,p=0,q=3)



####HW7

#HW7 B)

#a)

pimasim(N=100,nruns=5000,Lmq=6,etype=1,alph=.05)

#b)

pimasim(N=100,nruns=5000,Lmq=6,etype=1,alph=.5)

#c) 

pimasim(N=100,nruns=5000,Lmq=6,etype=2,alph=.05)

#d)

pimasim(N=100,nruns=5000,Lmq=6,etype=2,alph=.5)

#e)

pimasim(N=100,nruns=5000,Lmq=6,etype=3,alph=.05)

#f)

pimasim(N=100,nruns=5000,Lmq=6,etype=3,alph=.5)

#g)

pimasim(N=100,nruns=5000,Lmq=6,etype=4,alph=.05)

#h)

pimasim(N=100,nruns=5000,Lmq=6,etype=4,alph=.5)

#HW 7 C) 

#a) http://www.stat.pitt.edu/stoffer/tsa3/

varve <- scan()

#b)

Y <- log(varve)
plot(Y,type="l")

#c)

acfpacf(Y)

#d) If d = 0, use the commands in b) and c).
#If d = 1, use the following commands.

W<-diff(Y)
plot(W,type="l")

acfpacf(W)

#e) Use the command with dd=d where d=0 or d=1.

aicmat(Y,dd=0)

aicmat(Y,dd=1)


#####HW 8

#HW8 A)

Y <- log(lynx)
plot(Y)

acfpacf(Y)

#If d = 0, use the above commands.
#If d = 1, use the following commands.

W<-diff(Y)
plot(W)

acfpacf(W)

#Use the command with dd=d where d=0 or d=1.

aicmat(Y,dd=0)

aicmat(Y,dd=1)

#HW8 C)

#a) 

X <- AirPassengers
plot(X) #X starts in January
mnth <- as.vector(c("J","F","M","A","Y","U","L","G","S","O","N","D"))
points(y=X,x=time(X),pch=mnth)

#b) 

Y <- log(AirPassengers)
W <- diff(Y)
plot(W)
# W starts at F
mnth <- as.vector(c("F","M","A","Y","U","L","G","S","O","N","D","J"))
points(y=W,x=time(Y)[-1],pch=mnth)

#c)

acfpacf(W)


#d)

out <- arima(Y,c(0,1,0),seasonal = list(order=c(1, 0 ,1), period=12))
out
#SAR(1) and SMA(1) seem significant

saics(Y,dd=1,pmax=3,qmax=3,pbig=3,qbig=3,P=1,Q=1)

#e)

out <- arima(Y,c(2,1,3),seasonal = list(order=c(1, 0 ,1), period=12),method="ML")
resplots(Y,out)

#f)

resacfs(out)

#g)

 tsdiag(out)


#h)

out2 <- arima(Y,c(0, 1, 1),seasonal = list(order=c(0, 1 ,1), period=12))
resplots(Y,out2)

#i)

 AIC(out,out2)

#j)

FFRRplots(Y,out,out2)


#m) 

out3 <- arima(Y,c(0, 1, 0),seasonal = list(order=c(1, 1 ,1), period=12))
out3

out4 <- arima(Y,c(0, 1, 0),seasonal = list(order=c(0, 1 ,1), period=12))
out4

out5 <- arima(Y,c(0, 1, 0),seasonal = list(order=c(1, 1 ,0), period=12))
out5

saics(Y,dd=1,pmax=4,qmax=4,pbig=4,qbig=4,P=0,D=1,Q=1)



####HW 9

##HW9 A)

#a) get the differenced time series when d=1 and D=1, then plot it

Y <- log(AirPassengers)
X <- diff(diff(Y),lag=12)
plot(X)


#b)

acfpacf(X)
#looks a bit like an ARMA(0,1,1)x(0,1,1)_12 model


##HW9 B)

#a)

Y <- log(lynx)
out <- arima(Y,order=c(3,0,3))
out


#b)  force MA(2) coef = 0, last NA is for the mean (intercept line)
#first 3 NA's are for the AR parameters

out2 <- arima(Y,order=c(3,0,3),fixed=c(NA,NA,NA,NA,0,NA,NA))
out2

#c)

FFRRplots(Y,out,out2)

##HW9 C) 

Y <- discoveries
plot(Y)

acfpacf(Y)

#If d = 0, use the above commands.
#If d = 1, use the following commands.

W<-diff(Y)
plot(W)

acfpacf(W)

#Use the command with dd=d where d=0 or d=1.

aicmat(Y,dd=0)

aicmat(Y,dd=1,pmax=1)
#can't make pmax larger and get output

##HW9 D)

#a)

plot(co2)
mnth <- as.vector(c("F","M","A","Y","U","L","G","S","O","N","D","J"))
points(y=co2,x=time(co2),pch=mnth)

#b)

Y <- co2
W <- diff(Y)
plot(W)

acfpacf(W)

out <- arima(Y,c(0,1,0),seasonal=list(order=c(1,0,1),period=12),method="ML")
out
#both significant, sar1 = 1 so D = 1

#c)

X <- diff(diff(Y),lag=12)
plot(X)


#d)

acfpacf(X)

#e)

outQ1 <- arima(Y,c(0,1,0),seasonal=list(order=c(0,1,1),period=12),method="ML")
outQ1

outQ2 <- arima(Y,c(0,1,0),seasonal=list(order=c(0,1,2),period=12),method="ML")
outQ2
#do not need SMA2

outP1Q1 <- arima(Y,c(0,1,0),seasonal=list(order=c(1,1,1),period=12),method="ML")
outP1Q1
#do not need SAR1

#f)


saics(Y,dd=1,P=0,D=1,Q=1)
#too a long time to run, might want to hit the Esc key

saics(Y,dd=1,pmax=5,qmax=5,pbig=5,qbig=5,P=0,D=1,Q=1)
#took a long time to run


#g)

#i)
out <- arima(Y,c(0,1,1),seasonal=list(order=c(0,1,1),period=12),method="ML")
resplots(Y,out)

#ii)
resacfs(out)

#iii)
tsdiag(out)

#h)

#i)

out3 <- arima(Y,c(0,1,3),seasonal=list(order=c(0,1,1),period=12),method="ML")
resplots(Y,out3)


#ii)
resacfs(out3)

#iii)
tsdiag(out3)

#iv)

FFRRplots(Y,out,out3)


#####HW10

##HW10 A

#a) 

library("TSA")
data(CREF)
r.cref <- diff(log(CREF))*100
acfpacf(r.cref)

#b) 

acfpacf( (r.cref^2) )

#c)

# test whether ARCH is present

McLeod.Li.test(y=r.cref)


#d)

# test whether r_t is a normal white noise

#i)

shapiro.test(r.cref)

#ii)

jarque.bera.test(r.cref)

#iii)

qqnorm(r.cref)
qqline(r.cref)


##HW10 B

#a)

aicmat(r.cref^2)

#b)

 eacf(r.cref^2)

#c) 

aicmat(abs(r.cref))


#d) 

eacf(abs(r.cref))

#e)

out11 <- garch(x=r.cref,order=c(1,1))
summary(out11)

#f) 

out12 <- garch(x=r.cref,order=c(1,2))
summary(out12)

#g)

out21 <- garch(x=r.cref,order=c(2,1))
summary(out21)


##HW10 C)

#library("TSA")

#a)

data(deere1)
plot(deere1)

#b)

out2 <- arima(deere1,c(2,0,0))
resplots(deere1,out2)

#c) 

outr1 <- robar(deere1,2)
outr1$phihat
#For each Y outlier in an AR(p) model,
#there will be p outliers in the X matrix
#which could cause up to p outliers in the fitted values.
#So p+1 outliers in the XY matrix used to compute the robust estimator.


#d)

deerem2 <- deere1
deerem2[27] <- 8
out2m <- arima(deerem2,c(2,0,0))
resplots(deerem2,out2m)

#e)

outr2 <- robar(deerem2,2)
outr2$phihat

#f)

cleanfit <- as.vector(deere1) - as.vector(out2m$resid)
plot(as.vector(outr1$fit,),cleanfit)
abline(0,1)

#g)


deerem3 <- deere1
deerem3[c(7,76)] <- c(25,26)
outr3 <- robar(deerem3,2)
plot(as.vector(outr3$fit),outr2$fit)
abline(0,1)
#identify(as.vector(outr3$fit),outr2$fit)
#NAs mean the identified points are off by 2
#74, 25, 5 instead of 76,27,7

#h)

deerem4 <- deere1
deerem4[c(7,76)] <- c(250,260)
outr4 <- robar(deerem4,2)
plot(as.vector(outr4$fit),outr2$fit)
abline(0,1)
#identify(as.vector(outr4$fit),outr2$fit
#works better with massive outliers
#outliers are easy to spot with response plot
#since there Y values are outlying


##HW10 D)

#library("TSA")
data(deere3)

Y <- deere3
plot(Y)

acfpacf(Y)

aicmat(Y)

#or 

aicmat(Y,dd=1)

##HW10 E)

#library("TSA")

data(JJ)
Y <-log(JJ)

plot(Y)
quatr <- as.vector(c("A","B","C","D"))
points(y=Y,x=time(Y),pch=quatr)



W <- diff(Y)
plot(W)

acfpacf(W)

out <- arima(Y,c(0,1,0),seasonal=list(order=c(1,0,1),period=4),method="ML")
out
#sar1 approx 1 so D = 1


X <- diff(diff(Y),lag=4)
plot(X)

acfpacf(X)


outQ1 <- arima(Y,c(0,1,0),seasonal=list(order=c(0,1,1),period=4),method="ML")
outQ1

outQ2 <- arima(Y,c(0,1,0),seasonal=list(order=c(0,1,2),period=4),method="ML")
outQ2
#do not need SMA2

outP1Q1 <- arima(Y,c(0,1,0),seasonal=list(order=c(1,1,1),period=4),method="ML")
outP1Q1
#do not need SAR1

#period = 4 so need perd = 4
saics(Y,dd=1,P=0,D=1,Q=1,perd=4)


####HW12

##HW12 A)

Y <- nhtemp
plot(Y)
acfpacf(Y)

#if differencing is needed then

W <- diff(Y)
plot(W)
acfpacf(W)


aicmat(Y)

#or

aicmat(Y,dd=1)

##HW12 B, modified 13.5

#a)

t=1:96
set.seed(124)
integer=sample(48,2)
freq1=integer[1]/96; freq2=integer[2]/96
A1=rnorm(1,0,2); B1=rnorm(1,0,2) # sample normal "amplitudes"
A2=rnorm(1,0,3); B2=rnorm(1,0,3)
w=2*pi*t
y=A1*cos(w*freq1)+B1*sin(w*freq1)+A2*cos(w*freq2)+B2*sin(w*freq2)+rnorm(96,0,1)
plot(t,y,type="o",ylab=expression(y[t]))

#b)

library(TSA)
spec(y,log="no",type="h",lwd=2)

##HW12 C)


#library(TSA)
#help(nottem)


#a)

nott <- window(nottem, end=c(1936,12))
spec(nott,log="no",type="h")


#b)
t=1:204 
cos1=cos(2*pi*t*17/204)
sin1=sin(2*pi*t*17/204)
out<-lm(nott~cos1+sin1)
summary(out)

#c)
Y<-nott
RESID <- out$resid
FIT <- out$fit
plot(nott)
points(t/12+1920,FIT)

#d)
resplots(Y,out)

#e)
acfpacf(RESID)
#sinusoidal pattern in the acf

#f)
McLeod.Li.test(y=RESID)

##HW12 D)

#a)

 fflynx()

##HW12 E)

#a)

#library(TSA)

Y <- log10(lynx)
out2 <- arima(Y,c(2,0,0))
resplots(Y,out2)
intercept(out2,p=2,q=0)

#b)

out11 <- arima(Y,c(11,0,0))
resplots(Y,out11)
intercept(out11,p=11,q=0)

#c)

out12 <- arima(Y,c(12,0,0))
resplots(Y,out12)
intercept(out12,p=12,q=0)

#d)

setar272 <- tar(Y,p1=7,p2=2,d=2,estimate.thd = FALSE, threshold=3.116,print=TRUE)

#e)

setar252 <- tar(Y,p1=5,p2=2,d=2,estimate.thd = FALSE, threshold=3.05,print=TRUE)