#    Beginner's Guide to GAM with R
#    Alain Zuur

#    www.highstat.com

#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.


##############################################################
#Load the data
DF1 <-read.table("BaileyDensity.txt", 
                 header = TRUE)
DF2 <- DF1[DF1$MeanDepth > 800,]
DF  <- na.exclude(DF2)

#Inspect the results
names(DF)
str(DF)


##############################################################
#Housekeeping: load packages and support functions
library(lattice) #For multi-panel graphs
library(MASS)    #For NB GLM
library(mgcv)    #For GAMs


###############################################
#Section 4.2 review of GLM. Count data approach

DF$fPeriod <- factor(DF$Period)


#Figure 4.1
par(mfrow = c(2,2), mar = c(5,5,2,2))
dotchart(DF$SweptArea, 
         cex.lab = 1.5,
         xlab = "Swept area",
         ylab = "Order of the data")

dotchart(DF$TotAbund, 
         cex.lab = 1.5,
         xlab = "Total abundance",
         ylab = "Order of the data")

plot(x = DF$MeanDepth, 
     y = DF$SweptArea,
     xlab = "Mean depth (m)",
     ylab = "Swept area",
     cex.lab = 1.5)

plot(x = DF$MeanDepth, 
     y = DF$TotAbund,
     xlab = "Mean depth (m)",
     ylab = "Total abundance",
     cex.lab = 1.5)

######################################################
#Section 4.4 Results of Piosson and NB GLM

#Fit the models C1 and C2
C1 <- glm(TotAbund ~ MeanDepth * fPeriod,
          family = poisson, 
          data = DF)

C2 <- glm(TotAbund ~ MeanDepth * fPeriod,
          family = poisson(link = "identity"),
          start = coef(C1),
          data = DF)
#Warnings in C2!

AIC(C1, C2)

#Calculate the overdispersion
E1 <- resid(C1, type = "pearson")
N  <- nrow(DF)
p  <- length(coef(C1))
Overdispersion <- sum(E1^2) / (N - p)
Overdispersion

MyXLab <- "Mean sampling depth (m)"
MyYLab <- expression(paste("Fish density (",m^{-2}, ")"))
DF$MyPch <- c(1,16)[DF$Period]
#Figure 4.3
par(mar = c(5,5,2,2))
plot(x = DF$MeanDepth, 
     y = E1,
     cex.lab = 1.5,
     pch = DF$MyPch,
     xlab = MyXLab,
     ylab = "Pearson residuals")
abline(h = 0)

#Fit model C3
C3 <- glm.nb(TotAbund ~ MeanDepth * fPeriod,
             data = DF)


####################################################################
#Section 4.6 Poisson GLM with offset

#Fit Model C4
DF$LogSA <- log(DF$SweptArea)
C4 <- glm(TotAbund ~ MeanDepth * fPeriod + offset(LogSA),
         family = poisson, 
         data = DF)


####################################################################
#Section 4.8 Poisson and NB GAM with offset
#Fit Model C6
C6 <- gam(TotAbund ~ s(MeanDepth, by = fPeriod) + fPeriod + offset(LogSA),
          family = poisson, 
          data = DF)

#Calcualte overdispersion
E6 <- resid(C6, type = "pearson")
Overdispersion <- sum(E6^2) / (C6$df.res)
Overdispersion

summary(C6)


#To plot the smoothers we can use the following commands
par(mfrow = c(1,2))
plot(C6)

par(mfrow = c(2,2))
plot(C6, all.terms = TRUE)

par(mfrow = c(1,2))
plot(C6,select = c(1), main ="Period 1")
plot(C6,select = c(2), main ="Period 2")

par(mfrow = c(2,2))
plot(C6, all.terms = TRUE, resid = TRUE, scale = FALSE)

par(mfrow = c(1,2))
plot(C6, shade = TRUE, scale = FALSE)


#Fit Model C7
C7 <- gam(TotAbund ~ s(MeanDepth, by = fPeriod) + fPeriod +
                     offset(LogSA),
          family = negbin(theta = c(1,10)),
          data = DF)
summary(C7)
#See the warning for the more up to date mgcv version.
#Use:
C7 <- gam(TotAbund ~ s(MeanDepth, by = fPeriod) + fPeriod +
                     offset(LogSA),
          family = nb(),
          data = DF)
#theta <- getTheta(C7)


#Fit Model C8
C8 <- gam(TotAbund ~ s(MeanDepth) + fPeriod +
                     offset(LogSA),
          family = nb(theta = 2.048),  #Slightly difference values as in book
          data = DF)                   #Due to different mgcv version! 

AIC(C7, C8)


#Figure 4.4
par(mfrow=c(1,2), mar = c(5,6,3,2))
plot(C7, select = 1, main = "Period 1")
plot(C7, select = 2, main = "Period 2")


##########################################################################
#Figure 4.5
MeanDepth.1 <- DF$MeanDepth[DF$fPeriod=="1"]
MeanDepth.2 <- DF$MeanDepth[DF$fPeriod=="2"]

MLSA1 <- mean(DF$LogSA[DF$fPeriod=="1"])
MLSA2 <- mean(DF$LogSA[DF$fPeriod=="2"])

ND1 <- data.frame(MeanDepth = seq(min(MeanDepth.1),
                                  max(MeanDepth.1),
                                  by = 1),
                  fPeriod = "1", 
                  LogSA = MLSA1)
ND2 <- data.frame(MeanDepth = seq(min(MeanDepth.2),
                                  max(MeanDepth.2),
                                  by = 1),
                  fPeriod = "2", 
                  LogSA = MLSA2)

PM1.1 <- predict(C7,newdata = ND1, se = TRUE)
PM1.2 <- predict(C7,newdata = ND2, se = TRUE)

AllY <- DF$TotAbun
AllX <- DF$MeanDepth

AllPeriod <- DF$Period
AllPch <- c(1,16)[AllPeriod]

#x axis for the lines
AllMD.1 <- ND1$MeanDepth
AllMD.2 <- ND2$MeanDepth

#y axis for fitted lines
AllFit.1 <- c(PM1.1$fit)
AllFit.2 <- c(PM1.2$fit)

AllSE.1 <- c(PM1.1$se.fit)
AllSE.2 <- c(PM1.2$se.fit)

xyplot(AllY ~ AllX ,
       strip = function(bg='white', ...) strip.default(bg='white', ...),
       scales = list(alternating = T,
                     x = list(relation = "same"),
                     y = list(relation = "free")),
       xlab = "Mean depth (m)",
       ylab = "Total abundance",
       panel=function(x,y, subscripts,...){
              x1 <- AllMD.1
              x2 <- AllMD.2
              y1 <- AllFit.1
              y2 <- AllFit.2
              se1<- AllSE.1
              se2<- AllSE.2
              panel.grid(h=-1, v= 2)

              z1.low <- exp(y1 - 1.96*se1)
              z1.up  <- exp(y1 + 1.96*se1)
              z2.low <- exp(y2 - 1.96*se2)
              z2.up  <- exp(y2 + 1.96*se2)

              panel.polygon(c(x1,rev(x1)),c(z1.low,rev(z1.up)),
                            col=grey(0.4),border=NULL)
              panel.polygon(c(x2,rev(x2)),c(z2.low,rev(z2.up)),
                            col=grey(0.7), border=NULL)
              panel.points(x,y, col = 1,
                           pch=AllPch[subscripts],cex=1)
              panel.lines(x1,exp(y1),lwd=3,lty=1,col=1)
              panel.lines(x2,exp(y2),lwd=3,lty=1,col=1)
            })
#I guess ggplot2 code would be much shorter!


#Figure 4.6
#Model validation should be for model M7
#Not M8!
E7 <- resid(C7, type = "pearson")
F7 <- fitted(C7, type = "response")

par(mfrow = c(2,2), mar = c(5,5,2,3))
plot(x = F7, 
     y = E7, 
     xlab = "Fitted values", 
     ylab = "Pearson Residuals")

plot(x = DF$MeanDepth, 
     y = E7, 
     xlab = "Mean depth (m)", 
     ylab ="Pearson residuals")
abline(h = 0, lty = 2)

plot(E7 ~ fPeriod, 
     data = DF, 
     ylab = "Pearson residuals", 
     xlab = "Period")
     
hist(E7, 
     xlab = "Pearson residuals", 
     main = "")

############## End of code #########