# This file was taken from the support material from Jagger et al. (2015).
# Jagger TH, Elsner JB, Widen HM (2015). A statistical model for regional
# tornado climate studies. PLoS ONE 10(8): e0131876. doi:10.1371/
# journal.pone.0131876
# This file contains support functions for the countywide statistics project.

print(paste("Sourcing CountyStatisticsSupport.R", "on", date(), sep = " "))
print("loading packages")
print("Version 1.2 Fri Dec 5 13:19:14 2014")

# slibrary() is assumed to be a project-wide wrapper around library(); it is
# not defined in this file, so provide a minimal fallback so the file can be
# sourced on its own.
if (!exists("slibrary")) {
  slibrary <- function(pkg) library(as.character(substitute(pkg)), character.only = TRUE)
}

slibrary(sp)
slibrary(rgeos)
slibrary(rgdal)
slibrary(plyr)
slibrary(maps)
slibrary(moments)
slibrary(spdep)
slibrary(maptools)
slibrary(mapproj)
slibrary(raster)
slibrary(stringr)
slibrary(scales)
slibrary(munsell)
slibrary(ggplot2)
slibrary(grid)
slibrary(INLA)
slibrary(reshape2)  # provides melt(), used in generatedayofweekdata()

# Multiple plot function
#
# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects)
# - cols:   Number of columns in layout
# - layout: A matrix specifying the layout. If present, 'cols' is ignored.
#
# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),
# then plot 1 will go in the upper left, 2 will go in the upper right, and
# 3 will go all the way across the bottom.
# Author: winston@stdout.org
multiplot <- function(..., plotlist = NULL, file, cols = 1, layout = NULL) {
  require(grid)

  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)
  numPlots = length(plots)

  # If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
    # Make the panel
    # ncol: Number of columns of plots
    # nrow: Number of rows needed, calculated from # of cols
    layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
                     ncol = cols, nrow = ceiling(numPlots/cols), byrow = TRUE)
  }

  if (numPlots == 1) {
    print(plots[[1]])
  } else {
    # Set up the page
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))

    # Make each plot, in the correct location
    for (i in 1:numPlots) {
      # Get the i,j matrix positions of the regions that contain this subplot
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
  }
}
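# Example use of multiplot() (a sketch, not run when sourcing; p1, p2, p3 are
# hypothetical ggplot objects): the layout matrix puts p1 top-left, p2
# top-right, and p3 across the bottom row.
if (FALSE) {
  p1 <- ggplot(mtcars, aes(wt, mpg)) + geom_point()
  p2 <- ggplot(mtcars, aes(factor(cyl))) + geom_bar()
  p3 <- ggplot(mtcars, aes(hp)) + geom_histogram()
  multiplot(p1, p2, p3, layout = matrix(c(1, 2, 3, 3), nrow = 2, byrow = TRUE))
}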
make.precip.df <- function(x = precip.raw) {
  precip.split = lapply(x, strsplit, "\t")
  precip.header = precip.split[[1]][[1]]
  precip.header[c(1, 2)] = c("county", "Year")
  lengthprecip = sapply(precip.split, function(x) length(x[[1]]))
  precip.df = do.call("rbind",
                      lapply(precip.split[-c(1, which(lengthprecip == 14))],
                             function(x) tolower(str_trim(x[[1]]))))
  cn = rle(precip.df[, 1])
  precip.df[, 1] = rep(cn$values[cn$values != ""], each = sum(cn$lengths[1:2]))
  colnames(precip.df) = str_trim(precip.header)
  precip.df
}

getBordersAndCenters <- function(x, CRS0) {
  centers.sp = rgeos::gCentroid(x, byid = TRUE)
  centers.sp = spTransform(centers.sp, CRS0)
  centers.df = as.data.frame(coordinates(centers.sp))
  names(centers.df) = c("long", "lat")
  centers.df$county = rownames(centers.df)
  borders.df = fortify(spTransform(x, CRS0), region = "county")
  borders.df$county = borders.df$id
  return(list(borders = borders.df, centers = centers.df))
}

plotTorn <- function(x, tracks) {
  # x is the spatial polygons object
  # tracks is the track file for each polygon.
  CRS0 = CRS("+proj=longlat +datum=WGS84")
  bc <- getBordersAndCenters(x, CRS0)
  bc$centers$nT = x$nT
  bc$centers$rate = round(x$rate, 2)
  borders.df = merge(bc$centers[c("county", "nT", "rate")], bc$borders, by = "county")
  # Use this data to make the plot of intensity
  mapT = ggplot(borders.df, aes(x = long, y = lat, group = county, fill = nT)) +
    geom_polygon(colour = "gray", size = .2) +
    coord_map(project = "polyconic") +
    theme(panel.grid.minor = element_blank(),
          panel.grid.major = element_blank(),
          panel.background = element_blank(),
          axis.ticks = element_blank(),
          axis.text = element_blank(),
          legend.position = "none") +
    # labs(title="Number of Tornadoes\n(1954-2012)") +
    xlab("") + ylab("") +
    scale_fill_gradient() +
    geom_text(data = bc$centers,
              aes(x = long, y = lat, group = NULL, label = nT),
              vjust = .5, size = 5, color = "white")
  # Generate the tracks data from the track.df file
  tracks = tracks[!duplicated(tracks$OM), ]
  tracks = subset(tracks, ELAT != 0)
  mapT +
    geom_segment(aes(x = SLON, xend = ELON, y = SLAT, yend = ELAT,
                     group = NULL, fill = NULL),
                 data = tracks, lineend = "round",
                 color = "orangered", size = 1.5, alpha = .6) +
    geom_text(data = bc$centers,
              aes(x = long, y = lat, group = NULL, label = nT),
              vjust = .5, size = 5, color = "white")
}

read.df <- function(x, values = "t") {
  for (i in values) x[x == i] <- NA
  if (is.matrix(x)) x <- data.frame(x, stringsAsFactors = FALSE)
  out <- data.frame(lapply(x, function(x)
    if (is.character(x)) type.convert(x, as.is = T) else x),
    stringsAsFactors = FALSE)
  if (!is.null(colnames(x))) colnames(out) <- colnames(x)
  if (!is.null(rownames(x))) rownames(out) <- rownames(x)
  out
}

processCity <- function(pop, field) {
  # Note: relies on a SpatialPolygonsDataFrame named 'spdf' in the calling
  # environment to check that county names line up.
  ctys = sapply(strsplit(pop$GEO.display.label, ","), function(x) x[1])
  ctys2 = tolower(sapply(strsplit(ctys, " "), function(x) x[1]))
  pop = pop[order(ctys2), ]
  if (!all(tolower(substring(pop$GEO.display.label, 1, nchar(spdf$county))) == spdf$county))
    stop("processCity failed: mismatched county names")
  pop[[field]]
}

processPop <- function(x) {
  # Process state county yearly population data.
  # Similar to melt() but spelled out in detail: reshapes a wide table
  # (one row per county, one column per year) into long format.
  years <- as.numeric(colnames(x)[-1])   # year values
  data <- as.matrix(x[, -1])
  nc = nrow(data)                        # number of counties
  ny = ncol(data)                        # number of years; should equal length(years)
  county <- tolower(gsub(" Co.", "", x[[1]], fixed = T))  # lowercase county names
  data1 <- data.frame(county = rep(county, ny),
                      Year = rep(years, each = nc),
                      pop = as.vector(data),
                      stringsAsFactors = F)
  return(data1[with(data1, order(county, Year)), ])
}
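# Example of processPop() on a small made-up table (county names and values
# here are hypothetical, just to show the wide-to-long reshaping):
if (FALSE) {
  pop.wide <- data.frame(County = c("Ada Co.", "Boise Co."),
                         "2010" = c(100, 50),
                         "2011" = c(110, 55),
                         check.names = FALSE)
  processPop(pop.wide)
  # Returns one row per county-year, ordered by county then Year:
  #   county Year pop
  #      ada 2010 100
  #      ada 2011 110
  #    boise 2010  50
  #    boise 2011  55
}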
changeTornDataTypes <- function(x = TornL) {
  # Converts character columns to numeric/integer/date types, length from
  # miles to meters, and width from yards to meters.
  # Adds a field called SeqID (sequence id), which can be used to identify
  # similar events split over multiple counties.
  x$Year = as.integer(x$YR)
  x$Month = as.integer(x$MO)
  x$EF = as.integer(x$MAG)
  x$Date = as.Date(x$DATE)
  x$Length = as.numeric(x$LEN) * 1609.34   # miles to meters
  x$Width = as.numeric(x$WID) * 0.9144     # yards to meters
  x$FAT = as.integer(x$FAT)
  x$SLON = as.numeric(x$SLON)
  x$SLAT = as.numeric(x$SLAT)
  x$ELON = as.numeric(x$ELON)
  x$ELAT = as.numeric(x$ELAT)
  x$INJ = as.numeric(x$INJ)
  x$SeqID = 1:nrow(x)
  return(x)
}

make.equal.positions <- function(nx, ny, by.row = T, topdown = T) {
  xval <- seq(0, 1, length = nx + 1)
  yval <- seq(0, 1, length = ny + 1)
  if (topdown) yval <- rev(yval)
  positions <- expand.grid(x = xval, y = yval)
  loc <- matrix(1:((nx + 1) * (ny + 1)), nx + 1, ny + 1)
  nx1 <- 1:nx
  ny1 <- 1:ny
  d = topdown * 1
  if (!by.row)
    out <- do.call("rbind", lapply(nx1, function(ix)
      do.call("rbind", lapply(ny1, function(iy)
        cbind(positions[loc[ix, iy + d], ], positions[loc[ix + 1, iy + 1 - d], ])))))
  else
    out <- do.call("rbind", lapply(ny1, function(iy)
      do.call("rbind", lapply(nx1, function(ix)
        cbind(positions[loc[ix, iy + d], ], positions[loc[ix + 1, iy + 1 - d], ])))))
  out <- t(out)
  colnames(out) <- paste("fig", 1:(nx * ny), sep = ".")
  rownames(out) <- c("xstart", "ystart", "xend", "yend")
  data.frame(out)
}

multipleplot <- function(tlist, nx, ny, by.row = T) {
  if (length(tlist) != nx * ny) warning("Missing Corners")
  positions <- make.equal.positions(nx, ny, by.row)
  for (i in 1:length(tlist)) {
    more = (i != length(tlist))
    print(tlist[[i]], position = positions[[i]], more = more)
  }
}

ggplotSpdata <- function(x, field, tfun = function(x) I(x),
                         color = muted("yellow", l = 50, c = 80)) {
  CRS0 = CRS("+proj=longlat +datum=WGS84")
  bc = getBordersAndCenters(x, CRS0)
  bc$centers$newfield = tfun(x[[field]])
  borders.df = merge(bc$borders, bc$centers[c("county", "newfield")], by = "county")
  mapO = ggplot(borders.df, aes(x = long, y = lat, group = group, fill = newfield)) +
    geom_polygon(colour = "gray", size = .2) +
    coord_map(project = "polyconic") +
    theme(panel.grid.minor = element_blank(),
          panel.grid.major = element_blank(),
          panel.background = element_blank(),
          axis.ticks = element_blank(),
          axis.text = element_blank(),
          legend.position = "none") +
    xlab("") + ylab("") +
    scale_fill_gradient2() +
    geom_text(data = bc$centers,
              aes(x = long, y = lat, group = NULL, label = newfield),
              vjust = .5, size = 5, color = color)
  mapO
}

ggplotmargin <- function(x, type, effect, xlab,
                         ylab = expression(Posterior ~~ Density),
                         int.value = c(value = 0, 2.5, 97.5),
                         color = c("red", "green", "blue")) {
  # Plot a posterior marginal from an inla() fit and add vertical reference
  # lines. Elements of int.value may be numeric percentiles (e.g. 2.5, 97.5),
  # a named raw value (value = 0), or the strings "mean", "median", "mode", "zero".
  xx <- as.data.frame(inla.smarginal(x[[paste("marginals", type, sep = ".")]][[effect]]))
  out <- ggplot(xx, aes(x, y)) + geom_line() + ylab(ylab) + xlab(xlab)
  if (length(int.value) == 0) int.value = 0
  int.value <- lapply(int.value, function(x)
    if (is.character(x)) type.convert(x, as.is = T) else x)
  int.value <- lapply(int.value, function(x)
    if (is.character(x)) lapply(strsplit(x, "=")[[1]], type.convert, as.is = T) else x)
  nx <- names(int.value)
  if (!is.null(nx))
    for (i in which(nx != "")) int.value[[i]] <- list(nx[i], int.value[[i]])
  int.value <- sapply(int.value, function(x) {
    if (is.numeric(x))
      xx$x[which.max(cumsum(xx$y)/sum(xx$y) >= as.numeric(x/100))]
    else switch(x[[1]],
                mean = sum(xx$y * xx$x)/sum(xx$y),
                median = xx$x[which.max(cumsum(xx$y)/sum(xx$y) >= .5)],
                mode = xx$x[which.max(xx$y)],
                value = x[[2]],
                zero = 0)
  })
  if (length(color) <= length(int.value)) color <- rep(color, length = length(int.value))
  for (i in 1:length(int.value))
    out <- out + geom_vline(xintercept = int.value[i], color = color[i])
  out
}
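# Example use of ggplotmargin() (a sketch, not run; 'fit' stands for a
# hypothetical inla() result with a fixed effect named "lpop"):
if (FALSE) {
  ggplotmargin(fit, type = "fixed", effect = "lpop",
               xlab = "Coefficient on log population",
               int.value = c(value = 0, 2.5, 97.5))
}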
printinla <- function(x, call = F, time = F, notes = F, digits = 3, ...) {
  if (class(x) == "inla") x <- summary(x, digits = digits)
  digits = digits
  if (call) {
    cat("\nCall:\n", inla.formula2character(x$call), "\n\n", sep = "")
  }
  if (time) {
    if (inla.is.element("cpu.used", x)) {
      cat("Time used:\n")
      print(x$cpu.used)
      cat("\n")
    }
  }
  if (inla.is.element("fixed", x)) {
    cat("Fixed effects:\n")
    print.default(x$fixed)
    cat("\n")
  } else {
    cat("The model has no fixed effects\n\n")
  }
  if (inla.is.element("lincomb.derived", x)) {
    cat("Linear combinations (derived):\n")
    print.default(x$lincomb.derived)
    cat("\n")
  }
  if (inla.is.element("lincomb", x)) {
    cat("Linear combinations:\n")
    print.default(x$lincomb)
    cat("\n")
  }
  if (inla.is.element("random.names", x)) {
    cat("Random effects:\n")
    cat("Name\t ", "Model\n ")
    for (i in 1:length(x$random.names))
      cat(paste(inla.nameunfix(x$random.names[i]), " ", x$random.model[i], "\n"))
    cat("\n")
  } else {
    cat("The model has no random effects\n\n")
  }
  if (inla.is.element("hyperpar", x)) {
    cat("Model hyperparameters:\n")
    print(format(x$hyperpar, digits = digits, nsmall = 2), quote = FALSE)
    cat("\n")
  } else {
    cat("The model has no hyperparameters\n\n")
  }
  if (inla.is.element("neffp", x)) {
    cat("Expected number of effective parameters(std dev): ",
        format(x$neffp[1], digits = digits, nsmall = 2), "(",
        format(x$neffp[2], digits = digits, nsmall = 2), ")\n", sep = "")
    cat("Number of equivalent replicates :",
        format(x$neffp[3], digits = digits, nsmall = 2), "\n")
    cat("\n")
  } else {
    cat("Expected number of effective parameters and Number of equivalent replicates not computed\n")
    cat("\n")
  }
  if (inla.is.element("dic", x)) {
    cat(paste("Deviance Information Criterion: ",
              format(x$dic$dic, digits = digits, nsmall = 2), "\n",
              "Effective number of parameters: ",
              format(x$dic$p.eff, digits = digits, nsmall = 2), "\n", sep = ""))
    cat("\n")
  }
  if (notes) {
    if (inla.is.element("mlik", x)) {
      cat(paste("Marginal Likelihood: ",
                format(x$mlik[2], digits = digits, nsmall = 2), "\n"))
    }
    if (inla.is.element("cpo", x)) {
      cat("CPO and PIT are computed\n")
      cat("\n")
    }
    if (inla.is.element("linear.predictor", x)) {
      cat("Posterior marginals for linear predictor and fitted values computed\n\n")
    }
  }
}
environment(printinla) <- environment(INLA:::print.summary.inla)

# The following functions allow one to aggregate over large raster files,
# as long as small sections can be read.
mytabulate <- function(x, levels = NULL) {
  if (!is.null(levels)) {
    values <- match(x, levels, nomatch = NA)
    if (any(is.na(values)))
      stop("Additional Value found in data:", unique(x[is.na(values)]))
  } else stop("Need levels for mytabulate")
  out <- tabulate(values, nbins = length(levels))
  names(out) <- levels
  out
}
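# Illustration of mytabulate(): it counts how often each allowed level occurs
# and stops if an unexpected value shows up (the codes here are placeholders).
if (FALSE) {
  mytabulate(c(11, 21, 11, 41), levels = c(11, 21, 31, 41))
  #  11 21 31 41
  #   2  1  0  1
}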
aggRasterOverSpatialPolygon <- function(r, poly, myfun = mytabulate, ..., printinfo = T) {
  # r is a raster (layer) with values r@data@min:r@data@max
  # poly is a SpatialPolygons (or SpatialPolygonsDataFrame)
  # Returns a data frame, but could be a SpatialPolygonsDataFrame: the function
  # returns a vector of named values (from myfun) for each Polygons object in
  # the SpatialPolygons object.
  # poly has polygons which have Polygons (so we have functions of polygon and Polygon).
  # We assume one layer here.... More to follow ...
  if (printinfo)
    print(r@data@attributes[[1]][r@data@attributes[[1]]$COUNT > 0, ])
  # Note: getValues() below is the helper defined in this file, which masks raster::getValues.
  polygons <- lapply(poly@polygons, getValues, r = r, myfun = myfun, ...)
  names(polygons) <- sapply(poly@polygons, slot, name = "ID")
  do.call("rbind", polygons)
}

getExtentMulti <- function(x, y) {
  # Find cell locations in x from extents (bbox) in spatial polygons y
  bboxes <- lapply(y@polygons, function(xx) getExtent(x, xx))
  names(bboxes) <- sapply(y@polygons, function(xx) xx@ID)
  bboxes
}

getExtent <- function(x, y) {
  z <- getRowColLoc(xx = bbox(x), yy = bbox(y), dimrc = dim(x))
  z[1, ] <- z[1, 2:1]
  return(cbind(min = trunc(z[, 1] + 1), max = ceiling(z[, 2])))
}

getValues <- function(r, polygon, myfun = mytabulate, ...) {
  bbox <- getExtent(r, polygon)
  values <- getValuesBlock1(r, bbox)
  rowcol <- getCoordsExtentCenters(bbox)
  cells <- rowSums(sapply(polygon@Polygons, function(x)
    getCells(rowcol, getPoints(r, x@coords)) * (1 - 2 * x@hole)))
  chosen <- values[cells > 0]
  myfun(x = chosen, ...)
}

getValuesBlock1 <- function(r, bbox) {
  getValuesBlock(r, format = "",
                 bbox[1, 1], bbox[1, 2] - bbox[1, 1] + 1,
                 bbox[2, 1], bbox[2, 2] - bbox[2, 1] + 1)
}

getRowColLoc <- function(xx, yy, dimrc) {
  # xx is the xy extent of the raster, yy are points in t(xy); returns
  # coordinates with one column for each point by 2 rows.
  z <- (yy - xx[, 1]) / (xx[, 2] - xx[, 1])
  rbind(row = (1 - z[2, ]), col = z[1, ]) * dimrc[1:2]
}

getPoints <- function(r, pts) {
  # If the row is the x coordinate and column is the y coordinate, we do not
  # need to rotate the polygon for direction, as the transformation from x, y
  # to row, col is a 90 degree counter-clockwise rotation within the [0,1] x [0,1]
  # scaled location within the raster extent box, times the dimensions of the raster.
  t(getRowColLoc(xx = bbox(r), yy = t(pts), dimrc = dim(r)))
}

getCoordsExtentCenters <- function(x, offset = c(-.5, -.5)) {
  # If the coordinates are the row/column centers relative to the plot extent
  # of [0, Nrow] x [0, Ncol], and the upper left is row and column (1,1) and
  # the lower right is (Nrow, Ncol), then the centers are given here.
  x <- round(x)
  if (x[1, 1] > x[1, 2] || x[2, 1] > x[2, 2]) {
    warning("Empty extent")
    return(NULL)
  }
  rows <- x[1, 1]:(x[1, 2]) + offset[1]
  cols <- x[2, 1]:(x[2, 2]) + offset[2]
  lr <- length(rows)
  lc <- length(cols)
  return(data.frame(row = rep.int(rows, rep.int(lc, lr)), col = rep.int(cols, lr)))
}

getCells <- function(pts, poly) {
  # Use point.in.polygon to determine whether a point is in the polygon.
  sp::point.in.polygon(pts$row, pts$col, poly[, 1], poly[, 2])
}

# How to extract values (see the sketch below):
# 1. Get cells in the extent of the polygon.
# 2. Get cell locations in the extent.
# 3. Use point.in.polygon to get an index: 1 = in, 0 = out.
# 4. Calculate the statistic.
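# Sketch of the intended use of aggRasterOverSpatialPolygon() (not run; the
# file path and the 'counties.sp' SpatialPolygons object are hypothetical):
# tabulate land-cover codes within each county polygon.
if (FALSE) {
  nlcd <- raster("nlcd_2011_landcover.img")   # hypothetical land-cover raster
  # 'levels' must list every cell value that can occur inside the polygons;
  # the codes below are placeholders.
  codes <- c(11, 21, 22, 23, 24, 31, 41, 42, 43, 52, 71, 81, 82, 90, 95)
  landcover.by.county <- aggRasterOverSpatialPolygon(nlcd, counties.sp,
                                                     myfun = mytabulate,
                                                     levels = codes)
  head(landcover.by.county)   # one row per county, one column per land-cover code
}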
samplePlot <- function() {
  # Demo: read and display one block of the NLCD 2011 land-cover raster.
  f = "~/nlcd_2011_landcover_2011_edition_2014_03_31/nlcd_2011_landcover_2011_edition_2014_03_31.img"
  r = raster(f)
  tmpv <- getValuesBlock(r, row = 50000, nrow = 4000, col = 40000, ncol = 50000, format = "matrix")
  tmpvv <- t(tmpv[3000:1, 16000:20000])
  whichcols <- do.call("seq", as.list(range(tmpvv))) + 1   # for colors
  x11()
  rr <- GDAL.open(r@file@name)
  cols <- getColorTable(rr, band = 1)
  image(tmpvv, col = cols[whichcols])
  return(list(tmpvv = tmpvv, cols = cols, whichcols = whichcols))
}

generatedayofweekdata <- function(x, lon = -100, EF0 = 1, Year0 = 1954) {
  # Count tornado reports by year, day of week, and state for reports with
  # SLON >= lon, Year >= Year0, and EF >= EF0.
  t2 = c("Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat")
  x <- subset(x, SLON >= lon & Year >= Year0 & EF >= EF0)@data
  xt <- table(x$Year, factor(weekdays(x$Date, abbrev = T), levels = t2), x$ST)
  xm <- melt(xt)
  colnames(xm) <- c("Year", "Day", "State", "Count")
  xm$IDyear <- xm$Year - Year0 + 1
  xm$IDday <- as.numeric(xm$Day) - 1
  xm
}

inlaboxplot <- function(x, type = "random", effect = "IDday",
                        levels = c("Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"),
                        p = c(.025, .25, .5, .75, .975),
                        xlab = "Day", ylab = "effect") {
  # Boxplot of posterior quantiles (p) for each level of an effect from an inla() fit.
  marginals <- data.frame(t(sapply(x[[paste("marginals", type, sep = ".")]][[effect]],
                                   function(x) inla.qmarginal(p = p, marginal = x))))
  marginals$x = factor(levels, levels = levels)
  ggplot(data = marginals) +
    geom_boxplot(aes(ymin = X1, lower = X2, middle = X3, upper = X4, ymax = X5, x = x),
                 stat = "identity") +
    xlab(xlab) + ylab(ylab)
}

testday <- function(x, lon = -100, EF0 = 1, Year0 = 1954,
                    formula = Count ~ f(IDyear, model = "rw2") + State +
                      f(IDday, model = "seasonal", season.length = 7),
                    family = "nbinomial") {
  # Fit the day-of-week count model (default: negative binomial with an RW2
  # year effect and a seasonal day-of-week effect) and return the data, the
  # fit, and the boxplot of the day effect.
  testdata <- generatedayofweekdata(x = x, lon = lon, EF0 = EF0, Year0 = Year0)
  testinla <- inla(family = family, data = testdata, formula = formula)
  plot <- inlaboxplot(x = testinla)
  return(list(data = testdata, inla = testinla, plot = plot))
}

# End May 24, 2014. Thomas Jagger.
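# Sketch of the day-of-week workflow (not run; 'Torn.sp' stands for a
# hypothetical SpatialPointsDataFrame of tornado reports that has already been
# passed through changeTornDataTypes()):
if (FALSE) {
  day.fit <- testday(Torn.sp, lon = -100, EF0 = 1, Year0 = 1954)
  printinla(day.fit$inla)   # compact summary of the INLA fit
  print(day.fit$plot)       # boxplot of the posterior day-of-week effects
}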