#code supporting the results from:
#Wampler, K.A.; Bladon, K.D.; Myers-Pigg, A.
#The influence of burn severity on dissolved organic carbon concentrations in
#streams differs based on seasonal wetness conditions post-fire
#submitted to Biogeosciences, 2024
#written on 2024-01-23

#section 0: load libraries and functions------
library(SSN)        #for SSN modeling
library(thorloki)   #for custom SSN functions (available from github: "katiewampler/thorloki")
library(data.table) #for as.ITime when converting sample times
library(dplyr)      #for data management
library(mosaic)     #for z-score
library(chron)      #for dealing with storm times
library(glmmTMB)    #for glmm modeling
library(emmeans)    #for looking at means
library(ggplot2)    #for plotting

#standardize (z-score) the covariates so coefficients can be compared
standardize <- function(df){
  df_stand <- df %>% mutate(PRECIP = zscore(PRECIP), ELEV = zscore(ELEV),
                            FOREST = zscore(FOREST), ARIDITY = zscore(ARIDITY),
                            AWC = zscore(AWC), BFI = zscore(BFI),
                            CLAY = zscore(CLAY), SOIL_PH = zscore(SOIL_PH),
                            SOIL_OM = zscore(SOIL_OM), TWI = zscore(TWI),
                            TIME_HR = zscore(TIME_HR), LOG_AREA = zscore(LOG_AREA),
                            DNBR = zscore(DNBR))
  df_stand
}

#get the midpoint of a sampling campaign as decimal hours in the day
samp_time <- function(season){
  storms <- data.frame(season = c("Wetting","Wet","Drying","Dry"),
                       start = times(c("05:30:00","07:00:00","06:30:00","06:45:00")),
                       end = times(c("17:15:00","15:15:00","17:30:00","17:15:00")))
  storms$times <- (storms$end - storms$start)/2 + storms$start #get midpoint of the day
  samp_time <- as.character(storms$times[storms$season == season])
  time <- as.numeric(as.ITime(samp_time))/(60*60) #convert to decimal hours in day
  time
}

#build an SSN object for one season: fill in sample time and log area on the
#prediction points and set DOC from the other seasons to NA in the observations
pred_samp <- function(season){
  preds <- getSSNdata.frame(m_ssn, Name="preds")
  preds_gate <- getSSNdata.frame(m_ssn, Name="preds_gate")
  preds_quartz <- getSSNdata.frame(m_ssn, Name="preds_quartz")
  preds$TIME_HR <- samp_time(season)
  preds$LOG_AREA <- log10(preds$AREA)
  preds_gate$TIME_HR <- samp_time(season)
  preds_gate$LOG_AREA <- log10(preds_gate$AREA)
  preds_quartz$TIME_HR <- samp_time(season)
  preds_quartz$LOG_AREA <- log10(preds_quartz$AREA)

  #remove other seasons in observed
  obs <- getSSNdata.frame(m_ssn)
  obs[obs$SEASON != season, doc_col] <- NA
  ssn_model <- putSSNdata.frame(obs, m_ssn)
  ssn_model <- putSSNdata.frame(preds, ssn_model, Name="preds")
  ssn_model <- putSSNdata.frame(preds_quartz, ssn_model, Name="preds_quartz")
  ssn_model <- putSSNdata.frame(preds_gate, ssn_model, Name="preds_gate")
  ssn_model
}

#section 1: build SSN models ------
#section 1.1: load the SSN model
working_folder <- "Z:/1_Research/4_McKenzie_SSN/Data Package"
m_ssn <- importSSN(paste(working_folder, "/lsn.ssn", sep=""), o.write=T, predpts="preds")
m_ssn <- importPredpts(m_ssn, "preds_quartz", "ssn")
m_ssn <- importPredpts(m_ssn, "preds_gate", "ssn")

#create distance matrices
createDistMat(m_ssn, predpts = "preds", o.write=T, amongpreds=T)
createDistMat(m_ssn, predpts = "preds_quartz", o.write=T, amongpreds=T)
createDistMat(m_ssn, predpts = "preds_gate", o.write=T, amongpreds=T)

#do necessary formatting, creating factors
obs <- getSSNdata.frame(m_ssn)
obs$BurnSev <- factor(obs$BurnSev, levels=c("Unburned", "Low", "Moderate", "High"))
obs$BurnSev <- relevel(obs$BurnSev, ref="Unburned")
obs$SEASON <- factor(obs$SEASON, levels=c("Wetting","Wet","Drying","Dry"))
obs$LOG_AREA <- log10(obs$AREA) #take log of area for modeling
obs$Site <- as.factor(obs$Site)
m_ssn <- putSSNdata.frame(obs, m_ssn)

#section 1.3: make seasonal models
doc_col <- which(colnames(obs) == "DOC_mgL")
wetting_obs <- obs
wetting_obs[wetting_obs$SEASON != "Wetting", doc_col] <- NA
wetting_ssn <- putSSNdata.frame(wetting_obs, m_ssn)

wet_obs <- obs
wet_obs[wet_obs$SEASON != "Wet", doc_col] <- NA
wet_ssn <- putSSNdata.frame(wet_obs, m_ssn)

drying_obs <- obs
drying_obs[drying_obs$SEASON != "Drying", doc_col] <- NA
drying_ssn <- putSSNdata.frame(drying_obs, m_ssn)

dry_obs <- obs
dry_obs[dry_obs$SEASON != "Dry", doc_col] <- NA
dry_ssn <- putSSNdata.frame(dry_obs, m_ssn)

#section 1.5: double selection on the mean model
## modified models for rand
covar <- c("PRECIP", "ELEV", "FOREST", "ARIDITY", "AWC", "BFI", "CLAY",
           "SOIL_PH", "SOIL_OM", "TWI", "TIME_HR", "LOG_AREA", "SEASON")

#find the model formula
form <- ds_select(df=obs, covar=covar) #do double selection with the linear model
form <- DOC_mgL ~ ARIDITY + AWC + BFI + CLAY + ELEV + FOREST + LOG_AREA +
  SOIL_PH + TIME_HR + SEASON + SOIL_OM + DNBR #redo manually to remove categorical season

#find the best autocorrelation models
best_var(form, m_ssn, random="Site") #find the best autocorrelation
#try all combinations (takes a while, do overnight)
mean_var <- best_var_str(form, m_ssn, random="Site")
mean_models <- c("Exponential.tailup", "Gaussian.Euclid", "Site")

#make final mean model and check
#standardize for variable importance
m_ssn_stand <- putSSNdata.frame(standardize(obs), m_ssn)
model <- glmssn(form, m_ssn_stand, addfunccol="afvArea", CorModels=mean_models)
check <- check_model(model, "DOC_mgL")
check[[1]] #spatial residuals
check[[2]] #histogram of residuals
pot_outlier <- check[[3]] #potential outlier values
check[[4]] #cross validation
check[[5]] #residuals with explanatory variables
check[[6]] #r-squared
check[[7]] #nugget
var_imp(model)
SSN::varcomp(model)

#section 1.6: build standardized seasonal models
#standardize by season and stick into "standard total model" to keep structure
#get dataframes of just each season
wetting <- wetting_obs[is.na(wetting_obs$DOC_mgL) == F, ]
wet <- wet_obs[is.na(wet_obs$DOC_mgL) == F, ]
drying <- drying_obs[is.na(drying_obs$DOC_mgL) == F, ]
dry <- dry_obs[is.na(dry_obs$DOC_mgL) == F, ]
obs_standard <- rbind(standardize(wetting), standardize(wet),
                      standardize(drying), standardize(dry))
obs_standard <- obs_standard[order(as.numeric(row.names(obs_standard))), ]
m_ssn_stand <- putSSNdata.frame(obs_standard, m_ssn)

#get standardized model for each season
doc_col <- which(colnames(obs) == "DOC_mgL")
wetting_obs <- obs_standard
wetting_obs[wetting_obs$SEASON != "Wetting", doc_col] <- NA
wetting_ssn_stand <- putSSNdata.frame(wetting_obs, m_ssn_stand)

wet_obs <- obs_standard
wet_obs[wet_obs$SEASON != "Wet", doc_col] <- NA
wet_ssn_stand <- putSSNdata.frame(wet_obs, m_ssn_stand)

drying_obs <- obs_standard
drying_obs[drying_obs$SEASON != "Drying", doc_col] <- NA
drying_ssn_stand <- putSSNdata.frame(drying_obs, m_ssn_stand)

dry_obs <- obs_standard
dry_obs[dry_obs$SEASON != "Dry", doc_col] <- NA
dry_ssn_stand <- putSSNdata.frame(dry_obs, m_ssn_stand)
#section 1.7: build seasonal models
form <- DOC_mgL ~ ARIDITY + AWC + BFI + CLAY + ELEV + FOREST + LOG_AREA +
  SOIL_PH + TIME_HR + SOIL_OM + DNBR

#make wetting model
wetting_model <- glmssn(form, wetting_ssn_stand, addfunccol="afvArea", CorModels=mean_models)
check <- check_model(wetting_model, "DOC_mgL")
check[[1]] #spatial residuals
check[[2]] #histogram of residuals
pot_outlier <- check[[3]] #potential outlier values
check[[4]] #cross validation
check[[5]] #residuals with explanatory variables
check[[6]] #r-squared
check[[7]] #nugget
var_imp(wetting_model)
SSN::varcomp(wetting_model)

#make wet model
wet_model <- glmssn(form, wet_ssn_stand, addfunccol="afvArea", CorModels=mean_models)
check <- check_model(wet_model, "DOC_mgL")
check[[1]] #spatial residuals
check[[2]] #histogram of residuals
pot_outlier <- check[[3]] #potential outlier values
check[[4]] #cross validation
check[[5]] #residuals with explanatory variables
check[[6]] #r-squared
check[[7]] #nugget
var_imp(wet_model)
SSN::varcomp(wet_model)

#make drying model
drying_model <- glmssn(form, drying_ssn_stand, addfunccol="afvArea", CorModels=mean_models)
check <- check_model(drying_model, "DOC_mgL")
check[[1]] #spatial residuals
check[[2]] #histogram of residuals
pot_outlier <- check[[3]] #potential outlier values
check[[4]] #cross validation
check[[5]] #residuals with explanatory variables
check[[6]] #r-squared
check[[7]] #nugget
var_imp(drying_model)
SSN::varcomp(drying_model)

#make dry model
dry_model <- glmssn(form, dry_ssn_stand, addfunccol="afvArea", CorModels=mean_models)
check <- check_model(dry_model, "DOC_mgL")
check[[1]] #spatial residuals
check[[2]] #histogram of residuals
pot_outlier <- check[[3]] #potential outlier values
check[[4]] #cross validation
check[[5]] #residuals with explanatory variables
check[[6]] #r-squared
check[[7]] #nugget
var_imp(dry_model)
SSN::varcomp(dry_model)

#section 2: predict DOC concentrations across the stream network ------
#section 2.1: add columns for sample time and log area
#(use the midpoint of each sampling campaign as the sample time)
wetting_pred <- pred_samp("Wetting")
wet_pred <- pred_samp("Wet")
drying_pred <- pred_samp("Drying")
dry_pred <- pred_samp("Dry")
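#optional sanity check (added sketch): before fitting and predicting in
#section 2.2, confirm the prediction points carry every model covariate with no
#missing values and that TIME_HR was set to the campaign midpoint; this assumes
#those columns exist in the prediction point attribute table (they must, for
#the predictions below to work)
pred_check <- getSSNdata.frame(wetting_pred, Name="preds")
colSums(is.na(pred_check[, c("ARIDITY","AWC","BFI","CLAY","ELEV","FOREST",
                             "LOG_AREA","SOIL_PH","TIME_HR","SOIL_OM","DNBR")]))
unique(pred_check$TIME_HR) #should equal samp_time("Wetting"): 05:30-17:15 midpoint = 11.375 h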
#section 2.2: make predictions on SSN models
form <- DOC_mgL ~ ARIDITY + AWC + BFI + CLAY + ELEV + FOREST + LOG_AREA +
  SOIL_PH + TIME_HR + SOIL_OM + DNBR
mean_models <- c("Exponential.tailup", "Gaussian.Euclid")

#wetting
wetting <- glmssn(form, wetting_pred, addfunccol="afvArea", CorModels=mean_models)
wetting_preds <- SSN::predict.glmssn(wetting, "preds")

#wet
wet <- glmssn(form, wet_pred, addfunccol="afvArea", CorModels=mean_models)
wet_preds <- SSN::predict.glmssn(wet, "preds")

#drying
drying <- glmssn(form, drying_pred, addfunccol="afvArea", CorModels=mean_models)
drying_preds <- SSN::predict.glmssn(drying, "preds")

#dry
dry <- glmssn(form, dry_pred, addfunccol="afvArea", CorModels=mean_models)
dry_preds <- SSN::predict.glmssn(dry, "preds")

#section 3: get summaries of DOC data across burn severity groups and season -----
doc <- read.csv(paste(working_folder, "McKenzie_DOC_SiteData_2022_2023.csv", sep="/"))
doc$BurnSev <- factor(doc$BurnSev, levels=c("Unburned","Low","Moderate","High"), ordered=T)
doc$SEASON <- factor(doc$SEASON, levels=c("Wetting","Wet","Drying","Dry"), ordered=T)

doc %>% group_by(BurnSev) %>%
  summarise(max=max(DOC_mgL), min=min(DOC_mgL), median=median(DOC_mgL),
            mean=mean(DOC_mgL), sd=sd(DOC_mgL), IQR=IQR(DOC_mgL), count=n())

doc %>% group_by(SEASON, BurnSev) %>%
  summarise(max=max(DOC_mgL), min=min(DOC_mgL), median=median(DOC_mgL),
            mean=mean(DOC_mgL), sd=sd(DOC_mgL), IQR=IQR(DOC_mgL), count=n())

#section 4: do glmmTMB modeling to look at variance and mean between groups -----
#section 4.1: load data
doc <- read.csv(paste(working_folder, "McKenzie_DOC_SiteData_2022_2023.csv", sep="/"))
doc$BurnSev <- factor(doc$BurnSev, levels=c("Unburned","Low","Moderate","High"), ordered=F)
doc$SEASON <- factor(doc$SEASON, levels=c("Wetting","Wet","Drying","Dry"), ordered=F)
doc$Site <- factor(doc$Site)

#section 4.2: fit the glmmTMB model and check
fit <- glmmTMB(DOC_mgL ~ SEASON * BurnSev + (1 | Site),
               dispformula = ~ SEASON + BurnSev, data = doc)

#add residuals to the data
doc$res <- resid(fit, type = "response")

#get model matrix for dispersion model
mmd <- model.matrix(~ SEASON + BurnSev, data = doc)

#estimate the sd's associated with each obs:
#matrix-multiply the model matrix by the vector of coefficient estimates in
#the dispersion model, then exponentiate, and finally take the square root
doc$sd_estim <- sqrt(exp(mmd %*% fixef(fit)$disp))

#divide residuals by estimated sds
doc <- doc %>% mutate(res_scl = res / sd_estim)

#compare the two residuals. Does the model seem to fit well?
ggplot(data = doc, aes(x = SEASON, y = res, fill = BurnSev)) +
  geom_boxplot() + ggtitle("Raw residuals")
ggplot(data = doc, aes(x = SEASON, y = res_scl, fill = BurnSev)) +
  geom_boxplot() + ggtitle("Scaled residuals")
summary(fit)

#section 4.3: get differences in means across seasons
results <- emmeans(fit, pairwise ~ SEASON)
results <- results$contrasts

#section 4.4: examine differences between variances
#estimate variance for each fire severity
uniq_rows <- unique(mmd)

#get estimates and ses on log scale
lvar <- as.double(uniq_rows %*% fixef(fit)$disp) #estimates the variance on log scale
se_lvar <- diag(uniq_rows %*% vcov(fit)$disp %*% t(uniq_rows)) #gets standard error of variance

#get the variance estimates (not on the log scale)
var_estims <- tibble(
  #look at the 1s and 0s in uniq_rows to see which row is which category
  group = c("Wetting_L", "Wet_L", "Drying_L", "Dry_L",
            "Wetting_U", "Wet_U", "Drying_U", "Dry_U",
            "Wetting_M", "Wet_M", "Drying_M", "Dry_M",
            "Wetting_H", "Wet_H", "Drying_H", "Dry_H"),
  #construct the estimates
  var_estim = exp(lvar),
  #get lower and upper CI bounds
  lower = exp(lvar - 2 * se_lvar),
  upper = exp(lvar + 2 * se_lvar))
var_estims

#section 4.5: get contrasts in variance between severity groups and season
#set up and get the contrasts of interest for severity
nums <- data.frame(unburn_low = c(rep(5, 3), rep(1, 2), 9),
                   burned = c(1, 9, 13, 9, 13, 13),
                   sev1 = c(rep("Unburned", 3), rep("Low", 2), "Moderate"),
                   sev2 = c("Low", "Moderate", "High", "Moderate", "High", "High"))
var_comp <- as.data.frame(matrix(nrow=nrow(nums), ncol=5))
colnames(var_comp) <- c("Severity1", "Severity2", "Ratio", "L_CI", "U_CI")
var_comp$Severity1 <- nums$sev1
var_comp$Severity2 <- nums$sev2
for(x in 1:nrow(nums)){
  #pull out the rows of uniq_rows for the two groups being compared and
  #subtract one from the other
  diff_fu_su <- uniq_rows[nums$burned[x], ] - uniq_rows[nums$unburn_low[x], ]
  #matrix multiply by the estimates and exponentiate;
  #this gives the number of times larger the variance is
  ratio <- exp(diff_fu_su %*% fixef(fit)$disp)
  #lower CI limit for the contrast
  l_ratio <- exp(diff_fu_su %*% fixef(fit)$disp -
                   2 * diff_fu_su %*% vcov(fit)$disp %*% diff_fu_su)
  #upper limit
  u_ratio <- exp(diff_fu_su %*% fixef(fit)$disp +
                   2 * diff_fu_su %*% vcov(fit)$disp %*% diff_fu_su)
  var_comp[x, 3:5] <- c(ratio, l_ratio, u_ratio)
}

#set up and get the contrasts of interest for season
nums <- data.frame(group1 = c(1, 4, 2), group2 = c(4, 2, 3),
                   season1 = c("Wetting", "Wet", "Drying"),
                   season2 = c("Wet", "Drying", "Dry"))
var_comp <- as.data.frame(matrix(nrow=nrow(nums), ncol=5))
colnames(var_comp) <- c("Season1", "Season2", "Ratio", "L_CI", "U_CI")
var_comp$Season1 <- nums$season1
var_comp$Season2 <- nums$season2
for(x in 1:nrow(nums)){
  #pull out the rows of uniq_rows for the two groups being compared and
  #subtract one from the other
  diff_fu_su <- uniq_rows[nums$group1[x], ] - uniq_rows[nums$group2[x], ]
  #matrix multiply by the estimates and exponentiate;
  #this gives the number of times larger the variance is
  ratio <- exp(diff_fu_su %*% fixef(fit)$disp)
  #lower CI limit for the contrast
  l_ratio <- exp(diff_fu_su %*% fixef(fit)$disp -
                   2 * diff_fu_su %*% vcov(fit)$disp %*% diff_fu_su)
  #upper limit
  u_ratio <- exp(diff_fu_su %*% fixef(fit)$disp +
                   2 * diff_fu_su %*% vcov(fit)$disp %*% diff_fu_su)
  var_comp[x, 3:5] <- c(ratio, l_ratio, u_ratio)
}
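#note on interpretation (added): each Ratio in var_comp is exp(difference in
#the dispersion-model linear predictors), i.e., how many times larger one
#group's modeled variance is than the other's; an interval that excludes 1
#points to a difference in variance between the two groups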
#section 5: estimate range of changes in DOC concentrations with burn severity ------
#section 5.1: remake SSN models with non-standardized variables
model <- glmssn(form, m_ssn, addfunccol="afvArea", CorModels=mean_models)
wetting <- glmssn(form, wetting_ssn, addfunccol="afvArea", CorModels=mean_models)
wet <- glmssn(form, wet_ssn, addfunccol="afvArea", CorModels=mean_models)
drying <- glmssn(form, drying_ssn, addfunccol="afvArea", CorModels=mean_models)
dry <- glmssn(form, dry_ssn, addfunccol="afvArea", CorModels=mean_models)

mean_change <- burn_impact(model)
wetting_change <- burn_impact(wetting)
wet_change <- burn_impact(wet)
drying_change <- burn_impact(drying)
dry_change <- burn_impact(dry)
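#collect the seasonal change estimates in one place for comparison (added
#sketch; burn_impact() is a custom thorloki function, so nothing is assumed
#about its return value beyond being a printable R object)
change_list <- list(Mean = mean_change, Wetting = wetting_change, Wet = wet_change,
                    Drying = drying_change, Dry = dry_change)
change_list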