##
## Dail and Madsen (JABES 2011) Supplementary Files 

# this file includes 4 functions:

# 1. model_M1    # Returns the negative log likelihood of model M1
# 2. model_M2    # Returns the negative log likelihood of model M2
# 3. M1_ans      # Returns the estimates of site population sizes using model M1,
                 #   along with summary for the estimated arrival rate
# 4. M2_ans      # Returns the estimates of site population sizes using model M2

## You will also need two C program files, M1_Trans.c and M1_ANS.c, 
##  that accompany this R file. See the corresponding tutorial for how
##  to compile and utilize the C files within R.


# The R functions:

model_M1 = function(vars, n.it, Date, K){
  Date.mat = Date
  n=n.it
  R = length(n[,1]) #number of sites with any observations
  nsite=R
  
  ## uses the large Date matrix, with every sampling date represented
  ## and NA for each site-date combination not sampled  

  probs.i=numeric(R)

  i=1 # since same dates all through matrix

    dates.i = as.numeric(na.omit(Date[i,]))

    T=length(Date[i,])
    delta.dates=numeric(T)
    delta.dates[1]=dates.i[1]
    for(t in 2:T){
      delta.dates[t]= dates.i[t]-dates.i[t-1]
    }
    pi.1.it=exp(-1/exp(vars[1])*delta.dates) #only the first delta.date is unique in the sims

    p.it = rep(expit(vars[5]),T)
  
    xi.2 = -exp(vars[4])  # must be negative, for normal. Could be any pdf (e.g., gamma) - later (and discussion).
    xi.1 = vars[3]
    xi.0 = vars[2]
  
    a=xi.1
    b=xi.2
    c0=xi.0
    lambda.t=numeric(T)
    d.t = dates.i
    for(t in 2:T){
      lambda.t[t]= -1/b*exp(xi.0)*sqrt(-b*pi)*exp(-a^2/(4*b))*(pnorm(d.t[t],mean=-a/(2*b),sd=sqrt(-1/(2*b)))-
        pnorm(d.t[t-1],mean=-a/(2*b),sd=sqrt(-1/(2*b))))
      if(is.na(lambda.t[t]) | is.nan(lambda.t[t])) lambda.t[t]=0
    }

    pi.2.t=numeric(T)
    for(t in 2:T){
      if(lambda.t[t]>0){ 
        a= 1/exp(vars[1]) + xi.1  
        pi.2.t[t]= -1/b*exp(xi.0-d.t[t]/exp(vars[1]))/lambda.t[t]*sqrt(-b*pi)*exp(-a^2/(4*b))*(pnorm(d.t[t],mean=-a/(2*b),sd=sqrt(-1/(2*b)))-
          pnorm(d.t[t-1],mean=-a/(2*b),sd=sqrt(-1/(2*b))))
        if(is.na(pi.2.t[t]) | is.nan(pi.2.t[t])) pi.2.t[t] = 1
      } else {
        pi.2.t[t] = 1
      }
    }
    
    f.At = matrix(1,nrow=R,ncol=K+1)
    ones.Kp1 = rep(1,K+1)
    for(t in T:2){
      P.Atm1.At = matrix(0,nrow=K+1,ncol=K+1)
      P.Atm1.At2=P.Atm1.At
 
      for(Atm1 in 0:K){
        res2 = numeric(K+1)
        P.Atm1.At2[Atm1+1,] = unlist(.C("M1_Trans", as.double(pi.1.it[t]),
           as.double(pi.2.t[t]), as.double(lambda.t[t]), 
           as.integer(Atm1), as.integer(K), as.double(res2), NAOK=TRUE)[6])
      }
      P.Atm1.At=P.Atm1.At2

      for(i in 1:R){
        if(is.na(n.it[i,t])) obs.mat = matrix(1, K+1, K+1)
        if(!is.na(n.it[i,t])) obs.mat = matrix(rep( dbinom(n.it[i,t],0:K,p.it[t]),K+1), nrow=K+1,byrow=TRUE) 
        f.At[i,]=( matrix(rep(f.At[i,],K+1),nrow=K+1,byrow=TRUE) *obs.mat*P.Atm1.At)%*%ones.Kp1
      }
    }
    q=-1*sum(log(f.At[,1]*dbinom(n.it[,1],0,p.it[1])))
   
  if(!is.finite(q)) return(1000000000)
  return(q)
}



model_M2 = function(vars,n.it,Date,K=80){
  # function returns the negative log likelihood of the auc method of Dail and Madsen 2011
  # David Dail, revised October 25, 2010
  #
  # vars is the list of parameters
  # n is the data
  # Date.mat is the matrix of dates
  # K is the upper cutoff value for the sums over N.it
  #
  n=n.it
  R = length(n[,1]) #number of sites with any observations
  nsite=R
  
  probs.i=numeric(R)

  Date.mat=Date

  # some preliminaries we'll need for each site, may as well build them now:
  
  Pois1 = rep(0:-K,(K+1)^2) + rep(0:K, each=(K+1)^2)
  zeros = which(Pois1==0)
  str.zero = rep(0,length(Pois1))
  str.zero[zeros]=1

  Bin1 = rep(0:(K),(K+1)^2)
  Bin2 = rep(rep(0:(K),each=(K+1)),(K+1))

  for(i in 1:R){

    n.i = as.vector(na.omit(n.it[i,]))
    dates.i = (Date[i,which(!is.na(n.it[i,]))])
    T=length(dates.i)
    delta.dates=numeric(T)
    delta.dates[1]=dates.i[1]
    for(t in 2:T){
      delta.dates[t]= dates.i[t]-dates.i[t-1]
    }
  
    Delta.frac = (delta.dates/11)
    J.vec = numeric(0)
    n.obs = numeric(0)
    n.t = numeric(0)
    n.it.vec = as.vector(na.omit(n.it[i,]))
    for(j in 1:length(Delta.frac)){
      J.vec = c(J.vec, rep(1, floor(Delta.frac[j])), Delta.frac[j]-floor(Delta.frac[j]))
      n.obs = c(n.obs, rep(0, floor(Delta.frac[j])), 1) 
      n.t = c(n.t, rep(NA, floor(Delta.frac[j])), n.it.vec[j])
    }  
    Date.use = numeric(0)
    for(j in 1:length(J.vec)){
      Date.use = round( c(Date.use, sum(J.vec[1:j]*11) ),0)
    }
    theta.1 = vars[1]
    log.theta.surv = theta.1
    w.it=exp(-1/exp(log.theta.surv)*J.vec*11)
    w.it = matrix(w.it, nrow=1,byrow=FALSE)
    w.it[,1]=1
    var.next=2
 
    theta.2=c(vars[2:3],-4*expit(vars[4]))
    X.ZIpar.int = rep(1,length(as.vector(Date.use)))
    X.ZIpar.lin = as.vector(Date.use)
    X.ZIpar.quad = as.vector(Date.use)^2
    X.ZIpar = cbind(X.ZIpar.int, X.ZIpar.lin, X.ZIpar.quad)
  
    r.mat = exp(matrix(X.ZIpar%*%theta.2,nrow=1,byrow=FALSE))
    r.it = r.mat*J.vec ## fractional periods

    p.it = expit(vars[5])

    K.num = 0:K

    ##############################################################################################
    # all the parameters are set

    T = length(r.it)  
    #Make A
    p.t = rep(p.it,T)
    w.t = w.it
    r.t = r.it
    #n.t, n.obs, 

    f.1 = rep(1, K+1)
    f1.mat = matrix( 1, nrow=K+1, ncol=K+1)
    ### here:
    for(t in T:2){
    
      Bin.mat = matrix(dbinom(Bin1,Bin2,w.t[t]),nrow=(K+1)^2, ncol=(K+1),byrow=TRUE)
      r.it=rep(r.t[t],(K+1))
      r.it.rep = rep(rep(r.it,each=(K+1)),(K+1))
      Pois.mat = matrix((dpois(Pois1,r.it.rep) ),nrow=(K+1)^2,ncol=(K+1),byrow=TRUE)
      Trans.mat = matrix((Pois.mat*Bin.mat)%*%rep(1,(K+1)),nrow=(K+1),ncol=(K+1),byrow=FALSE)

      if(n.obs[t]==1){
        mult.n = dbinom(n.t[t],K.num, p.t[t])
        matrix.n = matrix(rep(mult.n, K+1), nrow=K+1, ncol=K+1, byrow=TRUE)
      } else {
        matrix.n = matrix(1, nrow=K+1, ncol=K+1)
      }
      f.1 = (f1.mat*matrix.n*Trans.mat)%*%rep(1,K+1)
      f1.mat = matrix( rep(f.1, K+1), nrow=K+1, ncol=K+1, byrow = TRUE)
    } # end of loop through T
    # now, for t=1
    # P(N_1 = 0)=1
    probs.i[i] = f.1[1]
 
  } #end of loop through sites
   
  q = -1*sum(log(probs.i))
  if(!is.finite(q)) return(1000000000)
  return(q)
}


M1_ans = function(vars,n.it, Date,K ){

  a=vars[3]
  b=-1*exp(vars[4])
  c0=vars[2]
  p.est.1 = expit(vars[5])
  R = dim(n.it)[1]
  Nt.mean.hat=numeric(R)
  Nt.max.hat=numeric(R)
  lam.sum=numeric(R)
  ms.est.1 = exp(vars[1])


  # First, we get N^E (note: this takes a few hours to run)

  for(i in 1:R){
    T = sum(!is.na(n.it[i,]))
    n.t = n.it[i,which(!is.na(n.it[i,]))]
    d.t = Date[i,which(!is.na(n.it[i,]))]
    j.num = numeric(T)
    p.est = rep(expit(p.est.1),T)
    MS.est = rep(ms.est.1,T)
       
    pi.1.it = numeric(T)
    for(j in 2:T){
      pi.1.it[j]=exp(-1/MS.est[j]*(d.t[j]-d.t[j-1])) #only the first delta.date is unique
    }
    pi.1.it[1]=1
 
    lambda.t=numeric(T)
    for(t in 2:T){
      lambda.t[t]= -1/b*exp(c0)*sqrt(-b*pi)*exp(-a^2/(4*b))*(pnorm(d.t[t],mean=-a/(2*b),sd=sqrt(-1/(2*b)))-
        pnorm(d.t[t-1],mean=-a/(2*b),sd=sqrt(-1/(2*b))))
    }
    a.2 = 1/MS.est+ a
    pi.2.t=numeric(T)
    for(t in 2:T){
      if(lambda.t[t]>0){ 
        pi.2.t[t]= -1/b*exp(c0-d.t[t]/MS.est[t])/lambda.t[t]*sqrt(-b*pi)*exp(-a.2[t]^2/(4*b))*(pnorm(d.t[t],mean=-a.2[t]/(2*b),sd=sqrt(-1/(2*b)))-
          pnorm(d.t[t-1],mean=-a.2[t]/(2*b),sd=sqrt(-1/(2*b))))
      } else {
        pi.2.t[t] = 1
      }
    }
    pi.2.t[1]=1
    pi.2.t[which(is.nan(pi.2.t))]=0
    K.lim=K
    
    G.hat = numeric(T)
    for(t in 2:T){
      g_ave_st=0
      g_max_start_vec = numeric(K+1)
      c.fun.hold = .C("M1_ANS", as.double(pi.1.it[t]), as.double(pi.2.t[t]), as.double(lambda.t[t]),
        as.double(p.est[t]), as.integer(n.t[t]), as.integer(n.t[t-1]), as.integer(K), as.double(g_ave_st),
        as.double(g_max_start_vec), NAOK=TRUE)
      G.hat[t] = unlist(c.fun.hold)[8]
      prob.g=(unlist(c.fun.hold))[9:(K+9)]
    }
    Nt.mean.hat[i]=sum(G.hat)
    lam.sum[i] = sum(lambda.t)
  }
 
  ## then, we scale N^E by N^NHPP:

  frac.mean= sum(lam.sum) / sum(Nt.mean.hat)
  N.hat.M1 = Nt.mean.hat * frac.mean

  ## Now we can summarize the rate parameters:

  d.t2 = c(min(d.t), max(d.t))
  sum.lambda = -1/b*exp(c0)*sqrt(-b*pi)*exp(-a^2/(4*b))*(pnorm(d.t2[2],mean=-a/(2*b),sd=sqrt(-1/(2*b)))-
        pnorm(d.t2[1],mean=-a/(2*b),sd=sqrt(-1/(2*b))))
    
  mean.day=-a/(2*b)
  sd.day = sqrt(-1/(2*b))
  arrive95 = mean.day+c(-1.96,1.96)*sd.day
  rate.ret= c(sum.lambda, mean.day, arrive95)

  ret.list = list(N.hat.M1, rate.ret)
  return(ret.list)
}



M2_ans = function(vars,n.it,Date,K=80){
  n=n.it
  R = length(n[,1]) #number of sites with any observations
  nsite=R
  Date.mat=Date

  A.i.est = numeric(R)
  for(i in 1:R){

    n.i = as.vector(na.omit(n.it[i,]))
    dates.i = (Date[i,which(!is.na(n.it[i,]))])
    T=length(dates.i)
    delta.dates=numeric(T)
    delta.dates[1]=dates.i[1]
    for(t in 2:T){
      delta.dates[t]= dates.i[t]-dates.i[t-1]
    }
  
    Delta.frac = (delta.dates/11)
    J.vec = numeric(0)
    n.obs = numeric(0)
    n.t = numeric(0)
    n.it.vec = as.vector(na.omit(n.it[i,]))
    for(j in 1:length(Delta.frac)){
      J.vec = c(J.vec, rep(1, floor(Delta.frac[j])), Delta.frac[j]-floor(Delta.frac[j]))
      n.obs = c(n.obs, rep(0, floor(Delta.frac[j])), 1) 
      n.t = c(n.t, rep(NA, floor(Delta.frac[j])), n.it.vec[j])
    }  
    Date.use = numeric(0)
    for(j in 1:length(J.vec)){
      Date.use = round( c(Date.use, sum(J.vec[1:j]*11) ),0)
    }
 
    X.surv.int = matrix(rep(1,length(as.vector(Date.use))),ncol=1)
    X.surv.date = matrix( as.vector(Date.use),ncol=1)
    
    theta.1 = vars[1]
    log.theta.surv = theta.1
    w.it=exp(-1/exp(log.theta.surv)*J.vec*11)
    w.it = matrix(w.it, nrow=1,byrow=FALSE)
    w.it[,1]=1
    s.it = exp(vars[1])

    theta.2=c(vars[2:3],-4*expit(vars[4])) 
    X.ZIpar.int = rep(1,length(as.vector(Date.use)))
    X.ZIpar.lin = as.vector(Date.use)
    X.ZIpar.quad = as.vector(Date.use)^2
    X.ZIpar = cbind(X.ZIpar.int, X.ZIpar.lin, X.ZIpar.quad)
  
    r.mat = exp(matrix(X.ZIpar%*%theta.2,nrow=1,byrow=FALSE))
    r.it=r.mat

    p.it = expit(vars[5])

    r.it = r.mat*J.vec ## accounting for fractional periods

    K.num = 0:K

    ##############################################################################################
    # all the parameters are set

    T = length(r.it)  
    #Make A
    p.t = rep(p.it,T)
    S.t = rep(s.it, T)
    w.t = w.it
    r.t = r.it
    #n.t, n.obs, 

    N.t = numeric(T)
    N.t[1]=0
    j.num = numeric(T)
    for(t in 2:T){
      if(is.na(n.t[t])){
        N.t[t] = N.t[t-1]*w.t[t]+r.t[t]
      }
      if(!is.na(n.t[t])){
        N.t[t] = n.t[t]/p.t[t]
      }
      j.num[t] = (Date.use[t] - Date.use[t-1])*(N.t[t]/S.t[t]+N.t[t-1]/S.t[t-1])
    }
    A.i.est[i] = .5*sum(j.num) + .5*n.t[1]/p.t[1] + .5*n.t[T]/p.t[T]
  }
  AUC.hat=A.i.est 


  max.n=numeric(0)
  sum.n=numeric(0)
  for(i in 1:R){
    n.t = n.it[i,which(!is.na(n.it[i,]))]
    max.n = c(max.n, max(n.t))
    sum.n = c(sum.n, sum(n.t))
  }

  ## And now, we will find N^0:

  M2.hat = AUC.hat
  zero.i = which(max.n==0)
  for(i in zero.i){

    n.i = as.vector(na.omit(n.it[i,]))
    dates.i = (Date[i,which(!is.na(n.it[i,]))])
    T=length(dates.i)
    delta.dates=numeric(T)
    delta.dates[1]=dates.i[1]
    for(t in 2:T){
      delta.dates[t]= dates.i[t]-dates.i[t-1]
    }
  
    Delta.frac = (delta.dates/11)
    J.vec = numeric(0)
    n.obs = numeric(0)
    n.t = numeric(0)
    n.it.vec = as.vector(na.omit(n.it[i,]))
    for(j in 1:length(Delta.frac)){
      J.vec = c(J.vec, rep(1, floor(Delta.frac[j])), Delta.frac[j]-floor(Delta.frac[j]))
      n.obs = c(n.obs, rep(0, floor(Delta.frac[j])), 1) 
      n.t = c(n.t, rep(NA, floor(Delta.frac[j])), n.it.vec[j])
    }  
    Date.use = numeric(0)
    for(j in 1:length(J.vec)){
      Date.use = round( c(Date.use, sum(J.vec[1:j]*11) ),0)
    }
 
    X.surv.int = matrix(rep(1,length(as.vector(Date.use))),ncol=1)
    X.surv.date = matrix( as.vector(Date.use),ncol=1)
    
      theta.1 = vars[1]
      X.surv = X.surv.int
      log.theta.surv = matrix(X.surv%*%theta.1,nrow=1,byrow=FALSE)
      w.it=exp(-1/exp(log.theta.surv)*J.vec*11)
      w.it = matrix(w.it, nrow=1,byrow=FALSE)
      w.it[,1]=1
      S.t = exp(log.theta.surv)

    theta.2=c(vars[2:3],-4*expit(vars[4])) 
    X.ZIpar.int = rep(1,length(as.vector(Date.use)))
    X.ZIpar.lin = as.vector(Date.use)
    X.ZIpar.quad = as.vector(Date.use)^2
    X.ZIpar = cbind(X.ZIpar.int, X.ZIpar.lin, X.ZIpar.quad)
  
    r.mat = exp(matrix(X.ZIpar%*%theta.2,nrow=1,byrow=FALSE))
    r.it=r.mat

    p.it = expit(vars[5])

    r.it = r.mat*J.vec ## fractional periods

    K.num = 0:K

    ##############################################################################################
    # all the parameters are set

    T = length(r.it)  
    #Make A
    p.t = rep(p.it,T)
    p.t[which(is.na(n.t))]=0

    w.t = w.it
    r.t = r.it

    ## Allow N_tm1 to be 0 to K

    N.est=matrix(0,nrow=T,ncol=K+1)
    N.use=0:K
    P_N = dpois(N.use,r.t[1])
    P_N=P_N/sum(P_N)
 
    P_num = (1-p.t[1])^N.use*P_N  
    P_den = sum(P_num)
    N.est[1,]=(1*(P_num/P_den))
    P_N.0 = matrix(0,nrow=K+1,ncol=K+1)
    for(t in 2:T){
      for(k in 0:(K)){
        for(j in N.use){
          P_N.0[k+1,j] = sum(dpois(N.use,r.t[t])*dbinom(k-N.use,j,w.t[t])*N.est[t-1,j+1])
        }
      }
      P_N.2 = P_N.0%*%(rep(1,K+1))
      P_N.3 = P_N.2/sum(P_N.2)
      P_num = (1-p.t[t])^N.use*P_N.3
      P_den = sum(P_num)
      N.est[t,] = (1*P_num/P_den)
    }

    K.mat = matrix( rep(N.use,T),nrow=T,byrow=TRUE)
    N.est.2=(N.est*K.mat)%*%(rep(1,K+1))

    j.num=numeric(T)
    for(t in 2:T){
      j.num[t] = (Date.use[t] - Date.use[t-1])*(N.est.2[t]/S.t[t]+N.est.2[t-1]/S.t[t-1])
    }
    A.i.zero = .5*sum(j.num)
    M2.hat[i] = A.i.zero
  }
  return(M2.hat)
}