## ## Dail and Madsen (JABES 2011) Supplementary Files # this file includes 4 functions: # 1. model_M1 # Returns the negative log likelihood of model M1 # 2. model_M2 # Returns the negative log likelihood of model M2 # 3. M1_ans # Returns the estimates of site population sizes using model M1, # along with summary for the estimated arrival rate # 4. M2_ans # Returns the estimates of site population sizes using model M2 ## You will also need two C program files, M1_Trans.c and M1_ANS.c, ## that accompany this R file. See the corresponding tutorial for how ## to compile and utilize the C files within R. # The R functions: model_M1 = function(vars, n.it, Date, K){ Date.mat = Date n=n.it R = length(n[,1]) #number of sites with any observations nsite=R ## uses the large Date matrix, with every sampling date represented ## and NA for each site-date combination not sampled probs.i=numeric(R) i=1 # since same dates all through matrix dates.i = as.numeric(na.omit(Date[i,])) T=length(Date[i,]) delta.dates=numeric(T) delta.dates[1]=dates.i[1] for(t in 2:T){ delta.dates[t]= dates.i[t]-dates.i[t-1] } pi.1.it=exp(-1/exp(vars[1])*delta.dates) #only the first delta.date is unique in the sims p.it = rep(expit(vars[5]),T) xi.2 = -exp(vars[4]) # must be negative, for normal. Could be any pdf (e.g., gamma) - later (and discussion). xi.1 = vars[3] xi.0 = vars[2] a=xi.1 b=xi.2 c0=xi.0 lambda.t=numeric(T) d.t = dates.i for(t in 2:T){ lambda.t[t]= -1/b*exp(xi.0)*sqrt(-b*pi)*exp(-a^2/(4*b))*(pnorm(d.t[t],mean=-a/(2*b),sd=sqrt(-1/(2*b)))- pnorm(d.t[t-1],mean=-a/(2*b),sd=sqrt(-1/(2*b)))) if(is.na(lambda.t[t]) | is.nan(lambda.t[t])) lambda.t[t]=0 } pi.2.t=numeric(T) for(t in 2:T){ if(lambda.t[t]>0){ a= 1/exp(vars[1]) + xi.1 pi.2.t[t]= -1/b*exp(xi.0-d.t[t]/exp(vars[1]))/lambda.t[t]*sqrt(-b*pi)*exp(-a^2/(4*b))*(pnorm(d.t[t],mean=-a/(2*b),sd=sqrt(-1/(2*b)))- pnorm(d.t[t-1],mean=-a/(2*b),sd=sqrt(-1/(2*b)))) if(is.na(pi.2.t[t]) | is.nan(pi.2.t[t])) pi.2.t[t] = 1 } else { pi.2.t[t] = 1 } } f.At = matrix(1,nrow=R,ncol=K+1) ones.Kp1 = rep(1,K+1) for(t in T:2){ P.Atm1.At = matrix(0,nrow=K+1,ncol=K+1) P.Atm1.At2=P.Atm1.At for(Atm1 in 0:K){ res2 = numeric(K+1) P.Atm1.At2[Atm1+1,] = unlist(.C("M1_Trans", as.double(pi.1.it[t]), as.double(pi.2.t[t]), as.double(lambda.t[t]), as.integer(Atm1), as.integer(K), as.double(res2), NAOK=TRUE)[6]) } P.Atm1.At=P.Atm1.At2 for(i in 1:R){ if(is.na(n.it[i,t])) obs.mat = matrix(1, K+1, K+1) if(!is.na(n.it[i,t])) obs.mat = matrix(rep( dbinom(n.it[i,t],0:K,p.it[t]),K+1), nrow=K+1,byrow=TRUE) f.At[i,]=( matrix(rep(f.At[i,],K+1),nrow=K+1,byrow=TRUE) *obs.mat*P.Atm1.At)%*%ones.Kp1 } } q=-1*sum(log(f.At[,1]*dbinom(n.it[,1],0,p.it[1]))) if(!is.finite(q)) return(1000000000) return(q) } model_M2 = function(vars,n.it,Date,K=80){ # function returns the negative log likelihood of the auc method of Dail and Madsen 2011 # David Dail, revised October 25, 2010 # # vars is the list of parameters # n is the data # Date.mat is the matrix of dates # K is the upper cutoff value for the sums over N.it # n=n.it R = length(n[,1]) #number of sites with any observations nsite=R probs.i=numeric(R) Date.mat=Date # some preliminaries we'll need for each site, may as well build them now: Pois1 = rep(0:-K,(K+1)^2) + rep(0:K, each=(K+1)^2) zeros = which(Pois1==0) str.zero = rep(0,length(Pois1)) str.zero[zeros]=1 Bin1 = rep(0:(K),(K+1)^2) Bin2 = rep(rep(0:(K),each=(K+1)),(K+1)) for(i in 1:R){ n.i = as.vector(na.omit(n.it[i,])) dates.i = (Date[i,which(!is.na(n.it[i,]))]) T=length(dates.i) delta.dates=numeric(T) delta.dates[1]=dates.i[1] for(t in 2:T){ delta.dates[t]= dates.i[t]-dates.i[t-1] } Delta.frac = (delta.dates/11) J.vec = numeric(0) n.obs = numeric(0) n.t = numeric(0) n.it.vec = as.vector(na.omit(n.it[i,])) for(j in 1:length(Delta.frac)){ J.vec = c(J.vec, rep(1, floor(Delta.frac[j])), Delta.frac[j]-floor(Delta.frac[j])) n.obs = c(n.obs, rep(0, floor(Delta.frac[j])), 1) n.t = c(n.t, rep(NA, floor(Delta.frac[j])), n.it.vec[j]) } Date.use = numeric(0) for(j in 1:length(J.vec)){ Date.use = round( c(Date.use, sum(J.vec[1:j]*11) ),0) } theta.1 = vars[1] log.theta.surv = theta.1 w.it=exp(-1/exp(log.theta.surv)*J.vec*11) w.it = matrix(w.it, nrow=1,byrow=FALSE) w.it[,1]=1 var.next=2 theta.2=c(vars[2:3],-4*expit(vars[4])) X.ZIpar.int = rep(1,length(as.vector(Date.use))) X.ZIpar.lin = as.vector(Date.use) X.ZIpar.quad = as.vector(Date.use)^2 X.ZIpar = cbind(X.ZIpar.int, X.ZIpar.lin, X.ZIpar.quad) r.mat = exp(matrix(X.ZIpar%*%theta.2,nrow=1,byrow=FALSE)) r.it = r.mat*J.vec ## fractional periods p.it = expit(vars[5]) K.num = 0:K ############################################################################################## # all the parameters are set T = length(r.it) #Make A p.t = rep(p.it,T) w.t = w.it r.t = r.it #n.t, n.obs, f.1 = rep(1, K+1) f1.mat = matrix( 1, nrow=K+1, ncol=K+1) ### here: for(t in T:2){ Bin.mat = matrix(dbinom(Bin1,Bin2,w.t[t]),nrow=(K+1)^2, ncol=(K+1),byrow=TRUE) r.it=rep(r.t[t],(K+1)) r.it.rep = rep(rep(r.it,each=(K+1)),(K+1)) Pois.mat = matrix((dpois(Pois1,r.it.rep) ),nrow=(K+1)^2,ncol=(K+1),byrow=TRUE) Trans.mat = matrix((Pois.mat*Bin.mat)%*%rep(1,(K+1)),nrow=(K+1),ncol=(K+1),byrow=FALSE) if(n.obs[t]==1){ mult.n = dbinom(n.t[t],K.num, p.t[t]) matrix.n = matrix(rep(mult.n, K+1), nrow=K+1, ncol=K+1, byrow=TRUE) } else { matrix.n = matrix(1, nrow=K+1, ncol=K+1) } f.1 = (f1.mat*matrix.n*Trans.mat)%*%rep(1,K+1) f1.mat = matrix( rep(f.1, K+1), nrow=K+1, ncol=K+1, byrow = TRUE) } # end of loop through T # now, for t=1 # P(N_1 = 0)=1 probs.i[i] = f.1[1] } #end of loop through sites q = -1*sum(log(probs.i)) if(!is.finite(q)) return(1000000000) return(q) } M1_ans = function(vars,n.it, Date,K ){ a=vars[3] b=-1*exp(vars[4]) c0=vars[2] p.est.1 = expit(vars[5]) R = dim(n.it)[1] Nt.mean.hat=numeric(R) Nt.max.hat=numeric(R) lam.sum=numeric(R) ms.est.1 = exp(vars[1]) # First, we get N^E (note: this takes a few hours to run) for(i in 1:R){ T = sum(!is.na(n.it[i,])) n.t = n.it[i,which(!is.na(n.it[i,]))] d.t = Date[i,which(!is.na(n.it[i,]))] j.num = numeric(T) p.est = rep(expit(p.est.1),T) MS.est = rep(ms.est.1,T) pi.1.it = numeric(T) for(j in 2:T){ pi.1.it[j]=exp(-1/MS.est[j]*(d.t[j]-d.t[j-1])) #only the first delta.date is unique } pi.1.it[1]=1 lambda.t=numeric(T) for(t in 2:T){ lambda.t[t]= -1/b*exp(c0)*sqrt(-b*pi)*exp(-a^2/(4*b))*(pnorm(d.t[t],mean=-a/(2*b),sd=sqrt(-1/(2*b)))- pnorm(d.t[t-1],mean=-a/(2*b),sd=sqrt(-1/(2*b)))) } a.2 = 1/MS.est+ a pi.2.t=numeric(T) for(t in 2:T){ if(lambda.t[t]>0){ pi.2.t[t]= -1/b*exp(c0-d.t[t]/MS.est[t])/lambda.t[t]*sqrt(-b*pi)*exp(-a.2[t]^2/(4*b))*(pnorm(d.t[t],mean=-a.2[t]/(2*b),sd=sqrt(-1/(2*b)))- pnorm(d.t[t-1],mean=-a.2[t]/(2*b),sd=sqrt(-1/(2*b)))) } else { pi.2.t[t] = 1 } } pi.2.t[1]=1 pi.2.t[which(is.nan(pi.2.t))]=0 K.lim=K G.hat = numeric(T) for(t in 2:T){ g_ave_st=0 g_max_start_vec = numeric(K+1) c.fun.hold = .C("M1_ANS", as.double(pi.1.it[t]), as.double(pi.2.t[t]), as.double(lambda.t[t]), as.double(p.est[t]), as.integer(n.t[t]), as.integer(n.t[t-1]), as.integer(K), as.double(g_ave_st), as.double(g_max_start_vec), NAOK=TRUE) G.hat[t] = unlist(c.fun.hold)[8] prob.g=(unlist(c.fun.hold))[9:(K+9)] } Nt.mean.hat[i]=sum(G.hat) lam.sum[i] = sum(lambda.t) } ## then, we scale N^E by N^NHPP: frac.mean= sum(lam.sum) / sum(Nt.mean.hat) N.hat.M1 = Nt.mean.hat * frac.mean ## Now we can summarize the rate parameters: d.t2 = c(min(d.t), max(d.t)) sum.lambda = -1/b*exp(c0)*sqrt(-b*pi)*exp(-a^2/(4*b))*(pnorm(d.t2[2],mean=-a/(2*b),sd=sqrt(-1/(2*b)))- pnorm(d.t2[1],mean=-a/(2*b),sd=sqrt(-1/(2*b)))) mean.day=-a/(2*b) sd.day = sqrt(-1/(2*b)) arrive95 = mean.day+c(-1.96,1.96)*sd.day rate.ret= c(sum.lambda, mean.day, arrive95) ret.list = list(N.hat.M1, rate.ret) return(ret.list) } M2_ans = function(vars,n.it,Date,K=80){ n=n.it R = length(n[,1]) #number of sites with any observations nsite=R Date.mat=Date A.i.est = numeric(R) for(i in 1:R){ n.i = as.vector(na.omit(n.it[i,])) dates.i = (Date[i,which(!is.na(n.it[i,]))]) T=length(dates.i) delta.dates=numeric(T) delta.dates[1]=dates.i[1] for(t in 2:T){ delta.dates[t]= dates.i[t]-dates.i[t-1] } Delta.frac = (delta.dates/11) J.vec = numeric(0) n.obs = numeric(0) n.t = numeric(0) n.it.vec = as.vector(na.omit(n.it[i,])) for(j in 1:length(Delta.frac)){ J.vec = c(J.vec, rep(1, floor(Delta.frac[j])), Delta.frac[j]-floor(Delta.frac[j])) n.obs = c(n.obs, rep(0, floor(Delta.frac[j])), 1) n.t = c(n.t, rep(NA, floor(Delta.frac[j])), n.it.vec[j]) } Date.use = numeric(0) for(j in 1:length(J.vec)){ Date.use = round( c(Date.use, sum(J.vec[1:j]*11) ),0) } X.surv.int = matrix(rep(1,length(as.vector(Date.use))),ncol=1) X.surv.date = matrix( as.vector(Date.use),ncol=1) theta.1 = vars[1] log.theta.surv = theta.1 w.it=exp(-1/exp(log.theta.surv)*J.vec*11) w.it = matrix(w.it, nrow=1,byrow=FALSE) w.it[,1]=1 s.it = exp(vars[1]) theta.2=c(vars[2:3],-4*expit(vars[4])) X.ZIpar.int = rep(1,length(as.vector(Date.use))) X.ZIpar.lin = as.vector(Date.use) X.ZIpar.quad = as.vector(Date.use)^2 X.ZIpar = cbind(X.ZIpar.int, X.ZIpar.lin, X.ZIpar.quad) r.mat = exp(matrix(X.ZIpar%*%theta.2,nrow=1,byrow=FALSE)) r.it=r.mat p.it = expit(vars[5]) r.it = r.mat*J.vec ## accounting for fractional periods K.num = 0:K ############################################################################################## # all the parameters are set T = length(r.it) #Make A p.t = rep(p.it,T) S.t = rep(s.it, T) w.t = w.it r.t = r.it #n.t, n.obs, N.t = numeric(T) N.t[1]=0 j.num = numeric(T) for(t in 2:T){ if(is.na(n.t[t])){ N.t[t] = N.t[t-1]*w.t[t]+r.t[t] } if(!is.na(n.t[t])){ N.t[t] = n.t[t]/p.t[t] } j.num[t] = (Date.use[t] - Date.use[t-1])*(N.t[t]/S.t[t]+N.t[t-1]/S.t[t-1]) } A.i.est[i] = .5*sum(j.num) + .5*n.t[1]/p.t[1] + .5*n.t[T]/p.t[T] } AUC.hat=A.i.est max.n=numeric(0) sum.n=numeric(0) for(i in 1:R){ n.t = n.it[i,which(!is.na(n.it[i,]))] max.n = c(max.n, max(n.t)) sum.n = c(sum.n, sum(n.t)) } ## And now, we will find N^0: M2.hat = AUC.hat zero.i = which(max.n==0) for(i in zero.i){ n.i = as.vector(na.omit(n.it[i,])) dates.i = (Date[i,which(!is.na(n.it[i,]))]) T=length(dates.i) delta.dates=numeric(T) delta.dates[1]=dates.i[1] for(t in 2:T){ delta.dates[t]= dates.i[t]-dates.i[t-1] } Delta.frac = (delta.dates/11) J.vec = numeric(0) n.obs = numeric(0) n.t = numeric(0) n.it.vec = as.vector(na.omit(n.it[i,])) for(j in 1:length(Delta.frac)){ J.vec = c(J.vec, rep(1, floor(Delta.frac[j])), Delta.frac[j]-floor(Delta.frac[j])) n.obs = c(n.obs, rep(0, floor(Delta.frac[j])), 1) n.t = c(n.t, rep(NA, floor(Delta.frac[j])), n.it.vec[j]) } Date.use = numeric(0) for(j in 1:length(J.vec)){ Date.use = round( c(Date.use, sum(J.vec[1:j]*11) ),0) } X.surv.int = matrix(rep(1,length(as.vector(Date.use))),ncol=1) X.surv.date = matrix( as.vector(Date.use),ncol=1) theta.1 = vars[1] X.surv = X.surv.int log.theta.surv = matrix(X.surv%*%theta.1,nrow=1,byrow=FALSE) w.it=exp(-1/exp(log.theta.surv)*J.vec*11) w.it = matrix(w.it, nrow=1,byrow=FALSE) w.it[,1]=1 S.t = exp(log.theta.surv) theta.2=c(vars[2:3],-4*expit(vars[4])) X.ZIpar.int = rep(1,length(as.vector(Date.use))) X.ZIpar.lin = as.vector(Date.use) X.ZIpar.quad = as.vector(Date.use)^2 X.ZIpar = cbind(X.ZIpar.int, X.ZIpar.lin, X.ZIpar.quad) r.mat = exp(matrix(X.ZIpar%*%theta.2,nrow=1,byrow=FALSE)) r.it=r.mat p.it = expit(vars[5]) r.it = r.mat*J.vec ## fractional periods K.num = 0:K ############################################################################################## # all the parameters are set T = length(r.it) #Make A p.t = rep(p.it,T) p.t[which(is.na(n.t))]=0 w.t = w.it r.t = r.it ## Allow N_tm1 to be 0 to K N.est=matrix(0,nrow=T,ncol=K+1) N.use=0:K P_N = dpois(N.use,r.t[1]) P_N=P_N/sum(P_N) P_num = (1-p.t[1])^N.use*P_N P_den = sum(P_num) N.est[1,]=(1*(P_num/P_den)) P_N.0 = matrix(0,nrow=K+1,ncol=K+1) for(t in 2:T){ for(k in 0:(K)){ for(j in N.use){ P_N.0[k+1,j] = sum(dpois(N.use,r.t[t])*dbinom(k-N.use,j,w.t[t])*N.est[t-1,j+1]) } } P_N.2 = P_N.0%*%(rep(1,K+1)) P_N.3 = P_N.2/sum(P_N.2) P_num = (1-p.t[t])^N.use*P_N.3 P_den = sum(P_num) N.est[t,] = (1*P_num/P_den) } K.mat = matrix( rep(N.use,T),nrow=T,byrow=TRUE) N.est.2=(N.est*K.mat)%*%(rep(1,K+1)) j.num=numeric(T) for(t in 2:T){ j.num[t] = (Date.use[t] - Date.use[t-1])*(N.est.2[t]/S.t[t]+N.est.2[t-1]/S.t[t-1]) } A.i.zero = .5*sum(j.num) M2.hat[i] = A.i.zero } return(M2.hat) }