Load data and get libraries togther

knitr::opts_chunk$set(echo = TRUE)
library(rstan)
library(shinystan)
library(ggplot2)
library(dplyr)
library(readr)
ardata <- read_csv("ardata.csv")
sf6data <- read_csv("sf6data.csv")

Prepare data

First need to calculate saturation concentrations. These are based on Hamme and Emerson (2004)

watdens<-function(temp){
  
  t<-temp
  
  A <- 7.0132e-5
  B <- 7.926295e-3 
  C <-  -7.575477e-5 
  D<- 7.314701e-7
  E <-  -3.596363e-9
  to<- 3.9818
  
  dens<- (999.97358- (A*(t-to) + B*(t-to)^2 +C*(t-to)^3 + D*(t-to)^4+E*(t-to)^5) ) -4.873e-3 + 1.708e-4*t - 3.108e-6 * t^2
  dens/1000
}

nsat<- function(temp, bp) {
  
  
  ts<-log((298.15-temp) / (273.15 + temp))
  a0<-6.42931
  a1<-2.92704
  a2<-4.32531
  a3<-4.69149
  
  u<-10^(8.10765-(1750.286/(235+temp)))
  satn<-(exp(a0 + a1*ts + a2*ts^2 + a3*ts^3))*((bp-u)/(760-u))
  watdens(temp)*satn*(28.014/1000)##converts umol/kg to mg/L
}

arsat<- function(temp, bp) {
  
  
  ts<-log((298.15-temp) / (273.15 + temp))
  a0<-2.79150
  a1<-3.17609
  a2<-4.13116
  a3<-4.90379
  
  u<-10^(8.10765-(1750.286/(235+temp)))
  satar<-(exp(a0 + a1*ts + a2*ts^2 + a3*ts^3))*((bp-u)/(760-u))
  watdens(temp)*satar*(39.948/1000)##converts umol/kg to mg/L
}

## Estimates K600 for KO2 at a given temperature. From Wanninkhof (1992).
K600fromO2<-function (temp, KO2) {
  ((600/(1800.6 - (120.1 * temp) + (3.7818 * temp^2) - (0.047608 * temp^3)))^-0.5) * KO2
}



## Estimates K600 for KAr at a given temperature. From Raymond et al  (2012).
K600fromAr<-function (temp, KAr) {
  ((600/(1799 - (106.96 * temp) + (2.797 * temp^2) - (0.0289 * temp^3)))^-0.5) * KAr
}

Now calculate saturation concentrations, and split data into pre and post

ardata$arnsat <- arsat(ardata$temp,ardata$pressure) / nsat(ardata$temp,ardata$pressure)
ardatapre<- ardata[ardata$type=='pre', ]
ardatapost<- ardata[ardata$type=='post', ]
sf6datapost<- sf6data[sf6data$type=='post', ]

Ar tracer data

Plot of raw Ar:N2. Note the high varibility in some of the pre Ar data. We used the calculated Ar:N2 since it was in the middle of our samples, but not nearly as variable. The two lines on the Ar sat are the post and pre calculations; they vary because of temperature. The upper line is the warmer, pleateau collection temperature.

## Warning: Removed 16 rows containing missing values (geom_point).
## Warning: Removed 15 rows containing missing values (geom_point).

Now for calculations where we subtract background and correct for conductivity

ardatapost$arcorr<- ardatapost$arncalc - ardatapost$arnsat

#2.2.  Calc mean of all pre cond by station and by site
ardataprecond <- ardatapre %>% group_by(Trial, stationcorr) %>% summarise(precond=mean(cond, na.rm=T))
## Warning: package 'bindrcpp' was built under R version 3.3.2
#join with the post cond
ardatapost<-merge(ardatapost,ardataprecond)



ardatapost$condcor<- ardatapost$cond - ardatapost$precond

ardatapost$arcond<- ardatapost$arcorr / ardatapost$condcor

##calc the mean for station 0
ardatapost_0<-ardatapost[ardatapost$stationcorr==0,]
ardata_0sum <- ardatapost_0 %>% group_by(Trial) %>% summarise(arcond_0=mean(arcond), arn_enrich=mean(arncalc/arnsat))


#join with ardatapost
ardatapost<-merge(ardatapost,ardata_0sum)

ardatapost$arcondnorm<-ardatapost$arcond/ardatapost$arcond_0

Prep SF6 in same way

#2.2.  Calc mean of all pre cond by station and by site
ardataprecond <- ardatapre %>% group_by(Trial, stationcorr) %>% summarise(precond=mean(cond, na.rm=T))

#join with the post cond
sf6datapost<-merge(sf6datapost,ardataprecond) #yes, using pre from Ar



sf6datapost$condcor<- sf6datapost$cond - sf6datapost$precond

sf6datapost$sf6cond<- sf6datapost$sf6 / sf6datapost$condcor

##calc the mean for station 0
sf6datapost_0<-sf6datapost[sf6datapost$stationcorr==0,]
sf6data_0sum <- sf6datapost_0 %>% group_by(Trial) %>% summarise(sf6cond_0=mean(sf6cond))


#join with sf6datapost
sf6datapost<-merge(sf6datapost,sf6data_0sum)

sf6datapost$sf6condnorm<-sf6datapost$sf6cond/sf6datapost$sf6cond_0

Stan model

Here is the Stan model that we used for the analysis. Details on priors etc are in the text.

######
sink("combineargonmodelb.stan")

cat("
    
    data {
    
    int <lower=1> N; //number of data points, ar
    int <lower=1> NS; //number of data points, sf6
    int <lower=1> sites; //number of sites
    int <lower=0> sitear[N]; //stream number  for ar indexing vector
    int <lower=0> sitesf6[NS]; //stream number for sf6 indexing vector
    
    
    vector [N] ar;//conductivity corrected Ar data
    vector [N] distar;//
    
    vector [NS] sf6;//conductivity corrected sf6 data
    vector [NS] distsf6;// 
    
    }
    
    parameters {
    
    vector <lower=0> [sites] a;
    real mu_a;   //take out for hyperprior info  // mean prior
    real<lower=0, upper=0.5> sigma_ar; // error ar
    real<lower=0, upper=0.5> sigma_sf6; // error sf6
    
    vector[sites] k; // decline
    real <lower=0, upper=2> Ar0;
    real <lower=0, upper=2> SF60;
    
    real <lower=0> sigma_a; // mean prior
    }
    
    model { 
    
    //priors. 
    k ~ normal(0, 10);
  
    a~normal (mu_a,sigma_a); // mean prior

    Ar0 ~normal(1,0.05);
    SF60~normal(1,0.05);

    mu_a~normal(1.35, 1); // hyper prior
     sigma_a ~ normal(0, 2); //this is in fact half normal.  See the constraint above.
    
    //likelihood        
    for (i in 1:N) {
    ar[i] ~ normal( Ar0 * exp(-k[sitear[i]]*distar[i]*0.01) , sigma_ar); 
    }
    for (j in 1:NS) {
    sf6[j] ~ normal( SF60 * exp(-k[sitesf6[j]]*distsf6[j]*0.01/a[sitesf6[j]]) , sigma_sf6); 
    }
    
    }
    "
    ,fill=TRUE)
sink()

Run model, look at output

Get data together into a list for Stan.

arstandata=list("ar"= ardatapost$arcondnorm, "distar"=ardatapost$stationcorr, "N"=length(ardatapost$stationcorr), 
                "sf6"= sf6datapost$sf6condnorm, "distsf6"=sf6datapost$stationcorr, "NS"=length(sf6datapost$stationcorr), 
                "sites"=max(ardatapost$Trial),  "sitear" = ardatapost$Trial, 
                "sitesf6" = sf6datapost$Trial)

Run the stan model

arfit <- stan(file = "combineargonmodelb.stan", data = arstandata, 
               iter = 3000, chains = 4)
## The following numerical problems occured the indicated number of times after warmup on chain 1
##                                                                                         count
## Exception thrown at line 54: normal_log: Location parameter is nan, but must be finite!     2
## When a numerical problem occurs, the Hamiltonian proposal gets rejected.
## See http://mc-stan.org/misc/warnings.html#exception-hamiltonian-proposal-rejected
## If the number in the 'count' column is small, do not ask about this message on stan-users.
## The following numerical problems occured the indicated number of times after warmup on chain 2
##                                                                                         count
## Exception thrown at line 51: normal_log: Location parameter is inf, but must be finite!     2
## When a numerical problem occurs, the Hamiltonian proposal gets rejected.
## See http://mc-stan.org/misc/warnings.html#exception-hamiltonian-proposal-rejected
## If the number in the 'count' column is small, do not ask about this message on stan-users.
## The following numerical problems occured the indicated number of times after warmup on chain 3
##                                                                                         count
## Exception thrown at line 51: normal_log: Location parameter is inf, but must be finite!     3
## When a numerical problem occurs, the Hamiltonian proposal gets rejected.
## See http://mc-stan.org/misc/warnings.html#exception-hamiltonian-proposal-rejected
## If the number in the 'count' column is small, do not ask about this message on stan-users.
## The following numerical problems occured the indicated number of times after warmup on chain 4
##                                                                                         count
## Exception thrown at line 51: normal_log: Location parameter is inf, but must be finite!     3
## When a numerical problem occurs, the Hamiltonian proposal gets rejected.
## See http://mc-stan.org/misc/warnings.html#exception-hamiltonian-proposal-rejected
## If the number in the 'count' column is small, do not ask about this message on stan-users.

Print out parameters. a[j] is the ratio of \(K_{Ar}: K_{SF6}\) for stream j. k is \(K_{Ar,j}\), i.e., the per unit distance evaion rate of Ar. It is scaled 100 fold for the purposes of Stan, which like all parameters to be around -1 to 1. mu_a is the hyperparamter for \(a\)

print(arfit)
## Inference for Stan model: combineargonmodelb.
## 4 chains, each with iter=3000; warmup=1500; thin=1; 
## post-warmup draws per chain=1500, total post-warmup draws=6000.
## 
##             mean se_mean   sd   2.5%    25%    50%    75%  97.5% n_eff
## a[1]        2.01    0.00 0.28   1.51   1.81   1.99   2.18   2.61  5292
## a[2]        0.92    0.00 0.14   0.67   0.82   0.91   1.01   1.23  4668
## a[3]        1.76    0.00 0.22   1.37   1.61   1.75   1.90   2.22  4959
## a[4]        2.00    0.00 0.21   1.62   1.85   1.98   2.13   2.45  4735
## a[5]        1.46    0.00 0.17   1.15   1.34   1.45   1.57   1.83  4772
## a[6]        2.31    0.00 0.23   1.88   2.15   2.30   2.46   2.79  5015
## a[7]        0.94    0.00 0.12   0.73   0.86   0.93   1.01   1.18  4652
## a[8]        3.42    0.01 0.44   2.68   3.11   3.38   3.68   4.41  3796
## mu_a        1.79    0.00 0.35   1.09   1.58   1.80   2.01   2.48  6000
## sigma_ar    0.09    0.00 0.01   0.08   0.09   0.09   0.10   0.11  6000
## sigma_sf6   0.06    0.00 0.00   0.05   0.06   0.06   0.06   0.07  6000
## k[1]        0.18    0.00 0.02   0.14   0.17   0.18   0.19   0.22  6000
## k[2]        0.12    0.00 0.02   0.09   0.11   0.12   0.14   0.16  4752
## k[3]        0.51    0.00 0.05   0.42   0.47   0.51   0.54   0.61  4844
## k[4]        0.57    0.00 0.05   0.47   0.53   0.56   0.60   0.67  4940
## k[5]        1.78    0.00 0.19   1.44   1.65   1.77   1.90   2.18  4730
## k[6]        1.65    0.00 0.14   1.38   1.55   1.64   1.75   1.95  5388
## k[7]        2.54    0.00 0.26   2.08   2.36   2.53   2.71   3.08  4423
## k[8]       10.00    0.02 1.20   7.97   9.18   9.89  10.70  12.70  3884
## Ar0         0.99    0.00 0.01   0.96   0.98   0.99   1.00   1.02  6000
## SF60        0.99    0.00 0.01   0.97   0.98   0.99   1.00   1.01  6000
## sigma_a     0.99    0.00 0.36   0.52   0.74   0.92   1.15   1.92  6000
## lp__      536.31    0.07 3.54 528.56 534.15 536.66 538.89 542.07  2259
##           Rhat
## a[1]         1
## a[2]         1
## a[3]         1
## a[4]         1
## a[5]         1
## a[6]         1
## a[7]         1
## a[8]         1
## mu_a         1
## sigma_ar     1
## sigma_sf6    1
## k[1]         1
## k[2]         1
## k[3]         1
## k[4]         1
## k[5]         1
## k[6]         1
## k[7]         1
## k[8]         1
## Ar0          1
## SF60         1
## sigma_a      1
## lp__         1
## 
## Samples were drawn using NUTS(diag_e) at Tue Feb 27 12:07:55 2018.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).

Extract parameters of interest. Note that the model multiplied k by 0.01 so that all parameters are on the same scale. We fix that here.

arfitsum<- summary(arfit)$summary

asum<-arfitsum[1:8,c("2.5%", "50%", "97.5%")]

ksum<-(arfitsum[12:19,c("2.5%", "50%", "97.5%")])*0.01# the 0.01 here rescales the k estimate

Calculate derived variables such as \(K\) (per time) and \(k600\)

streamslope<-c( 0.00703, 0.00703,0.015,0.015, 0.06, 0.11,0.12,0.12)
Q<- c(0.084,0.07,0.02,0.02,0.021,0.097,0.022,0.022)*60
w<- c(2.3,1.6,0.8,0.8,0.9,3.3,0.7,1.3)
ardataposttemp<-ardatapost %>% group_by(Trial) %>% summarise(temp=mean(temp, na.rm=T))
k<- ksum*Q*1440/w
k600<-K600fromAr(ardataposttemp$temp, k)
v<-c(12,15.4,9.5,9.5,3.1,4,6.3,5.2)

z<-(Q)/(w*v)

Kt<-v*ksum[,2]*1440

Enjoy.