################################################################################
################################################################################

# using R for an mcmc analysis of the t sampling model fit to the NB10 data

# dd ( 28 feb 2021 )

# if You've already installed JAGS from sourceforge, skip step (0)

# (0) type 'rjags sourceforge' in your favorite browser search window; 
#     the top entry should say

#       JAGS: Just Another Gibbs Sampler - SourceForge

#     clicking on that link will take you to sourceforge.net ,
#     where you now click on the field that says

#       Download Latest Version JAGS-4.3.0.exe (33.9 MB)

#     watch the small window in the lower left of your screen;
#     eventually a flashing circle will tell you that the download
#     is finished; at this point, left click on the place where 
#     the flashing circle was and follow the resulting instructions 
#     to install JAGS (i recommend that you accept all default settings 
#     offemaroon to you)

#     JAGS is set up so that, once you've installed it,
#     (a) R will automatically find it (wherever your machine put it)
#     when running rjags, and (b) you don't need to install it again

# --> please stop and verify that this step was successfully completed

################################################################################

# (1) create a new directory; for illustration here, i'm going
#     to call it

#       NB10-Bayesian-Analysis

# --> please stop and verify that this step was successfully completed

################################################################################

# (2) open up an R session and change directory to the directory name
#     you chose in step (1); as noted above, i chose 

#       NB10-Bayesian-Analysis

#     on my home desktop this can be performed with the function call

#       setwd( 'C:/DD/Teaching/AMS-STAT-206/Winter-2021/NB10-Bayesian-Analysis' )

#     unbuffer the output (if relevant in your R environment)

#     start with a clean copy of R

rm( list = ls( ) )

# --> please stop and verify that this step was successfully completed

#################################################################################

# (3) download the files

#       nb10-t-model.txt
#       nb10-gaussian-model.txt

#     from the drupal course web page and put them in the directory
#     you created in step (1) 

#     for clarity, the t model file looks like the following here,
#     (of course) without the # comment symbols

# model {

#   mu ~ dnorm( 0.0, 1.0E-6 )
#   sigma ~ dunif( 0.0, 10.0 )
#   nu ~ dunif( 2.0, 12.0 )
 
#   for ( i in 1:n ) {
# 
#     y[ i ] ~ dt( mu, tau, nu )
 
#   }
# 
#   tau <- 1 / ( sigma * sigma )
#   y.new ~ dt( mu, tau, nu )
 
# }

# note to self: if you use this case study in the future,
# you'll be comparing the t and gaussian models,
# in which 'mu' and 'sigma' have different meanings; fix this
# with 'mu.t' and 'sigma.t' in the t model and in this file

##### explanation of the 'rjags' t model file

#   (A) all 'rjags' model files must begin with the first line

#         model {

#       and end with the line

#         }

#   (B) these files always have three parts:

#         (i) the prior specification, which i like to put
#             at the top of the file

#             here this is

#               mu ~ dnorm( 0.0, 1.0E-6 )
#               sigma ~ dunif( 0.0, 10.0 )
#               nu ~ dunif( 2.0, 12.0 )

#        (ii) the sampling distribution/likelihood specification,
#             which fits in naturally in the middle of the file

#             here this is

#               for ( i in 1:n ) {
# 
#                 y[ i ] ~ dt( mu, tau, nu )
 
#               }

#       (iii) the specification of any *derived quantities* of interest,
#             which you want to monitor along with the parameters;
#             this naturally fits in at the end of the file

#             here this is

#               tau <- 1 / ( sigma * sigma )
#               y.new ~ dt( mu, tau, nu )

##### explanation of the choices for the priors on the three parameters

#   (C) we want an LI (low information) prior here, because
#       (for you and me) close to nothing was known about these 
#       parameters before seeing the data

#   (D) the prior is actually 3-dimensional here

#         p( mu, sigma, nu | script B )

#       but in practice little is typically lost by pretending that
#       the parameters are independent in the prior:

#         p( mu, sigma, nu | script B ) = p( mu | script B ) *
#                                           p( sigma | script B ) *
#                                           p( nu | script B )

#       this is ok because any dependence structure that should
#       be present among th parameters will be learned 
#       via the likelihood function

#   (E) in 'rjags', everything to do with the Normal distribution
#       uses a specification of spread based on the *precision*,
#       *not* the variance or SD; in this model that applies both
#       to the Normal prior on 'mu' and the t likelihood,
#       because the t distribution is actually a scale mixture
#       of Gaussians in disguise   

#   (F) the 'rjags' specification 'mu ~ dnorm( 0.0, 1.0E-6 )' means
#       that i've chosen an LI prior for mu: Normal with mean 0
#       and precision 1 / 1000000, which equates to a prior SD of
#       sqrt( 1 / ( 1 / 1000000 ) ) = 1000

#       this is an LI prior because (from our earlier work) the 
#       99.9% likelihood-based confidence interval for mu was

#         ( 402.7, 405.8 )

#       and the Normal density with a mean of 0 and an SD of 1000
#       is extremely close to constant (i.e., flat) in that region:

mu.grid.1 <- seq( -3000, 3000, length = 500 )

mu.grid.2 <- seq( 402.7, 405.8, length = 500 )

par( mfrow = c( 2, 1 ) )

plot( mu.grid.1, dnorm( mu.grid.1, 0, 1000 ), type = 'l',
  lwd = 3, col = 'cornflowerblue', xlab = 'mu', ylab = 'Density' )

abline( v = 402.7, lwd = 3, lty = 3, col = 'darkcyan' )

abline( v = 405.8, lwd = 3, lty = 3, col = 'darkcyan')

plot( mu.grid.2, dnorm( mu.grid.2, 0, 1000 ), type = 'l',
  lwd = 3, col = 'chocolate1', xlab = 'mu', ylab = 'Density',
  ylim = c( 0, 4e-04 ) )

par( mfrow = c( 1, 1 ) )

#   (G) for sigma, the 'rjags' specification 'sigma ~ dunif( 0.0, 10.0 )' 
#       means that i've chosen an LI Uniform( 0, 10 ) prior for sigma

#       when using a Uniform( left, right ) distribution as an LI prior, 
#       care must be taken to choose 'left' and 'right' in such a way
#       that the resulting interval ( left, right ) doesn't truncate
#       the likelihood function at either end in the bayes's theorem
#       multiplication

#         posterior = c * prior * likelihood

#       from our earlier work the 99.9% likelihood-based confidence interval 
#       for sigma was

#         ( 2.317, 5.083 )

#       so Uniform( 0, 10 ) for sigma will certainly avoid inapproprite
#       truncation

#   (H) for nu, the 'rjags' specification 'nu ~ dunif( 2.0, 12.0 )
#       means that i've chosen an LI Uniform( 2, 12 ) prior for nu

#       i set 'right' to 12 in this prior to avoid inappropriate truncation:
#       from our earlier work the 99.9% likelihood-based confidence interval 
#       for nu was

#         ( 0.1723, 5.844 )

#       so 'right' = 12 should avoid inappropriate truncation
#       in the right tail of the posterior for nu

#       it turns out that the variance of the t distribution
#       is infinite when nu <= 2; i don't think that infinite-variance
#       models are realistic for good data science, so i've set 'left'
#       to 2.0

##### explanation of the sampling distribution/likelihood part of the
##### 'rjags' t model specification

#   (I) our model for the observations in this part of the case study is

#         ( Y_i | mu sigma nu [t SM] [ script B ] ) ~IID t_nu ( mu, sigma^2 )
#                                 ( i = 1, ..., n )

#       the way you specify this IID structure in 'rjags' is with
#       the following 'for' loop:

#               for ( i in 1:n ) {
# 
#                 y[ i ] ~ dt( mu, tau, nu )
 
#               }

#       IMPORTANT: as noted above, the scale parameter in the 't' model
#       is specified via the *precision* parameter

#          tau = 1 / sigma^2

#       in this model 'rjags' is expecting us to put a prior on tau, 
#       not sigma

#       for calibration reasons i've chosen instead to put a Uniform prior
#       on sigma (research has shown that this choice produces well-calibrated
#       bayesian results in this model)

#       this means that i have to put a statement linking tau and sigma
#       in the *derived quantities* part of the model specification;
#       if i don't i'll get a compilation error

##### explanation of the derived quantities part of the 'rjags' 
##### t model specification

#   (J) here this is

#         tau <- 1 / ( sigma * sigma )
#         y.new ~ dt( mu, tau, nu )

#       the first of these statements provides the deterministic linkage
#       between tau and sigma, necessary for successful model compilation

#       the second statement permits us to sample from the 
#       *pmaroonictive distribution* for a new Y value, for a reason
#       that i'll explain in the document camera notes

##### explanation of the 'rjags' gaussian model file, which looks like
##### this (without the # symbols, of course)

# model {

#   mu ~ dnorm( 0.0, 1.0E-6 )
#   sigma.gaussian ~ dunif( 0.0, 10.0 )
 
#   for ( i in 1:n ) {
# 
#     y[ i ] ~ dnorm( mu, tau.gaussian )
 
#   }
# 
#   tau.gaussian <- 1 / ( sigma.gaussian * sigma.gaussian )
#   y.new ~ dnorm( mu, tau.gaussian )
 
# }

#   (K) actually this file needs no further explanation
#       once the t model file is understood

#################################################################################

# (4) if this is your first time using rjags, you have to install
#     the rjags package from CRAN and then load it; if you've already
#     installed it you just need to load it now (you have to be
#     connected to the internet for the next step to work).

#     at the R prompt > issue the command

install.packages( 'rjags' )

#     you'll need to select a CRAN mirror site: i use 

#       USA (OR) [https]

#     R will now mumble a few things and finish with a remark like

#       The downloaded binary packages are in blah-blah

#     to load rjags, at the R prompt > issue the command

library( rjags ) 

#     R will again mumble a few things, possibly finishing with 
#     some warning messages, which you can ignore, saying that 
#     various things were built under a different version of R 
#     than the one you're using; when you get a > prompt at the end 
#     of the mumbling, rjags will be loaded

# --> please stop and verify that this step was successfully completed

#################################################################################

# (5) now enter the NB10 data set as a list and assign it to an object;
# here i've called it nb10.data :

nb10.data <- list( 
  y = c( 409., 400., 406., 399., 402., 406., 401., 403., 401., 403.,
         398., 403., 407., 402., 401., 399., 400., 401., 405., 402., 
         408., 399., 399., 402., 399., 397., 407., 401., 399., 401., 
         403., 400., 410., 401., 407., 423., 406., 406., 402., 405., 
         405., 409., 399., 402., 407., 406., 413., 409., 404., 402., 
         404., 406., 407., 405., 411., 410., 410., 410., 401., 402., 
         404., 405., 392., 407., 406., 404., 403., 408., 404., 407., 
         412., 406., 409., 400., 408., 404., 401., 404., 408., 406., 
         408., 406., 401., 412., 393., 437., 418., 415., 404., 401., 
         401., 407., 412., 375., 409., 406., 398., 406., 403., 404. ), 
  n = 100 )

# --> please stop and verify that this step was successfully completed

#################################################################################

# (6) now enter initial values for the Markov chain as a list and assign
# it to an object; here i've called it nb10.initial.values :

nb10.initial.values <- list( mu = 404.59, sigma = 4.0, nu = 5.0, 
  y.new = 400.0 )

# --> please stop and verify that this step was successfully completed

#################################################################################

# (7) now you do model checking and compiling with the following 
#     command (using the names i've chosen for illustration above):

nb10.run.1 <- jags.model( file = "nb10-t-model.txt", data = nb10.data, 
  inits = nb10.initial.values )

# --> please stop and verify that this step was successfully completed

#################################################################################

# (8) before doing the random MCMC sampling, it's good form to set the
#     random number seed, so that other people running your code get
#     the same results you do:

set.seed( 314159 )

# --> please stop and verify that this step was successfully completed

#################################################################################

# (9) now you do a burn-in of (e.g.) 1000 iterations from your
#     initial values with the following commands:

n.burnin <- 1000

system.time( 

  update( nb10.run.1, n.iter = n.burnin )

)

#      on my desktop, with an Intel Core i7 3770 CPU at 3.40GHz, 
#      single-threaded, 1000 burn-in iterations took about 1.1 seconds

# --> please stop and verify that this step was successfully completed

#################################################################################

# (10) now you do a monitoring run of (e.g.) 100000 iterations with
#      the following command (at decent GHz this will take about 
#      90 seconds):

n.monitor <- 100000

system.time( 

  nb10.run.1.results <- jags.samples( nb10.run.1, 
    variable.names = c( "mu", "sigma", "nu", "y.new" ), 
    n.iter = n.monitor )

)

#      on my desktop (single-threaded), 100000 monitoring iterations
#      took about 81 seconds (rjags is not blindingly fast)

# --> please stop and verify that this step was successfully completed

#################################################################################

# (11) now we get to summarize the posterior in a variety of ways;
#      here's some useful code to do so; first we extract each of
#      the monitored columns of the MCMC data set into individual
#      variables:

mu.star <- nb10.run.1.results$mu

sigma.star <- nb10.run.1.results$sigma

nu.star <- nb10.run.1.results$nu

y.new.star <- nb10.run.1.results$y.new

#      then we can create graphical and numerical summaries of each
#      monitored quantity, as follows:

# --> please stop and verify that this step was successfully completed

##### summary for mu

# (11a) first we create the MCMC 4-plot:

par( mfrow = c( 2, 2 ) )

plot( 1:n.monitor, mu.star, type = 'l', xlab = 'Iteration Number',
  ylab = 'mu', col = 'maroon' )

# the upper left graph is a time-series plot of the monitored
# iterations (to visually check for stationarity)

plot( density( mu.star ), xlab = 'mu',
  lwd = 3, col = 'cornflowerblue', main = 'NB10 Case Study' )

# the upper right graph is a density trace of the marginal posterior
# distribution for the quantity being monitored

acf( as.ts( mu.star[ 1:n.monitor ], start = 1, 
  end = n.monitor, frequency = 1 ), lag.max = 10, main = '',
  col = 'darkcyan', lwd = 3 )

pacf( as.ts( mu.star[ 1:n.monitor ], start = 1, 
  end = n.monitor, frequency = 1 ), lag.max = 10, main = '',
  col = 'chocolate1', lwd = 3 )

par( mfrow = c( 1, 1 ) )

# the lower left and right graphs are plots of the autocorrelation 
# function (ACF) and the partial autocorrelation function (PACF) 
# for the time series of monitored iterations; if these iterations
# behave like an autoregressive time series of order 1 with first-
# order autocorrelation rho (often denoted by AR_rho( 1 )), then
# (a) the PACF will have a single spike of height rho at lag 1 and
# no other significant spikes and (b) the ACF will exhibit a
# geometric decay pattern with a spike of height rho at lag 1, a
# spike of approximate height rho^2 at lag 2, and so on -- for
# the parameter mu the ACF and PACF plots look perfectly like an
# AR( 1 ) with a first-order autocorrelation of about rho.1.hat = 0.21

# an MCMC time series with a rho.1.hat value close to 0 (which
# would be equivalent to IID sampling) is said to be "mixing well";
# here we would say that this is true of the mu series

# --> please stop and verify that this step was successfully completed

# (11b) then we can make numerical summaries of the marginal 
#       posterior for the monitored quantity:

c( mean( mu.star ), sd( mu.star ), quantile( mu.star, 
  probs = c( 0.0005, 0.9995 ) ) )

#                               0.05%      99.95% 
# 404.2986245   0.4742523 402.7202350 405.8606385

print( rho.1.hat.mu.star <- cor( mu.star[ 1:( n.monitor - 1 ) ],
  mu.star[ 2:n.monitor ] ) )

# 0.2125716

# the reason for making the ACF and PACF plots is that if the time
# series of monitored iterations for a given quantity behaves like
# an AR_rho( 1 ), then the Monte Carlo standard error (MCSE) of the 
# mean of this series is given by the following expression:

print( se.mu.star.mean <- ( sd( mu.star ) / sqrt( n.monitor ) ) *
  sqrt( ( 1 + rho.1.hat.mu.star ) / ( 1 - rho.1.hat.mu.star ) ) )

# 0.001834315

# this is also the right order of magnitude for the MCSE of the
# MCMC estimate of the posterior SD and the quantiles giving the
# 99.9% interval for the monitored quantity

# we would conclude, based on this model, that the true weight of
# NB10 was about 404.30 (MCSE 0.002) micrograms below the nominal weight 
# of 10 grams, give or take about 0.47 micrograms, and that a 99.9% 
# posterior interval for the true weight runs from about 402.7
# to about 405.9 micrograms below 10 grams

# --> IMPORTANT: in Monte Carlo work we need to NOT get confused
#     between 

#       the MCSE for the posterior mean estimate, which we can make
#       arbitrarily small with a big enough choice for the number M
#       of monitoring iterations

#     and

#       the posterior SD for the parameter of interest, which depends
#       on the DATA SAMPLE SIZE n and which would only go down
#       if we got more data

#     both of these quantities are standard deviations, but
#     SDs for completely different things

# --> please stop and verify that this step was successfully completed

##### summary for sigma

par( mfrow = c( 2, 2 ) )

plot( 1:n.monitor, sigma.star, type = 'l', xlab = 'Iteration Number',
  ylab = 'sigma', col = 'maroon' )

plot( density( sigma.star ), xlab = 'sigma',
  lwd = 3, col = 'cornflowerblue', main = 'NB10 Case Study' )

acf( as.ts( sigma.star[ 1:n.monitor ], start = 1, 
  end = n.monitor, frequency = 1 ), lag.max = 10, main = '',
  col = 'darkcyan', lwd = 3 )

pacf( as.ts( sigma.star[ 1:n.monitor ], start = 1, 
  end = n.monitor, frequency = 1 ), lag.max = 10, main = '',
  col = 'chocolate1', lwd = 3 )

par( mfrow = c( 1, 1 ) )

# the marginal posterior for sigma has a bit of a long right-hand
# tail, as you would expect for a scale parameter; the ACF and PACF
# plots show that the sigma time series behaves more or less like an
# AR( 1 ) with a first-order autocorrelation of about 0.45 (so the
# MCMC output for this parameter is not mixing quite as well as
# the output for mu, but 0.45 is still a fairly low autocorrelation)

c( mean( sigma.star ), sd( sigma.star ), quantile( sigma.star, 
  probs = c( 0.0005, 0.9995 ) ) )

#                         0.05%    99.95% 
# 3.9200491 0.4463147 2.6874819 5.6718138

print( rho.1.hat.sigma.star <- cor( sigma.star[ 1:( n.monitor - 1 ) ],
  sigma.star[ 2:n.monitor ] ) )

# 0.4480396

print( se.sigma.star.mean <- ( sd( sigma.star ) / sqrt( n.monitor ) ) *
  sqrt( ( 1 + rho.1.hat.sigma.star ) / ( 1 - rho.1.hat.sigma.star ) ) )

# 0.002245337

# we would conclude that the population value of the scale parameter 
# in the t model is about 3.86 micrograms (MCSE 0.002), give or take about 
# 0.44 micrograms, and that a 99.9% posterior interval for this
# parameter runs from about 2.69 to about 5.67 micrograms

# --> please stop and verify that this step was successfully completed

##### summary for nu

par( mfrow = c( 2, 2 ) )

plot( 1:n.monitor, nu.star, type = 'l', xlab = 'Iteration Number',
  ylab = 'nu', col = 'maroon', ylim = c( 0, max( nu.star ) ) )

# the barrier at 2.0 for the nu.star values was imposed
# by the prior, which was set to Uniform( 2, 12 )
# because (as noted above) the t model has infinite variance for nu <= 2

plot( density( nu.star ), xlab = 'nu', 
  lwd = 3, col = 'cornflowerblue', main = 'NB10 Case Study' )

acf( as.ts( nu.star[ 1:n.monitor ], start = 1, 
  end = n.monitor, frequency = 1 ), lag.max = 10, main = '',
  col = 'darkcyan', lwd = 3 )

pacf( as.ts( nu.star[ 1:n.monitor ], start = 1, 
  end = n.monitor, frequency = 1 ), lag.max = 10, main = '',
  col = 'chocolate1', lwd = 3 )

par( mfrow = c( 1, 1 ) )

# the marginal posterior for nu has a pronounced long right-hand
# tail; the ACF and PACF plots show that the nu time series behaves 
# more or less like an AR( 1 ) with a first-order autocorrelation of 
# almost 0.6 (this parameter is mixing the least well, but 0.6 is
# still not too bad for a first-order autocorrelation)

c( mean( nu.star ), sd( nu.star ), quantile( nu.star, 
  probs = c( 0.0005, 0.9995 ) ) )

#                        0.05%    99.95% 
# 3.718455  1.219122  2.004299 11.254030

print( rho.1.hat.nu.star <- cor( nu.star[ 1:( n.monitor - 1 ) ],
  nu.star[ 2:n.monitor ] ) )

# 0.5729619

print( se.nu.star.mean <- ( sd( nu.star ) / sqrt( n.monitor ) ) *
  sqrt( ( 1 + rho.1.hat.nu.star ) / ( 1 - rho.1.hat.nu.star ) ) )

# 0.007102942

# notice the effect of the roughly 0.6 autocorrelation value: this
# MCSE, while still small, is about 3.5 times larger than the MCSEs
# for mu and sigma

# we would conclude that the population value of the degrees 
# of freedom parameter in the t model is about 3.7 (MCSE 0.007), 
# give or take about 1.2, and that a 99.9% posterior interval for this
# parameter runs from about 2.0 to about 11.3

# --> please stop and verify that this step was successfully completed

##### summary for y.new

par( mfrow = c( 2, 2 ) )

plot( 1:n.monitor, y.new.star, type = 'l', xlab = 'Iteration Number',
  ylab = 'y.new', col = 'maroon' )

plot( density( y.new.star ), xlab = 'y.new',
  lwd = 3, col = 'cornflowerblue', main = 'NB10 Case Study' )

acf( as.ts( y.new.star[ 1:n.monitor ], start = 1, 
  end = n.monitor, frequency = 1 ), lag.max = 10, main = '',
  col = 'darkcyan', lwd = 3 )

pacf( as.ts( y.new.star[ 1:n.monitor ], start = 1, 
  end = n.monitor, frequency = 1 ), lag.max = 10, main = '',
  col = 'chocolate1', lwd = 3 )

par( mfrow = c( 1, 1 ) )

# the ACF and PACF plots show that the MCMC draws from the predictive
# distribution behave like white noise, and this is in fact a general
# phenomenon in MCMC simulation from predictive distributions (the
# resulting draws are *always* IID)

# the time series plot for y.new shows that the t model with small
# degrees of freedom can generate some simulated predicted data
# values that are quite far from the center of the distribution
# (in fact, (much) farther away from the middle than the actual data values)

summary( nb10.data$y )

#  Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#  375.0   401.0   404.0   404.6   407.0   437.0

# let's replot the time series and density traces
# with the 'MCMC outliers' removed; i'll plot these traces
# for MCMC sampled values between the smallest and largest data values

y.new.star.trimmed <- 
  y.new.star[ ( y.new.star > 375 ) & ( y.new.star < 437 ) ]

1 - sum( ( y.new.star > 375 ) & ( y.new.star < 437 ) ) / n.monitor

# [1] 0.00352

( 1 - sum( ( y.new.star > 375 ) & ( y.new.star < 437 ) ) / n.monitor ) *
  n.monitor

# [1] 352

# only 352 out of the 100,000 monitoring draws (0.35%) fell outside
# the range of the data and are excluded here

par( mfrow = c( 2, 2 ) )

plot( as.ts( y.new.star.trimmed ), 
  type = 'l', xlab = 'Iteration Number', ylab = 'y.new', col = 'maroon' )

plot( density( y.new.star.trimmed ), 
  xlab = 'y.new', lwd = 3, col = 'cornflowerblue', main = 'NB10 Case Study' )

acf( as.ts( y.new.star.trimmed, start = 1, 
  end = n.monitor, frequency = 1 ), lag.max = 10, main = '',
  col = 'darkcyan', lwd = 3 )

pacf( as.ts( y.new.star.trimmed, start = 1, 
  end = n.monitor, frequency = 1 ), lag.max = 10, main = '',
  col = 'chocolate1', lwd = 3 )

par( mfrow = c( 1, 1 ) )

# let's compare the quantiles of the data vector
# to the corresponding quantiles of 'y.new.star.trimmed':

quantile( nb10.data$y, 
  probs = c( 0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99 ) )

#     1%     5%    10%    25%    50%    75%    90%    95%    99% 
# 391.83 398.00 399.00 401.00 404.00 407.00 410.00 412.05 423.14

quantile( y.new.star.trimmed, 
  probs = c( 0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99 ) )

#       1%       5%      10%      25%      50%      75%      90%      95%      99% 
# 388.8168 395.5892 398.1009 401.3767 404.3043 407.2499 410.4903 413.0648 420.1603

# you can see that the two distributions are quite similar;
# this says informally that the t model fits the data pretty well,
# because predictions of future data values from it
# behave like the actual data values (this is a good way to informally check 
# a Bayesian model)

qqplot( nb10.data$y, y.new.star.trimmed, xlab = 'NB10 data',
  ylab = 'Predictive distribution from t model',
  col = 'maroon', type = 'l', lwd = 3 )

abline( 0, 1, lwd = 2, col = 'darkcyan', lty = 3 )

# the fit of this plot is really good

# because of the MCMC outliers, i'll make the next function call
# to produce a 99% predictive interval, rather than the usual 99.9%

c( mean( y.new.star.trimmed ), sd( y.new.star.trimmed ), 
  quantile( y.new.star.trimmed, probs = c( 0.005, 0.995 ) ) )

#                             0.5%      99.5% 
# 404.292382   7.207702 383.031180 425.611887

print( rho.1.hat.y.new.star <- cor( y.new.star[ 1:( n.monitor - 1 ) ],
  y.new.star[ 2:n.monitor ] ) )

# 0.006518032

print( se.y.new.star.mean <- ( sd( y.new.star ) / sqrt( n.monitor ) ) *
  sqrt( ( 1 + rho.1.hat.y.new.star ) / ( 1 - rho.1.hat.y.new.star ) ) )

# 0.02057306

# even though rho.1.hat.y.new.star is essentially 0, the MCSE for
# the mean of the posterior predictive distribution is about 3 times
# larger than that for nu (the worst-mixing parameter); this is
# because the SD of the posterior predictive distribution (6.46) 
# is so much larger than the posterior SDs of the parameters (we
# noticed this in the Poisson length-of-stay case study: prediction
# of future data values is much noisier than parameter estimation)

# we would conclude that, according to the t model, future weighings
# of NB10 should produces values around 404.3 micrograms below
# 10 grams (MCSE 0.02), give or take about 6.5 micrograms, and that about 99%
# of those future data values should be between about 383.0 and
# 425.6 micrograms below 10 grams

c( mean( nb10.data$y ), sd( nb10.data$y ) )

# 404.590000   6.466846

c( mean( y.new.star.trimmed ), sd( y.new.star.trimmed ) )

# [1] 404.321287   5.592268

sum( ( 383.0 < nb10.data$y ) * ( nb10.data$y < 425.6 ) ) / 100

# [1] 0.98

# you can see that the actual data mean and SD values agree well with
# the mean and SD of the posterior predictive distribution, and that
# (as it happens) exactly 95% of the data values are within the
# 95% predictive distribution for future data values; all in all,
# this adds up to the informal conclusion that the t model fits the 
# NB10 data set well

################################################################################
################################################################################