#################################################################################
#################################################################################

# likelihood and bayesian analysis of the nb10 case study:
# using R to consider the family of t distributions for the sampling model

# dd ( 23 feb 2021 )

##### preliminaries:

# (1) unbuffer the output (if relevant to your R environment)

# (2) set the working directory to the place where you'd like
# PDF versions of plots to be stored

# on my home desktop this is

#   setwd( 'C:/DD/Teaching/AMS-STAT-206/Winter-2021/Sampling-Distribution-Uncertainty' )

# (3) ensure that we start with a completely empty R workspace:

rm( list = ls( ) )

#################################################################################

##### here's a function that makes a probability (quantile-quantile) plot
##### for the NB10 data, using as target sampling distribution
##### the t family

# nu (a positive number ) is the degrees of freedom of the t distribution

# y is the NB10 data set

# M (a positive integer) is the number of simulation replications

# seed (a positive integer) is the random number seed, for replicability
# of results

t.probability.plot <- function( nu, y, M, seed ) {

  set.seed( seed )

  n <- length( y )

  standardized.y <- ( y - mean( y ) ) / sd( y )

  plot( qt( ( ( 1:n ) - 0.5 ) / n, nu ), sort( y ), col = 'red', 
    xlab = 'Quantiles', ylab = 'sorted standardized y values', 
    main = paste( 'T Sampling Distribution (Degrees of Freedom): ', 
    nu, sep = '' ), lwd = 2, type = 'n', xlim = c( -6, 6 ), 
    ylim = c( -10, 10 ) )

  for ( m in 1:M ) {

    y.star <- rt( n, nu )

    lines( qt( ( ( 1:n ) - 0.5 ) / n, nu ), sort( y.star ),
      lwd = 1, col = 'gray70', lty = 3 )

  }

  abline( 0, 1, lwd = 3, col = 'black', lty = 2 )

  lines( qt( ( ( 1:n ) - 0.5 ) / n, nu ), sort( standardized.y ), 
    lwd = 3, col = 'red', lty = 1 )

  lines( qt( ( ( 1:n ) - 0.5 ) / n, nu ), sort( rnorm( n ) ), 
    lwd = 4, col = 'darkcyan', lty = 4 )

  legend( -5, 7.5, legend = c( 'Target t Shape', 
    'Bootstrap t QQplots', 'Data t QQplot', 'Gaussian t QQplot' ),
    col = c( 'black', 'grey', 'red', 'darkcyan' ), 
    lty = c( 3, 2, 1, 4 ), lwd = 3 )

  return( '!' )

}

##### read in the data vector

y <- c( 409., 400., 406., 399., 402., 406., 401., 403., 401., 403.,
        398., 403., 407., 402., 401., 399., 400., 401., 405., 402., 
        408., 399., 399., 402., 399., 397., 407., 401., 399., 401., 
        403., 400., 410., 401., 407., 423., 406., 406., 402., 405., 
        405., 409., 399., 402., 407., 406., 413., 409., 404., 402., 
        404., 406., 407., 405., 411., 410., 410., 410., 401., 402., 
        404., 405., 392., 407., 406., 404., 403., 408., 404., 407., 
        412., 406., 409., 400., 408., 404., 401., 404., 408., 406., 
        408., 406., 401., 412., 393., 437., 418., 415., 404., 401., 
        401., 407., 412., 375., 409., 406., 398., 406., 403., 404. 
)

##### make the plot, with the following inputs:

M <- 1000

seed <- 3412519

nu <- 20

t.probability.plot( nu, y, M, seed )

# a random sample of size 100 from the Normal( 0, 1 ) distribution
# is included for reference, as a kind of control:
# the smaller nu gets, the worse the fit should be
# for the simulated Normal sample

# now just run the function with different nu values,
# for example starting with nu = 1 (the Cauchy distribution)
# and increasing by 1 successively until the plot goes from 

#   nu = 1 (NB10 data's tails not that heavy)

# to

#   nu = ? (NB10 data's tails not that light)

# to see if any members of the t family are plausible
# with the NB10 data ...

##################################################################################
##################################################################################