################################################################################# ################################################################################# # likelihood and bayesian analysis of the nb10 case study: # using R to consider the family of t distributions for the sampling model # dd ( 23 feb 2021 ) ##### preliminaries: # (1) unbuffer the output (if relevant to your R environment) # (2) set the working directory to the place where you'd like # PDF versions of plots to be stored # on my home desktop this is # setwd( 'C:/DD/Teaching/AMS-STAT-206/Winter-2021/Sampling-Distribution-Uncertainty' ) # (3) ensure that we start with a completely empty R workspace: rm( list = ls( ) ) ################################################################################# ##### here's a function that makes a probability (quantile-quantile) plot ##### for the NB10 data, using as target sampling distribution ##### the t family # nu (a positive number ) is the degrees of freedom of the t distribution # y is the NB10 data set # M (a positive integer) is the number of simulation replications # seed (a positive integer) is the random number seed, for replicability # of results t.probability.plot <- function( nu, y, M, seed ) { set.seed( seed ) n <- length( y ) standardized.y <- ( y - mean( y ) ) / sd( y ) plot( qt( ( ( 1:n ) - 0.5 ) / n, nu ), sort( y ), col = 'red', xlab = 'Quantiles', ylab = 'sorted standardized y values', main = paste( 'T Sampling Distribution (Degrees of Freedom): ', nu, sep = '' ), lwd = 2, type = 'n', xlim = c( -6, 6 ), ylim = c( -10, 10 ) ) for ( m in 1:M ) { y.star <- rt( n, nu ) lines( qt( ( ( 1:n ) - 0.5 ) / n, nu ), sort( y.star ), lwd = 1, col = 'gray70', lty = 3 ) } abline( 0, 1, lwd = 3, col = 'black', lty = 2 ) lines( qt( ( ( 1:n ) - 0.5 ) / n, nu ), sort( standardized.y ), lwd = 3, col = 'red', lty = 1 ) lines( qt( ( ( 1:n ) - 0.5 ) / n, nu ), sort( rnorm( n ) ), lwd = 4, col = 'darkcyan', lty = 4 ) legend( -5, 7.5, legend = c( 'Target t Shape', 'Bootstrap t QQplots', 'Data t QQplot', 'Gaussian t QQplot' ), col = c( 'black', 'grey', 'red', 'darkcyan' ), lty = c( 3, 2, 1, 4 ), lwd = 3 ) return( '!' ) } ##### read in the data vector y <- c( 409., 400., 406., 399., 402., 406., 401., 403., 401., 403., 398., 403., 407., 402., 401., 399., 400., 401., 405., 402., 408., 399., 399., 402., 399., 397., 407., 401., 399., 401., 403., 400., 410., 401., 407., 423., 406., 406., 402., 405., 405., 409., 399., 402., 407., 406., 413., 409., 404., 402., 404., 406., 407., 405., 411., 410., 410., 410., 401., 402., 404., 405., 392., 407., 406., 404., 403., 408., 404., 407., 412., 406., 409., 400., 408., 404., 401., 404., 408., 406., 408., 406., 401., 412., 393., 437., 418., 415., 404., 401., 401., 407., 412., 375., 409., 406., 398., 406., 403., 404. ) ##### make the plot, with the following inputs: M <- 1000 seed <- 3412519 nu <- 20 t.probability.plot( nu, y, M, seed ) # a random sample of size 100 from the Normal( 0, 1 ) distribution # is included for reference, as a kind of control: # the smaller nu gets, the worse the fit should be # for the simulated Normal sample # now just run the function with different nu values, # for example starting with nu = 1 (the Cauchy distribution) # and increasing by 1 successively until the plot goes from # nu = 1 (NB10 data's tails not that heavy) # to # nu = ? (NB10 data's tails not that light) # to see if any members of the t family are plausible # with the NB10 data ... ################################################################################## ##################################################################################