module fstats_bootstrap use iso_fortran_env use fstats_errors use omp_lib use fstats_distributions use fstats_descriptive_statistics use fstats_special_functions use fstats_regression use linalg, only : sort implicit none private public :: bootstrap_resampling_routine public :: bootstrap_statistic_routine public :: random_resample public :: scaled_random_resample public :: bootstrap_statistics public :: bootstrap ! REFERENCES: ! - https://medium.com/@m21413108/bootstrapping-maximum-entropy-non-parametric-boot-python-3b1e23ea589d ! - https://cran.r-project.org/web/packages/meboot/vignettes/meboot.pdf ! - https://gist.github.com/christianjauregui/314456688a3c2fead43a48be3a47dad6 type bootstrap_statistics !! A collection of statistics resulting from the bootstrap process. real(real64) :: statistic_value !! The value of the statistic of interest. real(real64) :: upper_confidence_interval !! The upper confidence limit on the statistic. real(real64) :: lower_confidence_interval !! The lower confidence limit on the statistic. real(real64) :: bias !! The bias in the statistic. real(real64) :: standard_error !! The standard error of the statistic. real(real64), allocatable, dimension(:) :: population !! An array of the population values generated by the bootstrap !! process. end type interface subroutine bootstrap_resampling_routine(x, xn) !! Defines the signature of a subroutine used to compute a !! resampling of data for bootstrapping purposes. use iso_fortran_env, only : real64 real(real64), intent(in), dimension(:) :: x !! The N-element array to resample. real(real64), intent(out), dimension(size(x)) :: xn !! An N-element array where the resampled data set will be !! written. end subroutine function bootstrap_statistic_routine(x) result(rst) !! Defines the signature of a function for computing the desired !! bootstrap statistic. use iso_fortran_env, only : real64 real(real64), intent(in), dimension(:) :: x !! The array of data to analyze. real(real64) :: rst !! The resulting statistic. end function end interface contains ! ****************************************************************************** ! RESAMPLING ! ------------------------------------------------------------------------------ subroutine random_resample(x, xn) !! Random resampling, with replacement, based upon a normal distribution. real(real64), intent(in), dimension(:) :: x !! The N-element array to resample. real(real64), intent(out), dimension(size(x)) :: xn !! An N-element array where the resampled data set will be written. ! Parameters real(real64), parameter :: scale = 1.25d0 ! Local Variables integer(int32) :: i, n real(real64) :: xmin, xmax, rng ! Process n = size(x) xmin = x(1) xmax = x(1) do i = 2, n xmin = min(xmin, x(i)) xmax = max(xmax, x(i)) end do rng = (xmax - xmin) call random_number(xn) xn = xn * rng + xmin end subroutine ! ------------------------------------------------------------------------------ subroutine scaled_random_resample(x, xn) !! A random resampling, scaled by the standard deviation of the original !! data, but based upon a normal distribution. real(real64), intent(in), dimension(:) :: x !! The N-element array to resample. real(real64), intent(out), dimension(size(x)) :: xn !! An N-element array where the resampled data set will be written. ! Parameters real(real64), parameter :: half = 0.5d0 ! Local Variables integer(int32) :: n real(real64) :: eps ! Process n = size(x) eps = standard_deviation(x) / sqrt(real(n, real64)) call random_number(xn) xn = eps * (xn - half) + x end subroutine ! ****************************************************************************** ! BOOTSTRAPPING ! ------------------------------------------------------------------------------ function bootstrap(stat, x, method, nsamples, alpha) result(rst) !! Performs a bootstrap calculation on the supplied data set for the given !! statistic. The default implementation utlizes a random resampling with !! replacement. Other resampling methods may be defined by specifying an !! appropriate routine by means of the method input. procedure(bootstrap_statistic_routine), pointer, intent(in) :: stat !! The routine used to compute the desired statistic. real(real64), intent(in), dimension(:) :: x !! The N-element data set. procedure(bootstrap_resampling_routine), pointer, intent(in), optional :: method !! An optional pointer to the method to use for resampling of the data. !! If no method is supplied, a random resampling is utilized. integer(int32), intent(in), optional :: nsamples !! An optional input, that if supplied, specifies the number of !! resampling runs to perform. The default is 10 000. real(real64), intent(in), optional :: alpha !! An optional input, that if supplied, defines the significance level !! to use for the analysis. The default is 0.05. type(bootstrap_statistics) :: rst !! The resulting bootstrap_statistics type containing the confidence !! intervals, bias, standard error, etc. for the analyzed statistic. ! Parameters real(real64), parameter :: half = 0.5d0 real(real64), parameter :: p05 = 5.0d-2 ! Local Variables integer(int32) :: i, i1, i2, n, ns real(real64) :: a real(real64), allocatable, dimension(:) :: xn procedure(bootstrap_resampling_routine), pointer :: resample ! Initialization n = size(x) if (present(method)) then resample => method else resample => random_resample end if if (present(nsamples)) then ns = nsamples else ns = 10000 end if if (present(alpha)) then a = alpha else a = p05 end if allocate(rst%population(ns)) i1 = floor(half * a * ns, int32) i2 = ns - i1 + 1 ! Analyze the basic data set rst%statistic_value = stat(x) rst%population(1) = rst%statistic_value ! Resampling Process #ifdef USEOPENMP ! Use OpenMP to run operations in parallel !$OMP PARALLEL DO PRIVATE(xn) SHARED(rst) do i = 2, ns ! Per-thread memory allocation if (.not.allocated(xn)) allocate(xn(n)) ! Resample the data call resample(x, xn) ! Compute the statistic rst%population(i) = stat(xn) end do !$OMP END PARALLEL DO #else ! OpenMP is not available - run in a serial manner allocate(xn(n)) do i = 2, ns ! Resample the data call resample(x, xn) ! Compute the statistic for the resampled data rst%population(i) = stat(xn) end do #endif ! Compute the relevant quantities on the resampled statistic call sort(rst%population, .true.) rst%upper_confidence_interval = rst%population(i2) rst%lower_confidence_interval = rst%population(i1) rst%bias = mean(rst%population) - rst%statistic_value rst%standard_error = standard_deviation(rst%population) end function ! ------------------------------------------------------------------------------ end module