fstats_bootstrap.f90 Source File


Contents

Source Code


Source Code

module fstats_bootstrap
    use iso_fortran_env
    use fstats_errors
    use omp_lib
    use fstats_distributions
    use fstats_descriptive_statistics
    use fstats_special_functions
    use fstats_regression
    use linalg, only : sort
    implicit none
    private
    public :: bootstrap_resampling_routine
    public :: bootstrap_statistic_routine
    public :: random_resample
    public :: scaled_random_resample
    public :: bootstrap_statistics
    public :: bootstrap

! REFERENCES:
! - https://medium.com/@m21413108/bootstrapping-maximum-entropy-non-parametric-boot-python-3b1e23ea589d
! - https://cran.r-project.org/web/packages/meboot/vignettes/meboot.pdf
! - https://gist.github.com/christianjauregui/314456688a3c2fead43a48be3a47dad6

    type bootstrap_statistics
        !! A collection of statistics resulting from the bootstrap process.
        real(real64) :: statistic_value
            !! The value of the statistic of interest.
        real(real64) :: upper_confidence_interval
            !! The upper confidence limit on the statistic.
        real(real64) :: lower_confidence_interval
            !! The lower confidence limit on the statistic.
        real(real64) :: bias
            !! The bias in the statistic.
        real(real64) :: standard_error
            !! The standard error of the statistic.
        real(real64), allocatable, dimension(:) :: population
            !! An array of the population values generated by the bootstrap
            !! process.
    end type

    interface
        subroutine bootstrap_resampling_routine(x, xn)
            !! Defines the signature of a subroutine used to compute a 
            !! resampling of data for bootstrapping purposes.
            use iso_fortran_env, only : real64
            real(real64), intent(in), dimension(:) :: x
                !! The N-element array to resample.
            real(real64), intent(out), dimension(size(x)) :: xn
                !! An N-element array where the resampled data set will be 
                !! written.
        end subroutine

        function bootstrap_statistic_routine(x) result(rst)
            !! Defines the signature of a function for computing the desired
            !! bootstrap statistic.
            use iso_fortran_env, only : real64
            real(real64), intent(in), dimension(:) :: x
                !! The array of data to analyze.
            real(real64) :: rst
                !! The resulting statistic.
        end function
    end interface

contains
! ******************************************************************************
! RESAMPLING
! ------------------------------------------------------------------------------
subroutine random_resample(x, xn)
    !! Random resampling, with replacement, based upon a normal distribution.
    real(real64), intent(in), dimension(:) :: x
        !! The N-element array to resample.
    real(real64), intent(out), dimension(size(x)) :: xn
        !! An N-element array where the resampled data set will be written.

    ! Parameters
    real(real64), parameter :: scale = 1.25d0

    ! Local Variables
    integer(int32) :: i, n
    real(real64) :: xmin, xmax, rng

    ! Process
    n = size(x)
    xmin = x(1)
    xmax = x(1)
    do i = 2, n
        xmin = min(xmin, x(i))
        xmax = max(xmax, x(i))
    end do
    rng = (xmax - xmin)
    call random_number(xn)
    xn = xn * rng + xmin
end subroutine

! ------------------------------------------------------------------------------
subroutine scaled_random_resample(x, xn)
    !! A random resampling, scaled by the standard deviation of the original
    !! data, but based upon a normal distribution.
    real(real64), intent(in), dimension(:) :: x
        !! The N-element array to resample.
    real(real64), intent(out), dimension(size(x)) :: xn
        !! An N-element array where the resampled data set will be written.

    ! Parameters
    real(real64), parameter :: half = 0.5d0

    ! Local Variables
    integer(int32) :: n
    real(real64) :: eps

    ! Process
    n = size(x)
    eps = standard_deviation(x) / sqrt(real(n, real64))
    call random_number(xn)
    xn = eps * (xn - half) + x
end subroutine

! ******************************************************************************
! BOOTSTRAPPING
! ------------------------------------------------------------------------------
function bootstrap(stat, x, method, nsamples, alpha) result(rst)
    !! Performs a bootstrap calculation on the supplied data set for the given
    !! statistic.  The default implementation utlizes a random resampling with 
    !! replacement.  Other resampling methods may be defined by specifying an 
    !! appropriate routine by means of the method input.
    procedure(bootstrap_statistic_routine), pointer, intent(in) :: stat
        !! The routine used to compute the desired statistic.
    real(real64), intent(in), dimension(:) :: x
        !! The N-element data set.
    procedure(bootstrap_resampling_routine), pointer, intent(in), optional :: method
        !! An optional pointer to the method to use for resampling of the data.
        !! If no method is supplied, a random resampling is utilized.
    integer(int32), intent(in), optional :: nsamples
        !! An optional input, that if supplied, specifies the number of 
        !! resampling runs to perform.  The default is 10 000.
    real(real64), intent(in), optional :: alpha
        !! An optional input, that if supplied, defines the significance level
        !! to use for the analysis.  The default is 0.05.
    type(bootstrap_statistics) :: rst
        !! The resulting bootstrap_statistics type containing the confidence
        !! intervals, bias, standard error, etc. for the analyzed statistic.

    ! Parameters
    real(real64), parameter :: half = 0.5d0
    real(real64), parameter :: p05 = 5.0d-2

    ! Local Variables
    integer(int32) :: i, i1, i2, n, ns
    real(real64) :: a
    real(real64), allocatable, dimension(:) :: xn
    procedure(bootstrap_resampling_routine), pointer :: resample

    ! Initialization
    n = size(x)
    if (present(method)) then
        resample => method
    else
        resample => random_resample
    end if
    if (present(nsamples)) then
        ns = nsamples
    else
        ns = 10000
    end if
    if (present(alpha)) then
        a = alpha
    else
        a = p05
    end if
    allocate(rst%population(ns))
    i1 = floor(half * a * ns, int32)
    i2 = ns - i1 + 1

    ! Analyze the basic data set
    rst%statistic_value = stat(x)
    rst%population(1) = rst%statistic_value

    ! Resampling Process
#ifdef USEOPENMP
    ! Use OpenMP to run operations in parallel
!$OMP PARALLEL DO PRIVATE(xn) SHARED(rst)
    do i = 2, ns
        ! Per-thread memory allocation
        if (.not.allocated(xn)) allocate(xn(n))

        ! Resample the data
        call resample(x, xn)

        ! Compute the statistic
        rst%population(i) = stat(xn)
    end do
!$OMP END PARALLEL DO
#else
    ! OpenMP is not available - run in a serial manner
    allocate(xn(n))
    do i = 2, ns
        ! Resample the data
        call resample(x, xn)

        ! Compute the statistic for the resampled data
        rst%population(i) = stat(xn)
    end do
#endif

    ! Compute the relevant quantities on the resampled statistic
    call sort(rst%population, .true.)
    rst%upper_confidence_interval = rst%population(i2)
    rst%lower_confidence_interval = rst%population(i1)
    rst%bias = mean(rst%population) - rst%statistic_value
    rst%standard_error = standard_deviation(rst%population)
end function

! ------------------------------------------------------------------------------
end module