fstats_hypothesis.f90 Source File


Contents

Source Code


Source Code

module fstats_hypothesis
    use iso_fortran_env
    use ieee_arithmetic
    use fstats_errors
    use fstats_special_functions
    use fstats_distributions
    use fstats_descriptive_statistics
    use fstats_types
    private
    public :: confidence_interval
    public :: t_test_equal_variance
    public :: t_test_unequal_variance
    public :: t_test_paired
    public :: f_test
    public :: bartletts_test
    public :: levenes_test
    public :: sample_size

    interface confidence_interval
        !! Computes the confidence interval for the specified distribution.
        !!
        !! See Also
        !!
        !! - [Wikipedia](https://en.wikipedia.org/wiki/Confidence_interval)
        module procedure :: confidence_interval_scalar
        module procedure :: confidence_interval_array
    end interface
contains
! ------------------------------------------------------------------------------
pure function confidence_interval_scalar(dist, alpha, s, n) result(rst)
    !! Computes the confidence interval for the specified distribution.
    class(distribution), intent(in) :: dist
        !! The distribution object defining the probability distribution
        !! to establish the confidence level.
    real(real64), intent(in) :: alpha
        !! The probability value of interest.  For instance, use a value of 0.05
        !! for a confidence level of 95%.
    real(real64), intent(in) :: s
        !! The sample standard deviation.
    integer(int32), intent(in) :: n
        !! The number of samples in the data set.
    real(real64) :: rst
        !! The result.

    ! Local Variables
    real(real64) :: x, dn

    ! Process
    dn = real(n, real64)
    x = 1.0d0 - 0.5d0 * alpha
    rst = dist%standardized_variable(x)
    rst = rst * s / sqrt(dn)
end function

! ------------------------------------------------------------------------------
pure function confidence_interval_array(dist, alpha, x) result(rst)
    !! Computes the confidence interval for the specified distribution.
    class(distribution), intent(in) :: dist
        !! The distribution object defining the probability distribution
        !! to establish the confidence level.
    real(real64), intent(in) :: alpha
        !! The probability value of interest.  For instance, use a value of 0.05
        !! for a confidence level of 95%.
    real(real64), intent(in) :: x(:)
        !! An N-element array containing the data to analyze.
    real(real64) :: rst
        !! The result.

    ! Process
    rst = confidence_interval(dist, alpha, standard_deviation(x), size(x))
end function

! ------------------------------------------------------------------------------
subroutine t_test_equal_variance(x1, x2, stat, p, dof)
    !! Computes the 2-tailed Student's T-Test for two data sets of 
    !! assumed equivalent variances.
    !!
    !! See Also
    !!
    !! - [Wikipedia](https://en.wikipedia.org/wiki/Student%27s_t-test)
    real(real64), intent(in) :: x1(:)
        !! An N-element array containing the first data set.
    real(real64), intent(in) :: x2(:)
        !! An M-element array containing the second data set.
    real(real64), intent(out) :: stat
        !! The Student-'s T-Test statistic.
    real(real64), intent(out) :: p
        !! The probability value that the two samples are likely to
        !! have come from two underlying populations that 
        !! have the same mean.
    real(real64), intent(out) :: dof
        !! The degrees of freedom.

    ! Parameters
    real(real64), parameter :: half = 0.5d0
    real(real64), parameter :: one = 1.0d0
    real(real64), parameter :: two = 2.0d0

    ! Local Variables
    real(real64) :: v1, v2, m1, m2, sv, a, b, x
    integer(int32) :: n1, n2

    ! Compute the T-statistic
    n1 = size(x1)
    n2 = size(x2)
    m1 = mean(x1)
    m2 = mean(x2)
    v1 = variance(x1)
    v2 = variance(x2)
    dof = n1 + n2 - two
    sv = ((n1 - one) * v1 + (n2 - one) * v2) / dof
    stat = abs(m1 - m2) / sqrt(sv * (one / real(n1) + one / real(n2)))

    ! Compute the probability
    a = half * dof
    b = half
    x = dof / (dof + stat**2)
    p = regularized_beta(a, b, x)
end subroutine

! ------------------------------------------------------------------------------
subroutine t_test_unequal_variance(x1, x2, stat, p, dof)
    !! Computes the 2-tailed Student's T-Test for two data sets of 
    !! assumed non-equivalent variances.
    !!
    !! See Also
    !!
    !! - [Wikipedia](https://en.wikipedia.org/wiki/Student%27s_t-test)
    real(real64), intent(in) :: x1(:)
        !! An N-element array containing the first data set.
    real(real64), intent(in) :: x2(:)
        !! An M-element array containing the second data set.
    real(real64), intent(out) :: stat
        !! The Student-'s T-Test statistic.
    real(real64), intent(out) :: p
        !! The probability value that the two samples are likely to
        !! have come from two underlying populations that 
        !! have the same mean.
    real(real64), intent(out) :: dof
        !! The degrees of freedom.

    ! Parameters
    real(real64), parameter :: half = 0.5d0
    real(real64), parameter :: one = 1.0d0

    ! Local Variables
    real(real64) :: v1, v2, m1, m2, sv, a, b, x
    integer(int32) :: n1, n2

    ! Compute the T-statistic
    n1 = size(x1)
    n2 = size(x2)
    m1 = mean(x1)
    m2 = mean(x2)
    v1 = variance(x1)
    v2 = variance(x2)
    dof = (v1 / real(n1) + v2 / real(n2))**2 / ((v1 / n1)**2 / (n1 - one) + &
        (v2 / n2)**2 / (n2 - one))
    sv = sqrt(v1 / n1 + v2 / n2)
    stat = (m1 - m2) / sv

    ! Compute the probability
    a = half * dof
    b = half
    x = dof / (dof + stat**2)
    p = regularized_beta(a, b, x)
end subroutine

! ------------------------------------------------------------------------------
subroutine t_test_paired(x1, x2, stat, p, dof, err)
    !! Computes the 2-tailed Student's T-Test for two paired data sets.
    !!
    !! See Also
    !!
    !! - [Wikipedia](https://en.wikipedia.org/wiki/Student%27s_t-test)
    real(real64), intent(in) :: x1(:)
        !! An N-element array containing the first data set.
    real(real64), intent(in) :: x2(:)
        !! An N-element array containing the second data set.
    real(real64), intent(out) :: stat
        !! The Student-'s T-Test statistic.
    real(real64), intent(out) :: p
        !! The probability value that the two samples are likely to
        !! have come from  two underlying populations that 
        !! have the same mean.
    real(real64), intent(out) :: dof
        !! The degrees of freedom.
    class(errors), intent(inout), optional, target :: err
        !! A mechanism for communicating errors and warnings to the 
        !! caller.  Possible warning and error codes are as follows.
        !! - FS_NO_ERROR: No errors encountered.
        !! - FS_ARRAY_SIZE_ERROR: Occurs if x1 and x2 are not the same 
        !!   length.

    ! Parameters
    real(real64), parameter :: zero = 0.0d0
    real(real64), parameter :: half = 0.5d0
    real(real64), parameter :: one = 1.0d0
    real(real64), parameter :: two = 2.0d0

    ! Local Variables
    class(errors), pointer :: errmgr
    type(errors), target :: deferr
    real(real64) :: v1, v2, m1, m2, sd, cov, a, b, x
    integer(int32) :: i, n1, n2, n
    
    ! Initialization
    if (present(err)) then
        errmgr => err
    else
        errmgr => deferr
    end if
    n1 = size(x1)
    n2 = size(x2)
    n = min(n1, n2)

    ! Input Checking
    if (n1 /= n2) then
        call report_arrays_not_same_size_error(errmgr, "t_test_paired_real64", &
            "X1", "X2", n1, n2)
        return
    end if

    ! Compute the T-statistic
    m1 = mean(x1)
    m2 = mean(x2)
    v1 = variance(x1)
    v2 = variance(x2)
    dof = real(n1) - one
    cov = zero
    do i = 1, n
        cov = cov + (x1(i) - m1) * (x2(i) - m2)
    end do
    cov = cov / dof
    sd = sqrt((v1 + v2 - two * cov) / n)
    stat = (m1 - m2) / sd

    ! Compute the probability
    a = half * dof
    b = half
    x = dof / (dof + stat**2)
    p = regularized_beta(a, b, x)
end subroutine

! ------------------------------------------------------------------------------
subroutine f_test(x1, x2, stat, p, dof1, dof2)
    !! Computes the F-test and returns the probability (two-tailed) that
    !! the variances of two data sets are not significantly different.
    !!
    !! See Also
    !!
    !! - [Wikipedia](https://en.wikipedia.org/wiki/F-test)
    real(real64), intent(in) :: x1(:)
        !! An N-element array containing the first data set.
    real(real64), intent(in) :: x2(:)
        !! An M-element array containing the second data set.
    real(real64), intent(out) :: stat
        !! The F-statistic.
    real(real64), intent(out) :: p
        !! The probability value that the two samples are likely to
        !! have come from the two underlying populations that 
        !! have the same variance.
    real(real64), intent(out) :: dof1
        !! A measure of the degrees of freedom.
    real(real64), intent(out) :: dof2
        !! A measure of the degrees of freedom.

    ! Parameters
    real(real64), parameter :: one = 1.0d0
    real(real64), parameter :: two = 2.0d0

    ! Local Variables
    integer(int32) :: n1, n2
    real(real64) :: v1, v2, m1, m2
    type(f_distribution) :: dist

    ! Compute the F-statistic
    n1 = size(x1)
    n2 = size(x2)
    m1 = mean(x1)
    m2 = mean(x2)
    v1 = variance(x1)
    v2 = variance(x2)
    if (v1 > v2) then
        stat = v1 / v2
        dof1 = n1 - one
        dof2 = n2 - one
    else
        stat = v2 / v1
        dof1 = n2 - one
        dof2 = n1 - one
    end if

    dist%d1 = dof1
    dist%d2 = dof2
    p = two * (one - dist%cdf(stat))! 2x because this is a two-tailed estimate
    if (p > one) p = two - p
end subroutine

! ------------------------------------------------------------------------------
subroutine bartletts_test(x, stat, p)
    !! Computes Bartlett's test statistic and associated probability.
    !!
    !! The statistic is calculated as follows.
    !!
    !! $$ \chi^{2} = \frac{(N - k) \ln(S_{p}^{2}) \sum_{i = 1}^{k} 
    !! \left(n_{i} - 1 \right) \ln(S_{i}^{2})}{1 + 
    !! \frac{1}{3 \left( k - 1 \right)} \left( \sum_{i = 1}^{k} 
    !! \left( \frac{1}{n_{i} - 1} \right) - \frac{1}{N - k} \right)} $$
    !!
    !! Where \( N = \sum_{i = 1}^{k} n_{i} \) and \( S_{p}^{2} \) is the pooled
    !! variance.
    !!
    !! The probability is calculated as the right-tail probability of the
    !! chi-squared distribution.
    !!
    !! Bartlett's test is most relevant for distributions showing strong 
    !! normality.  For distributions lacking strong normality, consider 
    !! Levene's test instead.
    !!
    !! See Also
    !!
    !! - [Wikipedia](https://en.wikipedia.org/wiki/Bartlett%27s_test)
    type(array_container), intent(in), dimension(:) :: x
        !! The arrays of data to analyze.
    real(real64), intent(out) :: stat
        !! The Bartlett's test statistic.
    real(real64), intent(out) :: p
        !! The probability value that the variances of each data set are
        !! equivalent.  A low p-value, less than some significance level,
        !! indicates a non-equivalance of variances.

    ! Local Variables
    integer(int32) :: i, n, k, ni
    real(real64) :: si, sp, numer, denom
    type(chi_squared_distribution) :: dist

    ! Initialization
    k = size(x)
    n = 0
    do i = 1, k
        n = n + size(x(i)%x)
    end do

    ! Compute the statistic
    n = 0
    sp = 0.0d0
    numer = 0.0d0
    denom = 0.0d0
    do i = 1, k
        ni = size(x(i)%x)
        n = n + ni
        si = variance(x(i)%x)
        sp = sp + (ni - 1.0d0) * si
        numer = numer + (ni - 1.0d0) * log(variance(x(i)%x))
        denom = denom + 1.0d0 / (ni - 1.0d0)
    end do
    sp = sp / real(n - k, real64)
    stat = ((n - k) * log(sp) - numer) / &
        (1.0d0 + (1.0d0 / (3.0d0 * k - 3.0d0)) * &
        (denom - 1.0d0 / real(n - k, real64)))

    ! Compute the p-value
    dist%dof = k - 1
    p = 1.0d0 - dist%cdf(stat)
end subroutine

! ------------------------------------------------------------------------------
subroutine levenes_test(x, stat, p, err)
    !! Computes Levene's test statistic and associated probability.
    !!
    !! The statistic is calculated as follows.
    !! $$ W = \frac{N - k}{k - 1} \frac{ \sum_{i = 1}^{k} N_{i} \left( Z_{i.} - 
    !! Z{..} \right)^{2}}{ \sum_{i = 1}^{k} \sum_{j = 1}^{n_{i}} \left( Z_{ij} -
    !! Z_{i.} \right)^{2} } $$
    !!
    !! Where:
    !! $$ Z_{ij} = |X_{ij} - \overline{X_{i.}}| $$
    !! $$ Z_{i.} = \frac{1}{n_{i}} \sum_{j = 1}^{n_{i}} Z_{ij} $$ 
    !! $$ Z_{..} = \frac{1}{N} \sum_{i = 1}^{k} \sum_{j = 1}^{n_{i}} Z_{ij} $$
    !!
    !! As the test statistic is approximately F-distributed, the F-distribution
    !! is used to calculate the probability term.
    !!
    !! See Also
    !!
    !! - [Wikipedia](https://en.wikipedia.org/wiki/Levene%27s_test)
    type(array_container), intent(in), dimension(:) :: x
        !! The arrays of data to analyze.
    real(real64), intent(out) :: stat
        !! The Bartlett's test statistic.
    real(real64), intent(out) :: p
        !! The probability value that the variances of each data set are
        !! equivalent.  A low p-value, less than some significance level,
        !! indicates a non-equivalance of variances.
    class(errors), intent(inout), optional, target :: err

    ! Local Variables
    integer(int32) :: i, j, k, n, ni, flag
    real(real64) :: numer, denom, inner, yi, z, zij
    real(real64), allocatable, dimension(:) :: y, zt, zi
    type(f_distribution) :: dist
    class(errors), pointer :: errmgr
    type(errors), target :: deferr
    
    ! Initialization
    if (present(err)) then
        errmgr => err
    else
        errmgr => deferr
    end if
    k = size(x)

    ! Local Memory Allocations
    allocate(y(k), zi(k), stat = flag)
    if (flag /= 0) then
        call report_memory_error(errmgr, "levenes_test", flag)
        return
    end if

    ! Compute the total mean
    z = 0.0d0
    n = 0
    do i = 1, k
        ni = size(x(i)%x)
        n = n + ni
        y(i) = mean(x(i)%x)
        zt = abs(x(i)%x - y(i))
        zi(i) = mean(zt)
        z = z + zi(i) * ni
    end do
    z = z / n

    ! Process
    numer = 0.0d0
    denom = 0.0d0
    do i = 1, k
        ni = size(x(i)%x)
        yi = y(i)
        numer = numer + ni * (zi(i) - z)**2
        
        inner = 0.0d0
        do j = 1, ni
            zij = abs(x(i)%x(j) - yi)
            inner = inner + (zij - zi(i))**2
        end do
        denom = denom + inner
    end do
    stat = real((N - k) / (k - 1), real64) * (numer / denom)
    dist%d1 = k - 1.0d0
    dist%d2 = real(n - k, real64)
    p = 1.0d0 - dist%cdf(stat)
end subroutine

! ------------------------------------------------------------------------------
pure function sample_size(dist, var, delta, bet, alpha) result(rst)
    !! Estimates the sample size required to achieve an experiment with the
    !! desired power and significance levels to ascertain the desired 
    !! difference in parameter.
    !!
    !! See Also
    !!
    !! - [Wikipedia](https://en.wikipedia.org/wiki/Power_of_a_test)
    class(distribution), intent(in) :: dist
        !! The distribution to utilize as a measure.
    real(real64), intent(in) :: var
        !! An estimate of the population variance.
    real(real64), intent(in) :: delta
        !! The parameter difference that is desired.
    real(real64), intent(in), optional :: bet
        !! The desired power level.  The default for this value is 0.2, for a 
        !! power of 80%.
    real(real64), intent(in), optional :: alpha
        !! The desired significance level.  The default for this value is 0.05
        !! for a confidence level of 95%.
    real(real64) :: rst
        !! The minimum sample size requried to achieve the desired experimental
        !! outcome.

    ! Local Variables
    real(real64) :: a, b, za, zb

    ! Initialization
    if (present(bet)) then
        b = bet
    else
        b = 0.8d0
    end if
    if (present(alpha)) then
        a = alpha
    else
        a = 0.05d0
    end if

    za = dist%standardized_variable(1.0d0 - a / 2.0d0)
    zb = dist%standardized_variable(b)
    rst = 2.0d0 * (za + zb)**2 * var / (delta**2)
end function

! ------------------------------------------------------------------------------
end module