fstats_descriptive_statistics.f90 Source File


Contents


Source Code

module fstats_descriptive_statistics
    use iso_fortran_env
    use linalg, only : sort
    use ferror
    use fstats_errors
    use fstats_types
    implicit none
    private
    public :: mean
    public :: variance
    public :: standard_deviation
    public :: median
    public :: quantile
    public :: trimmed_mean
    public :: covariance
    public :: pooled_variance
    
    interface pooled_variance
        !! Computes the pooled estimate of variance.
        module procedure :: pooled_variance_1
        module procedure :: pooled_variance_2
    end interface
contains
! ------------------------------------------------------------------------------
pure function mean(x) result(rst)
    !! Computes the mean of the values in an array.
    real(real64), intent(in) :: x(:)
        !! The array of values to analyze.
    real(real64) :: rst
        !! The result.

    ! Parameters
    real(real64), parameter :: zero = 0.0d0

    ! Local Variables
    integer(int32) :: i, n

    ! Process
    n = size(x)
    if (n == 0) then
        rst = zero
    else
        rst = x(1)
        do i = 2, n
            rst = rst + (x(i) - rst) / i
        end do
    end if
end function

! ------------------------------------------------------------------------------
pure function variance(x) result(rst)
    !! Computes the sample variance of the values in an array.
    !!
    !! The variance computed is the sample variance such that 
    !! $$ s^2 = \frac{\Sigma \left( x_{i} - \bar{x} \right)^2}{n - 1} $$.
    real(real64), intent(in) :: x(:)
        !! The array of values to analyze.
    real(real64) :: rst

    ! Parameters
    real(real64), parameter :: zero = 0.0d0
    real(real64), parameter :: one = 1.0d0

    ! Local Variables
    integer(int32) :: i, n
    real(real64) :: oldMean, newMean

    ! Process
    n = size(x)
    if (n <= 1) then
        rst = zero
    else
        oldMean = x(1)
        rst = zero
        do i = 2, n
            newMean = oldMean + (x(i) - oldMean) / i
            rst = rst + (x(i) - oldMean) * (x(i) - newMean)
            oldMean = newMean
        end do
        rst = rst / (n - one)
    end if
end function

! ------------------------------------------------------------------------------
pure function standard_deviation(x) result(rst)
    !! Computes the sample standard deviation of the values in an array.
    !! 
    !! The value computed is the sample standard deviation.
    !! $$ s = \sqrt{ \frac{\Sigma \left( x_{i} - \bar{x} \right)^2}{n - 1} } $$
    real(real64), intent(in) :: x(:)
        !! The array of values to analyze.
    real(real64) :: rst
        !! The result.

    ! Process
    rst = sqrt(variance(x))
end function

! ------------------------------------------------------------------------------
function median(x) result(rst)
    !! Computes the median of the values in an array.
    real(real64), intent(inout) :: x(:)
        !! The array of values to analyze.  On output, this array is sorted into
        !! ascending order.
    real(real64) :: rst
        !! The result.

    ! Parameters
    real(real64), parameter :: half = 0.5d0

    ! Local Variables
    integer(int32) :: n, nmid, nmidp1, flag, iflag

    ! Initialization
    n = size(x)
    nmid = n / 2
    nmidp1 = nmid + 1
    iflag = n - 2 * nmid
    
    ! Sort the array in ascending order
    call sort(x, .true.)

    ! Find the median
    if (iflag == 0) then
        rst = half * (x(nmid) + x(nmidp1))
    else
        rst = x(nmidp1)
    end if
end function

! ------------------------------------------------------------------------------
! REF: https://fortranwiki.org/fortran/show/Quartiles
!
! This is the method used by Minitab
pure function quantile(x, q) result(rst)
    !! Computes the specified quantile of a data set using the SAS 
    !! Method 4.
    !!
    !! See Also
    !!
    !! - [Wikipedia](https://en.wikipedia.org/wiki/Quantile)
    real(real64), intent(in) :: x(:)
        !! An N-element array containing the data.
    real(real64), intent(in) :: q
        !! The quantile to compute (e.g. 0.25 computes the 25% quantile).
    real(real64) :: rst
        !! The result.

    ! Parameters
    real(real64), parameter :: one = 1.0d0

    ! Local Variables
    real(real64) :: a, b, c, tol
    integer(int32) :: n, ib

    ! Initialization
    tol = sqrt(epsilon(tol))
    n = size(x)

    ! Process
    a = (n + one) * q
    b = mod(a, one)
    c = a - b

    ib = int(c, int32)
    if ((ib + 1) > n) then
        rst = (one - b) * x(ib) + b * x(n)
    else
        rst = (one - b) * x(ib) + b * x(ib + 1)
    end if
end function

! ------------------------------------------------------------------------------
function trimmed_mean(x, p) result(rst)
    !! Computes the trimmed mean of a data set.
    real(real64), intent(inout), dimension(:) :: x
        !! An N-element array containing the data.  On output, the
        !! array is sorted into ascending order.
    real(real64), intent(in), optional :: p
        !! An optional parameter specifying the percentage of values
        !! from either end of the distribution to remove.  The default
        !! is 0.05 such that the bottom 5% and top 5% are removed.
    real(real64) :: rst
        !! The trimmed mean.

    ! Local Variables
    integer(int32) :: i1, i2, n
    real(real64) :: pv

    ! Initialization
    if (present(p)) then
        pv = abs(p)
    else
        pv = 0.05d0
    end if

    ! Sort the array into ascending order
    call sort(x, .true.)

    ! Find the limiting indices
    n = size(x)
    i1 = max(floor(n * pv, int32), 1)
    i2 = min(n, n - i1 + 1)
    rst = mean(x(i1:i2))
end function

! ------------------------------------------------------------------------------
pure function covariance(x, y) result(rst)
    !! Computes the sample covariance of two data sets.
    !!
    !! The covariance computed is the sample covariance such that 
    !! $$ q_{jk} = \frac{\Sigma \left( x_{i} - \bar{x} \right) 
    !! \left( y_{i} - \bar{y} \right)}{n - 1} $$.
    real(real64), intent(in), dimension(:) :: x
        !! The first N-element data set.
    real(real64), intent(in), dimension(size(x)) :: y
        !! The second N-element data set.
    real(real64) :: rst
        !! The covariance.

    ! Parameters
    real(real64), parameter :: zero = 0.0d0
    real(real64), parameter :: one = 1.0d0

    ! Local Variables
    integer(int32) :: i, n
    real(real64) :: meanX, meanY

    ! Process
    n = size(x)
    if (n <= 1) then
        rst = zero
    else
        ! Compute the means
        meanX = x(1)
        meanY = y(1)
        do i = 2, n
            meanX = meanX + (x(i) - meanX) / i
            meanY = meanY + (y(i) - meanY) / i
        end do

        ! Compute the covariance
        rst = sum((x - meanX) * (y - meanY)) / (n - one)
    end if
end function

! ------------------------------------------------------------------------------
pure function pooled_variance_1(si, ni) result(rst)
    !! Computes the pooled estimate of variance.
    real(real64), intent(in), dimension(:) :: si
        !! An N-element array containing the estimates for each of the N
        !! variances.
    integer(int32), intent(in), dimension(size(si)) :: ni
        !! An N-element array containing the number of data points in each
        !! of the data sets used to compute the variances in si.
    real(real64) :: rst
        !! The pooled variance.

    ! Local Variables
    integer(int32) :: i, k, n

    ! Process
    k = size(si)
    rst = 0.0d0
    n = 0
    do i = 1, k
        n = n + ni(i)
        rst = rst + (ni(i) - 1.0d0) * si(i)
    end do
    rst = rst / real(n - k, real64)
end function

pure function pooled_variance_2(x) result(rst)
    !! Computes the pooled estimate of variance.
    type(array_container), intent(in), dimension(:) :: x
        !! An array of arrays of data.
    real(real64) :: rst
        !! The pooled variance.

    ! Local Variables
    integer(int32) :: i, k, n, ni

    ! Process
    k = size(x)
    n = 0
    rst = 0.0d0
    do i = 1, k
        ni = size(x(i)%x)
        n = n + ni
        rst = rst + variance(x(i)%x) * (ni - 1.0)
    end do
    rst = rst / real(n - k, real64)
end function

! ------------------------------------------------------------------------------
end module