module fstats_anova use iso_fortran_env use ieee_arithmetic use fstats_special_functions use fstats_descriptive_statistics use ferror use fstats_errors use fstats_distributions implicit none private public :: anova_factor public :: single_factor_anova_table public :: two_factor_anova_table public :: anova type anova_factor !! Defines an ANOVA factor result. real(real64) :: dof !! The number of degrees of freedome. real(real64) :: variance !! The estimate of variance. real(real64) :: sum_of_squares !! The sum of the squares. real(real64) :: f_statistic !! The F-statistic. real(real64) :: probability !! The variance probability term. end type type single_factor_anova_table !! Defines a single-factor ANOVA results table. type(anova_factor) :: main_factor !! The main, or main factor, results. type(anova_factor) :: within_factor !! The within-treatement (error) results. real(real64) :: total_dof !! The total number of degrees of freedom. real(real64) :: total_sum_of_squares !! The total sum of squares. real(real64) :: total_variance !! The total variance estimate. real(real64) :: overall_mean !! The overall mean value. end type type two_factor_anova_table !! Defines a two-factor ANOVA results table. type(anova_factor) :: main_factor_1 !! The first main-factor results. type(anova_factor) :: main_factor_2 !! The second main-factor results. type(anova_factor) :: interaction !! The interaction effects. type(anova_factor) :: within_factor !! The within (error) factor results. real(real64) :: total_dof !! The total number of degrees of freedom. real(real64) :: total_sum_of_squares !! The total sum of squares. real(real64) :: total_variance !! The total variance estimate. real(real64) :: overall_mean !! The overall mean value. end type interface anova !! Performs an analysis of variance (ANOVA) on the supplied data !! set. !! !! The following example illustrates a single-factor ANOVA on a !! data set. !! ```fortran !! program example !! use iso_fortran_env !! use fstats !! implicit none !! !! ! Local Variables !! character, parameter :: tab = achar(9) !! real(real64) :: x(10, 2) !! type(single_factor_anova_table) :: tbl !! !! ! Define the data !! x = reshape( & !! [ & !! 3.086d3, 3.082d3, 3.069d3, 3.072d3, 3.045d3, 3.070d3, 3.079d3, & !! 3.050d3, 3.062d3, 3.062d3, 3.075d3, 3.061d3, 3.063d3, 3.038d3, & !! 3.070d3, 3.062d3, 3.070d3, 3.049d3, 3.042d3, 3.063d3 & !! ], & !! [10, 2] & !! ) !! !! ! Perform the ANOVA !! tbl = anova(x) !! !! ! Print out the table !! print '(A)', "Description" // tab // "DOF" // tab // "Sum of Sq." // & !! tab // "Variance" // tab // "F-Stat" // tab // "P-Value" !! print '(AF2.0AF5.1AF5.1AF5.3AF5.3)', "Main Factor: " // tab, & !! tbl%main_factor%dof, tab, & !! tbl%main_factor%sum_of_squares, tab // tab, & !! tbl%main_factor%variance, tab // tab, & !! tbl%main_factor%f_statistic, tab, & !! tbl%main_factor%probability !! !! print '(AF3.0AF6.1AF5.1)', "Within: " // tab, & !! tbl%within_factor%dof, tab, & !! tbl%within_factor%sum_of_squares, tab // tab, & !! tbl%within_factor%variance !! !! print '(AF3.0AF6.1AF5.1)', "Total: " // tab // tab, & !! tbl%total_dof, tab, & !! tbl%total_sum_of_squares, tab // tab, & !! tbl%total_variance !! !! print '(AF6.1)', "Overall Mean: ", tbl%overall_mean !! end program !! ``` !! The above program produces the following output. !! ```text !! Description DOF Sum of Sq. Variance F-Stat P-Value !! Main Factor: 1. 352.8 352.8 2.147 0.160 !! Within: 18. 2958.2 164.3 !! Total: 19. 3311.0 174.3 !! Overall Mean: 3063.5 !! ``` !! !! See Also !! !! - [Wikipedia](https://en.wikipedia.org/wiki/Analysis_of_variance) !! - [SPC Excel Single Factor ANOVA](https://www.spcforexcel.com/knowledge/root-cause-analysis/single-factor-anova) !! - [SPC Excel Gage R&R](https://www.spcforexcel.com/knowledge/measurement-systems-analysis/anova-gage-rr-part-1) !! - [SPC Excel Understanding Regression Statistics](https://www.spcforexcel.com/knowledge/root-cause-analysis/understanding-regression-statistics-part-1) !! - [NIST - Two Way ANOVA](https://www.itl.nist.gov/div898/handbook/prc/section4/prc427.htm) module procedure :: anova_1_factor module procedure :: anova_2_factor module procedure :: anova_model_fit end interface contains ! ------------------------------------------------------------------------------ ! REF: https://www.spcforexcel.com/knowledge/root-cause-analysis/single-factor-anova function anova_1_factor(x) result(rst) !! Performs an analysis of variance (ANOVA) on the supplied data set. real(real64), intent(in) :: x(:,:) !! An M-by-N matrix containing the M replications of the N test !! points of interest. type(single_factor_anova_table) :: rst !! A single_factor_anova_table instance containing the ANOVA results. ! Parameters real(real64), parameter :: zero = 0.0d0 ! Local Variables integer(int32) :: j, a, n, nt real(real64) :: sum_all, tssq, essq, bssq ! Initialization a = size(x, 2) nt = size(x, 1) n = nt * a rst%within_factor%f_statistic = ieee_value(sum_all, IEEE_QUIET_NAN) rst%within_factor%probability = ieee_value(sum_all, IEEE_QUIET_NAN) ! Determine the degrees of freedom rst%main_factor%dof = a - 1 rst%within_factor%dof = n - a rst%total_dof = n - 1 ! Quick Return if (a == 1 .or. nt == 1) then rst%main_factor%sum_of_squares = zero rst%main_factor%variance = zero rst%main_factor%f_statistic = zero rst%main_factor%probability = zero rst%within_factor%sum_of_squares = zero rst%within_factor%variance = zero rst%total_variance = variance(pack(x, .true.)) rst%total_sum_of_squares = rst%total_variance * rst%total_dof rst%overall_mean = mean(pack(x, .true.)) return end if ! Compute the sum of squares for all factors sum_all = sum(x) tssq = sum(x**2) - (sum_all**2 / n) bssq = zero do j = 1, a bssq = bssq + sum(x(:,j))**2 end do bssq = (bssq / nt) - (sum_all**2 / n) essq = tssq - bssq rst%main_factor%sum_of_squares = bssq rst%within_factor%sum_of_squares = essq rst%total_sum_of_squares = tssq ! Compute the variance terms rst%main_factor%variance = bssq / rst%main_factor%dof rst%within_factor%variance = essq / rst%within_factor%dof rst%total_variance = tssq / rst%total_dof ! Compute the overall mean rst%overall_mean = mean(pack(x, .true.)) ! Compute the F-statistic and probability term call anova_probability( & rst%main_factor%variance, & rst%within_factor%variance, & rst%main_factor%dof, & rst%within_factor%dof, & rst%main_factor%f_statistic, & rst%main_factor%probability & ) end function ! ------------------------------------------------------------------------------ ! REF: https://www.spcforexcel.com/knowledge/measurement-systems-analysis/anova-gage-rr-part-1 ! REF: https://www.itl.nist.gov/div898/handbook/prc/section4/prc427.htm ! Data set is expected as a 3D array with each of the K pages containing the R ! treatments of N tests such that the array size is N-by-R-by-K function anova_2_factor(x) result(rst) !! Performs an analysis of variance (ANOVA) on the supplied data set. real(real64), intent(in) :: x(:,:,:) !! An M-by-N-by-K array containing the M replications of the !! N first factor results, and the K second factor results. type(two_factor_anova_table) :: rst !! A two_factor_anova_table instance containing the ANOVA results. ! Parameters real(real64), parameter :: zero = 0.0d0 real(real64), parameter :: one = 1.0d0 ! Local Variables integer(int32) :: i, j, jj, k, r, n real(real64) :: factorMean, sum_all real(real64), allocatable :: xpack(:) ! Initialization n = size(x, 3) k = size(x, 2) r = size(x, 1) rst%within_factor%f_statistic = ieee_value(sum_all, IEEE_QUIET_NAN) rst%within_factor%probability = ieee_value(sum_all, IEEE_QUIET_NAN) ! Quick Return if (k == 1) then ! This is a one-factor anova end if ! Determine the number of DOF rst%main_factor_1%dof = k - one rst%main_factor_2%dof = n - 1 rst%interaction%dof = (k - 1) * (n - 1) rst%within_factor%dof = n * k * (r - 1) rst%total_dof = n * k * r - 1 ! Compute the overall mean, sum of squares, and variance xpack = pack(x, .true.) rst%overall_mean = mean(xpack) rst%total_sum_of_squares = sum((xpack - rst%overall_mean)**2) rst%total_variance = rst%total_sum_of_squares / rst%total_dof ! Compute factor 1 results rst%main_factor_1%sum_of_squares = zero do i = 1, k factorMean = mean(pack(x(:,i,:), .true.)) rst%main_factor_1%sum_of_squares = rst%main_factor_1%sum_of_squares + & (factorMean - rst%overall_mean)**2 end do rst%main_factor_1%sum_of_squares = n * r * rst%main_factor_1%sum_of_squares rst%main_factor_1%variance = rst%main_factor_1%sum_of_squares / & rst%main_factor_1%dof ! Compute factor 2 results rst%main_factor_2%sum_of_squares = zero do i = 1, n factorMean = mean(pack(x(:,:,i), .true.)) rst%main_factor_2%sum_of_squares = rst%main_factor_2%sum_of_squares + & (factorMean - rst%overall_mean)**2 end do rst%main_factor_2%sum_of_squares = k * r * rst%main_factor_2%sum_of_squares rst%main_factor_2%variance = rst%main_factor_2%sum_of_squares / & rst%main_factor_2%dof ! Compute the within (error) term rst%within_factor%sum_of_squares = zero do j = 1, k do i = 1, n factorMean = mean(x(:,j,i)) do jj = 1, r rst%within_factor%sum_of_squares = & rst%within_factor%sum_of_squares + & (x(jj,j,i) - factorMean)**2 end do end do end do rst%within_factor%variance = rst%within_factor%sum_of_squares /& rst%within_factor%dof ! Compute the interaction term rst%interaction%sum_of_squares = rst%total_sum_of_squares - ( & rst%main_factor_1%sum_of_squares + & rst%main_factor_2%sum_of_squares + & rst%within_factor%sum_of_squares & ) rst%interaction%variance = rst%interaction%sum_of_squares / & rst%interaction%dof ! Compute the F-statistics call anova_probability( & rst%main_factor_1%variance, & rst%within_factor%variance, & rst%main_factor_1%dof, & rst%within_factor%dof, & rst%main_factor_1%f_statistic, & rst%main_factor_1%probability & ) call anova_probability( & rst%main_factor_2%variance, & rst%within_factor%variance, & rst%main_factor_2%dof, & rst%within_factor%dof, & rst%main_factor_2%f_statistic, & rst%main_factor_2%probability & ) call anova_probability( & rst%interaction%variance, & rst%within_factor%variance, & rst%interaction%dof, & rst%within_factor%dof, & rst%interaction%f_statistic, & rst%interaction%probability & ) end function ! ------------------------------------------------------------------------------ ! REF: https://www.spcforexcel.com/knowledge/root-cause-analysis/understanding-regression-statistics-part-1 function anova_model_fit(nmodelparams, ymeas, ymod, err) result(rst) !! Performs an analysis of variance (ANOVA) on the supplied data set. integer(int32), intent(in) :: nmodelparams !! The number of model parameters. real(real64), intent(in) :: ymeas(:) !! An N-element array containing the measured dependent variable data. real(real64), intent(in) :: ymod(:) !! An N-element array containing the modeled dependent variable data. class(errors), intent(inout), optional, target :: err !! A mechanism for communicating errors and warnings to the !! caller. Possible warning and error codes are as follows. !! - FS_NO_ERROR: No errors encountered. !! - FS_ARRAY_SIZE_ERROR: Occurs if ymeas and ymod are not the !! same length. !! - FS_MEMORY_ERROR: Occurs if a memory error is encountered. type(single_factor_anova_table) :: rst !! A single_factor_anova_table instance containing the ANOVA results. ! Local Variables integer(int32) :: n, flag real(real64), allocatable :: ypack(:) real(real64) :: sum_all class(errors), pointer :: errmgr type(errors), target :: deferr ! Initialization n = size(ymeas) if (present(err)) then errmgr => err else errmgr => deferr end if rst%within_factor%f_statistic = ieee_value(sum_all, IEEE_QUIET_NAN) rst%within_factor%probability = ieee_value(sum_all, IEEE_QUIET_NAN) ! Input Checking if (size(ymod) /= n) then call report_arrays_not_same_size_error(errmgr, "anova_model_fit", & "YMEAS", "YMOD", n, size(ymod)) return end if ! Memory Allocation allocate(ypack(2 * n), stat = flag) if (flag /= 0) then call report_memory_error(errmgr, "anova_model_fit", flag) return end if ! Determine the number of DOF rst%main_factor%dof = nmodelparams - 1 rst%within_factor%dof = n - rst%main_factor%dof - 1 rst%total_dof = n - 1 ! Process ypack(1:n) = ymeas ypack(n+1:2*n) = ymod rst%overall_mean = mean(ypack) rst%total_sum_of_squares = sum((ymeas - rst%overall_mean)**2) rst%main_factor%sum_of_squares = sum((ymod - rst%overall_mean)**2) rst%within_factor%sum_of_squares = sum((ymeas - ymod)**2) rst%total_variance = rst%total_sum_of_squares / rst%total_dof rst%main_factor%variance = rst%main_factor%sum_of_squares / & rst%main_factor%dof rst%within_factor%variance = rst%within_factor%sum_of_squares / & rst%within_factor%dof ! Compute the F-statistic and probability term call anova_probability( & rst%main_factor%variance, & rst%within_factor%variance, & rst%main_factor%dof, & rst%within_factor%dof, & rst%main_factor%f_statistic, & rst%main_factor%probability & ) ! Formatting 100 format(A, I0, A, I0, A) 101 format(A, I0, A) end function ! ****************************************************************************** ! PRIVATE ROUTINES ! ------------------------------------------------------------------------------ subroutine anova_probability(v1, v2, dof1, dof2, f, p) ! Arguments real(real64), intent(in) :: v1, v2, dof1, dof2 real(real64), intent(out) :: f, p ! Local Variables type(f_distribution) :: dist ! Process f = v1 / v2 dist%d1 = dof1 dist%d2 = dof2 p = 1.0d0 - dist%cdf(f) if (p > 1.0d0) then p = 2.0d0 - p end if end subroutine ! ------------------------------------------------------------------------------ end module