Coverage for src / gwtransport / gamma.py: 89%
71 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-27 06:32 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-27 06:32 +0000
1"""
2Gamma Distribution Utilities for Aquifer Pore Volume Heterogeneity.
4This module provides utilities for working with gamma distributions to model heterogeneous
5aquifer pore volumes in groundwater transport analysis. The gamma distribution offers a
6flexible three-parameter model (shape, scale, location) for representing the natural
7variability in flow path lengths and residence times within aquifer systems. In
8heterogeneous aquifers, water travels through multiple flow paths with different pore
9volumes; the location parameter additionally represents a guaranteed minimum pore volume
10(for example, immobile porosity or a geometric minimum travel distance).
12Parameterizations
13-----------------
14Two equivalent parameterizations are supported, each optionally with a location shift:
16- **(mean, std, loc)** — physically intuitive. ``mean`` is the total expected value,
17 ``std`` is the spread (invariant under shift), and ``loc`` is the lower bound of
18 support. Constraint: ``0 <= loc < mean``.
19- **(alpha, beta, loc)** — scipy-style. ``alpha`` is shape, ``beta`` is scale, and
20 ``loc`` is the lower bound of support. Constraint: ``alpha > 0``, ``beta > 0``,
21 ``loc >= 0``.
23Conversion formulas (with constraint ``mean > loc``):
25 alpha = ((mean - loc) / std) ** 2
26 beta = std ** 2 / (mean - loc)
27 mean = alpha * beta + loc
28 std = sqrt(alpha) * beta
30When ``loc == 0`` the three-parameter model reduces to the standard two-parameter
31gamma distribution.
33Available functions:
35- :func:`parse_parameters` - Parse and validate gamma distribution parameters from either
36 (mean, std, loc) or (alpha, beta, loc). Ensures exactly one parameter pair is provided
37 and validates positivity and ordering constraints.
39- :func:`mean_std_loc_to_alpha_beta` - Convert physically intuitive (mean, std, loc) parameters
40 to gamma shape/scale parameters.
42- :func:`alpha_beta_loc_to_mean_std` - Convert gamma (alpha, beta, loc) parameters back to
43 (mean, std) for physical interpretation.
45- :func:`bins` - Primary function for transport modeling. Creates discrete probability bins
46 from the (optionally shifted) gamma distribution with equal-probability bins (default) or
47 custom quantile edges. Returns bin edges, expected values (mean pore volume within each
48 bin), and probability masses (weight in transport calculations).
50- :func:`bin_masses` - Calculate probability mass for custom bin edges using the incomplete
51 gamma function. Lower-level function used internally by bins().
53This file is part of gwtransport which is released under AGPL-3.0 license.
54See the ./LICENSE file or go to https://github.com/gwtransport/gwtransport/blob/main/LICENSE for full license details.
55"""
57import numpy as np
58import numpy.typing as npt
59from scipy.special import gammainc
60from scipy.stats import gamma as gamma_dist
63def parse_parameters(
64 *,
65 mean: float | None = None,
66 std: float | None = None,
67 loc: float = 0.0,
68 alpha: float | None = None,
69 beta: float | None = None,
70) -> tuple[float, float, float]:
71 """
72 Parse parameters for gamma distribution.
74 Either ``(mean, std)`` or ``(alpha, beta)`` must be provided. ``loc`` is optional
75 and defaults to 0, which recovers the standard two-parameter gamma distribution.
77 Parameters
78 ----------
79 mean : float, optional
80 Mean of the gamma distribution. Must be strictly greater than ``loc``.
81 std : float, optional
82 Standard deviation of the gamma distribution. Must be positive. See
83 :ref:`concept-dispersion-scales` for what std represents depending
84 on APVD source. ``std`` is invariant under the ``loc`` shift.
85 loc : float, optional
86 Location (horizontal shift) of the gamma distribution; the lower bound of
87 support. Must satisfy ``loc >= 0`` and, when ``mean`` is supplied,
88 ``loc < mean``. Default is ``0.0``.
89 alpha : float, optional
90 Shape parameter of gamma distribution (must be > 0).
91 beta : float, optional
92 Scale parameter of gamma distribution (must be > 0).
94 Returns
95 -------
96 alpha : float
97 Shape parameter of gamma distribution.
98 beta : float
99 Scale parameter of gamma distribution.
100 loc : float
101 Location parameter of gamma distribution.
103 Raises
104 ------
105 ValueError
106 If neither ``(mean, std)`` nor ``(alpha, beta)`` is provided, if only one
107 of a pair is provided, if ``alpha`` or ``beta`` are not positive, if
108 ``loc`` is negative, or if ``mean <= loc``.
109 """
110 if loc < 0:
111 msg = "loc must be non-negative"
112 raise ValueError(msg)
114 if (alpha is None) != (beta is None):
115 msg = "alpha and beta must both be provided or both be None."
116 raise ValueError(msg)
118 if alpha is None or beta is None:
119 if mean is None or std is None:
120 msg = "Either (alpha, beta) or (mean, std) must be provided."
121 raise ValueError(msg)
123 alpha, beta = mean_std_loc_to_alpha_beta(mean=mean, std=std, loc=loc)
125 if alpha <= 0 or beta <= 0:
126 msg = "Alpha and beta must be positive"
127 raise ValueError(msg)
129 return alpha, beta, loc
132def mean_std_loc_to_alpha_beta(*, mean: float, std: float, loc: float = 0.0) -> tuple[float, float]:
133 """
134 Convert mean, standard deviation, and location of gamma distribution to shape/scale.
136 The two-parameter shape/scale representation (``alpha``, ``beta``) is derived from
137 the excess-over-``loc`` moments: ``mean_excess = mean - loc``, ``std_excess = std``.
139 Parameters
140 ----------
141 mean : float
142 Mean of the gamma distribution. Must be strictly greater than ``loc``.
143 std : float
144 Standard deviation of the gamma distribution. Must be positive. See
145 :ref:`concept-dispersion-scales` for what std represents depending
146 on APVD source. ``std`` is invariant under the ``loc`` shift.
147 loc : float, optional
148 Location (horizontal shift) of the gamma distribution. Must satisfy
149 ``0 <= loc < mean``. Default is ``0.0``.
151 Returns
152 -------
153 alpha : float
154 Shape parameter of gamma distribution.
155 beta : float
156 Scale parameter of gamma distribution.
158 Raises
159 ------
160 ValueError
161 If ``std`` is not positive, if ``loc`` is negative, or if ``mean <= loc``.
163 See Also
164 --------
165 alpha_beta_loc_to_mean_std : Convert shape/scale/loc parameters to mean and std.
166 parse_parameters : Parse and validate gamma distribution parameters.
168 Examples
169 --------
170 >>> from gwtransport.gamma import mean_std_loc_to_alpha_beta
171 >>> mean_pore_volume = 30000.0 # m³
172 >>> std_pore_volume = 8100.0 # m³
173 >>> alpha, beta = mean_std_loc_to_alpha_beta(
174 ... mean=mean_pore_volume, std=std_pore_volume
175 ... )
176 >>> print(f"Shape parameter (alpha): {alpha:.2f}")
177 Shape parameter (alpha): 13.72
178 >>> print(f"Scale parameter (beta): {beta:.2f}")
179 Scale parameter (beta): 2187.00
181 With a 5000 m³ minimum pore volume:
183 >>> alpha, beta = mean_std_loc_to_alpha_beta(mean=30000.0, std=8100.0, loc=5000.0)
184 >>> print(f"Shape parameter (alpha): {alpha:.2f}")
185 Shape parameter (alpha): 9.45
186 >>> print(f"Scale parameter (beta): {beta:.2f}")
187 Scale parameter (beta): 2646.00
188 """
189 if std <= 0:
190 msg = "std must be positive"
191 raise ValueError(msg)
192 if loc < 0:
193 msg = "loc must be non-negative"
194 raise ValueError(msg)
195 if mean <= loc:
196 msg = "mean must be strictly greater than loc"
197 raise ValueError(msg)
199 mean_excess = mean - loc
200 alpha = mean_excess**2 / std**2
201 beta = std**2 / mean_excess
202 return alpha, beta
205def alpha_beta_loc_to_mean_std(*, alpha: float, beta: float, loc: float = 0.0) -> tuple[float, float]:
206 """
207 Convert shape, scale, and location of gamma distribution to mean and standard deviation.
209 Parameters
210 ----------
211 alpha : float
212 Shape parameter of the gamma distribution. Must be positive.
213 beta : float
214 Scale parameter of the gamma distribution. Must be positive.
215 loc : float, optional
216 Location (horizontal shift) of the gamma distribution. Must be non-negative.
217 Default is ``0.0``.
219 Returns
220 -------
221 mean : float
222 Mean of the gamma distribution, equal to ``alpha * beta + loc``.
223 std : float
224 Standard deviation of the gamma distribution, equal to ``sqrt(alpha) * beta``.
225 ``std`` is invariant under the ``loc`` shift.
227 Raises
228 ------
229 ValueError
230 If ``loc`` is negative.
232 See Also
233 --------
234 mean_std_loc_to_alpha_beta : Convert mean/std/loc to shape and scale parameters.
235 parse_parameters : Parse and validate gamma distribution parameters.
237 Examples
238 --------
239 >>> from gwtransport.gamma import alpha_beta_loc_to_mean_std
240 >>> alpha = 13.72 # shape parameter
241 >>> beta = 2187.0 # scale parameter
242 >>> mean, std = alpha_beta_loc_to_mean_std(alpha=alpha, beta=beta)
243 >>> print(f"Mean pore volume: {mean:.0f} m³") # doctest: +ELLIPSIS
244 Mean pore volume: 3000... m³
245 >>> print(f"Std pore volume: {std:.0f} m³") # doctest: +ELLIPSIS
246 Std pore volume: 810... m³
247 """
248 if loc < 0:
249 msg = "loc must be non-negative"
250 raise ValueError(msg)
251 mean = alpha * beta + loc
252 std = np.sqrt(alpha) * beta
253 return mean, std
256def bins(
257 *,
258 mean: float | None = None,
259 std: float | None = None,
260 loc: float = 0.0,
261 alpha: float | None = None,
262 beta: float | None = None,
263 n_bins: int = 100,
264 quantile_edges: np.ndarray | None = None,
265) -> dict[str, npt.NDArray[np.floating]]:
266 """
267 Divide a (shifted) gamma distribution into bins and compute bin properties.
269 If ``n_bins`` is provided, the gamma distribution is divided into ``n_bins``
270 equal-mass bins. If ``quantile_edges`` is provided, the distribution is divided
271 into bins defined by those quantile edges. The quantile edges must lie in
272 ``[0, 1]`` with size ``n_bins + 1``; the first and last entries must be 0 and 1.
274 Parameters
275 ----------
276 mean : float, optional
277 Mean of the gamma distribution. Must be strictly greater than ``loc``.
278 std : float, optional
279 Standard deviation of the gamma distribution. Must be positive.
280 loc : float, optional
281 Location (horizontal shift) of the gamma distribution; the lower bound of
282 support. Must satisfy ``0 <= loc < mean`` (or ``loc >= 0`` when using
283 alpha/beta). Default is ``0.0``.
284 alpha : float, optional
285 Shape parameter of gamma distribution (must be > 0).
286 beta : float, optional
287 Scale parameter of gamma distribution (must be > 0).
288 n_bins : int, optional
289 Number of bins to divide the gamma distribution (must be > 1). Default is 100.
290 quantile_edges : array-like, optional
291 Quantile edges for binning. Must be in the range [0, 1] and of size
292 ``n_bins + 1``. The first and last quantile edges must be 0 and 1, respectively.
293 If provided, ``n_bins`` is ignored.
295 Returns
296 -------
297 dict
298 Dictionary with keys of type str and values of type numpy.ndarray:
300 - ``lower_bound``: lower bounds of bins (first one equals ``loc``)
301 - ``upper_bound``: upper bounds of bins (last one is inf)
302 - ``edges``: bin edges (lower_bound[0], upper_bound[0], ..., upper_bound[-1])
303 - ``expected_values``: expected values in bins. Is what you would expect to
304 observe if you repeatedly sampled from the probability distribution, but only
305 considered samples that fall within that particular bin.
306 - ``probability_mass``: probability mass in bins (invariant under ``loc`` shift).
308 Raises
309 ------
310 ValueError
311 If ``n_bins`` is not greater than 1, or if parameter validation in
312 :func:`parse_parameters` fails.
314 See Also
315 --------
316 bin_masses : Calculate probability mass for bins.
317 mean_std_loc_to_alpha_beta : Convert mean/std/loc to alpha/beta parameters.
318 gwtransport.advection.gamma_infiltration_to_extraction : Use bins for transport modeling.
319 :ref:`concept-gamma-distribution` : Two-parameter pore volume model.
320 :ref:`concept-gamma-loc` : Shifted gamma with minimum pore volume.
321 :ref:`concept-dispersion-scales` : What ``std`` represents (macrodispersion vs total spreading).
322 :ref:`assumption-gamma-distribution` : When gamma distribution is adequate.
324 Examples
325 --------
326 Create equal-mass bins for a gamma distribution:
328 >>> from gwtransport.gamma import bins
329 >>> result = bins(mean=30000.0, std=8100.0, n_bins=5)
331 With a location parameter representing a minimum pore volume:
333 >>> result = bins(mean=30000.0, std=8100.0, loc=5000.0, n_bins=5)
334 >>> float(result["edges"][0])
335 5000.0
337 Create bins with custom quantile edges:
339 >>> import numpy as np
340 >>> quantiles = np.array([0.0, 0.25, 0.5, 0.75, 1.0])
341 >>> result = bins(mean=30000.0, std=8100.0, quantile_edges=quantiles)
342 >>> print(f"Number of bins: {len(result['probability_mass'])}")
343 Number of bins: 4
344 """
345 alpha, beta, loc = parse_parameters(mean=mean, std=std, loc=loc, alpha=alpha, beta=beta)
347 # Calculate boundaries for equal mass bins
348 # If quantile_edges is provided, use it (n_bins is ignored)
349 # Otherwise, use n_bins (which defaults to 100)
350 if quantile_edges is not None:
351 n_bins = len(quantile_edges) - 1
352 else:
353 quantile_edges = np.linspace(0, 1, n_bins + 1) # includes 0 and 1
355 if n_bins <= 1:
356 msg = "Number of bins must be greater than 1"
357 raise ValueError(msg)
359 # Unshifted bin edges for the standard Gamma(alpha, beta) distribution, then shift
360 unshifted_edges = gamma_dist.ppf(quantile_edges, alpha, scale=beta)
361 bin_edges = unshifted_edges + loc
362 probability_mass = np.diff(quantile_edges) # probability mass for each bin
364 # Conditional mean within each bin for the unshifted distribution, then shift by loc.
365 # E[X | a <= X < b] for X ~ Gamma(alpha, beta) uses the identity
366 # E[X * 1_{a<=X<b}] = alpha * beta * (F_{alpha+1}(b/beta) - F_{alpha+1}(a/beta))
367 # where F_{alpha+1} is the CDF of Gamma(alpha+1, beta). The bin_masses helper returns
368 # exactly those differences.
369 diff_alpha_plus_1 = bin_masses(alpha=alpha + 1, beta=beta, bin_edges=unshifted_edges)
370 expected_values = beta * alpha * diff_alpha_plus_1 / probability_mass + loc
372 return {
373 "lower_bound": bin_edges[:-1],
374 "upper_bound": bin_edges[1:],
375 "edges": bin_edges,
376 "expected_values": expected_values,
377 "probability_mass": probability_mass,
378 }
381def bin_masses(*, alpha: float, beta: float, bin_edges: npt.ArrayLike) -> npt.NDArray[np.floating]:
382 """
383 Calculate probability mass for each bin in a standard (unshifted) gamma distribution.
385 Is the area under the Gamma(alpha, beta) PDF between the bin edges. This lower-level
386 function operates on the unshifted gamma distribution; if a location shift is needed,
387 callers should subtract ``loc`` from their physical bin edges before passing them in.
388 Because probability mass is invariant under a location shift, the result is identical
389 to that of the shifted distribution.
391 Parameters
392 ----------
393 alpha : float
394 Shape parameter of gamma distribution (must be > 0).
395 beta : float
396 Scale parameter of gamma distribution (must be > 0).
397 bin_edges : array-like
398 Bin edges of the unshifted distribution. Array of increasing values of size
399 ``len(bins) + 1``. Must be non-negative.
401 Returns
402 -------
403 numpy.ndarray
404 Probability mass for each bin.
406 Raises
407 ------
408 ValueError
409 If ``alpha`` or ``beta`` are not positive, if ``bin_edges`` contains fewer than two
410 values, if ``bin_edges`` are not monotonically increasing, or if any ``bin_edges``
411 are negative.
412 """
413 # Convert inputs to numpy arrays
414 bin_edges = np.asarray(bin_edges)
416 # Validate inputs
417 if alpha <= 0 or beta <= 0:
418 msg = "Alpha and beta must be positive"
419 raise ValueError(msg)
420 if len(bin_edges) < 2: # noqa: PLR2004
421 msg = "Bin edges must contain at least two values"
422 raise ValueError(msg)
423 if np.any(np.diff(bin_edges) < 0):
424 msg = "Bin edges must be increasing"
425 raise ValueError(msg)
426 if np.any(bin_edges < 0):
427 msg = "Bin edges must be non-negative"
428 raise ValueError(msg)
429 val = gammainc(alpha, bin_edges / beta)
430 return val[1:] - val[:-1]