Coverage for src / gwtransport / gamma.py: 89%

71 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-27 06:32 +0000

1""" 

2Gamma Distribution Utilities for Aquifer Pore Volume Heterogeneity. 

3 

4This module provides utilities for working with gamma distributions to model heterogeneous 

5aquifer pore volumes in groundwater transport analysis. The gamma distribution offers a 

6flexible three-parameter model (shape, scale, location) for representing the natural 

7variability in flow path lengths and residence times within aquifer systems. In 

8heterogeneous aquifers, water travels through multiple flow paths with different pore 

9volumes; the location parameter additionally represents a guaranteed minimum pore volume 

10(for example, immobile porosity or a geometric minimum travel distance). 

11 

12Parameterizations 

13----------------- 

14Two equivalent parameterizations are supported, each optionally with a location shift: 

15 

16- **(mean, std, loc)** — physically intuitive. ``mean`` is the total expected value, 

17 ``std`` is the spread (invariant under shift), and ``loc`` is the lower bound of 

18 support. Constraint: ``0 <= loc < mean``. 

19- **(alpha, beta, loc)** — scipy-style. ``alpha`` is shape, ``beta`` is scale, and 

20 ``loc`` is the lower bound of support. Constraint: ``alpha > 0``, ``beta > 0``, 

21 ``loc >= 0``. 

22 

23Conversion formulas (with constraint ``mean > loc``): 

24 

25 alpha = ((mean - loc) / std) ** 2 

26 beta = std ** 2 / (mean - loc) 

27 mean = alpha * beta + loc 

28 std = sqrt(alpha) * beta 

29 

30When ``loc == 0`` the three-parameter model reduces to the standard two-parameter 

31gamma distribution. 

32 

33Available functions: 

34 

35- :func:`parse_parameters` - Parse and validate gamma distribution parameters from either 

36 (mean, std, loc) or (alpha, beta, loc). Ensures exactly one parameter pair is provided 

37 and validates positivity and ordering constraints. 

38 

39- :func:`mean_std_loc_to_alpha_beta` - Convert physically intuitive (mean, std, loc) parameters 

40 to gamma shape/scale parameters. 

41 

42- :func:`alpha_beta_loc_to_mean_std` - Convert gamma (alpha, beta, loc) parameters back to 

43 (mean, std) for physical interpretation. 

44 

45- :func:`bins` - Primary function for transport modeling. Creates discrete probability bins 

46 from the (optionally shifted) gamma distribution with equal-probability bins (default) or 

47 custom quantile edges. Returns bin edges, expected values (mean pore volume within each 

48 bin), and probability masses (weight in transport calculations). 

49 

50- :func:`bin_masses` - Calculate probability mass for custom bin edges using the incomplete 

51 gamma function. Lower-level function used internally by bins(). 

52 

53This file is part of gwtransport which is released under AGPL-3.0 license. 

54See the ./LICENSE file or go to https://github.com/gwtransport/gwtransport/blob/main/LICENSE for full license details. 

55""" 

56 

57import numpy as np 

58import numpy.typing as npt 

59from scipy.special import gammainc 

60from scipy.stats import gamma as gamma_dist 

61 

62 

63def parse_parameters( 

64 *, 

65 mean: float | None = None, 

66 std: float | None = None, 

67 loc: float = 0.0, 

68 alpha: float | None = None, 

69 beta: float | None = None, 

70) -> tuple[float, float, float]: 

71 """ 

72 Parse parameters for gamma distribution. 

73 

74 Either ``(mean, std)`` or ``(alpha, beta)`` must be provided. ``loc`` is optional 

75 and defaults to 0, which recovers the standard two-parameter gamma distribution. 

76 

77 Parameters 

78 ---------- 

79 mean : float, optional 

80 Mean of the gamma distribution. Must be strictly greater than ``loc``. 

81 std : float, optional 

82 Standard deviation of the gamma distribution. Must be positive. See 

83 :ref:`concept-dispersion-scales` for what std represents depending 

84 on APVD source. ``std`` is invariant under the ``loc`` shift. 

85 loc : float, optional 

86 Location (horizontal shift) of the gamma distribution; the lower bound of 

87 support. Must satisfy ``loc >= 0`` and, when ``mean`` is supplied, 

88 ``loc < mean``. Default is ``0.0``. 

89 alpha : float, optional 

90 Shape parameter of gamma distribution (must be > 0). 

91 beta : float, optional 

92 Scale parameter of gamma distribution (must be > 0). 

93 

94 Returns 

95 ------- 

96 alpha : float 

97 Shape parameter of gamma distribution. 

98 beta : float 

99 Scale parameter of gamma distribution. 

100 loc : float 

101 Location parameter of gamma distribution. 

102 

103 Raises 

104 ------ 

105 ValueError 

106 If neither ``(mean, std)`` nor ``(alpha, beta)`` is provided, if only one 

107 of a pair is provided, if ``alpha`` or ``beta`` are not positive, if 

108 ``loc`` is negative, or if ``mean <= loc``. 

109 """ 

110 if loc < 0: 

111 msg = "loc must be non-negative" 

112 raise ValueError(msg) 

113 

114 if (alpha is None) != (beta is None): 

115 msg = "alpha and beta must both be provided or both be None." 

116 raise ValueError(msg) 

117 

118 if alpha is None or beta is None: 

119 if mean is None or std is None: 

120 msg = "Either (alpha, beta) or (mean, std) must be provided." 

121 raise ValueError(msg) 

122 

123 alpha, beta = mean_std_loc_to_alpha_beta(mean=mean, std=std, loc=loc) 

124 

125 if alpha <= 0 or beta <= 0: 

126 msg = "Alpha and beta must be positive" 

127 raise ValueError(msg) 

128 

129 return alpha, beta, loc 

130 

131 

132def mean_std_loc_to_alpha_beta(*, mean: float, std: float, loc: float = 0.0) -> tuple[float, float]: 

133 """ 

134 Convert mean, standard deviation, and location of gamma distribution to shape/scale. 

135 

136 The two-parameter shape/scale representation (``alpha``, ``beta``) is derived from 

137 the excess-over-``loc`` moments: ``mean_excess = mean - loc``, ``std_excess = std``. 

138 

139 Parameters 

140 ---------- 

141 mean : float 

142 Mean of the gamma distribution. Must be strictly greater than ``loc``. 

143 std : float 

144 Standard deviation of the gamma distribution. Must be positive. See 

145 :ref:`concept-dispersion-scales` for what std represents depending 

146 on APVD source. ``std`` is invariant under the ``loc`` shift. 

147 loc : float, optional 

148 Location (horizontal shift) of the gamma distribution. Must satisfy 

149 ``0 <= loc < mean``. Default is ``0.0``. 

150 

151 Returns 

152 ------- 

153 alpha : float 

154 Shape parameter of gamma distribution. 

155 beta : float 

156 Scale parameter of gamma distribution. 

157 

158 Raises 

159 ------ 

160 ValueError 

161 If ``std`` is not positive, if ``loc`` is negative, or if ``mean <= loc``. 

162 

163 See Also 

164 -------- 

165 alpha_beta_loc_to_mean_std : Convert shape/scale/loc parameters to mean and std. 

166 parse_parameters : Parse and validate gamma distribution parameters. 

167 

168 Examples 

169 -------- 

170 >>> from gwtransport.gamma import mean_std_loc_to_alpha_beta 

171 >>> mean_pore_volume = 30000.0 # m³ 

172 >>> std_pore_volume = 8100.0 # m³ 

173 >>> alpha, beta = mean_std_loc_to_alpha_beta( 

174 ... mean=mean_pore_volume, std=std_pore_volume 

175 ... ) 

176 >>> print(f"Shape parameter (alpha): {alpha:.2f}") 

177 Shape parameter (alpha): 13.72 

178 >>> print(f"Scale parameter (beta): {beta:.2f}") 

179 Scale parameter (beta): 2187.00 

180 

181 With a 5000 m³ minimum pore volume: 

182 

183 >>> alpha, beta = mean_std_loc_to_alpha_beta(mean=30000.0, std=8100.0, loc=5000.0) 

184 >>> print(f"Shape parameter (alpha): {alpha:.2f}") 

185 Shape parameter (alpha): 9.45 

186 >>> print(f"Scale parameter (beta): {beta:.2f}") 

187 Scale parameter (beta): 2646.00 

188 """ 

189 if std <= 0: 

190 msg = "std must be positive" 

191 raise ValueError(msg) 

192 if loc < 0: 

193 msg = "loc must be non-negative" 

194 raise ValueError(msg) 

195 if mean <= loc: 

196 msg = "mean must be strictly greater than loc" 

197 raise ValueError(msg) 

198 

199 mean_excess = mean - loc 

200 alpha = mean_excess**2 / std**2 

201 beta = std**2 / mean_excess 

202 return alpha, beta 

203 

204 

205def alpha_beta_loc_to_mean_std(*, alpha: float, beta: float, loc: float = 0.0) -> tuple[float, float]: 

206 """ 

207 Convert shape, scale, and location of gamma distribution to mean and standard deviation. 

208 

209 Parameters 

210 ---------- 

211 alpha : float 

212 Shape parameter of the gamma distribution. Must be positive. 

213 beta : float 

214 Scale parameter of the gamma distribution. Must be positive. 

215 loc : float, optional 

216 Location (horizontal shift) of the gamma distribution. Must be non-negative. 

217 Default is ``0.0``. 

218 

219 Returns 

220 ------- 

221 mean : float 

222 Mean of the gamma distribution, equal to ``alpha * beta + loc``. 

223 std : float 

224 Standard deviation of the gamma distribution, equal to ``sqrt(alpha) * beta``. 

225 ``std`` is invariant under the ``loc`` shift. 

226 

227 Raises 

228 ------ 

229 ValueError 

230 If ``loc`` is negative. 

231 

232 See Also 

233 -------- 

234 mean_std_loc_to_alpha_beta : Convert mean/std/loc to shape and scale parameters. 

235 parse_parameters : Parse and validate gamma distribution parameters. 

236 

237 Examples 

238 -------- 

239 >>> from gwtransport.gamma import alpha_beta_loc_to_mean_std 

240 >>> alpha = 13.72 # shape parameter 

241 >>> beta = 2187.0 # scale parameter 

242 >>> mean, std = alpha_beta_loc_to_mean_std(alpha=alpha, beta=beta) 

243 >>> print(f"Mean pore volume: {mean:.0f} m³") # doctest: +ELLIPSIS 

244 Mean pore volume: 3000... m³ 

245 >>> print(f"Std pore volume: {std:.0f} m³") # doctest: +ELLIPSIS 

246 Std pore volume: 810... m³ 

247 """ 

248 if loc < 0: 

249 msg = "loc must be non-negative" 

250 raise ValueError(msg) 

251 mean = alpha * beta + loc 

252 std = np.sqrt(alpha) * beta 

253 return mean, std 

254 

255 

256def bins( 

257 *, 

258 mean: float | None = None, 

259 std: float | None = None, 

260 loc: float = 0.0, 

261 alpha: float | None = None, 

262 beta: float | None = None, 

263 n_bins: int = 100, 

264 quantile_edges: np.ndarray | None = None, 

265) -> dict[str, npt.NDArray[np.floating]]: 

266 """ 

267 Divide a (shifted) gamma distribution into bins and compute bin properties. 

268 

269 If ``n_bins`` is provided, the gamma distribution is divided into ``n_bins`` 

270 equal-mass bins. If ``quantile_edges`` is provided, the distribution is divided 

271 into bins defined by those quantile edges. The quantile edges must lie in 

272 ``[0, 1]`` with size ``n_bins + 1``; the first and last entries must be 0 and 1. 

273 

274 Parameters 

275 ---------- 

276 mean : float, optional 

277 Mean of the gamma distribution. Must be strictly greater than ``loc``. 

278 std : float, optional 

279 Standard deviation of the gamma distribution. Must be positive. 

280 loc : float, optional 

281 Location (horizontal shift) of the gamma distribution; the lower bound of 

282 support. Must satisfy ``0 <= loc < mean`` (or ``loc >= 0`` when using 

283 alpha/beta). Default is ``0.0``. 

284 alpha : float, optional 

285 Shape parameter of gamma distribution (must be > 0). 

286 beta : float, optional 

287 Scale parameter of gamma distribution (must be > 0). 

288 n_bins : int, optional 

289 Number of bins to divide the gamma distribution (must be > 1). Default is 100. 

290 quantile_edges : array-like, optional 

291 Quantile edges for binning. Must be in the range [0, 1] and of size 

292 ``n_bins + 1``. The first and last quantile edges must be 0 and 1, respectively. 

293 If provided, ``n_bins`` is ignored. 

294 

295 Returns 

296 ------- 

297 dict 

298 Dictionary with keys of type str and values of type numpy.ndarray: 

299 

300 - ``lower_bound``: lower bounds of bins (first one equals ``loc``) 

301 - ``upper_bound``: upper bounds of bins (last one is inf) 

302 - ``edges``: bin edges (lower_bound[0], upper_bound[0], ..., upper_bound[-1]) 

303 - ``expected_values``: expected values in bins. Is what you would expect to 

304 observe if you repeatedly sampled from the probability distribution, but only 

305 considered samples that fall within that particular bin. 

306 - ``probability_mass``: probability mass in bins (invariant under ``loc`` shift). 

307 

308 Raises 

309 ------ 

310 ValueError 

311 If ``n_bins`` is not greater than 1, or if parameter validation in 

312 :func:`parse_parameters` fails. 

313 

314 See Also 

315 -------- 

316 bin_masses : Calculate probability mass for bins. 

317 mean_std_loc_to_alpha_beta : Convert mean/std/loc to alpha/beta parameters. 

318 gwtransport.advection.gamma_infiltration_to_extraction : Use bins for transport modeling. 

319 :ref:`concept-gamma-distribution` : Two-parameter pore volume model. 

320 :ref:`concept-gamma-loc` : Shifted gamma with minimum pore volume. 

321 :ref:`concept-dispersion-scales` : What ``std`` represents (macrodispersion vs total spreading). 

322 :ref:`assumption-gamma-distribution` : When gamma distribution is adequate. 

323 

324 Examples 

325 -------- 

326 Create equal-mass bins for a gamma distribution: 

327 

328 >>> from gwtransport.gamma import bins 

329 >>> result = bins(mean=30000.0, std=8100.0, n_bins=5) 

330 

331 With a location parameter representing a minimum pore volume: 

332 

333 >>> result = bins(mean=30000.0, std=8100.0, loc=5000.0, n_bins=5) 

334 >>> float(result["edges"][0]) 

335 5000.0 

336 

337 Create bins with custom quantile edges: 

338 

339 >>> import numpy as np 

340 >>> quantiles = np.array([0.0, 0.25, 0.5, 0.75, 1.0]) 

341 >>> result = bins(mean=30000.0, std=8100.0, quantile_edges=quantiles) 

342 >>> print(f"Number of bins: {len(result['probability_mass'])}") 

343 Number of bins: 4 

344 """ 

345 alpha, beta, loc = parse_parameters(mean=mean, std=std, loc=loc, alpha=alpha, beta=beta) 

346 

347 # Calculate boundaries for equal mass bins 

348 # If quantile_edges is provided, use it (n_bins is ignored) 

349 # Otherwise, use n_bins (which defaults to 100) 

350 if quantile_edges is not None: 

351 n_bins = len(quantile_edges) - 1 

352 else: 

353 quantile_edges = np.linspace(0, 1, n_bins + 1) # includes 0 and 1 

354 

355 if n_bins <= 1: 

356 msg = "Number of bins must be greater than 1" 

357 raise ValueError(msg) 

358 

359 # Unshifted bin edges for the standard Gamma(alpha, beta) distribution, then shift 

360 unshifted_edges = gamma_dist.ppf(quantile_edges, alpha, scale=beta) 

361 bin_edges = unshifted_edges + loc 

362 probability_mass = np.diff(quantile_edges) # probability mass for each bin 

363 

364 # Conditional mean within each bin for the unshifted distribution, then shift by loc. 

365 # E[X | a <= X < b] for X ~ Gamma(alpha, beta) uses the identity 

366 # E[X * 1_{a<=X<b}] = alpha * beta * (F_{alpha+1}(b/beta) - F_{alpha+1}(a/beta)) 

367 # where F_{alpha+1} is the CDF of Gamma(alpha+1, beta). The bin_masses helper returns 

368 # exactly those differences. 

369 diff_alpha_plus_1 = bin_masses(alpha=alpha + 1, beta=beta, bin_edges=unshifted_edges) 

370 expected_values = beta * alpha * diff_alpha_plus_1 / probability_mass + loc 

371 

372 return { 

373 "lower_bound": bin_edges[:-1], 

374 "upper_bound": bin_edges[1:], 

375 "edges": bin_edges, 

376 "expected_values": expected_values, 

377 "probability_mass": probability_mass, 

378 } 

379 

380 

381def bin_masses(*, alpha: float, beta: float, bin_edges: npt.ArrayLike) -> npt.NDArray[np.floating]: 

382 """ 

383 Calculate probability mass for each bin in a standard (unshifted) gamma distribution. 

384 

385 Is the area under the Gamma(alpha, beta) PDF between the bin edges. This lower-level 

386 function operates on the unshifted gamma distribution; if a location shift is needed, 

387 callers should subtract ``loc`` from their physical bin edges before passing them in. 

388 Because probability mass is invariant under a location shift, the result is identical 

389 to that of the shifted distribution. 

390 

391 Parameters 

392 ---------- 

393 alpha : float 

394 Shape parameter of gamma distribution (must be > 0). 

395 beta : float 

396 Scale parameter of gamma distribution (must be > 0). 

397 bin_edges : array-like 

398 Bin edges of the unshifted distribution. Array of increasing values of size 

399 ``len(bins) + 1``. Must be non-negative. 

400 

401 Returns 

402 ------- 

403 numpy.ndarray 

404 Probability mass for each bin. 

405 

406 Raises 

407 ------ 

408 ValueError 

409 If ``alpha`` or ``beta`` are not positive, if ``bin_edges`` contains fewer than two 

410 values, if ``bin_edges`` are not monotonically increasing, or if any ``bin_edges`` 

411 are negative. 

412 """ 

413 # Convert inputs to numpy arrays 

414 bin_edges = np.asarray(bin_edges) 

415 

416 # Validate inputs 

417 if alpha <= 0 or beta <= 0: 

418 msg = "Alpha and beta must be positive" 

419 raise ValueError(msg) 

420 if len(bin_edges) < 2: # noqa: PLR2004 

421 msg = "Bin edges must contain at least two values" 

422 raise ValueError(msg) 

423 if np.any(np.diff(bin_edges) < 0): 

424 msg = "Bin edges must be increasing" 

425 raise ValueError(msg) 

426 if np.any(bin_edges < 0): 

427 msg = "Bin edges must be non-negative" 

428 raise ValueError(msg) 

429 val = gammainc(alpha, bin_edges / beta) 

430 return val[1:] - val[:-1]