Coverage for src / gwtransport / deposition.py: 73%

73 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-04 18:51 +0000

1""" 

2Deposition Analysis for 1D Aquifer Systems. 

3 

4This module analyzes compound transport by deposition in aquifer systems with tools for 

5computing concentrations and deposition rates based on aquifer properties. The model assumes 

61D groundwater flow where compound deposition occurs along the flow path, enriching the water. 

7Deposition processes include pathogen attachment to aquifer matrix, particle filtration, or 

8chemical precipitation. The module follows advection module patterns for consistency in 

9forward (deposition to extraction) and reverse (extraction to deposition) calculations. 

10 

11Available functions: 

12 

13- :func:`deposition_to_extraction` - Compute concentrations from deposition rates (convolution). 

14 Given deposition rate time series [ng/m²/day], computes resulting concentration changes in 

15 extracted water [ng/m³]. Uses flow-weighted integration over contact area between water and 

16 aquifer matrix. Accounts for aquifer geometry (porosity, thickness) and residence time 

17 distribution. 

18 

19- :func:`extraction_to_deposition` - Compute deposition rates from concentration changes 

20 (deconvolution). Given concentration change time series in extracted water [ng/m³], estimates 

21 deposition rate history [ng/m²/day] that produced those changes. Solves underdetermined 

22 inverse problem using nullspace regularization with configurable objectives ('squared_differences' 

23 for smooth solutions, 'summed_differences' for sparse solutions). Handles NaN values in 

24 concentration data by excluding corresponding time periods. 

25 

26- :func:`compute_deposition_weights` - Internal helper function. Compute weight matrix relating 

27 deposition rates to concentration changes. Used by both deposition_to_extraction (forward) and 

28 extraction_to_deposition (reverse). Calculates contact area between water parcels and aquifer 

29 matrix based on streamline geometry and residence times. 

30 

31- :func:`spinup_duration` - Compute spinup duration for deposition modeling. Returns residence 

32 time at first time step, representing time needed for system to become fully informed. Before 

33 this duration, extracted concentration lacks complete deposition history. Useful for determining 

34 valid analysis period and identifying when boundary effects are negligible. 

35 

36This file is part of gwtransport which is released under AGPL-3.0 license. 

37See the ./LICENSE file or go to https://github.com/gwtransport/gwtransport/blob/main/LICENSE for full license details. 

38""" 

39 

40import numpy as np 

41import numpy.typing as npt 

42import pandas as pd 

43 

44from gwtransport.residence_time import residence_time 

45from gwtransport.surfacearea import compute_average_heights 

46from gwtransport.utils import linear_interpolate, solve_underdetermined_system 

47 

48 

49def compute_deposition_weights( 

50 *, 

51 flow_values: npt.ArrayLike, 

52 tedges: pd.DatetimeIndex, 

53 cout_tedges: pd.DatetimeIndex, 

54 aquifer_pore_volume: float, 

55 porosity: float, 

56 thickness: float, 

57 retardation_factor: float = 1.0, 

58) -> npt.NDArray[np.floating]: 

59 """Compute deposition weights for concentration-deposition convolution. 

60 

61 Parameters 

62 ---------- 

63 flow_values : array-like 

64 Flow rates in aquifer [m3/day]. Length must equal len(tedges) - 1. 

65 tedges : pandas.DatetimeIndex 

66 Time bin edges for flow data. 

67 cout_tedges : pandas.DatetimeIndex 

68 Time bin edges for output concentration data. 

69 aquifer_pore_volume : float 

70 Aquifer pore volume [m3]. 

71 porosity : float 

72 Aquifer porosity [dimensionless]. 

73 thickness : float 

74 Aquifer thickness [m]. 

75 retardation_factor : float, optional 

76 Compound retardation factor, by default 1.0. 

77 

78 Returns 

79 ------- 

80 numpy.ndarray 

81 Deposition weights matrix with shape (len(cout_tedges) - 1, len(tedges) - 1). 

82 May contain NaN values where residence time cannot be computed. 

83 

84 Notes 

85 ----- 

86 The returned weights matrix may contain NaN values in locations where the 

87 residence time calculation fails or is undefined. This typically occurs 

88 when flow conditions result in invalid or non-physical residence times. 

89 """ 

90 # Convert to days relative to first time edge 

91 t0 = tedges[0] 

92 tedges_days = ((tedges - t0) / pd.Timedelta(days=1)).values 

93 cout_tedges_days = ((cout_tedges - t0) / pd.Timedelta(days=1)).values 

94 

95 # Compute residence times and cumulative flow 

96 rt_edges = residence_time( 

97 flow=flow_values, 

98 flow_tedges=tedges, 

99 index=cout_tedges, 

100 aquifer_pore_volumes=float(aquifer_pore_volume), 

101 retardation_factor=retardation_factor, 

102 direction="extraction_to_infiltration", 

103 ) 

104 cout_tedges_days_infiltration = cout_tedges_days - rt_edges[0] 

105 

106 flow_cum = np.concatenate(([0.0], np.cumsum(flow_values * np.diff(tedges_days)))) 

107 

108 # Interpolate volumes at concentration time edges 

109 start_vol = linear_interpolate(x_ref=tedges_days, y_ref=flow_cum, x_query=cout_tedges_days_infiltration) 

110 end_vol = linear_interpolate(x_ref=tedges_days, y_ref=flow_cum, x_query=cout_tedges_days) 

111 

112 # Compute deposition weights 

113 flow_cum_cout = flow_cum[None, :] - start_vol[:, None] 

114 volume_array = compute_average_heights( 

115 x_edges=tedges_days, y_edges=flow_cum_cout, y_lower=0.0, y_upper=retardation_factor * float(aquifer_pore_volume) 

116 ) 

117 area_array = volume_array / (porosity * thickness) 

118 extracted_volume = np.diff(end_vol) 

119 return area_array * np.diff(tedges_days)[None, :] / extracted_volume[:, None] 

120 

121 

122def deposition_to_extraction( 

123 *, 

124 dep: npt.ArrayLike, 

125 flow: npt.ArrayLike, 

126 tedges: pd.DatetimeIndex | np.ndarray, 

127 cout_tedges: pd.DatetimeIndex | np.ndarray, 

128 aquifer_pore_volume: float, 

129 porosity: float, 

130 thickness: float, 

131 retardation_factor: float = 1.0, 

132) -> npt.NDArray[np.floating]: 

133 """Compute concentrations from deposition rates (convolution). 

134 

135 Parameters 

136 ---------- 

137 dep : array-like 

138 Deposition rates [ng/m2/day]. Length must equal len(tedges) - 1. 

139 flow : array-like 

140 Flow rates in aquifer [m3/day]. Length must equal len(tedges) - 1. 

141 tedges : pandas.DatetimeIndex 

142 Time bin edges for deposition and flow data. 

143 cout_tedges : pandas.DatetimeIndex 

144 Time bin edges for output concentration data. 

145 aquifer_pore_volume : float 

146 Aquifer pore volume [m3]. 

147 porosity : float 

148 Aquifer porosity [dimensionless]. 

149 thickness : float 

150 Aquifer thickness [m]. 

151 retardation_factor : float, optional 

152 Compound retardation factor, by default 1.0. 

153 

154 Returns 

155 ------- 

156 numpy.ndarray 

157 Concentration changes [ng/m3] with length len(cout_tedges) - 1. 

158 

159 Examples 

160 -------- 

161 >>> import pandas as pd 

162 >>> import numpy as np 

163 >>> from gwtransport.deposition import deposition_to_extraction 

164 >>> dates = pd.date_range("2020-01-01", "2020-01-10", freq="D") 

165 >>> tedges = pd.date_range("2019-12-31 12:00", "2020-01-10 12:00", freq="D") 

166 >>> cout_tedges = pd.date_range("2020-01-03 12:00", "2020-01-12 12:00", freq="D") 

167 >>> dep = np.ones(len(dates)) 

168 >>> flow = np.full(len(dates), 100.0) 

169 >>> cout = deposition_to_extraction( 

170 ... dep=dep, 

171 ... flow=flow, 

172 ... tedges=tedges, 

173 ... cout_tedges=cout_tedges, 

174 ... aquifer_pore_volume=500.0, 

175 ... porosity=0.3, 

176 ... thickness=10.0, 

177 ... ) 

178 

179 See Also 

180 -------- 

181 extraction_to_deposition : Inverse operation (deconvolution) 

182 gwtransport.advection.infiltration_to_extraction : For concentration transport without deposition 

183 :ref:`concept-transport-equation` : Flow-weighted averaging approach 

184 """ 

185 tedges, cout_tedges = pd.DatetimeIndex(tedges), pd.DatetimeIndex(cout_tedges) 

186 dep_values, flow_values = np.asarray(dep), np.asarray(flow) 

187 

188 # Validate input dimensions and values 

189 if len(tedges) != len(dep_values) + 1: 

190 _msg = "tedges must have one more element than dep" 

191 raise ValueError(_msg) 

192 if len(tedges) != len(flow_values) + 1: 

193 _msg = "tedges must have one more element than flow" 

194 raise ValueError(_msg) 

195 if np.any(np.isnan(dep_values)) or np.any(np.isnan(flow_values)): 

196 _msg = "Input arrays cannot contain NaN values" 

197 raise ValueError(_msg) 

198 

199 # Validate physical parameters 

200 if not 0 < porosity < 1: 

201 _msg = f"Porosity must be in (0, 1), got {porosity}" 

202 raise ValueError(_msg) 

203 if thickness <= 0: 

204 _msg = f"Thickness must be positive, got {thickness}" 

205 raise ValueError(_msg) 

206 if aquifer_pore_volume <= 0: 

207 _msg = f"Aquifer pore volume must be positive, got {aquifer_pore_volume}" 

208 raise ValueError(_msg) 

209 

210 # Compute deposition weights 

211 deposition_weights = compute_deposition_weights( 

212 flow_values=flow_values, 

213 tedges=tedges, 

214 cout_tedges=cout_tedges, 

215 aquifer_pore_volume=aquifer_pore_volume, 

216 porosity=porosity, 

217 thickness=thickness, 

218 retardation_factor=retardation_factor, 

219 ) 

220 

221 return deposition_weights.dot(dep_values) 

222 

223 

224def extraction_to_deposition( 

225 *, 

226 flow: npt.ArrayLike, 

227 tedges: pd.DatetimeIndex | np.ndarray, 

228 cout: npt.ArrayLike, 

229 cout_tedges: pd.DatetimeIndex | np.ndarray, 

230 aquifer_pore_volume: float, 

231 porosity: float, 

232 thickness: float, 

233 retardation_factor: float = 1.0, 

234 nullspace_objective: str = "squared_differences", 

235) -> npt.NDArray[np.floating]: 

236 """ 

237 Compute deposition rates from concentration changes (deconvolution). 

238 

239 The solution for the deposition is fundamentally underdetermined, as multiple 

240 deposition histories can lead to the same concentration. This function 

241 computes a least-squares solution and then selects a specific solution from the 

242 nullspace of the problem based on the provided objective. 

243 

244 Parameters 

245 ---------- 

246 flow : array-like 

247 Flow rates in aquifer [m3/day]. Length must equal len(tedges) - 1. 

248 Must not contain NaN values. 

249 tedges : pandas.DatetimeIndex 

250 Time bin edges for deposition and flow data. Length must equal 

251 len(flow) + 1. 

252 cout : array-like 

253 Concentration changes in extracted water [ng/m3]. Length must equal 

254 len(cout_tedges) - 1. May contain NaN values, which will be excluded 

255 from the computation along with corresponding rows in the weight matrix. 

256 cout_tedges : pandas.DatetimeIndex 

257 Time bin edges for output concentration data. Length must equal 

258 len(cout) + 1. 

259 aquifer_pore_volume : float 

260 Aquifer pore volume [m3]. 

261 porosity : float 

262 Aquifer porosity [dimensionless]. 

263 thickness : float 

264 Aquifer thickness [m]. 

265 retardation_factor : float, optional 

266 Compound retardation factor, by default 1.0. Values > 1.0 indicate 

267 slower transport due to sorption/interaction. 

268 nullspace_objective : str or callable, optional 

269 Objective function to minimize in the nullspace. Options: 

270 

271 * "squared_differences" : Minimize sum of squared differences between 

272 adjacent deposition rates (default, provides smooth solutions) 

273 * "summed_differences" : Minimize sum of absolute differences between 

274 adjacent deposition rates (promotes sparse/piecewise constant solutions) 

275 * callable : Custom objective function with signature 

276 ``objective(coeffs, x_ls, nullspace_basis)`` 

277 

278 Default is "squared_differences". 

279 

280 Returns 

281 ------- 

282 numpy.ndarray 

283 Mean deposition rates [ng/m2/day] between tedges. Length equals 

284 len(tedges) - 1. 

285 

286 Raises 

287 ------ 

288 ValueError 

289 If input dimensions are incompatible, if flow contains NaN values, 

290 or if the optimization fails. 

291 

292 Notes 

293 ----- 

294 This function solves the inverse problem of determining deposition rates 

295 from observed concentration changes. Since multiple deposition histories 

296 can produce the same concentration pattern, regularization in the nullspace 

297 is used to select a physically meaningful solution. 

298 

299 The algorithm: 

300 

301 1. Validates input dimensions and checks for NaN values in flow 

302 2. Computes deposition weight matrix relating deposition to concentration 

303 3. Identifies valid rows (no NaN in weights or concentration data) 

304 4. Solves the underdetermined system using nullspace regularization 

305 5. Returns the regularized deposition rate solution 

306 

307 Examples 

308 -------- 

309 Basic usage with default squared differences regularization: 

310 

311 >>> import pandas as pd 

312 >>> import numpy as np 

313 >>> from gwtransport.deposition import extraction_to_deposition 

314 >>> 

315 >>> # Create input data 

316 >>> dates = pd.date_range("2020-01-01", "2020-01-10", freq="D") 

317 >>> tedges = pd.date_range("2019-12-31 12:00", "2020-01-10 12:00", freq="D") 

318 >>> cout_tedges = pd.date_range("2020-01-03 12:00", "2020-01-12 12:00", freq="D") 

319 >>> 

320 >>> # Flow and concentration data 

321 >>> flow = np.full(len(dates), 100.0) # m3/day 

322 >>> cout = np.ones(len(cout_tedges) - 1) * 10.0 # ng/m3 

323 >>> 

324 >>> # Compute deposition rates 

325 >>> dep = extraction_to_deposition( 

326 ... flow=flow, 

327 ... tedges=tedges, 

328 ... cout=cout, 

329 ... cout_tedges=cout_tedges, 

330 ... aquifer_pore_volume=500.0, 

331 ... porosity=0.3, 

332 ... thickness=10.0, 

333 ... ) 

334 >>> print(f"Deposition rates shape: {dep.shape}") 

335 Deposition rates shape: (10,) 

336 >>> print(f"Mean deposition rate: {np.nanmean(dep):.2f} ng/m2/day") 

337 Mean deposition rate: 6.00 ng/m2/day 

338 

339 With summed differences regularization for sparse solutions: 

340 

341 >>> dep_sparse = extraction_to_deposition( 

342 ... flow=flow, 

343 ... tedges=tedges, 

344 ... cout=cout, 

345 ... cout_tedges=cout_tedges, 

346 ... aquifer_pore_volume=500.0, 

347 ... porosity=0.3, 

348 ... thickness=10.0, 

349 ... nullspace_objective="summed_differences", 

350 ... ) 

351 

352 With custom regularization objective: 

353 

354 >>> def l2_norm_objective(coeffs, x_ls, nullspace_basis): 

355 ... x = x_ls + nullspace_basis @ coeffs 

356 ... return np.sum(x**2) # Minimize L2 norm of solution 

357 >>> 

358 >>> dep_l2 = extraction_to_deposition( 

359 ... flow=flow, 

360 ... tedges=tedges, 

361 ... cout=cout, 

362 ... cout_tedges=cout_tedges, 

363 ... aquifer_pore_volume=500.0, 

364 ... porosity=0.3, 

365 ... thickness=10.0, 

366 ... nullspace_objective=l2_norm_objective, 

367 ... ) 

368 

369 Handling missing concentration data: 

370 

371 >>> # Concentration with some NaN values 

372 >>> cout_nan = cout.copy() 

373 >>> cout_nan[2:4] = np.nan # Missing data for some time periods 

374 >>> 

375 >>> dep_robust = extraction_to_deposition( # doctest: +SKIP 

376 ... flow=flow, 

377 ... tedges=tedges, 

378 ... cout=cout_nan, 

379 ... cout_tedges=cout_tedges, 

380 ... aquifer_pore_volume=500.0, 

381 ... porosity=0.3, 

382 ... thickness=10.0, 

383 ... ) 

384 

385 See Also 

386 -------- 

387 deposition_to_extraction : Forward operation (convolution) 

388 gwtransport.advection.extraction_to_infiltration : For concentration transport without deposition 

389 :ref:`concept-transport-equation` : Flow-weighted averaging approach 

390 """ 

391 tedges, cout_tedges = pd.DatetimeIndex(tedges), pd.DatetimeIndex(cout_tedges) 

392 cout_values, flow_values = np.asarray(cout), np.asarray(flow) 

393 

394 # Validate input dimensions and values 

395 if len(cout_tedges) != len(cout_values) + 1: 

396 msg = "cout_tedges must have one more element than cout" 

397 raise ValueError(msg) 

398 if len(tedges) != len(flow_values) + 1: 

399 msg = "tedges must have one more element than flow" 

400 raise ValueError(msg) 

401 if np.any(np.isnan(flow_values)): 

402 msg = "flow array cannot contain NaN values" 

403 raise ValueError(msg) 

404 

405 # Validate physical parameters 

406 if not 0 < porosity < 1: 

407 msg = f"Porosity must be in (0, 1), got {porosity}" 

408 raise ValueError(msg) 

409 if thickness <= 0: 

410 msg = f"Thickness must be positive, got {thickness}" 

411 raise ValueError(msg) 

412 if aquifer_pore_volume <= 0: 

413 msg = f"Aquifer pore volume must be positive, got {aquifer_pore_volume}" 

414 raise ValueError(msg) 

415 

416 # Compute deposition weights 

417 deposition_weights = compute_deposition_weights( 

418 flow_values=flow_values, 

419 tedges=tedges, 

420 cout_tedges=cout_tedges, 

421 aquifer_pore_volume=aquifer_pore_volume, 

422 porosity=porosity, 

423 thickness=thickness, 

424 retardation_factor=retardation_factor, 

425 ) 

426 

427 # Solve underdetermined system using utils function 

428 return solve_underdetermined_system( 

429 coefficient_matrix=deposition_weights, 

430 rhs_vector=cout_values, 

431 nullspace_objective=nullspace_objective, 

432 optimization_method="Nelder-Mead", 

433 ) 

434 

435 

436def spinup_duration( 

437 *, 

438 flow: np.ndarray, 

439 flow_tedges: pd.DatetimeIndex, 

440 aquifer_pore_volume: float, 

441 retardation_factor: float, 

442) -> float: 

443 """ 

444 Compute the spinup duration for deposition modeling. 

445 

446 The spinup duration is the residence time at the first time step, representing 

447 the time needed for the system to become fully informed. Before this duration, 

448 the extracted concentration lacks complete deposition history. 

449 

450 Parameters 

451 ---------- 

452 flow : numpy.ndarray 

453 Flow rate of water in the aquifer [m3/day]. 

454 flow_tedges : pandas.DatetimeIndex 

455 Time edges for the flow data. 

456 aquifer_pore_volume : float 

457 Pore volume of the aquifer [m3]. 

458 retardation_factor : float 

459 Retardation factor of the compound in the aquifer [dimensionless]. 

460 

461 Returns 

462 ------- 

463 float 

464 Spinup duration in days. 

465 """ 

466 rt = residence_time( 

467 flow=flow, 

468 flow_tedges=flow_tedges, 

469 aquifer_pore_volumes=aquifer_pore_volume, 

470 retardation_factor=retardation_factor, 

471 direction="infiltration_to_extraction", 

472 ) 

473 rt_value: float = float(np.asarray(rt[0, 0])) 

474 if np.isnan(rt_value): 

475 msg = "Residence time at the first time step is NaN. This indicates that the aquifer is not fully informed: flow timeseries too short." 

476 raise ValueError(msg) 

477 

478 # Return the first residence time value 

479 return rt_value