Coverage for src/gwtransport/deposition.py: 80%

55 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-09-29 20:01 +0000

1"""Deposition analysis for 1D aquifer systems. 

2 

3Analyze compound transport by deposition in aquifer systems with tools for 

4computing concentrations and deposition rates based on aquifer properties. 

5 

6The model assumes 1D groundwater flow where compound deposition occurs along 

7the flow path, enriching the water. Follows advection module patterns for 

8consistency. 

9 

10Functions 

11--------- 

12extraction_to_deposition : Compute deposition rates from concentration changes 

13deposition_to_extraction : Compute concentrations from deposition rates 

14""" 

15 

16import numpy as np 

17import numpy.typing as npt 

18import pandas as pd 

19 

20from gwtransport.residence_time import residence_time 

21from gwtransport.surfacearea import compute_average_heights 

22from gwtransport.utils import linear_interpolate, solve_underdetermined_system 

23 

24 

25def compute_deposition_weights( 

26 *, 

27 flow_values, 

28 tedges, 

29 cout_tedges, 

30 aquifer_pore_volume, 

31 porosity, 

32 thickness, 

33 retardation_factor=1.0, 

34): 

35 """Compute deposition weights for concentration-deposition convolution. 

36 

37 Parameters 

38 ---------- 

39 flow_values : array-like 

40 Flow rates in aquifer [m3/day]. Length must equal len(tedges) - 1. 

41 tedges : pandas.DatetimeIndex 

42 Time bin edges for flow data. 

43 cout_tedges : pandas.DatetimeIndex 

44 Time bin edges for output concentration data. 

45 aquifer_pore_volume : float 

46 Aquifer pore volume [m3]. 

47 porosity : float 

48 Aquifer porosity [dimensionless]. 

49 thickness : float 

50 Aquifer thickness [m]. 

51 retardation_factor : float, optional 

52 Compound retardation factor, by default 1.0. 

53 

54 Returns 

55 ------- 

56 numpy.ndarray 

57 Deposition weights matrix with shape (len(cout_tedges) - 1, len(tedges) - 1). 

58 May contain NaN values where residence time cannot be computed. 

59 

60 Notes 

61 ----- 

62 The returned weights matrix may contain NaN values in locations where the 

63 residence time calculation fails or is undefined. This typically occurs 

64 when flow conditions result in invalid or non-physical residence times. 

65 """ 

66 # Convert to days relative to first time edge 

67 t0 = tedges[0] 

68 tedges_days = ((tedges - t0) / pd.Timedelta(days=1)).values 

69 cout_tedges_days = ((cout_tedges - t0) / pd.Timedelta(days=1)).values 

70 

71 # Compute residence times and cumulative flow 

72 rt_edges = residence_time( 

73 flow=flow_values, 

74 flow_tedges=tedges, 

75 index=cout_tedges, 

76 aquifer_pore_volume=float(aquifer_pore_volume), 

77 retardation_factor=retardation_factor, 

78 direction="extraction_to_infiltration", 

79 ) 

80 cout_tedges_days_infiltration = cout_tedges_days - rt_edges[0] 

81 

82 flow_tdelta = np.diff(tedges_days, prepend=0.0) 

83 flow_cum = (np.concatenate(([0.0], flow_values)) * flow_tdelta).cumsum() 

84 

85 # Interpolate volumes at concentration time edges 

86 start_vol = linear_interpolate(tedges_days, flow_cum, cout_tedges_days_infiltration) 

87 end_vol = linear_interpolate(tedges_days, flow_cum, cout_tedges_days) 

88 

89 # Compute deposition weights 

90 flow_cum_cout = flow_cum[None, :] - start_vol[:, None] 

91 volume_array = compute_average_heights( 

92 tedges_days, flow_cum_cout, 0.0, retardation_factor * float(aquifer_pore_volume) 

93 ) 

94 area_array = volume_array / (porosity * thickness) 

95 extracted_volume = np.diff(end_vol) 

96 return area_array * np.diff(tedges_days)[None, :] / extracted_volume[:, None] 

97 

98 

99def deposition_to_extraction( 

100 *, 

101 dep: npt.ArrayLike, 

102 flow: npt.ArrayLike, 

103 tedges: pd.DatetimeIndex | np.ndarray, 

104 cout_tedges: pd.DatetimeIndex | np.ndarray, 

105 aquifer_pore_volume: float, 

106 porosity: float, 

107 thickness: float, 

108 retardation_factor: float = 1.0, 

109) -> np.ndarray: 

110 """Compute concentrations from deposition rates (convolution). 

111 

112 Parameters 

113 ---------- 

114 dep : array-like 

115 Deposition rates [ng/m2/day]. Length must equal len(tedges) - 1. 

116 flow : array-like 

117 Flow rates in aquifer [m3/day]. Length must equal len(tedges) - 1. 

118 tedges : pandas.DatetimeIndex 

119 Time bin edges for deposition and flow data. 

120 cout_tedges : pandas.DatetimeIndex 

121 Time bin edges for output concentration data. 

122 aquifer_pore_volume : float 

123 Aquifer pore volume [m3]. 

124 porosity : float 

125 Aquifer porosity [dimensionless]. 

126 thickness : float 

127 Aquifer thickness [m]. 

128 retardation_factor : float, optional 

129 Compound retardation factor, by default 1.0. 

130 

131 Returns 

132 ------- 

133 numpy.ndarray 

134 Concentration changes [ng/m3] with length len(cout_tedges) - 1. 

135 

136 Examples 

137 -------- 

138 >>> import pandas as pd 

139 >>> import numpy as np 

140 >>> dates = pd.date_range("2020-01-01", "2020-01-10", freq="D") 

141 >>> tedges = pd.date_range("2019-12-31 12:00", "2020-01-10 12:00", freq="D") 

142 >>> cout_tedges = pd.date_range("2020-01-03 12:00", "2020-01-12 12:00", freq="D") 

143 >>> dep = np.ones(len(dates)) 

144 >>> flow = np.full(len(dates), 100.0) 

145 >>> cout = deposition_to_extraction( 

146 ... dep=dep, 

147 ... flow=flow, 

148 ... tedges=tedges, 

149 ... cout_tedges=cout_tedges, 

150 ... aquifer_pore_volume=500.0, 

151 ... porosity=0.3, 

152 ... thickness=10.0, 

153 ... ) 

154 """ 

155 tedges, cout_tedges = pd.DatetimeIndex(tedges), pd.DatetimeIndex(cout_tedges) 

156 dep_values, flow_values = np.asarray(dep), np.asarray(flow) 

157 

158 # Validate input dimensions and values 

159 if len(tedges) != len(dep_values) + 1: 

160 _msg = "tedges must have one more element than dep" 

161 raise ValueError(_msg) 

162 if len(tedges) != len(flow_values) + 1: 

163 _msg = "tedges must have one more element than flow" 

164 raise ValueError(_msg) 

165 if np.any(np.isnan(dep_values)) or np.any(np.isnan(flow_values)): 

166 _msg = "Input arrays cannot contain NaN values" 

167 raise ValueError(_msg) 

168 

169 # Compute deposition weights 

170 deposition_weights = compute_deposition_weights( 

171 flow_values=flow_values, 

172 tedges=tedges, 

173 cout_tedges=cout_tedges, 

174 aquifer_pore_volume=aquifer_pore_volume, 

175 porosity=porosity, 

176 thickness=thickness, 

177 retardation_factor=retardation_factor, 

178 ) 

179 

180 return deposition_weights.dot(dep_values) 

181 

182 

183def extraction_to_deposition( 

184 *, 

185 flow: npt.ArrayLike, 

186 tedges: pd.DatetimeIndex | np.ndarray, 

187 cout: npt.ArrayLike, 

188 cout_tedges: pd.DatetimeIndex | np.ndarray, 

189 aquifer_pore_volume: float, 

190 porosity: float, 

191 thickness: float, 

192 retardation_factor: float = 1.0, 

193 nullspace_objective: str = "squared_differences", 

194) -> np.ndarray: 

195 """ 

196 Compute deposition rates from concentration changes (deconvolution). 

197 

198 The solution for the deposition is fundamentally underdetermined, as multiple 

199 deposition histories can lead to the same concentration. This function 

200 computes a least-squares solution and then selects a specific solution from the 

201 nullspace of the problem based on the provided objective. 

202 

203 Parameters 

204 ---------- 

205 flow : array-like 

206 Flow rates in aquifer [m3/day]. Length must equal len(tedges) - 1. 

207 Must not contain NaN values. 

208 tedges : pandas.DatetimeIndex 

209 Time bin edges for deposition and flow data. Length must equal 

210 len(flow) + 1. 

211 cout : array-like 

212 Concentration changes in extracted water [ng/m3]. Length must equal 

213 len(cout_tedges) - 1. May contain NaN values, which will be excluded 

214 from the computation along with corresponding rows in the weight matrix. 

215 cout_tedges : pandas.DatetimeIndex 

216 Time bin edges for output concentration data. Length must equal 

217 len(cout) + 1. 

218 aquifer_pore_volume : float 

219 Aquifer pore volume [m3]. 

220 porosity : float 

221 Aquifer porosity [dimensionless]. 

222 thickness : float 

223 Aquifer thickness [m]. 

224 retardation_factor : float, optional 

225 Compound retardation factor, by default 1.0. Values > 1.0 indicate 

226 slower transport due to sorption/interaction. 

227 nullspace_objective : str or callable, optional 

228 Objective function to minimize in the nullspace. Options: 

229 

230 * "squared_differences" : Minimize sum of squared differences between 

231 adjacent deposition rates (default, provides smooth solutions) 

232 * "summed_differences" : Minimize sum of absolute differences between 

233 adjacent deposition rates (promotes sparse/piecewise constant solutions) 

234 * callable : Custom objective function with signature 

235 ``objective(coeffs, x_ls, nullspace_basis)`` 

236 

237 Default is "squared_differences". 

238 

239 Returns 

240 ------- 

241 numpy.ndarray 

242 Mean deposition rates [ng/m2/day] between tedges. Length equals 

243 len(tedges) - 1. 

244 

245 Raises 

246 ------ 

247 ValueError 

248 If input dimensions are incompatible, if flow contains NaN values, 

249 or if the optimization fails. 

250 

251 Notes 

252 ----- 

253 This function solves the inverse problem of determining deposition rates 

254 from observed concentration changes. Since multiple deposition histories 

255 can produce the same concentration pattern, regularization in the nullspace 

256 is used to select a physically meaningful solution. 

257 

258 The algorithm: 

259 

260 1. Validates input dimensions and checks for NaN values in flow 

261 2. Computes deposition weight matrix relating deposition to concentration 

262 3. Identifies valid rows (no NaN in weights or concentration data) 

263 4. Solves the underdetermined system using nullspace regularization 

264 5. Returns the regularized deposition rate solution 

265 

266 Examples 

267 -------- 

268 Basic usage with default squared differences regularization: 

269 

270 >>> import pandas as pd 

271 >>> import numpy as np 

272 >>> from gwtransport.deposition import extraction_to_deposition 

273 >>> 

274 >>> # Create input data 

275 >>> dates = pd.date_range("2020-01-01", "2020-01-10", freq="D") 

276 >>> tedges = pd.date_range("2019-12-31 12:00", "2020-01-10 12:00", freq="D") 

277 >>> cout_tedges = pd.date_range("2020-01-03 12:00", "2020-01-12 12:00", freq="D") 

278 >>> 

279 >>> # Flow and concentration data 

280 >>> flow = np.full(len(dates), 100.0) # m3/day 

281 >>> cout = np.ones(len(cout_tedges) - 1) * 10.0 # ng/m3 

282 >>> 

283 >>> # Compute deposition rates 

284 >>> dep = extraction_to_deposition( 

285 ... flow=flow, 

286 ... tedges=tedges, 

287 ... cout=cout, 

288 ... cout_tedges=cout_tedges, 

289 ... aquifer_pore_volume=500.0, 

290 ... porosity=0.3, 

291 ... thickness=10.0, 

292 ... ) 

293 >>> print(f"Deposition rates shape: {dep.shape}") 

294 >>> print(f"Mean deposition rate: {np.nanmean(dep):.2f} ng/m2/day") 

295 

296 With summed differences regularization for sparse solutions: 

297 

298 >>> dep_sparse = extraction_to_deposition( 

299 ... flow=flow, 

300 ... tedges=tedges, 

301 ... cout=cout, 

302 ... cout_tedges=cout_tedges, 

303 ... aquifer_pore_volume=500.0, 

304 ... porosity=0.3, 

305 ... thickness=10.0, 

306 ... nullspace_objective="summed_differences", 

307 ... ) 

308 

309 With custom regularization objective: 

310 

311 >>> def l2_norm_objective(coeffs, x_ls, nullspace_basis): 

312 ... x = x_ls + nullspace_basis @ coeffs 

313 ... return np.sum(x**2) # Minimize L2 norm of solution 

314 >>> 

315 >>> dep_l2 = extraction_to_deposition( 

316 ... flow=flow, 

317 ... tedges=tedges, 

318 ... cout=cout, 

319 ... cout_tedges=cout_tedges, 

320 ... aquifer_pore_volume=500.0, 

321 ... porosity=0.3, 

322 ... thickness=10.0, 

323 ... nullspace_objective=l2_norm_objective, 

324 ... ) 

325 

326 Handling missing concentration data: 

327 

328 >>> # Concentration with some NaN values 

329 >>> cout_nan = cout.copy() 

330 >>> cout_nan[2:4] = np.nan # Missing data for some time periods 

331 >>> 

332 >>> dep_robust = extraction_to_deposition( 

333 ... flow=flow, 

334 ... tedges=tedges, 

335 ... cout=cout_nan, 

336 ... cout_tedges=cout_tedges, 

337 ... aquifer_pore_volume=500.0, 

338 ... porosity=0.3, 

339 ... thickness=10.0, 

340 ... ) 

341 """ 

342 tedges, cout_tedges = pd.DatetimeIndex(tedges), pd.DatetimeIndex(cout_tedges) 

343 cout_values, flow_values = np.asarray(cout), np.asarray(flow) 

344 

345 # Validate input dimensions and values 

346 if len(cout_tedges) != len(cout_values) + 1: 

347 msg = "cout_tedges must have one more element than cout" 

348 raise ValueError(msg) 

349 if len(tedges) != len(flow_values) + 1: 

350 msg = "tedges must have one more element than flow" 

351 raise ValueError(msg) 

352 if np.any(np.isnan(flow_values)): 

353 msg = "flow array cannot contain NaN values" 

354 raise ValueError(msg) 

355 

356 # Compute deposition weights 

357 deposition_weights = compute_deposition_weights( 

358 flow_values=flow_values, 

359 tedges=tedges, 

360 cout_tedges=cout_tedges, 

361 aquifer_pore_volume=aquifer_pore_volume, 

362 porosity=porosity, 

363 thickness=thickness, 

364 retardation_factor=retardation_factor, 

365 ) 

366 

367 # Solve underdetermined system using utils function 

368 return solve_underdetermined_system( 

369 coefficient_matrix=deposition_weights, 

370 rhs_vector=cout_values, 

371 nullspace_objective=nullspace_objective, 

372 optimization_method="Nelder-Mead", 

373 ) 

374 

375 

376def spinup_duration( 

377 *, 

378 flow: np.ndarray, 

379 flow_tedges: pd.DatetimeIndex, 

380 aquifer_pore_volume: float, 

381 retardation_factor: float, 

382) -> float: 

383 """ 

384 Compute the spinup duration for deposition modeling. 

385 

386 The spinup duration is the residence time at the first time step, representing 

387 the time needed for the system to become fully informed. Before this duration, 

388 the extracted concentration lacks complete deposition history. 

389 

390 Parameters 

391 ---------- 

392 flow : numpy.ndarray 

393 Flow rate of water in the aquifer [m3/day]. 

394 flow_tedges : pandas.DatetimeIndex 

395 Time edges for the flow data. 

396 aquifer_pore_volume : float 

397 Pore volume of the aquifer [m3]. 

398 retardation_factor : float 

399 Retardation factor of the compound in the aquifer [dimensionless]. 

400 

401 Returns 

402 ------- 

403 float 

404 Spinup duration in days. 

405 """ 

406 rt = residence_time( 

407 flow=flow, 

408 flow_tedges=flow_tedges, 

409 aquifer_pore_volume=aquifer_pore_volume, 

410 retardation_factor=retardation_factor, 

411 direction="infiltration_to_extraction", 

412 ) 

413 if np.isnan(rt[0, 0]): 

414 msg = "Residence time at the first time step is NaN. This indicates that the aquifer is not fully informed: flow timeseries too short." 

415 raise ValueError(msg) 

416 

417 # Return the first residence time value 

418 return float(rt[0, 0])