Coverage for src/gwtransport/deposition.py: 0%

54 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-20 13:45 +0000

1"""Deposition analysis for 1D aquifer systems. 

2 

3Analyze compound transport by deposition in aquifer systems with tools for 

4computing concentrations and deposition rates based on aquifer properties. 

5 

6The model assumes 1D groundwater flow where compound deposition occurs along 

7the flow path, enriching the water. Follows advection module patterns for 

8consistency. 

9 

10Functions 

11--------- 

12extraction_to_deposition : Compute deposition rates from concentration changes 

13deposition_to_extraction : Compute concentrations from deposition rates 

14""" 

15 

16import numpy as np 

17import pandas as pd 

18 

19from gwtransport.residence_time import residence_time 

20from gwtransport.surfacearea import compute_average_heights 

21from gwtransport.utils import linear_interpolate, solve_underdetermined_system 

22 

23 

24def compute_deposition_weights( 

25 *, 

26 flow_values, 

27 tedges, 

28 cout_tedges, 

29 aquifer_pore_volume_value, 

30 porosity, 

31 thickness, 

32 retardation_factor=1.0, 

33): 

34 """Compute deposition weights for concentration-deposition convolution. 

35 

36 Parameters 

37 ---------- 

38 flow_values : array_like 

39 Flow rates in aquifer [m3/day]. Length must equal len(tedges) - 1. 

40 tedges : pandas.DatetimeIndex 

41 Time bin edges for flow data. 

42 cout_tedges : pandas.DatetimeIndex 

43 Time bin edges for output concentration data. 

44 aquifer_pore_volume_value : float 

45 Aquifer pore volume [m3]. 

46 porosity : float 

47 Aquifer porosity [dimensionless]. 

48 thickness : float 

49 Aquifer thickness [m]. 

50 retardation_factor : float, optional 

51 Compound retardation factor, by default 1.0. 

52 

53 Returns 

54 ------- 

55 ndarray 

56 Deposition weights matrix with shape (len(cout_tedges) - 1, len(tedges) - 1). 

57 May contain NaN values where residence time cannot be computed. 

58 

59 Notes 

60 ----- 

61 The returned weights matrix may contain NaN values in locations where the 

62 residence time calculation fails or is undefined. This typically occurs 

63 when flow conditions result in invalid or non-physical residence times. 

64 """ 

65 # Convert to days relative to first time edge 

66 t0 = tedges[0] 

67 tedges_days = ((tedges - t0) / pd.Timedelta(days=1)).values 

68 cout_tedges_days = ((cout_tedges - t0) / pd.Timedelta(days=1)).values 

69 

70 # Compute residence times and cumulative flow 

71 rt_edges = residence_time( 

72 flow=flow_values, 

73 flow_tedges=tedges, 

74 index=cout_tedges, 

75 aquifer_pore_volume=float(aquifer_pore_volume_value), 

76 retardation_factor=retardation_factor, 

77 direction="extraction_to_infiltration", 

78 ) 

79 cout_tedges_days_infiltration = cout_tedges_days - rt_edges[0] 

80 

81 flow_tdelta = np.diff(tedges_days, prepend=0.0) 

82 flow_cum = (np.concatenate(([0.0], flow_values)) * flow_tdelta).cumsum() 

83 

84 # Interpolate volumes at concentration time edges 

85 start_vol = linear_interpolate(tedges_days, flow_cum, cout_tedges_days_infiltration) 

86 end_vol = linear_interpolate(tedges_days, flow_cum, cout_tedges_days) 

87 

88 # Compute deposition weights 

89 flow_cum_cout = flow_cum[None, :] - start_vol[:, None] 

90 volume_array = compute_average_heights( 

91 tedges_days, flow_cum_cout, 0.0, retardation_factor * float(aquifer_pore_volume_value) 

92 ) 

93 area_array = volume_array / (porosity * thickness) 

94 extracted_volume = np.diff(end_vol) 

95 return area_array * np.diff(tedges_days)[None, :] / extracted_volume[:, None] 

96 

97 

98def deposition_to_extraction( 

99 *, 

100 dep, 

101 flow, 

102 tedges, 

103 cout_tedges, 

104 aquifer_pore_volume_value, 

105 porosity, 

106 thickness, 

107 retardation_factor=1.0, 

108): 

109 """Compute concentrations from deposition rates (convolution). 

110 

111 Parameters 

112 ---------- 

113 dep : array_like 

114 Deposition rates [ng/m2/day]. Length must equal len(tedges) - 1. 

115 flow : array_like 

116 Flow rates in aquifer [m3/day]. Length must equal len(tedges) - 1. 

117 tedges : pandas.DatetimeIndex 

118 Time bin edges for deposition and flow data. 

119 cout_tedges : pandas.DatetimeIndex 

120 Time bin edges for output concentration data. 

121 aquifer_pore_volume_value : float 

122 Aquifer pore volume [m3]. 

123 porosity : float 

124 Aquifer porosity [dimensionless]. 

125 thickness : float 

126 Aquifer thickness [m]. 

127 retardation_factor : float, optional 

128 Compound retardation factor, by default 1.0. 

129 

130 Returns 

131 ------- 

132 ndarray 

133 Concentration changes [ng/m3] with length len(cout_tedges) - 1. 

134 

135 Examples 

136 -------- 

137 >>> import pandas as pd 

138 >>> import numpy as np 

139 >>> dates = pd.date_range("2020-01-01", "2020-01-10", freq="D") 

140 >>> tedges = pd.date_range("2019-12-31 12:00", "2020-01-10 12:00", freq="D") 

141 >>> cout_tedges = pd.date_range("2020-01-03 12:00", "2020-01-12 12:00", freq="D") 

142 >>> dep = np.ones(len(dates)) 

143 >>> flow = np.full(len(dates), 100.0) 

144 >>> cout = deposition_to_extraction( 

145 ... dep=dep, 

146 ... flow=flow, 

147 ... tedges=tedges, 

148 ... cout_tedges=cout_tedges, 

149 ... aquifer_pore_volume_value=500.0, 

150 ... porosity=0.3, 

151 ... thickness=10.0, 

152 ... ) 

153 """ 

154 tedges, cout_tedges = pd.DatetimeIndex(tedges), pd.DatetimeIndex(cout_tedges) 

155 dep_values, flow_values = np.asarray(dep), np.asarray(flow) 

156 

157 # Validate input dimensions and values 

158 if len(tedges) != len(dep_values) + 1: 

159 _msg = "tedges must have one more element than dep" 

160 raise ValueError(_msg) 

161 if len(tedges) != len(flow_values) + 1: 

162 _msg = "tedges must have one more element than flow" 

163 raise ValueError(_msg) 

164 if np.any(np.isnan(dep_values)) or np.any(np.isnan(flow_values)): 

165 _msg = "Input arrays cannot contain NaN values" 

166 raise ValueError(_msg) 

167 

168 # Compute deposition weights 

169 deposition_weights = compute_deposition_weights( 

170 flow_values=flow_values, 

171 tedges=tedges, 

172 cout_tedges=cout_tedges, 

173 aquifer_pore_volume_value=aquifer_pore_volume_value, 

174 porosity=porosity, 

175 thickness=thickness, 

176 retardation_factor=retardation_factor, 

177 ) 

178 

179 return deposition_weights.dot(dep_values) 

180 

181 

182def extraction_to_deposition( 

183 *, 

184 flow, 

185 tedges, 

186 cout, 

187 cout_tedges, 

188 aquifer_pore_volume_value, 

189 porosity, 

190 thickness, 

191 retardation_factor=1.0, 

192 nullspace_objective="squared_differences", 

193): 

194 """ 

195 Compute deposition rates from concentration changes (deconvolution). 

196 

197 The solution for the deposition is fundamentally underdetermined, as multiple 

198 deposition histories can lead to the same concentration. This function 

199 computes a least-squares solution and then selects a specific solution from the 

200 nullspace of the problem based on the provided objective. 

201 

202 Parameters 

203 ---------- 

204 flow : array_like 

205 Flow rates in aquifer [m3/day]. Length must equal len(tedges) - 1. 

206 Must not contain NaN values. 

207 tedges : pandas.DatetimeIndex 

208 Time bin edges for deposition and flow data. Length must equal 

209 len(flow) + 1. 

210 cout : array_like 

211 Concentration changes in extracted water [ng/m3]. Length must equal 

212 len(cout_tedges) - 1. May contain NaN values, which will be excluded 

213 from the computation along with corresponding rows in the weight matrix. 

214 cout_tedges : pandas.DatetimeIndex 

215 Time bin edges for output concentration data. Length must equal 

216 len(cout) + 1. 

217 aquifer_pore_volume_value : float 

218 Aquifer pore volume [m3]. 

219 porosity : float 

220 Aquifer porosity [dimensionless]. 

221 thickness : float 

222 Aquifer thickness [m]. 

223 retardation_factor : float, optional 

224 Compound retardation factor, by default 1.0. Values > 1.0 indicate 

225 slower transport due to sorption/interaction. 

226 nullspace_objective : str or callable, optional 

227 Objective function to minimize in the nullspace. Options: 

228 

229 * "squared_differences" : Minimize sum of squared differences between 

230 adjacent deposition rates (default, provides smooth solutions) 

231 * "summed_differences" : Minimize sum of absolute differences between 

232 adjacent deposition rates (promotes sparse/piecewise constant solutions) 

233 * callable : Custom objective function with signature 

234 ``objective(coeffs, x_ls, nullspace_basis)`` 

235 

236 Default is "squared_differences". 

237 

238 Returns 

239 ------- 

240 numpy.ndarray 

241 Mean deposition rates [ng/m2/day] between tedges. Length equals 

242 len(tedges) - 1. 

243 

244 Raises 

245 ------ 

246 ValueError 

247 If input dimensions are incompatible, if flow contains NaN values, 

248 or if the optimization fails. 

249 

250 Notes 

251 ----- 

252 This function solves the inverse problem of determining deposition rates 

253 from observed concentration changes. Since multiple deposition histories 

254 can produce the same concentration pattern, regularization in the nullspace 

255 is used to select a physically meaningful solution. 

256 

257 The algorithm: 

258 

259 1. Validates input dimensions and checks for NaN values in flow 

260 2. Computes deposition weight matrix relating deposition to concentration 

261 3. Identifies valid rows (no NaN in weights or concentration data) 

262 4. Solves the underdetermined system using nullspace regularization 

263 5. Returns the regularized deposition rate solution 

264 

265 Examples 

266 -------- 

267 Basic usage with default squared differences regularization: 

268 

269 >>> import pandas as pd 

270 >>> import numpy as np 

271 >>> from gwtransport.deposition import extraction_to_deposition 

272 >>> 

273 >>> # Create input data 

274 >>> dates = pd.date_range("2020-01-01", "2020-01-10", freq="D") 

275 >>> tedges = pd.date_range("2019-12-31 12:00", "2020-01-10 12:00", freq="D") 

276 >>> cout_tedges = pd.date_range("2020-01-03 12:00", "2020-01-12 12:00", freq="D") 

277 >>> 

278 >>> # Flow and concentration data 

279 >>> flow = np.full(len(dates), 100.0) # m3/day 

280 >>> cout = np.ones(len(cout_tedges) - 1) * 10.0 # ng/m3 

281 >>> 

282 >>> # Compute deposition rates 

283 >>> dep = extraction_to_deposition( 

284 ... flow=flow, 

285 ... tedges=tedges, 

286 ... cout=cout, 

287 ... cout_tedges=cout_tedges, 

288 ... aquifer_pore_volume_value=500.0, 

289 ... porosity=0.3, 

290 ... thickness=10.0, 

291 ... ) 

292 >>> print(f"Deposition rates shape: {dep.shape}") 

293 >>> print(f"Mean deposition rate: {np.nanmean(dep):.2f} ng/m2/day") 

294 

295 With summed differences regularization for sparse solutions: 

296 

297 >>> dep_sparse = extraction_to_deposition( 

298 ... flow=flow, 

299 ... tedges=tedges, 

300 ... cout=cout, 

301 ... cout_tedges=cout_tedges, 

302 ... aquifer_pore_volume_value=500.0, 

303 ... porosity=0.3, 

304 ... thickness=10.0, 

305 ... nullspace_objective="summed_differences", 

306 ... ) 

307 

308 With custom regularization objective: 

309 

310 >>> def l2_norm_objective(coeffs, x_ls, nullspace_basis): 

311 ... x = x_ls + nullspace_basis @ coeffs 

312 ... return np.sum(x**2) # Minimize L2 norm of solution 

313 >>> 

314 >>> dep_l2 = extraction_to_deposition( 

315 ... flow=flow, 

316 ... tedges=tedges, 

317 ... cout=cout, 

318 ... cout_tedges=cout_tedges, 

319 ... aquifer_pore_volume_value=500.0, 

320 ... porosity=0.3, 

321 ... thickness=10.0, 

322 ... nullspace_objective=l2_norm_objective, 

323 ... ) 

324 

325 Handling missing concentration data: 

326 

327 >>> # Concentration with some NaN values 

328 >>> cout_nan = cout.copy() 

329 >>> cout_nan[2:4] = np.nan # Missing data for some time periods 

330 >>> 

331 >>> dep_robust = extraction_to_deposition( 

332 ... flow=flow, 

333 ... tedges=tedges, 

334 ... cout=cout_nan, 

335 ... cout_tedges=cout_tedges, 

336 ... aquifer_pore_volume_value=500.0, 

337 ... porosity=0.3, 

338 ... thickness=10.0, 

339 ... ) 

340 """ 

341 tedges, cout_tedges = pd.DatetimeIndex(tedges), pd.DatetimeIndex(cout_tedges) 

342 cout_values, flow_values = np.asarray(cout), np.asarray(flow) 

343 

344 # Validate input dimensions and values 

345 if len(cout_tedges) != len(cout_values) + 1: 

346 msg = "cout_tedges must have one more element than cout" 

347 raise ValueError(msg) 

348 if len(tedges) != len(flow_values) + 1: 

349 msg = "tedges must have one more element than flow" 

350 raise ValueError(msg) 

351 if np.any(np.isnan(flow_values)): 

352 msg = "flow array cannot contain NaN values" 

353 raise ValueError(msg) 

354 

355 # Compute deposition weights 

356 deposition_weights = compute_deposition_weights( 

357 flow_values=flow_values, 

358 tedges=tedges, 

359 cout_tedges=cout_tedges, 

360 aquifer_pore_volume_value=aquifer_pore_volume_value, 

361 porosity=porosity, 

362 thickness=thickness, 

363 retardation_factor=retardation_factor, 

364 ) 

365 

366 # Solve underdetermined system using utils function 

367 return solve_underdetermined_system( 

368 coefficient_matrix=deposition_weights, 

369 rhs_vector=cout_values, 

370 nullspace_objective=nullspace_objective, 

371 optimization_method="Nelder-Mead", 

372 ) 

373 

374 

375def spinup_duration( 

376 *, 

377 flow: np.ndarray, 

378 flow_tedges: pd.DatetimeIndex, 

379 aquifer_pore_volume_value: float, 

380 retardation_factor: float, 

381) -> float: 

382 """ 

383 Compute the spinup duration for deposition modeling. 

384 

385 The spinup duration is the residence time at the first time step, representing 

386 the time needed for the system to become fully informed. Before this duration, 

387 the extracted concentration lacks complete deposition history. 

388 

389 Parameters 

390 ---------- 

391 flow : numpy.ndarray 

392 Flow rate of water in the aquifer [m3/day]. 

393 flow_tedges : pandas.DatetimeIndex 

394 Time edges for the flow data. 

395 aquifer_pore_volume_value : float 

396 Pore volume of the aquifer [m3]. 

397 retardation_factor : float 

398 Retardation factor of the compound in the aquifer [dimensionless]. 

399 

400 Returns 

401 ------- 

402 float 

403 Spinup duration in days. 

404 """ 

405 rt = residence_time( 

406 flow=flow, 

407 flow_tedges=flow_tedges, 

408 aquifer_pore_volume=aquifer_pore_volume_value, 

409 retardation_factor=retardation_factor, 

410 direction="infiltration_to_extraction", 

411 ) 

412 if np.isnan(rt[0, 0]): 

413 msg = "Residence time at the first time step is NaN. This indicates that the aquifer is not fully informed: flow timeseries too short." 

414 raise ValueError(msg) 

415 

416 # Return the first residence time value 

417 return float(rt[0, 0])