Coverage for src / gwtransport / deposition.py: 73%
73 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-04 18:51 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-04 18:51 +0000
1"""
2Deposition Analysis for 1D Aquifer Systems.
4This module analyzes compound transport by deposition in aquifer systems with tools for
5computing concentrations and deposition rates based on aquifer properties. The model assumes
61D groundwater flow where compound deposition occurs along the flow path, enriching the water.
7Deposition processes include pathogen attachment to aquifer matrix, particle filtration, or
8chemical precipitation. The module follows advection module patterns for consistency in
9forward (deposition to extraction) and reverse (extraction to deposition) calculations.
11Available functions:
13- :func:`deposition_to_extraction` - Compute concentrations from deposition rates (convolution).
14 Given deposition rate time series [ng/m²/day], computes resulting concentration changes in
15 extracted water [ng/m³]. Uses flow-weighted integration over contact area between water and
16 aquifer matrix. Accounts for aquifer geometry (porosity, thickness) and residence time
17 distribution.
19- :func:`extraction_to_deposition` - Compute deposition rates from concentration changes
20 (deconvolution). Given concentration change time series in extracted water [ng/m³], estimates
21 deposition rate history [ng/m²/day] that produced those changes. Solves underdetermined
22 inverse problem using nullspace regularization with configurable objectives ('squared_differences'
23 for smooth solutions, 'summed_differences' for sparse solutions). Handles NaN values in
24 concentration data by excluding corresponding time periods.
26- :func:`compute_deposition_weights` - Internal helper function. Compute weight matrix relating
27 deposition rates to concentration changes. Used by both deposition_to_extraction (forward) and
28 extraction_to_deposition (reverse). Calculates contact area between water parcels and aquifer
29 matrix based on streamline geometry and residence times.
31- :func:`spinup_duration` - Compute spinup duration for deposition modeling. Returns residence
32 time at first time step, representing time needed for system to become fully informed. Before
33 this duration, extracted concentration lacks complete deposition history. Useful for determining
34 valid analysis period and identifying when boundary effects are negligible.
36This file is part of gwtransport which is released under AGPL-3.0 license.
37See the ./LICENSE file or go to https://github.com/gwtransport/gwtransport/blob/main/LICENSE for full license details.
38"""
40import numpy as np
41import numpy.typing as npt
42import pandas as pd
44from gwtransport.residence_time import residence_time
45from gwtransport.surfacearea import compute_average_heights
46from gwtransport.utils import linear_interpolate, solve_underdetermined_system
49def compute_deposition_weights(
50 *,
51 flow_values: npt.ArrayLike,
52 tedges: pd.DatetimeIndex,
53 cout_tedges: pd.DatetimeIndex,
54 aquifer_pore_volume: float,
55 porosity: float,
56 thickness: float,
57 retardation_factor: float = 1.0,
58) -> npt.NDArray[np.floating]:
59 """Compute deposition weights for concentration-deposition convolution.
61 Parameters
62 ----------
63 flow_values : array-like
64 Flow rates in aquifer [m3/day]. Length must equal len(tedges) - 1.
65 tedges : pandas.DatetimeIndex
66 Time bin edges for flow data.
67 cout_tedges : pandas.DatetimeIndex
68 Time bin edges for output concentration data.
69 aquifer_pore_volume : float
70 Aquifer pore volume [m3].
71 porosity : float
72 Aquifer porosity [dimensionless].
73 thickness : float
74 Aquifer thickness [m].
75 retardation_factor : float, optional
76 Compound retardation factor, by default 1.0.
78 Returns
79 -------
80 numpy.ndarray
81 Deposition weights matrix with shape (len(cout_tedges) - 1, len(tedges) - 1).
82 May contain NaN values where residence time cannot be computed.
84 Notes
85 -----
86 The returned weights matrix may contain NaN values in locations where the
87 residence time calculation fails or is undefined. This typically occurs
88 when flow conditions result in invalid or non-physical residence times.
89 """
90 # Convert to days relative to first time edge
91 t0 = tedges[0]
92 tedges_days = ((tedges - t0) / pd.Timedelta(days=1)).values
93 cout_tedges_days = ((cout_tedges - t0) / pd.Timedelta(days=1)).values
95 # Compute residence times and cumulative flow
96 rt_edges = residence_time(
97 flow=flow_values,
98 flow_tedges=tedges,
99 index=cout_tedges,
100 aquifer_pore_volumes=float(aquifer_pore_volume),
101 retardation_factor=retardation_factor,
102 direction="extraction_to_infiltration",
103 )
104 cout_tedges_days_infiltration = cout_tedges_days - rt_edges[0]
106 flow_cum = np.concatenate(([0.0], np.cumsum(flow_values * np.diff(tedges_days))))
108 # Interpolate volumes at concentration time edges
109 start_vol = linear_interpolate(x_ref=tedges_days, y_ref=flow_cum, x_query=cout_tedges_days_infiltration)
110 end_vol = linear_interpolate(x_ref=tedges_days, y_ref=flow_cum, x_query=cout_tedges_days)
112 # Compute deposition weights
113 flow_cum_cout = flow_cum[None, :] - start_vol[:, None]
114 volume_array = compute_average_heights(
115 x_edges=tedges_days, y_edges=flow_cum_cout, y_lower=0.0, y_upper=retardation_factor * float(aquifer_pore_volume)
116 )
117 area_array = volume_array / (porosity * thickness)
118 extracted_volume = np.diff(end_vol)
119 return area_array * np.diff(tedges_days)[None, :] / extracted_volume[:, None]
122def deposition_to_extraction(
123 *,
124 dep: npt.ArrayLike,
125 flow: npt.ArrayLike,
126 tedges: pd.DatetimeIndex | np.ndarray,
127 cout_tedges: pd.DatetimeIndex | np.ndarray,
128 aquifer_pore_volume: float,
129 porosity: float,
130 thickness: float,
131 retardation_factor: float = 1.0,
132) -> npt.NDArray[np.floating]:
133 """Compute concentrations from deposition rates (convolution).
135 Parameters
136 ----------
137 dep : array-like
138 Deposition rates [ng/m2/day]. Length must equal len(tedges) - 1.
139 flow : array-like
140 Flow rates in aquifer [m3/day]. Length must equal len(tedges) - 1.
141 tedges : pandas.DatetimeIndex
142 Time bin edges for deposition and flow data.
143 cout_tedges : pandas.DatetimeIndex
144 Time bin edges for output concentration data.
145 aquifer_pore_volume : float
146 Aquifer pore volume [m3].
147 porosity : float
148 Aquifer porosity [dimensionless].
149 thickness : float
150 Aquifer thickness [m].
151 retardation_factor : float, optional
152 Compound retardation factor, by default 1.0.
154 Returns
155 -------
156 numpy.ndarray
157 Concentration changes [ng/m3] with length len(cout_tedges) - 1.
159 Examples
160 --------
161 >>> import pandas as pd
162 >>> import numpy as np
163 >>> from gwtransport.deposition import deposition_to_extraction
164 >>> dates = pd.date_range("2020-01-01", "2020-01-10", freq="D")
165 >>> tedges = pd.date_range("2019-12-31 12:00", "2020-01-10 12:00", freq="D")
166 >>> cout_tedges = pd.date_range("2020-01-03 12:00", "2020-01-12 12:00", freq="D")
167 >>> dep = np.ones(len(dates))
168 >>> flow = np.full(len(dates), 100.0)
169 >>> cout = deposition_to_extraction(
170 ... dep=dep,
171 ... flow=flow,
172 ... tedges=tedges,
173 ... cout_tedges=cout_tedges,
174 ... aquifer_pore_volume=500.0,
175 ... porosity=0.3,
176 ... thickness=10.0,
177 ... )
179 See Also
180 --------
181 extraction_to_deposition : Inverse operation (deconvolution)
182 gwtransport.advection.infiltration_to_extraction : For concentration transport without deposition
183 :ref:`concept-transport-equation` : Flow-weighted averaging approach
184 """
185 tedges, cout_tedges = pd.DatetimeIndex(tedges), pd.DatetimeIndex(cout_tedges)
186 dep_values, flow_values = np.asarray(dep), np.asarray(flow)
188 # Validate input dimensions and values
189 if len(tedges) != len(dep_values) + 1:
190 _msg = "tedges must have one more element than dep"
191 raise ValueError(_msg)
192 if len(tedges) != len(flow_values) + 1:
193 _msg = "tedges must have one more element than flow"
194 raise ValueError(_msg)
195 if np.any(np.isnan(dep_values)) or np.any(np.isnan(flow_values)):
196 _msg = "Input arrays cannot contain NaN values"
197 raise ValueError(_msg)
199 # Validate physical parameters
200 if not 0 < porosity < 1:
201 _msg = f"Porosity must be in (0, 1), got {porosity}"
202 raise ValueError(_msg)
203 if thickness <= 0:
204 _msg = f"Thickness must be positive, got {thickness}"
205 raise ValueError(_msg)
206 if aquifer_pore_volume <= 0:
207 _msg = f"Aquifer pore volume must be positive, got {aquifer_pore_volume}"
208 raise ValueError(_msg)
210 # Compute deposition weights
211 deposition_weights = compute_deposition_weights(
212 flow_values=flow_values,
213 tedges=tedges,
214 cout_tedges=cout_tedges,
215 aquifer_pore_volume=aquifer_pore_volume,
216 porosity=porosity,
217 thickness=thickness,
218 retardation_factor=retardation_factor,
219 )
221 return deposition_weights.dot(dep_values)
224def extraction_to_deposition(
225 *,
226 flow: npt.ArrayLike,
227 tedges: pd.DatetimeIndex | np.ndarray,
228 cout: npt.ArrayLike,
229 cout_tedges: pd.DatetimeIndex | np.ndarray,
230 aquifer_pore_volume: float,
231 porosity: float,
232 thickness: float,
233 retardation_factor: float = 1.0,
234 nullspace_objective: str = "squared_differences",
235) -> npt.NDArray[np.floating]:
236 """
237 Compute deposition rates from concentration changes (deconvolution).
239 The solution for the deposition is fundamentally underdetermined, as multiple
240 deposition histories can lead to the same concentration. This function
241 computes a least-squares solution and then selects a specific solution from the
242 nullspace of the problem based on the provided objective.
244 Parameters
245 ----------
246 flow : array-like
247 Flow rates in aquifer [m3/day]. Length must equal len(tedges) - 1.
248 Must not contain NaN values.
249 tedges : pandas.DatetimeIndex
250 Time bin edges for deposition and flow data. Length must equal
251 len(flow) + 1.
252 cout : array-like
253 Concentration changes in extracted water [ng/m3]. Length must equal
254 len(cout_tedges) - 1. May contain NaN values, which will be excluded
255 from the computation along with corresponding rows in the weight matrix.
256 cout_tedges : pandas.DatetimeIndex
257 Time bin edges for output concentration data. Length must equal
258 len(cout) + 1.
259 aquifer_pore_volume : float
260 Aquifer pore volume [m3].
261 porosity : float
262 Aquifer porosity [dimensionless].
263 thickness : float
264 Aquifer thickness [m].
265 retardation_factor : float, optional
266 Compound retardation factor, by default 1.0. Values > 1.0 indicate
267 slower transport due to sorption/interaction.
268 nullspace_objective : str or callable, optional
269 Objective function to minimize in the nullspace. Options:
271 * "squared_differences" : Minimize sum of squared differences between
272 adjacent deposition rates (default, provides smooth solutions)
273 * "summed_differences" : Minimize sum of absolute differences between
274 adjacent deposition rates (promotes sparse/piecewise constant solutions)
275 * callable : Custom objective function with signature
276 ``objective(coeffs, x_ls, nullspace_basis)``
278 Default is "squared_differences".
280 Returns
281 -------
282 numpy.ndarray
283 Mean deposition rates [ng/m2/day] between tedges. Length equals
284 len(tedges) - 1.
286 Raises
287 ------
288 ValueError
289 If input dimensions are incompatible, if flow contains NaN values,
290 or if the optimization fails.
292 Notes
293 -----
294 This function solves the inverse problem of determining deposition rates
295 from observed concentration changes. Since multiple deposition histories
296 can produce the same concentration pattern, regularization in the nullspace
297 is used to select a physically meaningful solution.
299 The algorithm:
301 1. Validates input dimensions and checks for NaN values in flow
302 2. Computes deposition weight matrix relating deposition to concentration
303 3. Identifies valid rows (no NaN in weights or concentration data)
304 4. Solves the underdetermined system using nullspace regularization
305 5. Returns the regularized deposition rate solution
307 Examples
308 --------
309 Basic usage with default squared differences regularization:
311 >>> import pandas as pd
312 >>> import numpy as np
313 >>> from gwtransport.deposition import extraction_to_deposition
314 >>>
315 >>> # Create input data
316 >>> dates = pd.date_range("2020-01-01", "2020-01-10", freq="D")
317 >>> tedges = pd.date_range("2019-12-31 12:00", "2020-01-10 12:00", freq="D")
318 >>> cout_tedges = pd.date_range("2020-01-03 12:00", "2020-01-12 12:00", freq="D")
319 >>>
320 >>> # Flow and concentration data
321 >>> flow = np.full(len(dates), 100.0) # m3/day
322 >>> cout = np.ones(len(cout_tedges) - 1) * 10.0 # ng/m3
323 >>>
324 >>> # Compute deposition rates
325 >>> dep = extraction_to_deposition(
326 ... flow=flow,
327 ... tedges=tedges,
328 ... cout=cout,
329 ... cout_tedges=cout_tedges,
330 ... aquifer_pore_volume=500.0,
331 ... porosity=0.3,
332 ... thickness=10.0,
333 ... )
334 >>> print(f"Deposition rates shape: {dep.shape}")
335 Deposition rates shape: (10,)
336 >>> print(f"Mean deposition rate: {np.nanmean(dep):.2f} ng/m2/day")
337 Mean deposition rate: 6.00 ng/m2/day
339 With summed differences regularization for sparse solutions:
341 >>> dep_sparse = extraction_to_deposition(
342 ... flow=flow,
343 ... tedges=tedges,
344 ... cout=cout,
345 ... cout_tedges=cout_tedges,
346 ... aquifer_pore_volume=500.0,
347 ... porosity=0.3,
348 ... thickness=10.0,
349 ... nullspace_objective="summed_differences",
350 ... )
352 With custom regularization objective:
354 >>> def l2_norm_objective(coeffs, x_ls, nullspace_basis):
355 ... x = x_ls + nullspace_basis @ coeffs
356 ... return np.sum(x**2) # Minimize L2 norm of solution
357 >>>
358 >>> dep_l2 = extraction_to_deposition(
359 ... flow=flow,
360 ... tedges=tedges,
361 ... cout=cout,
362 ... cout_tedges=cout_tedges,
363 ... aquifer_pore_volume=500.0,
364 ... porosity=0.3,
365 ... thickness=10.0,
366 ... nullspace_objective=l2_norm_objective,
367 ... )
369 Handling missing concentration data:
371 >>> # Concentration with some NaN values
372 >>> cout_nan = cout.copy()
373 >>> cout_nan[2:4] = np.nan # Missing data for some time periods
374 >>>
375 >>> dep_robust = extraction_to_deposition( # doctest: +SKIP
376 ... flow=flow,
377 ... tedges=tedges,
378 ... cout=cout_nan,
379 ... cout_tedges=cout_tedges,
380 ... aquifer_pore_volume=500.0,
381 ... porosity=0.3,
382 ... thickness=10.0,
383 ... )
385 See Also
386 --------
387 deposition_to_extraction : Forward operation (convolution)
388 gwtransport.advection.extraction_to_infiltration : For concentration transport without deposition
389 :ref:`concept-transport-equation` : Flow-weighted averaging approach
390 """
391 tedges, cout_tedges = pd.DatetimeIndex(tedges), pd.DatetimeIndex(cout_tedges)
392 cout_values, flow_values = np.asarray(cout), np.asarray(flow)
394 # Validate input dimensions and values
395 if len(cout_tedges) != len(cout_values) + 1:
396 msg = "cout_tedges must have one more element than cout"
397 raise ValueError(msg)
398 if len(tedges) != len(flow_values) + 1:
399 msg = "tedges must have one more element than flow"
400 raise ValueError(msg)
401 if np.any(np.isnan(flow_values)):
402 msg = "flow array cannot contain NaN values"
403 raise ValueError(msg)
405 # Validate physical parameters
406 if not 0 < porosity < 1:
407 msg = f"Porosity must be in (0, 1), got {porosity}"
408 raise ValueError(msg)
409 if thickness <= 0:
410 msg = f"Thickness must be positive, got {thickness}"
411 raise ValueError(msg)
412 if aquifer_pore_volume <= 0:
413 msg = f"Aquifer pore volume must be positive, got {aquifer_pore_volume}"
414 raise ValueError(msg)
416 # Compute deposition weights
417 deposition_weights = compute_deposition_weights(
418 flow_values=flow_values,
419 tedges=tedges,
420 cout_tedges=cout_tedges,
421 aquifer_pore_volume=aquifer_pore_volume,
422 porosity=porosity,
423 thickness=thickness,
424 retardation_factor=retardation_factor,
425 )
427 # Solve underdetermined system using utils function
428 return solve_underdetermined_system(
429 coefficient_matrix=deposition_weights,
430 rhs_vector=cout_values,
431 nullspace_objective=nullspace_objective,
432 optimization_method="Nelder-Mead",
433 )
436def spinup_duration(
437 *,
438 flow: np.ndarray,
439 flow_tedges: pd.DatetimeIndex,
440 aquifer_pore_volume: float,
441 retardation_factor: float,
442) -> float:
443 """
444 Compute the spinup duration for deposition modeling.
446 The spinup duration is the residence time at the first time step, representing
447 the time needed for the system to become fully informed. Before this duration,
448 the extracted concentration lacks complete deposition history.
450 Parameters
451 ----------
452 flow : numpy.ndarray
453 Flow rate of water in the aquifer [m3/day].
454 flow_tedges : pandas.DatetimeIndex
455 Time edges for the flow data.
456 aquifer_pore_volume : float
457 Pore volume of the aquifer [m3].
458 retardation_factor : float
459 Retardation factor of the compound in the aquifer [dimensionless].
461 Returns
462 -------
463 float
464 Spinup duration in days.
465 """
466 rt = residence_time(
467 flow=flow,
468 flow_tedges=flow_tedges,
469 aquifer_pore_volumes=aquifer_pore_volume,
470 retardation_factor=retardation_factor,
471 direction="infiltration_to_extraction",
472 )
473 rt_value: float = float(np.asarray(rt[0, 0]))
474 if np.isnan(rt_value):
475 msg = "Residence time at the first time step is NaN. This indicates that the aquifer is not fully informed: flow timeseries too short."
476 raise ValueError(msg)
478 # Return the first residence time value
479 return rt_value