Coverage for src/gwtransport/deposition.py: 80%
55 statements
« prev ^ index » next coverage.py v7.10.7, created at 2025-09-29 20:01 +0000
« prev ^ index » next coverage.py v7.10.7, created at 2025-09-29 20:01 +0000
1"""Deposition analysis for 1D aquifer systems.
3Analyze compound transport by deposition in aquifer systems with tools for
4computing concentrations and deposition rates based on aquifer properties.
6The model assumes 1D groundwater flow where compound deposition occurs along
7the flow path, enriching the water. Follows advection module patterns for
8consistency.
10Functions
11---------
12extraction_to_deposition : Compute deposition rates from concentration changes
13deposition_to_extraction : Compute concentrations from deposition rates
14"""
16import numpy as np
17import numpy.typing as npt
18import pandas as pd
20from gwtransport.residence_time import residence_time
21from gwtransport.surfacearea import compute_average_heights
22from gwtransport.utils import linear_interpolate, solve_underdetermined_system
25def compute_deposition_weights(
26 *,
27 flow_values,
28 tedges,
29 cout_tedges,
30 aquifer_pore_volume,
31 porosity,
32 thickness,
33 retardation_factor=1.0,
34):
35 """Compute deposition weights for concentration-deposition convolution.
37 Parameters
38 ----------
39 flow_values : array-like
40 Flow rates in aquifer [m3/day]. Length must equal len(tedges) - 1.
41 tedges : pandas.DatetimeIndex
42 Time bin edges for flow data.
43 cout_tedges : pandas.DatetimeIndex
44 Time bin edges for output concentration data.
45 aquifer_pore_volume : float
46 Aquifer pore volume [m3].
47 porosity : float
48 Aquifer porosity [dimensionless].
49 thickness : float
50 Aquifer thickness [m].
51 retardation_factor : float, optional
52 Compound retardation factor, by default 1.0.
54 Returns
55 -------
56 numpy.ndarray
57 Deposition weights matrix with shape (len(cout_tedges) - 1, len(tedges) - 1).
58 May contain NaN values where residence time cannot be computed.
60 Notes
61 -----
62 The returned weights matrix may contain NaN values in locations where the
63 residence time calculation fails or is undefined. This typically occurs
64 when flow conditions result in invalid or non-physical residence times.
65 """
66 # Convert to days relative to first time edge
67 t0 = tedges[0]
68 tedges_days = ((tedges - t0) / pd.Timedelta(days=1)).values
69 cout_tedges_days = ((cout_tedges - t0) / pd.Timedelta(days=1)).values
71 # Compute residence times and cumulative flow
72 rt_edges = residence_time(
73 flow=flow_values,
74 flow_tedges=tedges,
75 index=cout_tedges,
76 aquifer_pore_volume=float(aquifer_pore_volume),
77 retardation_factor=retardation_factor,
78 direction="extraction_to_infiltration",
79 )
80 cout_tedges_days_infiltration = cout_tedges_days - rt_edges[0]
82 flow_tdelta = np.diff(tedges_days, prepend=0.0)
83 flow_cum = (np.concatenate(([0.0], flow_values)) * flow_tdelta).cumsum()
85 # Interpolate volumes at concentration time edges
86 start_vol = linear_interpolate(tedges_days, flow_cum, cout_tedges_days_infiltration)
87 end_vol = linear_interpolate(tedges_days, flow_cum, cout_tedges_days)
89 # Compute deposition weights
90 flow_cum_cout = flow_cum[None, :] - start_vol[:, None]
91 volume_array = compute_average_heights(
92 tedges_days, flow_cum_cout, 0.0, retardation_factor * float(aquifer_pore_volume)
93 )
94 area_array = volume_array / (porosity * thickness)
95 extracted_volume = np.diff(end_vol)
96 return area_array * np.diff(tedges_days)[None, :] / extracted_volume[:, None]
99def deposition_to_extraction(
100 *,
101 dep: npt.ArrayLike,
102 flow: npt.ArrayLike,
103 tedges: pd.DatetimeIndex | np.ndarray,
104 cout_tedges: pd.DatetimeIndex | np.ndarray,
105 aquifer_pore_volume: float,
106 porosity: float,
107 thickness: float,
108 retardation_factor: float = 1.0,
109) -> np.ndarray:
110 """Compute concentrations from deposition rates (convolution).
112 Parameters
113 ----------
114 dep : array-like
115 Deposition rates [ng/m2/day]. Length must equal len(tedges) - 1.
116 flow : array-like
117 Flow rates in aquifer [m3/day]. Length must equal len(tedges) - 1.
118 tedges : pandas.DatetimeIndex
119 Time bin edges for deposition and flow data.
120 cout_tedges : pandas.DatetimeIndex
121 Time bin edges for output concentration data.
122 aquifer_pore_volume : float
123 Aquifer pore volume [m3].
124 porosity : float
125 Aquifer porosity [dimensionless].
126 thickness : float
127 Aquifer thickness [m].
128 retardation_factor : float, optional
129 Compound retardation factor, by default 1.0.
131 Returns
132 -------
133 numpy.ndarray
134 Concentration changes [ng/m3] with length len(cout_tedges) - 1.
136 Examples
137 --------
138 >>> import pandas as pd
139 >>> import numpy as np
140 >>> dates = pd.date_range("2020-01-01", "2020-01-10", freq="D")
141 >>> tedges = pd.date_range("2019-12-31 12:00", "2020-01-10 12:00", freq="D")
142 >>> cout_tedges = pd.date_range("2020-01-03 12:00", "2020-01-12 12:00", freq="D")
143 >>> dep = np.ones(len(dates))
144 >>> flow = np.full(len(dates), 100.0)
145 >>> cout = deposition_to_extraction(
146 ... dep=dep,
147 ... flow=flow,
148 ... tedges=tedges,
149 ... cout_tedges=cout_tedges,
150 ... aquifer_pore_volume=500.0,
151 ... porosity=0.3,
152 ... thickness=10.0,
153 ... )
154 """
155 tedges, cout_tedges = pd.DatetimeIndex(tedges), pd.DatetimeIndex(cout_tedges)
156 dep_values, flow_values = np.asarray(dep), np.asarray(flow)
158 # Validate input dimensions and values
159 if len(tedges) != len(dep_values) + 1:
160 _msg = "tedges must have one more element than dep"
161 raise ValueError(_msg)
162 if len(tedges) != len(flow_values) + 1:
163 _msg = "tedges must have one more element than flow"
164 raise ValueError(_msg)
165 if np.any(np.isnan(dep_values)) or np.any(np.isnan(flow_values)):
166 _msg = "Input arrays cannot contain NaN values"
167 raise ValueError(_msg)
169 # Compute deposition weights
170 deposition_weights = compute_deposition_weights(
171 flow_values=flow_values,
172 tedges=tedges,
173 cout_tedges=cout_tedges,
174 aquifer_pore_volume=aquifer_pore_volume,
175 porosity=porosity,
176 thickness=thickness,
177 retardation_factor=retardation_factor,
178 )
180 return deposition_weights.dot(dep_values)
183def extraction_to_deposition(
184 *,
185 flow: npt.ArrayLike,
186 tedges: pd.DatetimeIndex | np.ndarray,
187 cout: npt.ArrayLike,
188 cout_tedges: pd.DatetimeIndex | np.ndarray,
189 aquifer_pore_volume: float,
190 porosity: float,
191 thickness: float,
192 retardation_factor: float = 1.0,
193 nullspace_objective: str = "squared_differences",
194) -> np.ndarray:
195 """
196 Compute deposition rates from concentration changes (deconvolution).
198 The solution for the deposition is fundamentally underdetermined, as multiple
199 deposition histories can lead to the same concentration. This function
200 computes a least-squares solution and then selects a specific solution from the
201 nullspace of the problem based on the provided objective.
203 Parameters
204 ----------
205 flow : array-like
206 Flow rates in aquifer [m3/day]. Length must equal len(tedges) - 1.
207 Must not contain NaN values.
208 tedges : pandas.DatetimeIndex
209 Time bin edges for deposition and flow data. Length must equal
210 len(flow) + 1.
211 cout : array-like
212 Concentration changes in extracted water [ng/m3]. Length must equal
213 len(cout_tedges) - 1. May contain NaN values, which will be excluded
214 from the computation along with corresponding rows in the weight matrix.
215 cout_tedges : pandas.DatetimeIndex
216 Time bin edges for output concentration data. Length must equal
217 len(cout) + 1.
218 aquifer_pore_volume : float
219 Aquifer pore volume [m3].
220 porosity : float
221 Aquifer porosity [dimensionless].
222 thickness : float
223 Aquifer thickness [m].
224 retardation_factor : float, optional
225 Compound retardation factor, by default 1.0. Values > 1.0 indicate
226 slower transport due to sorption/interaction.
227 nullspace_objective : str or callable, optional
228 Objective function to minimize in the nullspace. Options:
230 * "squared_differences" : Minimize sum of squared differences between
231 adjacent deposition rates (default, provides smooth solutions)
232 * "summed_differences" : Minimize sum of absolute differences between
233 adjacent deposition rates (promotes sparse/piecewise constant solutions)
234 * callable : Custom objective function with signature
235 ``objective(coeffs, x_ls, nullspace_basis)``
237 Default is "squared_differences".
239 Returns
240 -------
241 numpy.ndarray
242 Mean deposition rates [ng/m2/day] between tedges. Length equals
243 len(tedges) - 1.
245 Raises
246 ------
247 ValueError
248 If input dimensions are incompatible, if flow contains NaN values,
249 or if the optimization fails.
251 Notes
252 -----
253 This function solves the inverse problem of determining deposition rates
254 from observed concentration changes. Since multiple deposition histories
255 can produce the same concentration pattern, regularization in the nullspace
256 is used to select a physically meaningful solution.
258 The algorithm:
260 1. Validates input dimensions and checks for NaN values in flow
261 2. Computes deposition weight matrix relating deposition to concentration
262 3. Identifies valid rows (no NaN in weights or concentration data)
263 4. Solves the underdetermined system using nullspace regularization
264 5. Returns the regularized deposition rate solution
266 Examples
267 --------
268 Basic usage with default squared differences regularization:
270 >>> import pandas as pd
271 >>> import numpy as np
272 >>> from gwtransport.deposition import extraction_to_deposition
273 >>>
274 >>> # Create input data
275 >>> dates = pd.date_range("2020-01-01", "2020-01-10", freq="D")
276 >>> tedges = pd.date_range("2019-12-31 12:00", "2020-01-10 12:00", freq="D")
277 >>> cout_tedges = pd.date_range("2020-01-03 12:00", "2020-01-12 12:00", freq="D")
278 >>>
279 >>> # Flow and concentration data
280 >>> flow = np.full(len(dates), 100.0) # m3/day
281 >>> cout = np.ones(len(cout_tedges) - 1) * 10.0 # ng/m3
282 >>>
283 >>> # Compute deposition rates
284 >>> dep = extraction_to_deposition(
285 ... flow=flow,
286 ... tedges=tedges,
287 ... cout=cout,
288 ... cout_tedges=cout_tedges,
289 ... aquifer_pore_volume=500.0,
290 ... porosity=0.3,
291 ... thickness=10.0,
292 ... )
293 >>> print(f"Deposition rates shape: {dep.shape}")
294 >>> print(f"Mean deposition rate: {np.nanmean(dep):.2f} ng/m2/day")
296 With summed differences regularization for sparse solutions:
298 >>> dep_sparse = extraction_to_deposition(
299 ... flow=flow,
300 ... tedges=tedges,
301 ... cout=cout,
302 ... cout_tedges=cout_tedges,
303 ... aquifer_pore_volume=500.0,
304 ... porosity=0.3,
305 ... thickness=10.0,
306 ... nullspace_objective="summed_differences",
307 ... )
309 With custom regularization objective:
311 >>> def l2_norm_objective(coeffs, x_ls, nullspace_basis):
312 ... x = x_ls + nullspace_basis @ coeffs
313 ... return np.sum(x**2) # Minimize L2 norm of solution
314 >>>
315 >>> dep_l2 = extraction_to_deposition(
316 ... flow=flow,
317 ... tedges=tedges,
318 ... cout=cout,
319 ... cout_tedges=cout_tedges,
320 ... aquifer_pore_volume=500.0,
321 ... porosity=0.3,
322 ... thickness=10.0,
323 ... nullspace_objective=l2_norm_objective,
324 ... )
326 Handling missing concentration data:
328 >>> # Concentration with some NaN values
329 >>> cout_nan = cout.copy()
330 >>> cout_nan[2:4] = np.nan # Missing data for some time periods
331 >>>
332 >>> dep_robust = extraction_to_deposition(
333 ... flow=flow,
334 ... tedges=tedges,
335 ... cout=cout_nan,
336 ... cout_tedges=cout_tedges,
337 ... aquifer_pore_volume=500.0,
338 ... porosity=0.3,
339 ... thickness=10.0,
340 ... )
341 """
342 tedges, cout_tedges = pd.DatetimeIndex(tedges), pd.DatetimeIndex(cout_tedges)
343 cout_values, flow_values = np.asarray(cout), np.asarray(flow)
345 # Validate input dimensions and values
346 if len(cout_tedges) != len(cout_values) + 1:
347 msg = "cout_tedges must have one more element than cout"
348 raise ValueError(msg)
349 if len(tedges) != len(flow_values) + 1:
350 msg = "tedges must have one more element than flow"
351 raise ValueError(msg)
352 if np.any(np.isnan(flow_values)):
353 msg = "flow array cannot contain NaN values"
354 raise ValueError(msg)
356 # Compute deposition weights
357 deposition_weights = compute_deposition_weights(
358 flow_values=flow_values,
359 tedges=tedges,
360 cout_tedges=cout_tedges,
361 aquifer_pore_volume=aquifer_pore_volume,
362 porosity=porosity,
363 thickness=thickness,
364 retardation_factor=retardation_factor,
365 )
367 # Solve underdetermined system using utils function
368 return solve_underdetermined_system(
369 coefficient_matrix=deposition_weights,
370 rhs_vector=cout_values,
371 nullspace_objective=nullspace_objective,
372 optimization_method="Nelder-Mead",
373 )
376def spinup_duration(
377 *,
378 flow: np.ndarray,
379 flow_tedges: pd.DatetimeIndex,
380 aquifer_pore_volume: float,
381 retardation_factor: float,
382) -> float:
383 """
384 Compute the spinup duration for deposition modeling.
386 The spinup duration is the residence time at the first time step, representing
387 the time needed for the system to become fully informed. Before this duration,
388 the extracted concentration lacks complete deposition history.
390 Parameters
391 ----------
392 flow : numpy.ndarray
393 Flow rate of water in the aquifer [m3/day].
394 flow_tedges : pandas.DatetimeIndex
395 Time edges for the flow data.
396 aquifer_pore_volume : float
397 Pore volume of the aquifer [m3].
398 retardation_factor : float
399 Retardation factor of the compound in the aquifer [dimensionless].
401 Returns
402 -------
403 float
404 Spinup duration in days.
405 """
406 rt = residence_time(
407 flow=flow,
408 flow_tedges=flow_tedges,
409 aquifer_pore_volume=aquifer_pore_volume,
410 retardation_factor=retardation_factor,
411 direction="infiltration_to_extraction",
412 )
413 if np.isnan(rt[0, 0]):
414 msg = "Residence time at the first time step is NaN. This indicates that the aquifer is not fully informed: flow timeseries too short."
415 raise ValueError(msg)
417 # Return the first residence time value
418 return float(rt[0, 0])