Coverage for src/gwtransport/deposition.py: 0%
54 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-20 13:45 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-20 13:45 +0000
1"""Deposition analysis for 1D aquifer systems.
3Analyze compound transport by deposition in aquifer systems with tools for
4computing concentrations and deposition rates based on aquifer properties.
6The model assumes 1D groundwater flow where compound deposition occurs along
7the flow path, enriching the water. Follows advection module patterns for
8consistency.
10Functions
11---------
12extraction_to_deposition : Compute deposition rates from concentration changes
13deposition_to_extraction : Compute concentrations from deposition rates
14"""
16import numpy as np
17import pandas as pd
19from gwtransport.residence_time import residence_time
20from gwtransport.surfacearea import compute_average_heights
21from gwtransport.utils import linear_interpolate, solve_underdetermined_system
24def compute_deposition_weights(
25 *,
26 flow_values,
27 tedges,
28 cout_tedges,
29 aquifer_pore_volume_value,
30 porosity,
31 thickness,
32 retardation_factor=1.0,
33):
34 """Compute deposition weights for concentration-deposition convolution.
36 Parameters
37 ----------
38 flow_values : array_like
39 Flow rates in aquifer [m3/day]. Length must equal len(tedges) - 1.
40 tedges : pandas.DatetimeIndex
41 Time bin edges for flow data.
42 cout_tedges : pandas.DatetimeIndex
43 Time bin edges for output concentration data.
44 aquifer_pore_volume_value : float
45 Aquifer pore volume [m3].
46 porosity : float
47 Aquifer porosity [dimensionless].
48 thickness : float
49 Aquifer thickness [m].
50 retardation_factor : float, optional
51 Compound retardation factor, by default 1.0.
53 Returns
54 -------
55 ndarray
56 Deposition weights matrix with shape (len(cout_tedges) - 1, len(tedges) - 1).
57 May contain NaN values where residence time cannot be computed.
59 Notes
60 -----
61 The returned weights matrix may contain NaN values in locations where the
62 residence time calculation fails or is undefined. This typically occurs
63 when flow conditions result in invalid or non-physical residence times.
64 """
65 # Convert to days relative to first time edge
66 t0 = tedges[0]
67 tedges_days = ((tedges - t0) / pd.Timedelta(days=1)).values
68 cout_tedges_days = ((cout_tedges - t0) / pd.Timedelta(days=1)).values
70 # Compute residence times and cumulative flow
71 rt_edges = residence_time(
72 flow=flow_values,
73 flow_tedges=tedges,
74 index=cout_tedges,
75 aquifer_pore_volume=float(aquifer_pore_volume_value),
76 retardation_factor=retardation_factor,
77 direction="extraction_to_infiltration",
78 )
79 cout_tedges_days_infiltration = cout_tedges_days - rt_edges[0]
81 flow_tdelta = np.diff(tedges_days, prepend=0.0)
82 flow_cum = (np.concatenate(([0.0], flow_values)) * flow_tdelta).cumsum()
84 # Interpolate volumes at concentration time edges
85 start_vol = linear_interpolate(tedges_days, flow_cum, cout_tedges_days_infiltration)
86 end_vol = linear_interpolate(tedges_days, flow_cum, cout_tedges_days)
88 # Compute deposition weights
89 flow_cum_cout = flow_cum[None, :] - start_vol[:, None]
90 volume_array = compute_average_heights(
91 tedges_days, flow_cum_cout, 0.0, retardation_factor * float(aquifer_pore_volume_value)
92 )
93 area_array = volume_array / (porosity * thickness)
94 extracted_volume = np.diff(end_vol)
95 return area_array * np.diff(tedges_days)[None, :] / extracted_volume[:, None]
98def deposition_to_extraction(
99 *,
100 dep,
101 flow,
102 tedges,
103 cout_tedges,
104 aquifer_pore_volume_value,
105 porosity,
106 thickness,
107 retardation_factor=1.0,
108):
109 """Compute concentrations from deposition rates (convolution).
111 Parameters
112 ----------
113 dep : array_like
114 Deposition rates [ng/m2/day]. Length must equal len(tedges) - 1.
115 flow : array_like
116 Flow rates in aquifer [m3/day]. Length must equal len(tedges) - 1.
117 tedges : pandas.DatetimeIndex
118 Time bin edges for deposition and flow data.
119 cout_tedges : pandas.DatetimeIndex
120 Time bin edges for output concentration data.
121 aquifer_pore_volume_value : float
122 Aquifer pore volume [m3].
123 porosity : float
124 Aquifer porosity [dimensionless].
125 thickness : float
126 Aquifer thickness [m].
127 retardation_factor : float, optional
128 Compound retardation factor, by default 1.0.
130 Returns
131 -------
132 ndarray
133 Concentration changes [ng/m3] with length len(cout_tedges) - 1.
135 Examples
136 --------
137 >>> import pandas as pd
138 >>> import numpy as np
139 >>> dates = pd.date_range("2020-01-01", "2020-01-10", freq="D")
140 >>> tedges = pd.date_range("2019-12-31 12:00", "2020-01-10 12:00", freq="D")
141 >>> cout_tedges = pd.date_range("2020-01-03 12:00", "2020-01-12 12:00", freq="D")
142 >>> dep = np.ones(len(dates))
143 >>> flow = np.full(len(dates), 100.0)
144 >>> cout = deposition_to_extraction(
145 ... dep=dep,
146 ... flow=flow,
147 ... tedges=tedges,
148 ... cout_tedges=cout_tedges,
149 ... aquifer_pore_volume_value=500.0,
150 ... porosity=0.3,
151 ... thickness=10.0,
152 ... )
153 """
154 tedges, cout_tedges = pd.DatetimeIndex(tedges), pd.DatetimeIndex(cout_tedges)
155 dep_values, flow_values = np.asarray(dep), np.asarray(flow)
157 # Validate input dimensions and values
158 if len(tedges) != len(dep_values) + 1:
159 _msg = "tedges must have one more element than dep"
160 raise ValueError(_msg)
161 if len(tedges) != len(flow_values) + 1:
162 _msg = "tedges must have one more element than flow"
163 raise ValueError(_msg)
164 if np.any(np.isnan(dep_values)) or np.any(np.isnan(flow_values)):
165 _msg = "Input arrays cannot contain NaN values"
166 raise ValueError(_msg)
168 # Compute deposition weights
169 deposition_weights = compute_deposition_weights(
170 flow_values=flow_values,
171 tedges=tedges,
172 cout_tedges=cout_tedges,
173 aquifer_pore_volume_value=aquifer_pore_volume_value,
174 porosity=porosity,
175 thickness=thickness,
176 retardation_factor=retardation_factor,
177 )
179 return deposition_weights.dot(dep_values)
182def extraction_to_deposition(
183 *,
184 flow,
185 tedges,
186 cout,
187 cout_tedges,
188 aquifer_pore_volume_value,
189 porosity,
190 thickness,
191 retardation_factor=1.0,
192 nullspace_objective="squared_differences",
193):
194 """
195 Compute deposition rates from concentration changes (deconvolution).
197 The solution for the deposition is fundamentally underdetermined, as multiple
198 deposition histories can lead to the same concentration. This function
199 computes a least-squares solution and then selects a specific solution from the
200 nullspace of the problem based on the provided objective.
202 Parameters
203 ----------
204 flow : array_like
205 Flow rates in aquifer [m3/day]. Length must equal len(tedges) - 1.
206 Must not contain NaN values.
207 tedges : pandas.DatetimeIndex
208 Time bin edges for deposition and flow data. Length must equal
209 len(flow) + 1.
210 cout : array_like
211 Concentration changes in extracted water [ng/m3]. Length must equal
212 len(cout_tedges) - 1. May contain NaN values, which will be excluded
213 from the computation along with corresponding rows in the weight matrix.
214 cout_tedges : pandas.DatetimeIndex
215 Time bin edges for output concentration data. Length must equal
216 len(cout) + 1.
217 aquifer_pore_volume_value : float
218 Aquifer pore volume [m3].
219 porosity : float
220 Aquifer porosity [dimensionless].
221 thickness : float
222 Aquifer thickness [m].
223 retardation_factor : float, optional
224 Compound retardation factor, by default 1.0. Values > 1.0 indicate
225 slower transport due to sorption/interaction.
226 nullspace_objective : str or callable, optional
227 Objective function to minimize in the nullspace. Options:
229 * "squared_differences" : Minimize sum of squared differences between
230 adjacent deposition rates (default, provides smooth solutions)
231 * "summed_differences" : Minimize sum of absolute differences between
232 adjacent deposition rates (promotes sparse/piecewise constant solutions)
233 * callable : Custom objective function with signature
234 ``objective(coeffs, x_ls, nullspace_basis)``
236 Default is "squared_differences".
238 Returns
239 -------
240 numpy.ndarray
241 Mean deposition rates [ng/m2/day] between tedges. Length equals
242 len(tedges) - 1.
244 Raises
245 ------
246 ValueError
247 If input dimensions are incompatible, if flow contains NaN values,
248 or if the optimization fails.
250 Notes
251 -----
252 This function solves the inverse problem of determining deposition rates
253 from observed concentration changes. Since multiple deposition histories
254 can produce the same concentration pattern, regularization in the nullspace
255 is used to select a physically meaningful solution.
257 The algorithm:
259 1. Validates input dimensions and checks for NaN values in flow
260 2. Computes deposition weight matrix relating deposition to concentration
261 3. Identifies valid rows (no NaN in weights or concentration data)
262 4. Solves the underdetermined system using nullspace regularization
263 5. Returns the regularized deposition rate solution
265 Examples
266 --------
267 Basic usage with default squared differences regularization:
269 >>> import pandas as pd
270 >>> import numpy as np
271 >>> from gwtransport.deposition import extraction_to_deposition
272 >>>
273 >>> # Create input data
274 >>> dates = pd.date_range("2020-01-01", "2020-01-10", freq="D")
275 >>> tedges = pd.date_range("2019-12-31 12:00", "2020-01-10 12:00", freq="D")
276 >>> cout_tedges = pd.date_range("2020-01-03 12:00", "2020-01-12 12:00", freq="D")
277 >>>
278 >>> # Flow and concentration data
279 >>> flow = np.full(len(dates), 100.0) # m3/day
280 >>> cout = np.ones(len(cout_tedges) - 1) * 10.0 # ng/m3
281 >>>
282 >>> # Compute deposition rates
283 >>> dep = extraction_to_deposition(
284 ... flow=flow,
285 ... tedges=tedges,
286 ... cout=cout,
287 ... cout_tedges=cout_tedges,
288 ... aquifer_pore_volume_value=500.0,
289 ... porosity=0.3,
290 ... thickness=10.0,
291 ... )
292 >>> print(f"Deposition rates shape: {dep.shape}")
293 >>> print(f"Mean deposition rate: {np.nanmean(dep):.2f} ng/m2/day")
295 With summed differences regularization for sparse solutions:
297 >>> dep_sparse = extraction_to_deposition(
298 ... flow=flow,
299 ... tedges=tedges,
300 ... cout=cout,
301 ... cout_tedges=cout_tedges,
302 ... aquifer_pore_volume_value=500.0,
303 ... porosity=0.3,
304 ... thickness=10.0,
305 ... nullspace_objective="summed_differences",
306 ... )
308 With custom regularization objective:
310 >>> def l2_norm_objective(coeffs, x_ls, nullspace_basis):
311 ... x = x_ls + nullspace_basis @ coeffs
312 ... return np.sum(x**2) # Minimize L2 norm of solution
313 >>>
314 >>> dep_l2 = extraction_to_deposition(
315 ... flow=flow,
316 ... tedges=tedges,
317 ... cout=cout,
318 ... cout_tedges=cout_tedges,
319 ... aquifer_pore_volume_value=500.0,
320 ... porosity=0.3,
321 ... thickness=10.0,
322 ... nullspace_objective=l2_norm_objective,
323 ... )
325 Handling missing concentration data:
327 >>> # Concentration with some NaN values
328 >>> cout_nan = cout.copy()
329 >>> cout_nan[2:4] = np.nan # Missing data for some time periods
330 >>>
331 >>> dep_robust = extraction_to_deposition(
332 ... flow=flow,
333 ... tedges=tedges,
334 ... cout=cout_nan,
335 ... cout_tedges=cout_tedges,
336 ... aquifer_pore_volume_value=500.0,
337 ... porosity=0.3,
338 ... thickness=10.0,
339 ... )
340 """
341 tedges, cout_tedges = pd.DatetimeIndex(tedges), pd.DatetimeIndex(cout_tedges)
342 cout_values, flow_values = np.asarray(cout), np.asarray(flow)
344 # Validate input dimensions and values
345 if len(cout_tedges) != len(cout_values) + 1:
346 msg = "cout_tedges must have one more element than cout"
347 raise ValueError(msg)
348 if len(tedges) != len(flow_values) + 1:
349 msg = "tedges must have one more element than flow"
350 raise ValueError(msg)
351 if np.any(np.isnan(flow_values)):
352 msg = "flow array cannot contain NaN values"
353 raise ValueError(msg)
355 # Compute deposition weights
356 deposition_weights = compute_deposition_weights(
357 flow_values=flow_values,
358 tedges=tedges,
359 cout_tedges=cout_tedges,
360 aquifer_pore_volume_value=aquifer_pore_volume_value,
361 porosity=porosity,
362 thickness=thickness,
363 retardation_factor=retardation_factor,
364 )
366 # Solve underdetermined system using utils function
367 return solve_underdetermined_system(
368 coefficient_matrix=deposition_weights,
369 rhs_vector=cout_values,
370 nullspace_objective=nullspace_objective,
371 optimization_method="Nelder-Mead",
372 )
375def spinup_duration(
376 *,
377 flow: np.ndarray,
378 flow_tedges: pd.DatetimeIndex,
379 aquifer_pore_volume_value: float,
380 retardation_factor: float,
381) -> float:
382 """
383 Compute the spinup duration for deposition modeling.
385 The spinup duration is the residence time at the first time step, representing
386 the time needed for the system to become fully informed. Before this duration,
387 the extracted concentration lacks complete deposition history.
389 Parameters
390 ----------
391 flow : numpy.ndarray
392 Flow rate of water in the aquifer [m3/day].
393 flow_tedges : pandas.DatetimeIndex
394 Time edges for the flow data.
395 aquifer_pore_volume_value : float
396 Pore volume of the aquifer [m3].
397 retardation_factor : float
398 Retardation factor of the compound in the aquifer [dimensionless].
400 Returns
401 -------
402 float
403 Spinup duration in days.
404 """
405 rt = residence_time(
406 flow=flow,
407 flow_tedges=flow_tedges,
408 aquifer_pore_volume=aquifer_pore_volume_value,
409 retardation_factor=retardation_factor,
410 direction="infiltration_to_extraction",
411 )
412 if np.isnan(rt[0, 0]):
413 msg = "Residence time at the first time step is NaN. This indicates that the aquifer is not fully informed: flow timeseries too short."
414 raise ValueError(msg)
416 # Return the first residence time value
417 return float(rt[0, 0])