Coverage for src/gwtransport/advection.py: 0%
112 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-20 13:45 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-20 13:45 +0000
1"""
2Advection Analysis for 1D Aquifer Systems.
4This module provides functions to analyze compound transport by advection
5in aquifer systems. It includes tools for computing concentrations of the extracted water
6based on the concentration of the infiltrating water, extraction data and aquifer properties.
8The model assumes requires the groundwaterflow to be reduced to a 1D system. On one side,
9water with a certain concentration infiltrates ('cin'), the water flows through the aquifer and
10the compound of interest flows through the aquifer with a retarded velocity. The water is
11extracted ('cout').
13Main functions:
14- infiltration_to_extraction: Compute the concentration of the extracted water by shifting cin with its residence time. This corresponds to a convolution operation.
15- gamma_infiltration_to_extraction: Similar to infiltration_to_extraction, but for a gamma distribution of aquifer pore volumes.
16- distribution_infiltration_to_extraction: Similar to infiltration_to_extraction, but for an arbitrary distribution of aquifer pore volumes.
17"""
19import numpy as np
20import pandas as pd
22from gwtransport import gamma
23from gwtransport.residence_time import residence_time
24from gwtransport.utils import compute_time_edges, interp_series, partial_isin
27def infiltration_to_extraction(cin_series, flow_series, aquifer_pore_volume, retardation_factor=1.0, cout_index="cin"):
28 """
29 Compute the concentration of the extracted water by shifting cin with its residence time.
31 The compound is retarded in the aquifer with a retardation factor. The residence
32 time is computed based on the flow rate of the water in the aquifer and the pore volume
33 of the aquifer.
35 This function represents infiltration to extraction modeling (equivalent to convolution).
37 Parameters
38 ----------
39 cin_series : pandas.Series
40 Concentration of the compound in the infiltrating water [ng/m3]. The cin_series should be the average concentration of a time bin. The index should be a pandas.DatetimeIndex
41 and is labeled at the end of the time bin.
42 flow_series : pandas.Series
43 Flow rate of water in the aquifer [m3/day]. The flow_series should be the average flow of a time bin. The index should be a pandas.DatetimeIndex
44 and is labeled at the end of the time bin.
45 aquifer_pore_volume : float
46 Pore volume of the aquifer [m3].
47 cout_index : str, optional
48 The index of the output series. Can be 'cin', 'flow', or 'cout'. Default is 'cin'.
49 - 'cin': The output series will have the same index as `cin_series`.
50 - 'flow': The output series will have the same index as `flow_series`.
51 - 'cout': The output series will have the same index as `cin_series + residence_time`.
53 Returns
54 -------
55 numpy.ndarray
56 Concentration of the compound in the extracted water [ng/m3].
58 Examples
59 --------
60 Basic usage with single pore volume:
62 >>> import pandas as pd
63 >>> import numpy as np
64 >>> from gwtransport.advection import infiltration_to_extraction
65 >>>
66 >>> # Create input data
67 >>> dates = pd.date_range(start="2020-01-01", end="2020-01-10", freq="D")
68 >>> cin = pd.Series(np.ones(len(dates)), index=dates)
69 >>> flow = pd.Series(np.ones(len(dates)) * 100, index=dates) # 100 m3/day
70 >>>
71 >>> # Single aquifer pore volume
72 >>> aquifer_pore_volume = 500.0 # m3
73 >>>
74 >>> # Run infiltration to extraction model (default returns on cin index)
75 >>> cout = infiltration_to_extraction(cin, flow, aquifer_pore_volume)
76 >>> len(cout) == len(cin)
77 True
79 Different output index options:
81 >>> # Output on cin index (default)
82 >>> cout_cin = infiltration_to_extraction(
83 ... cin, flow, aquifer_pore_volume, cout_index="cin"
84 ... )
85 >>>
86 >>> # Output on flow index
87 >>> cout_flow = infiltration_to_extraction(
88 ... cin, flow, aquifer_pore_volume, cout_index="flow"
89 ... )
90 >>>
91 >>> # Output on shifted time index (cin + residence_time)
92 >>> cout_shifted = infiltration_to_extraction(
93 ... cin, flow, aquifer_pore_volume, cout_index="cout"
94 ... )
96 With retardation factor:
98 >>> # Compound moves twice as slowly due to sorption
99 >>> cout = infiltration_to_extraction(
100 ... cin, flow, aquifer_pore_volume, retardation_factor=2.0
101 ... )
102 """
103 # Create flow tedges from the flow series index (assuming it's at the end of bins)
104 flow_tedges = compute_time_edges(tedges=None, tstart=None, tend=flow_series.index, number_of_bins=len(flow_series))
105 rt_array = residence_time(
106 flow=flow_series,
107 flow_tedges=flow_tedges,
108 index=cin_series.index,
109 aquifer_pore_volume=aquifer_pore_volume,
110 retardation_factor=retardation_factor,
111 direction="infiltration_to_extraction",
112 )
114 rt = pd.to_timedelta(rt_array[0], unit="D", errors="coerce")
115 index = cin_series.index + rt
117 cout = pd.Series(data=cin_series.values, index=index, name="cout")
119 if cout_index == "cin":
120 return interp_series(cout, cin_series.index)
121 if cout_index == "flow":
122 # If cout_index is 'flow', we need to resample cout to the flow index
123 return interp_series(cout, flow_series.index)
124 if cout_index == "cout":
125 # If cout_index is 'cout', we return the cout as is
126 return cout.values
128 msg = f"Invalid cout_index: {cout_index}. Must be 'cin', 'flow', or 'cout'."
129 raise ValueError(msg)
132def extraction_to_infiltration(cout, flow, aquifer_pore_volume, retardation_factor=1.0, resample_dates=None):
133 """
134 Compute the concentration of the infiltrating water by shifting cout with its residence time.
136 This function represents extraction to infiltration modeling (equivalent to deconvolution).
138 Parameters
139 ----------
140 cout : pandas.Series
141 Concentration of the compound in the extracted water [ng/m3].
142 flow : pandas.Series
143 Flow rate of water in the aquifer [m3/day].
144 aquifer_pore_volume : float
145 Pore volume of the aquifer [m3].
147 Returns
148 -------
149 numpy.ndarray
150 Concentration of the compound in the infiltrating water [ng/m3].
151 """
152 msg = "Extraction to infiltration advection (deconvolution) is not implemented yet"
153 raise NotImplementedError(msg)
156def gamma_infiltration_to_extraction(
157 *,
158 cin,
159 flow,
160 tedges,
161 cout_tedges,
162 alpha=None,
163 beta=None,
164 mean=None,
165 std=None,
166 n_bins=100,
167 retardation_factor=1.0,
168):
169 """
170 Compute the concentration of the extracted water by shifting cin with its residence time.
172 The compound is retarded in the aquifer with a retardation factor. The residence
173 time is computed based on the flow rate of the water in the aquifer and the pore volume
174 of the aquifer. The aquifer pore volume is approximated by a gamma distribution, with
175 parameters alpha and beta.
177 This function represents infiltration to extraction modeling (equivalent to convolution).
179 Provide either alpha and beta or mean and std.
181 Parameters
182 ----------
183 cin : pandas.Series
184 Concentration of the compound in infiltrating water or temperature of infiltrating
185 water.
186 cin_tedges : pandas.DatetimeIndex
187 Time edges for the concentration data. Used to compute the cumulative concentration.
188 Has a length of one more than `cin`.
189 cout_tedges : pandas.DatetimeIndex
190 Time edges for the output data. Used to compute the cumulative concentration.
191 Has a length of one more than `flow`.
192 flow : pandas.Series
193 Flow rate of water in the aquifer [m3/day].
194 flow_tedges : pandas.DatetimeIndex
195 Time edges for the flow data. Used to compute the cumulative flow.
196 Has a length of one more than `flow`.
197 alpha : float, optional
198 Shape parameter of gamma distribution of the aquifer pore volume (must be > 0)
199 beta : float, optional
200 Scale parameter of gamma distribution of the aquifer pore volume (must be > 0)
201 mean : float, optional
202 Mean of the gamma distribution.
203 std : float, optional
204 Standard deviation of the gamma distribution.
205 n_bins : int
206 Number of bins to discretize the gamma distribution.
207 retardation_factor : float
208 Retardation factor of the compound in the aquifer.
210 Returns
211 -------
212 numpy.ndarray
213 Concentration of the compound in the extracted water [ng/m3] or temperature.
215 Examples
216 --------
217 Basic usage with alpha and beta parameters:
219 >>> import pandas as pd
220 >>> import numpy as np
221 >>> from gwtransport.utils import compute_time_edges
222 >>> from gwtransport.advection import gamma_infiltration_to_extraction
223 >>>
224 >>> # Create input data with aligned time edges
225 >>> dates = pd.date_range(start="2020-01-01", end="2020-01-20", freq="D")
226 >>> tedges = compute_time_edges(
227 ... tedges=None, tstart=None, tend=dates, number_of_bins=len(dates)
228 ... )
229 >>>
230 >>> # Create output time edges (can be different alignment)
231 >>> cout_dates = pd.date_range(start="2020-01-05", end="2020-01-15", freq="D")
232 >>> cout_tedges = compute_time_edges(
233 ... tedges=None, tstart=None, tend=cout_dates, number_of_bins=len(cout_dates)
234 ... )
235 >>>
236 >>> # Input concentration and flow (same length, aligned with tedges)
237 >>> cin = pd.Series(np.ones(len(dates)), index=dates)
238 >>> flow = pd.Series(np.ones(len(dates)) * 100, index=dates) # 100 m3/day
239 >>>
240 >>> # Run gamma_infiltration_to_extraction with alpha/beta parameters
241 >>> cout = gamma_infiltration_to_extraction(
242 ... cin=cin,
243 ... cin_tedges=tedges,
244 ... cout_tedges=cout_tedges,
245 ... flow=flow,
246 ... flow_tedges=tedges, # Must be identical to cin_tedges
247 ... alpha=10.0,
248 ... beta=10.0,
249 ... n_bins=5,
250 ... )
251 >>> cout.shape
252 (11,)
254 Using mean and std parameters instead:
256 >>> cout = gamma_infiltration_to_extraction(
257 ... cin=cin,
258 ... cin_tedges=tedges,
259 ... cout_tedges=cout_tedges,
260 ... flow=flow,
261 ... flow_tedges=tedges,
262 ... mean=100.0,
263 ... std=20.0,
264 ... n_bins=5,
265 ... )
267 With retardation factor:
269 >>> cout = gamma_infiltration_to_extraction(
270 ... cin=cin,
271 ... cin_tedges=tedges,
272 ... cout_tedges=cout_tedges,
273 ... flow=flow,
274 ... flow_tedges=tedges,
275 ... alpha=10.0,
276 ... beta=10.0,
277 ... retardation_factor=2.0, # Doubles residence time
278 ... )
279 """
280 bins = gamma.bins(alpha=alpha, beta=beta, mean=mean, std=std, n_bins=n_bins)
281 return distribution_infiltration_to_extraction(
282 cin=cin,
283 flow=flow,
284 tedges=tedges,
285 cout_tedges=cout_tedges,
286 aquifer_pore_volumes=bins["expected_value"],
287 retardation_factor=retardation_factor,
288 )
291def gamma_extraction_to_infiltration(cout, flow, alpha, beta, n_bins=100, retardation_factor=1.0):
292 """
293 Compute the concentration of the infiltrating water by shifting cout with its residence time.
295 This function represents extraction to infiltration modeling (equivalent to deconvolution).
297 Parameters
298 ----------
299 cout : pandas.Series
300 Concentration of the compound in the extracted water [ng/m3].
301 flow : pandas.Series
302 Flow rate of water in the aquifer [m3/day].
303 alpha : float
304 Shape parameter of gamma distribution of the aquifer pore volume (must be > 0)
305 beta : float
306 Scale parameter of gamma distribution of the aquifer pore volume (must be > 0)
307 n_bins : int
308 Number of bins to discretize the gamma distribution.
309 retardation_factor : float
310 Retardation factor of the compound in the aquifer.
312 Returns
313 -------
314 NotImplementedError
315 This function is not yet implemented.
316 """
317 msg = "Extraction to infiltration advection gamma (deconvolution) is not implemented yet"
318 raise NotImplementedError(msg)
321def distribution_infiltration_to_extraction(
322 *,
323 cin,
324 flow,
325 tedges,
326 cout_tedges,
327 aquifer_pore_volumes,
328 retardation_factor=1.0,
329):
330 """
331 Compute the concentration of the extracted water using flow-weighted advection.
333 This function implements an infiltration to extraction advection model where cin and flow values
334 correspond to the same aligned time bins defined by tedges.
336 The algorithm:
337 1. Computes residence times for each pore volume at cout time edges
338 2. Calculates infiltration time edges by subtracting residence times
339 3. Determines temporal overlaps between infiltration and cin time windows
340 4. Creates flow-weighted overlap matrices normalized by total weights
341 5. Computes weighted contributions and averages across pore volumes
343 Parameters
344 ----------
345 cin : array-like
346 Concentration values of infiltrating water or temperature [concentration units].
347 Length must match the number of time bins defined by tedges.
348 flow : array-like
349 Flow rate values in the aquifer [m3/day].
350 Length must match cin and the number of time bins defined by tedges.
351 tedges : pandas.DatetimeIndex
352 Time edges defining bins for both cin and flow data. Has length of
353 len(cin) + 1 and len(flow) + 1.
354 cout_tedges : pandas.DatetimeIndex
355 Time edges for output data bins. Has length of desired output + 1.
356 Can have different time alignment and resolution than tedges.
357 aquifer_pore_volumes : array-like
358 Array of aquifer pore volumes [m3] representing the distribution
359 of residence times in the aquifer system.
360 retardation_factor : float, optional
361 Retardation factor of the compound in the aquifer (default 1.0).
362 Values > 1.0 indicate slower transport due to sorption/interaction.
364 Returns
365 -------
366 numpy.ndarray
367 Flow-weighted concentration in the extracted water. Same units as cin.
368 Length equals len(cout_tedges) - 1. NaN values indicate time periods
369 with no valid contributions from the infiltration data.
371 Raises
372 ------
373 ValueError
374 If tedges length doesn't match cin/flow arrays plus one, or if
375 infiltration time edges become non-monotonic (invalid input conditions).
377 Examples
378 --------
379 Basic usage with pandas Series:
381 >>> import pandas as pd
382 >>> import numpy as np
383 >>> from gwtransport.utils import compute_time_edges
384 >>> from gwtransport.advection import distribution_infiltration_to_extraction
385 >>>
386 >>> # Create input data
387 >>> dates = pd.date_range(start="2020-01-01", end="2020-01-20", freq="D")
388 >>> tedges = compute_time_edges(
389 ... tedges=None, tstart=None, tend=dates, number_of_bins=len(dates)
390 ... )
391 >>>
392 >>> # Create output time edges (different alignment)
393 >>> cout_dates = pd.date_range(start="2020-01-05", end="2020-01-15", freq="D")
394 >>> cout_tedges = compute_time_edges(
395 ... tedges=None, tstart=None, tend=cout_dates, number_of_bins=len(cout_dates)
396 ... )
397 >>>
398 >>> # Input concentration and flow
399 >>> cin = pd.Series(np.ones(len(dates)), index=dates)
400 >>> flow = pd.Series(np.ones(len(dates)) * 100, index=dates) # 100 m3/day
401 >>>
402 >>> # Define distribution of aquifer pore volumes
403 >>> aquifer_pore_volumes = np.array([50, 100, 200]) # m3
404 >>>
405 >>> # Run distribution_infiltration_to_extraction
406 >>> cout = distribution_infiltration_to_extraction(
407 ... cin=cin,
408 ... flow=flow,
409 ... tedges=tedges,
410 ... cout_tedges=cout_tedges,
411 ... aquifer_pore_volumes=aquifer_pore_volumes,
412 ... )
413 >>> cout.shape
414 (11,)
416 Using array inputs instead of pandas Series:
418 >>> # Convert to arrays
419 >>> cin_values = cin.values
420 >>> flow_values = flow.values
421 >>>
422 >>> cout = distribution_infiltration_to_extraction(
423 ... cin=cin_values,
424 ... flow=flow_values,
425 ... tedges=tedges,
426 ... cout_tedges=cout_tedges,
427 ... aquifer_pore_volumes=aquifer_pore_volumes,
428 ... )
430 With retardation factor:
432 >>> cout = distribution_infiltration_to_extraction(
433 ... cin=cin,
434 ... flow=flow,
435 ... tedges=tedges,
436 ... cout_tedges=cout_tedges,
437 ... aquifer_pore_volumes=aquifer_pore_volumes,
438 ... retardation_factor=2.0, # Compound moves twice as slowly
439 ... )
441 Using single pore volume:
443 >>> single_volume = np.array([100]) # Single 100 m3 pore volume
444 >>> cout = distribution_infiltration_to_extraction(
445 ... cin=cin,
446 ... flow=flow,
447 ... tedges=tedges,
448 ... cout_tedges=cout_tedges,
449 ... aquifer_pore_volumes=single_volume,
450 ... )
451 """
452 tedges = pd.DatetimeIndex(tedges)
453 cout_tedges = pd.DatetimeIndex(cout_tedges)
455 if len(tedges) != len(cin) + 1:
456 msg = "tedges must have one more element than cin"
457 raise ValueError(msg)
458 if len(tedges) != len(flow) + 1:
459 msg = "tedges must have one more element than flow"
460 raise ValueError(msg)
462 # Convert to arrays for vectorized operations
463 cin_values = np.asarray(cin)
464 flow_values = np.asarray(flow)
466 # Validate inputs do not contain NaN values
467 if np.any(np.isnan(cin_values)):
468 msg = "cin contains NaN values, which are not allowed"
469 raise ValueError(msg)
470 if np.any(np.isnan(flow_values)):
471 msg = "flow contains NaN values, which are not allowed"
472 raise ValueError(msg)
473 cin_tedges_days = ((tedges - tedges[0]) / pd.Timedelta(days=1)).values
474 cout_tedges_days = ((cout_tedges - tedges[0]) / pd.Timedelta(days=1)).values
475 aquifer_pore_volumes = np.asarray(aquifer_pore_volumes)
477 # Pre-compute all residence times and infiltration edges
478 rt_edges_2d = residence_time(
479 flow=flow_values,
480 flow_tedges=tedges,
481 index=cout_tedges,
482 aquifer_pore_volume=aquifer_pore_volumes,
483 retardation_factor=retardation_factor,
484 direction="extraction_to_infiltration",
485 )
486 infiltration_tedges_2d = cout_tedges_days[None, :] - rt_edges_2d
488 # Pre-compute valid bins and count
489 valid_bins_2d = ~(np.isnan(infiltration_tedges_2d[:, :-1]) | np.isnan(infiltration_tedges_2d[:, 1:]))
490 valid_pv_count = np.sum(valid_bins_2d, axis=0)
492 # Initialize accumulator
493 normalized_weights = _distribution_infiltration_to_extraction_weights(
494 cout_tedges,
495 aquifer_pore_volumes,
496 cin_values,
497 flow_values,
498 cin_tedges_days,
499 infiltration_tedges_2d,
500 valid_bins_2d,
501 valid_pv_count,
502 )
504 # Apply to concentrations and handle NaN for periods with no contributions
505 out = normalized_weights.dot(cin_values)
506 out[valid_pv_count == 0] = np.nan
508 return out
511def _distribution_infiltration_to_extraction_weights(
512 cout_tedges,
513 aquifer_pore_volumes,
514 cin_values,
515 flow_values,
516 cin_tedges_days,
517 infiltration_tedges_2d,
518 valid_bins_2d,
519 valid_pv_count,
520):
521 accumulated_weights = np.zeros((len(cout_tedges) - 1, len(cin_values)))
523 # Loop over each pore volume
524 for i in range(len(aquifer_pore_volumes)):
525 if np.any(valid_bins_2d[i, :]):
526 overlap_matrix = partial_isin(infiltration_tedges_2d[i, :], cin_tedges_days)
527 accumulated_weights[valid_bins_2d[i, :], :] += overlap_matrix[valid_bins_2d[i, :], :]
529 # Average across valid pore volumes and apply flow weighting
530 averaged_weights = np.zeros_like(accumulated_weights)
531 valid_cout = valid_pv_count > 0
532 averaged_weights[valid_cout, :] = accumulated_weights[valid_cout, :] / valid_pv_count[valid_cout, None]
534 # Apply flow weighting after averaging
535 flow_weighted_averaged = averaged_weights * flow_values[None, :]
537 total_weights = np.sum(flow_weighted_averaged, axis=1)
538 valid_weights = total_weights > 0
539 normalized_weights = np.zeros_like(flow_weighted_averaged)
540 normalized_weights[valid_weights, :] = flow_weighted_averaged[valid_weights, :] / total_weights[valid_weights, None]
541 return normalized_weights
544def distribution_extraction_to_infiltration(
545 *,
546 cout,
547 flow,
548 tedges,
549 cin_tedges,
550 aquifer_pore_volumes,
551 retardation_factor=1.0,
552):
553 """
554 Compute the concentration of the infiltrating water from extracted water (deconvolution).
556 This function implements an extraction to infiltration advection model (inverse of distribution_infiltration_to_extraction)
557 where cout and flow values correspond to the same aligned time bins defined by tedges.
559 SYMMETRIC RELATIONSHIP:
560 - distribution_infiltration_to_extraction: cin + tedges → cout + cout_tedges
561 - distribution_extraction_to_infiltration: cout + tedges → cin + cin_tedges
563 The algorithm (symmetric to distribution_infiltration_to_extraction):
564 1. Computes residence times for each pore volume at cint time edges
565 2. Calculates extraction time edges by adding residence times (reverse of infiltration_to_extraction)
566 3. Determines temporal overlaps between extraction and cout time windows
567 4. Creates flow-weighted overlap matrices normalized by total weights
568 5. Computes weighted contributions and averages across pore volumes
570 Parameters
571 ----------
572 cout : array-like
573 Concentration values of extracted water [concentration units].
574 Length must match the number of time bins defined by tedges.
575 flow : array-like
576 Flow rate values in the aquifer [m3/day].
577 Length must match cout and the number of time bins defined by tedges.
578 tedges : pandas.DatetimeIndex
579 Time edges defining bins for both cout and flow data. Has length of
580 len(cout) + 1 and len(flow) + 1.
581 cin_tedges : pandas.DatetimeIndex
582 Time edges for output (infiltration) data bins. Has length of desired output + 1.
583 Can have different time alignment and resolution than tedges.
584 aquifer_pore_volumes : array-like
585 Array of aquifer pore volumes [m3] representing the distribution
586 of residence times in the aquifer system.
587 retardation_factor : float, optional
588 Retardation factor of the compound in the aquifer (default 1.0).
589 Values > 1.0 indicate slower transport due to sorption/interaction.
591 Returns
592 -------
593 numpy.ndarray
594 Flow-weighted concentration in the infiltrating water. Same units as cout.
595 Length equals len(cin_tedges) - 1. NaN values indicate time periods
596 with no valid contributions from the extraction data.
598 Raises
599 ------
600 ValueError
601 If tedges length doesn't match cout/flow arrays plus one, or if
602 extraction time edges become non-monotonic (invalid input conditions).
604 Examples
605 --------
606 Basic usage with pandas Series:
608 >>> import pandas as pd
609 >>> import numpy as np
610 >>> from gwtransport.utils import compute_time_edges
611 >>> from gwtransport.advection import distribution_extraction_to_infiltration
612 >>>
613 >>> # Create input data
614 >>> dates = pd.date_range(start="2020-01-01", end="2020-01-20", freq="D")
615 >>> tedges = compute_time_edges(
616 ... tedges=None, tstart=None, tend=dates, number_of_bins=len(dates)
617 ... )
618 >>>
619 >>> # Create output time edges (different alignment)
620 >>> cint_dates = pd.date_range(start="2019-12-25", end="2020-01-15", freq="D")
621 >>> cin_tedges = compute_time_edges(
622 ... tedges=None, tstart=None, tend=cint_dates, number_of_bins=len(cint_dates)
623 ... )
624 >>>
625 >>> # Input concentration and flow
626 >>> cout = pd.Series(np.ones(len(dates)), index=dates)
627 >>> flow = pd.Series(np.ones(len(dates)) * 100, index=dates) # 100 m3/day
628 >>>
629 >>> # Define distribution of aquifer pore volumes
630 >>> aquifer_pore_volumes = np.array([50, 100, 200]) # m3
631 >>>
632 >>> # Run distribution_extraction_to_infiltration
633 >>> cin = distribution_extraction_to_infiltration(
634 ... cout=cout,
635 ... flow=flow,
636 ... tedges=tedges,
637 ... cin_tedges=cin_tedges,
638 ... aquifer_pore_volumes=aquifer_pore_volumes,
639 ... )
640 >>> cin.shape
641 (22,)
643 Using array inputs instead of pandas Series:
645 >>> # Convert to arrays
646 >>> cout_values = cout.values
647 >>> flow_values = flow.values
648 >>>
649 >>> cin = distribution_extraction_to_infiltration(
650 ... cout=cout_values,
651 ... flow=flow_values,
652 ... tedges=tedges,
653 ... cin_tedges=cin_tedges,
654 ... aquifer_pore_volumes=aquifer_pore_volumes,
655 ... )
657 With retardation factor:
659 >>> cin = distribution_extraction_to_infiltration(
660 ... cout=cout,
661 ... flow=flow,
662 ... tedges=tedges,
663 ... cin_tedges=cin_tedges,
664 ... aquifer_pore_volumes=aquifer_pore_volumes,
665 ... retardation_factor=2.0, # Compound moves twice as slowly
666 ... )
668 Using single pore volume:
670 >>> single_volume = np.array([100]) # Single 100 m3 pore volume
671 >>> cin = distribution_extraction_to_infiltration(
672 ... cout=cout,
673 ... flow=flow,
674 ... tedges=tedges,
675 ... cin_tedges=cin_tedges,
676 ... aquifer_pore_volumes=single_volume,
677 ... )
678 """
679 tedges = pd.DatetimeIndex(tedges)
680 cin_tedges = pd.DatetimeIndex(cin_tedges)
682 if len(tedges) != len(cout) + 1:
683 msg = "tedges must have one more element than cout"
684 raise ValueError(msg)
685 if len(tedges) != len(flow) + 1:
686 msg = "tedges must have one more element than flow"
687 raise ValueError(msg)
689 # Convert to arrays for vectorized operations
690 cout_values = np.asarray(cout)
691 flow_values = np.asarray(flow)
693 # Validate inputs do not contain NaN values
694 if np.any(np.isnan(cout_values)):
695 msg = "cout contains NaN values, which are not allowed"
696 raise ValueError(msg)
697 if np.any(np.isnan(flow_values)):
698 msg = "flow contains NaN values, which are not allowed"
699 raise ValueError(msg)
700 cout_tedges_days = ((tedges - tedges[0]) / pd.Timedelta(days=1)).values
701 cin_tedges_days = ((cin_tedges - tedges[0]) / pd.Timedelta(days=1)).values
702 aquifer_pore_volumes = np.asarray(aquifer_pore_volumes)
704 # Pre-compute all residence times and extraction edges (symmetric to infiltration_to_extraction)
705 rt_edges_2d = residence_time(
706 flow=flow_values,
707 flow_tedges=tedges,
708 index=cin_tedges,
709 aquifer_pore_volume=aquifer_pore_volumes,
710 retardation_factor=retardation_factor,
711 direction="infiltration_to_extraction", # Computing from infiltration perspective
712 )
713 extraction_tedges_2d = cin_tedges_days[None, :] + rt_edges_2d
715 # Pre-compute valid bins and count
716 valid_bins_2d = ~(np.isnan(extraction_tedges_2d[:, :-1]) | np.isnan(extraction_tedges_2d[:, 1:]))
717 valid_pv_count = np.sum(valid_bins_2d, axis=0)
719 # Initialize accumulator
720 normalized_weights = _distribution_extraction_to_infiltration_weights(
721 cin_tedges,
722 aquifer_pore_volumes,
723 cout_values,
724 flow_values,
725 cout_tedges_days,
726 extraction_tedges_2d,
727 valid_bins_2d,
728 valid_pv_count,
729 )
731 # Apply to concentrations and handle NaN for periods with no contributions
732 out = normalized_weights.dot(cout_values)
733 out[valid_pv_count == 0] = np.nan
735 return out
738def _distribution_extraction_to_infiltration_weights(
739 cin_tedges,
740 aquifer_pore_volumes,
741 cout_values,
742 flow_values,
743 cout_tedges_days,
744 extraction_tedges_2d,
745 valid_bins_2d,
746 valid_pv_count,
747):
748 """
749 Compute extraction to infiltration transformation weights matrix.
751 Computes the weight matrix for the extraction to infiltration transformation,
752 ensuring mathematical symmetry with the infiltration to extraction operation. The extraction to infiltration
753 weights represent the transpose relationship needed for deconvolution.
755 SYMMETRIC RELATIONSHIP:
756 - Infiltration to extraction weights: W_infiltration_to_extraction maps cin → cout
757 - Extraction to infiltration weights: W_extraction_to_infiltration maps cout → cin
758 - Mathematical constraint: W_extraction_to_infiltration should be the pseudo-inverse of W_infiltration_to_extraction
760 The algorithm mirrors _distribution_infiltration_to_extraction_weights but with transposed
761 temporal overlap computations to ensure mathematical consistency.
763 Parameters
764 ----------
765 cin_tedges : pandas.DatetimeIndex
766 Time edges for output (infiltration) data bins.
767 aquifer_pore_volumes : array-like
768 Array of aquifer pore volumes [m3].
769 cout_values : array-like
770 Concentration values of extracted water.
771 flow_values : array-like
772 Flow rate values in the aquifer [m3/day].
773 cout_tedges_days : array-like
774 Time edges for cout data in days since reference.
775 extraction_tedges_2d : array-like
776 2D array of extraction time edges for each pore volume.
777 valid_bins_2d : array-like
778 2D boolean array indicating valid time bins for each pore volume.
779 valid_pv_count : array-like
780 Count of valid pore volumes for each output time bin.
782 Returns
783 -------
784 numpy.ndarray
785 Normalized weight matrix for extraction to infiltration transformation.
786 Shape: (len(cin_tedges) - 1, len(cout_values))
787 """
788 accumulated_weights = np.zeros((len(cin_tedges) - 1, len(cout_values)))
790 # Loop over each pore volume (same structure as infiltration_to_extraction)
791 for i in range(len(aquifer_pore_volumes)):
792 if np.any(valid_bins_2d[i, :]):
793 # SYMMETRIC temporal overlap computation:
794 # Infiltration to extraction: maps infiltration → cout time windows
795 # Extraction to infiltration: maps extraction → cout time windows (transposed relationship)
796 overlap_matrix = partial_isin(extraction_tedges_2d[i, :], cout_tedges_days)
797 accumulated_weights[valid_bins_2d[i, :], :] += overlap_matrix[valid_bins_2d[i, :], :]
799 # Average across valid pore volumes (symmetric to infiltration_to_extraction)
800 averaged_weights = np.zeros_like(accumulated_weights)
801 valid_cout = valid_pv_count > 0
802 averaged_weights[valid_cout, :] = accumulated_weights[valid_cout, :] / valid_pv_count[valid_cout, None]
804 # Apply flow weighting (symmetric to infiltration_to_extraction)
805 flow_weighted_averaged = averaged_weights * flow_values[None, :]
807 # Normalize by total weights (symmetric to infiltration_to_extraction)
808 total_weights = np.sum(flow_weighted_averaged, axis=1)
809 valid_weights = total_weights > 0
810 normalized_weights = np.zeros_like(flow_weighted_averaged)
811 normalized_weights[valid_weights, :] = flow_weighted_averaged[valid_weights, :] / total_weights[valid_weights, None]
813 return normalized_weights