Coverage for src / gwtransport / examples.py: 84%
61 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-04 18:51 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-04 18:51 +0000
1"""
2Example Data Generation for Groundwater Transport Modeling.
4This module provides utilities to generate synthetic datasets for demonstrating
5and testing groundwater transport models. It creates realistic flow patterns,
6temperature/concentration time series, and deposition events suitable for testing
7advection, diffusion, and deposition analysis functions.
9Available functions:
11- :func:`generate_example_data` - Generate comprehensive synthetic dataset with flow and
12 temperature time series. Creates seasonal flow patterns with optional spill events,
13 temperature data via synthetic sinusoidal patterns or real KNMI soil temperature, and
14 extracted temperature computed through gamma-distributed pore volume transport. Returns
15 DataFrame with flow, temp_infiltration, temp_extraction columns plus attrs containing
16 generation parameters and aquifer properties. Temperature generation methods: 'synthetic'
17 (seasonal sinusoidal pattern), 'constant' (constant temperature with noise), or
18 'soil_temperature' (real data from KNMI station 260).
20- :func:`generate_example_deposition_timeseries` - Generate synthetic deposition time series
21 for pathogen/contaminant deposition analysis. Combines baseline deposition, seasonal patterns,
22 random noise, and episodic contamination events with exponential decay. Returns Series with
23 deposition rates [ng/m²/day] and attrs containing generation parameters. Useful for testing
24 extraction_to_deposition deconvolution and deposition_to_extraction convolution functions.
26This file is part of gwtransport which is released under AGPL-3.0 license.
27See the ./LICENSE file or go to https://github.com/gwtransport/gwtransport/blob/main/LICENSE for full license details.
28"""
30import numpy as np
31import numpy.typing as npt
32import pandas as pd
34from gwtransport.advection import gamma_infiltration_to_extraction
35from gwtransport.gamma import mean_std_to_alpha_beta
36from gwtransport.utils import compute_time_edges, get_soil_temperature
39def generate_example_data(
40 *,
41 date_start: str = "2020-01-01",
42 date_end: str = "2021-12-31",
43 date_freq: str = "D",
44 flow_mean: float = 100.0, # m3/day
45 flow_amplitude: float = 30.0, # m3/day
46 flow_noise: float = 10.0, # m3/day
47 temp_infiltration_method: str = "synthetic", # Method for generating infiltration temperature
48 temp_infiltration_mean: float = 12.0, # °C
49 temp_infiltration_amplitude: float = 8.0, # °C
50 temp_measurement_noise: float = 1.0, # °C
51 aquifer_pore_volume_gamma_mean: float = 1000.0, # m3
52 aquifer_pore_volume_gamma_std: float = 200.0, # m3
53 aquifer_pore_volume_gamma_nbins: int = 250, # Discretization resolution
54 retardation_factor: float = 1.0,
55) -> tuple[pd.DataFrame, pd.DatetimeIndex]:
56 """
57 Generate synthetic temperature and flow data for groundwater transport examples.
59 Parameters
60 ----------
61 date_start, date_end : str
62 Start and end dates for the generated time series (YYYY-MM-DD).
63 date_freq : str, default "D"
64 Frequency string for pandas.date_range (default 'D').
65 flow_mean : float, default 100.0
66 Mean flow rate in m3/day
67 flow_amplitude : float, default 30.0
68 Seasonal amplitude of flow rate in m3/day
69 flow_noise : float, default 10.0
70 Random noise level for flow rate in m3/day
71 temp_infiltration_method : str, default "synthetic"
72 Method for generating infiltration temperature. Options:
73 - "synthetic": Seasonal pattern. temp_infiltration_mean and temp_infiltration_amplitude define the pattern. temp_measurement_noise is applied.
74 - "constant": Constant temperature equal to temp_infiltration_mean. temp_measurement_noise is still applied.
75 - "soil_temperature": Use real soil temperature data from KNMI station
76 temp_infiltration_mean : float, default 12.0
77 Mean temperature of infiltrating water in °C
78 temp_infiltration_amplitude : float, default 8.0
79 Seasonal amplitude of infiltration temperature in °C (only used for "synthetic" method)
80 temp_measurement_noise : float, default 1.0
81 Random noise level for infiltration temperature in °C
82 aquifer_pore_volume_gamma_mean : float, default 1000.0
83 Mean pore volume of the aquifer gamma distribution in m3
84 aquifer_pore_volume_gamma_std : float, default 200.0
85 Standard deviation of aquifer pore volume gamma distribution in m3
86 aquifer_pore_volume_gamma_nbins : int, default 250
87 Number of bins to discretize the aquifer pore volume gamma distribution
88 retardation_factor : float, default 1.0
89 Retardation factor for temperature transport
91 Returns
92 -------
93 tuple
94 A tuple containing:
95 - pandas.DataFrame: DataFrame with columns 'flow', 'temp_infiltration', 'temp_extraction'
96 and metadata attributes for the aquifer parameters
97 - pandas.DatetimeIndex: Time edges (tedges) used for the flow calculations
99 Raises
100 ------
101 ValueError
102 If temp_infiltration_method is not one of the supported methods
103 """
104 # Create date range
105 dates = pd.date_range(start=date_start, end=date_end, freq=date_freq).tz_localize("UTC")
106 days = (dates - dates[0]).days.values
108 # Generate flow data with seasonal pattern (higher in winter)
109 seasonal_flow = flow_mean + flow_amplitude * np.sin(2 * np.pi * days / 365 + np.pi)
110 flow = seasonal_flow + np.random.normal(0, flow_noise, len(dates))
111 flow = np.maximum(flow, 5.0) # Ensure flow is not too small or negative
113 min_days_for_spills = 60
114 if len(dates) > min_days_for_spills: # Only add spills for longer time series
115 n_spills = np.random.randint(6, 16)
116 for _ in range(n_spills):
117 spill_start = np.random.randint(0, len(dates) - 30)
118 spill_duration = np.random.randint(15, 45)
119 spill_magnitude = np.random.uniform(2.0, 5.0)
121 flow[spill_start : spill_start + spill_duration] /= spill_magnitude
123 # Generate infiltration temperature
124 if temp_infiltration_method == "synthetic":
125 # Seasonal pattern with noise
126 infiltration_temp = temp_infiltration_mean + temp_infiltration_amplitude * np.sin(2 * np.pi * days / 365)
127 infiltration_temp += np.random.normal(0, temp_measurement_noise, len(dates))
128 elif temp_infiltration_method == "constant":
129 # Constant temperature
130 infiltration_temp = np.full(len(dates), temp_infiltration_mean)
131 infiltration_temp += np.random.normal(0, temp_measurement_noise, len(dates))
132 elif temp_infiltration_method == "soil_temperature":
133 # Use soil temperature data already includes measurement noise
134 infiltration_temp = (
135 get_soil_temperature(
136 station_number=260, # Example station number
137 interpolate_missing_values=True,
138 )["TB3"]
139 .resample(date_freq)
140 .mean()[dates]
141 .values
142 )
143 else:
144 msg = f"Unknown temperature method: {temp_infiltration_method}"
145 raise ValueError(msg)
147 # Compute tedges for the flow series
148 tedges = compute_time_edges(tedges=None, tstart=None, tend=dates, number_of_bins=len(dates))
150 temp_extraction = gamma_infiltration_to_extraction(
151 cin=infiltration_temp,
152 flow=flow,
153 tedges=tedges,
154 cout_tedges=tedges,
155 mean=aquifer_pore_volume_gamma_mean, # Use mean pore volume
156 std=aquifer_pore_volume_gamma_std, # Use standard deviation for heterogeneity
157 n_bins=aquifer_pore_volume_gamma_nbins, # Discretization resolution
158 retardation_factor=retardation_factor,
159 )
161 # Add some noise to represent measurement errors
162 temp_extraction += np.random.normal(0, temp_measurement_noise, len(dates))
164 # Create data frame
165 alpha, beta = mean_std_to_alpha_beta(mean=aquifer_pore_volume_gamma_mean, std=aquifer_pore_volume_gamma_std)
166 df = pd.DataFrame(
167 data={"flow": flow, "temp_infiltration": infiltration_temp, "temp_extraction": temp_extraction},
168 index=dates,
169 )
170 df.attrs = {
171 "description": "Example data for groundwater transport modeling",
172 "source": "Synthetic data generated by gwtransport.examples.generate_example_data",
173 "aquifer_pore_volume_gamma_mean": aquifer_pore_volume_gamma_mean,
174 "aquifer_pore_volume_gamma_std": aquifer_pore_volume_gamma_std,
175 "aquifer_pore_volume_gamma_alpha": alpha,
176 "aquifer_pore_volume_gamma_beta": beta,
177 "aquifer_pore_volume_gamma_nbins": aquifer_pore_volume_gamma_nbins,
178 "retardation_factor": retardation_factor,
179 "date_start": date_start,
180 "date_end": date_end,
181 "date_freq": date_freq,
182 "flow_mean": flow_mean,
183 "flow_amplitude": flow_amplitude,
184 "flow_noise": flow_noise,
185 "temp_infiltration_method": temp_infiltration_method,
186 "temp_infiltration_mean": temp_infiltration_mean,
187 "temp_infiltration_amplitude": temp_infiltration_amplitude,
188 "temp_measurement_noise": temp_measurement_noise,
189 }
190 return df, tedges
193def generate_example_deposition_timeseries(
194 *,
195 date_start: str = "2018-01-01",
196 date_end: str = "2023-12-31",
197 freq: str = "D",
198 base: float = 0.8,
199 seasonal_amplitude: float = 0.3,
200 noise_scale: float = 0.1,
201 event_dates: npt.ArrayLike | pd.DatetimeIndex | None = None,
202 event_magnitude: float = 3.0,
203 event_duration: int = 30,
204 event_decay_scale: float = 10.0,
205 ensure_non_negative: bool = True,
206) -> tuple[pd.Series, pd.DatetimeIndex]:
207 """
208 Generate synthetic deposition timeseries for groundwater transport examples.
210 Parameters
211 ----------
212 date_start, date_end : str
213 Start and end dates for the generated time series (YYYY-MM-DD).
214 freq : str
215 Frequency string for pandas.date_range (default 'D').
216 base : float
217 Baseline deposition rate (ng/m^2/day).
218 seasonal_amplitude : float
219 Amplitude of the annual seasonal sinusoidal pattern.
220 noise_scale : float
221 Standard deviation scale for Gaussian noise added to the signal.
222 event_dates : list-like or None
223 Dates (strings or pandas-compatible) at which to place episodic events. If None,
224 a sensible default list is used.
225 event_magnitude : float
226 Peak magnitude multiplier for events.
227 event_duration : int
228 Duration of each event in days.
229 event_decay_scale : float
230 Decay scale used in the exponential decay for event time series.
231 ensure_non_negative : bool
232 If True, negative values are clipped to zero.
234 Returns
235 -------
236 pandas.Series
237 Time series of deposition values indexed by daily timestamps.
238 """
239 # Create synthetic deposition time series - needs to match flow period
240 dates = pd.date_range(date_start, date_end, freq=freq).tz_localize("UTC")
241 n_dates = len(dates)
242 tedges = compute_time_edges(tedges=None, tstart=None, tend=dates, number_of_bins=n_dates)
244 # Base deposition rate with seasonal and event patterns
245 seasonal_pattern = seasonal_amplitude * np.sin(2 * np.pi * np.arange(n_dates) / 365.25)
246 noise = noise_scale * np.random.normal(0, 1, n_dates)
248 # Default event dates if not provided
249 if event_dates is None:
250 event_dates = ["2020-06-15", "2021-03-20", "2021-09-10", "2022-07-05"]
251 # Convert to DatetimeIndex - handles list, array, or DatetimeIndex input
252 if isinstance(event_dates, pd.DatetimeIndex):
253 event_dates_index = event_dates
254 else:
255 # Convert ArrayLike to list for pd.to_datetime
256 event_dates_list = event_dates if isinstance(event_dates, list) else list(np.asarray(event_dates))
257 event_dates_index = pd.DatetimeIndex(pd.to_datetime(event_dates_list))
259 event = np.zeros(n_dates)
260 for event_date in event_dates_index:
261 event_idx = dates.get_indexer([event_date], method="nearest")[0]
262 event_indices = np.arange(event_idx, min(event_idx + event_duration, n_dates))
263 decay_pattern = event_magnitude * np.exp(-np.arange(len(event_indices)) / event_decay_scale)
264 event[event_indices] += decay_pattern
266 # Combine all components
267 total = base + seasonal_pattern + noise + event
268 if ensure_non_negative:
269 total = np.maximum(total, 0.0)
271 series = pd.Series(data=total, index=dates, name="deposition")
272 series.attrs = {
273 "description": "Example deposition time series for groundwater transport modeling",
274 "source": "Synthetic data generated by gwtransport.examples.generate_example_deposition_timeseries",
275 "base": base,
276 "seasonal_amplitude": seasonal_amplitude,
277 "noise_scale": noise_scale,
278 "event_dates": [str(d.date()) for d in event_dates_index],
279 "event_magnitude": event_magnitude,
280 "event_duration": event_duration,
281 "event_decay_scale": event_decay_scale,
282 "date_start": date_start,
283 "date_end": date_end,
284 "date_freq": freq,
285 }
287 # Create deposition series
288 return series, tedges