Coverage for src/gwtransport/examples.py: 0%
57 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-20 13:45 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-20 13:45 +0000
1"""
2Example data generation for groundwater transport modeling.
4This module provides utilities to generate synthetic temperature and flow data
5for demonstrating and testing groundwater transport models. It was separated
6from the utils module to resolve circular import dependencies.
7"""
9import numpy as np
10import pandas as pd
12from gwtransport.advection import gamma_infiltration_to_extraction
13from gwtransport.gamma import mean_std_to_alpha_beta
14from gwtransport.utils import compute_time_edges, get_soil_temperature
17def generate_example_data(
18 *,
19 date_start="2020-01-01",
20 date_end="2021-12-31",
21 date_freq="D",
22 flow_mean=100.0, # m3/day
23 flow_amplitude=30.0, # m3/day
24 flow_noise=10.0, # m3/day
25 temp_infiltration_method="synthetic", # Method for generating infiltration temperature
26 temp_infiltration_mean=12.0, # °C
27 temp_infiltration_amplitude=8.0, # °C
28 temp_measurement_noise=1.0, # °C
29 aquifer_pore_volume_gamma_mean=1000.0, # m3
30 aquifer_pore_volume_gamma_std=200.0, # m3
31 aquifer_pore_volume_gamma_nbins=250, # Discretization resolution
32 retardation_factor=1.0,
33):
34 """
35 Generate synthetic temperature and flow data for groundwater transport examples.
37 Parameters
38 ----------
39 date_start, date_end : str
40 Start and end dates for the generated time series (YYYY-MM-DD).
41 date_freq : str, default "D"
42 Frequency string for pandas.date_range (default 'D').
43 flow_mean : float, default 100.0
44 Mean flow rate in m3/day
45 flow_amplitude : float, default 30.0
46 Seasonal amplitude of flow rate in m3/day
47 flow_noise : float, default 10.0
48 Random noise level for flow rate in m3/day
49 temp_infiltration_method : str, default "synthetic"
50 Method for generating infiltration temperature. Options:
51 - "synthetic": Seasonal pattern. temp_infiltration_mean and temp_infiltration_amplitude define the pattern. temp_measurement_noise is applied.
52 - "constant": Constant temperature equal to temp_infiltration_mean. temp_measurement_noise is still applied.
53 - "soil_temperature": Use real soil temperature data from KNMI station
54 temp_infiltration_mean : float, default 12.0
55 Mean temperature of infiltrating water in °C
56 temp_infiltration_amplitude : float, default 8.0
57 Seasonal amplitude of infiltration temperature in °C (only used for "synthetic" method)
58 temp_measurement_noise : float, default 1.0
59 Random noise level for infiltration temperature in °C
60 aquifer_pore_volume_gamma_mean : float, default 1000.0
61 Mean pore volume of the aquifer gamma distribution in m3
62 aquifer_pore_volume_gamma_std : float, default 200.0
63 Standard deviation of aquifer pore volume gamma distribution in m3
64 aquifer_pore_volume_gamma_nbins : int, default 250
65 Number of bins to discretize the aquifer pore volume gamma distribution
66 retardation_factor : float, default 1.0
67 Retardation factor for temperature transport
69 Returns
70 -------
71 tuple
72 A tuple containing:
73 - pandas.DataFrame: DataFrame with columns 'flow', 'temp_infiltration', 'temp_extraction'
74 and metadata attributes for the aquifer parameters
75 - pandas.DatetimeIndex: Time edges (tedges) used for the flow calculations
77 Raises
78 ------
79 ValueError
80 If temp_infiltration_method is not one of the supported methods
81 """
82 # Create date range
83 dates = pd.date_range(start=date_start, end=date_end, freq=date_freq).tz_localize("UTC")
84 days = (dates - dates[0]).days.values
86 # Generate flow data with seasonal pattern (higher in winter)
87 seasonal_flow = flow_mean + flow_amplitude * np.sin(2 * np.pi * days / 365 + np.pi)
88 flow = seasonal_flow + np.random.normal(0, flow_noise, len(dates))
89 flow = np.maximum(flow, 5.0) # Ensure flow is not too small or negative
91 min_days_for_spills = 60
92 if len(dates) > min_days_for_spills: # Only add spills for longer time series
93 n_spills = np.random.randint(6, 16)
94 for _ in range(n_spills):
95 spill_start = np.random.randint(0, len(dates) - 30)
96 spill_duration = np.random.randint(15, 45)
97 spill_magnitude = np.random.uniform(2.0, 5.0)
99 flow[spill_start : spill_start + spill_duration] /= spill_magnitude
101 # Generate infiltration temperature
102 if temp_infiltration_method == "synthetic":
103 # Seasonal pattern with noise
104 infiltration_temp = temp_infiltration_mean + temp_infiltration_amplitude * np.sin(2 * np.pi * days / 365)
105 infiltration_temp += np.random.normal(0, temp_measurement_noise, len(dates))
106 elif temp_infiltration_method == "constant":
107 # Constant temperature
108 infiltration_temp = np.full(len(dates), temp_infiltration_mean)
109 infiltration_temp += np.random.normal(0, temp_measurement_noise, len(dates))
110 elif temp_infiltration_method == "soil_temperature":
111 # Use soil temperature data already includes measurement noise
112 infiltration_temp = (
113 get_soil_temperature(
114 station_number=260, # Example station number
115 interpolate_missing_values=True,
116 )["TB3"]
117 .resample(date_freq)
118 .mean()[dates]
119 .values
120 )
121 else:
122 msg = f"Unknown temperature method: {temp_infiltration_method}"
123 raise ValueError(msg)
125 # Compute tedges for the flow series
126 tedges = compute_time_edges(tedges=None, tstart=None, tend=dates, number_of_bins=len(dates))
128 temp_extraction = gamma_infiltration_to_extraction(
129 cin=infiltration_temp,
130 flow=flow,
131 tedges=tedges,
132 cout_tedges=tedges,
133 mean=aquifer_pore_volume_gamma_mean, # Use mean pore volume
134 std=aquifer_pore_volume_gamma_std, # Use standard deviation for heterogeneity
135 n_bins=aquifer_pore_volume_gamma_nbins, # Discretization resolution
136 retardation_factor=retardation_factor,
137 )
139 # Add some noise to represent measurement errors
140 temp_extraction += np.random.normal(0, temp_measurement_noise, len(dates))
142 # Create data frame
143 alpha, beta = mean_std_to_alpha_beta(mean=aquifer_pore_volume_gamma_mean, std=aquifer_pore_volume_gamma_std)
144 df = pd.DataFrame(
145 data={"flow": flow, "temp_infiltration": infiltration_temp, "temp_extraction": temp_extraction},
146 index=dates,
147 )
148 df.attrs = {
149 "description": "Example data for groundwater transport modeling",
150 "source": "Synthetic data generated by gwtransport.examples.generate_example_data",
151 "aquifer_pore_volume_gamma_mean": aquifer_pore_volume_gamma_mean,
152 "aquifer_pore_volume_gamma_std": aquifer_pore_volume_gamma_std,
153 "aquifer_pore_volume_gamma_alpha": alpha,
154 "aquifer_pore_volume_gamma_beta": beta,
155 "aquifer_pore_volume_gamma_nbins": aquifer_pore_volume_gamma_nbins,
156 "retardation_factor": retardation_factor,
157 "date_start": date_start,
158 "date_end": date_end,
159 "date_freq": date_freq,
160 "flow_mean": flow_mean,
161 "flow_amplitude": flow_amplitude,
162 "flow_noise": flow_noise,
163 "temp_infiltration_method": temp_infiltration_method,
164 "temp_infiltration_mean": temp_infiltration_mean,
165 "temp_infiltration_amplitude": temp_infiltration_amplitude,
166 "temp_measurement_noise": temp_measurement_noise,
167 }
168 return df, tedges
171def generate_example_deposition_timeseries(
172 date_start="2018-01-01",
173 date_end="2023-12-31",
174 freq="D",
175 base: float = 0.8,
176 seasonal_amplitude: float = 0.3,
177 noise_scale: float = 0.1,
178 event_dates=None,
179 event_magnitude: float = 3.0,
180 event_duration: int = 30,
181 event_decay_scale: float = 10.0,
182 *,
183 ensure_non_negative: bool = True,
184):
185 """
186 Generate synthetic deposition timeseries for groundwater transport examples.
188 Parameters
189 ----------
190 date_start, date_end : str
191 Start and end dates for the generated time series (YYYY-MM-DD).
192 freq : str
193 Frequency string for pandas.date_range (default 'D').
194 base : float
195 Baseline deposition rate (ng/m^2/day).
196 seasonal_amplitude : float
197 Amplitude of the annual seasonal sinusoidal pattern.
198 noise_scale : float
199 Standard deviation scale for Gaussian noise added to the signal.
200 event_dates : list-like or None
201 Dates (strings or pandas-compatible) at which to place episodic events. If None,
202 a sensible default list is used.
203 event_magnitude : float
204 Peak magnitude multiplier for events.
205 event_duration : int
206 Duration of each event in days.
207 event_decay_scale : float
208 Decay scale used in the exponential decay for event time series.
209 ensure_non_negative : bool
210 If True, negative values are clipped to zero.
212 Returns
213 -------
214 pandas.Series
215 Time series of deposition values indexed by daily timestamps.
216 """
217 # Create synthetic deposition time series - needs to match flow period
218 dates = pd.date_range(date_start, date_end, freq=freq).tz_localize("UTC")
219 n_dates = len(dates)
220 tedges = compute_time_edges(tedges=None, tstart=None, tend=dates, number_of_bins=n_dates)
222 # Base deposition rate with seasonal and event patterns
223 seasonal_pattern = seasonal_amplitude * np.sin(2 * np.pi * np.arange(n_dates) / 365.25)
224 noise = noise_scale * np.random.normal(0, 1, n_dates)
226 # Default event dates if not provided
227 if event_dates is None:
228 event_dates = ["2020-06-15", "2021-03-20", "2021-09-10", "2022-07-05"]
229 event_dates = pd.to_datetime(event_dates)
231 event = np.zeros(n_dates)
232 for event_date in event_dates:
233 event_idx = dates.get_indexer([event_date], method="nearest")[0]
234 event_indices = np.arange(event_idx, min(event_idx + event_duration, n_dates))
235 decay_pattern = event_magnitude * np.exp(-np.arange(len(event_indices)) / event_decay_scale)
236 event[event_indices] += decay_pattern
238 # Combine all components
239 total = base + seasonal_pattern + noise + event
240 if ensure_non_negative:
241 total = np.maximum(total, 0.0)
243 series = pd.Series(data=total, index=dates, name="deposition")
244 series.attrs = {
245 "description": "Example deposition time series for groundwater transport modeling",
246 "source": "Synthetic data generated by gwtransport.examples.generate_example_deposition_timeseries",
247 "base": base,
248 "seasonal_amplitude": seasonal_amplitude,
249 "noise_scale": noise_scale,
250 "event_dates": [str(d.date()) for d in event_dates],
251 "event_magnitude": event_magnitude,
252 "event_duration": event_duration,
253 "event_decay_scale": event_decay_scale,
254 "date_start": date_start,
255 "date_end": date_end,
256 "date_freq": freq,
257 }
259 # Create deposition series
260 return series, tedges