Coverage for src/gwtransport/examples.py: 86%

57 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-09-29 20:01 +0000

1""" 

2Example data generation for groundwater transport modeling. 

3 

4This module provides utilities to generate synthetic temperature and flow data 

5for demonstrating and testing groundwater transport models. It was separated 

6from the utils module to resolve circular import dependencies. 

7""" 

8 

9import numpy as np 

10import pandas as pd 

11 

12from gwtransport.advection import gamma_infiltration_to_extraction 

13from gwtransport.gamma import mean_std_to_alpha_beta 

14from gwtransport.utils import compute_time_edges, get_soil_temperature 

15 

16 

17def generate_example_data( 

18 *, 

19 date_start="2020-01-01", 

20 date_end="2021-12-31", 

21 date_freq="D", 

22 flow_mean=100.0, # m3/day 

23 flow_amplitude=30.0, # m3/day 

24 flow_noise=10.0, # m3/day 

25 temp_infiltration_method="synthetic", # Method for generating infiltration temperature 

26 temp_infiltration_mean=12.0, # °C 

27 temp_infiltration_amplitude=8.0, # °C 

28 temp_measurement_noise=1.0, # °C 

29 aquifer_pore_volume_gamma_mean=1000.0, # m3 

30 aquifer_pore_volume_gamma_std=200.0, # m3 

31 aquifer_pore_volume_gamma_nbins=250, # Discretization resolution 

32 retardation_factor=1.0, 

33): 

34 """ 

35 Generate synthetic temperature and flow data for groundwater transport examples. 

36 

37 Parameters 

38 ---------- 

39 date_start, date_end : str 

40 Start and end dates for the generated time series (YYYY-MM-DD). 

41 date_freq : str, default "D" 

42 Frequency string for pandas.date_range (default 'D'). 

43 flow_mean : float, default 100.0 

44 Mean flow rate in m3/day 

45 flow_amplitude : float, default 30.0 

46 Seasonal amplitude of flow rate in m3/day 

47 flow_noise : float, default 10.0 

48 Random noise level for flow rate in m3/day 

49 temp_infiltration_method : str, default "synthetic" 

50 Method for generating infiltration temperature. Options: 

51 - "synthetic": Seasonal pattern. temp_infiltration_mean and temp_infiltration_amplitude define the pattern. temp_measurement_noise is applied. 

52 - "constant": Constant temperature equal to temp_infiltration_mean. temp_measurement_noise is still applied. 

53 - "soil_temperature": Use real soil temperature data from KNMI station 

54 temp_infiltration_mean : float, default 12.0 

55 Mean temperature of infiltrating water in °C 

56 temp_infiltration_amplitude : float, default 8.0 

57 Seasonal amplitude of infiltration temperature in °C (only used for "synthetic" method) 

58 temp_measurement_noise : float, default 1.0 

59 Random noise level for infiltration temperature in °C 

60 aquifer_pore_volume_gamma_mean : float, default 1000.0 

61 Mean pore volume of the aquifer gamma distribution in m3 

62 aquifer_pore_volume_gamma_std : float, default 200.0 

63 Standard deviation of aquifer pore volume gamma distribution in m3 

64 aquifer_pore_volume_gamma_nbins : int, default 250 

65 Number of bins to discretize the aquifer pore volume gamma distribution 

66 retardation_factor : float, default 1.0 

67 Retardation factor for temperature transport 

68 

69 Returns 

70 ------- 

71 tuple 

72 A tuple containing: 

73 - pandas.DataFrame: DataFrame with columns 'flow', 'temp_infiltration', 'temp_extraction' 

74 and metadata attributes for the aquifer parameters 

75 - pandas.DatetimeIndex: Time edges (tedges) used for the flow calculations 

76 

77 Raises 

78 ------ 

79 ValueError 

80 If temp_infiltration_method is not one of the supported methods 

81 """ 

82 # Create date range 

83 dates = pd.date_range(start=date_start, end=date_end, freq=date_freq).tz_localize("UTC") 

84 days = (dates - dates[0]).days.values 

85 

86 # Generate flow data with seasonal pattern (higher in winter) 

87 seasonal_flow = flow_mean + flow_amplitude * np.sin(2 * np.pi * days / 365 + np.pi) 

88 flow = seasonal_flow + np.random.normal(0, flow_noise, len(dates)) 

89 flow = np.maximum(flow, 5.0) # Ensure flow is not too small or negative 

90 

91 min_days_for_spills = 60 

92 if len(dates) > min_days_for_spills: # Only add spills for longer time series 

93 n_spills = np.random.randint(6, 16) 

94 for _ in range(n_spills): 

95 spill_start = np.random.randint(0, len(dates) - 30) 

96 spill_duration = np.random.randint(15, 45) 

97 spill_magnitude = np.random.uniform(2.0, 5.0) 

98 

99 flow[spill_start : spill_start + spill_duration] /= spill_magnitude 

100 

101 # Generate infiltration temperature 

102 if temp_infiltration_method == "synthetic": 

103 # Seasonal pattern with noise 

104 infiltration_temp = temp_infiltration_mean + temp_infiltration_amplitude * np.sin(2 * np.pi * days / 365) 

105 infiltration_temp += np.random.normal(0, temp_measurement_noise, len(dates)) 

106 elif temp_infiltration_method == "constant": 

107 # Constant temperature 

108 infiltration_temp = np.full(len(dates), temp_infiltration_mean) 

109 infiltration_temp += np.random.normal(0, temp_measurement_noise, len(dates)) 

110 elif temp_infiltration_method == "soil_temperature": 

111 # Use soil temperature data already includes measurement noise 

112 infiltration_temp = ( 

113 get_soil_temperature( 

114 station_number=260, # Example station number 

115 interpolate_missing_values=True, 

116 )["TB3"] 

117 .resample(date_freq) 

118 .mean()[dates] 

119 .values 

120 ) 

121 else: 

122 msg = f"Unknown temperature method: {temp_infiltration_method}" 

123 raise ValueError(msg) 

124 

125 # Compute tedges for the flow series 

126 tedges = compute_time_edges(tedges=None, tstart=None, tend=dates, number_of_bins=len(dates)) 

127 

128 temp_extraction = gamma_infiltration_to_extraction( 

129 cin=infiltration_temp, 

130 flow=flow, 

131 tedges=tedges, 

132 cout_tedges=tedges, 

133 mean=aquifer_pore_volume_gamma_mean, # Use mean pore volume 

134 std=aquifer_pore_volume_gamma_std, # Use standard deviation for heterogeneity 

135 n_bins=aquifer_pore_volume_gamma_nbins, # Discretization resolution 

136 retardation_factor=retardation_factor, 

137 ) 

138 

139 # Add some noise to represent measurement errors 

140 temp_extraction += np.random.normal(0, temp_measurement_noise, len(dates)) 

141 

142 # Create data frame 

143 alpha, beta = mean_std_to_alpha_beta(mean=aquifer_pore_volume_gamma_mean, std=aquifer_pore_volume_gamma_std) 

144 df = pd.DataFrame( 

145 data={"flow": flow, "temp_infiltration": infiltration_temp, "temp_extraction": temp_extraction}, 

146 index=dates, 

147 ) 

148 df.attrs = { 

149 "description": "Example data for groundwater transport modeling", 

150 "source": "Synthetic data generated by gwtransport.examples.generate_example_data", 

151 "aquifer_pore_volume_gamma_mean": aquifer_pore_volume_gamma_mean, 

152 "aquifer_pore_volume_gamma_std": aquifer_pore_volume_gamma_std, 

153 "aquifer_pore_volume_gamma_alpha": alpha, 

154 "aquifer_pore_volume_gamma_beta": beta, 

155 "aquifer_pore_volume_gamma_nbins": aquifer_pore_volume_gamma_nbins, 

156 "retardation_factor": retardation_factor, 

157 "date_start": date_start, 

158 "date_end": date_end, 

159 "date_freq": date_freq, 

160 "flow_mean": flow_mean, 

161 "flow_amplitude": flow_amplitude, 

162 "flow_noise": flow_noise, 

163 "temp_infiltration_method": temp_infiltration_method, 

164 "temp_infiltration_mean": temp_infiltration_mean, 

165 "temp_infiltration_amplitude": temp_infiltration_amplitude, 

166 "temp_measurement_noise": temp_measurement_noise, 

167 } 

168 return df, tedges 

169 

170 

171def generate_example_deposition_timeseries( 

172 date_start="2018-01-01", 

173 date_end="2023-12-31", 

174 freq="D", 

175 base: float = 0.8, 

176 seasonal_amplitude: float = 0.3, 

177 noise_scale: float = 0.1, 

178 event_dates: list[str] | pd.DatetimeIndex | None = None, 

179 event_magnitude: float = 3.0, 

180 event_duration: int = 30, 

181 event_decay_scale: float = 10.0, 

182 *, 

183 ensure_non_negative: bool = True, 

184): 

185 """ 

186 Generate synthetic deposition timeseries for groundwater transport examples. 

187 

188 Parameters 

189 ---------- 

190 date_start, date_end : str 

191 Start and end dates for the generated time series (YYYY-MM-DD). 

192 freq : str 

193 Frequency string for pandas.date_range (default 'D'). 

194 base : float 

195 Baseline deposition rate (ng/m^2/day). 

196 seasonal_amplitude : float 

197 Amplitude of the annual seasonal sinusoidal pattern. 

198 noise_scale : float 

199 Standard deviation scale for Gaussian noise added to the signal. 

200 event_dates : list-like or None 

201 Dates (strings or pandas-compatible) at which to place episodic events. If None, 

202 a sensible default list is used. 

203 event_magnitude : float 

204 Peak magnitude multiplier for events. 

205 event_duration : int 

206 Duration of each event in days. 

207 event_decay_scale : float 

208 Decay scale used in the exponential decay for event time series. 

209 ensure_non_negative : bool 

210 If True, negative values are clipped to zero. 

211 

212 Returns 

213 ------- 

214 pandas.Series 

215 Time series of deposition values indexed by daily timestamps. 

216 """ 

217 # Create synthetic deposition time series - needs to match flow period 

218 dates = pd.date_range(date_start, date_end, freq=freq).tz_localize("UTC") 

219 n_dates = len(dates) 

220 tedges = compute_time_edges(tedges=None, tstart=None, tend=dates, number_of_bins=n_dates) 

221 

222 # Base deposition rate with seasonal and event patterns 

223 seasonal_pattern = seasonal_amplitude * np.sin(2 * np.pi * np.arange(n_dates) / 365.25) 

224 noise = noise_scale * np.random.normal(0, 1, n_dates) 

225 

226 # Default event dates if not provided 

227 if event_dates is None: 

228 event_dates = ["2020-06-15", "2021-03-20", "2021-09-10", "2022-07-05"] 

229 event_dates = pd.DatetimeIndex(pd.to_datetime(event_dates)) 

230 

231 event = np.zeros(n_dates) 

232 for event_date in event_dates: 

233 event_idx = dates.get_indexer([event_date], method="nearest")[0] 

234 event_indices = np.arange(event_idx, min(event_idx + event_duration, n_dates)) 

235 decay_pattern = event_magnitude * np.exp(-np.arange(len(event_indices)) / event_decay_scale) 

236 event[event_indices] += decay_pattern 

237 

238 # Combine all components 

239 total = base + seasonal_pattern + noise + event 

240 if ensure_non_negative: 

241 total = np.maximum(total, 0.0) 

242 

243 series = pd.Series(data=total, index=dates, name="deposition") 

244 series.attrs = { 

245 "description": "Example deposition time series for groundwater transport modeling", 

246 "source": "Synthetic data generated by gwtransport.examples.generate_example_deposition_timeseries", 

247 "base": base, 

248 "seasonal_amplitude": seasonal_amplitude, 

249 "noise_scale": noise_scale, 

250 "event_dates": [str(d.date()) for d in event_dates], 

251 "event_magnitude": event_magnitude, 

252 "event_duration": event_duration, 

253 "event_decay_scale": event_decay_scale, 

254 "date_start": date_start, 

255 "date_end": date_end, 

256 "date_freq": freq, 

257 } 

258 

259 # Create deposition series 

260 return series, tedges