Coverage for src / gwtransport / examples.py: 73%

106 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-27 06:32 +0000

1""" 

2Example Data Generation for Groundwater Transport Modeling. 

3 

4This module provides utilities to generate synthetic datasets for demonstrating 

5and testing groundwater transport models. It creates realistic flow patterns, 

6concentration/temperature time series, and deposition events suitable for testing 

7advection, diffusion, and deposition analysis functions. 

8 

9Available functions: 

10 

11- :func:`generate_example_data` - Generate comprehensive synthetic dataset with flow and 

12 concentration time series. Creates seasonal flow patterns with optional spill events, 

13 input concentration data via synthetic sinusoidal patterns, constant values, or real KNMI 

14 soil temperature, and extracted concentration computed through gamma-distributed pore volume 

15 transport. When diffusion parameters are provided, uses the diffusion module instead of 

16 pure advection. Returns a (DataFrame, tedges) tuple: the DataFrame holds flow, cin, cout columns

17 plus attrs containing generation parameters and aquifer properties; tedges are the time edges.

18 

19- :func:`generate_temperature_example_data` - Convenience wrapper around 

20 :func:`generate_example_data` with sensible defaults for temperature transport including 

21 thermal retardation, thermal diffusivity, and longitudinal dispersivity. 

22 

23- :func:`generate_ec_example_data` - Convenience wrapper around 

24 :func:`generate_example_data` with sensible defaults for electrical conductivity (EC) 

25 transport. EC is a conservative tracer (retardation factor 1.0) with negligible molecular 

26 diffusivity compared to thermal transport. 

27 

28- :func:`generate_example_deposition_timeseries` - Generate synthetic deposition time series 

29 for pathogen/contaminant deposition analysis. Combines baseline deposition, seasonal patterns, 

30 random noise, and episodic contamination events with exponential decay. Returns a (Series, tedges)

31 tuple: deposition rates [ng/m²/day] with attrs containing generation parameters. Useful for testing

32 extraction_to_deposition deconvolution and deposition_to_extraction convolution functions. 

33 

34This file is part of gwtransport which is released under AGPL-3.0 license. 

35See the ./LICENSE file or go to https://github.com/gwtransport/gwtransport/blob/main/LICENSE for full license details. 

36""" 

37 

38import numpy as np 

39import numpy.typing as npt 

40import pandas as pd 

41 

42from gwtransport.advection import gamma_infiltration_to_extraction, infiltration_to_extraction 

43from gwtransport.diffusion_fast import gamma_infiltration_to_extraction as diffusion_gamma_infiltration_to_extraction 

44from gwtransport.diffusion_fast import infiltration_to_extraction as diffusion_infiltration_to_extraction 

45from gwtransport.gamma import mean_std_loc_to_alpha_beta 

46from gwtransport.utils import compute_time_edges, get_soil_temperature 

47 

# Fallback parameters of the aquifer pore volume gamma distribution. They are
# substituted by generate_example_data for every aquifer_pore_volume_gamma_*
# argument that is left as None.
_DEFAULT_GAMMA_MEAN = 1000.0  # mean pore volume [m3]
_DEFAULT_GAMMA_STD = 200.0  # standard deviation of the pore volume [m3]
_DEFAULT_GAMMA_LOC = 0.0  # location, i.e. minimum pore volume [m3]
_DEFAULT_GAMMA_NBINS = 250  # number of bins used to discretize the distribution

52 

53 

54def generate_example_data( 

55 *, 

56 date_start: str = "2020-01-01", 

57 date_end: str = "2021-12-31", 

58 date_freq: str = "D", 

59 flow_mean: float = 100.0, # m3/day 

60 flow_amplitude: float = 30.0, # m3/day 

61 flow_noise: float = 10.0, # m3/day 

62 cin_method: str = "synthetic", 

63 cin_mean: float = 12.0, 

64 cin_amplitude: float = 8.0, 

65 measurement_noise: float = 1.0, 

66 aquifer_pore_volumes: npt.ArrayLike | None = None, 

67 aquifer_pore_volume_gamma_mean: float | None = None, 

68 aquifer_pore_volume_gamma_std: float | None = None, 

69 aquifer_pore_volume_gamma_loc: float | None = None, 

70 aquifer_pore_volume_gamma_nbins: int | None = None, 

71 retardation_factor: float = 1.0, 

72 molecular_diffusivity: float | None = None, 

73 longitudinal_dispersivity: float | None = None, 

74 streamline_length: float | None = None, 

75 rng: np.random.Generator | int | None = None, 

76) -> tuple[pd.DataFrame, pd.DatetimeIndex]: 

77 """ 

78 Generate synthetic concentration/temperature and flow data for groundwater transport. 

79 

80 Creates a synthetic dataset with seasonal flow patterns, input concentration (cin), 

81 and output concentration (cout) computed via gamma-distributed pore volume transport. 

82 When ``molecular_diffusivity``, ``longitudinal_dispersivity``, and ``streamline_length`` 

83 are provided, the diffusion module is used instead of pure advection. 

84 

85 Parameters 

86 ---------- 

87 date_start, date_end : str 

88 Start and end dates for the generated time series (YYYY-MM-DD). 

89 date_freq : str, default "D" 

90 Frequency string for pandas.date_range. 

91 flow_mean : float, default 100.0 

92 Mean flow rate [m3/day]. 

93 flow_amplitude : float, default 30.0 

94 Seasonal amplitude of flow rate [m3/day]. 

95 flow_noise : float, default 10.0 

96 Random noise level for flow rate [m3/day]. 

97 cin_method : str, default "synthetic" 

98 Method for generating infiltration concentration. Options: 

99 

100 - ``"synthetic"``: Seasonal sinusoidal pattern defined by ``cin_mean`` and 

101 ``cin_amplitude``. Measurement noise is applied. 

102 - ``"constant"``: Constant value equal to ``cin_mean``. Measurement noise 

103 is still applied. 

104 - ``"soil_temperature"``: Real soil temperature data from KNMI station 260. 

105 cin_mean : float, default 12.0 

106 Mean value of infiltrating concentration. 

107 cin_amplitude : float, default 8.0 

108 Seasonal amplitude of infiltration concentration (only used for 

109 ``"synthetic"`` method). 

110 measurement_noise : float, default 1.0 

111 Random noise level applied to both cin and cout to represent 

112 measurement errors. 

113 aquifer_pore_volumes : array-like or None, default None 

114 Discrete aquifer pore volumes [m3] representing the distribution of 

115 residence times. When provided, the gamma distribution is bypassed and 

116 none of the ``aquifer_pore_volume_gamma_*`` parameters may be passed. 

117 When ``None``, the pore volume distribution is built from the gamma 

118 parameters below. 

119 aquifer_pore_volume_gamma_mean : float or None, default None 

120 Mean pore volume of the aquifer gamma distribution [m3] (default 1000.0 

121 when unset). Must be strictly greater than 

122 ``aquifer_pore_volume_gamma_loc``. Mutually exclusive with 

123 ``aquifer_pore_volumes``. 

124 aquifer_pore_volume_gamma_std : float or None, default None 

125 Standard deviation of aquifer pore volume gamma distribution [m3] 

126 (default 200.0 when unset; invariant under the ``loc`` shift). 

127 Mutually exclusive with ``aquifer_pore_volumes``. 

128 aquifer_pore_volume_gamma_loc : float or None, default None 

129 Location (minimum pore volume) of the aquifer gamma distribution [m3] 

130 (default 0.0 when unset). Must satisfy ``0 <= loc < mean``. Mutually 

131 exclusive with ``aquifer_pore_volumes``. 

132 aquifer_pore_volume_gamma_nbins : int or None, default None 

133 Number of bins to discretize the aquifer pore volume gamma distribution 

134 (default 250 when unset). Mutually exclusive with 

135 ``aquifer_pore_volumes``. 

136 retardation_factor : float, default 1.0 

137 Retardation factor for transport. 

138 molecular_diffusivity : float or None, default None 

139 Effective molecular diffusivity [m2/day]. When provided together with 

140 ``longitudinal_dispersivity`` and ``streamline_length``, the diffusion 

141 module is used instead of pure advection. For solutes, typically ~1e-5 

142 m2/day (negligible). For heat, use thermal diffusivity ~0.01-0.1 m2/day. 

143 longitudinal_dispersivity : float or None, default None 

144 Longitudinal dispersivity [m]. Must be provided together with 

145 ``molecular_diffusivity`` and ``streamline_length``. 

146 streamline_length : float or None, default None 

147 Travel distance along the streamline [m]. Must be provided together 

148 with ``molecular_diffusivity`` and ``longitudinal_dispersivity``. 

149 rng : numpy.random.Generator, int, or None, default None 

150 Source of randomness for the synthetic flow noise, spill events, and 

151 measurement noise. Accepted in any form supported by 

152 :func:`numpy.random.default_rng`. Pass an integer (or 

153 :class:`numpy.random.Generator`) for reproducible output; ``None`` 

154 draws fresh entropy each call. 

155 

156 Returns 

157 ------- 

158 tuple 

159 A tuple containing: 

160 

161 - pandas.DataFrame: DataFrame with columns ``'flow'``, ``'cin'``, 

162 ``'cout'`` and metadata attributes for the aquifer parameters. 

163 - pandas.DatetimeIndex: Time edges (tedges) used for the flow 

164 calculations. 

165 

166 Raises 

167 ------ 

168 ValueError 

169 If ``cin_method`` is not one of the supported methods, if only some 

170 of the diffusion parameters are provided, or if ``aquifer_pore_volumes`` 

171 is passed together with any ``aquifer_pore_volume_gamma_*`` parameter. 

172 

173 See Also 

174 -------- 

175 generate_temperature_example_data : Wrapper with thermal transport defaults. 

176 """ 

177 rng = np.random.default_rng(rng) 

178 

179 # Create date range 

180 dates = pd.date_range(start=date_start, end=date_end, freq=date_freq).tz_localize("UTC") 

181 days = (dates - dates[0]).days.values 

182 

183 # Generate flow data with seasonal pattern (higher in winter) 

184 seasonal_flow = flow_mean + flow_amplitude * np.sin(2 * np.pi * days / 365 + np.pi) 

185 flow = seasonal_flow + rng.normal(0, flow_noise, len(dates)) 

186 flow = np.maximum(flow, 5.0) # Ensure flow is not too small or negative 

187 

188 min_days_for_spills = 60 

189 if len(dates) > min_days_for_spills: # Only add spills for longer time series 

190 n_spills = int(rng.integers(6, 16)) 

191 for _ in range(n_spills): 

192 spill_start = int(rng.integers(0, len(dates) - 30)) 

193 spill_duration = int(rng.integers(15, 45)) 

194 spill_magnitude = float(rng.uniform(2.0, 5.0)) 

195 

196 flow[spill_start : spill_start + spill_duration] /= spill_magnitude 

197 

198 # Generate infiltration concentration. nonoise is needed to compute cout. 

199 if cin_method == "synthetic": 

200 # Seasonal pattern with noise 

201 cin_nonoise = cin_mean + cin_amplitude * np.sin(2 * np.pi * days / 365) 

202 cin_values = cin_nonoise + rng.normal(0, measurement_noise, len(dates)) 

203 elif cin_method == "constant": 

204 # Constant value 

205 cin_nonoise = np.full(len(dates), cin_mean) 

206 cin_values = cin_nonoise + rng.normal(0, measurement_noise, len(dates)) 

207 elif cin_method == "soil_temperature": 

208 # Use soil temperature data (already includes measurement noise) 

209 cin_nonoise = cin_values = ( 

210 get_soil_temperature( 

211 station_number=260, # Example station number 

212 interpolate_missing_values=True, 

213 )["TB3"] 

214 .resample(date_freq) 

215 .mean()[dates] 

216 .values 

217 ) 

218 else: 

219 msg = f"Unknown cin_method: {cin_method}" 

220 raise ValueError(msg) 

221 

222 # Compute tedges for the flow series 

223 tedges = compute_time_edges(tedges=None, tstart=None, tend=dates, number_of_bins=len(dates)) 

224 

225 # Validate pore volume parameterization: either discrete volumes or gamma parameters, not both. 

226 gamma_set_by_user = [ 

227 name 

228 for name, value in { 

229 "aquifer_pore_volume_gamma_mean": aquifer_pore_volume_gamma_mean, 

230 "aquifer_pore_volume_gamma_std": aquifer_pore_volume_gamma_std, 

231 "aquifer_pore_volume_gamma_loc": aquifer_pore_volume_gamma_loc, 

232 "aquifer_pore_volume_gamma_nbins": aquifer_pore_volume_gamma_nbins, 

233 }.items() 

234 if value is not None 

235 ] 

236 if aquifer_pore_volumes is not None and gamma_set_by_user: 

237 msg = ( 

238 "aquifer_pore_volumes is mutually exclusive with the aquifer_pore_volume_gamma_* " 

239 f"parameters; got both aquifer_pore_volumes and {gamma_set_by_user}." 

240 ) 

241 raise ValueError(msg) 

242 

243 # Validate diffusion parameterization: all three parameters provided or none. 

244 diffusion_provided = (molecular_diffusivity, longitudinal_dispersivity, streamline_length) 

245 n_diffusion = sum(1 for p in diffusion_provided if p is not None) 

246 if 0 < n_diffusion < len(diffusion_provided): 

247 msg = "molecular_diffusivity, longitudinal_dispersivity, and streamline_length must all be provided together." 

248 raise ValueError(msg) 

249 

250 # Fill in gamma defaults so downstream callers see concrete values (not used when 

251 # aquifer_pore_volumes is supplied, but kept in scope for the attrs block below). 

252 gamma_mean = aquifer_pore_volume_gamma_mean if aquifer_pore_volume_gamma_mean is not None else _DEFAULT_GAMMA_MEAN 

253 gamma_std = aquifer_pore_volume_gamma_std if aquifer_pore_volume_gamma_std is not None else _DEFAULT_GAMMA_STD 

254 gamma_loc = aquifer_pore_volume_gamma_loc if aquifer_pore_volume_gamma_loc is not None else _DEFAULT_GAMMA_LOC 

255 gamma_nbins = ( 

256 aquifer_pore_volume_gamma_nbins if aquifer_pore_volume_gamma_nbins is not None else _DEFAULT_GAMMA_NBINS 

257 ) 

258 alpha, beta = mean_std_loc_to_alpha_beta(mean=gamma_mean, std=gamma_std, loc=gamma_loc) 

259 

260 # Compute cout. Branch on pore volume parameterization, then on diffusion. 

261 if aquifer_pore_volumes is not None: 

262 aquifer_pore_volumes_array = np.asarray(aquifer_pore_volumes, dtype=float) 

263 if ( 

264 molecular_diffusivity is not None 

265 and longitudinal_dispersivity is not None 

266 and streamline_length is not None 

267 ): 

268 cout_values = diffusion_infiltration_to_extraction( 

269 cin=cin_nonoise, 

270 flow=flow, 

271 tedges=tedges, 

272 cout_tedges=tedges, 

273 aquifer_pore_volumes=aquifer_pore_volumes_array, 

274 mean_streamline_length=streamline_length, 

275 mean_molecular_diffusivity=molecular_diffusivity, 

276 mean_longitudinal_dispersivity=longitudinal_dispersivity, 

277 retardation_factor=retardation_factor, 

278 suppress_dispersion_warning=True, 

279 ) 

280 else: 

281 cout_values = infiltration_to_extraction( 

282 cin=cin_nonoise, 

283 flow=flow, 

284 tedges=tedges, 

285 cout_tedges=tedges, 

286 aquifer_pore_volumes=aquifer_pore_volumes_array, 

287 retardation_factor=retardation_factor, 

288 ) 

289 elif molecular_diffusivity is not None and longitudinal_dispersivity is not None and streamline_length is not None: 

290 cout_values = diffusion_gamma_infiltration_to_extraction( 

291 cin=cin_nonoise, 

292 flow=flow, 

293 tedges=tedges, 

294 cout_tedges=tedges, 

295 mean=gamma_mean, 

296 std=gamma_std, 

297 loc=gamma_loc, 

298 n_bins=gamma_nbins, 

299 mean_streamline_length=streamline_length, 

300 mean_molecular_diffusivity=molecular_diffusivity, 

301 mean_longitudinal_dispersivity=longitudinal_dispersivity, 

302 retardation_factor=retardation_factor, 

303 suppress_dispersion_warning=True, 

304 ) 

305 else: 

306 cout_values = gamma_infiltration_to_extraction( 

307 cin=cin_nonoise, 

308 flow=flow, 

309 tedges=tedges, 

310 cout_tedges=tedges, 

311 mean=gamma_mean, 

312 std=gamma_std, 

313 loc=gamma_loc, 

314 n_bins=gamma_nbins, 

315 retardation_factor=retardation_factor, 

316 ) 

317 

318 # Add some noise to represent measurement errors 

319 cout_values += rng.normal(0, measurement_noise, len(dates)) 

320 

321 # Create data frame 

322 df = pd.DataFrame( 

323 data={"flow": flow, "cin": cin_values, "cout": cout_values}, 

324 index=dates, 

325 ) 

326 df.attrs.update({ 

327 "description": "Example data for groundwater transport modeling", 

328 "source": "Synthetic data generated by gwtransport.examples.generate_example_data", 

329 "retardation_factor": retardation_factor, 

330 "date_start": date_start, 

331 "date_end": date_end, 

332 "date_freq": date_freq, 

333 "flow_mean": flow_mean, 

334 "flow_amplitude": flow_amplitude, 

335 "flow_noise": flow_noise, 

336 "cin_method": cin_method, 

337 "cin_mean": cin_mean, 

338 "cin_amplitude": cin_amplitude, 

339 "measurement_noise": measurement_noise, 

340 }) 

341 if aquifer_pore_volumes is not None: 

342 df.attrs["aquifer_pore_volume_parameterization"] = "discrete" 

343 df.attrs["aquifer_pore_volumes"] = np.asarray(aquifer_pore_volumes, dtype=float) 

344 else: 

345 df.attrs.update({ 

346 "aquifer_pore_volume_parameterization": "gamma", 

347 "aquifer_pore_volume_gamma_mean": gamma_mean, 

348 "aquifer_pore_volume_gamma_std": gamma_std, 

349 "aquifer_pore_volume_gamma_loc": gamma_loc, 

350 "aquifer_pore_volume_gamma_alpha": alpha, 

351 "aquifer_pore_volume_gamma_beta": beta, 

352 "aquifer_pore_volume_gamma_nbins": gamma_nbins, 

353 }) 

354 if molecular_diffusivity is not None: 

355 df.attrs["molecular_diffusivity"] = molecular_diffusivity 

356 df.attrs["longitudinal_dispersivity"] = longitudinal_dispersivity 

357 df.attrs["streamline_length"] = streamline_length 

358 

359 return df, tedges 

360 

361 

def generate_temperature_example_data(**kwargs) -> tuple[pd.DataFrame, pd.DatetimeIndex]:
    """
    Generate synthetic temperature and flow data for groundwater transport examples.

    Thin wrapper around :func:`generate_example_data` that pre-fills the
    thermal transport parameters (retardation factor, thermal diffusivity,
    longitudinal dispersivity and streamline length). Any of these can be
    overridden by passing the keyword explicitly.

    Typical parameter values for temperature transport in various sand types:

    +---------------------------------+------------+-------------+--------------------+
    | Parameter                       | Fine sand  | Medium sand | Coarse sand/gravel |
    +=================================+============+=============+====================+
    | retardation_factor R            | 2.0--3.0   | 1.5--2.5    | 1.2--2.0           |
    +---------------------------------+------------+-------------+--------------------+
    | molecular_diffusivity (m2/day)  | 0.03--0.06 | 0.05--0.08  | 0.08--0.12         |
    +---------------------------------+------------+-------------+--------------------+
    | longitudinal_dispersivity (m)   | 0.1--1.0   | 0.5--5.0    | 1.0--10.0          |
    +---------------------------------+------------+-------------+--------------------+
    | streamline_length (m)           | site-specific                                 |
    +---------------------------------+------------+-------------+--------------------+

    Parameters
    ----------
    **kwargs
        Keyword arguments passed on to :func:`generate_example_data`. Keys
        given here take precedence over the wrapper defaults, which are:

        - ``retardation_factor`` : 2.0 (thermal retardation)
        - ``molecular_diffusivity`` : 0.05 m2/day (thermal diffusivity)
        - ``longitudinal_dispersivity`` : 1.0 m
        - ``streamline_length`` : 100.0 m

    Returns
    -------
    tuple
        See :func:`generate_example_data`.

    See Also
    --------
    generate_example_data : Generic version with full parameter control.
    """
    thermal_defaults = {
        "retardation_factor": 2.0,
        "molecular_diffusivity": 0.05,
        "longitudinal_dispersivity": 1.0,
        "streamline_length": 100.0,
    }
    # Right-hand operand wins on duplicate keys, so caller-supplied values override the defaults.
    return generate_example_data(**(thermal_defaults | kwargs))

413 

414 

def generate_ec_example_data(**kwargs) -> tuple[pd.DataFrame, pd.DatetimeIndex]:
    """
    Generate synthetic electrical conductivity and flow data for groundwater transport examples.

    Thin wrapper around :func:`generate_example_data` that pre-fills values
    suited to electrical conductivity (EC) transport. EC is treated as a
    conservative tracer: dissolved ions travel at water velocity without
    retardation. Any default can be overridden by passing the keyword
    explicitly.

    Typical parameter values for EC (dissolved ion) transport in various sand
    types. The molecular diffusivity represents effective ionic diffusion in
    porous media (free-water D_0 reduced by porosity/tortuosity). It is
    negligible compared to mechanical dispersion at field scale.

    +---------------------------------+----------------+----------------+--------------------+
    | Parameter                       | Fine sand      | Medium sand    | Coarse sand/gravel |
    +=================================+================+================+====================+
    | retardation_factor R            | 1.0            | 1.0            | 1.0                |
    +---------------------------------+----------------+----------------+--------------------+
    | molecular_diffusivity (m2/day)  | 3e-5 -- 5e-5   | 4e-5 -- 8e-5   | 5e-5 -- 1e-4       |
    +---------------------------------+----------------+----------------+--------------------+
    | longitudinal_dispersivity (m)   | 0.1--1.0       | 0.5--5.0       | 1.0--10.0          |
    +---------------------------------+----------------+----------------+--------------------+
    | streamline_length (m)           | site-specific                                        |
    +---------------------------------+----------------+----------------+--------------------+

    Parameters
    ----------
    **kwargs
        Keyword arguments passed on to :func:`generate_example_data`. Keys
        given here take precedence over the wrapper defaults, which are:

        - ``cin_mean`` : 500.0 (uS/cm, typical surface water EC)
        - ``cin_amplitude`` : 150.0 (uS/cm, seasonal variation)
        - ``measurement_noise`` : 10.0 (uS/cm)
        - ``retardation_factor`` : 1.0 (conservative tracer)
        - ``molecular_diffusivity`` : 5e-5 m2/day (effective ionic diffusion)
        - ``longitudinal_dispersivity`` : 1.0 m
        - ``streamline_length`` : 100.0 m

    Returns
    -------
    tuple
        See :func:`generate_example_data`.

    See Also
    --------
    generate_example_data : Generic version with full parameter control.
    generate_temperature_example_data : Wrapper with thermal transport defaults.
    """
    ec_defaults = {
        "cin_mean": 500.0,
        "cin_amplitude": 150.0,
        "measurement_noise": 10.0,
        "retardation_factor": 1.0,
        "molecular_diffusivity": 5e-5,
        "longitudinal_dispersivity": 1.0,
        "streamline_length": 100.0,
    }
    # Right-hand operand wins on duplicate keys, so caller-supplied values override the defaults.
    return generate_example_data(**(ec_defaults | kwargs))

476 

477 

def generate_example_deposition_timeseries(
    *,
    date_start: str = "2018-01-01",
    date_end: str = "2023-12-31",
    freq: str = "D",
    base: float = 0.8,
    seasonal_amplitude: float = 0.3,
    noise_scale: float = 0.1,
    event_dates: npt.ArrayLike | pd.DatetimeIndex | None = None,
    event_magnitude: float = 3.0,
    event_duration: int = 30,
    event_decay_scale: float = 10.0,
    ensure_non_negative: bool = True,
    rng: np.random.Generator | int | None = None,
) -> tuple[pd.Series, pd.DatetimeIndex]:
    """
    Generate synthetic deposition timeseries for groundwater transport examples.

    The series is the sum of a constant baseline, a sinusoid with a period of
    365.25 time steps, Gaussian noise, and exponentially decaying events that
    start at the time step nearest to each event date.

    Parameters
    ----------
    date_start, date_end : str
        Start and end dates for the generated time series (YYYY-MM-DD).
    freq : str
        Frequency string for pandas.date_range (default 'D').
    base : float
        Baseline deposition rate (ng/m^2/day).
    seasonal_amplitude : float
        Amplitude of the seasonal sinusoidal pattern.
    noise_scale : float
        Standard deviation of the Gaussian noise added to the signal.
    event_dates : list-like, pandas.DatetimeIndex or None
        Dates (strings or pandas-compatible) at which to place episodic events. If None,
        the default list ``["2020-06-15", "2021-03-20", "2021-09-10", "2022-07-05"]``
        is used. Timezone-naive dates are interpreted as UTC; timezone-aware
        dates are converted to UTC. Each event is placed at the nearest time
        step of the generated series.
    event_magnitude : float
        Peak value added to the signal at the first time step of each event.
    event_duration : int
        Length of each event in time steps (days at the default daily
        frequency). Events are truncated at the end of the series.
    event_decay_scale : float
        E-folding scale, in time steps, of the exponential decay of an event.
    ensure_non_negative : bool
        If True, negative values are clipped to zero.
    rng : numpy.random.Generator, int, or None, default None
        Source of randomness for the noise. An integer or
        :class:`numpy.random.Generator` is passed to
        :func:`numpy.random.default_rng` and gives reproducible output.
        ``None`` keeps the previous behavior of drawing from NumPy's global
        random state (so ``numpy.random.seed`` still applies).

    Returns
    -------
    tuple
        A tuple containing:

        - pandas.Series: Deposition values named ``"deposition"``, indexed by
          UTC timestamps, with the generation parameters stored in ``attrs``.
        - pandas.DatetimeIndex: Time edges (tedges) of the series.
    """
    # Create synthetic deposition time series - needs to match flow period
    dates = pd.date_range(date_start, date_end, freq=freq).tz_localize("UTC")
    n_dates = len(dates)
    tedges = compute_time_edges(tedges=None, tstart=None, tend=dates, number_of_bins=n_dates)

    # Seasonal and noise components
    seasonal_pattern = seasonal_amplitude * np.sin(2 * np.pi * np.arange(n_dates) / 365.25)
    if rng is None:
        # Legacy path: global NumPy random state, kept so existing seeded callers are unaffected.
        noise = noise_scale * np.random.normal(0, 1, n_dates)
    else:
        noise = noise_scale * np.random.default_rng(rng).normal(0, 1, n_dates)

    # Default event dates if not provided
    if event_dates is None:
        event_dates = ["2020-06-15", "2021-03-20", "2021-09-10", "2022-07-05"]
    # Convert to DatetimeIndex - handles list, array, or DatetimeIndex input
    if isinstance(event_dates, pd.DatetimeIndex):
        event_dates_index = event_dates
    else:
        # Convert ArrayLike to list for pd.to_datetime
        event_dates_list = event_dates if isinstance(event_dates, list) else list(np.asarray(event_dates))
        event_dates_index = pd.DatetimeIndex(pd.to_datetime(event_dates_list))

    # ``dates`` is UTC-aware, so bring the event dates into UTC before the nearest-index
    # lookup; mixing tz-naive and tz-aware timestamps there is not well defined.
    if event_dates_index.tz is None:
        event_dates_utc = event_dates_index.tz_localize("UTC")
    else:
        event_dates_utc = event_dates_index.tz_convert("UTC")

    event = np.zeros(n_dates)
    for event_idx in dates.get_indexer(event_dates_utc, method="nearest"):
        # Truncate events that would run past the end of the series.
        event_indices = np.arange(event_idx, min(event_idx + event_duration, n_dates))
        decay_pattern = event_magnitude * np.exp(-np.arange(len(event_indices)) / event_decay_scale)
        event[event_indices] += decay_pattern

    # Combine all components
    total = base + seasonal_pattern + noise + event
    if ensure_non_negative:
        total = np.maximum(total, 0.0)

    series = pd.Series(data=total, index=dates, name="deposition")
    series.attrs.update({
        "description": "Example deposition time series for groundwater transport modeling",
        "source": "Synthetic data generated by gwtransport.examples.generate_example_deposition_timeseries",
        "base": base,
        "seasonal_amplitude": seasonal_amplitude,
        "noise_scale": noise_scale,
        # Recorded from the dates as supplied, before the UTC normalisation above.
        "event_dates": [str(d.date()) for d in event_dates_index],
        "event_magnitude": event_magnitude,
        "event_duration": event_duration,
        "event_decay_scale": event_decay_scale,
        "date_start": date_start,
        "date_end": date_end,
        "date_freq": freq,
    })

    return series, tedges