Coverage for src/gwtransport/advection.py: 0%

112 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-20 13:45 +0000

1""" 

2Advection Analysis for 1D Aquifer Systems. 

3 

4This module provides functions to analyze compound transport by advection 

5in aquifer systems. It includes tools for computing concentrations of the extracted water 

6based on the concentration of the infiltrating water, extraction data and aquifer properties. 

7 

8The model assumes requires the groundwaterflow to be reduced to a 1D system. On one side, 

9water with a certain concentration infiltrates ('cin'), the water flows through the aquifer and 

10the compound of interest flows through the aquifer with a retarded velocity. The water is 

11extracted ('cout'). 

12 

13Main functions: 

14- infiltration_to_extraction: Compute the concentration of the extracted water by shifting cin with its residence time. This corresponds to a convolution operation. 

15- gamma_infiltration_to_extraction: Similar to infiltration_to_extraction, but for a gamma distribution of aquifer pore volumes. 

16- distribution_infiltration_to_extraction: Similar to infiltration_to_extraction, but for an arbitrary distribution of aquifer pore volumes. 

17""" 

18 

19import numpy as np 

20import pandas as pd 

21 

22from gwtransport import gamma 

23from gwtransport.residence_time import residence_time 

24from gwtransport.utils import compute_time_edges, interp_series, partial_isin 

25 

26 

27def infiltration_to_extraction(cin_series, flow_series, aquifer_pore_volume, retardation_factor=1.0, cout_index="cin"): 

28 """ 

29 Compute the concentration of the extracted water by shifting cin with its residence time. 

30 

31 The compound is retarded in the aquifer with a retardation factor. The residence 

32 time is computed based on the flow rate of the water in the aquifer and the pore volume 

33 of the aquifer. 

34 

35 This function represents infiltration to extraction modeling (equivalent to convolution). 

36 

37 Parameters 

38 ---------- 

39 cin_series : pandas.Series 

40 Concentration of the compound in the infiltrating water [ng/m3]. The cin_series should be the average concentration of a time bin. The index should be a pandas.DatetimeIndex 

41 and is labeled at the end of the time bin. 

42 flow_series : pandas.Series 

43 Flow rate of water in the aquifer [m3/day]. The flow_series should be the average flow of a time bin. The index should be a pandas.DatetimeIndex 

44 and is labeled at the end of the time bin. 

45 aquifer_pore_volume : float 

46 Pore volume of the aquifer [m3]. 

47 cout_index : str, optional 

48 The index of the output series. Can be 'cin', 'flow', or 'cout'. Default is 'cin'. 

49 - 'cin': The output series will have the same index as `cin_series`. 

50 - 'flow': The output series will have the same index as `flow_series`. 

51 - 'cout': The output series will have the same index as `cin_series + residence_time`. 

52 

53 Returns 

54 ------- 

55 numpy.ndarray 

56 Concentration of the compound in the extracted water [ng/m3]. 

57 

58 Examples 

59 -------- 

60 Basic usage with single pore volume: 

61 

62 >>> import pandas as pd 

63 >>> import numpy as np 

64 >>> from gwtransport.advection import infiltration_to_extraction 

65 >>> 

66 >>> # Create input data 

67 >>> dates = pd.date_range(start="2020-01-01", end="2020-01-10", freq="D") 

68 >>> cin = pd.Series(np.ones(len(dates)), index=dates) 

69 >>> flow = pd.Series(np.ones(len(dates)) * 100, index=dates) # 100 m3/day 

70 >>> 

71 >>> # Single aquifer pore volume 

72 >>> aquifer_pore_volume = 500.0 # m3 

73 >>> 

74 >>> # Run infiltration to extraction model (default returns on cin index) 

75 >>> cout = infiltration_to_extraction(cin, flow, aquifer_pore_volume) 

76 >>> len(cout) == len(cin) 

77 True 

78 

79 Different output index options: 

80 

81 >>> # Output on cin index (default) 

82 >>> cout_cin = infiltration_to_extraction( 

83 ... cin, flow, aquifer_pore_volume, cout_index="cin" 

84 ... ) 

85 >>> 

86 >>> # Output on flow index 

87 >>> cout_flow = infiltration_to_extraction( 

88 ... cin, flow, aquifer_pore_volume, cout_index="flow" 

89 ... ) 

90 >>> 

91 >>> # Output on shifted time index (cin + residence_time) 

92 >>> cout_shifted = infiltration_to_extraction( 

93 ... cin, flow, aquifer_pore_volume, cout_index="cout" 

94 ... ) 

95 

96 With retardation factor: 

97 

98 >>> # Compound moves twice as slowly due to sorption 

99 >>> cout = infiltration_to_extraction( 

100 ... cin, flow, aquifer_pore_volume, retardation_factor=2.0 

101 ... ) 

102 """ 

103 # Create flow tedges from the flow series index (assuming it's at the end of bins) 

104 flow_tedges = compute_time_edges(tedges=None, tstart=None, tend=flow_series.index, number_of_bins=len(flow_series)) 

105 rt_array = residence_time( 

106 flow=flow_series, 

107 flow_tedges=flow_tedges, 

108 index=cin_series.index, 

109 aquifer_pore_volume=aquifer_pore_volume, 

110 retardation_factor=retardation_factor, 

111 direction="infiltration_to_extraction", 

112 ) 

113 

114 rt = pd.to_timedelta(rt_array[0], unit="D", errors="coerce") 

115 index = cin_series.index + rt 

116 

117 cout = pd.Series(data=cin_series.values, index=index, name="cout") 

118 

119 if cout_index == "cin": 

120 return interp_series(cout, cin_series.index) 

121 if cout_index == "flow": 

122 # If cout_index is 'flow', we need to resample cout to the flow index 

123 return interp_series(cout, flow_series.index) 

124 if cout_index == "cout": 

125 # If cout_index is 'cout', we return the cout as is 

126 return cout.values 

127 

128 msg = f"Invalid cout_index: {cout_index}. Must be 'cin', 'flow', or 'cout'." 

129 raise ValueError(msg) 

130 

131 

132def extraction_to_infiltration(cout, flow, aquifer_pore_volume, retardation_factor=1.0, resample_dates=None): 

133 """ 

134 Compute the concentration of the infiltrating water by shifting cout with its residence time. 

135 

136 This function represents extraction to infiltration modeling (equivalent to deconvolution). 

137 

138 Parameters 

139 ---------- 

140 cout : pandas.Series 

141 Concentration of the compound in the extracted water [ng/m3]. 

142 flow : pandas.Series 

143 Flow rate of water in the aquifer [m3/day]. 

144 aquifer_pore_volume : float 

145 Pore volume of the aquifer [m3]. 

146 

147 Returns 

148 ------- 

149 numpy.ndarray 

150 Concentration of the compound in the infiltrating water [ng/m3]. 

151 """ 

152 msg = "Extraction to infiltration advection (deconvolution) is not implemented yet" 

153 raise NotImplementedError(msg) 

154 

155 

156def gamma_infiltration_to_extraction( 

157 *, 

158 cin, 

159 flow, 

160 tedges, 

161 cout_tedges, 

162 alpha=None, 

163 beta=None, 

164 mean=None, 

165 std=None, 

166 n_bins=100, 

167 retardation_factor=1.0, 

168): 

169 """ 

170 Compute the concentration of the extracted water by shifting cin with its residence time. 

171 

172 The compound is retarded in the aquifer with a retardation factor. The residence 

173 time is computed based on the flow rate of the water in the aquifer and the pore volume 

174 of the aquifer. The aquifer pore volume is approximated by a gamma distribution, with 

175 parameters alpha and beta. 

176 

177 This function represents infiltration to extraction modeling (equivalent to convolution). 

178 

179 Provide either alpha and beta or mean and std. 

180 

181 Parameters 

182 ---------- 

183 cin : pandas.Series 

184 Concentration of the compound in infiltrating water or temperature of infiltrating 

185 water. 

186 cin_tedges : pandas.DatetimeIndex 

187 Time edges for the concentration data. Used to compute the cumulative concentration. 

188 Has a length of one more than `cin`. 

189 cout_tedges : pandas.DatetimeIndex 

190 Time edges for the output data. Used to compute the cumulative concentration. 

191 Has a length of one more than `flow`. 

192 flow : pandas.Series 

193 Flow rate of water in the aquifer [m3/day]. 

194 flow_tedges : pandas.DatetimeIndex 

195 Time edges for the flow data. Used to compute the cumulative flow. 

196 Has a length of one more than `flow`. 

197 alpha : float, optional 

198 Shape parameter of gamma distribution of the aquifer pore volume (must be > 0) 

199 beta : float, optional 

200 Scale parameter of gamma distribution of the aquifer pore volume (must be > 0) 

201 mean : float, optional 

202 Mean of the gamma distribution. 

203 std : float, optional 

204 Standard deviation of the gamma distribution. 

205 n_bins : int 

206 Number of bins to discretize the gamma distribution. 

207 retardation_factor : float 

208 Retardation factor of the compound in the aquifer. 

209 

210 Returns 

211 ------- 

212 numpy.ndarray 

213 Concentration of the compound in the extracted water [ng/m3] or temperature. 

214 

215 Examples 

216 -------- 

217 Basic usage with alpha and beta parameters: 

218 

219 >>> import pandas as pd 

220 >>> import numpy as np 

221 >>> from gwtransport.utils import compute_time_edges 

222 >>> from gwtransport.advection import gamma_infiltration_to_extraction 

223 >>> 

224 >>> # Create input data with aligned time edges 

225 >>> dates = pd.date_range(start="2020-01-01", end="2020-01-20", freq="D") 

226 >>> tedges = compute_time_edges( 

227 ... tedges=None, tstart=None, tend=dates, number_of_bins=len(dates) 

228 ... ) 

229 >>> 

230 >>> # Create output time edges (can be different alignment) 

231 >>> cout_dates = pd.date_range(start="2020-01-05", end="2020-01-15", freq="D") 

232 >>> cout_tedges = compute_time_edges( 

233 ... tedges=None, tstart=None, tend=cout_dates, number_of_bins=len(cout_dates) 

234 ... ) 

235 >>> 

236 >>> # Input concentration and flow (same length, aligned with tedges) 

237 >>> cin = pd.Series(np.ones(len(dates)), index=dates) 

238 >>> flow = pd.Series(np.ones(len(dates)) * 100, index=dates) # 100 m3/day 

239 >>> 

240 >>> # Run gamma_infiltration_to_extraction with alpha/beta parameters 

241 >>> cout = gamma_infiltration_to_extraction( 

242 ... cin=cin, 

243 ... cin_tedges=tedges, 

244 ... cout_tedges=cout_tedges, 

245 ... flow=flow, 

246 ... flow_tedges=tedges, # Must be identical to cin_tedges 

247 ... alpha=10.0, 

248 ... beta=10.0, 

249 ... n_bins=5, 

250 ... ) 

251 >>> cout.shape 

252 (11,) 

253 

254 Using mean and std parameters instead: 

255 

256 >>> cout = gamma_infiltration_to_extraction( 

257 ... cin=cin, 

258 ... cin_tedges=tedges, 

259 ... cout_tedges=cout_tedges, 

260 ... flow=flow, 

261 ... flow_tedges=tedges, 

262 ... mean=100.0, 

263 ... std=20.0, 

264 ... n_bins=5, 

265 ... ) 

266 

267 With retardation factor: 

268 

269 >>> cout = gamma_infiltration_to_extraction( 

270 ... cin=cin, 

271 ... cin_tedges=tedges, 

272 ... cout_tedges=cout_tedges, 

273 ... flow=flow, 

274 ... flow_tedges=tedges, 

275 ... alpha=10.0, 

276 ... beta=10.0, 

277 ... retardation_factor=2.0, # Doubles residence time 

278 ... ) 

279 """ 

280 bins = gamma.bins(alpha=alpha, beta=beta, mean=mean, std=std, n_bins=n_bins) 

281 return distribution_infiltration_to_extraction( 

282 cin=cin, 

283 flow=flow, 

284 tedges=tedges, 

285 cout_tedges=cout_tedges, 

286 aquifer_pore_volumes=bins["expected_value"], 

287 retardation_factor=retardation_factor, 

288 ) 

289 

290 

291def gamma_extraction_to_infiltration(cout, flow, alpha, beta, n_bins=100, retardation_factor=1.0): 

292 """ 

293 Compute the concentration of the infiltrating water by shifting cout with its residence time. 

294 

295 This function represents extraction to infiltration modeling (equivalent to deconvolution). 

296 

297 Parameters 

298 ---------- 

299 cout : pandas.Series 

300 Concentration of the compound in the extracted water [ng/m3]. 

301 flow : pandas.Series 

302 Flow rate of water in the aquifer [m3/day]. 

303 alpha : float 

304 Shape parameter of gamma distribution of the aquifer pore volume (must be > 0) 

305 beta : float 

306 Scale parameter of gamma distribution of the aquifer pore volume (must be > 0) 

307 n_bins : int 

308 Number of bins to discretize the gamma distribution. 

309 retardation_factor : float 

310 Retardation factor of the compound in the aquifer. 

311 

312 Returns 

313 ------- 

314 NotImplementedError 

315 This function is not yet implemented. 

316 """ 

317 msg = "Extraction to infiltration advection gamma (deconvolution) is not implemented yet" 

318 raise NotImplementedError(msg) 

319 

320 

321def distribution_infiltration_to_extraction( 

322 *, 

323 cin, 

324 flow, 

325 tedges, 

326 cout_tedges, 

327 aquifer_pore_volumes, 

328 retardation_factor=1.0, 

329): 

330 """ 

331 Compute the concentration of the extracted water using flow-weighted advection. 

332 

333 This function implements an infiltration to extraction advection model where cin and flow values 

334 correspond to the same aligned time bins defined by tedges. 

335 

336 The algorithm: 

337 1. Computes residence times for each pore volume at cout time edges 

338 2. Calculates infiltration time edges by subtracting residence times 

339 3. Determines temporal overlaps between infiltration and cin time windows 

340 4. Creates flow-weighted overlap matrices normalized by total weights 

341 5. Computes weighted contributions and averages across pore volumes 

342 

343 Parameters 

344 ---------- 

345 cin : array-like 

346 Concentration values of infiltrating water or temperature [concentration units]. 

347 Length must match the number of time bins defined by tedges. 

348 flow : array-like 

349 Flow rate values in the aquifer [m3/day]. 

350 Length must match cin and the number of time bins defined by tedges. 

351 tedges : pandas.DatetimeIndex 

352 Time edges defining bins for both cin and flow data. Has length of 

353 len(cin) + 1 and len(flow) + 1. 

354 cout_tedges : pandas.DatetimeIndex 

355 Time edges for output data bins. Has length of desired output + 1. 

356 Can have different time alignment and resolution than tedges. 

357 aquifer_pore_volumes : array-like 

358 Array of aquifer pore volumes [m3] representing the distribution 

359 of residence times in the aquifer system. 

360 retardation_factor : float, optional 

361 Retardation factor of the compound in the aquifer (default 1.0). 

362 Values > 1.0 indicate slower transport due to sorption/interaction. 

363 

364 Returns 

365 ------- 

366 numpy.ndarray 

367 Flow-weighted concentration in the extracted water. Same units as cin. 

368 Length equals len(cout_tedges) - 1. NaN values indicate time periods 

369 with no valid contributions from the infiltration data. 

370 

371 Raises 

372 ------ 

373 ValueError 

374 If tedges length doesn't match cin/flow arrays plus one, or if 

375 infiltration time edges become non-monotonic (invalid input conditions). 

376 

377 Examples 

378 -------- 

379 Basic usage with pandas Series: 

380 

381 >>> import pandas as pd 

382 >>> import numpy as np 

383 >>> from gwtransport.utils import compute_time_edges 

384 >>> from gwtransport.advection import distribution_infiltration_to_extraction 

385 >>> 

386 >>> # Create input data 

387 >>> dates = pd.date_range(start="2020-01-01", end="2020-01-20", freq="D") 

388 >>> tedges = compute_time_edges( 

389 ... tedges=None, tstart=None, tend=dates, number_of_bins=len(dates) 

390 ... ) 

391 >>> 

392 >>> # Create output time edges (different alignment) 

393 >>> cout_dates = pd.date_range(start="2020-01-05", end="2020-01-15", freq="D") 

394 >>> cout_tedges = compute_time_edges( 

395 ... tedges=None, tstart=None, tend=cout_dates, number_of_bins=len(cout_dates) 

396 ... ) 

397 >>> 

398 >>> # Input concentration and flow 

399 >>> cin = pd.Series(np.ones(len(dates)), index=dates) 

400 >>> flow = pd.Series(np.ones(len(dates)) * 100, index=dates) # 100 m3/day 

401 >>> 

402 >>> # Define distribution of aquifer pore volumes 

403 >>> aquifer_pore_volumes = np.array([50, 100, 200]) # m3 

404 >>> 

405 >>> # Run distribution_infiltration_to_extraction 

406 >>> cout = distribution_infiltration_to_extraction( 

407 ... cin=cin, 

408 ... flow=flow, 

409 ... tedges=tedges, 

410 ... cout_tedges=cout_tedges, 

411 ... aquifer_pore_volumes=aquifer_pore_volumes, 

412 ... ) 

413 >>> cout.shape 

414 (11,) 

415 

416 Using array inputs instead of pandas Series: 

417 

418 >>> # Convert to arrays 

419 >>> cin_values = cin.values 

420 >>> flow_values = flow.values 

421 >>> 

422 >>> cout = distribution_infiltration_to_extraction( 

423 ... cin=cin_values, 

424 ... flow=flow_values, 

425 ... tedges=tedges, 

426 ... cout_tedges=cout_tedges, 

427 ... aquifer_pore_volumes=aquifer_pore_volumes, 

428 ... ) 

429 

430 With retardation factor: 

431 

432 >>> cout = distribution_infiltration_to_extraction( 

433 ... cin=cin, 

434 ... flow=flow, 

435 ... tedges=tedges, 

436 ... cout_tedges=cout_tedges, 

437 ... aquifer_pore_volumes=aquifer_pore_volumes, 

438 ... retardation_factor=2.0, # Compound moves twice as slowly 

439 ... ) 

440 

441 Using single pore volume: 

442 

443 >>> single_volume = np.array([100]) # Single 100 m3 pore volume 

444 >>> cout = distribution_infiltration_to_extraction( 

445 ... cin=cin, 

446 ... flow=flow, 

447 ... tedges=tedges, 

448 ... cout_tedges=cout_tedges, 

449 ... aquifer_pore_volumes=single_volume, 

450 ... ) 

451 """ 

452 tedges = pd.DatetimeIndex(tedges) 

453 cout_tedges = pd.DatetimeIndex(cout_tedges) 

454 

455 if len(tedges) != len(cin) + 1: 

456 msg = "tedges must have one more element than cin" 

457 raise ValueError(msg) 

458 if len(tedges) != len(flow) + 1: 

459 msg = "tedges must have one more element than flow" 

460 raise ValueError(msg) 

461 

462 # Convert to arrays for vectorized operations 

463 cin_values = np.asarray(cin) 

464 flow_values = np.asarray(flow) 

465 

466 # Validate inputs do not contain NaN values 

467 if np.any(np.isnan(cin_values)): 

468 msg = "cin contains NaN values, which are not allowed" 

469 raise ValueError(msg) 

470 if np.any(np.isnan(flow_values)): 

471 msg = "flow contains NaN values, which are not allowed" 

472 raise ValueError(msg) 

473 cin_tedges_days = ((tedges - tedges[0]) / pd.Timedelta(days=1)).values 

474 cout_tedges_days = ((cout_tedges - tedges[0]) / pd.Timedelta(days=1)).values 

475 aquifer_pore_volumes = np.asarray(aquifer_pore_volumes) 

476 

477 # Pre-compute all residence times and infiltration edges 

478 rt_edges_2d = residence_time( 

479 flow=flow_values, 

480 flow_tedges=tedges, 

481 index=cout_tedges, 

482 aquifer_pore_volume=aquifer_pore_volumes, 

483 retardation_factor=retardation_factor, 

484 direction="extraction_to_infiltration", 

485 ) 

486 infiltration_tedges_2d = cout_tedges_days[None, :] - rt_edges_2d 

487 

488 # Pre-compute valid bins and count 

489 valid_bins_2d = ~(np.isnan(infiltration_tedges_2d[:, :-1]) | np.isnan(infiltration_tedges_2d[:, 1:])) 

490 valid_pv_count = np.sum(valid_bins_2d, axis=0) 

491 

492 # Initialize accumulator 

493 normalized_weights = _distribution_infiltration_to_extraction_weights( 

494 cout_tedges, 

495 aquifer_pore_volumes, 

496 cin_values, 

497 flow_values, 

498 cin_tedges_days, 

499 infiltration_tedges_2d, 

500 valid_bins_2d, 

501 valid_pv_count, 

502 ) 

503 

504 # Apply to concentrations and handle NaN for periods with no contributions 

505 out = normalized_weights.dot(cin_values) 

506 out[valid_pv_count == 0] = np.nan 

507 

508 return out 

509 

510 

511def _distribution_infiltration_to_extraction_weights( 

512 cout_tedges, 

513 aquifer_pore_volumes, 

514 cin_values, 

515 flow_values, 

516 cin_tedges_days, 

517 infiltration_tedges_2d, 

518 valid_bins_2d, 

519 valid_pv_count, 

520): 

521 accumulated_weights = np.zeros((len(cout_tedges) - 1, len(cin_values))) 

522 

523 # Loop over each pore volume 

524 for i in range(len(aquifer_pore_volumes)): 

525 if np.any(valid_bins_2d[i, :]): 

526 overlap_matrix = partial_isin(infiltration_tedges_2d[i, :], cin_tedges_days) 

527 accumulated_weights[valid_bins_2d[i, :], :] += overlap_matrix[valid_bins_2d[i, :], :] 

528 

529 # Average across valid pore volumes and apply flow weighting 

530 averaged_weights = np.zeros_like(accumulated_weights) 

531 valid_cout = valid_pv_count > 0 

532 averaged_weights[valid_cout, :] = accumulated_weights[valid_cout, :] / valid_pv_count[valid_cout, None] 

533 

534 # Apply flow weighting after averaging 

535 flow_weighted_averaged = averaged_weights * flow_values[None, :] 

536 

537 total_weights = np.sum(flow_weighted_averaged, axis=1) 

538 valid_weights = total_weights > 0 

539 normalized_weights = np.zeros_like(flow_weighted_averaged) 

540 normalized_weights[valid_weights, :] = flow_weighted_averaged[valid_weights, :] / total_weights[valid_weights, None] 

541 return normalized_weights 

542 

543 

544def distribution_extraction_to_infiltration( 

545 *, 

546 cout, 

547 flow, 

548 tedges, 

549 cin_tedges, 

550 aquifer_pore_volumes, 

551 retardation_factor=1.0, 

552): 

553 """ 

554 Compute the concentration of the infiltrating water from extracted water (deconvolution). 

555 

556 This function implements an extraction to infiltration advection model (inverse of distribution_infiltration_to_extraction) 

557 where cout and flow values correspond to the same aligned time bins defined by tedges. 

558 

559 SYMMETRIC RELATIONSHIP: 

560 - distribution_infiltration_to_extraction: cin + tedges → cout + cout_tedges 

561 - distribution_extraction_to_infiltration: cout + tedges → cin + cin_tedges 

562 

563 The algorithm (symmetric to distribution_infiltration_to_extraction): 

564 1. Computes residence times for each pore volume at cint time edges 

565 2. Calculates extraction time edges by adding residence times (reverse of infiltration_to_extraction) 

566 3. Determines temporal overlaps between extraction and cout time windows 

567 4. Creates flow-weighted overlap matrices normalized by total weights 

568 5. Computes weighted contributions and averages across pore volumes 

569 

570 Parameters 

571 ---------- 

572 cout : array-like 

573 Concentration values of extracted water [concentration units]. 

574 Length must match the number of time bins defined by tedges. 

575 flow : array-like 

576 Flow rate values in the aquifer [m3/day]. 

577 Length must match cout and the number of time bins defined by tedges. 

578 tedges : pandas.DatetimeIndex 

579 Time edges defining bins for both cout and flow data. Has length of 

580 len(cout) + 1 and len(flow) + 1. 

581 cin_tedges : pandas.DatetimeIndex 

582 Time edges for output (infiltration) data bins. Has length of desired output + 1. 

583 Can have different time alignment and resolution than tedges. 

584 aquifer_pore_volumes : array-like 

585 Array of aquifer pore volumes [m3] representing the distribution 

586 of residence times in the aquifer system. 

587 retardation_factor : float, optional 

588 Retardation factor of the compound in the aquifer (default 1.0). 

589 Values > 1.0 indicate slower transport due to sorption/interaction. 

590 

591 Returns 

592 ------- 

593 numpy.ndarray 

594 Flow-weighted concentration in the infiltrating water. Same units as cout. 

595 Length equals len(cin_tedges) - 1. NaN values indicate time periods 

596 with no valid contributions from the extraction data. 

597 

598 Raises 

599 ------ 

600 ValueError 

601 If tedges length doesn't match cout/flow arrays plus one, or if 

602 extraction time edges become non-monotonic (invalid input conditions). 

603 

604 Examples 

605 -------- 

606 Basic usage with pandas Series: 

607 

608 >>> import pandas as pd 

609 >>> import numpy as np 

610 >>> from gwtransport.utils import compute_time_edges 

611 >>> from gwtransport.advection import distribution_extraction_to_infiltration 

612 >>> 

613 >>> # Create input data 

614 >>> dates = pd.date_range(start="2020-01-01", end="2020-01-20", freq="D") 

615 >>> tedges = compute_time_edges( 

616 ... tedges=None, tstart=None, tend=dates, number_of_bins=len(dates) 

617 ... ) 

618 >>> 

619 >>> # Create output time edges (different alignment) 

620 >>> cint_dates = pd.date_range(start="2019-12-25", end="2020-01-15", freq="D") 

621 >>> cin_tedges = compute_time_edges( 

622 ... tedges=None, tstart=None, tend=cint_dates, number_of_bins=len(cint_dates) 

623 ... ) 

624 >>> 

625 >>> # Input concentration and flow 

626 >>> cout = pd.Series(np.ones(len(dates)), index=dates) 

627 >>> flow = pd.Series(np.ones(len(dates)) * 100, index=dates) # 100 m3/day 

628 >>> 

629 >>> # Define distribution of aquifer pore volumes 

630 >>> aquifer_pore_volumes = np.array([50, 100, 200]) # m3 

631 >>> 

632 >>> # Run distribution_extraction_to_infiltration 

633 >>> cin = distribution_extraction_to_infiltration( 

634 ... cout=cout, 

635 ... flow=flow, 

636 ... tedges=tedges, 

637 ... cin_tedges=cin_tedges, 

638 ... aquifer_pore_volumes=aquifer_pore_volumes, 

639 ... ) 

640 >>> cin.shape 

641 (22,) 

642 

643 Using array inputs instead of pandas Series: 

644 

645 >>> # Convert to arrays 

646 >>> cout_values = cout.values 

647 >>> flow_values = flow.values 

648 >>> 

649 >>> cin = distribution_extraction_to_infiltration( 

650 ... cout=cout_values, 

651 ... flow=flow_values, 

652 ... tedges=tedges, 

653 ... cin_tedges=cin_tedges, 

654 ... aquifer_pore_volumes=aquifer_pore_volumes, 

655 ... ) 

656 

657 With retardation factor: 

658 

659 >>> cin = distribution_extraction_to_infiltration( 

660 ... cout=cout, 

661 ... flow=flow, 

662 ... tedges=tedges, 

663 ... cin_tedges=cin_tedges, 

664 ... aquifer_pore_volumes=aquifer_pore_volumes, 

665 ... retardation_factor=2.0, # Compound moves twice as slowly 

666 ... ) 

667 

668 Using single pore volume: 

669 

670 >>> single_volume = np.array([100]) # Single 100 m3 pore volume 

671 >>> cin = distribution_extraction_to_infiltration( 

672 ... cout=cout, 

673 ... flow=flow, 

674 ... tedges=tedges, 

675 ... cin_tedges=cin_tedges, 

676 ... aquifer_pore_volumes=single_volume, 

677 ... ) 

678 """ 

679 tedges = pd.DatetimeIndex(tedges) 

680 cin_tedges = pd.DatetimeIndex(cin_tedges) 

681 

682 if len(tedges) != len(cout) + 1: 

683 msg = "tedges must have one more element than cout" 

684 raise ValueError(msg) 

685 if len(tedges) != len(flow) + 1: 

686 msg = "tedges must have one more element than flow" 

687 raise ValueError(msg) 

688 

689 # Convert to arrays for vectorized operations 

690 cout_values = np.asarray(cout) 

691 flow_values = np.asarray(flow) 

692 

693 # Validate inputs do not contain NaN values 

694 if np.any(np.isnan(cout_values)): 

695 msg = "cout contains NaN values, which are not allowed" 

696 raise ValueError(msg) 

697 if np.any(np.isnan(flow_values)): 

698 msg = "flow contains NaN values, which are not allowed" 

699 raise ValueError(msg) 

700 cout_tedges_days = ((tedges - tedges[0]) / pd.Timedelta(days=1)).values 

701 cin_tedges_days = ((cin_tedges - tedges[0]) / pd.Timedelta(days=1)).values 

702 aquifer_pore_volumes = np.asarray(aquifer_pore_volumes) 

703 

704 # Pre-compute all residence times and extraction edges (symmetric to infiltration_to_extraction) 

705 rt_edges_2d = residence_time( 

706 flow=flow_values, 

707 flow_tedges=tedges, 

708 index=cin_tedges, 

709 aquifer_pore_volume=aquifer_pore_volumes, 

710 retardation_factor=retardation_factor, 

711 direction="infiltration_to_extraction", # Computing from infiltration perspective 

712 ) 

713 extraction_tedges_2d = cin_tedges_days[None, :] + rt_edges_2d 

714 

715 # Pre-compute valid bins and count 

716 valid_bins_2d = ~(np.isnan(extraction_tedges_2d[:, :-1]) | np.isnan(extraction_tedges_2d[:, 1:])) 

717 valid_pv_count = np.sum(valid_bins_2d, axis=0) 

718 

719 # Initialize accumulator 

720 normalized_weights = _distribution_extraction_to_infiltration_weights( 

721 cin_tedges, 

722 aquifer_pore_volumes, 

723 cout_values, 

724 flow_values, 

725 cout_tedges_days, 

726 extraction_tedges_2d, 

727 valid_bins_2d, 

728 valid_pv_count, 

729 ) 

730 

731 # Apply to concentrations and handle NaN for periods with no contributions 

732 out = normalized_weights.dot(cout_values) 

733 out[valid_pv_count == 0] = np.nan 

734 

735 return out 

736 

737 

738def _distribution_extraction_to_infiltration_weights( 

739 cin_tedges, 

740 aquifer_pore_volumes, 

741 cout_values, 

742 flow_values, 

743 cout_tedges_days, 

744 extraction_tedges_2d, 

745 valid_bins_2d, 

746 valid_pv_count, 

747): 

748 """ 

749 Compute extraction to infiltration transformation weights matrix. 

750 

751 Computes the weight matrix for the extraction to infiltration transformation, 

752 ensuring mathematical symmetry with the infiltration to extraction operation. The extraction to infiltration 

753 weights represent the transpose relationship needed for deconvolution. 

754 

755 SYMMETRIC RELATIONSHIP: 

756 - Infiltration to extraction weights: W_infiltration_to_extraction maps cin → cout 

757 - Extraction to infiltration weights: W_extraction_to_infiltration maps cout → cin 

758 - Mathematical constraint: W_extraction_to_infiltration should be the pseudo-inverse of W_infiltration_to_extraction 

759 

760 The algorithm mirrors _distribution_infiltration_to_extraction_weights but with transposed 

761 temporal overlap computations to ensure mathematical consistency. 

762 

763 Parameters 

764 ---------- 

765 cin_tedges : pandas.DatetimeIndex 

766 Time edges for output (infiltration) data bins. 

767 aquifer_pore_volumes : array-like 

768 Array of aquifer pore volumes [m3]. 

769 cout_values : array-like 

770 Concentration values of extracted water. 

771 flow_values : array-like 

772 Flow rate values in the aquifer [m3/day]. 

773 cout_tedges_days : array-like 

774 Time edges for cout data in days since reference. 

775 extraction_tedges_2d : array-like 

776 2D array of extraction time edges for each pore volume. 

777 valid_bins_2d : array-like 

778 2D boolean array indicating valid time bins for each pore volume. 

779 valid_pv_count : array-like 

780 Count of valid pore volumes for each output time bin. 

781 

782 Returns 

783 ------- 

784 numpy.ndarray 

785 Normalized weight matrix for extraction to infiltration transformation. 

786 Shape: (len(cin_tedges) - 1, len(cout_values)) 

787 """ 

788 accumulated_weights = np.zeros((len(cin_tedges) - 1, len(cout_values))) 

789 

790 # Loop over each pore volume (same structure as infiltration_to_extraction) 

791 for i in range(len(aquifer_pore_volumes)): 

792 if np.any(valid_bins_2d[i, :]): 

793 # SYMMETRIC temporal overlap computation: 

794 # Infiltration to extraction: maps infiltration → cout time windows 

795 # Extraction to infiltration: maps extraction → cout time windows (transposed relationship) 

796 overlap_matrix = partial_isin(extraction_tedges_2d[i, :], cout_tedges_days) 

797 accumulated_weights[valid_bins_2d[i, :], :] += overlap_matrix[valid_bins_2d[i, :], :] 

798 

799 # Average across valid pore volumes (symmetric to infiltration_to_extraction) 

800 averaged_weights = np.zeros_like(accumulated_weights) 

801 valid_cout = valid_pv_count > 0 

802 averaged_weights[valid_cout, :] = accumulated_weights[valid_cout, :] / valid_pv_count[valid_cout, None] 

803 

804 # Apply flow weighting (symmetric to infiltration_to_extraction) 

805 flow_weighted_averaged = averaged_weights * flow_values[None, :] 

806 

807 # Normalize by total weights (symmetric to infiltration_to_extraction) 

808 total_weights = np.sum(flow_weighted_averaged, axis=1) 

809 valid_weights = total_weights > 0 

810 normalized_weights = np.zeros_like(flow_weighted_averaged) 

811 normalized_weights[valid_weights, :] = flow_weighted_averaged[valid_weights, :] / total_weights[valid_weights, None] 

812 

813 return normalized_weights