Coverage for src/gwtransport/fronttracking/validation.py: 95%

1"""

2Physics validation utilities for front tracking in (V, θ) coordinates.

4This module provides functions to verify physical correctness of front-tracking

5simulations, including entropy conditions, concentration bounds, mass conservation,

6and event ordering. The solver runs in cumulative-flow coordinate

7``θ = ∫flow(t') dt'``; events on ``state.events`` carry ``"theta"`` (m³). Because

8``flow ≥ 0`` is enforced, θ is monotone non-decreasing in t, so θ-ordering and

9chronological ordering are equivalent.

11This file is part of gwtransport which is released under AGPL-3.0 license.

12See the ./LICENSE file or go to https://github.com/gwtransport/gwtransport/blob/main/LICENSE for full license details.

13"""

15import logging

17import numpy as np

18import numpy.typing as npt

19import pandas as pd

21from gwtransport._time import tedges_to_days

22from gwtransport.fronttracking.output import (

23 compute_breakthrough_curve,

24 compute_cumulative_inlet_mass,

25 compute_domain_mass,

26)

27from gwtransport.fronttracking.solver import FrontTrackerState

28from gwtransport.fronttracking.waves import ShockWave

# Numerical tolerance constants
#
# Lower bound used by the non-negativity check in ``verify_physics``: output
# concentrations are accepted as long as their minimum is >= this value, so tiny
# negative values from floating-point round-off do not count as violations.
EPSILON_CONCENTRATION_TOLERANCE = -1e-14  # Smallest concentration still accepted (round-off allowance)

# Mass-balance check (7) tolerance and grid.
#
# The independent outlet mass integrates the breakthrough curve with the trapezoid
# rule (see ``_independent_outlet_mass``). For a shock-bearing, sharply-curved
# breakthrough this is only first-order accurate: the measured relative error for the
# canonical favorable-sorption pulse oscillates at ~5e-4 across a modest grid band
# (2000-4000 points; it only falls reliably below 1e-4 above ~20000 points). The grid is
# deliberately kept modest because a numerical DecayingShockWave makes
# ``compute_breakthrough_curve`` slow (seconds to minutes for large grids), so we cannot
# refine the integral to machine precision. ``_MASS_BALANCE_RTOL`` therefore bounds that
# ~5e-4 grid noise with a wide margin (1e-2 / 5e-4 = 20x); a physical 30% inlet-mass
# error yields a relative error of ~0.23 (= 1 - 1/1.3) to ~0.43 (= 1/0.7 - 1), i.e.
# ~20-40x this floor, so the check still has strong teeth against a genuine
# conservation failure.
_MASS_BALANCE_RTOL = 1e-2  # Floor for the relative tolerance of mass-balance check (7)
_MASS_BALANCE_GRID_POINTS = 3000  # Default number of trapezoid nodes in ``_independent_outlet_mass``

# Module-level logger; ``verify_physics`` reports through it when ``verbose=True``.
logger = logging.getLogger(__name__)

def _independent_outlet_mass(tracker_state: FrontTrackerState, *, n_grid: int = _MASS_BALANCE_GRID_POINTS) -> float:
    """Return breakthrough integral plus residual domain mass, both taken at θ_max.

    θ_max is the last entry of ``tracker_state.theta_edges``. The result is

    ``∫₀^θ_max c_out(τ) dτ + m_dom(θ_max)``

    where the first term (mass that already left through the outlet) is a trapezoid-rule
    integral of :func:`compute_breakthrough_curve` and the second term (mass still inside
    the domain) comes from :func:`compute_domain_mass`. :func:`compute_breakthrough_curve`
    dispatches :func:`concentration_at_point` directly (pure wave evaluation), so neither
    term is derived from the identity ``m_out = m_in − m_dom``. Comparing the sum with
    :func:`compute_cumulative_inlet_mass` at θ_max is therefore a genuine,
    non-tautological conservation test, and it also holds for a pulse that has only
    partially broken through by θ_max.

    Parameters
    ----------
    tracker_state : FrontTrackerState
        Solver state; must expose ``v_outlet``, ``sorption``, ``waves`` and
        ``theta_edges``.
    n_grid : int, optional
        Number of trapezoid nodes for the breakthrough integral over ``[0, θ_max]``.

    Returns
    -------
    float
        Independent outlet-side mass total [mass].
    """
    v_out, sorp, wave_list = tracker_state.v_outlet, tracker_state.sorption, tracker_state.waves
    theta_end = float(np.asarray(tracker_state.theta_edges, dtype=float)[-1])

    # Both branches evaluate the domain mass with exactly these arguments.
    domain_kwargs = {"theta": theta_end, "v_outlet": v_out, "waves": wave_list, "sorption": sorp}

    # Nothing can have left through the outlet over an empty θ-interval, so the
    # total reduces to the mass inside the domain.
    if theta_end <= 0.0:
        return compute_domain_mass(**domain_kwargs)

    nodes: npt.NDArray[np.floating] = np.linspace(0.0, theta_end, n_grid)
    c_out = compute_breakthrough_curve(nodes, v_out, wave_list, sorp)
    exported = float(np.trapezoid(c_out, nodes))
    return exported + compute_domain_mass(**domain_kwargs)

def verify_physics(
    structure: dict,
    cout: npt.ArrayLike,
    cout_tedges: pd.DatetimeIndex,
    cin: npt.ArrayLike,
    *,
    verbose: bool = True,
    rtol: float = 1e-10,
) -> dict:
    """
    Run comprehensive physics verification checks on front tracking results.

    Performs the following checks:

    1. Entropy condition for all shocks
    2. No negative concentrations (within tolerance)
    3. Output concentration <= input maximum
    4. Finite first arrival θ
    5. No NaN values after spin-up period
    6. Events θ-ordered (equivalent to chronological under non-negative flow)
    7. Mass conservation: independent outlet integral + domain mass == inlet mass at θ_max

    Parameters
    ----------
    structure : dict
        Structure returned from ``infiltration_to_extraction_nonlinear_sorption``.
        Must contain keys ``'waves'`` and ``'theta_first_arrival'``. ``'events'`` is
        read with an empty-list default and ``'tracker_state'`` is optional; without
        a tracker state check 7 is reported as skipped (and counted as passed).
    cout : array-like
        Bin-averaged output concentrations.
    cout_tedges : pandas.DatetimeIndex
        Output time edges for bins (only used for the spin-up mask).
    cin : array-like
        Input concentrations.
    verbose : bool, optional
        If True, log the per-check results through the module logger (INFO level;
        the summary and failure list are logged at WARNING level when a check
        failed). If False, only return the results. Default True.
    rtol : float, optional
        Relative tolerance for numerical checks. Default 1e-10. For the mass-balance
        check (7) the effective tolerance is ``max(rtol, _MASS_BALANCE_RTOL)`` because
        that check integrates a shock-bearing breakthrough curve and is only first-order
        accurate (see ``_MASS_BALANCE_RTOL``).

    Returns
    -------
    results : dict
        Dictionary containing:

        - ``'all_passed'``: bool - True if all checks passed
        - ``'n_checks'``: int - Total number of checks performed
        - ``'n_passed'``: int - Number of checks that passed
        - ``'failures'``: list of str - Description of failed checks (empty if all passed)
        - ``'checks'``: list of dict - Per-check result records; each has ``'name'``,
          ``'passed'``, ``'message'`` keys. ``'passed'`` is always a built-in ``bool``.
        - ``'summary'``: str - One-line summary

    Examples
    --------
    .. disable_try_examples

    ::

        results = verify_physics(structure, cout, cout_tedges, cin, verbose=False)
        print(results["summary"])
        assert results["all_passed"]
    """
    cout = np.asarray(cout, dtype=float)
    cin = np.asarray(cin, dtype=float)
    failures: list[str] = []
    checks: list[dict] = []

    # NOTE: every verdict below is wrapped in ``bool(...)``. NumPy comparisons return
    # ``numpy.bool_``; storing those would make ``'passed'`` a NumPy scalar and turn
    # ``n_passed`` into a NumPy integer, contradicting the documented return types and
    # breaking e.g. ``json.dumps(results)``.

    # Check 1: Entropy condition for all shocks (Lax in (V, θ): λ_θ(C_L) >= s >= λ_θ(C_R)).
    shocks = [w for w in structure["waves"] if isinstance(w, ShockWave)]
    entropy_violations = [s for s in shocks if not s.satisfies_entropy()]
    check1_pass = not entropy_violations
    checks.append({
        "name": "Shock entropy condition",
        "passed": check1_pass,
        "message": f"Entropy violations: {len(entropy_violations)}/{len(shocks)} shocks",
    })
    if not check1_pass:
        failures.append(f"Entropy violations: {len(entropy_violations)} shocks violate entropy condition")

    # Check 2: No negative concentrations (within tolerance). NaN bins are ignored here;
    # they are the subject of check 5.
    valid_cout = cout[~np.isnan(cout)]
    min_cout = np.min(valid_cout) if valid_cout.size > 0 else 0.0
    check2_pass = bool(min_cout >= EPSILON_CONCENTRATION_TOLERANCE)
    checks.append({
        "name": "Non-negative concentrations",
        "passed": check2_pass,
        "message": f"Minimum concentration: {min_cout:.2e}",
    })
    if not check2_pass:
        failures.append(f"Negative concentrations found: min = {min_cout:.2e}")

    # Check 3: Output doesn't exceed input (within tight tolerance)
    max_cout = np.max(valid_cout) if valid_cout.size > 0 else 0.0
    max_cin = np.max(cin)
    check3_pass = bool(max_cout <= max_cin * (1.0 + rtol))
    checks.append({
        "name": "Output <= input maximum",
        "passed": check3_pass,
        "message": f"Max output: {max_cout:.2f}, Max input: {max_cin:.2f}",
    })
    if not check3_pass:
        failures.append(f"Output exceeds input: {max_cout:.2f} > {max_cin:.2f}")

    # Check 4: Finite first arrival θ
    theta_first = structure["theta_first_arrival"]
    check4_pass = bool(np.isfinite(theta_first))
    checks.append({
        "name": "Finite first arrival θ",
        "passed": check4_pass,
        "message": f"First arrival: θ={theta_first:.2f}",
    })
    if not check4_pass:
        failures.append(f"First arrival θ is not finite: {theta_first}")

    # Check 5: No NaN values after spin-up
    tracker_state = structure.get("tracker_state")
    if not check4_pass:
        # No spin-up bound — every output row counts as "after spin-up".
        mask_after_spinup = np.ones(len(cout), dtype=bool)
    elif tracker_state is not None:
        # theta_at_t measures days from the input origin tracker_state.tedges[0]; the output
        # grid must be referenced to that same origin (not its own first edge) or the mask
        # shifts and NaNs after spin-up slip through.
        t_days = tedges_to_days(cout_tedges, ref=tracker_state.tedges[0])[:-1]
        theta_at_edge = tracker_state.theta_at_t_array(t_days)
        mask_after_spinup = theta_at_edge >= theta_first
    else:
        # No tracker state to translate — nothing to check.
        mask_after_spinup = np.zeros(len(cout), dtype=bool)
    cout_after_spinup = cout[mask_after_spinup]
    nan_count = int(np.sum(np.isnan(cout_after_spinup)))
    check5_pass = nan_count == 0
    checks.append({
        "name": "No NaN after spin-up",
        "passed": check5_pass,
        "message": f"NaN values after spin-up: {nan_count}/{len(cout_after_spinup)}",
    })
    if not check5_pass:
        failures.append(f"Found {nan_count} NaN values after spin-up period")

    # Check 6: Events θ-ordered. ``np.all(np.diff(...) >= 0)`` is vacuously True
    # for an empty/singleton sequence, so the same expression covers the N/A case;
    # only the message differs.
    event_thetas = [e["theta"] for e in structure.get("events", [])]
    check6_pass = bool(np.all(np.diff(event_thetas) >= 0))
    checks.append({
        "name": "Events θ-ordered",
        "passed": check6_pass,
        "message": f"{len(event_thetas)} events" if len(event_thetas) > 1 else f"{len(event_thetas)} events (N/A)",
    })
    if not check6_pass:
        failures.append("Events are not θ-ordered")

    # Check 7: Total integrated outlet mass vs total inlet mass (in θ-space).
    #
    # The outlet-side total is computed *independently* of the conservation identity
    # ``m_out = m_in − m_dom`` (which the old check used on both sides, making it an
    # algebraic tautology that passed for any input). ``_independent_outlet_mass``
    # integrates the breakthrough curve and adds the spatial domain mass; both come from
    # direct wave evaluation, so a mismatch with the cumulative inlet mass signals a real
    # conservation failure. Integrated to θ_max (the last θ-bin edge); for pulses that
    # have not fully broken through there, the partial breakthrough integral plus the
    # residual domain mass still equals the cumulative inlet mass.
    if tracker_state is not None and hasattr(tracker_state, "theta_edges"):
        theta_edges_arr = np.asarray(tracker_state.theta_edges, dtype=float)
        theta_integration_end = float(theta_edges_arr[-1])

        total_mass_in = compute_cumulative_inlet_mass(theta=theta_integration_end, cin=cin, theta_edges=theta_edges_arr)
        independent_mass_out = _independent_outlet_mass(tracker_state)

        # Fall back to the absolute error when there is no inlet mass to normalise by.
        if total_mass_in > 0:
            relative_error_total = abs(independent_mass_out - total_mass_in) / total_mass_in
        else:
            relative_error_total = abs(independent_mass_out - total_mass_in)

        mass_balance_threshold = max(rtol, _MASS_BALANCE_RTOL)
        check7_pass = bool(relative_error_total <= mass_balance_threshold)
        checks.append({
            "name": "Total integrated outlet mass",
            "passed": check7_pass,
            "message": (
                f"Relative error: {relative_error_total:.2e} (independent outlet integral to "
                f"θ={theta_integration_end:.1f}; threshold {mass_balance_threshold:.2e})"
            ),
        })
        if not check7_pass:
            failures.append(
                f"Total outlet mass mismatch: relative_error={relative_error_total:.2e} > "
                f"{mass_balance_threshold:.2e} (independent_mass_out={independent_mass_out:.6e}, "
                f"total_mass_in={total_mass_in:.6e}, θ_integration_end={theta_integration_end:.1f})"
            )
    else:
        # Without a tracker state the check cannot be evaluated; it is recorded as
        # passed so that it does not count as a failure.
        checks.append({
            "name": "Total integrated outlet mass",
            "passed": True,
            "message": "Skipped (tracker state not available)",
        })

    # Compile results
    n_checks = len(checks)
    n_passed = sum(1 for c in checks if c["passed"])
    all_passed = not failures

    if all_passed:
        summary = f"All {n_checks} physics checks passed"
    else:
        summary = f"{n_passed}/{n_checks} checks passed ({len(failures)} failures)"

    results = {
        "all_passed": all_passed,
        "n_checks": n_checks,
        "n_passed": n_passed,
        "failures": failures,
        "checks": checks,
        "summary": summary,
    }

    if verbose:
        logger.info("\nPhysics Verification:")
        for i, check in enumerate(checks, 1):
            status = "PASS" if check["passed"] else "FAIL"
            logger.info(" %d. %s: %s %s", i, check["name"], status, check["message"])

        if all_passed:
            logger.info("\n%s", summary)
        else:
            logger.warning("\n%s", summary)
            logger.warning("\nFailures:")
            for i, failure in enumerate(failures, 1):
                logger.warning(" %d. %s", i, failure)

    return results