Coverage for src/gwtransport/deposition.py: 0%

75 statements  

« prev     ^ index     » next       coverage.py v7.9.1, created at 2025-06-22 18:30 +0000

1""" 

2Deposition Analysis for 1D Aquifer Systems. 

3 

4This module provides functions to analyze compound deposition and concentration 

5in aquifer systems. It includes tools for computing deposition rates, concentration 

6changes, and related coefficients based on extraction data and aquifer properties. 

7 

8The model assumes requires the groundwaterflow to be reduced to a 1D system. On one side, 

9water with a certain concentration infiltrates ('cin'), the water flows through the aquifer and 

10the compound of interest flows through the aquifer with a retarded velocity. During the soil passage, 

11deposition enriches the water with the compound. The water is extracted ('cout') and the concentration 

12increase due to deposition is called 'dcout'. 

13 

14Main functions: 

15- backward: Calculate deposition rates from extraction data (backward operation, equivalent to deconvolution) 

16- forward: Determine concentration changes due to deposition (forward operation, equivalent to convolution) 

17- deposition_coefficients: Generate coefficients for deposition modeling 

18""" 

19 

20import numpy as np 

21import pandas as pd 

22from scipy.linalg import null_space 

23from scipy.optimize import minimize 

24 

25from gwtransport.residence_time import residence_time 

26from gwtransport.utils import interp_series 

27 

28 

29def backward( 

30 cout, flow, aquifer_pore_volume, porosity, thickness, retardation_factor, nullspace_objective="squared_lengths" 

31): 

32 """ 

33 Compute the deposition given the added concentration of the compound in the extracted water. 

34 

35 Parameters 

36 ---------- 

37 cout : pandas.Series 

38 Concentration of the compound in the extracted water [ng/m3]. 

39 flow : pandas.Series 

40 Flow rate of water in the aquifer [m3/day]. 

41 aquifer_pore_volume : float 

42 Pore volume of the aquifer [m3]. 

43 porosity : float 

44 Porosity of the aquifer [dimensionless]. 

45 thickness : float 

46 Thickness of the aquifer [m]. 

47 retardation_factor : float 

48 Retardation factor of the compound in the aquifer [dimensionless]. 

49 nullspace_objective : str or callable, optional 

50 Objective to minimize in the nullspace. If a string, it should be either "squared_lengths" or "summed_lengths". If a callable, it should take the form `objective(x, xLS, colsOfNullspace)`. Default is "squared_lengths". 

51 

52 Returns 

53 ------- 

54 numpy.ndarray 

55 Deposition of the compound in the aquifer [ng/m2/day]. 

56 """ 

57 # concentration extracted water is coeff dot deposition 

58 coeff, _, index_dep = deposition_coefficients( 

59 cout.index, 

60 flow, 

61 aquifer_pore_volume, 

62 porosity=porosity, 

63 thickness=thickness, 

64 retardation_factor=retardation_factor, 

65 ) 

66 

67 # cout should be of length coeff.shape[0] 

68 if len(cout) != coeff.shape[0]: 

69 msg = f"Length of cout ({len(cout)}) should be equal to the number of rows in coeff ({coeff.shape[0]})" 

70 raise ValueError(msg) 

71 

72 if not index_dep.isin(flow.index).all(): 

73 msg = "The flow timeseries is either not long enough or is not alligned well" 

74 raise ValueError(msg, index_dep, flow.index) 

75 

76 # Underdetermined least squares solution 

77 deposition_ls, *_ = np.linalg.lstsq(coeff, cout, rcond=None) 

78 

79 # Nullspace -> multiple solutions exist, deposition_ls is just one of them 

80 cols_of_nullspace = null_space(coeff, rcond=None) 

81 nullrank = cols_of_nullspace.shape[1] 

82 

83 # Pick a solution in the nullspace that meets new objective 

84 def objective(x, x_ls, cols_of_nullspace): 

85 sols = x_ls + cols_of_nullspace @ x 

86 return np.square(sols[1:] - sols[:-1]).sum() 

87 

88 deposition_0 = np.zeros(nullrank) 

89 res = minimize(objective, x0=deposition_0, args=(deposition_ls, cols_of_nullspace), method="BFGS") 

90 

91 if not res.success: 

92 msg = f"Optimization failed: {res.message}" 

93 raise ValueError(msg) 

94 

95 # Squared lengths is stable to solve, thus a good starting point 

96 if nullspace_objective != "squared_lengths": 

97 if nullspace_objective == "summed_lengths": 

98 

99 def objective(x, x_ls, cols_of_nullspace): 

100 sols = x_ls + cols_of_nullspace @ x 

101 return np.abs(sols[1:] - sols[:-1]).sum() 

102 

103 res = minimize(objective, x0=res.x, args=(deposition_ls, cols_of_nullspace), method="BFGS") 

104 

105 elif callable(nullspace_objective): 

106 res = minimize(nullspace_objective, x0=res.x, args=(deposition_ls, cols_of_nullspace), method="BFGS") 

107 

108 else: 

109 msg = f"Unknown nullspace objective: {nullspace_objective}" 

110 raise ValueError(msg) 

111 

112 if not res.success: 

113 msg = f"Optimization failed: {res.message}" 

114 raise ValueError(msg) 

115 

116 return deposition_ls + cols_of_nullspace @ res.x 

117 

118 

119def forward(dcout_index, deposition, flow, aquifer_pore_volume, porosity, thickness, retardation_factor): 

120 """ 

121 Compute the increase in concentration of the compound in the extracted water by the deposition. 

122 

123 This function represents a forward operation (equivalent to convolution). 

124 

125 Parameters 

126 ---------- 

127 dcout_index : pandas.Series 

128 Concentration of the compound in the extracted water [ng/m3]. 

129 deposition : pandas.Series 

130 Deposition of the compound in the aquifer [ng/m2/day]. 

131 flow : pandas.Series 

132 Flow rate of water in the aquifer [m3/day]. 

133 aquifer_pore_volume : float 

134 Pore volume of the aquifer [m3]. 

135 porosity : float 

136 Porosity of the aquifer [dimensionless]. 

137 thickness : float 

138 Thickness of the aquiifer [m]. 

139 

140 Returns 

141 ------- 

142 numpy.ndarray 

143 Concentration of the compound in the extracted water [ng/m3]. 

144 """ 

145 cout_date_range = dcout_date_range_from_dcout_index(dcout_index) 

146 coeff, _, dep_index = deposition_coefficients( 

147 cout_date_range, 

148 flow, 

149 aquifer_pore_volume, 

150 porosity=porosity, 

151 thickness=thickness, 

152 retardation_factor=retardation_factor, 

153 ) 

154 return coeff @ deposition[dep_index] 

155 

156 

157def deposition_coefficients(dcout_index, flow, aquifer_pore_volume, porosity, thickness, retardation_factor): 

158 """ 

159 Compute the coefficients of the deposition model. 

160 

161 Parameters 

162 ---------- 

163 dcout_index : pandas.Series 

164 Concentration of the compound in the extracted water [ng/m3]. 

165 flow : pandas.Series 

166 Flow rate of water in the aquifer [m3/day]. 

167 aquifer_pore_volume : float 

168 Pore volume of the aquifer [m3]. 

169 porosity : float 

170 Porosity of the aquifer [dimensionless]. 

171 thickness : float 

172 Thickness of the aquifer [m]. 

173 retardation_factor : float 

174 Retardation factor of the compound in the aquifer [dimensionless]. 

175 

176 Returns 

177 ------- 

178 numpy.ndarray 

179 Coefficients of the deposition model [m2/day]. 

180 pandas.DataFrame 

181 Dataframe containing the residence time of the retarded compound in the aquifer [days]. 

182 pandas.DatetimeIndex 

183 Datetime index of the deposition. 

184 """ 

185 # Get deposition indices 

186 rt = residence_time(flow, aquifer_pore_volume, retardation_factor=retardation_factor, direction="extraction") 

187 index_dep = deposition_index_from_dcout_index(dcout_index, flow, aquifer_pore_volume, retardation_factor) 

188 

189 if not index_dep.isin(flow.index).all(): 

190 msg = "The flow timeseries is either not long enough or is not alligned well" 

191 raise ValueError(msg, index_dep, flow.index) 

192 

193 df = pd.DataFrame( 

194 data={ 

195 "flow": flow[dcout_index.floor(freq="D")].values, 

196 "rt": pd.to_timedelta(interp_series(rt, dcout_index), "D"), 

197 "dates_infiltration": dcout_index - pd.to_timedelta(interp_series(rt, dcout_index), "D"), 

198 "darea": flow[dcout_index.floor(freq="D")].values 

199 / (retardation_factor * porosity * thickness), # Aquifer area cathing deposition 

200 }, 

201 index=dcout_index, 

202 ) 

203 

204 # Compute coefficients 

205 dt = np.zeros((len(dcout_index), len(index_dep)), dtype=float) 

206 

207 for iout, (date_extraction, row) in enumerate(df.iterrows()): 

208 itinf = index_dep.searchsorted(row.dates_infiltration.floor(freq="D")) # partial day 

209 itextr = index_dep.searchsorted(date_extraction.floor(freq="D")) # whole day 

210 

211 dt[iout, itinf] = (index_dep[itinf + 1] - row.dates_infiltration) / pd.to_timedelta(1.0, unit="D") 

212 dt[iout, itinf + 1 : itextr] = 1.0 

213 

214 # fraction of first day 

215 dt[iout, itextr] = (date_extraction - index_dep[itextr]) / pd.to_timedelta(1.0, unit="D") 

216 

217 if not np.isclose(dt.sum(axis=1), df.rt.values / pd.to_timedelta(1.0, unit="D")).all(): 

218 msg = "Residence times do not match" 

219 raise ValueError(msg) 

220 

221 flow_floor = flow.median() / 100.0 # m3/day To increase numerical stability 

222 flow_floored = df.flow.clip(lower=flow_floor) 

223 coeff = (df.darea / flow_floored).values[:, None] * dt 

224 

225 if np.isnan(coeff).any(): 

226 msg = "Coefficients contain nan values." 

227 raise ValueError(msg) 

228 

229 return coeff, df, index_dep 

230 

231 

232def dcout_date_range_from_dcout_index(dcout_index): 

233 """ 

234 Compute the date range of the concentration of the compound in the extracted water. 

235 

236 Parameters 

237 ---------- 

238 dcout_index : pandas.DatetimeIndex 

239 Index of the concentration of the compound in the extracted water. 

240 

241 Returns 

242 ------- 

243 pandas.DatetimeIndex 

244 Date range of the concentration of the compound in the extracted water. 

245 """ 

246 return pd.date_range(start=dcout_index.min().floor("D"), end=dcout_index.max(), freq="D") 

247 

248 

249def deposition_index_from_dcout_index(dcout_index, flow, aquifer_pore_volume, retardation_factor): 

250 """ 

251 Compute the index of the deposition from the concentration of the compound in the extracted water index. 

252 

253 Creates and index for each day, starting at the first day that contributes to the first dcout measurement, 

254 and ending at the last day that contributes to the last dcout measurement. The times are floored to the 

255 beginning of the day. 

256 

257 Parameters 

258 ---------- 

259 dcout_index : pandas.DatetimeIndex 

260 Index of the concentration of the compound in the extracted water. 

261 flow : pandas.Series 

262 Flow rate of water in the aquifer [m3/day]. 

263 aquifer_pore_volume : float 

264 Pore volume of the aquifer [m3]. 

265 retardation_factor : float 

266 Retardation factor of the compound in the aquifer [dimensionless]. 

267 

268 Returns 

269 ------- 

270 pandas.DatetimeIndex 

271 Index of the deposition. 

272 """ 

273 rt = residence_time( 

274 flow, 

275 aquifer_pore_volume, 

276 retardation_factor=retardation_factor, 

277 direction="extraction", 

278 return_pandas_series=True, 

279 ) 

280 rt_at_start_cout = pd.to_timedelta(interp_series(rt, dcout_index.min()), "D") 

281 start_dep = (dcout_index.min() - rt_at_start_cout).floor("D") 

282 end_dep = dcout_index.max() 

283 return pd.date_range(start=start_dep, end=end_dep, freq="D")