Coverage for src/gwtransport/deposition.py: 0%

76 statements  

« prev     ^ index     » next       coverage.py v7.8.2, created at 2025-06-06 15:45 +0000

1""" 

2Deposition Analysis for 1D Aquifer Systems. 

3 

4This module provides functions to analyze compound deposition and concentration 

5in aquifer systems. It includes tools for computing deposition rates, concentration 

6changes, and related coefficients based on extraction data and aquifer properties. 

7 

8The model assumes requires the groundwaterflow to be reduced to a 1D system. On one side, 

9water with a certain concentration infiltrates ('cin'), the water flows through the aquifer and 

10the compound of interest flows through the aquifer with a retarded velocity. During the soil passage, 

11deposition enriches the water with the compound. The water is extracted ('cout') and the concentration 

12increase due to deposition is called 'dcout'. 

13 

14Main functions: 

15- backward: Calculate deposition rates from extraction data (backward operation, equivalent to deconvolution) 

16- forward: Determine concentration changes due to deposition (forward operation, equivalent to convolution) 

17- deposition_coefficients: Generate coefficients for deposition modeling 

18""" 

19 

20import numpy as np 

21import pandas as pd 

22from scipy.linalg import null_space 

23from scipy.optimize import minimize 

24 

25from gwtransport.residence_time import residence_time 

26from gwtransport.utils import interp_series 

27 

28 

29def backward( 

30 cout, flow, aquifer_pore_volume, porosity, thickness, retardation_factor, nullspace_objective="squared_lengths" 

31): 

32 """ 

33 Compute the deposition given the added concentration of the compound in the extracted water. 

34 

35 Parameters 

36 ---------- 

37 cout : pandas.Series 

38 Concentration of the compound in the extracted water [ng/m3]. 

39 flow : pandas.Series 

40 Flow rate of water in the aquifer [m3/day]. 

41 aquifer_pore_volume : float 

42 Pore volume of the aquifer [m3]. 

43 porosity : float 

44 Porosity of the aquifer [dimensionless]. 

45 thickness : float 

46 Thickness of the aquifer [m]. 

47 retardation_factor : float 

48 Retardation factor of the compound in the aquifer [dimensionless]. 

49 nullspace_objective : str or callable, optional 

50 Objective to minimize in the nullspace. If a string, it should be either "squared_lengths" or "summed_lengths". If a callable, it should take the form `objective(x, xLS, colsOfNullspace)`. Default is "squared_lengths". 

51 

52 Returns 

53 ------- 

54 pandas.Series 

55 Deposition of the compound in the aquifer [ng/m2/day]. 

56 """ 

57 # concentration extracted water is coeff dot deposition 

58 coeff, _, index_dep = deposition_coefficients( 

59 cout.index, 

60 flow, 

61 aquifer_pore_volume, 

62 porosity=porosity, 

63 thickness=thickness, 

64 retardation_factor=retardation_factor, 

65 ) 

66 

67 # cout should be of length coeff.shape[0] 

68 if len(cout) != coeff.shape[0]: 

69 msg = f"Length of cout ({len(cout)}) should be equal to the number of rows in coeff ({coeff.shape[0]})" 

70 raise ValueError(msg) 

71 

72 if not index_dep.isin(flow.index).all(): 

73 msg = "The flow timeseries is either not long enough or is not alligned well" 

74 raise ValueError(msg, index_dep, flow.index) 

75 

76 # Underdetermined least squares solution 

77 deposition_ls, *_ = np.linalg.lstsq(coeff, cout, rcond=None) 

78 

79 # Nullspace -> multiple solutions exist, deposition_ls is just one of them 

80 cols_of_nullspace = null_space(coeff, rcond=None) 

81 nullrank = cols_of_nullspace.shape[1] 

82 

83 # Pick a solution in the nullspace that meets new objective 

84 def objective(x, x_ls, cols_of_nullspace): 

85 sols = x_ls + cols_of_nullspace @ x 

86 return np.square(sols[1:] - sols[:-1]).sum() 

87 

88 deposition_0 = np.zeros(nullrank) 

89 res = minimize(objective, x0=deposition_0, args=(deposition_ls, cols_of_nullspace), method="BFGS") 

90 

91 if not res.success: 

92 msg = f"Optimization failed: {res.message}" 

93 raise ValueError(msg) 

94 

95 # Squared lengths is stable to solve, thus a good starting point 

96 if nullspace_objective != "squared_lengths": 

97 if nullspace_objective == "summed_lengths": 

98 

99 def objective(x, x_ls, cols_of_nullspace): 

100 sols = x_ls + cols_of_nullspace @ x 

101 return np.abs(sols[1:] - sols[:-1]).sum() 

102 

103 res = minimize(objective, x0=res.x, args=(deposition_ls, cols_of_nullspace), method="BFGS") 

104 

105 elif callable(nullspace_objective): 

106 res = minimize(nullspace_objective, x0=res.x, args=(deposition_ls, cols_of_nullspace), method="BFGS") 

107 

108 else: 

109 msg = f"Unknown nullspace objective: {nullspace_objective}" 

110 raise ValueError(msg) 

111 

112 if not res.success: 

113 msg = f"Optimization failed: {res.message}" 

114 raise ValueError(msg) 

115 

116 deposition_data = deposition_ls + cols_of_nullspace @ res.x 

117 return pd.Series(data=deposition_data, index=index_dep, name="deposition") 

118 

119 

120def forward(dcout_index, deposition, flow, aquifer_pore_volume, porosity, thickness, retardation_factor): 

121 """ 

122 Compute the increase in concentration of the compound in the extracted water by the deposition. 

123 

124 This function represents a forward operation (equivalent to convolution). 

125 

126 Parameters 

127 ---------- 

128 dcout_index : pandas.Series 

129 Concentration of the compound in the extracted water [ng/m3]. 

130 deposition : pandas.Series 

131 Deposition of the compound in the aquifer [ng/m2/day]. 

132 flow : pandas.Series 

133 Flow rate of water in the aquifer [m3/day]. 

134 aquifer_pore_volume : float 

135 Pore volume of the aquifer [m3]. 

136 porosity : float 

137 Porosity of the aquifer [dimensionless]. 

138 thickness : float 

139 Thickness of the aquiifer [m]. 

140 

141 Returns 

142 ------- 

143 pandas.Series 

144 Concentration of the compound in the extracted water [ng/m3]. 

145 """ 

146 cout_date_range = dcout_date_range_from_dcout_index(dcout_index) 

147 coeff, _, dep_index = deposition_coefficients( 

148 cout_date_range, 

149 flow, 

150 aquifer_pore_volume, 

151 porosity=porosity, 

152 thickness=thickness, 

153 retardation_factor=retardation_factor, 

154 ) 

155 return pd.Series(coeff @ deposition[dep_index], index=dcout_index, name="dcout") 

156 

157 

158def deposition_coefficients(dcout_index, flow, aquifer_pore_volume, porosity, thickness, retardation_factor): 

159 """ 

160 Compute the coefficients of the deposition model. 

161 

162 Parameters 

163 ---------- 

164 dcout_index : pandas.Series 

165 Concentration of the compound in the extracted water [ng/m3]. 

166 flow : pandas.Series 

167 Flow rate of water in the aquifer [m3/day]. 

168 aquifer_pore_volume : float 

169 Pore volume of the aquifer [m3]. 

170 porosity : float 

171 Porosity of the aquifer [dimensionless]. 

172 thickness : float 

173 Thickness of the aquifer [m]. 

174 retardation_factor : float 

175 Retardation factor of the compound in the aquifer [dimensionless]. 

176 

177 Returns 

178 ------- 

179 numpy.ndarray 

180 Coefficients of the deposition model [m2/day]. 

181 pandas.DataFrame 

182 Dataframe containing the residence time of the retarded compound in the aquifer [days]. 

183 pandas.DatetimeIndex 

184 Datetime index of the deposition. 

185 """ 

186 # Get deposition indices 

187 rt = residence_time(flow, aquifer_pore_volume, retardation_factor=retardation_factor, direction="extraction") 

188 index_dep = deposition_index_from_dcout_index(dcout_index, flow, aquifer_pore_volume, retardation_factor) 

189 

190 if not index_dep.isin(flow.index).all(): 

191 msg = "The flow timeseries is either not long enough or is not alligned well" 

192 raise ValueError(msg, index_dep, flow.index) 

193 

194 df = pd.DataFrame( 

195 data={ 

196 "flow": flow[dcout_index.floor(freq="D")].values, 

197 "rt": pd.to_timedelta(interp_series(rt, dcout_index), "D"), 

198 "dates_infiltration": dcout_index - pd.to_timedelta(interp_series(rt, dcout_index), "D"), 

199 "darea": flow[dcout_index.floor(freq="D")].values 

200 / (retardation_factor * porosity * thickness), # Aquifer area cathing deposition 

201 }, 

202 index=dcout_index, 

203 ) 

204 

205 # Compute coefficients 

206 dt = np.zeros((len(dcout_index), len(index_dep)), dtype=float) 

207 

208 for iout, (date_extraction, row) in enumerate(df.iterrows()): 

209 itinf = index_dep.searchsorted(row.dates_infiltration.floor(freq="D")) # partial day 

210 itextr = index_dep.searchsorted(date_extraction.floor(freq="D")) # whole day 

211 

212 dt[iout, itinf] = (index_dep[itinf + 1] - row.dates_infiltration) / pd.to_timedelta(1.0, unit="D") 

213 dt[iout, itinf + 1 : itextr] = 1.0 

214 

215 # fraction of first day 

216 dt[iout, itextr] = (date_extraction - index_dep[itextr]) / pd.to_timedelta(1.0, unit="D") 

217 

218 if not np.isclose(dt.sum(axis=1), df.rt.values / pd.to_timedelta(1.0, unit="D")).all(): 

219 msg = "Residence times do not match" 

220 raise ValueError(msg) 

221 

222 flow_floor = flow.median() / 100.0 # m3/day To increase numerical stability 

223 flow_floored = df.flow.clip(lower=flow_floor) 

224 coeff = (df.darea / flow_floored).values[:, None] * dt 

225 

226 if np.isnan(coeff).any(): 

227 msg = "Coefficients contain nan values." 

228 raise ValueError(msg) 

229 

230 return coeff, df, index_dep 

231 

232 

233def dcout_date_range_from_dcout_index(dcout_index): 

234 """ 

235 Compute the date range of the concentration of the compound in the extracted water. 

236 

237 Parameters 

238 ---------- 

239 dcout_index : pandas.DatetimeIndex 

240 Index of the concentration of the compound in the extracted water. 

241 

242 Returns 

243 ------- 

244 pandas.DatetimeIndex 

245 Date range of the concentration of the compound in the extracted water. 

246 """ 

247 return pd.date_range(start=dcout_index.min().floor("D"), end=dcout_index.max(), freq="D") 

248 

249 

250def deposition_index_from_dcout_index(dcout_index, flow, aquifer_pore_volume, retardation_factor): 

251 """ 

252 Compute the index of the deposition from the concentration of the compound in the extracted water index. 

253 

254 Creates and index for each day, starting at the first day that contributes to the first dcout measurement, 

255 and ending at the last day that contributes to the last dcout measurement. The times are floored to the 

256 beginning of the day. 

257 

258 Parameters 

259 ---------- 

260 dcout_index : pandas.DatetimeIndex 

261 Index of the concentration of the compound in the extracted water. 

262 flow : pandas.Series 

263 Flow rate of water in the aquifer [m3/day]. 

264 aquifer_pore_volume : float 

265 Pore volume of the aquifer [m3]. 

266 retardation_factor : float 

267 Retardation factor of the compound in the aquifer [dimensionless]. 

268 

269 Returns 

270 ------- 

271 pandas.DatetimeIndex 

272 Index of the deposition. 

273 """ 

274 rt = residence_time( 

275 flow, 

276 aquifer_pore_volume, 

277 retardation_factor=retardation_factor, 

278 direction="extraction", 

279 return_pandas_series=True, 

280 ) 

281 rt_at_start_cout = pd.to_timedelta(interp_series(rt, dcout_index.min()), "D") 

282 start_dep = (dcout_index.min() - rt_at_start_cout).floor("D") 

283 end_dep = dcout_index.max() 

284 return pd.date_range(start=start_dep, end=end_dep, freq="D")