Coverage for src/gwtransport/logremoval.py: 0%

1"""

2Functions for calculating log removal efficiency in water treatment systems.

4This module provides utilities to calculate log removal values for different

5configurations of water treatment systems, including both basic log removal

6calculations and parallel flow arrangements where multiple treatment processes

7operate simultaneously on different fractions of the total flow.

9Log removal is a standard measure in water treatment that represents the

10reduction of pathogen concentration on a logarithmic scale. For example,

11a log removal of 3 represents a 99.9% reduction in pathogen concentration.

13Functions

14---------

15residence_time_to_log_removal : Calculate log removal from residence times and removal rate

16parallel_mean : Calculate weighted average log removal for parallel flow systems

17gamma_pdf : Compute PDF of log removal given gamma-distributed residence time

18gamma_cdf : Compute CDF of log removal given gamma-distributed residence time

19gamma_mean : Compute mean log removal for gamma-distributed residence time

20gamma_find_flow_for_target_mean : Find flow rate for target mean log removal

22Notes

23-----

24For systems in series, log removals are typically summed directly, while for

25parallel systems, a weighted average based on flow distribution is required.

26The parallel_mean function supports multi-dimensional arrays via the axis parameter

27and performs minimal validation for improved performance.

28"""

30import numpy as np

31from scipy import stats

32from scipy.special import digamma, gamma

def residence_time_to_log_removal(residence_times, log_removal_rate):
    """
    Convert residence times into log removal values.

    The relation applied is::

        log_removal = log_removal_rate * log10(residence_time)

    Parameters
    ----------
    residence_times : array_like
        Residence time(s); converted to a float array before use. Values
        should be positive: ``log10`` yields ``-inf`` for zero and ``nan``
        for negative entries.
    log_removal_rate : float
        Multiplier applied to ``log10(residence_times)``.

    Returns
    -------
    log_removals : ndarray
        Log removal for every residence time, with the same shape as the
        input (a NumPy scalar for scalar input).

    Notes
    -----
    Log removal expresses pathogen reduction on a base-10 logarithmic scale:

    - Log 1 = 90% reduction
    - Log 2 = 99% reduction
    - Log 3 = 99.9% reduction

    Examples
    --------
    >>> import numpy as np
    >>> residence_time_to_log_removal(np.array([1.0, 10.0, 100.0]), 2.0)
    array([0., 2., 4.])

    >>> # Scalar input
    >>> float(residence_time_to_log_removal(5.0, 1.5))
    1.0484550065040283

    >>> # 2D input keeps its shape
    >>> residence_time_to_log_removal(np.array([[1.0, 10.0], [100.0, 1000.0]]), 1.0)
    array([[0., 1.],
           [2., 3.]])
    """
    # Normalise lists/scalars to a float ndarray so log10 behaves uniformly.
    times = np.asarray(residence_times, dtype=float)
    log10_times = np.log10(times)

    return log_removal_rate * log10_times

def parallel_mean(log_removals, flow_fractions=None, axis=None):
    """
    Calculate the flow-weighted log removal of a system with parallel flows.

    The combined log removal is computed as::

        total = -log10(sum(F_i * 10**(-LR_i)))

    where ``F_i`` is the fraction of flow through path ``i`` and ``LR_i`` is
    the log removal of that path.

    Parameters
    ----------
    log_removals : array_like
        Log removal value of each parallel flow. May be a scalar, 1-D or
        multi-dimensional.
    flow_fractions : array_like, optional
        Flow fraction of each parallel flow. Must broadcast against
        ``log_removals`` and should sum to 1.0 over the reduced elements
        (this is not validated). If None (default), the flow is split
        equally over the flows being combined.
    axis : int, optional
        Axis along which the flows are combined. If None (default), all
        elements of ``log_removals`` are combined into a single value, i.e.
        the input is treated as if it were flattened.

    Returns
    -------
    float or ndarray
        Combined log removal. A scalar when ``axis`` is None, otherwise an
        array with ``axis`` removed.

    Notes
    -----
    Only minimal input validation is performed; incompatible shapes surface
    as NumPy broadcasting errors.

    With equal flow fractions and no flows to combine (empty input or an
    empty ``axis``), the weighted sum is zero and the result is ``inf``.

    Log removal is a logarithmic measure of pathogen reduction:

    - Log 1 = 90% reduction
    - Log 2 = 99% reduction
    - Log 3 = 99.9% reduction

    The combined removal of parallel flows lies between the worst and the
    best individual removal. For systems in series, log removals are summed
    directly instead.

    Examples
    --------
    >>> import numpy as np
    >>> # Three parallel streams with equal flow
    >>> float(parallel_mean(np.array([3, 4, 5])))
    3.431798275933005

    >>> # Two parallel streams with weighted flow
    >>> float(parallel_mean(np.array([3, 5]), np.array([0.7, 0.3])))
    3.153044674980176

    >>> # Multi-dimensional array: parallel mean along axis 1
    >>> parallel_mean(np.array([[3, 4, 5], [2, 3, 4]]), axis=1)
    array([3.43179828, 2.43179828])
    """
    log_removals = np.asarray(log_removals, dtype=float)

    if flow_fractions is None:
        # Number of flows being combined: every element when axis is None
        # (len() would only count the first dimension and fails on 0-d
        # input), otherwise the length of the reduced axis.
        n_flows = log_removals.size if axis is None else log_removals.shape[axis]
        # A scalar weight broadcasts over any shape; guard the empty case so
        # no division by zero occurs.
        flow_fractions = 1.0 / n_flows if n_flows > 0 else 0.0
    else:
        flow_fractions = np.asarray(flow_fractions, dtype=float)

    # Fraction of pathogens remaining after each path.
    decimal_reductions = 10 ** (-log_removals)

    # Flow-weighted remaining fraction of the combined stream.
    weighted_decimal_reduction = np.sum(flow_fractions * decimal_reductions, axis=axis)

    # Back to the log scale.
    return -np.log10(weighted_decimal_reduction)

209

210

def gamma_pdf(r, rt_alpha, rt_beta, log_removal_rate):
    """
    Compute the PDF of log removal for a gamma-distributed residence time.

    With ``R = log_removal_rate * log10(T)`` and ``T ~ gamma(rt_alpha, rt_beta)``,
    a change of variables (``t = 10**(r / log_removal_rate)``) gives::

        f_R(r) = ln(10) / (log_removal_rate * Gamma(rt_alpha) * rt_beta**rt_alpha)
                 * t**rt_alpha * exp(-t / rt_beta)

    gamma(rt_alpha, rt_beta) = gamma(apv_alpha, apv_beta / flow)

    Parameters
    ----------
    r : array_like
        Log removal values at which to compute the PDF.
    rt_alpha : float
        Shape parameter of the gamma distribution for residence time.
    rt_beta : float
        Scale parameter of the gamma distribution for residence time.
    log_removal_rate : float
        Coefficient for log removal calculation (R = log_removal_rate * log10(T)).

    Returns
    -------
    pdf_values : ndarray
        PDF values corresponding to the input r values.

    Notes
    -----
    The density is evaluated in log space (using ``gammaln``) so that large
    ``rt_alpha`` or large ``r`` do not overflow ``Gamma(rt_alpha)`` or
    ``t**rt_alpha`` into ``inf``/``nan``.
    """
    # Local import: the log-gamma function is needed only here.
    from scipy.special import gammaln

    # Accept lists and scalars as documented (array_like).
    r = np.asarray(r, dtype=float)

    ln10 = np.log(10)
    # ln(t) is available in closed form, avoiding log(10**x) round trips.
    ln_t = r * ln10 / log_removal_rate
    t_values = 10 ** (r / log_removal_rate)

    # log of: t**alpha * exp(-t/beta) / (Gamma(alpha) * beta**alpha)
    log_kernel = (
        rt_alpha * ln_t
        - t_values / rt_beta
        - gammaln(rt_alpha)
        - rt_alpha * np.log(rt_beta)
    )

    # Jacobian factor dt/dr contributes ln(10) / log_removal_rate.
    return (ln10 / log_removal_rate) * np.exp(log_kernel)

241

242

def gamma_cdf(r, rt_alpha, rt_beta, log_removal_rate):
    """
    Compute the CDF of log removal for a gamma-distributed residence time.

    Each log removal value ``r`` is mapped back to the residence time
    ``t = 10**(r / log_removal_rate)`` and the gamma CDF is evaluated there.

    gamma(rt_alpha, rt_beta) = gamma(apv_alpha, apv_beta / flow)

    Parameters
    ----------
    r : array_like
        Log removal values at which to compute the CDF.
    rt_alpha : float
        Shape parameter of the gamma distribution for residence time.
    rt_beta : float
        Scale parameter of the gamma distribution for residence time.
    log_removal_rate : float
        Coefficient for log removal calculation (R = log_removal_rate * log10(T)).

    Returns
    -------
    cdf_values : ndarray
        CDF values corresponding to the input r values.
    """
    # Accept lists and scalars as documented (array_like).
    r = np.asarray(r, dtype=float)

    # Residence times corresponding to the requested log removal values.
    t_values = 10 ** (r / log_removal_rate)

    # P(R <= r) equals P(T <= t) for the mapped residence time.
    return stats.gamma.cdf(t_values, a=rt_alpha, scale=rt_beta)

270

271

def gamma_mean(rt_alpha, rt_beta, log_removal_rate):
    """
    Compute the mean log removal for a gamma-distributed residence time.

    gamma(rt_alpha, rt_beta) = gamma(apv_alpha, apv_beta / flow)

    Parameters
    ----------
    rt_alpha : float
        Shape parameter of the gamma distribution for residence time.
    rt_beta : float
        Scale parameter of the gamma distribution for residence time.
    log_removal_rate : float
        Coefficient for log removal calculation (R = log_removal_rate * log10(T)).

    Returns
    -------
    mean : float
        Mean value of the log removal distribution.

    Notes
    -----
    Uses ``E[R] = log_removal_rate * E[log10(T)]`` together with the gamma
    identity ``E[ln(T)] = digamma(rt_alpha) + ln(rt_beta)`` and the base
    change ``log10(T) = ln(T) / ln(10)``.
    """
    # Expected natural log of the residence time.
    mean_ln_t = digamma(rt_alpha) + np.log(rt_beta)

    # Rate per natural-log unit of time (folds in the ln -> log10 conversion).
    rate_per_ln = log_removal_rate / np.log(10)

    return rate_per_ln * mean_ln_t

297

298

def gamma_find_flow_for_target_mean(target_mean, apv_alpha, apv_beta, log_removal_rate):
    """
    Find the flow rate that yields a target mean log removal.

    The residence time is gamma distributed with parameters derived from
    the pore volume distribution and the flow:

    gamma(rt_alpha, rt_beta) = gamma(apv_alpha, apv_beta / flow)

    Parameters
    ----------
    target_mean : float
        Target mean log removal value.
    apv_alpha : float
        Shape parameter of the gamma distribution for pore volume (used
        unchanged as the shape parameter for residence time).
    apv_beta : float
        Scale parameter of the gamma distribution for pore volume.
    log_removal_rate : float
        Coefficient for log removal calculation (R = log_removal_rate * log10(T)).

    Returns
    -------
    flow : float
        Flow rate that produces the target mean log removal.

    Notes
    -----
    Analytical inversion of the mean formula. Starting from::

        E[R] = (log_removal_rate / ln(10)) * (digamma(alpha) + ln(beta) - ln(flow))

    and solving for the flow gives::

        ln(flow) = digamma(alpha) + ln(beta) - ln(10) * target_mean / log_removal_rate
        flow     = beta * exp(digamma(alpha) - ln(10) * target_mean / log_removal_rate)
    """
    # Target mean expressed in natural-log units of residence time.
    target_ln_time = (np.log(10) * target_mean) / log_removal_rate

    # ln(flow / apv_beta) from the rearranged mean formula.
    ln_flow_over_beta = digamma(apv_alpha) - target_ln_time

    return apv_beta * np.exp(ln_flow_over_beta)