Coverage for src/gwtransport/logremoval.py: 87%
31 statements
« prev ^ index » next coverage.py v7.10.7, created at 2025-09-29 20:01 +0000
« prev ^ index » next coverage.py v7.10.7, created at 2025-09-29 20:01 +0000
1"""
2Functions for calculating log removal efficiency in water treatment systems.
4This module provides utilities to calculate log removal values for different
5configurations of water treatment systems, including both basic log removal
6calculations and parallel flow arrangements where multiple treatment processes
7operate simultaneously on different fractions of the total flow.
9Log removal is a standard measure in water treatment that represents the
10reduction of pathogen concentration on a logarithmic scale. For example,
11a log removal of 3 represents a 99.9% reduction in pathogen concentration.
13Functions
14---------
15residence_time_to_log_removal : Calculate log removal from residence times and removal rate
16parallel_mean : Calculate weighted average log removal for parallel flow systems
17gamma_pdf : Compute PDF of log removal given gamma-distributed residence time
18gamma_cdf : Compute CDF of log removal given gamma-distributed residence time
19gamma_mean : Compute mean log removal for gamma-distributed residence time
20gamma_find_flow_for_target_mean : Find flow rate for target mean log removal
22Notes
23-----
24For systems in series, log removals are typically summed directly, while for
25parallel systems, a weighted average based on flow distribution is required.
26The parallel_mean function supports multi-dimensional arrays via the axis parameter
27and performs minimal validation for improved performance.
28"""
30import numpy as np
31import numpy.typing as npt
32from scipy import stats
33from scipy.special import digamma, gamma
36def residence_time_to_log_removal(residence_times: npt.ArrayLike, log_removal_rate: float) -> np.ndarray:
37 """
38 Compute log removal given residence times and a log removal rate.
40 This function calculates the log removal efficiency based on the
41 residence times of water in a treatment system and the log removal
42 rate coefficient.
44 The calculation uses the formula:
45 Log Removal = log_removal_rate * log10(residence_time)
47 Parameters
48 ----------
49 residence_times : array-like
50 Array of residence times (in consistent units, e.g., hours, days).
51 Must be positive values.
52 log_removal_rate : float
53 Log removal rate coefficient that relates residence time to
54 log removal efficiency. Units should be consistent with
55 residence_times.
57 Returns
58 -------
59 log_removals : numpy.ndarray
60 Array of log removal values corresponding to the input residence times.
61 Same shape as input residence_times.
63 Notes
64 -----
65 Log removal is a logarithmic measure of pathogen reduction:
66 - Log 1 = 90% reduction
67 - Log 2 = 99% reduction
68 - Log 3 = 99.9% reduction
70 The log removal rate coefficient determines how effectively the
71 treatment system removes pathogens per unit log time.
73 Examples
74 --------
75 >>> import numpy as np
76 >>> residence_times = np.array([1.0, 10.0, 100.0])
77 >>> log_removal_rate = 2.0
78 >>> residence_time_to_log_removal(residence_times, log_removal_rate)
79 array([0. , 2. , 4. ])
81 >>> # Single residence time
82 >>> residence_time_to_log_removal(5.0, 1.5)
83 1.0484550065040283
85 >>> # 2D array of residence times
86 >>> residence_times_2d = np.array([[1.0, 10.0], [100.0, 1000.0]])
87 >>> residence_time_to_log_removal(residence_times_2d, 1.0)
88 array([[0., 1.],
89 [2., 3.]])
90 """
91 # Convert to numpy array for consistent handling
92 residence_times = np.asarray(residence_times, dtype=float)
94 # Calculate log removal using the formula
95 return log_removal_rate * np.log10(residence_times)
98def parallel_mean(log_removals: npt.ArrayLike, flow_fractions: npt.ArrayLike = None, axis=None) -> np.ndarray:
99 """
100 Calculate the weighted average log removal for a system with parallel flows.
102 This function computes the overall log removal efficiency of a parallel
103 filtration system. If flow_fractions is not provided, it assumes equal
104 distribution of flow across all paths.
106 The calculation uses the formula:
108 Total Log Removal = -log10(sum(F_i * 10^(-LR_i)))
110 Where:
111 - F_i = fraction of flow through system i (decimal, sum to 1.0)
112 - LR_i = log removal of system i
114 Parameters
115 ----------
116 log_removals : array-like
117 Array of log removal values for each parallel flow.
118 Each value represents the log10 reduction of pathogens.
119 For multi-dimensional arrays, the parallel mean is computed along
120 the specified axis.
122 flow_fractions : array-like, optional
123 Array of flow fractions for each parallel flow.
124 Must sum to 1.0 along the specified axis and have compatible shape
125 with log_removals. If None, equal flow distribution is assumed
126 (default is None).
128 axis : int, optional
129 Axis along which to compute the parallel mean for multi-dimensional
130 arrays. If None, the array is treated as 1-dimensional
131 (default is None).
133 Returns
134 -------
135 float or array-like
136 The combined log removal value for the parallel system.
137 If log_removals is multi-dimensional and axis is specified,
138 returns an array with the specified axis removed.
140 Notes
141 -----
142 This function performs minimal input validation to reduce complexity.
143 NumPy will handle most error cases naturally through broadcasting
144 and array operations.
146 Notes
147 -----
148 Log removal is a logarithmic measure of pathogen reduction:
149 - Log 1 = 90% reduction
150 - Log 2 = 99% reduction
151 - Log 3 = 99.9% reduction
153 For parallel flows, the combined removal is typically less effective
154 than the best individual removal but better than the worst.
156 Examples
157 --------
158 >>> import numpy as np
159 >>> # Three parallel streams with equal flow and log removals of 3, 4, and 5
160 >>> log_removals = np.array([3, 4, 5])
161 >>> parallel_mean(log_removals)
162 3.431798275933005
164 >>> # Two parallel streams with weighted flow
165 >>> log_removals = np.array([3, 5])
166 >>> flow_fractions = np.array([0.7, 0.3])
167 >>> parallel_mean(log_removals, flow_fractions)
168 3.153044674980176
170 >>> # Multi-dimensional array: parallel mean along axis 1
171 >>> log_removals_2d = np.array([[3, 4, 5], [2, 3, 4]])
172 >>> parallel_mean(log_removals_2d, axis=1)
173 array([3.43179828, 2.43179828])
175 See Also
176 --------
177 For systems in series, log removals would be summed directly.
178 """
179 # Convert log_removals to numpy array if it isn't already
180 log_removals = np.asarray(log_removals, dtype=float)
182 # If flow_fractions is not provided, assume equal distribution
183 if flow_fractions is None:
184 if axis is None:
185 # 1D case: calculate the number of parallel flows
186 n = len(log_removals)
187 # Create equal flow fractions (avoid division by zero)
188 flow_fractions = np.full(n, 1.0 / n) if n > 0 else np.array([])
189 else:
190 # Multi-dimensional case: create equal flow fractions along the specified axis
191 n = log_removals.shape[axis]
192 shape = [1] * log_removals.ndim
193 shape[axis] = n
194 flow_fractions = np.full(shape, 1.0 / n)
195 else:
196 # Convert flow_fractions to numpy array
197 flow_fractions = np.asarray(flow_fractions, dtype=float)
199 # Note: Shape compatibility and sum validation removed to reduce complexity
200 # NumPy will handle incompatible shapes through broadcasting or errors
202 # Convert log removal to decimal reduction values
203 decimal_reductions = 10 ** (-log_removals)
205 # Calculate weighted average decimal reduction
206 weighted_decimal_reduction = np.sum(flow_fractions * decimal_reductions, axis=axis)
208 # Convert back to log scale
209 return -np.log10(weighted_decimal_reduction)
212def gamma_pdf(r: npt.ArrayLike, rt_alpha: float, rt_beta: float, log_removal_rate: float) -> np.ndarray:
213 """
214 Compute the probability density function (PDF) of log removal given a gamma distribution for the residence time.
216 gamma(rt_alpha, rt_beta) = gamma(apv_alpha, apv_beta / flow)
218 Parameters
219 ----------
220 r : array-like
221 Log removal values at which to compute the PDF.
222 rt_alpha : float
223 Shape parameter of the gamma distribution for residence time.
224 rt_beta : float
225 Scale parameter of the gamma distribution for residence time.
226 log_removal_rate : float
227 Coefficient for log removal calculation (R = log_removal_rate * log10(T)).
229 Returns
230 -------
231 pdf : numpy.ndarray
232 PDF values corresponding to the input r values.
233 """
234 # Compute the transformed PDF
235 t_values = 10 ** (r / log_removal_rate)
237 return (
238 (np.log(10) / (log_removal_rate * gamma(rt_alpha) * (rt_beta**rt_alpha)))
239 * (t_values**rt_alpha)
240 * np.exp(-t_values / rt_beta)
241 )
244def gamma_cdf(r: npt.ArrayLike, rt_alpha: float, rt_beta: float, log_removal_rate: float) -> np.ndarray:
245 """
246 Compute the cumulative distribution function (CDF) of log removal given a gamma distribution for the residence time.
248 gamma(rt_alpha, rt_beta) = gamma(apv_alpha, apv_beta / flow)
250 Parameters
251 ----------
252 r : array-like
253 Log removal values at which to compute the CDF.
254 alpha : float
255 Shape parameter of the gamma distribution for residence time.
256 beta : float
257 Scale parameter of the gamma distribution for residence time.
258 log_removal_rate : float
259 Coefficient for log removal calculation (R = log_removal_rate * log10(T)).
261 Returns
262 -------
263 cdf : numpy.ndarray
264 CDF values corresponding to the input r values.
265 """
266 # Compute t values corresponding to r values
267 t_values = 10 ** (r / log_removal_rate)
269 # Use the gamma CDF directly
270 return stats.gamma.cdf(t_values, a=rt_alpha, scale=rt_beta)
273def gamma_mean(rt_alpha: float, rt_beta: float, log_removal_rate: float) -> float:
274 """
275 Compute the mean of the log removal distribution given a gamma distribution for the residence time.
277 gamma(rt_alpha, rt_beta) = gamma(apv_alpha, apv_beta / flow)
279 Parameters
280 ----------
281 rt_alpha : float
282 Shape parameter of the gamma distribution for residence time.
283 rt_beta : float
284 Scale parameter of the gamma distribution for residence time.
285 log_removal_rate : float
286 Coefficient for log removal calculation (R = log_removal_rate * log10(T)).
288 Returns
289 -------
290 mean : float
291 Mean value of the log removal distribution.
292 """
293 # Calculate E[R] = log_removal_rate * E[log10(T)]
294 # For gamma distribution: E[ln(T)] = digamma(alpha) + ln(beta_adjusted)
295 # Convert to log10: E[log10(T)] = E[ln(T)] / ln(10)
297 return (log_removal_rate / np.log(10)) * (digamma(rt_alpha) + np.log(rt_beta))
300def gamma_find_flow_for_target_mean(
301 target_mean: float, apv_alpha: float, apv_beta: float, log_removal_rate: float
302) -> float:
303 """
304 Find the flow rate flow that produces a specified target mean log removal given a gamma distribution for the residence time.
306 gamma(rt_alpha, rt_beta) = gamma(apv_alpha, apv_beta / flow)
308 Parameters
309 ----------
310 target_mean : float
311 Target mean log removal value.
312 apv_alpha : float
313 Shape parameter of the gamma distribution for residence time.
314 apv_beta : float
315 Scale parameter of the gamma distribution for pore volume.
316 log_removal_rate : float
317 Coefficient for log removal calculation (R = log_removal_rate * log10(T)).
319 Returns
320 -------
321 flow : float
322 Flow rate that produces the target mean log removal.
324 Notes
325 -----
326 This function uses the analytical solution derived from the mean formula.
327 From E[R] = (log_removal_rate/ln(10)) * (digamma(alpha) + ln(beta) - ln(Q)),
328 we can solve for Q to get:
329 flow = beta * exp(ln(10)*target_mean/log_removal_rate - digamma(alpha))
330 """
331 # Rearranging the mean formula to solve for Q:
332 # target_mean = (log_removal_rate/ln(10)) * (digamma(alpha) + ln(beta) - ln(Q))
333 # ln(Q) = digamma(alpha) + ln(beta) - (ln(10)*target_mean/log_removal_rate)
334 # Q = beta * exp(-(ln(10)*target_mean/log_removal_rate - digamma(alpha)))
335 return apv_beta * np.exp(digamma(apv_alpha) - (np.log(10) * target_mean) / log_removal_rate)