Source code for pyEQL.utils
"""
pyEQL utilities
:copyright: 2013-2024 by Ryan S. Kingsbury
:license: LGPL, see LICENSE for more details.
"""
import logging
import re
from collections import UserDict
from functools import lru_cache
from typing import Any
from iapws import IAPWS95, IAPWS97
from pymatgen.core.ion import Ion
from pyEQL import ureg
logger = logging.getLogger(__name__)
[docs]
def translate_units(unit: str) -> str:
    """
    Map common environmental unit shorthands (e.g. 'ppm') onto unit strings that `pint` can parse.

    Args:
        unit: string representing the unit to translate

    Returns:
        The pint-compatible replacement when `unit` is a recognized shorthand,
        otherwise `unit` itself, unchanged.
    """
    # shorthand -> pint-compatible unit; "m" is interpreted as molal
    shorthand_table = (
        ("m", "mol/kg"),
        ("ppm", "mg/L"),
        ("ppb", "ug/L"),
        ("ppt", "ng/L"),
    )
    # anything not listed in the table is passed through untouched
    return next((pint_unit for shorthand, pint_unit in shorthand_table if unit == shorthand), unit)
[docs]
def _translate_pint_quantity(amount: str):
    """
    Helper method to separate a pint quantity string into its magnitude and its units.

    Args:
        amount: a quantity string such as "0.5 mol/L", or an existing pint Quantity.

    Returns:
        A (magnitude, unit string) tuple. For a Quantity, the magnitude and `str()` of its
        units are returned as-is. For a string, the magnitude is a float and the unit text
        has been passed through `translate_units`. If the string does not match the
        expected "<number> <unit>" pattern, `amount` itself is returned unchanged.
    """
    import re  # noqa: PLC0415

    from pint import Quantity  # noqa: PLC0415

    # a Quantity already carries magnitude and units separately; nothing to parse
    if isinstance(amount, Quantity):
        return amount.magnitude, str(amount.units)

    parsed = re.match(r"^\s*([0-9eE+\-*/().]+)\s*(.*)$", amount)
    if parsed is None:
        return amount

    number_text = parsed.group(1)
    unit_text = parsed.group(2)

    # float() cannot parse a python ** expression (e.g. "10**-3"), so those are evaluated.
    # NOTE(review): eval() runs on caller-supplied text. The regex above limits it to digits,
    # e/E, arithmetic operators, parentheses and dots - confirm inputs are always trusted.
    if "**" in number_text:
        magnitude = eval(number_text)
    else:
        magnitude = float(number_text)

    pint_unit = translate_units(unit_text)
    return (float(magnitude), pint_unit)
[docs]
@lru_cache
def standardize_formula(formula: str):
"""
Convert a chemical formula into standard form.
Args:
formula: the chemical formula to standardize.
Returns:
A standardized chemical formula
Raises:
ValueError if `formula` cannot be processed or is invalid.
Notes:
Currently this method standardizes formulae by passing them through `pymatgen.core.ion.Ion.reduced_formula()`.
For ions, this means that 1) the charge number will always be listed explicitly and 2) the charge number will
be enclosed in square brackets to remove any ambiguity in the meaning of the formula. For example, 'Na+',
'Na+1', and 'Na[+]' will all standardize to "Na[+1]"
"""
# fix permuted sign and charge number (e.g. Co2+)
for str, rep in zip(["²⁺", "³⁺", "⁴⁺", "²⁻", "³⁻", "⁴⁻"], ["+2", "+3", "+4", "-2", "-3", "-4"], strict=False):
formula = formula.replace(str, rep)
# replace superscripts with non superscripts
for char, rep in zip("⁻⁺⁰¹²³⁴⁵⁶⁷⁸⁹", "-+0123456789", strict=False):
formula = formula.replace(char, rep)
# replace subscripts with non subscripts
for char, rep in zip("₀₁₂₃₄₅₆₇₈₉", "0123456789", strict=False):
formula = formula.replace(char, rep)
# replace different types of dashes with a minus sign
for char in [r"‑", r"‐", r"‒", r"–", r"—", r"−"]: # noqa: RUF001
formula = formula.replace(char, "-")
# Do not modify any dimers etc (Phreeqc reports a small amount of
# "(CO2)2" in a water solution with C(4), for example.
_POLYMER_RE = re.compile(r"^\([A-Za-z0-9+-]+\)\d+$")
if _POLYMER_RE.match(formula):
return formula
sform = Ion.from_formula(formula).reduced_formula
# TODO - manual formula adjustments. May be implemented upstream in pymatgen in the future
# thanks to @xiaoxiaozhu123 for pointing out these issues in
# https://github.com/KingsburyLab/pyEQL/issues/136
# ammonia
if sform == "H4N[+1]":
sform = "NH4[+1]"
elif "H4NCl" in sform:
sform = sform.replace("H4NCl", "NH4Cl")
elif sform == "SO3[-1]":
sform = "S2O6[-2]"
elif sform == "SO4[-1]":
sform = "S2O8[-2]"
elif sform == "H3N(aq)":
sform = "NH3(aq)"
# phosphoric acid system
elif sform == "PH3O4(aq)":
sform = "H3PO4(aq)"
elif "PHO4" in sform:
sform = sform.replace("PHO4", "HPO4")
elif "P(HO2)2" in sform:
sform = sform.replace("P(HO2)2", "H2PO4")
# thiocyanate
elif "CSN" in sform:
sform = sform.replace("CSN", "SCN")
# triiodide, trinitride, tribromide and phosphide
elif sform == "I[-0.33333333]":
sform = "I3[-1]"
elif sform == "N[-0.33333333]":
sform = "N3[-1]"
elif sform == "Br[-0.33333333]":
sform = "Br3[-1]"
elif sform == "P[-0.33333333]":
sform = "P3[-1]"
# sulfur species
elif sform == "S[-0.4]":
sform = "S5[-2]"
elif sform == "S[-0.5]":
sform = "S4[-2]"
elif sform == "S[-0.66666667]":
sform = "S3[-2]"
elif sform == "S[-1]":
sform = "S2[-2]" # note: S2[-2] has lower ΔGf than S[-2], so we want to standardize to S2[-2] rather than S[-2]
# formate
elif sform == "HCOO[-1]":
sform = "HCO2[-1]"
# oxalate
elif sform == "CO2[-1]":
sform = "C2O4[-2]"
# triflate
elif sform == "CS(OF)3[-1]":
sform = "CF3SO3[-1]"
# haloacetic acids of F, Cl, Br, I
elif sform == "C2Cl3O2[-1]":
sform = "CCl3COO[-1]"
elif sform == "C2O2F3[-1]":
sform = "CF3COO[-1]"
elif sform == "C2I3O2[-1]":
sform = "CI3COO[-1]"
elif sform == "C2Br3O2[-1]":
sform = "CBr3COO[-1]"
# Cl+F
elif sform == "C2Cl2O2F[-1]":
sform = "CFCl2COO[-1]"
elif sform == "C2Cl(OF)2[-1]":
sform = "CF2ClCOO[-1]"
# Cl+Br
elif sform == "C2Br(ClO)2[-1]":
sform = "CBrCl2COO[-1]"
elif sform == "C2Br2ClO2[-1]":
sform = "CBr2ClCOO[-1]"
# Cl+I
elif sform == "C2I(ClO)2[-1]":
sform = "CICl2COO[-1]"
elif sform == "C2I2ClO2[-1]":
sform = "CI2ClCOO[-1]"
# ammonium nitrate salts
elif sform == "H4N2O3(aq)":
sform = "NH4NO3(aq)"
# ammonium sulfate salts
elif sform == "H8S(NO2)2(aq)":
sform = "(NH4)2SO4(aq)"
elif sform == "H4SNO4[-1]":
sform = "NH4SO4[-1]"
# TODO - consider adding recognition of special formulas like MeOH for methanol or Cit for citrate
return sform
[docs]
def format_solutes_dict(solute_dict: dict, units: str):
    """
    Attach a common unit to every amount in a solute dictionary, producing the string amounts
    that are suitable for passing to the Solution class. All solutes must share the same units.

    Args:
        solute_dict: The dictionary to format. This must be of the form dict{str: Number}
            e.g. {"Na+": 0.5, "Cl-": 0.9}
        units: The units to use for the solute. e.g. "mol/kg"

    Returns:
        A new dictionary with the same keys, whose values are strings of the
        form "<amount> <units>", e.g. {"Na+": "0.5 mol/kg", "Cl-": "0.9 mol/kg"}.

    Raises:
        TypeError if `solute_dict` is not a dictionary.
    """
    if not isinstance(solute_dict, dict):
        raise TypeError("solute_dict must be a dictionary. Refer to the doc for proper formatting.")

    formatted = {}
    for solute, amount in solute_dict.items():
        formatted[solute] = f"{amount!s} {units}"
    return formatted
[docs]
@lru_cache
@ureg.wraps(ret=None, args=["K", "MPa"], strict=False)
def create_water_substance(temperature: float, pressure: float):
    """
    Build an IAPWS water substance model for the given state point.

    Args:
        temperature: the desired temperature in K
        pressure: the desired pressure in MPa

    Notes:
        The IAPWS97 model is much faster than IAPWS95, but the latter can do temp
        below zero. See https://github.com/jjgomera/iapws/issues/14. Hence,
        IAPWS97 will be used except when `temperature` is less than 0 degC.

    Returns:
        A IAPWS97 or IAPWS95 instance
    """
    # 273.15 K == 0 degC: at or above it use the faster IAPWS97, below it fall back to IAPWS95
    water_model = IAPWS97 if temperature >= 273.15 else IAPWS95
    return water_model(T=temperature, P=pressure)
[docs]
class FormulaDict(UserDict):
    """
    Dictionary whose keys are passed through `standardize_formula` on every get/set/delete/membership test.

    This allows getting/setting/updating of Solution.components using flexible
    formula notation (e.g., "Na+", "Na+1", "Na[+]" all have the same effect).
    Values are stored as python floats, and the contents are re-sorted in
    descending order of value every time an item is set.
    """

    def __getitem__(self, key) -> Any:
        """Return the value stored under the standardized form of `key`."""
        canonical = standardize_formula(key)
        return super().__getitem__(canonical)

    def __setitem__(self, key, value) -> None:
        """Store `value` as a float under the standardized form of `key`, then re-sort."""
        canonical = standardize_formula(key)
        # ensure that all values are stored as python floats, not numpy types
        # see https://numpy.org/doc/stable/release/2.0.0-notes.html#representation-of-numpy-scalars-changed
        super().__setitem__(canonical, float(value))
        # sort contents anytime an item is set (largest value first)
        ranked = sorted(self.items(), key=lambda pair: pair[1], reverse=True)
        self.data = dict(ranked)

    # Necessary to define this so that .get() works properly in python 3.12+
    # see https://github.com/python/cpython/issues/105524
    def __contains__(self, key) -> bool:
        """Report whether the standardized form of `key` is present."""
        canonical = standardize_formula(key)
        return canonical in self.data

    def __delitem__(self, key) -> None:
        """Remove the entry stored under the standardized form of `key`."""
        canonical = standardize_formula(key)
        super().__delitem__(canonical)