"""Associate calculations with a reaction mechanism."""
import logging
import math
import re
from typing import Any
from typing import NamedTuple
from autojob.coordinator.classification import CalculationType
logger = logging.getLogger(__name__)
_re_id1 = re.compile(r"based on (?P<id>(?:c|j)[A-Za-z0-9]{9})(;)?\b")
_re_id2 = re.compile(
r"(?:(?!converged|calculation|converging))(?P<id>(?:c|j)[A-Za-z0-9]{9})\b"
)
[docs]
class ElectrochemicalState(NamedTuple):
"""An electrochemical state along reaction relative to a reference.
Attributes:
net_hydrogens: The net number of hydrogens transferred from the
reference to the ``ElectrochemicalState``. Defaults to 0.
net_electrons: The net number of electrons transferred from the
reference to the ``ElectrochemicalState``. Defaults to 0.
net_water_lost: The net number of water atoms lost from the
reference to the ``ElectrochemicalState``. Defaults to 0.
reference: The reference state for the ``ElectrochemicalState``.
Defaults to None. If provided, `reference` can be used to relate
`ElectrochemicalState` s defined for different references.
stoich: The absolute value of the stoichiometric coefficient for the
reference state. Defaults to 1.0.
Note:
``net_water_lost`` is defined opposite to how ``net_hydrogens`` and
``net_electrons`` are defined so that simple tuple comparisons can
be used to order ``ElementaryStep`` s according to typical
electrochemical reactions. However, this ordering is not absolute
as there may be reaction mechanisms for which electron transfer
precedes proton transfer.
"""
net_hydrogens: int = 0
net_electrons: int = 0
net_water_lost: int = 0
reference: str | None = None
stoich: float = 1.0
[docs]
def apply_comp_hydrogen_model(
self,
energy_h2: float,
energy_h2o: float,
*,
initial: float,
final: float,
applied_potential: float = 0.0,
) -> float:
"""Calculate an energy using the computational hydrogen electrode (CHE).
This method follows the formalism outlined in:
J. K. Nørskov, J. Rossmeisl, A. Logadottir, L. Lindqvist, J. R. Kitchin, T. Bligaard, and H. Jónsson
The Journal of Physical Chemistry B 2004 108 (46), 17886-17892
DOI: 10.1021/jp047349j
Args:
energy_h2: The energy of gas phase hydrogen to use for the
calculation.
energy_h2o: The energy of gas phase water to use for the
calculation.
initial: The energy of the reference state to use for the
elementary step. For catalyzed reactions, this could include
the catalyst energy.
final: The energy of the final state of the elementary step.
applied_potential: The applied potential in Volts.
Returns:
The free energy of the species resulting from the given elementary
step under the CHE formalism.
"""
return (
final
- initial
- energy_h2 * (self.net_hydrogens / 2)
+ energy_h2o * self.net_water_lost
+ applied_potential * self.net_electrons
)
[docs]
class MechanisticEntry(NamedTuple):
"""Calculated thermodynamic data for an elementary step.
Attributes:
elementary_step: The :class:`ElementaryStep` with which the
``MechanisticEntry`` is associated.
name: A string labeling the entry. For example, the catalyst or
molecule name.
energy: A float representing the calculated energy for the entry.
"""
elementary_step: ElectrochemicalState
name: str
energy: float
# name of intermediate: step
Mechanism = list[ElectrochemicalState]
STEPS = {
"CO2": ElectrochemicalState(),
"COOH": ElectrochemicalState(1, 1),
"OCHO": ElectrochemicalState(1, 1),
"CO": ElectrochemicalState(2, 2, 1),
"HCOOH": ElectrochemicalState(2, 2),
"H": ElectrochemicalState(),
}
[docs]
def find_ancestors(
jobs: list[dict[str, Any]],
data: dict[str, Any],
ancestor_calculation: str,
ancestor_job: str,
) -> list[dict[str, Any]]:
"""Find the ancestor calculations of given calculation."""
ancestors = []
for job in jobs:
if (
job.get("Calculation ID", None) == ancestor_calculation
and job.get("Job ID", None) == ancestor_job
):
if data["Calculation Type"] != str(CalculationType.VIB):
grandparent_calculation = grandparent_job = None
if match := _re_id2.search(data["Calculation Notes"]):
grandparent_calculation = match.group("id")
if match := _re_id2.search(data["Job Notes"]):
grandparent_job = match.group("id")
if None in (grandparent_calculation, grandparent_job):
logger.warning(
f"Unable to determine grandparents of {data['Job ID']}"
f" (parents of {ancestor_job})"
)
else:
ancestors.extend(
find_ancestors(
jobs, job, grandparent_calculation, grandparent_job
)
)
ancestors.append(job)
return ancestors
[docs]
def find_ancestor(
graph: dict[str, str],
data: dict[str, Any],
all_data: list[dict[str, Any]],
) -> str:
"""Find the ancestor calculation of given calculation."""
job_id = current_job = data["Job ID"]
calculations_to_jobs: dict[str, list[str]] = {}
for d in all_data:
calculation_id = d["Calculation ID"]
if calculation_id not in calculations_to_jobs:
calculations_to_jobs[calculation_id] = []
calculations_to_jobs[calculation_id].append(d["Job ID"])
missing = "Ancestor ({0}) of job ({1} from {2}) not in data set"
ancestor_job, ancestor_calculation = graph[job_id]
while ancestor_job in graph and ancestor_job != current_job:
current_job = ancestor_job
ancestor_job, ancestor_calculation = graph[current_job]
# Try to find upstream job using calculation
if (
any([ancestor_job, ancestor_calculation])
and ancestor_job not in graph
):
ancestors = calculations_to_jobs.get(
ancestor_calculation, [ancestor_job]
)
ancestor_job = ancestors[0] if ancestors else ancestor_job
if ancestor_job is None:
return current_job
logger.warning(missing.format(ancestor_job, current_job, job_id))
return ancestor_job
[docs]
def build_job_graph(
all_data: list[dict[str, Any]],
) -> dict[str, tuple[str | None, str | None]]:
"""Build directed acyclic graph representing the connectivity of the jobs.
Args:
all_data: A list of dictionaries representing job data. Job
connectivity is determined based on the "Job Notes" key.
Returns:
A dictionary mapping jobs to their ancestor (``job``,
``calculation``). If the ancestor of a job cannot be found, its
ancestor is (None, None).
"""
graph: dict[str, tuple[str | None, str | None]] = {}
for data in all_data:
ancestor_calculation = ancestor_job = None
if match := _re_id1.search(data["Job Notes"]):
ancestor_job = match.group("id")
elif matches := _re_id2.findall(data["Job Notes"]):
ancestor_job = matches[-1] if matches[-1] != ancestor_job else None
if matches := _re_id2.findall(data["Calculation Notes"]):
ancestor_calculation = (
matches[-1] if matches[-1] != ancestor_calculation else None
)
graph[data["Job ID"]] = (ancestor_job, ancestor_calculation)
return graph
[docs]
def aggregate_mechanism_data(
all_data: list[dict[str, Any]],
) -> list[list[dict[str, Any]]]:
"""Group all data deemed to belong to the same mechanism entry.
Data is determined to be grouped based on whether any other calculation ids
appear in their "Calculation Notes" key.
Args:
all_data: A list of dictionaries containing the data. Each dictionary
must contain the following keys:
- "Calculation Notes"
- "Job Notes"
Returns:
A list of lists of dictionaries grouped by mechanism entry.
"""
# Build reaction network based on metadata
graph = build_job_graph(all_data)
networks: dict[str, list[dict[str, Any]]] = {}
for data in all_data:
head_node = find_ancestor(graph, data, all_data)
if head_node not in networks:
networks[head_node] = []
networks[head_node].append(data)
grouped_data = []
for network in networks.values():
grouped_data.append(
sorted(network, key=lambda d: d.get("SLURM Job ID", math.inf))
)
return grouped_data