Source code for autojob.harvest.mechanism

"""Associate calculations with a reaction mechanism."""

import logging
import math
import re
from typing import Any
from typing import NamedTuple

from autojob.coordinator.classification import CalculationType

logger = logging.getLogger(__name__)

_re_id1 = re.compile(r"based on (?P<id>(?:c|j)[A-Za-z0-9]{9})(;)?\b")
_re_id2 = re.compile(
    r"(?:(?!converged|calculation|converging))(?P<id>(?:c|j)[A-Za-z0-9]{9})\b"
)


[docs] class ElectrochemicalState(NamedTuple): """An electrochemical state along reaction relative to a reference. Attributes: net_hydrogens: The net number of hydrogens transferred from the reference to the ``ElectrochemicalState``. Defaults to 0. net_electrons: The net number of electrons transferred from the reference to the ``ElectrochemicalState``. Defaults to 0. net_water_lost: The net number of water atoms lost from the reference to the ``ElectrochemicalState``. Defaults to 0. reference: The reference state for the ``ElectrochemicalState``. Defaults to None. If provided, `reference` can be used to relate `ElectrochemicalState` s defined for different references. stoich: The absolute value of the stoichiometric coefficient for the reference state. Defaults to 1.0. Note: ``net_water_lost`` is defined opposite to how ``net_hydrogens`` and ``net_electrons`` are defined so that simple tuple comparisons can be used to order ``ElementaryStep`` s according to typical electrochemical reactions. However, this ordering is not absolute as there may be reaction mechanisms for which electron transfer precedes proton transfer. """ net_hydrogens: int = 0 net_electrons: int = 0 net_water_lost: int = 0 reference: str | None = None stoich: float = 1.0
[docs] def apply_comp_hydrogen_model( self, energy_h2: float, energy_h2o: float, *, initial: float, final: float, applied_potential: float = 0.0, ) -> float: """Calculate an energy using the computational hydrogen electrode (CHE). This method follows the formalism outlined in: J. K. Nørskov, J. Rossmeisl, A. Logadottir, L. Lindqvist, J. R. Kitchin, T. Bligaard, and H. Jónsson The Journal of Physical Chemistry B 2004 108 (46), 17886-17892 DOI: 10.1021/jp047349j Args: energy_h2: The energy of gas phase hydrogen to use for the calculation. energy_h2o: The energy of gas phase water to use for the calculation. initial: The energy of the reference state to use for the elementary step. For catalyzed reactions, this could include the catalyst energy. final: The energy of the final state of the elementary step. applied_potential: The applied potential in Volts. Returns: The free energy of the species resulting from the given elementary step under the CHE formalism. """ return ( final - initial - energy_h2 * (self.net_hydrogens / 2) + energy_h2o * self.net_water_lost + applied_potential * self.net_electrons )
[docs] class MechanisticEntry(NamedTuple): """Calculated thermodynamic data for an elementary step. Attributes: elementary_step: The :class:`ElementaryStep` with which the ``MechanisticEntry`` is associated. name: A string labeling the entry. For example, the catalyst or molecule name. energy: A float representing the calculated energy for the entry. """ elementary_step: ElectrochemicalState name: str energy: float
# name of intermediate: step Mechanism = list[ElectrochemicalState] STEPS = { "CO2": ElectrochemicalState(), "COOH": ElectrochemicalState(1, 1), "OCHO": ElectrochemicalState(1, 1), "CO": ElectrochemicalState(2, 2, 1), "HCOOH": ElectrochemicalState(2, 2), "H": ElectrochemicalState(), }
[docs] def find_ancestors( jobs: list[dict[str, Any]], data: dict[str, Any], ancestor_calculation: str, ancestor_job: str, ) -> list[dict[str, Any]]: """Find the ancestor calculations of given calculation.""" ancestors = [] for job in jobs: if ( job.get("Calculation ID", None) == ancestor_calculation and job.get("Job ID", None) == ancestor_job ): if data["Calculation Type"] != str(CalculationType.VIB): grandparent_calculation = grandparent_job = None if match := _re_id2.search(data["Calculation Notes"]): grandparent_calculation = match.group("id") if match := _re_id2.search(data["Job Notes"]): grandparent_job = match.group("id") if None in (grandparent_calculation, grandparent_job): logger.warning( f"Unable to determine grandparents of {data['Job ID']}" f" (parents of {ancestor_job})" ) else: ancestors.extend( find_ancestors( jobs, job, grandparent_calculation, grandparent_job ) ) ancestors.append(job) return ancestors
[docs] def find_ancestor( graph: dict[str, str], data: dict[str, Any], all_data: list[dict[str, Any]], ) -> str: """Find the ancestor calculation of given calculation.""" job_id = current_job = data["Job ID"] calculations_to_jobs: dict[str, list[str]] = {} for d in all_data: calculation_id = d["Calculation ID"] if calculation_id not in calculations_to_jobs: calculations_to_jobs[calculation_id] = [] calculations_to_jobs[calculation_id].append(d["Job ID"]) missing = "Ancestor ({0}) of job ({1} from {2}) not in data set" ancestor_job, ancestor_calculation = graph[job_id] while ancestor_job in graph and ancestor_job != current_job: current_job = ancestor_job ancestor_job, ancestor_calculation = graph[current_job] # Try to find upstream job using calculation if ( any([ancestor_job, ancestor_calculation]) and ancestor_job not in graph ): ancestors = calculations_to_jobs.get( ancestor_calculation, [ancestor_job] ) ancestor_job = ancestors[0] if ancestors else ancestor_job if ancestor_job is None: return current_job logger.warning(missing.format(ancestor_job, current_job, job_id)) return ancestor_job
[docs] def build_job_graph( all_data: list[dict[str, Any]], ) -> dict[str, tuple[str | None, str | None]]: """Build directed acyclic graph representing the connectivity of the jobs. Args: all_data: A list of dictionaries representing job data. Job connectivity is determined based on the "Job Notes" key. Returns: A dictionary mapping jobs to their ancestor (``job``, ``calculation``). If the ancestor of a job cannot be found, its ancestor is (None, None). """ graph: dict[str, tuple[str | None, str | None]] = {} for data in all_data: ancestor_calculation = ancestor_job = None if match := _re_id1.search(data["Job Notes"]): ancestor_job = match.group("id") elif matches := _re_id2.findall(data["Job Notes"]): ancestor_job = matches[-1] if matches[-1] != ancestor_job else None if matches := _re_id2.findall(data["Calculation Notes"]): ancestor_calculation = ( matches[-1] if matches[-1] != ancestor_calculation else None ) graph[data["Job ID"]] = (ancestor_job, ancestor_calculation) return graph
[docs] def aggregate_mechanism_data( all_data: list[dict[str, Any]], ) -> list[list[dict[str, Any]]]: """Group all data deemed to belong to the same mechanism entry. Data is determined to be grouped based on whether any other calculation ids appear in their "Calculation Notes" key. Args: all_data: A list of dictionaries containing the data. Each dictionary must contain the following keys: - "Calculation Notes" - "Job Notes" Returns: A list of lists of dictionaries grouped by mechanism entry. """ # Build reaction network based on metadata graph = build_job_graph(all_data) networks: dict[str, list[dict[str, Any]]] = {} for data in all_data: head_node = find_ancestor(graph, data, all_data) if head_node not in networks: networks[head_node] = [] networks[head_node].append(data) grouped_data = [] for network in networks.values(): grouped_data.append( sorted(network, key=lambda d: d.get("SLURM Job ID", math.inf)) ) return grouped_data