Source code for autojob.harvest.harvesters.ddec6
"""DDEC6 charge analysis harvesting utilities.
This module defines the :func:`.get_ddec6_index_map` and
:func:`.harvest_ddec6_results` functions and the :class:`.DDEC6Analysis` class.
:func:`.get_ddec6_index_map` and :func:`.harvest_ddec6_results` can be used
to parse DDEC6 charge data. :class:`.DDEC6Analysis` is the data model for
DDEC6 charge analysis data.
Example:
from pathlib import Path
from autojob.harvest.harvesters.ddec6 import harvest_ddec6_results
outputs = harvest_ddec6_results(Path.cwd())
"""
import json
import logging
import math
from pathlib import Path
from typing import Annotated
from typing import TypeVar
import warnings
import ase
from ase.geometry import geometry
import ase.io
from pydantic import PlainSerializer
from pymatgen.command_line.chargemol_caller import ChargemolAnalysis
from pymatgen.core.periodic_table import Element
from typing_extensions import TypedDict
from autojob import SETTINGS
logger = logging.getLogger(__name__)
DDEC_NET_ATOMIC_CHARGE_FILE = "DDEC6_even_tempered_net_atomic_charges.xyz"
CHGCAR = "CHGCAR"
POSCAR = "POSCAR"
POTCAR = "POTCAR"
logger = logging.getLogger(__name__)
_T = TypeVar("_T")
StringSerialized = Annotated[
_T, PlainSerializer(lambda x: str(x), return_type=str)
]
[docs]
class BondedToDict(TypedDict):
"""Bond order information between a primary atom and another atom.
This dictionary assumes knowledge of the primary atom.
Keys:
index: The index of the non-primary atom.
element: The element of the non-primary atom.
bond_order: The order of the bond between the primary atom and the
atom specified by the ``"index"`` and ``"element"`` keys.
direction: The direction of the bond between the primary atom and the
atom specified by the ``"index"`` and ``"element"`` keys.
spin_polarization: The spin polarization of the bond between the
primary atom and the atom specified by the ``"index"`` and
``"element"`` keys.
"""
index: int
element: StringSerialized[Element]
bond_order: float
direction: tuple[float, float, float]
spin_polarization: float
[docs]
class BondOrders(TypedDict):
"""Bond order information between a "primary" atom and other atoms.
Keys:
element: The element of the "primary" atom to which the bond order
information belongs.
bonded_to: A list of dictionaries with bond order information.
bond_order_sum: The sum of all bond orders between the primary atom and
other atoms.
"""
element: StringSerialized[Element]
bonded_to: list[BondedToDict]
bond_order_sum: float
[docs]
class DDEC6Analysis(TypedDict):
"""DDEC6 analysis data.
Keys:
partial_charges: A list of floats representing the partial charges.
spin_moments: A list of floats representing the first spin moments.
dipoles: A list of floats representing the dipoles.
rsquared_moments: A list of floats representing the second spin moments.
rcubed_moments: A list of floats representing the third spin moments.
rfourth_moments: A list of floats representing the fourth spin moments.
bond_order_dict: A dictionary mapping atomic indices to bond order
dictionaries. Each bond order dictionary maps atomic indices to
bond order information between other atoms. The keys are the
indices of the "primary" atoms mentioned in the class docstrings of
:class:`.BondedToDict` and :class:`.BondOrders`.
"""
partial_charges: list[float]
spin_moments: list[float]
dipoles: list[list[float]]
rsquared_moments: list[float]
rcubed_moments: list[float]
rfourth_moments: list[float]
bond_order_dict: dict[int, BondOrders]
[docs]
def get_ddec6_index_map(src: str | Path, *, tol: float = 1e-3) -> list[int]:
"""Map DDEC6 structure indices to ASE structure indices.
The ASE index of the atom at index i in the DDEC6 structure can be found
as follows:
index_map = get_ddec6_index_map(dir_name)
index = index_map[i]
Args:
src: The directory containing a calculation.
tol: The maximum allowed deviation in atomic positions in Angstroms.
"""
with Path(src, SETTINGS.INPUTS_FILE).open(mode="r", encoding="utf-8") as f:
inputs = json.load(f)
try:
input_atoms_filename = inputs["task_inputs"]["atoms_filename"]
except KeyError:
input_atoms_filename = SETTINGS.INPUT_ATOMS_FILE
ase_atoms = ase.io.read(Path(src, input_atoms_filename))
ase_atoms = (
ase_atoms if isinstance(ase_atoms, ase.Atoms) else ase_atoms[-1]
)
ase_atoms.center() # type: ignore[no-untyped-call]
xyz_file = Path(src, DDEC_NET_ATOMIC_CHARGE_FILE)
ddec_atoms = ase.io.read(xyz_file)
ddec_atoms = (
ddec_atoms if isinstance(ddec_atoms, ase.Atoms) else ddec_atoms[-1]
)
ddec_atoms.cell = ase_atoms.cell
ddec_atoms.center() # type: ignore[no-untyped-call]
ddec6_index_map = []
# (i, j): distance between ith atom in ddec_atoms and jth atom in ase_atoms
_, distances = geometry.get_distances( # type: ignore[no-untyped-call]
ddec_atoms.positions,
ase_atoms.positions,
cell=ddec_atoms.cell,
pbc=True,
)
min_distances = []
for atom_distances in distances:
min_distance = math.inf
for j, distance in enumerate(atom_distances):
if distance <= min_distance and j not in ddec6_index_map:
closest_ase_atom_index = j
min_distance = distance
ddec6_index_map.append(closest_ase_atom_index)
min_distances.append(min_distance)
if len(ddec6_index_map) != len(ddec_atoms):
msg = f"DDEC6 map is incomplete for calculation in directory: {src}"
raise RuntimeError(msg)
if any(d > tol for d in min_distances):
max_distance = max(min_distances)
msg = f"Atomic displacement exceeds tolerance: {max_distance} > {tol}"
warnings.warn(msg, category=UserWarning, stacklevel=1)
return ddec6_index_map
[docs]
def harvest_ddec6_results(src: str | Path) -> DDEC6Analysis:
"""Harvest DDEC6 charge analysis results from a directory.
Args:
src: The directory of the completed calculation.
Returns:
A :class:`.DDEC6Analysis` dictionary.
Warning:
The success of this function depends on whether
:func:`.get_ddec6_index_map` accurately maps the indices of the
structure identified by the ``pymatgen`` ChargemolAnalysis runner.
"""
logger.debug(f"Loading DDEC6 data for {src!s}")
analysis = ChargemolAnalysis(path=src, run_chargemol=False)
ddec6_index_map = get_ddec6_index_map(src)
charges = [0.0] * len(ddec6_index_map)
spin_densities = [0.0] * len(ddec6_index_map)
bond_orders: dict[int, BondOrders] = {}
dipoles = [[0.0] * 3] * len(ddec6_index_map)
rsquared_moments = [0.0] * len(ddec6_index_map)
rcubed_moments = [0.0] * len(ddec6_index_map)
rfourth_moments = [0.0] * len(ddec6_index_map)
for i_ddec, i_ase in enumerate(ddec6_index_map):
charges[i_ase] = analysis.ddec_charges[i_ddec]
spin_densities[i_ase] = analysis.ddec_spin_moments[i_ddec]
bond_orders[i_ase] = analysis.bond_order_dict[i_ddec]
dipoles[i_ase] = analysis.dipoles[i_ddec]
rsquared_moments[i_ase] = analysis.ddec_rsquared_moments[i_ddec]
rcubed_moments[i_ase] = analysis.ddec_rcubed_moments[i_ddec]
rfourth_moments[i_ase] = analysis.ddec_rfourth_moments[i_ddec]
for bond_order in bond_orders.values():
for bonded_to in bond_order["bonded_to"]:
# DDEC6 Data is 1-indexed
bonded_to["index"] = ddec6_index_map[bonded_to["index"] - 1]
ddec6_data = DDEC6Analysis(
partial_charges=charges,
spin_moments=spin_densities,
dipoles=dipoles,
rsquared_moments=rsquared_moments,
rcubed_moments=rcubed_moments,
rfourth_moments=rfourth_moments,
bond_order_dict=bond_orders,
)
logger.info(f"Successfully loaded DDEC6 data for {src!s}")
return ddec6_data