"""Molecule conversion utilities for the C3D 3D viewer.
Converts OpenEye OEMolBase and OEDesignUnit objects into string
representations (SDF or PDB) that 3Dmol.js can consume.
"""
from __future__ import annotations
import logging
from dataclasses import dataclass
from openeye import oechem
log = logging.getLogger("cnotebook")
[docs]
@dataclass
class MoleculeData:
"""Container for molecule data ready for 3Dmol.js consumption.
:param name: Display name for the molecule.
:param data: String content (SDF or PDB format).
:param format: Format identifier (``"sdf"`` or ``"pdb"``).
:param source_type: Origin type (``"molecule"`` or ``"design_unit"``).
:param num_atoms: Number of atoms in the molecule.
:param disabled: If True, the entry is hidden when the viewer starts.
"""
name: str
data: str
format: str
source_type: str
num_atoms: int = 0
disabled: bool = False
[docs]
def convert_molecule(mol: oechem.OEMolBase, name: str | None = None, disabled: bool = False) -> MoleculeData:
"""Convert an OpenEye molecule to SDF string data for 3Dmol.js.
If the molecule lacks 3D coordinates, conformer generation is
attempted automatically via Omega. A warning is logged when this
occurs.
:param mol: OpenEye molecule to convert.
:param name: Optional display name. Falls back to the molecule title,
then to ``"molecule"``.
:returns: :class:`MoleculeData` with ``format="sdf"`` and
``source_type="molecule"``.
:raises TypeError: If *mol* is not an :class:`oechem.OEMolBase`.
:raises ValueError: If conformer generation fails.
Example::
from openeye import oechem
mol = oechem.OEMol()
oechem.OESmilesToMol(mol, "c1ccccc1")
data = convert_molecule(mol, name="benzene")
"""
if not isinstance(mol, oechem.OEMolBase):
raise TypeError(
f"Expected OEMolBase, got {type(mol).__name__}"
)
mol = _ensure_3d_coords(mol)
if name is None:
title = mol.GetTitle()
name = title if title else "molecule"
if _has_residue_info(mol):
data = _mol_to_pdb_string(mol)
fmt = "pdb"
else:
data = _mol_to_sdf_string(mol)
fmt = "sdf"
return MoleculeData(
name=name,
data=data,
format=fmt,
source_type="molecule",
num_atoms=mol.NumAtoms(),
disabled=disabled,
)
[docs]
def convert_design_unit(du: oechem.OEDesignUnit, name: str | None = None, disabled: bool = False) -> MoleculeData:
"""Convert an OpenEye design unit to PDB string data for 3Dmol.js.
Extracts the full complex (all components) from the design unit and writes it as a PDB string.
:param du: OpenEye design unit to convert.
:param name: Optional display name. Falls back to the design unit title, then to ``"design_unit"``.
:param disabled: bool, if ``True``, the entry will appear as disabled in the entries list.
:returns: :class:`MoleculeData` with ``format="pdb"`` and ``source_type="design_unit"``.
:raises TypeError: If *du* is not an :class:`oechem.OEDesignUnit`.
Example::
from openeye import oechem
du = oechem.OEDesignUnit()
oechem.OEReadDesignUnit("complex.oedu", du)
data = convert_design_unit(du)
"""
if not isinstance(du, oechem.OEDesignUnit):
raise TypeError(
f"Expected OEDesignUnit, got {type(du).__name__}"
)
if name is None:
title = du.GetTitle()
name = title if title else "design_unit"
complex_mol = oechem.OEGraphMol()
du.GetComponents(
complex_mol,
oechem.OEDesignUnitComponents_TargetComplex | oechem.OEDesignUnitComponents_ListComponents
)
# Extract the ligand separately and mark its atoms as HETATM
# (This should probably be reported to OpenEye as a bug, as it should be unnecessary)
lig_mol = oechem.OEGraphMol()
if du.GetLigand(lig_mol):
for atom in lig_mol.GetAtoms():
res = oechem.OEAtomGetResidue(atom)
res.SetHetAtom(True)
oechem.OEAtomSetResidue(atom, res)
oechem.OEAddMols(complex_mol, lig_mol)
num_atoms = complex_mol.NumAtoms()
pdb_string = _mol_to_pdb_string(complex_mol)
return MoleculeData(
name=name,
data=pdb_string,
format="pdb",
source_type="design_unit",
num_atoms=num_atoms,
disabled=disabled,
)
# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------
def _has_residue_info(mol: oechem.OEMolBase) -> bool:
"""Check if a molecule contains standard protein residues.
When a molecule has more than one standard protein residue it likely
originated from a PDB file and should be written as PDB to preserve
HETATM flags, chain IDs, and residue numbering that 3Dmol.js presets
rely on.
:param mol: Molecule to inspect.
:returns: True if the molecule contains standard protein residues.
"""
return oechem.OECount(mol, oechem.OEIsStandardAminoAcid()) > 1
def _mol_to_sdf_string(mol: oechem.OEMolBase) -> str:
"""Write an OEMolBase to an SDF-format string.
:param mol: Molecule with valid 3D coordinates.
:returns: SDF string including the ``$$$$`` record terminator.
"""
oms = oechem.oemolostream()
oms.openstring()
oms.SetFormat(oechem.OEFormat_SDF)
oechem.OEWriteMolecule(oms, mol)
return oms.GetString().decode("utf-8")
def _mol_to_pdb_string(mol: oechem.OEMolBase) -> str:
"""Write an OEMolBase to a PDB-format string.
:param mol: Molecule with valid 3D coordinates.
:returns: PDB-format string with ATOM/HETATM records.
"""
oms = oechem.oemolostream()
oms.openstring()
oms.SetFormat(oechem.OEFormat_PDB)
oms.SetFlavor(
oechem.OEFormat_PDB,
oechem.OEOFlavor_PDB_DEFAULT | oechem.OEOFlavor_PDB_BONDS | oechem.OEOFlavor_PDB_ORDERS
)
oechem.OEWriteMolecule(oms, mol)
val = oms.GetString().decode("utf-8")
oms.close()
return val
def _ensure_3d_coords(mol: oechem.OEMolBase) -> oechem.OEMolBase:
"""Ensure the molecule has 3D coordinates.
If the molecule dimension is not 3, Omega is used to generate a
single conformer. The original molecule is not modified; a copy
(as :class:`oechem.OEMol`) is returned when generation is needed.
:param mol: Input molecule (may be 2D or lacking coordinates).
:returns: Molecule with 3D coordinates (may be a new OEMol copy).
:raises ValueError: If Omega conformer generation fails.
"""
# 2D and 3D are OK
if mol.GetDimension() >= 2:
return mol
log.warning(
"Molecule '%s' lacks 3D coordinates; generating with OEOmega.",
mol.GetTitle() or "untitled",
)
from openeye import oeomega
work_mol = oechem.OEMol(mol)
omega = oeomega.OEOmega()
omega.SetMaxConfs(1)
omega.SetStrictStereo(False)
if not omega(work_mol):
raise ValueError(
f"Failed to generate 3D coordinates for molecule "
f"'{mol.GetTitle() or 'untitled'}'"
)
return work_mol