Source code for cnotebook.c3d.convert

"""Molecule conversion utilities for the C3D 3D viewer.

Converts OpenEye OEMolBase and OEDesignUnit objects into string
representations (SDF or PDB) that 3Dmol.js can consume.
"""

from __future__ import annotations

import logging
from dataclasses import dataclass

from openeye import oechem

log = logging.getLogger("cnotebook")


[docs] @dataclass class MoleculeData: """Container for molecule data ready for 3Dmol.js consumption. :param name: Display name for the molecule. :param data: String content (SDF or PDB format). :param format: Format identifier (``"sdf"`` or ``"pdb"``). :param source_type: Origin type (``"molecule"`` or ``"design_unit"``). :param num_atoms: Number of atoms in the molecule. :param disabled: If True, the entry is hidden when the viewer starts. """ name: str data: str format: str source_type: str num_atoms: int = 0 disabled: bool = False
[docs] def convert_molecule(mol: oechem.OEMolBase, name: str | None = None, disabled: bool = False) -> MoleculeData: """Convert an OpenEye molecule to SDF string data for 3Dmol.js. If the molecule lacks 3D coordinates, conformer generation is attempted automatically via Omega. A warning is logged when this occurs. :param mol: OpenEye molecule to convert. :param name: Optional display name. Falls back to the molecule title, then to ``"molecule"``. :returns: :class:`MoleculeData` with ``format="sdf"`` and ``source_type="molecule"``. :raises TypeError: If *mol* is not an :class:`oechem.OEMolBase`. :raises ValueError: If conformer generation fails. Example:: from openeye import oechem mol = oechem.OEMol() oechem.OESmilesToMol(mol, "c1ccccc1") data = convert_molecule(mol, name="benzene") """ if not isinstance(mol, oechem.OEMolBase): raise TypeError( f"Expected OEMolBase, got {type(mol).__name__}" ) mol = _ensure_3d_coords(mol) if name is None: title = mol.GetTitle() name = title if title else "molecule" if _has_residue_info(mol): data = _mol_to_pdb_string(mol) fmt = "pdb" else: data = _mol_to_sdf_string(mol) fmt = "sdf" return MoleculeData( name=name, data=data, format=fmt, source_type="molecule", num_atoms=mol.NumAtoms(), disabled=disabled, )
[docs] def convert_design_unit(du: oechem.OEDesignUnit, name: str | None = None, disabled: bool = False) -> MoleculeData: """Convert an OpenEye design unit to PDB string data for 3Dmol.js. Extracts the full complex (all components) from the design unit and writes it as a PDB string. :param du: OpenEye design unit to convert. :param name: Optional display name. Falls back to the design unit title, then to ``"design_unit"``. :param disabled: bool, if ``True``, the entry will appear as disabled in the entries list. :returns: :class:`MoleculeData` with ``format="pdb"`` and ``source_type="design_unit"``. :raises TypeError: If *du* is not an :class:`oechem.OEDesignUnit`. Example:: from openeye import oechem du = oechem.OEDesignUnit() oechem.OEReadDesignUnit("complex.oedu", du) data = convert_design_unit(du) """ if not isinstance(du, oechem.OEDesignUnit): raise TypeError( f"Expected OEDesignUnit, got {type(du).__name__}" ) if name is None: title = du.GetTitle() name = title if title else "design_unit" complex_mol = oechem.OEGraphMol() du.GetComponents( complex_mol, oechem.OEDesignUnitComponents_TargetComplex | oechem.OEDesignUnitComponents_ListComponents ) # Extract the ligand separately and mark its atoms as HETATM # (This should probably be reported to OpenEye as a bug, as it should be unnecessary) lig_mol = oechem.OEGraphMol() if du.GetLigand(lig_mol): for atom in lig_mol.GetAtoms(): res = oechem.OEAtomGetResidue(atom) res.SetHetAtom(True) oechem.OEAtomSetResidue(atom, res) oechem.OEAddMols(complex_mol, lig_mol) num_atoms = complex_mol.NumAtoms() pdb_string = _mol_to_pdb_string(complex_mol) return MoleculeData( name=name, data=pdb_string, format="pdb", source_type="design_unit", num_atoms=num_atoms, disabled=disabled, )
# --------------------------------------------------------------------------- # Internal helpers # --------------------------------------------------------------------------- def _has_residue_info(mol: oechem.OEMolBase) -> bool: """Check if a molecule contains standard protein residues. When a molecule has more than one standard protein residue it likely originated from a PDB file and should be written as PDB to preserve HETATM flags, chain IDs, and residue numbering that 3Dmol.js presets rely on. :param mol: Molecule to inspect. :returns: True if the molecule contains standard protein residues. """ return oechem.OECount(mol, oechem.OEIsStandardAminoAcid()) > 1 def _mol_to_sdf_string(mol: oechem.OEMolBase) -> str: """Write an OEMolBase to an SDF-format string. :param mol: Molecule with valid 3D coordinates. :returns: SDF string including the ``$$$$`` record terminator. """ oms = oechem.oemolostream() oms.openstring() oms.SetFormat(oechem.OEFormat_SDF) oechem.OEWriteMolecule(oms, mol) return oms.GetString().decode("utf-8") def _mol_to_pdb_string(mol: oechem.OEMolBase) -> str: """Write an OEMolBase to a PDB-format string. :param mol: Molecule with valid 3D coordinates. :returns: PDB-format string with ATOM/HETATM records. """ oms = oechem.oemolostream() oms.openstring() oms.SetFormat(oechem.OEFormat_PDB) oms.SetFlavor( oechem.OEFormat_PDB, oechem.OEOFlavor_PDB_DEFAULT | oechem.OEOFlavor_PDB_BONDS | oechem.OEOFlavor_PDB_ORDERS ) oechem.OEWriteMolecule(oms, mol) val = oms.GetString().decode("utf-8") oms.close() return val def _ensure_3d_coords(mol: oechem.OEMolBase) -> oechem.OEMolBase: """Ensure the molecule has 3D coordinates. If the molecule dimension is not 3, Omega is used to generate a single conformer. The original molecule is not modified; a copy (as :class:`oechem.OEMol`) is returned when generation is needed. :param mol: Input molecule (may be 2D or lacking coordinates). :returns: Molecule with 3D coordinates (may be a new OEMol copy). :raises ValueError: If Omega conformer generation fails. """ # 2D and 3D are OK if mol.GetDimension() >= 2: return mol log.warning( "Molecule '%s' lacks 3D coordinates; generating with OEOmega.", mol.GetTitle() or "untitled", ) from openeye import oeomega work_mol = oechem.OEMol(mol) omega = oeomega.OEOmega() omega.SetMaxConfs(1) omega.SetStrictStereo(False) if not omega(work_mol): raise ValueError( f"Failed to generate 3D coordinates for molecule " f"'{mol.GetTitle() or 'untitled'}'" ) return work_mol