Source code for ccdc.molecule

#
# This code is Copyright (C) 2015 The Cambridge Crystallographic Data Centre
# (CCDC) of 12 Union Road, Cambridge CB2 1EZ, UK and a proprietary work of CCDC.
# This code may not be used, reproduced, translated, modified, disassembled or
# copied, except in accordance with a valid licence agreement with CCDC and may
# not be disclosed or redistributed in any form, either in whole or in part, to
# any third party. All copies of this code made in accordance with a valid
# licence agreement as referred to above must contain this copyright notice.
#
# No representations, warranties, or liabilities are expressed or implied in the
# supply of this code by CCDC, its servants or agents, except where such
# exclusion or limitation is prohibited, void or unenforceable under governing
# law.
#
'''
The :mod:`ccdc.molecule` contains classes concerned with chemistry.

The three main classes of the :mod:`ccdc.molecule` module are:

- :class:`ccdc.molecule.Atom`
- :class:`ccdc.molecule.Bond`
- :class:`ccdc.molecule.Molecule`

The :class:`ccdc.molecule.Molecule` class contains attributes relating to the
chemistry of the molecule, for example the SMILES representation.

>>> from ccdc.io import MoleculeReader
>>> csd_mol_reader = MoleculeReader('CSD')
>>> first_mol_in_csd = csd_mol_reader[0]
>>> print(first_mol_in_csd.smiles)
CC(=O)NN1C=NN=C1N(N=Cc1c(Cl)cccc1Cl)C(C)=O

In some cases a molecule may not have a canonical SMILES representation -
where the structure has unknown atoms or bonds.  In these cases the value will
be None:

>>> AJABIX01 = csd_mol_reader.molecule('AJABIX01')
>>> print(AJABIX01.smiles)
None

The :class:`ccdc.molecule.Molecule` also contains lists of atoms and bonds.

>>> len(first_mol_in_csd.atoms)
35
>>> len(first_mol_in_csd.bonds)
36
>>> first_atom = first_mol_in_csd.atoms[0]
>>> first_bond = first_mol_in_csd.bonds[0]

The :class:`ccdc.molecule.Atom` contains attributes such as the
:attr:`ccdc.molecule.Atom.atomic_symbol`.

>>> print(first_atom.atomic_symbol)
Cl

The :class:`ccdc.molecule.Bond` contains attributes such as the
:attr:`ccdc.molecule.Bond.bond_type`.

>>> first_bond.bond_type  # doctest: +ELLIPSIS
<ccdc.molecule.Bond.BondType object at ...>
>>> str(first_bond.bond_type)
'Single'
>>> first_bond.bond_type == 1
True

'''
##########################################################################

import collections

from ccdc.utilities import UncertainValue, nested_class, bidirectional_dict, _detect_format

from ccdc.utilities import _private_importer
with _private_importer() as pi:
    pi.import_ccdc_module('MathsLib')
    pi.import_ccdc_module('ChemistryLib')
    pi.import_ccdc_module('ChemicalAnalysisLib')
    pi.import_ccdc_module('FileFormatsLib')
    pi.import_ccdc_module('MotifSearchLib')
    pi.import_ccdc_module('AnnotationsLib')

Coordinates = collections.namedtuple('Coordinates', ['x', 'y', 'z'])


def str_coordinates(c):
    return 'Coordinates(x=%.3f, y=%.3f, z=%.3f)' % (c.x, c.y, c.z)
Coordinates.__repr__ = str_coordinates

##########################################################################

[docs]class Atom(object): '''Represents an atom.''' _hbond_criterion = None _RSChiralityDict = dict([ (ChemicalAnalysisLib.RSChiralFlag_UNKNOWN_CHIRAL, 'Unknown'), (ChemicalAnalysisLib.RSChiralFlag_CANNOT_CALCULATE_CHIRALITY, 'Error'), (ChemicalAnalysisLib.RSChiralFlag_NOT_CHIRAL, ''), (ChemicalAnalysisLib.RSChiralFlag_R_CHIRAL, 'R'), (ChemicalAnalysisLib.RSChiralFlag_S_CHIRAL, 'S'), (ChemicalAnalysisLib.RSChiralFlag_MIXED_CHIRAL, 'Mixed'), ]) _protein_type_dict = bidirectional_dict( Amino_acid=AnnotationsLib.ProteinSubstructureData.AMINOACID, Ligand=AnnotationsLib.ProteinSubstructureData.LIGAND, Cofactor=AnnotationsLib.ProteinSubstructureData.COFACTOR, Water=AnnotationsLib.ProteinSubstructureData.WATER, Metal=AnnotationsLib.ProteinSubstructureData.METAL, Nucleotide=AnnotationsLib.ProteinSubstructureData.NUCLEOTIDE, Unknown=AnnotationsLib.ProteinSubstructureData.UNKNOWN ) def __init__(self, atomic_symbol='', atomic_number=0, coordinates=None, label='', formal_charge=0, _atom=None): '''Instantiate an atom. May be constructed from an atomic symbol using the atomic_symbol parameter. >>> atom = Atom(atomic_symbol='C') >>> print(atom) Atom(C) or from an atomic number: >>> atom = Atom(atomic_number=92) >>> print(atom) Atom(U) The atom parameter if present should be a ChemistryLib HAtom and is for internal use only. :raises: TypeError if the atom parameter is not a ChemistryLib HAtom. ''' if isinstance(_atom, ChemistryLib.Atom): self._atom = _atom self._molecule = _atom.molecule() elif _atom is None: if atomic_symbol == '': self._atom = ChemistryLib.Atom3d( ChemistryLib.Element(ChemistryLib.Element.atomic_number_to_ccdc_code(atomic_number)) ) else: self._atom = ChemistryLib.Atom3d( ChemistryLib.Element(atomic_symbol) ) self._atom.set_label(label) if coordinates is not None: s = self._atom.site() if s is None: s = ChemistryLib.Site() s.set_orth(MathsLib.Point(coordinates[0], coordinates[1], coordinates[2]), ChemistryLib.Cell()) self._atom.set_site(s) self._atom.set_label(label) self.formal_charge = formal_charge elif 'ChemistryLib.Atom' in str(_atom.__class__): # There is some confusion about ChemistryLib.Atom in the presence of mixed imports # Like from ccdc.molecule import Atom # vs. from molecule import Atom # self._atom = _atom self._molecule = _atom.molecule else: raise TypeError('The atom parameter is not an internal CCDC atom.') def __eq__(self, other): '''Equality for atoms.''' if not isinstance(other, Atom): return False if self._atom == other._atom: # Return True if the underlying atoms have the same memory location return True if self._atom.site() is None or other._atom.site() is None: # If at least one of the atoms is siteless, check memory too return self._atom == other._atom # Otherwise, if the underlying atoms are different we still need to # check for equality. For example, when generating a molecule from a # crystal structure with disorder (from crystal structure view), the # same atoms will be regenerated every time. return self._atom.compare(other._atom, 0) == 0 def __ne__(self, other): '''Inequality for atoms.''' return not self == other def __hash__(self): '''Makes it possible to create sets of atoms. Also allows atoms to be used as keys in dictionaries. ''' return hash((self._atom.index(), self.label)) def __str__(self): '''String representation of an atom.''' if not self.label: return 'Atom(%s)' % self.atomic_symbol else: return 'Atom(%s)' % self.label __repr__ = __str__ @property def index(self): '''The index of this atom in its parent molecule. Note that if an atom is not part of a molecule this function will return ``None``. >>> atom = Atom(atomic_symbol='Fe') >>> print(atom.index) None ''' if not self._atom: return None try: return self._atom.index() except RuntimeError: return None @property def atomic_symbol(self): '''The atomic symbol of the element represented. >>> atom = Atom(atomic_symbol='N') >>> print(atom.atomic_symbol) N ''' return self._atom.element().atomic_symbol() @atomic_symbol.setter def atomic_symbol(self, s): '''Set the element according to the symbol. :param s: a string or unicode >>> a = Atom(atomic_symbol='C') >>> print(a.atomic_symbol, a.atomic_number) C, 6 >>> a.atomic_symbol = 'O' >>> print(a.atomic_symbol, a.atomic_number) O, 8 ''' el = ChemistryLib.Element(s) self._atom.set_element(el) @property def atomic_number(self): '''The atomic number of the element represented. >>> atom = Atom(atomic_symbol='N') >>> print(atom.atomic_number) 7 ''' return self._atom.element().atomic_number() @atomic_number.setter def atomic_number(self, n): '''Set the element according to atomic number. :param n: a number >>> a = Atom(atomic_symbol='C') >>> print(a.atomic_symbol, a.atomic_number) C, 6 >>> a.atomic_number = 7 >>> print(a.atomic_symbol, a.atomic_number) N, 7 ''' el = ChemistryLib.Element( ChemistryLib.Element.atomic_number_to_ccdc_code(n) ) self._atom.set_element(el) @property def label(self): '''The label for the atom. >>> atom = Atom(atomic_symbol='C', label='C1') >>> print(atom.label) C1 >>> atom = Atom(atomic_symbol='C') >>> print(atom.label) <BLANKLINE> ''' return self._atom.label() @label.setter def label(self, s): '''Set the label. :param s: string or unicode >>> a = Atom(atomic_symbol='C') >>> print(a.label) None >>> a.label = 'C1' >>> print(a.label) C1 ''' self._atom.set_label(s) def _get_psd(self): '''Private.''' annos = self._atom.annotations() if AnnotationsLib.has_ProteinSubstructureData(annos): psd = annos.find_ProteinSubstructureData() return psd @property def residue_label(self): '''The residue label of the atom if available, or ``None``.''' psd = self._get_psd() if psd: if psd.residue_sequence_number() <= 0 or psd.residue_name() == '****': return psd.residue_name() return '%s%d' % (psd.residue_name(), psd.residue_sequence_number()) @property def chain_label(self): '''The label of the chain in which the atom lies if available.''' annos = self._atom.annotations() if AnnotationsLib.has_ProteinSubstructureData(annos): psd = annos.find_ProteinSubstructureData() if psd: return psd.chain_id() @residue_label.setter def residue_label(self, value): annos = self._atom.annotations() psd = annos.obtain_ProteinSubstructureData() if not isinstance(value, str): label, index = value else: label, index = value, 0 psd.set_residue_name(label) if index != 0: psd.set_residue_sequence_number(index) @property def is_in_protein(self): '''Whether or not the atom is in a protein.''' psd = self._get_psd() return psd and psd.type() != AnnotationsLib.ProteinSubstructureData.UNKNOWN @property def protein_atom_type(self): '''The protein atom type of an atom. This may be 'Amino_acid', 'Ligand', 'Cofactor', 'Water', 'Metal', 'Nucleotide' or 'Unknown'. ''' psd = self._get_psd() return psd and Atom._protein_type_dict.inverse_lookup(psd.type()) @property def formal_charge(self): '''Formal charge on the atom. >>> atom = Atom(atomic_symbol='Cl') >>> atom.formal_charge 0 ''' return self._atom.formal_charge() @formal_charge.setter def formal_charge(self, value): '''Set the formal charge.''' if hasattr(self, '_molecule'): for a in [self._molecule.atom(i) for i in range(self._molecule.natoms())]: a.annotations().remove_PartialCharge() self._atom.set_formal_charge(value) @property def partial_charge(self): '''Partial charge on the atom.''' if AnnotationsLib.has_PartialCharge(self._atom.annotations()): return self._atom.annotations().find_PartialCharge().value() @partial_charge.setter def partial_charge(self, value): if value is None: self._atom.annotations().remove_PartialCharge() else: self._atom.annotations().set_PartialCharge(AnnotationsLib.PartialCharge(value)) @property def coordinates(self): '''The x, y, z coordinates of the atoms in orthogonal space. >>> atom = Atom(atomic_symbol='C', coordinates=(1.0, 2.0, 3.0)) >>> atom.coordinates Coordinates(x=1.000, y=2.000, z=3.000) The coordinates may be addressed by index or by key: >>> atom.coordinates.x 1.0 >>> atom.coordinates[2] 3.0 Note that this function will return ``None`` if the atom does not have 3D coordinates. >>> atom = Atom(atomic_symbol='C') >>> print(atom.coordinates) None ''' try: o = self._atom.site().orth() return Coordinates(o.x(), o.y(), o.z()) except: pass @coordinates.setter def coordinates(self, coords=None): '''Set the coordinates of an atom. Remove coordinates if coords is None :param coords: a sequence of three floats, or None >>> a = Atom(atomic_symbol='C') >>> print(a.coordinates) None >>> a.coordinates = (1.0, 2.0, 3.0) >>> print(a.coordinates) Coordinates(x=1.0, y=2.0, z=3.0) ''' if coords is None: self._atom.set_site(None) else: s = self._atom.site() if s is None: s = ChemistryLib.Site() self._atom.set_site(s) s.set_orth(MathsLib.Point(coords[0], coords[1], coords[2]), ChemistryLib.Cell()) @property def fractional_coordinates(self): '''The fractional coordinates of an atom. The coordinates of the atom expressed in terms of the underlying crystal's unit cell. Where there is no underlying crystal, these will appear relative to the default unit cell, so orthogonal and fractional coordinates coincide. ''' try: frac = self._atom.site().frac() return Coordinates(frac.x(), frac.y(), frac.z()) except: pass @property def fractional_uncertainties(self): '''The fractional coordinates of the atom with the uncertainty information, if present. If no uncertainty information was recorded for the atom, this will be ``None``. ''' try: frac = self.fractional_coordinates if frac is None: return None uncs = self._atom.site().uncertainty() except RuntimeError: return None else: if uncs is None: return None if any(u == 0 for p, u in uncs): return None return tuple( UncertainValue(f, MathsLib.Uncertainty(x[0], x[1])) for f, x in zip(frac, uncs) )
[docs] class DisplacementParameters(object): '''Represents the atomic displacement parameters of an atom, where present. Atoms read from a PDB format file may have displacement parameters, the same is true for structures read from a CIF format, and structures from newer versions of the CSD SQLite crystal structure databases. ''' _type_map = bidirectional_dict( Anisotropic=ChemistryLib.AtomicDisplacementParameters.ANISOTROPIC, Isotropic=ChemistryLib.AtomicDisplacementParameters.ISOTROPIC, Multipole=ChemistryLib.AtomicDisplacementParameters.OVERALL ) def __init__(self, _displacement_parameters): self._displacement_parameters = _displacement_parameters @property def type(self): '''Whether the atomic displacement parameters are anisotropic, isotropic, from multipole expansion or ``None`` if no atomic displacement parameters are available. Entries from the CSD will only have anisotropic, isotropic or ``None``. :returns: One of 'Anisotropic', 'Isotropic', 'Multipole', or ``None`` if no atomic displacement parameters are defined. ''' if self._displacement_parameters: return Atom.DisplacementParameters._type_map.inverse_lookup(self._displacement_parameters.type()) @property def values(self): '''The six values of the displacement parameters.''' if self._displacement_parameters: return tuple( tuple(round(self._displacement_parameters.value(i+1, j+1), self._displacement_parameters.uncertainty(i+1, j+1).precision()) for i in range(3)) for j in range(3) ) @property def uncertainties(self): '''The uncertainties in the displacement parameters.''' if self._displacement_parameters: return tuple( # tuple(round(self._displacement_parameters.uncertainty(i+1, j+1).uncertainty(), # self._displacement_parameters.uncertainty(i+1, j+1).precision()) for i in range(3)) tuple(int(self._displacement_parameters.uncertainty(i + 1, j + 1).uncertainty()) for i in range(3)) for j in range(3) ) @property def precision(self): '''The precision of uncertainty values in the displacement parameters.''' if self._displacement_parameters: return tuple( tuple(int(self._displacement_parameters.uncertainty(i + 1, j + 1).precision()) for i in range(3)) for j in range(3) ) @property def isotropic_equivalent(self): '''The equivalent isotropic displacement. See Fischer and Tillmanns (1988), *Acta Cryst.* **C44**, pp. 775-776. ''' if self._displacement_parameters: return self._displacement_parameters.u_equivalent() @property def temperature_factor(self): '''The temperature factor.''' if self._displacement_parameters: return round(100*self._displacement_parameters.u_equivalent(), 3)
@property def displacement_parameters(self): '''The displacement parameters of this atom. :returns: :class:`ccdc.molecule.Atom.DisplacementParameters` or ``None`` ''' if self._atom.site() and self._atom.site().atomic_displacement_parameters(): return Atom.DisplacementParameters( _displacement_parameters=self._atom.site().atomic_displacement_parameters()) @property def occupancy(self): '''The occupancy of the atom.''' if self._atom.site(): return self._atom.site().sof() @property def vdw_radius(self): '''The Van der Waals radius of the atom.''' if self._atom: return self._atom.element().vdw_radius() @property def atomic_weight(self): '''The atomic weight of the atom.''' return self._atom.element().atomic_weight() @property def is_cyclic(self): '''Test whether the atom is part of a ring system. >>> atom = Atom() >>> atom.is_cyclic False ''' try: return self._atom.cyclic() except RuntimeError: return False @property def is_donor(self): '''Test whether or not the atom is a hydrogen bond donor. >>> atom = Atom(atomic_symbol='C') >>> atom.is_donor False ''' if Atom._hbond_criterion is None: Atom._hbond_criterion = Molecule.HBondCriterion() try: return Atom._hbond_criterion.is_donor(self) except RuntimeError: return False @property def is_acceptor(self): '''Test whether or not the atom is a hydrogen bond acceptor. >>> atom = Atom(atomic_symbol='C') >>> atom.is_acceptor False ''' if Atom._hbond_criterion is None: Atom._hbond_criterion = Molecule.HBondCriterion() try: return Atom._hbond_criterion.is_acceptor(self) except RuntimeError: return False @property def is_metal(self): '''Whether this atom is a metal. >>> atom = Atom(atomic_symbol='C') >>> atom.is_metal False >>> atom = Atom(atomic_symbol='Hg') >>> atom.is_metal True ''' return self._atom.element().metal() @property def chirality(self): '''The R/S Chirality flag for this atom. Returns one of '', 'R', 'S', 'Mixed', 'Error'. ''' if not self._atom.in_molecule(): return 'Error' if ChemicalAnalysisLib.atom_chirality(self._atom) == ChemicalAnalysisLib.RSChiralFlag_UNKNOWN_CHIRAL: try: ChemicalAnalysisLib.calculate_rs_chirality(self._atom.molecule()) except RuntimeError: return 'Error' return self._RSChiralityDict[ChemicalAnalysisLib.atom_chirality(self._atom)] @property def is_chiral(self): '''Whether the atom is chiral. >>> atom = Atom() >>> atom.is_chiral False Returns True if the R/S Chirality flag for this atom is one of 'R', 'S', or 'Mixed'. ''' return self.chirality in ['R', 'S', 'Mixed']
[docs] def solvent_accessible_surface(self, probe_radius=1.1, npoints=6000, method='random', filter_by_ligand=False, ligand_atoms=None): '''The fraction of the atom's surface which is solvent-accessible. :parameter probe_radius: the radius of the probe's sphere. :parameter npoints: the number of points to generate on the surface of a sphere. :parameter method: one of 'random' or 'equidistant' :parameter filter_by_ligand: bool True or False - this parameter controls whether to measure the contacting accessible surface. If True then only the solvent accessible surface that is within a certain distance of the ligand atoms is counted. This allows the user to measure, on this molecule, the accessible surface that is in contact with another molecule. :parameter ligand_atoms: list of atoms :returns: the fraction of the atom's surface area which is accessible. ''' if method.lower().startswith('r'): mode = ChemicalAnalysisLib.AtomAccessibleSurfaceCalculator.RANDOM else: mode = ChemicalAnalysisLib.AtomAccessibleSurfaceCalculator.EQUIDISTANT if filter_by_ligand == True: lig_filter = ChemicalAnalysisLib.AtomAccessibleSurfaceCalculator.CONTACTING_ACCESSIBLE_SURFACE _calculator = ChemicalAnalysisLib.AtomAccessibleSurfaceCalculator(mode, lig_filter, [atom._atom for atom in ligand_atoms]) _calculator.set_probe_radius(probe_radius) _calculator.set_npoints(npoints) return _calculator.calculate_surface_fraction(self._atom, self._atom.molecule()) else: _calculator = ChemicalAnalysisLib.AtomAccessibleSurfaceCalculator(mode) _calculator.set_probe_radius(probe_radius) _calculator.set_npoints(npoints) return _calculator.calculate_surface_fraction(self._atom, self._atom.molecule())
@property def bonds(self): '''The bonds which this atom forms''' if self._atom.in_molecule(): return tuple(Bond(_bond=b) for b in self._atom.get_bonds()) return tuple() @property def neighbours(self): '''Those atoms bonded to this one''' if self._atom.in_molecule(): return tuple(Atom(_atom=a) for a in self._atom.get_neighbours()) return tuple() @property def is_spiro(self): '''Whether this is a spiro atom.''' if not hasattr(self._molecule, '_ring_analyser'): self._molecule._ring_analyser = self._molecule.ring_analyser() return self._molecule._ring_analyser.is_node_spiro(self._atom) @property def rings(self): '''The collection of :class:`ccdc.molecule.Ring` in which this atom lies.''' m = Molecule('', _molecule=self._molecule) rings = m.rings return [r for r in rings if self in r] @property def sybyl_type(self): '''The Sybyl atom type of an atom.''' sybyl = ChemistryLib.SybylAtomTyper() try: return sybyl.atom_type(self._atom) except RuntimeError: pass
[docs] def is_in_line_of_sight(self, atom, molecules=None): '''Whether the other atom is within the line of this atom. :param atom: a :class:`ccdc.molecule.Atom` instance. :param molecules: an iterable of molecules to test for occlusion, or None. If molecules is None or an empty iterable the only molecules to be tested will be those of the atoms involved. ''' _contact = ChemistryLib.MolecularContact( self._atom.molecule(), self._atom, atom._atom.molecule(), atom._atom, ChemistryLib.MolecularContact.UNKNOWN_CONTACT ) if molecules is None: s = {} else: s = {m._molecule for m in molecules} return ChemistryLib.LineOfSight().line_of_sight(_contact, s)
##########################################################################
[docs]class Bond(object): '''A bond between two atoms of a molecule.''' _EZStereoChemistryDict = dict([ (ChemicalAnalysisLib.EZStereoChemistryFlag_UNKNOWN_EZ, 'Unknown'), (ChemicalAnalysisLib.EZStereoChemistryFlag_CANNOT_CALCULATE_EZ, ''), (ChemicalAnalysisLib.EZStereoChemistryFlag_E_STEREOCHEMISTRY, 'E'), (ChemicalAnalysisLib.EZStereoChemistryFlag_Z_STEREOCHEMISTRY, 'Z'), ])
[docs] @nested_class('Bond') class BondType(object): '''Represent a bond type as either an integer or a string. >>> bond_type = Bond.BondType(1) >>> print(bond_type) Single >>> bond_type == 1 True ''' def __init__(self, bond_type): '''Initialise with a ChemistryLib BondType instance''' if isinstance(bond_type, ChemistryLib.BondType): self._bond_type = bond_type elif isinstance(bond_type, int): self._bond_type = ChemistryLib.BondType(bond_type) elif isinstance(bond_type, str): ty = self.all_bond_types().get(bond_type, 0) self._bond_type = ChemistryLib.BondType(ty) elif 'ChemistryLib.BondType' in str(bond_type.__class__): self._bond_type = bond_type else: raise TypeError('Unknown bond type %s' % bond_type) def __str__(self): '''String representation of a bond.''' return self._bond_type.text() def __hash__(self): '''Allow bond types to be in sets and dicts.''' return hash(str(self)) def __eq__(self, other): '''Equality. >>> b = Bond.BondType('Double') >>> c = Bond.BondType(2) >>> b == c True ''' if isinstance(other, Bond.BondType): return self._bond_type.value() == other._bond_type.value() elif isinstance(other, int): return self._bond_type.value() == other elif isinstance(other, str): return str(self) == other else: return False def __ne__(self, other): '''Inequality.''' if isinstance(other, Bond.BondType): return self._bond_type.value() != other._bond_type.value() elif isinstance(other, int): return self._bond_type.value() != other elif isinstance(other, str): return str(self) != other else: return True
[docs] @staticmethod def all_bond_types(): '''Return dictionary of all valid CSD bond types.''' return dict( Unknown=0, Single=1, Double=2, Triple=3, Quadruple=4, Aromatic=5, Delocalised=7, Pi=9 )
def __init__(self, bond_type=None, _bond=None): '''Instantiate a bond. :param bond_type: a string, int or a :class:`ccdc.molecule.Bond.BondType` :param _bond: an internal ChemistryLib HBond, for internal use only. ''' if _bond is None: if bond_type is not None: if not isinstance(bond_type, Bond.BondType): bond_type = Bond.BondType(bond_type) else: bond_type = ChemistryLib.BondType.unknown_bond() _bond = ChemistryLib.Bond3d(bond_type._bond_type) self._bond = _bond self._molecule = self._bond.molecule() def __eq__(self, other): '''Equality for bonds.''' return isinstance(other, Bond) and self._bond == other._bond def __str__(self): '''Represent the bond as Bond(Type, at1, at2).''' return 'Bond(%s %s %s)' % (self.bond_type, self.atoms[0], self.atoms[1]) __repr__ = __str__ def __hash__(self): '''Hash to allow bonds to be keys in dicts and members of sets.''' return hash((str(self.bond_type), self.atoms)) @property def atoms(self): '''The two atoms which this bond connects.''' return Atom(_atom=self._bond.atom1()), Atom(_atom=self._bond.atom2()) @property def bond_type(self): '''The bond type represented as a :class:`ccdc.molecule.Bond.BondType`.''' return self.BondType(self._bond.type()) @bond_type.setter def bond_type(self, arg): '''Set the bond type. :param arg: a string, number or :class`ccdc.molecule.Bond.BondType` >>> b = Bond(1) >>> print(b.bond_type) Single >>> b.bond_type = 'Double' >>> print(b.bond_type) Double ''' if not isinstance(arg, Bond.BondType): arg = Bond.BondType(arg) self._bond.set_type(arg._bond_type) @property def ideal_bond_length(self): '''The ideal bond length for a bond of this type connected these atoms. ''' els = [a._atom.element() for a in self.atoms] return ChemistryLib.IdealBondDistance().ideal_distance( els[0], els[1], self.bond_type._bond_type ) @property def is_cyclic(self): '''Test whether the bond is part of a ring system.''' return self._bond.cyclic() @property def length(self): '''The length of the bond in Angstroms.''' return ChemistryLib.bond_length( self._bond.molecule(), self._bond.atom1().index(), self._bond.atom2().index() ) @property def is_rotatable(self): '''Test whether the bond is rotatable.''' return ChemistryLib.rotatable(self._bond) @property def rings(self): '''The collection of :class:`ccdc.molecule.Ring` of which this bond is a part.''' rings = Molecule('', _molecule=self._molecule).rings return [r for r in rings if self in r] @property def is_conjugated(self): '''Whether the bond is conjugated.''' if self.bond_type == 'Aromatic' or self.bond_type == 'Delocalised': return True if self.bond_type == 'Single': # Need a double bond attached to each atom # And those bonds not to be part of a conjugated system? ats = self.atoms if ( ('Double' in [b.bond_type for b in ats[0].bonds] or 'Aromatic' in [b.bond_type for b in ats[0].bonds]) and ('Double' in [b.bond_type for b in ats[1].bonds] or 'Aromatic' in [b.bond_type for b in ats[1].bonds]) ): return True return False @property def sybyl_type(self): '''The Sybyl bond type of this bond.''' sybyl = ChemistryLib.SybylAtomTyper() try: return sybyl.bond_type(self._bond) except RuntimeError: pass @property def ez_stereochemistry(self): '''The E/Z stereochemistry for this bond. Returns one of '', 'E', 'Z', 'Unknown', 'Error'. ''' if ChemicalAnalysisLib.bond_ez_stereochemistry(self._bond) == ChemicalAnalysisLib.EZStereoChemistryFlag_UNKNOWN_EZ: try: ChemicalAnalysisLib.calculate_ez_stereochemistry(self._bond.molecule()) except RuntimeError: return 'Error' return self._EZStereoChemistryDict[ChemicalAnalysisLib.bond_ez_stereochemistry(self._bond)]
##########################################################################
[docs]class Ring(object): '''A ring of atoms in a molecule.''' @staticmethod def _paths_through_atoms(atoms, path): if len(atoms) == 0: yield path for i, a in enumerate(atoms): if a in path[-1].neighbours: for p in Ring._paths_through_atoms(atoms[:i] + atoms[i+1:], path + [a]): yield p @staticmethod def _find_bond(at0, at1): return [b for b in at0.bonds if at1 in [a for a in b.atoms]][0] def __init__(self, atoms, _molecule=None): self._molecule = _molecule path = [atoms.pop()] for p in Ring._paths_through_atoms(atoms, path): self.atoms = p break else: raise RuntimeError('This is not a ring') def __str__(self): return '-'.join(str(a) for a in self.atoms) __repr__ = __str__ def __len__(self): return len(self.atoms) def __contains__(self, other): '''To support atom in ring and bond in ring.''' if isinstance(other, Atom): return other in self.atoms elif isinstance(other, Bond): return other in self.bonds @property def bonds(self): '''The bonds of this ring.''' m = Molecule('', _molecule=self._molecule) s = set(self.atoms) return tuple( b for b in m.bonds if len(set(b.atoms) & s) == 2 ) @property def is_fused(self): '''Whether the ring is fused with another.''' atms = self.atoms s = set(atms) for r in Molecule('', _molecule=self._molecule).rings: other = set(r.atoms) if len(s & other) == len(atms): continue if len(s & other) == 2: return True return False @property def is_aromatic(self): '''Whether or not the ring is aromatic.''' types = [b.bond_type for b in self.bonds] if sum(t == 'Aromatic' for t in types) == len(types): return True if set(types) == {Bond.BondType('Single'), Bond.BondType('Double')}: if max([types[i] == types[(i+1)%len(types)] for i in range(len(types))]) == 0: return True return False @property def is_fully_conjugated(self): '''Whether each bond of the ring is conjugated.''' if sum([b.bond_type in ['Aromatic', 'Delocalised'] for b in self.bonds]) == len(self.bonds): return True singles = [b for b in self.bonds if b.bond_type == 'Single'] return singles and min(b.is_conjugated for b in self.bonds if b.bond_type == 'Single')
##########################################################################
[docs]class Molecule(object): '''Represents a molecule'''
[docs] @nested_class('Molecule') class Contact(object): '''A contact between two molecules.''' _type_map = { ChemistryLib.MolecularContact.UNKNOWN_CONTACT : 'Unknown', ChemistryLib.MolecularContact.VDW_CONTACT : 'Van der Waals', ChemistryLib.MolecularContact.DISTANCE_CONTACT : 'Distance', ChemistryLib.MolecularContact.HBOND_CONTACT : 'HBond', ChemistryLib.MolecularContact.REAL_BOND_CONTACT : 'Real Bond', ChemistryLib.MolecularContact.CUSTOM_CONTACT : 'Custom', } def __init__(self, _contact): '''Private.''' self._contact = _contact def __str__(self): return 'Contact(%s-%s)' % (self.atoms) __repr__ = __str__ @property def atoms(self): '''The atoms forming this contact.''' return (Atom(_atom=self._contact.atom1()), Atom(_atom=self._contact.atom2())) @property def type(self): '''The type of contact.''' return 'Van der Waals' #return Molecule.Contact._type_map[self._contact.type()] @property def intermolecular(self): '''Whether the contact is inter- or intra-molecular.''' return False @property def length(self): '''The length of the contact.''' from ccdc.descriptors import MolecularDescriptors return MolecularDescriptors.atom_distance(*self.atoms) @property def strength(self): '''The strength of this contact.''' if self._contact.is_strength_set(): return self._contact.strength() @property def is_in_line_of_sight(self): '''Whether the contact is occluded by other atoms of the structure.''' return self.atoms[0].is_in_line_of_sight(self.atoms[1])
[docs] @nested_class('Molecule') class HBond(Contact): '''A hydrogen bond between atoms of a molecule. ''' def __str__(self): return 'HBond(%s)' % ('-'.join(str(a) for a in self.atoms)) __repr__ = __str__ @property def atoms(self): '''The atoms forming this contact. If a Hydrogen is present in the contact it will be the central atom. Either the donor or the acceptor may come first: which may be determined by inspecting the neighbours of the atoms. (Testing the attribute :attr:`ccdc.molecule.atom.is_donor` can be misleading if an atom acts as both a donor and an acceptor in a crystal structure.) ''' at1 = Atom(_atom=self._contact.atom1()) at2 = Atom(_atom=self._contact.atom2()) if at1.atomic_number == 1: return (at1.neighbours[0], at1, at2) elif at2.atomic_number == 1: return (at1, at2, at2.neighbours[0]) return (at1, at2) @property def length(self): '''The length of the hydrogen bond. This is the distance between the hydrogen and the acceptor, or ``None`` if the hydrogen is implicit. ''' from ccdc.descriptors import MolecularDescriptors ats = self.atoms if len(ats) == 3: if ats[0] in ats[1].neighbours: return MolecularDescriptors.atom_distance(ats[1], ats[2]) else: return MolecularDescriptors.atom_distance(ats[0], ats[1]) @property def da_distance(self): '''The donor-acceptor distance of this hydrogen bond.''' from ccdc.descriptors import MolecularDescriptors ats = self.atoms return MolecularDescriptors.atom_distance(ats[0], ats[-1]) @property def angle(self): '''The donor-hydrogen-acceptor angle or ``None`` if the hydrogen is implicit.''' from ccdc.descriptors import MolecularDescriptors ats = self.atoms if len(ats) == 3: if ats[1] in ats[0].neighbours: return MolecularDescriptors.atom_angle(*ats) else: return MolecularDescriptors.atom_angle(ats[2], ats[1], ats[0]) @property def type(self): '''The type of this contact ('HBond').''' return 'HBond' @property def is_in_line_of_sight(self): '''Whether the HBond is in line of sight or occluded.''' ats = self.atoms return ats[0].is_in_line_of_sight(ats[1]) and ats[1].is_in_line_of_sight(ats[-1])
def __init__(self, identifier='', _molecule=None): '''Instantiate a molecule. :param identifier: str :param _molecule: a private parameter for internal use only. ''' if _molecule is None: self._molecule = ChemistryLib.Molecule3d_instantiate() else: self._molecule = _molecule self._identifier = identifier def __eq__(self, other): '''Return True if the underlying molecules have the same memory location.''' return isinstance(other, Molecule) and self._molecule == other._molecule def __hash__(self): '''Makes it possible to create sets of molecules. Also allows molecules to be used as keys in dictionaries.''' h = hash((self._molecule.natoms(), self.identifier)) return h @property def identifier(self): '''The string identifier of the molecule, e.g. 'ABEBUF'.''' return self._identifier @identifier.setter def identifier(self, identifier): '''Set the molecule identifier.''' self._identifier = identifier @property def atoms(self): '''List of the atoms in the molecule.''' return [ Atom(_atom=self._molecule.atom(i)) for i in range(self._molecule.natoms()) ] @property def heavy_atoms(self): '''List of the heavy atoms in the molecule.''' return [ a for a in self.atoms if a.atomic_symbol not in ['H', 'D'] ]
[docs] def atom(self, label): '''The unique atom with this label. :param label: a string :raises: RuntimeError if there is not exactly one atom in the molecule with this label :returns: :class:`ccdc.molecule.Atom` ''' possibles = [a for a in self.atoms if a.label == label] if len(possibles) == 1: return possibles[0] raise RuntimeError('The label %s is not present or not unique' % label)
@property def bonds(self): '''List of the bonds in the molecule.''' return [ Bond(_bond=self._molecule.bond(i)) for i in range(self._molecule.nbonds()) ]
[docs] def bond(self, label1, label2): '''The bond between the atoms uniquely so labelled. :param label1, label2: strings :raises: RuntimeError if the atom labels are not unique or the atoms are not bonded :returns: :class:`ccdc.molecule.Bond` ''' a = self.atom(label1) b = self.atom(label2) if b not in a.neighbours: raise RuntimeError('The atoms are not bonded') possibles = [x for x in a.bonds if {label1, label2} == set(a.label for a in x.atoms)] if len(possibles) == 1: return possibles[0] raise RuntimeError('The atoms are not bonded')
@property def formal_charge(self): '''The formal charge of the molecule represented as an integer.''' return self._molecule.formal_charge()
[docs] def assign_partial_charges(self): '''Assigns partial charges to atoms using the Gasteiger-Marsili algorithm. Reference: Gasteiger J.; Marsili M., Tetrahedron 36 (22) 3219-3228 1980. This is a fast, convergent algorithm only suitable for organic molecules. :returns: bool, indicating success or failure of the algorithm. ''' assigner = ChemicalAnalysisLib.AtomicChargesGasteigerMarsili(self._molecule) return assigner.annotate()
[docs] def remove_partial_charges(self): '''Removes partial charges from all atoms.''' for a in self.atoms: a._atom.annotations().remove_PartialCharge()
@property def components(self): '''List of the disconnected molecules with the molecule.''' return [ Molecule( '%02d' % (i+1), _molecule=ChemistryLib.Molecule3d_instantiate(m) ) for i, m in enumerate(self._molecule.split()) ]
[docs] def set_residue_by_component(self): '''Sets the residue label of disconnected components of the molecule.''' from ccdc.descriptors import MolecularDescriptors cs = self.components searchers = [MolecularDescriptors.AtomDistanceSearch(c) for c in cs] for a in self.atoms: for i, s in enumerate(searchers): if s.atoms_within_range(a.coordinates, 1e-6): a.residue_label = 'RES', i+1 break else: raise RuntimeError('No matching atom')
@property def rings(self): '''The collection of basic :class:`ccdc.molecule.Ring` in the molecule.''' if not hasattr(self._molecule, '_ring_analyser'): self._molecule._ring_analyser = self._molecule.ring_analyser() l = [ Ring([Atom(_atom=a) for a in s], _molecule=self._molecule) for s in self._molecule._ring_analyser.basic_rings() ] rings = [ r for r in l if 'Pi' not in [str(b.bond_type) for b in r.bonds] ] return rings @property def smallest_ring_size(self): '''Size of the smallest :class:`ccdc.molecule.Ring` in the molecule. :return: the size of the smallest :class:`ccdc.molecule.Ring` or ``None`` if there are no rings in the molecule. ''' rings = self.rings if rings: return min(len(r) for r in rings) @property def largest_ring_size(self): '''The size of the largest basic :class:`ccdc.molecule.Ring` in the molecule. :return: the size of the largest basic :class:`ccdc.molecule.Ring` or ``None`` if there are no rings in the molecule.''' rings = self.rings if rings: return max(len(r) for r in rings)
[docs] def contacts(self, distance_range=(-5.0, 0.0), only_strongest=False, path_length_range=(4, 999)): '''The collection of short nonbonded contacts in this molecule. Distance limits for nonbonded contacts are defined as distances relative to the sum of van der Waals Radii, *i.e.*: v(X) + v(Y) - p < d(X...Y) < v(X) + v(Y) - q where *v(X)* and *v(Y)* are the vdW radii of *X* and *Y* respectively and *p* and *q* are user-specified values in Angstroms (*e.g.*, if *p* = 0.5 and *q* = 0.1, the contact must be at least 0.1 Angstroms shorter than the sum of vdW radii but no more than 0.5 Angstroms shorter). Standard van der Waals radii are assigned to the common elements. They are taken from Bondi, *J.Phys.Chem.*, **68**, 441, 1964. Other elements are assigned van der Waals radii of 2.0 Angstroms. :param distance_range: Minimum and maximum values (Van der Waals corrected) for a short contact (Angstroms). :param only_strongest: whether to return only the strongest contact made. :param path_length_range: Minimum and maximum values for length of path between the contact atoms. ''' crit = ChemistryLib.VdWCriterion( max(distance_range), min(distance_range), ChemistryLib.ContactCriterion.ANY, min(path_length_range), max(path_length_range), only_strongest ) return tuple( Molecule.Contact(_contact=contact) for contact in ChemistryLib.nonbonded_search(self._molecule, crit) if contact.atom1().index() < contact.atom2().index() )
[docs] class HBondCriterion(object): '''Defines the conditions for an HBond to be detected in a molecule. Contains two dictionary like objects for the donor atom types and the acceptor atom types, whether or not hydrogens are required, a distance range and an angle tolerance (if hydrogen atoms are required), whether or not the distance range is relative to Van der Waals radii, whether inter-, intra- or both HBonds should be detected, and a range of path separation distances for atoms to be considered hydrogen bonded. In almost all cases the defaults will give good results, but users may wish to fine-tune the detection algorithm. ''' _inter_dict = bidirectional_dict( intermolecular=ChemistryLib.HBondCriterion.INTERMOLECULAR, intramolecular=ChemistryLib.HBondCriterion.INTRAMOLECULAR, any=ChemistryLib.HBondCriterion.ANY, )
[docs] class HBondAtomTypes(object): '''A dictionary-like class providing access to the atom types of a donor or an acceptor.''' _settings = bidirectional_dict( YES=1, NO =0, NEVER=-1 ) def __init__(self, type, _hbc): self.type = type self._getter = getattr(_hbc.hbond_criterion(), 'get_%s_atom_setting' % type) self._setter = getattr(_hbc.hbond_criterion(), 'set_%s%s' % ('alpha' if type == 'donor' else '', type)) self._from_string = getattr(_hbc.hbond_criterion(), 'string_to_%s_atom_type' % type) self._name_getter = getattr(_hbc.hbond_criterion(), 'get_%s_atom_label' % type)
[docs] def keys(self): '''The atom type names.''' t = self.type.upper() start = getattr(ChemistryLib.HBondCriterion, '%s_START_N' % t[0]) end = getattr(ChemistryLib.HBondCriterion, '%s_NOT_%s' % (t[0], t)) names = [ self._name_getter(k) for k in range(start, end) ] return names
[docs] def items(self): '''The atom type names and whether or not they will be considered for HBonding purposes.''' return [ (k, self._settings.inverse_lookup(self._getter(self._from_string(k)))) for k in self.keys() ]
def __getitem__(self, k): v = self._from_string(k) if v in [ChemistryLib.HBondCriterion.D_NOT_DONOR, ChemistryLib.HBondCriterion.A_NOT_ACCEPTOR]: raise RuntimeError('Unrecognised atom type %s' % k) return self._settings.inverse_lookup(self._getter(v)) def __setitem__(self, k, val): v = self._from_string(k) if isinstance(val, (bool, int)): val = int(val) else: val = self._settings.prefix_lookup(val) if v in [ChemistryLib.HBondCriterion.D_NOT_DONOR, ChemistryLib.HBondCriterion.A_NOT_ACCEPTOR]: raise RuntimeError('Unrecognised atom type %s' % k) self._setter(v, val)
def __init__(self, _hbc=None): if _hbc is None: _hbc = MotifSearchLib.GraphSetHBondCriterion( ChemistryLib.HBondCriterion(), MotifSearchLib.GraphSetHBondCriterion.DONOR_TO_ACCEPTOR, MotifSearchLib.GraphSetHBondCriterion.ANGLE_IS_CHECKED ) self._hbc = _hbc self.require_hydrogens = False self.donor_types = Molecule.HBondCriterion.HBondAtomTypes('donor', self._hbc) self.acceptor_types = Molecule.HBondCriterion.HBondAtomTypes('acceptor', self._hbc) self.intermolecular = 'Any'
[docs] def is_donor(self, atom): '''Whether the atom be considered a donor by this HBondCriterion.''' try: return self._hbc.hbond_criterion().is_donor(atom._atom) except RuntimeError: return False
[docs] def donor_atom_type(self, atom): '''Return the string representation of the atom's type.''' return self.donor_types._name_getter(self._hbc.hbond_criterion().donor_atom_type(atom._atom))
[docs] def set_donor_type(self, atom, value): '''Set the donor properties of the type of the atom.''' s = self.donor_atom_type(atom) if s != 'not donor': self.donor_types[s] = value
[docs] def is_acceptor(self, atom): '''Whether the atom be considered an acceptor by this criterion.''' try: return self._hbc.hbond_criterion().is_acceptor(atom._atom) except RuntimeError: return False
[docs] def acceptor_atom_type(self, atom): '''Return the string representation of the atom's type.''' return self.acceptor_types._name_getter(self._hbc.hbond_criterion().acceptor_atom_type(atom._atom))
[docs] def set_acceptor_type(self, atom, value): '''Set the acceptor properties of the type of the atom.''' s = self.acceptor_atom_type(atom) if s != 'non acceptor': self.acceptor_types[s] = value
@property def distance_range(self): '''Allowable distance range for an HBond to be formed.''' return ( self._hbc.hbond_criterion().min_dist_tolerance(), self._hbc.hbond_criterion().max_dist_tolerance() ) @distance_range.setter def distance_range(self, val): self._hbc.hbond_criterion().set_min_dist_tolerance(min(val)) self._hbc.hbond_criterion().set_max_dist_tolerance(max(val)) @property def angle_tolerance(self): '''The tolerance of the HBond angle.''' if not self.require_hydrogens or self._hbc.angle_option() == MotifSearchLib.GraphSetHBondCriterion.ANGLE_IS_NOT_CHECKED: return None return self._hbc.hbond_criterion().angle_tolerance().degrees() @angle_tolerance.setter def angle_tolerance(self, val): if val is None: self._hbc.set_angle_option( MotifSearchLib.GraphSetHBondCriterion.ANGLE_IS_NOT_CHECKED ) else: self._hbc.set_angle_option( MotifSearchLib.GraphSetHBondCriterion.ANGLE_IS_CHECKED ) self._hbc.hbond_criterion().set_angle_tolerance( MathsLib.Angle(val, MathsLib.Angle.DEGREES) ) @property def vdw_corrected(self): '''Whether the distance range is Van der Waals corrected.''' return self._hbc.hbond_criterion().vdw_corrected() @vdw_corrected.setter def vdw_corrected(self, val): self._hbc.hbond_criterion().set_vdw_corrected(val) @property def require_hydrogens(self): '''Whether Hydrogens are required for the HBond.''' return not self._hbc.hbond_criterion().allow_non_hydrogen_contacts() @require_hydrogens.setter def require_hydrogens(self, val): if val: self._hbc.set_distance_option( MotifSearchLib.GraphSetHBondCriterion.HYDROGEN_TO_ACCEPTOR ) else: self._hbc.set_distance_option( MotifSearchLib.GraphSetHBondCriterion.DONOR_TO_ACCEPTOR ) self._hbc.hbond_criterion().set_allow_non_hydrogen_contacts(not val) @property def intermolecular(self): '''Whether HBonds should be intermolecular, intramolecular, or any.''' return self._inter_dict.inverse_lookup( self._hbc.hbond_criterion().type() ) @intermolecular.setter def intermolecular(self, val): self._hbc.hbond_criterion().set_type( self._inter_dict.prefix_lookup(val) ) @property def path_length_range(self): '''The shortest and longest bond-path separation for intramolecular contacts.''' return ( self._hbc.hbond_criterion().minpath(), self._hbc.hbond_criterion().maxpath() ) @path_length_range.setter def path_length_range(self, val): self._hbc.hbond_criterion().set_minpath(min(val)) self._hbc.hbond_criterion().set_maxpath(max(val))
[docs] def hbonds(self, distance_range=(-5.0, 0.0), angle_tolerance=120.0, vdw_corrected=True, require_hydrogens=True, path_length_range=(4, 999), hbond_criterion=None): '''The collection of molecular hydrogen bonds in this molecule. The definition of a hydrogen bond (*D-H...A*) is a contact meeting the following criteria: * The donor (*D*) must be a nitrogen, oxygen or sulphur atom covalently bound to at least one hydrogen. * The acceptor (*A*) must be a nitrogen, oxygen, sulphur or halogen with at least one available lone pair (*e.g.*, pyramidal trigonal nitrogen is regarded as an acceptor but planar trigonal nitrogen is not). * The *D...A* distance must be less than the sum of van der Waals Radii of the D and A atoms (`vdw_corrected` is `True`) or within the absolute specified range (`vdw_corrected` is `False`) in Angstroms. * The *D-H...A* angle must be within the specified `angle_tolerance` range. * The contact may be intermolecular and/or intramolecular involving donor and acceptor atoms separated with covalent bonds within the specified path length. If hydrogens are not required, they will not appear in the returned :attr:`ccdc.molecule.Molecule.HBond.atoms`. :param distance_range: Minimum and maximum distances considered acceptable for a hydrogen bond to be formed. :param vdw_corrected: Whether the distances are relative to the Van der Waals radius of the atoms (Angstroms). :param angle_tolerance: Acceptable range for a hydrogen Donor-Hydrogen-Acceptor angle (degrees). :param require_hydrogens: whether hydrogens are necessary for the hydrogen bond. :param path_length_range: Minimum and maximum values for length of path between donor and acceptor atom. If it is desirable to detect hydrogen bonds spanning discrete components of the molecule, this should be set to (-1, 999). :param hbond_criterion: an instance of :class:`ccdc.molecule.Molecule.HBondCriterion` or ``None``. If this is not ``None`` then it will be used to override all of the other parameters. :returns: a tuple of :class:`ccdc.molecule.Molecule.HBond` ''' if hbond_criterion is not None: hbc = hbond_criterion._hbc.hbond_criterion() else: inter = ChemistryLib.ContactCriterion.ANY angle = MathsLib.Angle(angle_tolerance, MathsLib.Angle.DEGREES) hbc = ChemistryLib.HBondCriterion( max(distance_range), min(distance_range), angle, vdw_corrected, not require_hydrogens, inter, min(path_length_range), max(path_length_range) ) return tuple( Molecule.HBond(_contact=contact) for contact in ChemistryLib.nonbonded_search(self._molecule, hbc) if contact.atom1().index() < contact.atom2().index() )
[docs] def shortest_path(self, atom1, atom2): '''Return the shortest path between two atoms. :param atom1: :class:`ccdc.molecule.Atom` :param atom2: :class:`ccdc.molecule.Atom` :returns: integer of shortest path, or 0 if no path can be found. :raises: TypeError if atoms are not in this molecule. ''' if atom1._atom.molecule() == self._molecule and atom1._atom.molecule() == atom2._atom.molecule(): return self._molecule.shortest_path(atom1.index, atom2.index) raise TypeError('Atoms are not in the same molecule')
[docs] def shortest_path_atoms(self, atom1, atom2): '''The list of atoms along the shortest path from atom1 to atom2. :param atom1: :class:`ccdc.molecule.Atom` :param atom2: :class:`ccdc.molecule.Atom` :returns: list of :class:`ccdc.molecule.Atom` instances :raises: TypeError if atoms are not in this molecule. ''' if atom1._atom.molecule() == self._molecule and atom1._atom.molecule() == atom2._atom.molecule(): return [Atom(_atom=a) for a in self._molecule.shortest_path_atoms(atom1._atom, atom2._atom)] raise TypeError('Atoms are not in the same molecule')
[docs] def shortest_path_bonds(self, atom1, atom2): '''The list of bonds along the shortest path from atom1 to atom2. :param atom1: :class:`ccdc.molecule.Atom` :param atom2: :class:`ccdc.molecule.Atom` :returns: list of :class:`ccdc.molecule.Bond` instances :raises: TypeError if atoms are not in this molecule. ''' if atom1._atom.molecule() == self._molecule and atom1._atom.molecule() == atom2._atom.molecule(): return [Bond(_bond=b) for b in self._molecule.shortest_path_bonds(atom1._atom, atom2._atom)] raise TypeError('Atoms are not in the same molecule')
@property def heaviest_component(self): '''The heaviest component, useful for stripping solvents.''' l = sorted([(m.molecular_weight, i, m) for i, m in enumerate(self.components)]) return l[-1][-1] @property def molecular_weight(self): '''The molecular weight of the molecule.''' return ChemistryLib.molecular_weight(self._molecule) @property def has_charged_atoms(self): '''Does molecule contain charged atoms.''' return ChemistryLib.has_charged_atoms(self._molecule) @property def molecular_volume(self): '''The molecular volume of the molecule.''' if self.all_atoms_have_sites: return ChemistryLib.molecular_volume(self._molecule)
[docs] def copy(self): '''Return a deep copy of the molecule.''' return Molecule( self.identifier, _molecule=self._molecule.clone().create_editable_molecule() )
@property def formula(self): '''Return the chemical formula of the molecule.''' if any(a.label.endswith('?') for a in self.atoms): flag = ChemistryLib.MoleculeChemicalFormula.USE_OCCUPANCIES else: flag = ChemistryLib.MoleculeChemicalFormula.IGNORE_OCCUPANCIES return str(ChemistryLib.MoleculeChemicalFormula( self._molecule, True, flag )) @property def smiles(self): '''The canonical SMILES representation of the molecule. This will raise a RuntimeError if the molecule contains unknown atoms or bonds, delocalised bonds or if there are non-terminal hydrogens. ''' try: return '.'.join( ChemicalAnalysisLib.canonical_smiles(m._molecule) for m in self.components ) except RuntimeError: return None @property def is_3d(self): '''Whether the molecule has 3d coordinates for all heavy atoms.''' coords = [a.coordinates for a in self.heavy_atoms] if None in coords: return False return not ( all(c.x == 0.0 for c in coords) or all(c.y == 0.0 for c in coords) or all(c.z == 0.0 for c in coords) ) @property def is_organic(self): '''Uses CSD definitions for organometallic molecules.''' elements = ChemistryLib.MoleculeChemicalFormula(self._molecule).elements_present() if not hasattr(Molecule, '_organometallic_elements'): Molecule._organometallic_elements = ChemistryLib.ElementSet() Molecule._organometallic_elements.add_elements( ChemistryLib.ElementSetFactory.lanthanides().elements() ) Molecule._organometallic_elements.add_elements( ChemistryLib.ElementSetFactory.actinides().elements() ) Molecule._organometallic_elements.add_elements( ChemistryLib.ElementSetFactory.transition_metals().elements() ) Molecule._organometallic_elements.add_elements( ChemistryLib.ElementSetFactory.p_block_metals().elements() ) return not elements.intersects(Molecule._organometallic_elements) @property def is_organometallic(self): '''Uses CSD definitions for organometallic molecules.''' return not self.is_organic and 'C' in self.formula @property def is_polymeric(self): '''Whether the molecule contains polymeric bonds.''' return True in [b._bond.polymeric() for b in self.bonds] # Mutators def _downstream_atoms(self, atom1, atom2): '''Private: find atoms downstream of a bond.''' if not atom2 in atom1.neighbours: return (atom2._atom,) #raise RuntimeError('Atoms are not directly neighbouring') for b in atom1.bonds: if atom2 in b.atoms: if b.is_cyclic: raise RuntimeError('The bond between atoms is cyclic') affected_atoms = ChemicalAnalysisLib.downstream_atoms(atom1._atom, atom2._atom) return affected_atoms
[docs] def set_bond_length(self, atom1, atom2, distance): '''Set the bond length between the two atoms to distance. :param atom1, atom2: :class:`ccdc.molecule.Atom` :param distance: the desired distance between atom1 and atom2 :raises: RuntimeError if the two atoms are not neighbours or if the bond between them is cyclic or if an affected atom has no coordinates ''' if atom1 not in atom2.neighbours: raise RuntimeError('The atoms are not connected') ats = self._downstream_atoms(atom1, atom2) for a in ats: if a.site() is None: raise RuntimeError('An affected atom has no coordinates.') ChemicalAnalysisLib.set_bond_length(atom1._atom, atom2._atom, distance)
[docs] def set_valence_angle(self, atom1, atom2, atom3, theta): '''Set the valence angle subtended by atom1, atom2, atom3 to theta. :param atom1: :class:`ccdc.molecule.Atom` :param atom2: :class:`ccdc.molecule.Atom` :param atom3: :class:`ccdc.molecule.Atom` :param theta: the desired angle in degrees :raises: RuntimeError if the atoms are not connected, if either bond is cyclic, or if an affected atom has no coordinates. ''' ats = self._downstream_atoms(atom2, atom3) for a in ats: if a.site() is None: raise RuntimeError('An affected atom has no coordinates.') if atom2 not in atom1.neighbours or atom3 not in atom2.neighbours or atom1 == atom3: raise RuntimeError('Atoms are not connected.') ChemicalAnalysisLib.set_valence_angle(atom1._atom, atom2._atom, atom3._atom, theta)
[docs] def set_torsion_angle(self, atom1, atom2, atom3, atom4, theta): '''Set the torsion angle between the specified atoms to theta. :param atom1: :class:`ccdc.molecule.Atom` :param atom2: :class:`ccdc.molecule.Atom` :param atom3: :class:`ccdc.molecule.Atom` :param atom4: :class:`ccdc.molecule.Atom` :param theta: the desired torsion angle in degrees :raises: RuntimeError if the atoms are not connected or if a bond is cyclic or if an affected atom has no coordinates. ''' if atom2 not in atom1.neighbours or atom3 not in atom2.neighbours or atom4 not in atom3.neighbours: # improper torsion ats = self._downstream_atoms(atom2, atom3) + self._downstream_atoms(atom2, atom4) sites = [a.site() for a in ats] if None in sites: raise RuntimeError('An affected atom has no coordinates') ChemicalAnalysisLib.set_improper_torsion( atom1._atom, atom2._atom, atom3._atom, atom4._atom, theta ) else: ats = self._downstream_atoms(atom2, atom3) sites = [a.site() for a in ats] if None in sites: raise RuntimeError('An affected atom has no coordinates') ChemicalAnalysisLib.set_torsion_angle(atom1._atom, atom2._atom, atom3._atom, atom4._atom, theta)
[docs] def centre_of_geometry(self): '''Geometric centre of the molecule. :raises: RuntimeError if an atom has no coordinates ''' coords = [a.coordinates for a in self.atoms] if None in coords: raise RuntimeError('An atom has no coordinates') x, y, z = zip(*coords) cog = Coordinates(sum(x)/len(coords), sum(y)/len(coords), sum(z)/len(coords)) return cog
[docs] def translate(self, xyz): '''Translate the molecule. :param xyz: a sequence of three floats ''' for atom in self.atoms: if atom.coordinates is not None: atom.coordinates = tuple(atom.coordinates[i] + xyz[i] for i in range(3))
[docs] def rotate(self, xyz, theta): '''Rotate a molecule about the vector xyz by theta degrees. The molecule will be rotated about its centre of geometry. :param xyz: the rotation axis, a sequence of length 3 :param theta: angle by which to rotate, in degrees ''' cog = self.centre_of_geometry() Q = MathsLib.Quaternion( MathsLib.vector_3d(xyz[0], xyz[1], xyz[2]), MathsLib.Angle(theta, MathsLib.Angle.DEGREES) ) self.translate((-cog.x, -cog.y, -cog.z)) ChemicalAnalysisLib.apply_quaternion(self._molecule, Q) self.translate((cog.x, cog.y, cog.z))
[docs] @nested_class('Molecule') class Transformation: '''Transformation matrix that could be applied to the coordinates of a molecule''' def __init__(self, matrix=None, _transformation=None): ''' :param matrix: a sequence of length 4 of sequences of length 4 of floats ''' if _transformation is not None: self._transformation = _transformation else: self._transformation = MathsLib.Transformation( matrix[0][0], matrix[0][1], matrix[0][2], matrix[0][3], matrix[1][0], matrix[1][1], matrix[1][2], matrix[1][3], matrix[2][0], matrix[2][1], matrix[2][2], matrix[2][3], matrix[3][0], matrix[3][1], matrix[3][2], matrix[3][3] )
[docs] @staticmethod def from_rotation_and_translation(rotation, translation): '''Create a transformation from a rotation matrix and translation vector :param rotation: A 3 x 3 rotation matrix as a tuple of 3 tuples of 3 doubles :param translation: A translation vector as a tuple of 3 doubles (x, y, z) :returns: A Molecule.Transformation ''' return Molecule.Transformation(_transformation=MathsLib.Transformation( rotation[0][0], rotation[0][1], rotation[0][2], translation[0], rotation[1][0], rotation[1][1], rotation[1][2], translation[1], rotation[2][0], rotation[2][1], rotation[2][2], translation[2] ))
def __str__(self): '''String representation of a molecule transformation.''' return 'Molecule.Transformation(Rotation: %s Translation: %s)' % (str(self.rotation), str(self.translation)) __repr__ = __str__ def __mul__(self, other): '''Multiply two molecule transformations :returns: A Molecule.Transformation that is the product of the two transformations ''' from ccdc.descriptors import GeometricDescriptors as GD if isinstance(other, Molecule.Transformation): return Molecule.Transformation(_transformation=self._transformation * other._transformation) if isinstance(other, Coordinates): point = MathsLib.Point(other.x, other.y, other.z) transformed_point = self._transformation.mul_point(point) return Coordinates(x=transformed_point(1), y=transformed_point(2), z=transformed_point(3)) if isinstance(other, GD.Vector): point = MathsLib.Point(other[0], other[1], other[2]) transformed_point = self._transformation.mul_point(point) return GD.Vector(transformed_point(1), transformed_point(2), transformed_point(3))
[docs] def inverse(self): '''Return the inverse of this transformation :returns: A Molecule.Transformation that is the inverse of this transformation ''' return Molecule.Transformation(_transformation=self._transformation.inverse())
@property def translation(self): '''The translational component of the transformation :returns: A translation vector as a tuple of 3 doubles (x, y, z) ''' if self._transformation: return tuple([self._transformation.trans().x(), self._transformation.trans().y(), self._transformation.trans().z()]) @property def rotation(self): '''The rotational component of the transformation :returns: A 3 x 3 rotation matrix as a tuple of 3 tuples of 3 doubles ''' if self._transformation: return tuple( tuple(self._transformation.rot().c(i, j) for i in range(3)) for j in range(3) )
[docs] def transform(self, M): '''Apply a matrix to the coordinates of the molecule. The method will not check that the matrix is appropriate for a molecular transformation; it is for the user to ensure that inappropriate affine transformations are avoided. :param M: a sequence of length 4 of sequences of length 4 of floats ''' if isinstance(M, Molecule.Transformation): transformation = M else: transformation = Molecule.Transformation(M) xform = transformation._transformation mol_xform = ChemistryLib.MoleculeTransformation(self._molecule, xform) mol_xform.apply()
[docs] def apply_quaternion(self, Q): '''Apply a Quaternion to the molecule, rotating it about an arbitrary axis. :param Q: a sequence of 4 floats :raises: RuntimeError if an atom has no coordinates ''' if None in [a.coordinates for a in self.atoms]: raise RuntimeError('An atom has no coordinates') q = MathsLib.Quaternion(Q[0], Q[1], Q[2], Q[3]) ChemicalAnalysisLib.apply_quaternion(self._molecule, q)
[docs] def add_atom(self, atom): '''Add an atom. :param atom: :class:`ccdc.molecule.Atom` :returns: a copy of the atom which is now in this molecule ''' a = Atom(_atom=atom._atom.clone()) self._molecule.add(a._atom) return a
[docs] def add_atoms(self, iterable): '''Add atoms, whether from another molecule or constructed *ab initio*. :param iterable: any iterable of :class:`ccdc.molecule.Atom` :returns: a tuple of copies of the atoms in iterable ''' copies = tuple(Atom(_atom=a._atom.clone()) for a in iterable) for a in copies: self._molecule.add(a._atom) return copies
[docs] def remove_atom(self, atom): '''Remove an atom from the molecule. :param atom: :class:`ccdc.molecule.Atom` which must be in the molecule. :raises: RuntimeError if atom is not in the molecule. ''' if atom not in self.atoms: raise RuntimeError('The atom is not in the molecule') self._molecule.remove_atom(atom.index)
[docs] def remove_atoms(self, iterable): '''Remove several atoms from the molecule. :param iterable: any iterable of :class:`ccdc.molecule.Atom` :raises: RuntimeError if any atom is not in the molecule. ''' l = list(iterable) atms = self.atoms for a in l: if not a in atms: raise RuntimeError('An atom is not in the molecule') for a in l: self._molecule.remove_atom(a.index)
[docs] def add_bond(self, bond_type, atom1, atom2): '''Add a bond between two atoms. :param bond_type: :class:`ccdc.molecule.Bond.BondType` instance :param atom1, atom2: :class:`ccdc.molecule.Atom` instances which must already be present in the molecule ''' if not isinstance(bond_type, Bond.BondType): bond_type = Bond.BondType(bond_type) bond = ChemistryLib.Bond3d(bond_type._bond_type) self._molecule.add(bond, atom1.index, atom2.index)
[docs] def add_bonds(self, iterable): '''Add several bonds to the molecule. :param iterable: should be an iterable of triples, (:class:`ccdc.molecule.Bond.BondType`, :class:`ccdc.molecule.Atom`, :class:`ccdc.molecule.Atom`) where the atoms must be in this molecule ''' for bond_type, atom1, atom2 in iterable: self.add_bond(bond_type, atom1, atom2)
[docs] def remove_bond(self, bond): '''Remove a bond. :param bond: :class:`ccdc.molecule.Bond` which must be in the molecule :raises: RuntimeError if the bond is not present ''' if bond not in self.bonds: raise RuntimeError('Bond is not in the molecule') self._molecule.remove(bond._bond)
[docs] def remove_bonds(self, iterable): '''Remove bonds. :param iterable: any iterable of :class:`ccdc.molecule.Bond` :raises: RuntimeError if any bond is not in the molecule ''' l = list(iterable) for b in l: if b not in self.bonds: raise RuntimeError('A bond is not in the molecule') for b in l: self._molecule.remove(b._bond)
[docs] def add_molecule(self, molecule): '''Add a copy of all the atoms and bonds of molecule. :param molecule: :class:`ccdc.molecule.Molecule` ''' self._molecule.add(molecule._molecule)
[docs] def add_group(self, mol, atom1, atom2): '''Add a group from another molecule. All atoms and bonds of downstream of the pair atom1, atom2 will be added to the molecule. :param mol: :class:`ccdc.molecule.Molecule` :param atom1: :class:`ccdc.molecule.Atom` which must be in mol :param atom2: :class:`ccdc.molecule.Atom` which must be in mol ''' ats = self._downstream_atoms(atom1, atom2) new_ats = [self.add_atom(Atom(_atom=a)) for a in ats] for b in mol.bonds: bats = b.atoms if bats[0]._atom in ats and bats[1]._atom in ats: a0 = new_ats[ats.index(bats[0]._atom)] a1 = new_ats[ats.index(bats[1]._atom)] self.add_bond(b.bond_type._bond_type, a0, a1)
[docs] def remove_group(self, atom1, atom2): '''Remove the group of atoms and bonds downstream from atom1, atom2. :param atom1 atom2: :class:`ccdc.molecule.Atom` which must be in the molecule. ''' ats = self._downstream_atoms(atom1, atom2) for a in ats: self._molecule.remove_atom(a.index())
[docs] def change_group(self, atom1, atom2, mol, atom3, atom4): '''Transfer a copy of a group of atoms and bonds. All atoms downstream of atom1 - atom2 will be removed and copies of all atoms and bonds downstream of atom3 - atom4 will be added to the molecule. The geometry of the copied atoms will be translated and rotated such that the bond atom3 - atom4 is aligned with atom1 - atom2. The bond connecting the groups will have the same type as that between atom1 and atom2. :param atom1: :class:`ccdc.molecule.Atom` :param atom2: :class:`ccdc.molecule.Atom` :param atom3: :class:`ccdc.molecule.Atom` :param atom4: :class:`ccdc.molecule.Atom` :param mol: :class:`ccdc.molecule.Molecule` in which atom3 and atom4 must be present :raises: RuntimeError if any atom of mol is siteless, or if the bond atom3 - atom4 is cyclic. :returns: a tuple of :class:`ccdc.molecule.Atom` which are the newly added atoms of the molecule. ''' vec = ChemicalAnalysisLib.change_group(atom1._atom, atom2._atom, atom3._atom, atom4._atom) return tuple(Atom(_atom=self.atoms[i]._atom) for i in vec)
[docs] def fuse_rings(self, atom1, atom2, mol, atom3, atom4): '''Fuse a copy of a ring system. The bond between atom3 and atom4 will be superimposed on the bond between atom1 and atom2. :param atom1: :class:`ccdc.molecule.Atom` within this molecule :param atom2: :class:`ccdc.molecule.Atom` within this molecule :param mol: :class:`ccdc.molecule.Molecule` :param atom3: :class:`ccdc.molecule.Atom` which must be within mol :param atom4: :class:`ccdc.molecule.Atom` which must be within mol :raises: RuntimeError if bonds between atoms are not cyclic :returns: a tuple of :class:`ccdc.molecule.Atom` which are the newly added atoms of the molecule ''' m = mol.copy() atom3 = m.atoms[atom3.index] atom4 = m.atoms[atom4.index] vec = ChemicalAnalysisLib.fuse_ring(atom1._atom, atom2._atom, atom3._atom, atom4._atom) return tuple(Atom(_atom=self.atoms[i]._atom) for i in vec)
[docs] def normalise_labels(self): '''Ensure labels of atoms are unique. Labels will be the atom's atomic symbol followed by an integer. ''' d = collections.defaultdict(int) for a in self.atoms: d[a.atomic_symbol] += 1 e = Atom(_atom=a._atom) e.label = '%s%d' % (a.atomic_symbol, d[a.atomic_symbol])
[docs] def normalise_atom_order(self): '''Reorder the atoms of the molecule canonically. This will not work for structures with unknown atoms or bonds. This may be useful for comparing polymorphs, for example: >>> from ccdc import io >>> csd = io.MoleculeReader('csd') >>> hxacan = csd.molecule('HXACAN') >>> hxacan26 = csd.molecule('HXACAN26') >>> all(a.label == b.label for a, b in zip(hxacan.atoms, hxacan26.atoms)) False >>> hxacan.normalise_atom_order() >>> hxacan.normalise_labels() >>> hxacan26.normalise_atom_order() >>> hxacan26.normalise_labels() >>> all(a.label == b.label for a, b in zip(hxacan.atoms, hxacan26.atoms)) True ''' from ccdc import search smiles = [c.smiles for c in self.components] if None in smiles: return def smiles_cmp(s): return (len(s), hash(s)) smiles.sort(key=smiles_cmp) seen_smiles = set() seen_indices = set() indices = [] for s in smiles: if s in seen_smiles: continue seen_smiles.add(s) searcher = search.SubstructureSearch() searcher.add_substructure(search.SMARTSSubstructure(s)) hits = searcher.search(self) for h in hits: indices.extend(i for i in h.match_atoms(indices=True) if i not in seen_indices) seen_indices |= set(h.match_atoms(indices=True)) extra = [a for a in self.atoms if a.atomic_symbol in 'DH' and a.index not in seen_indices] def nbr_index(at): nbrs = at.neighbours if not nbrs: return ('XXX', 999) return (at.atomic_symbol, nbrs[0].index) extra.sort(key=nbr_index) extra = list(a.index for a in extra) self._molecule.reorder_atoms(indices + extra)
[docs] def remove_hydrogens(self): '''Remove all hydrogen atoms.''' self.remove_atoms(a for a in self.atoms if a.atomic_symbol in 'HD')
[docs] def remove_unknown_atoms(self): '''Remove all atoms with an atomic number of zero (e.g. lone pairs, dummy atoms).''' ChemistryLib.remove_unknown_atoms(self._molecule)
[docs] def set_formal_charges(self): '''Calculate formal charges for atoms assuming bond types and protonation are correct.''' clone = self.copy() for a in self.atoms: a.formal_charge = 0 clone.add_hydrogens() for a, b in zip( [x for x in self.atoms if x.atomic_number > 1], [y for y in clone.atoms if y.atomic_number > 1] ): diff = len(a.neighbours) - len(b.neighbours) if diff: a.formal_charge = diff else: a.formal_charge = b.formal_charge
[docs] def add_hydrogens(self, mode='all', add_sites=True): '''Add hydrogen atoms to the molecule. :param mode: 'all' to generate all hydrogens (throws away existing hydrogens) or 'missing' to generate hydrogens deemed to be missing. :param add_sites: 'True' to generate 3D coordinates for hydrogens (default) 'False' to only generate siteless atoms :raises: RuntimeError if any heavy atom has no site. :raises: RuntimeError if any atoms are of unknown type. :raises: RuntimeError if any bonds are of unknown type. ''' if mode.lower() == 'all': self.remove_hydrogens() if hasattr(self, '_cell'): cell = self._cell else: cell = ChemistryLib.Cell() ChemistryLib.molecule_add_missing_hydrogens(self._molecule, cell, add_sites)
[docs] def normalise_hydrogens(self): '''Normalise the hydrogen atom positions in the molecule. This will normalise the position of hydrogen atoms attached to oxygen, nitrogen or carbon atoms to standard X-H distance values derived from statistical surveys of neutron diffraction data. Note that this function will only modify the X-H distance. In other words the bond vector direction will be unaffected. ''' if hasattr(self, '_cell'): cell = self._cell else: cell = ChemistryLib.Cell() ChemistryLib.HydrogenNormalisation(self._molecule, cell).apply()
[docs] def assign_bond_types(self, which='All'): '''Assign bond types to the molecule. :param which: may be 'all' or 'unknown' :raises: ValueError if an unrecognised ``which`` parameter is provided ''' assigner = ChemicalAnalysisLib.MoleculeAllBondTypesAssigner() if which.lower() == 'all': assigner.assign(self._molecule) elif which.lower() == 'unknown': assigner.assign_unknown_only(self._molecule) else: raise ValueError( 'assign_bond_types: %s should be all or unknown' % which )
[docs] def standardise_aromatic_bonds(self): '''Standardise aromatic bonds to CSD conventions.''' assigner = ChemicalAnalysisLib.MoleculeAromaticBondAssigner( ChemicalAnalysisLib.CsdAromaticConventions() ) assigner.assign(self._molecule)
[docs] def standardise_delocalised_bonds(self): '''Standardise delocalised bonds to CSD conventions.''' assigner = ChemicalAnalysisLib.MoleculeDelocalisedBondAssigner() assigner.assign(self._molecule)
[docs] def kekulize(self): '''Convert the molecule to a Kekule representation, replacing all aromatic bonds by alternating single and double bonds.''' assigner = ChemicalAnalysisLib.MoleculeAllBondTypesAssigner() assigner.kekulize_aromatic_systems(self._molecule)
[docs] def set_coordinates(self, mol, atoms=None): '''Set the coordinates of this molecule to those of the other molecule. For example AACFAZ has no coordinates, so in some circumstances one might wish to use the coordinates from AACFAZ10. :param mol: another molecule :param atoms: an iterable of pairs of atoms, the first of which is in this molecule, the second in the other molecule. If this is empty (or None) then it is assumed that the molecules' atoms match exactly. ''' if not atoms: atoms = zip(self.atoms, mol.atoms) for a, b in atoms: a.coordinates = b.coordinates
# Miscellaneous properties @property def all_atoms_have_sites(self): '''Whether all atoms have coordinates.''' return ChemistryLib.all_atoms_have_sites(self._molecule) @property def contains_unknown_bonds(self): '''Whether the molecule contains unknown bonds.''' return ChemistryLib.molecule_contains_unknown_bonds(self._molecule) # To and from string representations
[docs] @staticmethod def from_string(s, format=''): '''Create a molecule from a string representation. The format will be auto-detected if not specified. :param s: molecule string representation :param format: one of 'mol2', 'sdf', 'mol', 'cif', 'mmcif' or 'smiles' :rtype: :class:`ccdc.molecule.Molecule` :raises: TypeError if the format string is not '', 'mol2', 'sdf', 'mol', 'cif', 'mmcif' or 'smiles'. :raises: RuntimeError if the string representation is incorrectly formatted ''' if format == '': format = _detect_format(s) if format in ['sdf', 'mol']: if 'V2000' not in s and 'M END' not in s and '$$$$' not in s: raise RuntimeError('This does not appear to be an SDF format string: %s' % s) if format == 'cif' or format == 'mmcif': format = 'cifreader' stream = FileFormatsLib.istringstream(s) mf = _file_object_factory(format) try: mf.read(stream) if format in ['sdf', 'mol']: return Molecule(mf.molecule_name(), mf.molecule().create_editable_molecule()) try: return Molecule(mf.identifier().str(), mf.molecule().create_editable_molecule()) except RuntimeError as exc: if 'There is no data' in str(exc): return Molecule(mf.identifier().str(), ChemistryLib.Molecule3d_instantiate()) else: raise except RuntimeError as e: raise RuntimeError('Invalid content for %s formatted string: %s' % (format, e))
[docs] def to_string(self, format='mol2'): '''Return a string representation of a molecule. :param format: 'mol2', 'sdf', 'mol', 'cif', 'mmcif' or 'smiles' :rtype: string :raises: TypeError if the format string is not 'mol2', 'sdf', 'mol', 'cif', 'mmcif' or 'smiles'. ''' stream = FileFormatsLib.ostringstream() mf = _file_object_factory(format) from ccdc.entry import Entry e = Entry.from_molecule(self) mf.set(e._entry) mf.write(stream) return stream.str()
[docs] def generate_inchi(self, include_stereo=True, add_hydrogens=True): '''Return a :class:`ccdc.descriptors.MolecularDescriptors.InChIGenerator.InChI` object for the molecule :param include_stereo: include stereochemistry (True, default) or ignore stereochemistry (False) when generating the InChI object :param add_hydrogens: add hydrogens (True, default) or not add hydrogens (False) when generating the InChI object :returns: a :class:`ccdc.descriptors.MolecularDescriptors.InChIGenerator.InChI` object ''' from ccdc.descriptors import MolecularDescriptors gen = MolecularDescriptors.InChIGenerator() return gen.generate(self, include_stereo, add_hydrogens)
class _CifFileWithBonds(FileFormatsLib.CifFile): '''CifFile variant that is configured for reading and writing bonds.''' def __init__(self, *args, **kwargs): super(_CifFileWithBonds, self).__init__(*args, **kwargs) options = _CifFileWithBonds.cif_bond_options() self.add_cif_format_options(options) self.set_read_write_options(options) def clear(self, *args, **kwargs): '''Clear the CifFile, but reset the bond options after.''' super(_CifFileWithBonds, self).clear(*args, **kwargs) options = _CifFileWithBonds.cif_bond_options() self.add_cif_format_options(options) self.set_read_write_options(options) @staticmethod def cif_bond_options(): '''Return CifReadWriteOptions to read and write bonds.''' options = FileFormatsLib.CifReadWriteOptions() options.write_geom_bonds_ = True options.write_unique_atom_labels_ = True options.read_geom_bonds_ = True return options class _CifFileDetectFormat(_CifFileWithBonds): '''A CifFile variant that will detect the apropriate format between CIF and mmCIF depending on the data written.''' def add_cif_format_options(self, options): '''Add options to detect the appropriate format.''' options.write_version_.detect_version_ = True class _CifFileCifFormat(_CifFileWithBonds): '''A CifFile variant that always writes CIF format.''' def add_cif_format_options(self, options): '''Add options to detect the appropriate format.''' options.write_version_.detect_version_ = False options.write_version_.default_version_ = FileFormatsLib.CIF class _CifFileMmCifFormat(_CifFileWithBonds): '''A CifFile variant that always writes mmCIF format.''' def add_cif_format_options(self, options): '''Add options to detect the appropriate format.''' options.write_version_.detect_version_ = False options.write_version_.default_version_ = FileFormatsLib.MMCIF def _file_object_factory(format): '''Return an appropriate molecule file object for the format requested.''' if format == 'mol2': mf = FileFormatsLib.Mol2File() elif format in ['sdf', 'mol']: mf = FileFormatsLib.MolFile() elif format == 'cif': mf = _CifFileCifFormat() elif format == 'mmcif': mf = _CifFileMmCifFormat() elif format == 'cifreader': mf = FileFormatsLib.CifFile() elif format == 'res': return FileFormatsLib.ResFile() elif format == 'smiles': return FileFormatsLib.SMILESFile() else: raise TypeError('Unknown file format: %s' % format) return mf ##########################################################################