Source code for ccdc.descriptors

#
# This code is Copyright (C) 2015 The Cambridge Crystallographic Data Centre
# (CCDC) of 12 Union Road, Cambridge CB2 1EZ, UK and a proprietary work of CCDC.
# This code may not be used, reproduced, translated, modified, disassembled or
# copied, except in accordance with a valid licence agreement with CCDC and may
# not be disclosed or redistributed in any form, either in whole or in part, to
# any third party. All copies of this code made in accordance with a valid
# licence agreement as referred to above must contain this copyright notice.
#
# No representations, warranties, or liabilities are expressed or implied in the
# supply of this code by CCDC, its servants or agents, except where such
# exclusion or limitation is prohibited, void or unenforceable under governing
# law.
#
'''
The :mod:`ccdc.descriptors` module contains classes for calculating descriptors.

The main classes in the :mod:`ccdc.descriptors` module are:

- :class:`ccdc.descriptors.MolecularDescriptors`.
- :class:`ccdc.descriptors.GeometricDescriptors`.
- :class:`ccdc.descriptors.CrystalDescriptors.PowderPattern`.
- :class:`ccdc.descriptors.CrystalDescriptors.Morphology`.
- :class:`ccdc.descriptors.CrystalDescriptors.GraphSetSearch`.
- :class:`ccdc.descriptors.CrystalDescriptors.HBondCoordination`.
- :class:`ccdc.descriptors.CrystalDescriptors.HBondPropensities`.
- :class:`ccdc.descriptors.StatisticalDescriptors`.
'''

import collections
import math
import operator
import os
import warnings
import tempfile
import sys

warnings.simplefilter('always', DeprecationWarning)

from ccdc.molecule import Molecule, Atom, Bond, Coordinates
from ccdc.crystal import Crystal
from ccdc.utilities import Logger, bidirectional_dict, nested_class
from ccdc import io
from ccdc import morphology

from ccdc.utilities import _private_importer
with _private_importer():
    import ChemistryLib
    import ChemicalAnalysisLib
    import MathsLib
    import SubstructureSearchLib
    import MotifSearchLib
    import PackingSimilarityLib
    import UtilitiesLib
    import DatabaseEntryLib
    import FileFormatsLib
    import CSDSQLDatabaseLib
    import AnnotationsLib

[docs]class MolecularDescriptors(object): '''Namespace for descriptors of a molecular nature.'''
[docs] @staticmethod def bond_length(bond): '''The length of a bond. :param bond: :class:`ccdc.molecule.Bond` :returns: float, or ``None`` if an atom of the bond has no coordinates ''' if bond._bond.atom1().site() and bond._bond.atom2().site(): return ChemistryLib.bond_length( bond._bond.molecule(), bond._bond.atom1().index(), bond._bond.atom2().index() )
[docs] @staticmethod def atom_distance(a, b): '''Distance between two atom irrespective their parent molecules. :param a: :class:`ccdc.molecule.Atom` :param b: :class:`ccdc.molecule.Atom` :returns: float or ``None`` if one of the atoms has no coordinates ''' if a._atom.site() and b._atom.site(): return MathsLib.distance(a.coordinates, b.coordinates)
[docs] @staticmethod def atom_angle(a, b, c): '''Angle subtended by three arbitrary atoms. :param a: :class:`ccdc.molecule.Atom` :param b: :class:`ccdc.molecule.Atom` :param c: :class:`ccdc.molecule.Atom` :returns: float - the angle in degrees or ``None`` if one of the atoms has no coordinates ''' if a._atom.site() and b._atom.site() and c._atom.site(): return MathsLib.angle( a.coordinates, b.coordinates, c.coordinates ).degrees()
[docs] @staticmethod def atom_torsion_angle(a, b, c, d): '''Plane angle subtended by the triples abc and bcd. :param a: :class:`ccdc.molecule.Atom` :param b: :class:`ccdc.molecule.Atom` :param c: :class:`ccdc.molecule.Atom` :param d: :class:`ccdc.molecule.Atom` :returns: float - the angle in degrees or ``None`` if one of the atoms has no coordinates ''' if a._atom.site() and b._atom.site() and c._atom.site() and d._atom.site(): return MathsLib.torsion_angle( a.coordinates, b.coordinates, c.coordinates, d.coordinates ).degrees()
@staticmethod def _set_atom_torsion_angle(a, b, c, d, degrees): '''Set a torsion angle. :param a: :class:`ccdc.molecule.Atom` :param b: :class:`ccdc.molecule.Atom` :param c: :class:`ccdc.molecule.Atom` :param d: :class:`ccdc.molecule.Atom` :param degrees: float - the required torsion angle in degrees ''' if degrees > 180.: degrees = 360. - degrees ChemicalAnalysisLib.set_torsion_angle(a._atom, b._atom, c._atom, d._atom, degrees)
[docs] @staticmethod def atom_centroid(*atoms): '''Define the centroid of the specified atoms.''' coords = [a.coordinates for a in atoms] if None in coords: raise RuntimeError('An atom has no coordinates') x, y, z = zip(*coords) centroid = Coordinates(sum(x)/len(coords), sum(y)/len(coords), sum(z)/len(coords)) return centroid
[docs] @staticmethod def atom_vector(atom0, atom1): '''Define the vector from atom0 to atom1. :param atom0: :class:`ccdc.molecule.Atom` :param atom1: :class:`ccdc.molecule.Atom` :returns: :class:`GeometricDescriptors.Vector` :raises: RuntimeError if either atom has no coordinates. ''' coords = [atom0.coordinates, atom1.coordinates] if None in coords: raise RuntimeError('An atom has no coordinates') return GeometricDescriptors.Vector.from_points(coords[0], coords[1])
[docs] @staticmethod def atom_plane(*atoms): '''Define a plane from the coordinates of the atoms. :param atoms: there must be at least three :class:`ccdc.molecule.Atom` in the arguments. ''' return GeometricDescriptors.Plane.from_points( *tuple(a.coordinates for a in atoms) )
[docs] @staticmethod def ring_centroid(ring): '''The centroid of the ring's atoms. :param ring: :class:`ccdc.molecule.Molecule.Ring` ''' return MolecularDescriptors.atom_centroid(*ring.atoms)
[docs] @staticmethod def ring_plane(ring): '''The plane of the ring's atoms. :param ring: :class:`ccdc.molecule.Molecule.Ring` ''' return MolecularDescriptors.atom_plane(*ring.atoms)
[docs] @staticmethod def point_group_analysis(mol): '''Return Schoenflies notation of the point group symmetry of a molecule. The point group symmetry is returned as a tuple of: - order (e.g. 1) - symbol (e.g. 'C1') - description (e.g. 'Objects in this point group have no symmetry.') :param mol: :class:`ccdc.molecule.Molecule` :returns: (int, str, str) ''' pga = ChemicalAnalysisLib.MoleculePointGroupAssigner( ChemicalAnalysisLib.RadialAtomProximity(0.5) ) try: pg = pga.assign(mol._molecule) if not pga.assignment_succeeded(): return 0, 'Unknown', 'point group assignment did not conclude' return pg.order(), pg.symbol(), pg.description() except RuntimeError as e: logger = Logger() logger.warning('molecule.point_group_analysis %s: %s' % (mol.identifier, e)) return 0, 'Unknown', 'point group assignment failed'
@staticmethod def _make_subset(mol, atoms): '''Private: make an internal subset.''' sub = ChemistryLib.MoleculeSubset_instantiate(mol._molecule) sub.select_all() sub.select_atom_indices_from_current([a.index for a in atoms]) return sub @nested_class('MolecularDescriptors') class Overlay(object): def __init__(self, mol1, mol2, atoms=None, invert=False, rotate_torsions=False, with_symmetry=True): '''Overlay mol2 on mol1 :param mol1: a :class:`ccdc.molecule.Molecule` instance :param mol2: a :class:`ccdc.molecule.Molecule` instance :param atoms: a list of pairs of atoms to use in the overlay, or None for all atoms to be used :param invert: allow inversions in the overlay :param rotate_torsions: allow torsional rotations when overlaying :param with_symmetry: take account of symmetry when overlaying atoms ''' m2 = mol1.copy() m1 = mol2.copy() if atoms is not None: # Got to get the matching atoms def _match_atom(mol, at): l = [a for a in mol.atoms if a.atomic_symbol == at.atomic_symbol and str(a.coordinates) == str(at.coordinates)] if len(l) == 1: return l[0] else: # Fall back on match by index if mol.atoms[at.index].atomic_symbol == at.atomic_symbol: return mol.atoms[at.index] raise RuntimeError('Atom %s could not be matched' % at) inp1, inp2 = zip(*atoms) ats1 = [_match_atom(m1, a) for a in inp2] ats2 = [_match_atom(m2, a) for a in inp1] if with_symmetry: sub1 = MolecularDescriptors._make_subset(m1, ats1) sub2 = MolecularDescriptors._make_subset(m2, ats2) if invert: invert = ChemicalAnalysisLib.MoleculeOverlay.NO_INVERT_AND_INVERT else: invert = ChemicalAnalysisLib.MoleculeOverlay.NO_INVERT if rotate_torsions: rotate = ChemicalAnalysisLib.MoleculeOverlay.FLEXIBLE_TORSIONS else: rotate = ChemicalAnalysisLib.MoleculeOverlay.FIXED_TORSIONS if atoms is None: ov = ChemicalAnalysisLib.MoleculeOverlay( m1._molecule, m2._molecule, invert, rotate ) ov.fit_geometry().apply() else: if with_symmetry: ov = ChemicalAnalysisLib.MoleculeOverlay( sub1, sub2, invert, rotate ) trans = ov.transformation() if hasattr(m1, '_cell'): cell = m1._cell else: cell = ChemistryLib.Cell() ChemistryLib.MoleculeTransformation(m1._molecule, trans, cell).apply() else: ov = ChemicalAnalysisLib.MoleculeOverlay( m1._molecule, m2._molecule, list(zip([a._atom for a in ats1], [a._atom for a in ats2])), invert, rotate ) ov.fit_geometry().apply() self._ov = ov self._molecule = m1 self._transformation = Molecule.Transformation(_transformation=ov.transformation()) @property def molecule(self): '''Returns input molecule mol2 transformed to overlay onto mol1''' return self._molecule @property def rmsd(self): '''Returns RMSD between the two overlaid molecules''' return self._ov.rms() @property def rmsd_tanimoto(self): '''Returns Tanimoto RMSD between the two overlaid molecules''' return self._ov.rms_tanimoto() @property def transformation(self): '''Returns Molecule.Transformation object required to overlay mol2 over mol1''' return self._transformation @property def max_distance(self): '''Returns the maximum distance between two equivalent atoms in the overlay (Angstroms)''' return self._ov.max_distance()
[docs] @staticmethod def overlay_rmsds_and_transformation(mol1, mol2, atoms=None, invert=False, rotate_torsions=False, with_symmetry=True): '''Overlay mol2 on mol1 and return properties of the overlay. Deprecated and replaced with :meth:`overlay = ccdc.MolecularDescriptors.Overlay :param mol1: a :class:`ccdc.molecule.Molecule` instance :param mol2: a :class:`ccdc.molecule.Molecule` instance :param atoms: a list of pairs of atoms to use in the overlay, or None for all atoms to be used :param invert: allow inversions in the overlay :param rotate_torsions: allow torsional rotations when overlaying :param with_symmetry: take account of symmetry when overlaying atoms :returns: a tuple containing a :class:`ccdc.molecule.Molecule` instance which is a copy of mol2 overlaid on mol1 as entry 0, the rmsd as entry 1, the Tanimoto rmsd as entry 2 and the overlay transformation as entry 3 ''' warnings.warn('''This attribute is deprecated and will be removed in a later version. Instead, please use MolecularDescriptors.Overlay()''', DeprecationWarning) overlay = MolecularDescriptors.Overlay(mol1, mol2, atoms, invert, rotate_torsions, with_symmetry) return overlay.molecule, overlay.rmsd, overlay.rmsd_tanimoto, overlay.transformation
[docs] @staticmethod def overlay(mol1, mol2, atoms=None, invert=False, rotate_torsions=False, with_symmetry=True): '''Overlay mol2 on mol1. Deprecated and replaced with :meth:`overlay = ccdc.MolecularDescriptors.Overlay overlay.molecule :param mol1: a :class:`ccdc.molecule.Molecule` instance :param mol2: a :class:`ccdc.molecule.Molecule` instance :param atoms: a list of pairs of atoms to use in the overlay, or None for all atoms to be used :param invert: allow inversions in the overlay :param rotate_torsions: allow torsional rotations when overlaying :param with_symmetry: take account of symmetry when overlaying atoms :returns: a :class:`ccdc.molecule.Molecule` instance which is a copy of mol2 overlaid on mol1 Note: if with_symmetry is true, and matching atoms are provided, then the matching atoms need to form a connected structure. ''' warnings.warn('''This attribute is deprecated and will be removed in a later version. Instead, please use overlay = MolecularDescriptors.Overlay(), overlay.molecule''', DeprecationWarning) ov = MolecularDescriptors.Overlay(mol1, mol2, atoms, invert, rotate_torsions, with_symmetry) return ov.molecule
[docs] @staticmethod def rmsd(mol1, mol2, atoms=None, overlay=False, exclude_hydrogens=True, with_symmetry=True): '''Return the RMSD of two molecules. Both molecules should have the same atoms if ``atoms`` is ``None``. :param atoms: a list of pairs :class:`ccdc.molecule.Atom` or ``None`` :param overlay: Whether to overlay the molecules before calculating RMSD :param exclude_hydrogens: Whether all-atom or heavy atom RMSD should be calculated :param with_symmetry: Whether to allow symmetrical matches :returns: float ''' if overlay: the_overlay = MolecularDescriptors.Overlay(mol1, mol2, atoms, with_symmetry=with_symmetry) mol2 = the_overlay.molecule if with_symmetry: symm = ChemicalAnalysisLib.WITH_SYMMETRY else: symm = ChemicalAnalysisLib.NO_SYMMETRY if atoms is None: if exclude_hydrogens: ats1 = [a for a in mol1.atoms if a.atomic_symbol not in ['H', 'D', 'U']] ats2 = [a for a in mol2.atoms if a.atomic_symbol not in ['H', 'D', 'U']] atoms = list(zip(ats1, ats2)) else: if with_symmetry: ats1, ats2 = zip(*atoms) orig_ats2 = mol2.atoms ats2 = [Atom(_atom=orig_ats2[a.index]._atom) for a in ats2] else: tot = 0.0 for a, b in atoms: ap = a.coordinates bp = b.coordinates tot += ((ap.x - bp.x)**2 + (ap.y - bp.y)**2 + (ap.z - bp.z)**2) return (tot/len(atoms))**0.5 if atoms is None: return ChemicalAnalysisLib.RMSD(mol1._molecule, mol2._molecule, symm) else: sub1 = MolecularDescriptors._make_subset(mol1, ats1) sub2 = MolecularDescriptors._make_subset(mol2, ats2) return ChemicalAnalysisLib.RMSD(sub1, sub2, symm)
[docs] class MaximumCommonSubstructure(object): '''Identifies the maximum common substructure of two molecules.'''
[docs] class Settings(object): '''Settings for the MCS calculation.''' def __init__(self): '''Initialises the settings.''' self._settings = SubstructureSearchLib.MoleculeExactMatchCriteria() self.connected = True @property def connected(self): '''Whether substructure should be connected. Note that finding disconnected maximal substructures is a lot slower than finding connected. ''' return self._connected @connected.setter def connected(self, tf): self._connected = tf @property def check_bond_count(self): '''Whether the bond count of an atom be checked.''' return self._settings.settings().check_bond_count() @check_bond_count.setter def check_bond_count(self, tf): self._settings.settings().set_check_bond_count(tf) @property def check_bond_polymeric(self): '''Check whether the bond be polymeric.''' return self._settings.settings().check_bond_polymeric() @check_bond_polymeric.setter def check_bond_polymeric(self, tf): self._settings.settings().set_check_bond_polymeric(tf) @property def check_bond_type(self): '''Whether the bond type be checked.''' return self._settings.settings().check_bondtype() @check_bond_type.setter def check_bond_type(self, tf): self._settings.settings().set_check_bondtype(tf) @property def check_charge(self): '''Whether the atom charge be checked.''' return self._settings.settings().check_charge() @check_charge.setter def check_charge(self, tf): self._settings.settings().set_check_charge(tf) @property def check_element(self): '''Whether the element be checked.''' return self._settings.settings().check_element() @check_element.setter def check_element(self, tf): self._settings.settings().set_check_element(tf) @property def check_hydrogen_count(self): '''Whether the atom's hydrogen count be checked.''' return self._settings.settings().check_hydrogen_count() @check_hydrogen_count.setter def check_hydrogen_count(self, tf): self._settings.settings().set_check_hydrogen_count(tf) @property def ignore_hydrogens(self): '''Whether the hydrogens be ignored.''' return self._settings.settings().ignore_hydrogens() @ignore_hydrogens.setter def ignore_hydrogens(self, tf): self._settings.settings().set_ignore_hydrogens(tf)
def __init__(self, settings=None): if settings is None: settings = MolecularDescriptors.MaximumCommonSubstructure.Settings() self.settings = settings
[docs] def search(self, mol1, mol2, only_edges=False, search_step_limit = 'unlimited'): '''Calculate the maximum common substructure between two molecules. :param mol1, mol2: :class:`ccdc.molecule.Molecule` instances. :param only_edges: bool. The search will find a maximal common substructure matching only the edges. :param search_step_limit: positive integer or 'unlimited'. Controls the maximum number of steps the algorithm takes. :returns: a pair of tuples, giving matched :class:`ccdc.molecule.Atom` and :class:`ccdc.molecule.Bond` instances. :raises: ValueError with invalid input Note: this function is computationally exponential, so will take a long time on large molecules. ''' if type(search_step_limit) == type(int()): if search_step_limit <= 0: raise ValueError("search_step_limit must be a positive integer or 'unlimited' ") elif search_step_limit != 'unlimited': raise ValueError("search_step_limit must be a positive integer or 'unlimited' ") if len(mol1.atoms) > len(mol2.atoms): def first(x): return x.second def second(x): return x.first mol2, mol1 = mol1, mol2 else: def first(x): return x.first def second(x): return x.second if self.settings.connected: conn = SubstructureSearchLib.CONNECTED else: conn = SubstructureSearchLib.DISCONNECTED xcrit = SubstructureSearchLib.mol_exact_match_as_match(self.settings._settings) if only_edges: mcsg = SubstructureSearchLib.MoleculeMCSG( mol1._molecule, mol2._molecule, conn, xcrit ) if search_step_limit != 'unlimited': mcsg.set_search_step_limit(search_step_limit) match = mcsg.maximum_common_edge_subgraph() else: xcrit = SubstructureSearchLib.mol_exact_match_as_match(self.settings._settings) mcsg = SubstructureSearchLib.MoleculeMCSG( mol1._molecule, mol2._molecule, conn, xcrit ) if search_step_limit != 'unlimited': mcsg.set_search_step_limit(search_step_limit) match = mcsg.maximum_common_subgraph() if match is None: return (tuple(), tuple()) atoms = tuple( (Atom(_atom=first(match.node_match(i))), Atom(_atom=second(match.node_match(i)))) for i in range(match.nnode_matches()) ) bonds = tuple( (Bond(_bond=first(match.edge_match(i))), Bond(_bond=second(match.edge_match(i)))) for i in range(match.nedge_matches()) ) return atoms, bonds
[docs] class AtomDistanceSearch(object): '''More rapid searching for atoms within a certain distance of a point.''' def __init__(self, molecule): '''Initialise the searcher. :param molecule: a :class:`ccdc.molecule.Molecule` instance. ''' #self._searcher = ChemistryLib.create_distance_search(molecule._molecule) self._searcher = ChemistryLib.SpatialAtomMap(molecule._molecule, ChemistryLib.SpatialAtomMap.ORTHOGONAL)
[docs] def atoms_within_range(self, point, radius): '''A tuple of all atoms within the given radius of the given point.''' ats = self._searcher.atoms_within_sphere(point, radius) return tuple(Atom(_atom=a) for _, a in ats)
[docs] class PrincipleAxesAlignedBox(object): '''The bounding box of the molecule aligned on its principle axes. The vectors of the box have lengths of the size of the box. The x_vector is the major axis of the molecule, the y_vector the minor axis and the z_vector the minimal axis of the molecule. ''' def __init__(self, molecule): if not molecule.all_atoms_have_sites: raise RuntimeError('Some atoms do not have coordinates') self._identifier = molecule.identifier self._box_model = ChemicalAnalysisLib.MoleculeBoxModel(molecule._molecule) @property def aligned_molecule(self): '''The molecule aligned along its principle axes, with centre at its centre of geometry.''' return Molecule(self._identifier, _molecule=self._box_model.aligned_molecule()) @property def x_vector(self): '''The vector of the major axis of the box.''' v = self._box_model.z_vector() return GeometricDescriptors.Vector.from_points( Coordinates(0, 0, 0), Coordinates(v.x(), v.y(), v.z()) ) @property def y_vector(self): '''The vector of the minor axis of the box.''' v = self._box_model.y_vector() return GeometricDescriptors.Vector.from_points( Coordinates(0, 0, 0), Coordinates(v.x(), v.y(), v.z()) ) @property def z_vector(self): '''The vector of the minimal axis of the box.''' v = self._box_model.x_vector() return GeometricDescriptors.Vector.from_points( Coordinates(0, 0, 0), Coordinates(v.x(), v.y(), v.z()) ) @property def volume(self): '''The volume of the box.''' return self._box_model.box_volume()
[docs] class AdjacencyMatrixDescriptorCalculator(object): '''Descriptor calculator for descriptors based on a molecule's adjacency matrix.''' def __init__(self, molecule): '''Initialise the calculator for a molecule. :param molecule: a :class:`ccdc.molecule.Molecule` instance. ''' self.mol = molecule self._calculator = ChemicalAnalysisLib.AdjacencyMatrixDescriptorCalculator(molecule._molecule)
[docs] def self_returning_walk(self, k): '''Return the number of walks of length k that start and end at the same atom. See Handbook of Molecular Descriptors, page 384, "self-returning walk counts". :param k: the number of steps to walk. :returns: float ''' return self._calculator.self_returning_walk(k).value_
[docs] def self_returning_walk_ln(self, k): '''Return the logarithm of the number of walks of length k that start and end at the same atom. See Handbook of Molecular Descriptors, page 384, "self-returning walk counts". :param k: the number of steps to walk. :returns: float ''' return self._calculator.self_returning_walk_ln(k).value_
[docs] def topological_charge_autocorrelation_index(self, k): '''Calculate the topological charge autocorrelation index See https://pubs.acs.org/doi/pdf/10.1021/ci00019a008 :param k: the topological distance to measure across :returns: float ''' return self._calculator.topological_charge_autocorrelation_index(k).value_
[docs] class ConnectivityIndices(object): '''Connectivitiy index descriptor calculations.''' def __init__(self, molecule): '''Initialise the calculator for a molecule. :param molecule: a :class:`ccdc.molecule.Molecule` instance. ''' self.mol = molecule
[docs] def connectivity_index(self, m): '''Return the connectivity index of mth order. See Handbook of Molecular Descriptors, page 85, "connectivity indices". :param m: the path length evaluated. :returns: float ''' return ChemicalAnalysisLib.ConnectivityIndices.connectivity_index(self.mol._molecule, m).value_
[docs] def average_connectivity_index(self, m): '''Return the average connectivity index of mth order. See Handbook of Molecular Descriptors, page 85, "connectivity indices". :param m: the path length evaluated. :returns: float ''' return ChemicalAnalysisLib.ConnectivityIndices.average_connectivity_index(self.mol._molecule, m).value_
[docs] class AtomPairDistanceDescriptorCalculator(object): '''Atom pair distance descriptor calculations. :param molecule: a :class:`ccdc.molecule.Molecule` instance. ''' def __init__(self, molecule): '''Initialise the calculator for a molecule.''' self.mol = molecule self.calculator = ChemicalAnalysisLib.AtomPairDistanceDescriptorCalculator(self.mol._molecule)
[docs] def element_pair_count(self, element_a, element_b, distance): '''Return a count of the number of times a pair of elements appear with a specified minimum path length. See Handbook of Molecular Descriptors, page 428, "substructure descriptors, atom pairs". :param element_a: str. the first element name. :param element_b: str. the second element name. :param distance: int. the number of bonds between atoms of the specified the elements. :returns: float ''' return self.calculator.element_pair_count(ChemistryLib.Element(element_a), ChemistryLib.Element(element_b), distance).value_
#################################################################################################
[docs]class GeometricDescriptors(object): '''A namespace to hold geometric classes and functions.'''
[docs] @staticmethod def point_distance(p0, p1): '''The distance between two points.''' return MathsLib.distance(p0, p1)
[docs] @staticmethod def point_angle(p0, p1, p2): '''The angle between three points.''' return MathsLib.angle(p0, p1, p2).degrees()
[docs] @staticmethod def point_torsion_angle(p0, p1, p2, p3): '''The torsion angle between four points.''' return MathsLib.torsion_angle(p0, p1, p2, p3).degrees()
[docs] class Vector(object): '''A 3D vector.''' def __init__(self, x, y, z): self.x = x self.y = y self.z = z def __getitem__(self, index): if index in range(3): return getattr(self, 'xyz'[index]) def __iter__(self): yield self.x yield self.y yield self.z def __repr__(self): return 'Vector(%.3f, %.3f, %.3f)' % (self.x, self.y, self.z) @property def length(self): '''The length of the vector.''' return sum(self[i]**2 for i in range(3))**0.5
[docs] def cross(self, other): '''Cross product.''' v = MathsLib.vector_3d(self.x, self.y, self.z) w = MathsLib.vector_3d(*tuple(other[i] for i in range(3))) x = v.cross(w) return GeometricDescriptors.Vector(x[0], x[1], x[2])
[docs] def dot(self, other): '''Dot product.''' return sum(self[i]*other[i] for i in range(3))
[docs] @staticmethod def from_points(p0, p1): '''Construct the vector from p0 to p1. :param p0, p1: :class:`ccdc.molecule.Coordinates` ''' return GeometricDescriptors.Vector(p1.x - p0.x, p1.y - p0.y, p1.z - p0.z)
[docs] class Plane(object): '''A plane in 3D.''' def __init__(self, vector, distance, _plane=None): '''Construct a plane from a vector and a distance to the origin. :param vector: :class:`ccdc.descriptors.GeometricDescriptors.Vector` :param distance: float ''' if _plane is None: self._plane = MathsLib.Plane( MathsLib.vector_3d(vector[0], vector[1], vector[2]), distance ) else: self._plane = _plane def __repr__(self): return 'Plane(normal=%s, distance=%.3f)' % (self.normal, self.distance) @property def normal(self): '''The normal to the plane.''' v = self._plane.normal() return GeometricDescriptors.Vector(v.x(), v.y(), v.z()) @property def distance(self): '''The distance from the origin of the plane.''' return self._plane.distance()
[docs] @staticmethod def from_points(*points): '''Construct a RMS fitted plane from points.''' return GeometricDescriptors.Plane(None, None, _plane=MathsLib.Plane(points))
[docs] def point_distance(self, point): '''The distance of the point to the plane.''' return self._plane.distance(point)
[docs] def plane_distance(self, plane): '''The shortest distance of the plane to another.''' return self._plane.distance(plane._plane)
[docs] def plane_angle(self, plane): '''The angle between the two planes.''' return self._plane.angle(plane._plane).degrees()
[docs] def vector_angle(self, vector): '''The angle between the plane and the vector.''' return self._plane.angle(MathsLib.vector_3d(vector.x, vector.y, vector.z)).degrees()
@property def plane_vector1(self): '''A vector in the plane, normal to the plane's normal.''' vec = self._plane.v1() return GeometricDescriptors.Vector(vec.x(), vec.y(), vec.z()) @property def plane_vector2(self): '''A vector in the plane, normal to both the plane's normal and the plane's plane_vector1.''' vec = self._plane.v2() return GeometricDescriptors.Vector(vec.x(), vec.y(), vec.z())
[docs] class Sphere(object): '''A sphere in 3D.''' def __init__(self, centre, radius): self.centre = centre self.radius = radius def __repr__(self): return 'Sphere(%s radius %.2f)' % (str(self.centre), self.radius) def __eq__(self, other): return ( all(round(self.centre[i], 6) == round(other.centre[i], 6) for i in range(3)) and round(self.radius, 6) == round(other.radius, 6) )
[docs] class Quaternion(object): '''A normalised quaternion suitable for expressing rotations in 3D space. Quaternions are a convenient method for expressing a complex sequence of rotations By default this constructs a unit quaternion ''' def __init__(self, _quaternion = None): if _quaternion is None: self._quaternion = MathsLib.Quaternion() else: self._quaternion = _quaternion
[docs] @staticmethod def from_dimensions(q0,q1,q2,q3): ''' create from 4 real numbers. The quaternion will be normalised to unit length. :param q0,q1,q2,q3: the 4 dimensions of the axes 1,i,j and k :raises ValueError: with invalid input (e.g. if the length of the quaternion is 0) ''' try: return GeometricDescriptors.Quaternion(_quaternion=MathsLib.Quaternion(q0,q1,q2,q3)) except Exception as e: raise ValueError('Unable to construct Quaternion') from e
[docs] @staticmethod def from_vector_and_angle(vector,angle, unit = 'degrees'): ''' Construct from a vector and an angle :param vector: a tuple or list of length 3 that represents a vector in 3D space :param angle: a double that represents an angle (in default in degrees.) :raise: ValueError for a bad input ''' if len(vector) != 3: raise ValueError("Quaternion must be constructed from a 3D vector") mlunit = MathsLib.Angle.DEGREES if unit == 'radians': mlunit = MathsLib.Angle.RADIANS try: return GeometricDescriptors.Quaternion( _quaternion=MathsLib.Quaternion( MathsLib.vector_3d(vector[0],vector[1],vector[2]), MathsLib.Angle(angle,mlunit))) except Exception as e: raise ValueError('Unable to construct Quaternion') from e
[docs] @staticmethod def from_euler_angles(alpha,beta,gamma, unit = 'degrees' ): ''' create a quaternion from a set of euler angles ''' mlunit = MathsLib.Angle.DEGREES if unit == 'radians': mlunit = MathsLib.Angle.RADIANS try: return GeometricDescriptors.Quaternion( _quaternion=MathsLib.Quaternion( MathsLib.Angle(alpha,mlunit), MathsLib.Angle(beta, mlunit), MathsLib.Angle(gamma,mlunit))) except Exception as e: raise ValueError('Unable to construct Quaternion') from e
[docs] def copy(self): ''' Create a copy of this quaternion ''' return GeometricDescriptors.Quaternion( _quaternion=MathsLib.Quaternion(self._quaternion) )
[docs] def square(self): ''' In-place squares this quaternion ''' self._quaternion.square()
[docs] def invert(self): ''' In-place inverts this quaternion ''' self._quaternion.invert()
[docs] def complex_conjugate(self): ''' In-place converts this quaternion to its complex conjugate ''' self._quaternion.complex_conjugate()
[docs] def rotation_matrix(self): ''' Returns a rotation matrix that the quaternion currently describes ''' return self._quaternion.rotation_matrix()
[docs] def rotate(self, object_to_rotate, cell = None): ''' Rotates a set of vectors in place by this quaternion ''' try: if isinstance(object_to_rotate, Molecule): self._rotate_molecule(object_to_rotate._molecule, cell) return object_to_rotate elif isinstance(object_to_rotate, Crystal): if cell is None: cell = object_to_rotate._crystal.cell() molecule = object_to_rotate.molecule self._rotate_molecule(molecule,cell) object_to_rotate.molecule = molecule return object_to_rotate elif isinstance(object_to_rotate, Atom): self._rotate_atoms([object_to_rotate],cell) elif isinstance(object_to_rotate, MathsLib.vector_3d): self._quaternion.rotate(object_to_rotate) elif isinstance(object_to_rotate[0], Atom): self._rotate_atoms(object_to_rotate,cell) elif isinstance(object_to_rotate[0],float) or isinstance(object_to_rotate[0],int): mlv = MathsLib.vector_3d(object_to_rotate[0],object_to_rotate[1],object_to_rotate[2]) self._quaternion.rotate(mlv); object_to_rotate[0] = mlv.x() object_to_rotate[1] = mlv.y() object_to_rotate[2] = mlv.z() else: for vector in object_to_rotate: self.rotate(vector) return object_to_rotate except Exception as e: raise ValueError('Unable to rotate object of type %s' % str(type(object_to_rotate))) from e
def _rotate_molecule(self, molecule, _cell): ''' Rotates a molecule. Rotation is applied to orthogonal coordinates. If possible, fractional coordinates are then updated too. ''' self._rotate_atoms(molecule.atoms, _cell) def _rotate_atoms(self, atoms, _cell): ''' Rotates a set of atoms. Rotation is applied to orthogonal coordinates. If possible, fractional coordinates are then updated too. ''' raw_atoms = [atom._atom for atom in atoms] if _cell is not None: ChemicalAnalysisLib.apply_quaternion_to_atoms_with_cell( raw_atoms, self._quaternion, _cell) else: ChemicalAnalysisLib.apply_quaternion_to_atoms(raw_atoms, self._quaternion) def __eq__(self,other): return self._quaternion == other._quaternion def __ne__(self,other): return self._quaternion != other._quaternion def __pos__(self): return GeometricDescriptors.Quaternion(quaternion = +self._quaternion) def __neg__(self): return GeometricDescriptors.Quaternion(quaternion = -self._quaternion) def __pow__(self,n): return GeometricDescriptors.Quaternion( _quaternion = self._quaternion ** n ) def __mul__(self, other): return GeometricDescriptors.Quaternion( _quaternion = self._quaternion * other._quaternion ) def __imul__(self, other): self._quaternion *= other._quaternion return self def __str__(self): return str(self._quaternion.as_string())
#################################################################################################
[docs]class CrystalDescriptors(object): '''Namespace for crystallographic descriptors.''' ################################################################################################# # Powder patterns #################################################################################################
[docs] @nested_class('CrystalDescriptors') class PowderPattern(object): '''Represents a powder pattern. The powder pattern class is available only to CSD-Materials and CSD-Enterprise users. '''
[docs] @nested_class('PowderPattern') class Settings(object): '''Settings which may be set for a Powder simulation. Setting ``None`` for any of the attributes will result in a default value being used. ''' def __init__(self): self._wavelength = None #: Wavelength for the simulation. self._second_wavelength = None #: Optional second wavelength. self._include_hydrogens = None #: Whether to include hydrogens. self._deuterium_is_hydrogen = None #: Whether deuterium and hydrogen are indistinguishable. self._two_theta_minimum = None #: Minimum value of two_theta (5.0). self._two_theta_maximum = None #: Maximum value of two_theta (50.0). self._two_theta_step = None #: Step size (0.02). self._full_width_at_half_maximum = None #: Peak width at half height (0.1). self._march_dollase_preferred_orientation = None @staticmethod def _get_wavelength_object(value): if value is not None: if isinstance(value, float) or isinstance(value, int): return CrystalDescriptors.PowderPattern.Wavelength(value) elif (isinstance(value, list) or isinstance(value, tuple)) and len(value) == 2: return CrystalDescriptors.PowderPattern.Wavelength(value[0], value[1]) elif isinstance(value, CrystalDescriptors.PowderPattern.Wavelength): return value else: raise TypeError(f'Cant set wavelength; type must be a float or a pair of floats: {value} was passed in') return None @property def wavelength(self): ''' Set or get the primary wavelength :param value: float or pair of floats (wavelength and scale factor) or another Wavelength object or None to reset to the default :return: Primary wavelength object (or None) for the simulation ''' return self._wavelength @wavelength.setter def wavelength(self, value): self._wavelength = self._get_wavelength_object(value) @property def second_wavelength(self): ''' Set or get the secondary wavelength :param value: float or pair of floats (wavelength and scale factor) or another Wavelength object or None to reset to the default :return: Secondary wavelength object (or None) for the simulation ''' return self._second_wavelength @second_wavelength.setter def second_wavelength(self, value): self._second_wavelength = self._get_wavelength_object(value) @property def include_hydrogens(self): ''' Whether to include hydrogens in the simulation :return: True or false ''' return self._include_hydrogens @include_hydrogens.setter def include_hydrogens(self, value): self._include_hydrogens = bool(value) @property def deuterium_is_hydrogen(self): ''' Whether to include treat deuterium as hydrogen :return: True or false ''' return self._deuterium_is_hydrogen @deuterium_is_hydrogen.setter def deuterium_is_hydrogen(self, value): self._deuterium_is_hydrogen = bool(value) @property def two_theta_minimum(self): ''' Where to start the pattern simulation :return: float representing the minimum 2-theta (in degrees) ''' return self._two_theta_minimum @two_theta_minimum.setter def two_theta_minimum(self, value): self._two_theta_minimum = float(value) @property def two_theta_maximum(self): ''' Where to end the pattern simulation :return: float representing the maximum 2-theta (in degrees) ''' return self._two_theta_maximum @two_theta_maximum.setter def two_theta_maximum(self, value): self._two_theta_maximum = float(value) @property def two_theta_step(self): ''' The step-size used in the pattern simulation :return: float representing the step size (in degrees) ''' return self._two_theta_step @two_theta_step.setter def two_theta_step(self, value): self._two_theta_step = float(value) @property def full_width_at_half_maximum(self): ''' The the full width at half height of peaks to use in simulation :return: float representing the full width at half height of peaks (in degrees) ''' return self._full_width_at_half_maximum @full_width_at_half_maximum.setter def full_width_at_half_maximum(self, value): self._full_width_at_half_maximum = float(value) @property def march_dollase_preferred_orientation(self): ''' Setting for march_dollase :return: float representing the full width at half height of peaks (in degrees) ''' return self._march_dollase_preferred_orientation @march_dollase_preferred_orientation.setter def march_dollase_preferred_orientation(self, value): # Shape of input is (h,k,l,value) if not value: self._march_dollase_preferred_orientation = None else: try: self._march_dollase_preferred_orientation = tuple( [int(v) for v in value[:3]] + [float(value[3])] ) except (ValueError, IndexError, TypeError): raise ValueError(f"expected a tuple of form (int,int,int,float) - got {value}")
[docs] @nested_class('PowderPattern') class Wavelength(object): '''Represents a wavelength for powder studies. Some standard wavelengths - these are floats, not :class:`ccdc.descriptors.CrystalDescriptors.PowderPattern.Wavelength` ''' Wavelength_Cu = PackingSimilarityLib.Wavelength.Wavelength_Cu Wavelength_CuKa1 = PackingSimilarityLib.Wavelength.Wavelength_CuKa1 Wavelength_CuKa2 = PackingSimilarityLib.Wavelength.Wavelength_CuKa2 Wavelength_CuKb1 = PackingSimilarityLib.Wavelength.Wavelength_CuKb1 Wavelength_Cr = PackingSimilarityLib.Wavelength.Wavelength_Cr Wavelength_CrKa1 = PackingSimilarityLib.Wavelength.Wavelength_CrKa1 Wavelength_CrKa2 = PackingSimilarityLib.Wavelength.Wavelength_CrKa2 Wavelength_Co = PackingSimilarityLib.Wavelength.Wavelength_Co Wavelength_CoKa1 = PackingSimilarityLib.Wavelength.Wavelength_CoKa1 Wavelength_CoKa2 = PackingSimilarityLib.Wavelength.Wavelength_CoKa2 Wavelength_CoKb1 = PackingSimilarityLib.Wavelength.Wavelength_CoKb1 Wavelength_Mo = PackingSimilarityLib.Wavelength.Wavelength_Mo Wavelength_MoKa1 = PackingSimilarityLib.Wavelength.Wavelength_MoKa1 Wavelength_MoKa2 = PackingSimilarityLib.Wavelength.Wavelength_MoKa2 Wavelength_MoKb1 = PackingSimilarityLib.Wavelength.Wavelength_MoKb1 Wavelength_Fe = PackingSimilarityLib.Wavelength.Wavelength_Fe Wavelength_FeKa1 = PackingSimilarityLib.Wavelength.Wavelength_FeKa1 Wavelength_FeKa2 = PackingSimilarityLib.Wavelength.Wavelength_FeKa2 Wavelength_FeKb1 = PackingSimilarityLib.Wavelength.Wavelength_FeKb1 def __init__(self, wavelength=None, scale_factor=1.0): if wavelength is None: wavelength = CrystalDescriptors.PowderPattern.Wavelength.Wavelength_CuKa1 self._wavelength = PackingSimilarityLib.Wavelength(wavelength, scale_factor) @property def wavelength(self): '''The wavelength.''' return self._wavelength.wavelength() @wavelength.setter def wavelength(self, w): self._wavelength = PackingSimilarityLib.Wavelength(w, self.scale_factor) @property def scale_factor(self): '''The scale factor of this Wavelength.''' return self._wavelength.scale_factor() @scale_factor.setter def scale_factor(self, f): self._wavelength = PackingSimilarityLib.Wavelength(self.wavelength, f)
[docs] @nested_class('PowderPattern') class TickMark(object): '''A tick mark in a simulated powder pattern. ''' def __init__(self, _tick, _crystal=None): '''Create from an internal tick mark object.''' self._tick = _tick self._crystal = _crystal @property def two_theta(self): '''Two theta value of this tick.''' return self._tick.two_theta().degrees() @property def is_systematically_absent(self): '''Whether this tick mark is systematically absent.''' return self._tick.is_systematically_absent() @property def miller_indices(self): '''The Miller indices of this tick mark.''' if self._crystal is not None: mi = self._tick.miller_indices() return self._crystal.miller_indices( mi.h(), mi.k(), mi.l() )
[docs] @staticmethod def from_xye_file(file_name): '''Create a CrystalDescriptors.PowderPattern from an xye file. :param file_name: path to xye file ''' return CrystalDescriptors.PowderPattern(PackingSimilarityLib.PXRDPattern(file_name))
[docs] @staticmethod def from_crystal(crystal, settings=None): '''Create a CrystalDescriptors.PowderPattern from a crystal. :param crystal: :class:`ccdc.crystal.Crystal` :param settings: :class:`ccdc.descriptors.CrystalDescriptors.PowderPattern.Settings` ''' sim = PackingSimilarityLib.PXRDSimulator() sim.set_crystal_structure(crystal._crystal) if settings is None: settings = CrystalDescriptors.PowderPattern.Settings() if settings.wavelength is not None: sim.set_wavelength(0, settings.wavelength._wavelength) else: settings.wavelength = CrystalDescriptors.PowderPattern.Wavelength( sim.wavelength(0).wavelength(), sim.wavelength(0).scale_factor() ) if settings.second_wavelength is not None: sim.add_wavelength(settings.second_wavelength._wavelength) if settings.include_hydrogens is not None: sim.set_include_hydrogens(settings.include_hydrogens) else: settings.include_hydrogens = sim.include_hydrogens() if settings.deuterium_is_hydrogen is not None: sim.set_D_is_H(settings.deuterium_is_hydrogen) else: settings.deuterium_is_hydrogen = sim.D_is_H() if settings.two_theta_minimum is not None or settings.two_theta_maximum is not None: if settings.two_theta_minimum is not None: start = MathsLib.Angle(settings.two_theta_minimum, MathsLib.Angle.DEGREES) else: start = sim.two_theta_start() settings.two_theta_minimum = start.degrees() if settings.two_theta_maximum is not None: end = MathsLib.Angle(settings.two_theta_maximum, MathsLib.Angle.DEGREES) else: end = sim.two_theta_end() settings.two_theta_maximum = end.degrees() sim.set_two_theta_range(start, end) else: settings.two_theta_minimum = sim.two_theta_start().degrees() settings.two_theta_maximum = sim.two_theta_end().degrees() if settings.two_theta_step is not None: sim.set_two_theta_step(MathsLib.Angle(settings.two_theta_step, MathsLib.Angle.DEGREES)) else: settings.two_theta_step = sim.two_theta_step().degrees() if settings.full_width_at_half_maximum is not None: fhwm = PackingSimilarityLib.ConstantFWHM( MathsLib.Angle(settings.full_width_at_half_maximum, MathsLib.Angle.DEGREES) ) sim.peak_shape().set_peak_width(fhwm) else: x = sim.peak_shape().peak_width() y = PackingSimilarityLib.get_fwhm_as_constant_fwhm(x) if y: settings.full_width_at_half_maximum = y.width().degrees() if settings.march_dollase_preferred_orientation is not None: # Assume tuple h,k,l,r miller_indexes = ChemistryLib.MillerIndices(settings.march_dollase_preferred_orientation[0], settings.march_dollase_preferred_orientation[1], settings.march_dollase_preferred_orientation[2]) march_dollase = PackingSimilarityLib.MarchDollase(miller_indexes) march_dollase.set_r(settings.march_dollase_preferred_orientation[3]) sim.set_preferred_orientation(march_dollase) return CrystalDescriptors.PowderPattern(sim.simulate_profile(), settings, sim, crystal)
def __init__(self, _pattern, _settings=None, _simulation=None, _crystal=None): '''Private: create from an internal PXRDPattern.''' self._pattern = _pattern self.settings = _settings self._simulation = _simulation self._crystal = _crystal
[docs] def similarity(self, other): '''Measure of match between this pattern and another. :param other: :class:`ccdc.descriptors.CrystalDescriptors.PowderPattern` :returns: float ''' return PackingSimilarityLib.PXRDMatch(self._pattern, other._pattern)
[docs] def write_xye_file(self, file_name): '''Write a .xye format file. :param file_name: output file name ''' self._pattern.write_xye_file(file_name)
[docs] def write_raw_file(self, file_name): '''Write a Bruker .raw file. :param file_name: output file name ''' self._pattern.write_raw_file(file_name)
[docs] def integral(self, start=0.0, end=180.0): '''The area under the curve. :param start: float :param end: float :returns: float ''' return self._pattern.integral(start, end)
@property def two_theta(self): '''The array of two_theta values.''' return [ self._pattern.two_theta(i) for i in range(self._pattern.npoints()) ] @property def intensity(self): '''The array of intensity values.''' return [ self._pattern.intensity(i) for i in range(self._pattern.npoints()) ] @property def esd(self): '''The array of esd values (Estimated Square Deviations).''' return [ self._pattern.esd(i) for i in range(self._pattern.npoints()) ] @property def tick_marks(self): '''The array of tick marks if this is a simulated powder pattern. :returns: list of :class:`ccdc.descriptors.CrystalDescriptors.PowderPattern.TickMark` or ``None`` if this is not a simulated powder pattern. ''' if self._simulation is not None: self._ticks = self._simulation.tick_mark_list(self._simulation.INCLUDE_SYSTEMATIC_ABSENCES) self._ticks.sort_on_two_theta() return [ CrystalDescriptors.PowderPattern.TickMark( self._ticks.tick_mark(i), self._crystal ) for i in range(self._ticks.size()) ]
################################################################################################# # Morphology #################################################################################################
[docs] class Morphology(morphology.MorphologyBase): def __init__(self, crystal=None): warnings.warn('''This class is deprecated and will be removed in a later version. Instead, please use the ccdc.morphology.BFDHMorphology class.''', DeprecationWarning) super().__init__(crystal) Facet = morphology.Facet OrientedBoundingBox = morphology.OrientedBoundingBox
################################################################################################# # Graph sets #################################################################################################
[docs] class GraphSetSearch(object): '''Finds the graph sets of a crystal.'''
[docs] class GraphSet(object): '''An individual graph set.''' _descriptor_dict = { MotifSearchLib.GraphSet.SELF: 'self', MotifSearchLib.GraphSet.DISCRETE: 'discrete', MotifSearchLib.GraphSet.CHAIN: 'chain', MotifSearchLib.GraphSet.RING: 'ring', MotifSearchLib.GraphSet.NONE: 'none' } def __init__(self, _graph_set_atoms, _view): self._graph_set_atoms = _graph_set_atoms self._graph_set = _graph_set_atoms.graph_set() self._view = _view def __repr__(self): ostr = MotifSearchLib.ostringstream() self._graph_set.print_(ostr) return ostr.str() @property def descriptor(self): '''The descriptor of the graph set.''' return CrystalDescriptors.GraphSetSearch.GraphSet._descriptor_dict[self._graph_set.designator()] @property def ndonors(self): '''The number of donors involved in the graph set.''' return self._graph_set.ndonors() @property def nacceptors(self): '''The number of acceptors involved in the graph set.''' return self._graph_set.nacceptors() @property def degree(self): '''The degree of the graph set, i.e. the number of atoms involved.''' return self._graph_set.degree() @property def edge_labels(self): '''The edge labels of the graph set. The labels are arbitrary letters identifying a unique hydrogen bond, separated by '>' or '<' indicating the donor-acceptor direction. ''' return self._graph_set.edge_labels() @property def nmolecules(self): '''The number of molecules involved in the graph set.''' return self._graph_set_atoms.nmolecules() @property def period(self): '''The period of the graph set, i.e the number of hydrogen bonds in the repeat unit. If the type of the graph set is not a chain or a ring this will be -1 ''' return self._graph_set_atoms.period() @property def label_set(self): '''The set of hydrogen bond labels found in the graph set.''' return set(self._graph_set_atoms.label_set()) @property def hbonds(self): '''The hydrogen bonds of the graph set. :returns: a tuple of :class:`ccdc.crystal.Crystal.HBond` instances. ''' res = [] for i in range(self._graph_set_atoms.nhbonds()): hb = self._graph_set_atoms.hbond(i) _contact = ChemistryLib.MolecularContact( hb.from_molecule(), hb.from_atom(), hb.to_molecule(), hb.to_atom(), ChemistryLib.MolecularContact.HBOND_CONTACT ) res.append(Crystal.HBond(self._view, _contact)) return tuple(res)
[docs] class Settings(object): '''Configurable settings for the graph set analyser.''' level = 2 #: deepest level to search. This is the number of different HBonds involved. max_ring_size = 6 #: largest ring to search max_chain_size = 4 #: longest chain to search max_discrete_chain_size = 4 #: longest discrete chain to search _inter_dict = bidirectional_dict( intermolecular=ChemistryLib.HBondCriterion.INTERMOLECULAR, intramolecular=ChemistryLib.HBondCriterion.INTRAMOLECULAR, any=ChemistryLib.HBondCriterion.ANY, ) def __init__(self, hbond_criterion=None): if hbond_criterion is None: hbond_criterion = Molecule.HBondCriterion() self.hbond_criterion = hbond_criterion @property def distance_range(self): '''Allowable distance range for a HBond to be formed.''' return ( self.hbond_criterion._hbc.hbond_criterion().min_dist_tolerance(), self.hbond_criterion._hbc.hbond_criterion().max_dist_tolerance() ) @distance_range.setter def distance_range(self, val): self.hbond_criterion._hbc.hbond_criterion().set_min_dist_tolerance(min(val)) self.hbond_criterion._hbc.hbond_criterion().set_max_dist_tolerance(max(val)) @property def angle_tolerance(self): '''The tolerance of the HBond angle.''' return self.hbond_criterion._hbc.hbond_criterion().angle_tolerance().degrees() @angle_tolerance.setter def angle_tolerance(self, val): if val is None: self.hbond_criterion._hbc.set_angle_option( MotifSearchLib.GraphSetHBondCriterion.ANGLE_IS_NOT_CHECKED ) else: self.hbond_criterion._hbc.set_angle_option( MotifSearchLib.GraphSetHBondCriterion.ANGLE_IS_CHECKED ) self.hbond_criterion._hbc.hbond_criterion().set_angle_tolerance( MathsLib.Angle(val, MathsLib.Angle.DEGREES) ) @property def vdw_corrected(self): '''Whether the distance range is Van der Waals corrected.''' return self.hbond_criterion._hbc.hbond_criterion().vdw_corrected() @vdw_corrected.setter def vdw_corrected(self, val): self.hbond_criterion._hbc.hbond_criterion().set_vdw_corrected(val) @property def require_hydrogens(self): '''Whether Hydrogens are required for the HBond.''' return not self.hbond_criterion._hbc.hbond_criterion().allow_non_hydrogen_contacts() @require_hydrogens.setter def require_hydrogens(self, val): if val: self.hbond_criterion._hbc.set_distance_option( MotifSearchLib.GraphSetHBondCriterion.HYDROGEN_TO_ACCEPTOR ) else: self.hbond_criterion._hbc.set_distance_option( MotifSearchLib.GraphSetHBondCriterion.DONOR_TO_ACCEPTOR ) self.hbond_criterion._hbc.hbond_criterion().set_allow_non_hydrogen_contacts(not val) @property def intermolecular(self): '''Whether HBonds should be intermolecular, intramolecular, or any.''' return self._inter_dict.inverse_lookup( self.hbond_criterion._hbc.hbond_criterion().type() ) @intermolecular.setter def intermolecular(self, val): self.hbond_criterion._hbc.hbond_criterion().set_type( self._inter_dict.prefix_lookup(val) ) @property def path_length_range(self): '''The shortest and longest bond-path separation for intramolecular contacts.''' return ( self.hbond_criterion._hbc.hbond_criterion().minpath(), self.hbond_criterion._hbc.hbond_criterion().maxpath() ) @path_length_range.setter def path_length_range(self, val): self.hbond_criterion._hbc.hbond_criterion().set_minpath(min(val)) self.hbond_criterion._hbc.hbond_criterion().set_maxpath(max(val))
def __init__(self, settings=None): '''Initialise the analyser.''' if settings is None: settings = CrystalDescriptors.GraphSetSearch.Settings() self.settings = settings self._analyser = MotifSearchLib.GraphSetAnalyser() self._analyser.set_level(2) self._analyser.set_hbond_criterion(self.settings.hbond_criterion._hbc)
[docs] def search(self, crystal): '''Find all graph sets for the crystal subject to the constraints of the settings. :param crystal: :class:`ccdc.crystal.Crystal` instance. :returns: a tuple of :class:`ccdc.descriptors.CrystalDescriptors.GraphSetSearch.GraphSet` instances. ''' self._analyser.set_level(self.settings.level) self._analyser.set_max_ring_size(self.settings.max_ring_size) self._analyser.set_max_chain_size(self.settings.max_chain_size) self._analyser.set_max_discrete_size(self.settings.max_discrete_chain_size) self._analyser.set_hbond_criterion(self.settings.hbond_criterion._hbc) atoms = self._analyser.run_graph_set_analysis(crystal._crystal) view = ChemistryLib.CrystalStructureView_instantiate(crystal._crystal) return tuple(CrystalDescriptors.GraphSetSearch.GraphSet(a, view) for a in atoms)
################################################################################################# # Hydrogen bond coordination #################################################################################################
[docs] class HBondCoordination(object): '''Calculate HBond coordination predictions. The HBondCoordination class is available only to CSD-Materials and CSD-Enterprise users. '''
[docs] class Settings(object): '''Settings pertaining to the calculation of coordination predictions.''' def __init__(self): self._graph_set_search_settings = CrystalDescriptors.GraphSetSearch.Settings() self._graph_set_search_settings.hbond_criterion.intermolecular = 'Any' self._graph_set_search_settings.hbond_criterion.require_hydrogens = True self._graph_set_search_settings.hbond_criterion.path_length_range = (3, 999) self._graph_set_search_settings.hbond_criterion.vdw_corrected = True self._graph_set_search_settings.hbond_criterion.distance_range = (-5, 0.0) self._coordination_models_path = None self._models_changed = False if self.coordination_models_path is None: self.coordination_models_path = os.path.abspath( os.path.join( os.path.dirname(__file__), 'parameter_files', 'hydrogen_bond_coordination_models' ) ) @property def hbond_criterion(self): return self._graph_set_search_settings.hbond_criterion @property def coordination_models_path(self): '''The directory in which the coordination models may be found.''' return self._coordination_models_path @coordination_models_path.setter def coordination_models_path(self, path): self._coordination_models_path = path self._models_changed = True
[docs] class Predictions(object): '''The predictions for HBonds coordinations.''' Observation = collections.namedtuple('Observation', ['label', 'coordination_count', 'probability']) def observation_repr(obs): return "Observation(label='%s', coordination_count=%d, probability=%f)" % (obs.label, obs.coordination_count, obs.probability) Observation.__repr__ = observation_repr def __init__(self, crystal, _analysis, _predictions): self.crystal = crystal self._predictions = _predictions self._donor_atoms = tuple( Atom(_atom=hb.atom()) for hb in _analysis.observed_donor_atoms() ) self._acceptor_atoms = tuple( Atom(_atom=hb.atom()) for hb in _analysis.observed_acceptor_atoms() ) @property def is_valid(self): '''Whether or not valid predictions were made.''' return not all(len(p) <= 1 for p in self._predictions) @property def observed(self): '''The predicted probabilities of observed HBonds.''' if not self._predictions: return None obs = tuple( CrystalDescriptors.HBondCoordination.Predictions.Observation(p[0][0], i - 1, float(p[i][0])) for p in self._predictions for i in range(1, len(p)) if abs(p[i][1]) == 1 ) return obs
[docs] def predictions_for_label(self, label, type='donor'): '''All the predictions for the atom. :returns: a pair: observed hbond coordination number, dictionary with key, hbond coordination number, value, predicted probability. ''' if not self._predictions: return None ps = [p for p in self._predictions if p[0][0].startswith(label + ' ')] if len(ps) == 1: p = ps[0] elif len(ps) == 0: return None elif len(ps) > 2: raise RuntimeError('Ambiguous atom label %s' % label) elif ps[0][0][0].endswith('(%s)' % type.lower()[0]): p = ps[0] else: p = ps[1] obs = { i - 1: float(p[i][0]) for i in range(1, len(p)) } observed = [i - 1 for i in range(1, len(p)) if abs(p[i][1]) == 1][0] return (observed, obs)
def _functional_group_of_label(self, label, type='donor'): ps = [p[0][0] for p in self._predictions if p[0][0].startswith(label + ' ')] if len(ps) == 1: p = ps[0] elif len(ps) == 0: return None elif len(ps) > 2: raise RuntimeError('Ambiguous atom label %s' % label) elif ps[0].endswith('(%s)' % type.lower()[0]): p = ps[0] else: p = ps[1] return p
[docs] def functional_groups_of_hbond(self, hbond): '''The functional group pertaining to a hydrogen-bonding atom.''' ats = hbond.atoms if len(ats) == 3: if ats[1] in ats[0].neighbours: donor = self._functional_group_of_label(ats[0].label, 'donor') acc = self._functional_group_of_label(ats[-1].label, 'acceptor') else: donor = self._functional_group_of_label(ats[-1].label, 'donor') acc = self._functional_group_of_label(ats[0].label, 'acceptor') else: # No H, how can I tell which is donor? if ats[0].is_donor and not ats[-1].is_donor: donor = self._functional_group_of_label(ats[0].label, 'donor') acc = self._functional_group_of_label(ats[-1].label, 'acceptor') elif ats[-1].is_donor and not ats[0].is_donor: donor = self._functional_group_of_label(ats[-1].label, 'donor') acc = self._functional_group_of_label(ats[0].label, 'acceptor') else: donor = self._functional_group_of_label(ats[0].label, 'donor') acc = self._functional_group_of_label(ats[-1].label, 'acceptor') return donor, acc
[docs] def to_csv(self, separator=','): '''Format the predictions suitable for a csv file. :param separator: a separation string, or ``None``. :returns: if separator is ``None`` a tuple of lists of components, otherwise a separated string of components.''' components = self.crystal.molecule.components file_orders = [ set(a._atom.annotations().obtain_FileOrdering().file_order() for a in c.atoms) for c in components ] def one_line(l): id_bits = l[0][0].split() at_label = id_bits[0] context = ' '.join(id_bits[2:-1]) da = id_bits[-1][1:-1] if da == 'd': possible_correct_atoms = [a for a in self._donor_atoms if a.label == at_label] else: possible_correct_atoms = [a for a in self._acceptor_atoms if a.label == at_label] if len(possible_correct_atoms) == 1: correct_atom = possible_correct_atoms[0] else: raise RuntimeError('Ambiguous label %s %s' % ( at_label, ' '.join(a.label for a in possible_correct_atoms))) for i, c in enumerate(file_orders): if correct_atom._atom.annotations().obtain_FileOrdering().file_order() in c: residue = 'RES%d' % (i + 1) break else: raise RuntimeError('Cannot find appropriate file order') ret = [ self.crystal.identifier, residue, # at_label, correct_atom.label, context, da, ] # Extract the coordination values themselves (starting at the second element of l) coordination_values = l[1:] for x in coordination_values: ret.extend( (float(x[0]), abs(x[1])) ) for i in range(len(coordination_values), 7): ret.extend((0.0, 0)) if separator is None: return ret return separator.join(str(x) for x in ret) if separator is None: return (one_line(l) for l in self._predictions[1:]) return '\n'.join(one_line(l) for l in self._predictions[1:]) + '\n'
def __init__(self, settings=None): '''Initialise the prediction calculator. :param settings: :class:`ccdc.hbond_coordination.CrystalDescriptors.HBondCoordination.Settings` instance. ''' if settings is None: settings = CrystalDescriptors.HBondCoordination.Settings() self.settings = settings self._setup_calculator() def _setup_calculator(self): self._coordination_models = PackingSimilarityLib.CoordinationModels( self.settings.coordination_models_path) self._calculator = PackingSimilarityLib.DonorAcceptorPropensityCalculator(self._coordination_models)
[docs] def predict(self, crystal): '''Calculate HBond coordination likelihoods for the crystal. :returns: a :class:`ccdc.hbond_coordination.CrystalDescriptors.HBondCoordination.Predictions` instance. ''' if crystal.has_disorder: raise RuntimeError('Cannot calculate propensities for disordered crystals') if self.settings._models_changed: self._setup_calculator() self.settings._models_changed = False self.crystal = crystal self.hbond_analysis = PackingSimilarityLib.create_analysis_from_structure( crystal._crystal, self._coordination_models, self.settings.hbond_criterion._hbc.hbond_criterion()) self._coordination_propensities = self._calculator.calculate(self.hbond_analysis) pairs = self.hbond_analysis.observed_hbonding_outcome( self._coordination_propensities ) grouping_calculator = PackingSimilarityLib.DonorAcceptorOutcomeGroupingCalculator() grouping = grouping_calculator.grouping( pairs, self.hbond_analysis, self._coordination_propensities ) self.donor_atoms = tuple( Atom(_atom=d.atom()) for d in self.hbond_analysis.observed_donor_atoms() ) self.acceptor_atoms = tuple( Atom(_atom=a.atom()) for a in self.hbond_analysis.observed_acceptor_atoms() ) return CrystalDescriptors.HBondCoordination.Predictions( self.crystal, self.hbond_analysis, PackingSimilarityLib.coordination_likelihoods_table_raw_data( grouping_calculator, grouping, self._coordination_propensities ) )
################################################################################################# # Hydrogen bond propensities #################################################################################################
[docs] class HBondPropensities(object): '''Calculates HBond propensities. .. TODO: all donors and acceptors matched, load regression from file (fitting_data.txt) '''
[docs] class Settings(object): '''Pertaining to HBond propensity calculation.''' def __init__(self): '''NB: databases *MUST* be in the SQLite ASER format.''' self.hbond_criterion = Molecule.HBondCriterion() self.hbond_criterion.require_hydrogens = True self.hbond_criterion.path_length_range = (4, 999) self.working_directory = tempfile.mkdtemp() self._databases = [io.EntryReader('CSD')] self._identifier_list = UtilitiesLib.DatabaseEntryIdentifierList() # Check whether in a build environment maindir = os.environ.get('MAINDIR') if maindir: self.functional_groups_directory = os.path.join(maindir, 'mercury', 'distrib', 'functional_groups') if not os.path.isdir(self.functional_groups_directory): raise RuntimeError('Functional groups not in expected place in source space.') return possible_paths = [ os.path.join(io.csd_directory(), os.pardir, 'Mercury', 'functional_groups'), # Windows os.path.join(io.csd_directory(), os.pardir, 'functional_groups'), # Linux os.path.join(io.csd_directory(), os.pardir, os.pardir, 'mercury.app', 'Contents', 'Resources', 'functional_groups'), # MacOs ] for candidate in possible_paths: if os.path.isdir(candidate): self.functional_groups_directory = candidate return raise RuntimeError('Could not find Functional groups directory in %s.' % possible_paths) @property def databases(self): '''The databases to be used for the prediction. Note: the databases *MUST* be SQLite ASER databases for the moment. ''' return self._databases @databases.setter def databases(self, dbs): if isinstance(dbs, io._DatabaseReader): if not isinstance(dbs._db, ( CSDSQLDatabaseLib.CSDSQLDatabase, FileFormatsLib.CrystalStructureDatabasePool, FileFormatsLib.CrystalStructureDatabase )): raise RuntimeError('One or more databases is NOT searchable.') self._databases = [dbs] else: for d in dbs: if isinstance(d._db, ( CSDSQLDatabaseLib.CSDSQLDatabase, FileFormatsLib.CrystalStructureDatabasePool, FileFormatsLib.CrystalStructureDatabase )): continue raise RuntimeError('One or more databases is NOT searchable.') self._databases = dbs @property def limit_identifier_list(self): '''A list of identifiers to limit the search''' return self._identifier_list @limit_identifier_list.setter def limit_identifier_list(self, identifiers): self._identifier_list = identifiers @property def working_directory(self): '''The working directory for the predictions.''' return self._working_directory @working_directory.setter def working_directory(self, path): path = path.replace('\\', '/') if not os.path.exists(path): os.makedirs(path) self._working_directory = path
[docs] class FunctionalGroup(object): '''A functional group capable of hydrogen bonding.''' def __init__(self, _model_group): self._model_group = _model_group def __repr__(self): return 'FunctionalGroup(%s)' % self.identifier @property def identifier(self): '''The name of the functional group.''' return self._model_group.name
[docs] def matches(self, molecule): '''The substructure search matches of the functional group.''' from ccdc.search import SubstructureSearch, QuerySubstructure searcher = SubstructureSearch() sub = QuerySubstructure(self._model_group.substructure) searcher.add_substructure(sub) hits = searcher.search(molecule) return hits
[docs] class FittingData(object): '''The collection of entries used for the prediction.'''
[docs] class FittingDataEntry(object): '''An individual entry with associated matching data.''' def __init__(self, _fitting_item): self._fitting_item = _fitting_item @property def identifier(self): '''The identifier of the fitting data item.''' return self._fitting_item.identifier().str()
def __init__(self, _fitting_data=None, identifiers=None, databases=None): if _fitting_data is None: _fitting_data = PackingSimilarityLib.PropensityPredictionFittingData() self._fitting_data = _fitting_data #if identifiers is None: # # All the ids from all the dbs? # pass #else: # self._fitting_data.add_fitting_data_items(identifiers, [d._db for d in databases]) self._limit_to_identifiers = identifiers self._databases = databases def __len__(self): if self._fitting_data: return self._fitting_data.nitems() elif self._limit_to_identifiers: return self._limit_to_identifiers.size() else: return 0
[docs] def write(self, file_name): '''Writes the fitting data to a file.''' with io.CrystalWriter(file_name) as writer: for i in range(len(self)): item = self._fitting_data.item(i) writer.write(Crystal(item.crystal_structure(), item.identifier().str()))
[docs] def nitems(self, functional_group=None): '''How many items there are representing the functional group.''' if functional_group is None: return self._fitting_data.nitems() else: return self._fitting_data.nitems_with_group(functional_group._model_group)
[docs] def advice_comment(self, functional_group=None): '''A string indicating whether or not there are enough data for propensity predictions. Note: when first made the fitting data has not performed substructure matching, so results for particular groups will be inappropriately bad. Results will be valid after :meth:`ccdc.hbond_coordination.CrystalDescriptors.HBondPropensities.match_fitting_data` has been called. ''' if functional_group is None: return self._fitting_data.advice_comment() else: return self._fitting_data.advice_comment(functional_group._model_group)
def _identifiers(self): if self._fitting_data: ids = UtilitiesLib.DatabaseEntryIdentifierList() for i in range(len(self)): item = self._fitting_data.item(i) ids.push_back(item.identifier()) return ids elif self._limit_to_identifiers: return self._limit_to_identifiers else: return UtilitiesLib.DatabaseEntryIdentifierList() def identifiers(self): if self._fitting_data: return tuple(self._fitting_data.item(i).identifier().str() for i in range(len(self))) else: return tuple() def databases(self): if self._databases: return self._databases return list( set( [self._fitting_data.item(i).parent_database() for i in range(len(self))]))
[docs] @staticmethod def from_file(file_name, databases): '''Reads fitting data from a file.''' _fitting_data = PackingSimilarityLib.PropensityPredictionFittingData() with io.EntryReader(file_name) as reader: ids = UtilitiesLib.DatabaseEntryIdentifierList() if isinstance(reader, io._GCDReader): ids.read_from_file(file_name) else: for c in reader: ids.push_back(UtilitiesLib.DatabaseEntryIdentifier(c.identifier)) _fitting_data.add_fitting_data_items(ids, [reader._db]) return CrystalDescriptors.HBondPropensities.FittingData(_fitting_data=_fitting_data, identifiers=ids, databases=databases)
[docs] class HBondAtom(object): '''Base class for HBondDonor and HBondAcceptor.''' def __init__(self, _analysis): self._analysis = _analysis self.nnegative = self.npositive = None self.exclude = False @property def accessible_surface_area(self): '''The accessible surface area of the HBond atom.''' return PackingSimilarityLib.atom_accessible_surface(self._analysis) @property def functional_group_identifier(self): '''The identifier of the functional group for this atom.''' return self._analysis.substructure_label() @property def nlone_pairs(self): '''The number of lone pairs associated with this atom.''' return PackingSimilarityLib.nlone_pairs(self._analysis.atom()) @property def identifier(self): '''The full identifier of this atom.''' return self._analysis.label() @property def label(self): '''The label of the atom in the original structure.''' return self._analysis.atom().label() @property def atom(self): '''The :class:`ccdc.molecule.Atom` of the HBondAtom.''' return Atom(_atom=self._analysis.atom())
[docs] class HBondDonor(HBondAtom): '''A potential donor atom. This class will be augmented with the evidence found during match_fitting_data(). ''' def __init__(self, _analysis): super(self.__class__, self).__init__(_analysis) @property def donor_atom_type(self): '''A string representation of the atom's donor type.''' hbc = Molecule.HBondCriterion() return hbc.donor_atom_type(Atom(_atom=self._analysis.atom())) def __repr__(self): if self.nnegative is None: return 'HBondDonor(%s) no evidence' % self.identifier else: return 'HBondDonor(%s) %d positive %d negative' % (self.identifier, self.npositive, self.nnegative)
[docs] class HBondAcceptor(HBondAtom): '''A potental acceptor atom. This class will be augmented with the evidence found during match_fitting_data(). ''' def __init__(self, _analysis): super(self.__class__, self).__init__(_analysis) @property def acceptor_atom_type(self): '''A string representation of the atom's acceptor type.''' hbc = Molecule.HBondCriterion() return hbc.acceptor_atom_type(Atom(_atom=self._analysis.atom())) def __repr__(self): if self.nnegative is None: return 'HBondAcceptor(%s) no evidence' % self.identifier else: return 'HBondAcceptor(%s) %d positive %d negative' % ( self.identifier, self.npositive, self.nnegative)
def _find_donor(self, label): for d in self.donors: if d.label == label: return d def _find_acceptor(self, label): for a in self.acceptors: if a.label == label: return a def _find_hbond(self, donor_label, acceptor_label, intermolecular): for hb in self.crystal_hbonds: if not hb.intermolecular == intermolecular: continue if hb.atoms[1] in hb.atoms[0].neighbours: if donor_label == hb.atoms[0].label and acceptor_label == hb.atoms[-1].label: return hb elif hb.atoms[1] in hb.atoms[-1].neighbours: if donor_label == hb.atoms[-1].label and acceptor_label == hb.atoms[0].label: return hb def _find_propensity(self, d, a): for p in self.inter_propensities: if d.atom().label() == p.donor_label and a.atom().label() == p.acceptor_label: return p raise RuntimeError('No matching propensity')
[docs] class HBond(object): '''A putative HBond in the propensity calculation.''' def __init__(self, hbp, _outcome): self._outcome = _outcome self.donor = hbp._find_donor(_outcome[0].atom().label()) self.acceptor = hbp._find_acceptor(_outcome[1].atom().label()) self.propensity = hbp._find_propensity(_outcome[0], _outcome[1]) def __eq__(self, other): return ( self.donor == other.donor and self.acceptor == other.acceptor and self.propensity == other.propensity )
[docs] class HBondGrouping(object): '''A grouping of interactions between donors and acceptors representing a possible hbond network. This represents a point in the chart of Mercury's HBondPropensity wizard. ''' _donor_outcome_mapping = { PackingSimilarityLib.DOES_NOT_DONATE: "does not donate", PackingSimilarityLib.DONATES_ONCE: "donates once", PackingSimilarityLib.DONATES_TWICE: "donates twice", PackingSimilarityLib.DONATES_THREE_TIMES: "donates 3 times", PackingSimilarityLib.DONATES_FOUR_TIMES: "donates 4 times", PackingSimilarityLib.DONATES_FIVE_TIMES: "donates 5 times", PackingSimilarityLib.DONATES_SIX_OR_MORE_TIMES: "donates >= 6 times" } _acceptor_outcome_mapping = { PackingSimilarityLib.DOES_NOT_ACCEPT: "does not accept", PackingSimilarityLib.ACCEPTS_ONCE: "accepts once", PackingSimilarityLib.ACCEPTS_TWICE: "accepts twice", PackingSimilarityLib.ACCEPTS_THREE_TIMES: "accepts 3 times", PackingSimilarityLib.ACCEPTS_FOUR_TIMES: "accepts 4 times", PackingSimilarityLib.ACCEPTS_FIVE_TIMES: "accepts 5 times", PackingSimilarityLib.ACCEPTS_SIX_TIMES: "accepts 6 times", PackingSimilarityLib.ACCEPTS_SEVEN_OR_MORE_TIMES: "accepts >= 7 times" } def __init__(self, hbond_propensities, _outcome): self._find_propensity = hbond_propensities._find_propensity self._outcome = _outcome self.donors = tuple( CrystalDescriptors.HBondPropensities.HBondDonor(d._analysis) for d in hbond_propensities.donors ) self.acceptors = tuple( CrystalDescriptors.HBondPropensities.HBondAcceptor(a._analysis) for a in hbond_propensities.acceptors ) self.hbonds = tuple( CrystalDescriptors.HBondPropensities.HBond(hbond_propensities, _outcome) for _outcome in self._outcome.pair_propensities() ) if len(self.hbonds) == 0: self.hbond_score = 0.0 else: self.hbond_score = sum(hb.propensity.propensity for hb in self.hbonds) / len(self.hbonds) observed = [] for hb in hbond_propensities.crystal_hbonds: ats = hb.atoms if ats[1] in ats[0].neighbours: d = ats[0] a = ats[-1] else: d = ats[-1] a = ats[0] observed.append((d.label, a.label)) donor_counts = collections.defaultdict(int) acceptor_counts = collections.defaultdict(int) for hb in self.hbonds: d = hb.donor._analysis.atom().label() a = hb.acceptor._analysis.atom().label() donor_counts[d] += 1 acceptor_counts[a] += 1 hb.is_observed = (d, a) in observed coord_score = 0.0 for d in self.donors: for attr, value in d.__dict__.items(): if not attr.startswith('_'): setattr(d, attr, value) label = d._analysis.atom().label() outcome = donor_counts[label] preds_for_label = hbond_propensities.coordination_scores.predictions_for_label(label, 'donor') if preds_for_label is None: d_coord = 0.0 else: d_coord = preds_for_label[1].get(outcome, 0.0) d.outcome = outcome d.outcome_label = self._donor_outcome_mapping[outcome] d.coordination_propensity = d_coord coord_score += d_coord for a in self.acceptors: for attr, value in a.__dict__.items(): if not attr.startswith('_'): setattr(a, attr, value) label = a._analysis.atom().label() outcome = acceptor_counts[label] preds_for_label = hbond_propensities.coordination_scores.predictions_for_label(label, 'acceptor') if preds_for_label is None: a_coord = 0.0 else: a_coord = preds_for_label[1].get(outcome, 0.0) a.outcome = outcome a.outcome_label = self._acceptor_outcome_mapping[outcome] a.coordination_propensity = a_coord coord_score += a_coord self.coordination_score = -1 * (coord_score / len(self.donors + self.acceptors)) def __repr__(self): dons = ' '.join('%s: %s' % (d.atom, d.outcome) for d in self.donors) accs = ' '.join('%s: %s' % (d.atom, d.outcome) for d in self.acceptors) return 'Grouping(Donors: %s; Acceptors: %s)' % (dons, accs) def __eq__(self, other): return ( all(d.atom == o.atom for d, o in zip(self.donors, other.donors)) and all(d.outcome == o.outcome for d, o in zip(self.donors, other.donors)) and all(a.atom == o.atom for a, o in zip(self.acceptors, other.acceptors)) and all(a.outcome == o.outcome for a, o in zip(self.acceptors, other.acceptors)) and len(self.hbonds) == len(other.hbonds) and all(hb == other_hb for hb, other_hb in zip(self.hbonds, other.hbonds)) )
[docs] class Model(object): '''The logistic regression model.'''
[docs] class Parameter(object): '''A named parameter of the regression.''' def __init__(self, _crystal_structure_property): self._crystal_structure_property = _crystal_structure_property self.include = True @property def identifier(self): '''The identifier of the parameter.''' return self._crystal_structure_property.label()
[docs] def calculate(self, donor, acceptor): '''The value of this property for the pair of atoms. :parameter donor: `ccdc.hbond_coordination.CrystalDescriptors.HBondPropensities.HBondDonor` instance. :parameter acceptor: `ccdc.hbond_coordination.CrystalDescriptors.HBondPropensities.HBondAcceptor` instance. :returns: float ''' hbprop = PackingSimilarityLib.CrystalStructurePropertyAsHyBondingProperty( self._crystal_structure_property) if hbprop is not None: hbprop.update(donor._analysis, acceptor._analysis, PackingSimilarityLib.HyBondContactAnalysis()) else: hbprop = PackingSimilarityLib.CrystalStructurePropertyAsMoleculeProperty( self._crystal_structure_property) hbprop.update(donor._analysis.atom().molecule()) return hbprop.value()
[docs] class Coefficient(object): '''A coefficient of the regression model.''' def __init__(self, _coefficient): self._coefficient = _coefficient @property def identifier(self): '''The identifier of the coefficient.''' return self._coefficient.label() @property def confidence_interval(self): '''The upper and lower bounds of the coefficient.''' return (self._coefficient.coefficient_lower_bound(), self._coefficient.coefficient_upper_bound()) @property def estimate(self): '''The estimate of the coefficient.''' return self._coefficient.coefficient_value() @property def is_baseline(self): '''Whether or not the coefficient is used for the baseline calculation.''' return self._coefficient.is_baseline() @property def standard_error(self): '''Standard error of the coefficient.''' if not self.is_baseline: return self._coefficient.significance_statistics().standard_error_ @property def z_value(self): '''Z-value of the coefficient.''' if not self.is_baseline: return self._coefficient.significance_statistics().z_value_ @property def p_value(self): '''P-value of the coefficient.''' if not self.is_baseline: return self._coefficient.significance_statistics().pr_ @property def significance_code(self): '''A string representation of how significant the parameter is. '***' for P-value < 0.01, '**' < 0.01. '*' < 0.05 and '.' < 0.1 ''' if not self.is_baseline: return self._coefficient.significance_statistics().significance_code()
def __init__(self, _model): self._model = _model @property def equation(self): '''The regression equation.''' coeffs = [self._model.coefficient(i) for i in range(self._model.ncoefficients())] s = '%.2f + ' % coeffs[0].coefficient_value() s += ' + '.join( '%.2f*%s' % (c.coefficient_value(), c.label()) for c in coeffs[1:] if not c.is_baseline()) return s @property def coefficients(self): '''The coefficients of the model.''' return tuple( CrystalDescriptors.HBondPropensities.Model.Coefficient(self._model.coefficient(i)) for i in range(self._model.ncoefficients()) ) @property def area_under_roc_curve(self): '''Area under the ROC curve.''' return self._model.goodness_of_fit_data().area_under_roc_curve() @property def advice_comment(self): '''A string representing the quality of the discrimination based on the ROC.''' auc = self.area_under_roc_curve if auc < 0.7: return 'bad' if auc < 0.75: return 'poor' if auc < 0.8: return 'maybe acceptable' if auc < 0.85: return 'good' if auc < 0.9: return 'excellent' return 'outstanding' @property def akaike_information_criterion(self): '''The Akaike Information Criterion (AIC) of the model.''' return self._model.goodness_of_fit_data().akaike_information_criterion() @property def null_deviance(self): '''The null deviance of the model.''' return self._model.goodness_of_fit_data().null_deviance() @property def null_deviance_degrees_of_freedom(self): '''The degrees of freedom of the null deviance of the model.''' return self._model.goodness_of_fit_data().null_deviance_degrees_of_freedom() @property def residual_deviance(self): '''The residual deviance of the model.''' return self._model.goodness_of_fit_data().residual_deviance() @property def residual_deviance_degrees_of_freedom(self): '''The number of degrees of freedom of the residual deviance of the model.''' return self._model.goodness_of_fit_data().residual_deviance_degrees_of_freedom() @property def log_likelihood(self): '''The log likelihood of the model.''' return self._model.goodness_of_fit_data().log_likelihood() @property def log_likelihood_test_p_value(self): '''The P-value of the log likelihood of the model.''' return self._model.goodness_of_fit_data().log_likelihood_test_p_value()
[docs] class Propensity(object): '''Base class for inter- and intra-molecular propensity predictions.''' def __init__(self, hbp, _prediction): self._prediction = _prediction self.donor = hbp._find_donor(self.donor_label) self.acceptor = hbp._find_acceptor(self.acceptor_label) self.hbond = hbp._find_hbond(self.donor_label, self.acceptor_label, self.is_intermolecular) @property def propensity(self): '''The predicted value.''' return self._prediction.prediction() @property def donor_label(self): '''The label of the donor atom.''' return self._prediction.pair().donor @property def acceptor_label(self): '''The label of the acceptor atom.''' return self._prediction.pair().acceptor @property def bounds(self): '''The lower and upper bounds of the prediction.''' return ( self._prediction.predicted_lower_bound(), self._prediction.predicted_upper_bound() ) @property def uncertainty(self): '''The uncertainty in the prediction.''' return self._prediction.uncertainty() @property def predictive_error(self): '''The error in the prediction.''' return self._prediction.predictive_error() @property def scores(self): '''The calculated values and statistics for the hbond prediction.''' return { qp[0]: qp[1] for qp in [self._prediction.quant_props(i) for i in range(self._prediction.nquant_props())] } @property def is_observed(self): '''Whether the hbond is observed in the target structure.''' return self.hbond is not None
[docs] class InterPropensity(Propensity): '''Predicted propensity for a single HBond.''' @property def is_intermolecular(self): '''Whether or not the predicted propensity is for an intermolecular HBond.''' return True
[docs] class IntraPropensity(Propensity): '''Predicted propensity for an intramolecular HBond.''' @property def is_intermolecular(self): '''Whether or not the predicted propensity is for an intermolecular HBond.''' return False
def __init__(self, settings=None): if settings is None: settings = CrystalDescriptors.HBondPropensities.Settings() self.settings = settings self._model = PackingSimilarityLib.create_propensity_prediction_model(settings.working_directory, settings.functional_groups_directory) self.all_parameters = tuple( CrystalDescriptors.HBondPropensities.Model.Parameter(p) for p in PackingSimilarityLib.analysis_model_properties() ) self._model.set_hbond_criterion(self.settings.hbond_criterion._hbc.hbond_criterion()) # Load the functional groups self._model.functional_group_manager().set_directory( self.settings.functional_groups_directory ) self._substructures = self._model.functional_group_manager().substructures() self._model.prediction().set_model_groups(self._substructures) self._model.prediction().set_working_directory(settings.working_directory) self._hbond_coordination = CrystalDescriptors.HBondCoordination() def _make_donors_and_acceptors(self, _csv): hba = PackingSimilarityLib.create_bonding_analysis( self._model.prediction(), self._model.hbond_criterion()) hba.run_analysis(_csv) donors = tuple( CrystalDescriptors.HBondPropensities.HBondDonor(d) for d in hba.observed_donor_atoms() ) acceptors = tuple( CrystalDescriptors.HBondPropensities.HBondAcceptor(a) for a in hba.observed_acceptor_atoms() ) return donors, acceptors
[docs] def hbond_atoms(self, crystal=None): '''The HBondDonor and HBondAcceptor atoms of a crystal. :param crystal: :class:`ccdc.crystal.Crystal` instance, or None, in which case the HBondAtoms of the target will be returned. :returns: a pair of tuples of :class:`ccdc.descriptors.CrystalDescriptors.HBondPropensities.HBondDonor` and :class:`ccdc.descriptors.CrystalDescriptors.HBondPropensities.HBondAcceptor`. ''' if crystal is None: return self.donors, self.acceptors _csv = ChemistryLib.CrystalStructureView_instantiate(crystal._crystal) return self._make_donors_and_acceptors(_csv)
[docs] def set_target(self, crystal): '''Sets a single target for the propensity calculation. :param crystal: a :class:`ccdc.crystal.Crystal` instance. ''' #self._model = PackingSimilarityLib.create_propensity_prediction_model(self.settings.working_directory, # self.settings.functional_groups_directory) #self._model.prediction().set_hbond_criterion(self.settings.hbond_criterion._hbc.hbond_criterion()) #self._model.functional_group_manager().read_groups() #self._substructures = PackingSimilarityLib.get_substructures_from_connsers( # self._model.functional_group_manager()) #self._model.prediction().set_model_groups(self._substructures) self._model.prediction().clear_hbond_models() self._model.prediction().set_working_directory(self.settings.working_directory) self.settings.limit_identifier_list = UtilitiesLib.DatabaseEntryIdentifierList() self._model.prediction().clear_regression_results() self._model.clear_predicted_propensities() self._called_make_fitting_data = False self.crystal = crystal self._csv = ChemistryLib.CrystalStructureView_instantiate(crystal._crystal) self._target = PackingSimilarityLib.PropensityPredictionTarget( crystal.identifier, crystal.identifier, self._csv ) self._model.prediction().set_target(self._target) func_groups = self._model.find_matching_groups(self._csv) self._model.prediction().set_model_groups(func_groups) self.functional_groups = tuple( CrystalDescriptors.HBondPropensities.FunctionalGroup(g) for g in func_groups ) self.donors, self.acceptors = self._make_donors_and_acceptors(self._csv) self._hbond_coordination.settings._graph_set_search_settings.hbond_criterion = self.settings.hbond_criterion self.coordination_scores = self._hbond_coordination.predict(crystal) for d in self.donors: obs = self.coordination_scores.predictions_for_label(d.label, 'donor') if obs is None: d.observed_coordination = 0.0 d.coordination_scores = {i: 0.0 for i in range(3)} else: d.observed_coordination, d.coordination_scores = obs for a in self.acceptors: obs = self.coordination_scores.predictions_for_label(a.label, 'acceptor') if obs is None: a.observed_coordination = 0.0 a.coordination_scores = {i: 0.0 for i in range(3)} else: a.observed_coordination, a.coordination_scores = obs self.crystal_hbonds = self.crystal.hbonds(hbond_criterion=self.settings.hbond_criterion)
@staticmethod def _get_as_dbhandle(db): if isinstance(db, FileFormatsLib.CrystalStructureDatabasePool): return DatabaseEntryLib.CrystalStructureDatabasePoolAsCrystalStructureDatabase(db) return db def _make_fitting_data_generator(self): self._fitting_data_generator = PackingSimilarityLib.PropensityPredictionFittingDataGenerator() self._fitting_data_generator.set_databases( [self._get_as_dbhandle(db._db) for db in self.settings.databases]) self._fitting_data_generator.set_identifier_list(self.settings.limit_identifier_list) self._fitting_data_generator.set_avoid_id( UtilitiesLib.DatabaseEntryIdentifier(self._target.name()) ) self._fitting_data_generator.set_target_id( UtilitiesLib.DatabaseEntryIdentifier(self._target.name()) ) self._fitting_data_generator.set_groups( self._model.prediction().model_groups(), self._model.prediction().groups_on_target(self._target, self._model.hbond_criterion()) )
[docs] def make_fitting_data(self): ''' Deprecated method. Please use match_fitting_data or use CrystalDescriptors.HBondPropensities.FittingData.from_file to limit the entries that are searched returns an object that will cause all of the database entries to be searched ''' self._make_fitting_data_generator() self._fitting_data_generator.generate_fitting_data() fd = CrystalDescriptors.HBondPropensities.FittingData( _fitting_data=self._fitting_data_generator.fitting_data(), identifiers=UtilitiesLib.DatabaseEntryIdentifierList(), databases=self.settings.databases) self._fitting_data = fd._fitting_data self._model.prediction().set_master_data(fd._fitting_data) self._called_make_fitting_data = True return fd
@property def fitting_data(self): '''The fitting data.''' _fd = None if not hasattr(self, '_fitting_data') else self._fitting_data return CrystalDescriptors.HBondPropensities.FittingData( _fitting_data=_fd, identifiers=self.settings.limit_identifier_list, databases=self.settings.databases) @fitting_data.setter def fitting_data(self, data): self._fitting_data = data._fitting_data self.settings.limit_identifier_list = data._identifiers() self.settings.databases = data.databases() self._model.prediction().set_fitting_data(self._fitting_data) self._make_fitting_data_generator()
[docs] def show_fitting_data_counts(self, data=None): '''Shows the matched counts for each functional group.''' if data is None: data = self.fitting_data longest_name = max(len(g.identifier) for g in self.functional_groups) print('%d structures' % len(data)) for g in self.functional_groups: print('%*s: %5d (%s)' % ( longest_name, g.identifier, data.nitems(g), data.advice_comment(g)) )
[docs] def match_fitting_data(self, count=None, verbose=False): '''Reduces fitting data down such that each functional group has at least the specified number of examples.''' if count is None: if not self._called_make_fitting_data: self.make_fitting_data() fitting_data_result = self._fitting_data_generator.fitting_data() self._model.prediction().set_master_data(fitting_data_result) best = PackingSimilarityLib.best_slider_position(self._model, True, 99) thresh = (99*best)//20 self._model.prediction().refine_fitting_data(True, 99, thresh) fitting_data_result = self._model.prediction().fitting_data() else: fitting_data_result = PackingSimilarityLib.PropensityPredictionFittingDataGenerator.full_generation_of_fitting_data( [self._get_as_dbhandle(db._db) for db in self.settings.databases], self.settings.limit_identifier_list, UtilitiesLib.DatabaseEntryIdentifier(self._target.name()), UtilitiesLib.DatabaseEntryIdentifier(self._target.name()), self._model.prediction().model_groups(), self._model.prediction().groups_on_target(self._target, self._model.hbond_criterion()), count, verbose ) fitting_data_result = PackingSimilarityLib.create_subset(fitting_data_result, fitting_data_result.model_groups(), count) self._fitting_data = fitting_data_result self.show_fitting_data_counts()
[docs] def analyse_fitting_data(self): '''Perform a hydrogen bond analysis of the fitting data.''' pred = self._model.prediction() bonding_analysis = PackingSimilarityLib.create_bonding_analysis(pred, self._model.hbond_criterion()) surveyor = PackingSimilarityLib.create_surveyor() pred.set_fitting_data(self._fitting_data) for i in range(self._fitting_data.nitems()): self._fitting_data.item(i).analyse(bonding_analysis, surveyor) self._find_donor_acceptor_counts()
def _find_donor_acceptor_counts(self): '''Find true/false outcomes for the functional groups, excluding the poorly represented hbonds.''' for d in self.donors + self.acceptors: d.nnegative = d.npositive = None d.exclude = False data = self._model.prediction().fitting_data() data.clear_exclusions() don, acc, txt = PackingSimilarityLib.find_donor_and_acceptor_counts(data) doing_donors = True self._don_excludes = [] self._acc_excludes = [] for l in txt[1:]: cat, at, pos, neg = l if cat == 'Acceptor(s)': doing_donors = False pos = int(pos) neg = int(neg) if pos < 3 or neg < 3 or pos + neg < 3: if doing_donors: self._don_excludes.append(at) else: self._acc_excludes.append(at) if at == 'other': continue if doing_donors: candidates = self.donors else: candidates = self.acceptors for x in candidates: tag = x._analysis.atom_in_group_label() if tag == at and x.npositive is None: x.npositive = int(pos) x.nnegative = int(neg) break html = [ '<table name="DonorAcceptorCounts" border="1" style=" border-color:#a0a0a4; border-style:dotted;" cellspacing="2" cellpadding="5">', '<tr><td>Category</td><td>Label</td><td>True</td><td>False</td></tr>' ] for x in self.donors + self.acceptors: if x.npositive is None or x.nnegative is None: x.exclude = True continue if x.npositive < 3 or x.nnegative < 3 or x.npositive + x.nnegative < 5: x.exclude = True html.append( '<tr><td>%s</td><td>%s</td><td align="right">%d</td><td align="right">%d</td></tr>' % ('Donor' if d in self.donors else 'Acceptor', x.identifier, x.npositive, x.nnegative) ) self.training_stats_html = html def _sort_out_exclusions(self): data = self._model.prediction().fitting_data() data.clear_exclusions() for d in self._don_excludes: data.add_exclusion(PackingSimilarityLib.DonorAcceptorPair(d, '')) for a in self._acc_excludes: data.add_exclusion(PackingSimilarityLib.DonorAcceptorPair('', a)) for d in self.donors: if d.exclude: data.add_exclusion( PackingSimilarityLib.DonorAcceptorPair(d.identifier, '') ) for a in self.acceptors: if a.exclude: data.add_exclusion(PackingSimilarityLib.DonorAcceptorPair('', a.identifier)) def _write_data_files(self): '''Write all necessary data files.''' # Training data self._sort_out_exclusions() data = self._model.prediction().fitting_data() stats = data.spreadsheet_data() stream = PackingSimilarityLib.ofstream( str(os.path.join(self.settings.working_directory, '%s_training_dataset.csv' % self._target.name())) ) PackingSimilarityLib.print_hbond_stats(stats, stream) stream.close() # Molecule with io.MoleculeWriter( os.path.join(self.settings.working_directory, 'prediction_molecule_%s.mol2' % self._target.name()) ) as writer: writer.write(self.crystal.molecule) # fitting_data_.gcd with open(os.path.join(self.settings.working_directory, 'fitting_data_%s.gcd' % self._target.name()), 'w') as writer: writer.write('\n'.join(data.item(i).identifier().str() for i in range(data.nitems()))) # _counts.html with open(os.path.join(self.settings.working_directory, '%s_counts.html' % self._target.name()), 'w') as writer: writer.write('\n'.join(self.training_stats_html)) # Matching .con files fgm = self._model.functional_group_manager() model_groups = self._model.prediction().model_groups() for g in model_groups: name = g.name for named_connser in fgm.connser_files(): if named_connser.name == name or named_connser.name == name.replace('_', ' '): named_connser.connser_file.print_(os.path.join(self.settings.working_directory, '%s.con' % name)) break else: print('No matching connser file for %s' % name)
[docs] def perform_regression(self): '''Performs the logistic regression.''' self._write_data_files() pred = self._model.prediction() # Need to be able to choose them pred.set_model_parameters( tuple(p._crystal_structure_property for p in self.all_parameters if p.include) ) pars = pred.r_model_parameters() regression = PackingSimilarityLib.RLogisticRegression() if regression.path_to_r() == "" or not os.path.exists(regression.path_to_r()): if sys.platform == 'win32': path_to_r = os.path.join( io.csd_directory(), os.pardir, 'Mercury', 'rstatistics', 'bin', 'R.exe' ) elif sys.platform == 'darwin': path_to_r = os.path.join(io.csd_directory(), os.pardir, os.pardir, 'mercury.app', 'Contents', 'MacOS', 'rstatistics', 'bin' , 'R') else: if sys.maxsize > 2 ** 31: path_to_r = os.path.join( os.path.dirname(io.csd_directory()), 'c_linux-64', 'rstatistics', 'bin', 'R' ) else: path_to_r = os.path.join( os.path.dirname(io.csd_directory()), 'c_linux', 'rstatistics', 'bin', 'R' ) regression.set_path_to_r(path_to_r) results = regression.run(pred) if not results: raise RuntimeError('Regression could not be performed. Look in %s for details.' % os.path.join( pred.working_directory(), 'logistic_regression.R')) self.model = CrystalDescriptors.HBondPropensities.Model(_model=results) return self.model
@property def propensities(self): '''The inter- and intra-propensities of the prediction.''' return self.inter_propensities + self.intra_propensities
[docs] def calculate_propensities(self, crystal=None): '''Apply the regression equation to a crystal. :parameter crystal: :class:`ccdc.crystal.Crystal` instance or None. If None the target structure will be used. ''' if crystal is None: _target = self._target else: _csv = ChemistryLib.CrystalStructureView_instantiate(crystal._crystal) _target = PackingSimilarityLib.PropensityPredictionTarget(crystal.identifier, _csv) data = self._model.prediction().fitting_data() self.freqs = PackingSimilarityLib.calculate_contact_frequencies(data) props = self._model.predict_propensity( _target, self.model._model ) inters = props.inter_predictions() self.inter_propensities = tuple( CrystalDescriptors.HBondPropensities.InterPropensity(self, inters.prediction(i)) for i in range(inters.npredictions()) ) intra_predictions = tuple( props.intra_all_pair_prediction(i) for i in range(props.nintra_all_pair_predictions()) ) self.intra_propensities = tuple( CrystalDescriptors.HBondPropensities.IntraPropensity(self, x) for x in intra_predictions ) return self.propensities
def _count_hbond_groupings(self, min_donor_prob=0.0, min_acceptor_prob=0.0): '''Counts the number of coordination possibilites, which correlates with the number of groupings.''' d_preds = [ [c for c, v in self.coordination_scores.predictions_for_label(d.label, 'donor')[1].items() if v >= min_donor_prob] for d in self.donors ] a_preds = [ [c for c, v in self.coordination_scores.predictions_for_label(a.label, 'acceptor')[1].items() if v >= min_acceptor_prob] for a in self.acceptors ] max_acc_ct = sum(max(ap) for ap in a_preds) def _possible_paths(donors, acceptors, d_ct): if donors: d, preds = donors[0] for dc in [c for c in preds if d_ct + c <= max_acc_ct]: for p in _possible_paths(donors[1:], acceptors, d_ct + dc): yield [(d, dc)] + p elif len(acceptors) > 1: a, preds = acceptors[0] for ac in [c for c in preds if 0 <= d_ct - c <= sum(max(ap) for z, ap in acceptors[1:])]: for p in _possible_paths(donors, acceptors[1:], d_ct - ac): yield [(a, ac)] + p else: a, preds = acceptors[0] yield [(a, d_ct)] for i, c in enumerate(_possible_paths(zip(self.donors, d_preds), zip(self.acceptors, a_preds), 0)): pass return i
[docs] def generate_hbond_groupings(self, min_donor_prob=None, min_acceptor_prob=None): '''Generate all possible permutations of donors and acceptors to create all possible hbond groupings.''' if min_donor_prob is None or min_acceptor_prob is None: if len(self.donors) >= 8 or len(self.acceptors) >= 8: min_donor_prob = 0.1 min_acceptor_prob = 0.1 else: min_donor_prob = 0.0 min_acceptor_prob = 0.0 if not hasattr(self, 'propensities'): self.calculate_propensities() predicted_propensity = self._model.prediction().calculate_hbond_propensities( self._target, self.model._model, self._model.hbond_criterion() ) pair_preds = predicted_propensity.inter_predictions() inter_predictions = tuple(pair_preds.prediction(i) for i in range(pair_preds.npredictions())) intra_predictions = tuple( predicted_propensity.intra_all_pair_prediction(i) for i in range(predicted_propensity.nintra_all_pair_predictions()) ) self._intra_predictions = intra_predictions analysis = self._hbond_coordination.hbond_analysis coordination_propensities = self._hbond_coordination._coordination_propensities self._hbond_coordination._calculator.update_propensities(coordination_propensities, analysis) assessment = PackingSimilarityLib.CrystalHydrogenBondAssessment( self._target.name(), analysis, inter_predictions, coordination_propensities, min_donor_prob, min_acceptor_prob ) self.outcomes = outcomes = [assessment.donor_acceptor_pairing(i) for i in range(assessment.ndonor_acceptor_pairings())] self.hbond_groupings = tuple( CrystalDescriptors.HBondPropensities.HBondGrouping(self, outcome) for outcome in outcomes ) self._observed_outcomes = [assessment.observed_outcome(i) for i in range(assessment.nobserved_outcomes())] if assessment.nobserved_outcomes() == 1: self._target_hbond_grouping = CrystalDescriptors.HBondPropensities.HBondGrouping(self, self._observed_outcomes[0]) else: self._target_hbond_grouping = None return self.hbond_groupings
[docs] def target_hbond_grouping(self): '''Which of the hbond groupings is of the target structure.''' if self._target_hbond_grouping is not None: return self._target_hbond_grouping def _valid(hb): d, a = hb.atoms[0], hb.atoms[-1] x = self._find_donor(d.label) y = self._find_acceptor(a.label) if x is None or y is None: x = self._find_donor(a.label) y = self._find_acceptor(d.label) return x.functional_group_identifier != 'other' and y.functional_group_identifier != 'other' chbs = [ hb for hb in self.crystal_hbonds if _valid(hb) ] for g in self.hbond_groupings: if len(g.hbonds) == len(chbs) and all(hb.is_observed for hb in g.hbonds): return g
#####################################################################################
[docs]class StatisticalDescriptors(object): '''A namespace holding statistical descriptors.'''
[docs] class RankStatistics(object): '''Represents a ranked collection of values for which statistics can be derived.''' def __init__(self, scores, activity_column=None): '''Instantiate a RankStatistics object. :param scores: Sorted list with descending scores containing active/inactive data. :param activity_column: extractor for activity data in scores. If ``None``, scores will be interpreted as an iterable of ``bool``s. If type is ``int`` or ``string``, scores will be interpreted as an iterable of indexed/named collections where the specified index is the activity ``bool``. Otherwise, use :meth:`operator.itemgetter' or :meth:`operator.attrgetter`. :raises: ValueError if the scores list is empty. ''' if len(scores) == 0: raise ValueError('scores list must not be empty.') self._scores = scores self.activity_column = activity_column self._score_count = len(scores) @property def activity_column(self): '''Get extractor for active/inactive classification from scores data.''' return self._activity_column @activity_column.setter def activity_column(self, activity_column): '''Set extractor for active/inactive classification from scores data :param activity_column: extractor for activity data in scores. If ``None``, scores will be interpreted as an iterable of ``bool``s. If type is ``int`` or ``string``, scores will be interpreted as an iterable of indexed/named collections where the specified index is the activity ``bool``. Otherwise, use :meth:`operator.itemgetter' or :meth:`operator.attrgetter`. ''' if activity_column is None: self._activity_column = lambda x: x elif type(activity_column) is int: self._activity_column = operator.itemgetter(activity_column) elif isinstance(activity_column, str): self._activity_column = operator.attrgetter(activity_column) else: self._activity_column = activity_column
[docs] def ROC(self): '''Calculate receiver operating characteristic (ROC) curve. :returns: list, list - True positive rate, False positive rate ''' true_positive_rate = [] # True positive rate = TP/(TP+FN) = Sensitivity false_positive_rate = [] # False positive rate = FP/(TN+FP) = Fall-out actives_count = 0 inactives_count = 0 for i in range(self._score_count): if self._activity_column(self._scores[i]): actives_count += 1 else: inactives_count += 1 true_positive_rate.append(float(actives_count)) false_positive_rate.append(float(inactives_count)) if actives_count > 0: true_positive_rate = [tp / actives_count for tp in true_positive_rate] if inactives_count > 0: false_positive_rate = [fp / inactives_count for fp in false_positive_rate] return true_positive_rate, false_positive_rate
[docs] def AUC(self): '''Calculate the area under the ROC curve. :returns: Area under the ROC curve.''' tpr, fpr = self.ROC() auc = 0 for i in range(0, self._score_count - 1): auc += (fpr[i + 1] - fpr[i]) * (tpr[i + 1] + tpr[i]) return auc / 2
def _RIE(self, alpha): active_count = 0 sum_exp = 0 for i in range(self._score_count): active = self._activity_column(self._scores[i]) if active: active_count += 1 sum_exp += math.exp(-(alpha * (i + 1)) / self._score_count) if active_count == 0: rie = 0.0 else: denom = 1.0 / self._score_count * ((1.0 - math.exp(-alpha)) / (math.exp(alpha / self._score_count) - 1.0)) rie = sum_exp / float(active_count * denom) return rie, active_count
[docs] def RIE(self, alpha=0.0): '''Calculate robust initial enhancement (RIE) as defined in: Sheridan R.P., Singh S.B., Fluder E.M., Kearsley S.K., "Protocols for Bridging the Peptide to Nonpeptide Gap in Topological Similarity Searches" J. Chem. Inf. Comp. Sci. 41:1395-1406 (2001). :param alpha: exponential weighting factor :raises: ValueError if alpha is less than or equal to 0.0 ''' if alpha <= 0.0: raise ValueError('alpha parameter must be greater than zero.') rie, active_count = self._RIE(float(alpha)) return rie
[docs] def BEDROC(self, alpha=0.0): '''Calculate Boltzmann-Enhanced Discrimination of ROC (BEDROC) as defined in: Truchon J., Bayly C.I., "Evaluating Virtual Screening Methods: Good and Bad Metric for the "Early Recognition" Problem" J. Chem. Inf. Model. 47:488-508 (2007). :param alpha: exponential weighting factor. :raises: ValueError if alpha is less than or equal to 0.0. ''' if alpha <= 0.0: raise ValueError('alpha parameter must be greater than zero.') rie, active_count = self._RIE(float(alpha)) if active_count <= 0.0: bedroc = 0.0 else: ratio = 1.0 * active_count / self._score_count rie_max = (1.0 - math.exp(-alpha * ratio)) / (ratio * (1.0 - math.exp(-alpha))) rie_min = (1.0 - math.exp(alpha * ratio)) / (ratio * (1.0 - math.exp(alpha))) if rie_max == rie_min: bedroc = 1.0 else: bedroc = (rie - rie_min) / (rie_max - rie_min) return bedroc
def _find_position(self, fraction): if fraction < 0.0 or fraction > 1.0: raise ValueError('fraction parameter must be within interval [0,1]') return int(math.ceil(self._score_count * fraction))
[docs] def EF(self, fraction=0.0): '''Calculate enrichment factor (EF) at the specified fraction. :param fraction: position within data for which enrichment factor is to be determined. :raises: ValueError if fraction is not within interval [0,1] ''' position = self._find_position(fraction) found_active_count = 0 for i in range(position): if self._activity_column(self._scores[i]): found_active_count += 1 if found_active_count == 0: return 0.0 active_count = found_active_count for i in range(position, self._score_count): if self._activity_column(self._scores[i]): active_count += 1 return found_active_count / (active_count * fraction)
[docs] def PPV(self, fraction=0.0): '''Calculate precision or positive predictive value (PPV) at the specified fraction. :param fraction: position within data for which precision is to be determined. :raises: ValueError if fraction is not within interval [0,1] ''' position = self._find_position(fraction) active_count = 0 for i in range(position): if self._activity_column(self._scores[i]): active_count += 1 return float(active_count) / (position + 1)
[docs] def ACC(self, fraction=0.0): '''Calculate accuracy metric (ACC) at the specified fraction. ACC = (TP+TN) / (TP+FP+TN+FN) :param fraction: position within data for which accuracy metric is to be determined. :raises: ValueError if fraction is not within interval [0,1] ''' position = min(self._find_position(fraction), self._score_count - 1) true_positives = [] false_negatives = [] actives_count = 0 inactives_count = 0 for i in range(self._score_count): if self._activity_column(self._scores[i]): actives_count += 1 else: inactives_count += 1 true_positives.append(float(actives_count)) false_negatives.append(float(inactives_count)) return (true_positives[position] + inactives_count - false_negatives[position])\ / (actives_count + inactives_count)
#################################################################################################