#
# This code is Copyright (C) 2015 The Cambridge Crystallographic Data Centre
# (CCDC) of 12 Union Road, Cambridge CB2 1EZ, UK and a proprietary work of CCDC.
# This code may not be used, reproduced, translated, modified, disassembled or
# copied, except in accordance with a valid licence agreement with CCDC and may
# not be disclosed or redistributed in any form, either in whole or in part, to
# any third party. All copies of this code made in accordance with a valid
# licence agreement as referred to above must contain this copyright notice.
#
# No representations, warranties, or liabilities are expressed or implied in the
# supply of this code by CCDC, its servants or agents, except where such
# exclusion or limitation is prohibited, void or unenforceable under governing
# law.
#
'''
The :mod:`ccdc.descriptors` module contains classes for calculating descriptors.
The main classes in the :mod:`ccdc.descriptors` module are:
- :class:`ccdc.descriptors.MolecularDescriptors`.
- :class:`ccdc.descriptors.GeometricDescriptors`.
- :class:`ccdc.descriptors.CrystalDescriptors.PowderPattern`.
- :class:`ccdc.descriptors.CrystalDescriptors.Morphology`.
- :class:`ccdc.descriptors.CrystalDescriptors.GraphSetSearch`.
- :class:`ccdc.descriptors.CrystalDescriptors.HBondCoordination`.
- :class:`ccdc.descriptors.CrystalDescriptors.HBondPropensities`.
- :class:`ccdc.descriptors.StatisticalDescriptors`.
'''
import collections
import math
import operator
import os
import warnings
import tempfile
warnings.simplefilter('always', DeprecationWarning)
from ccdc.molecule import Molecule, Atom, Bond, Coordinates
from ccdc.crystal import Crystal
from ccdc.utilities import Logger, bidirectional_dict, nested_class, Resources
from ccdc import io
from ccdc import morphology
from ccdc.utilities import _private_importer
with _private_importer() as pi:
pi.import_ccdc_module('ChemistryLib')
pi.import_ccdc_module('ChemicalAnalysisLib')
pi.import_ccdc_module('MathsLib')
pi.import_ccdc_module('SubstructureSearchLib')
pi.import_ccdc_module('MotifSearchLib')
pi.import_ccdc_module('PackingSimilarityLib')
pi.import_ccdc_module('UtilitiesLib')
pi.import_ccdc_module('DatabaseEntryLib')
pi.import_ccdc_module('FileFormatsLib')
pi.import_ccdc_module('CSDSQLDatabaseLib')
pi.import_ccdc_module('AnnotationsLib')
pi.import_ccdc_module('PoreAnalyserLib')
[docs]class MolecularDescriptors(object):
'''Namespace for descriptors of a molecular nature.'''
[docs] @staticmethod
def bond_length(bond):
'''The length of a bond.
:param bond: :class:`ccdc.molecule.Bond`
:returns: float, or ``None`` if an atom of the bond has no coordinates
'''
if bond._bond.atom1().site() and bond._bond.atom2().site():
return ChemistryLib.bond_length(
bond._bond.molecule(),
bond._bond.atom1().index(),
bond._bond.atom2().index()
)
[docs] @staticmethod
def atom_distance(a, b):
'''Distance between two atom irrespective their parent molecules.
:param a: :class:`ccdc.molecule.Atom`
:param b: :class:`ccdc.molecule.Atom`
:returns: float or ``None`` if one of the atoms has no coordinates
'''
if a._atom.site() and b._atom.site():
return MathsLib.distance(a.coordinates, b.coordinates)
[docs] @staticmethod
def atom_angle(a, b, c):
'''Angle subtended by three arbitrary atoms.
:param a: :class:`ccdc.molecule.Atom`
:param b: :class:`ccdc.molecule.Atom`
:param c: :class:`ccdc.molecule.Atom`
:returns: float - the angle in degrees or ``None`` if one of the atoms has no coordinates
'''
if a._atom.site() and b._atom.site() and c._atom.site():
return MathsLib.angle(
a.coordinates,
b.coordinates,
c.coordinates
).degrees()
[docs] @staticmethod
def atom_torsion_angle(a, b, c, d):
'''Plane angle subtended by the triples abc and bcd.
:param a: :class:`ccdc.molecule.Atom`
:param b: :class:`ccdc.molecule.Atom`
:param c: :class:`ccdc.molecule.Atom`
:param d: :class:`ccdc.molecule.Atom`
:returns: float - the angle in degrees or ``None`` if one of the atoms has no coordinates
'''
if a._atom.site() and b._atom.site() and c._atom.site() and d._atom.site():
return MathsLib.torsion_angle(
a.coordinates,
b.coordinates,
c.coordinates,
d.coordinates
).degrees()
@staticmethod
def _set_atom_torsion_angle(a, b, c, d, degrees):
'''Set a torsion angle.
:param a: :class:`ccdc.molecule.Atom`
:param b: :class:`ccdc.molecule.Atom`
:param c: :class:`ccdc.molecule.Atom`
:param d: :class:`ccdc.molecule.Atom`
:param degrees: float - the required torsion angle in degrees
'''
if degrees > 180.:
degrees = 360. - degrees
ChemicalAnalysisLib.set_torsion_angle(a._atom, b._atom, c._atom, d._atom, degrees)
[docs] @staticmethod
def atom_centroid(*atoms):
'''Define the centroid of the specified atoms.'''
coords = [a.coordinates for a in atoms]
if None in coords:
raise RuntimeError('An atom has no coordinates')
x, y, z = zip(*coords)
centroid = Coordinates(sum(x)/len(coords), sum(y)/len(coords), sum(z)/len(coords))
return centroid
[docs] @staticmethod
def atom_vector(atom0, atom1):
'''Define the vector from atom0 to atom1.
:param atom0: :class:`ccdc.molecule.Atom`
:param atom1: :class:`ccdc.molecule.Atom`
:returns: :class:`GeometricDescriptors.Vector`
:raises: RuntimeError if either atom has no coordinates.
'''
coords = [atom0.coordinates, atom1.coordinates]
if None in coords:
raise RuntimeError('An atom has no coordinates')
return GeometricDescriptors.Vector.from_points(coords[0], coords[1])
[docs] @staticmethod
def atom_plane(*atoms):
'''Define a plane from the coordinates of the atoms.
:param atoms: there must be at least three :class:`ccdc.molecule.Atom` in the arguments.
'''
return GeometricDescriptors.Plane.from_points(
*tuple(a.coordinates for a in atoms)
)
[docs] @staticmethod
def ring_centroid(ring):
'''The centroid of the ring's atoms.
:param ring: :class:`ccdc.molecule.Molecule.Ring`
'''
return MolecularDescriptors.atom_centroid(*ring.atoms)
[docs] @staticmethod
def ring_plane(ring):
'''The plane of the ring's atoms.
:param ring: :class:`ccdc.molecule.Molecule.Ring`
'''
return MolecularDescriptors.atom_plane(*ring.atoms)
[docs] @staticmethod
def point_group_analysis(mol):
'''Return Schoenflies notation of the point group symmetry of a molecule.
The point group symmetry is returned as a tuple of:
- order (e.g. 1)
- symbol (e.g. 'C1')
- description (e.g. 'Objects in this point group have no symmetry.')
:param mol: :class:`ccdc.molecule.Molecule`
:returns: (int, str, str)
'''
pga = ChemicalAnalysisLib.MoleculePointGroupAssigner(
ChemicalAnalysisLib.RadialAtomProximity(0.5)
)
try:
pg = pga.assign(mol._molecule)
if not pga.assignment_succeeded():
return 0, 'Unknown', 'point group assignment did not conclude'
return pg.order(), pg.symbol(), pg.description()
except RuntimeError as e:
logger = Logger()
logger.warning('molecule.point_group_analysis %s: %s' % (mol.identifier, e))
return 0, 'Unknown', 'point group assignment failed'
@staticmethod
def _make_subset(mol, atoms):
'''Private: make an internal subset.'''
sub = ChemistryLib.MoleculeSubset.instantiate(mol._molecule)
sub.select_all()
sub.select_atom_indices_from_current([a.index for a in atoms])
return sub
[docs] @nested_class('MolecularDescriptors')
class Overlay(object):
'''Overlays two molecules'''
def __init__(self, mol1, mol2, atoms=None, invert=False, rotate_torsions=False, with_symmetry=True, match_elements=True):
'''Overlay mol2 on mol1
:param mol1: a :class:`ccdc.molecule.Molecule` instance
:param mol2: a :class:`ccdc.molecule.Molecule` instance
:param atoms: a list of pairs of atoms to use in the overlay, or None
for all atoms to be used
:param invert: allow inversions in the overlay
:param rotate_torsions: allow torsional rotations when overlaying
:param with_symmetry: take account of symmetry when overlaying atoms
:param match_elements: require that elements match in the two molecules
'''
m2 = mol1.copy()
m1 = mol2.copy()
if atoms is not None:
# Got to get the matching atoms
def _match_atom(mol, at):
def element_match(at1, at2):
return match_elements is False or at1.atomic_symbol == at2.atomic_symbol
l = [a for a in mol.atoms if element_match(a, at) and str(a.coordinates) == str(at.coordinates)]
if len(l) == 1:
return l[0]
else:
# Fall back on match by index
if element_match(mol.atoms[at.index], at):
return mol.atoms[at.index]
raise RuntimeError('Atom %s could not be matched' % at)
inp1, inp2 = zip(*atoms)
ats1 = [_match_atom(m1, a) for a in inp2]
ats2 = [_match_atom(m2, a) for a in inp1]
if with_symmetry:
sub1 = MolecularDescriptors._make_subset(m1, ats1)
sub2 = MolecularDescriptors._make_subset(m2, ats2)
if invert:
invert = ChemicalAnalysisLib.MoleculeOverlay.NO_INVERT_AND_INVERT
else:
invert = ChemicalAnalysisLib.MoleculeOverlay.NO_INVERT
if rotate_torsions:
rotate = ChemicalAnalysisLib.MoleculeOverlay.FLEXIBLE_TORSIONS
else:
rotate = ChemicalAnalysisLib.MoleculeOverlay.FIXED_TORSIONS
if match_elements:
match_elements = ChemicalAnalysisLib.MoleculeOverlay.MATCH_ELEMENTS
else:
match_elements = ChemicalAnalysisLib.MoleculeOverlay.NO_MATCH_ELEMENTS
if atoms is None:
ov = ChemicalAnalysisLib.MoleculeOverlay(
m1._molecule,
m2._molecule,
invert,
rotate,
match_elements
)
ov.fit_geometry().apply()
else:
if with_symmetry:
ov = ChemicalAnalysisLib.MoleculeOverlay(
sub1,
sub2,
invert,
rotate,
match_elements
)
if hasattr(m1, '_cell'):
cell = m1._cell
else:
cell = ChemistryLib.Cell()
ov.fit_geometry(cell, m1._molecule).apply()
else:
ov = ChemicalAnalysisLib.MoleculeOverlay(
m1._molecule,
m2._molecule,
list(zip([a._atom for a in ats1], [a._atom for a in ats2])),
invert,
rotate,
match_elements
)
ov.fit_geometry().apply()
self._ov = ov
self._molecule = m1
self._transformation = Molecule.Transformation(_transformation=ov.transformation())
self._atom_match = []
best_match = ov.best_match()
for i in range(best_match.nnode_matches()):
atom_pair = best_match.node_match(i)
self._atom_match.append((mol1.atoms[atom_pair[1].index()], mol2.atoms[atom_pair[0].index()]))
@property
def molecule(self):
'''Returns input molecule mol2 transformed to overlay onto mol1'''
return self._molecule
@property
def rmsd(self):
'''Returns RMSD between the two overlaid molecules'''
return self._ov.rms()
@property
def rmsd_tanimoto(self):
'''Returns Tanimoto RMSD between the two overlaid molecules'''
return self._ov.rms_tanimoto()
@property
def transformation(self):
'''Returns Molecule.Transformation object required to overlay mol2 over mol1'''
return self._transformation
@property
def max_distance(self):
'''Returns the maximum distance between two equivalent atoms in the overlay (Angstroms)'''
return self._ov.max_distance()
@property
def atom_match(self):
'''Returns pairs of atoms from mol1 and mol2 matched in the overlay'''
return self._atom_match
[docs] @staticmethod
def overlay(mol1, mol2, atoms=None, invert=False, rotate_torsions=False, with_symmetry=True):
'''Overlay mol2 on mol1.
Deprecated and replaced with
:class:`ccdc.MolecularDescriptors.Overlay`
:param mol1: a :class:`ccdc.molecule.Molecule` instance
:param mol2: a :class:`ccdc.molecule.Molecule` instance
:param atoms: a list of pairs of atoms to use in the overlay, or None
for all atoms to be used
:param invert: allow inversions in the overlay
:param rotate_torsions: allow torsional rotations when overlaying
:param with_symmetry: take account of symmetry when overlaying atoms
:returns: a :class:`ccdc.molecule.Molecule` instance which is a copy of
mol2 overlaid on mol1
Note: if with_symmetry is true, and matching atoms are provided, then the matching atoms need to form a connected structure.
'''
warnings.warn('''This attribute is deprecated and will be removed in a later version.
Instead, please use overlay = MolecularDescriptors.Overlay(), overlay.molecule''', DeprecationWarning)
ov = MolecularDescriptors.Overlay(mol1, mol2, atoms, invert, rotate_torsions, with_symmetry)
return ov.molecule
[docs] @staticmethod
def rmsd(mol1, mol2, atoms=None, overlay=False, exclude_hydrogens=True, with_symmetry=True):
'''Return the RMSD of two molecules.
Both molecules should have the same atoms if ``atoms`` is ``None``.
:param atoms: a list of pairs :class:`ccdc.molecule.Atom` or ``None``
:param overlay: Whether to overlay the molecules before calculating RMSD
:param exclude_hydrogens: Whether all-atom or heavy atom RMSD should be calculated
:param with_symmetry: Whether to allow symmetrical matches
:returns: float
'''
if overlay:
the_overlay = MolecularDescriptors.Overlay(mol1, mol2, atoms, with_symmetry=with_symmetry)
mol2 = the_overlay.molecule
if with_symmetry:
symm = ChemicalAnalysisLib.WITH_SYMMETRY
else:
symm = ChemicalAnalysisLib.NO_SYMMETRY
if atoms is None:
if exclude_hydrogens:
ats1 = [a for a in mol1.atoms if a.atomic_symbol not in ['H', 'D', 'U']]
ats2 = [a for a in mol2.atoms if a.atomic_symbol not in ['H', 'D', 'U']]
atoms = list(zip(ats1, ats2))
else:
if with_symmetry:
ats1, ats2 = zip(*atoms)
orig_ats2 = mol2.atoms
ats2 = [Atom(_atom=orig_ats2[a.index]._atom) for a in ats2]
else:
tot = 0.0
for a, b in atoms:
ap = a.coordinates
bp = b.coordinates
tot += ((ap.x - bp.x)**2 + (ap.y - bp.y)**2 + (ap.z - bp.z)**2)
return (tot/len(atoms))**0.5
if atoms is None:
return ChemicalAnalysisLib.RMSD(mol1._molecule, mol2._molecule, symm)
else:
sub1 = MolecularDescriptors._make_subset(mol1, ats1)
sub2 = MolecularDescriptors._make_subset(mol2, ats2)
return ChemicalAnalysisLib.RMSD(sub1, sub2, symm)
[docs] class MaximumCommonSubstructure(object):
'''Identifies the maximum common substructure of two molecules.'''
[docs] class Settings(object):
'''Settings for the MCS calculation.'''
def __init__(self):
'''Initialises the settings.'''
self._settings = SubstructureSearchLib.MoleculeExactMatchCriteria()
self.connected = True
@property
def connected(self):
'''Whether substructure should be connected.
Note that finding disconnected maximal substructures is a lot slower than finding connected.
'''
return self._connected
@connected.setter
def connected(self, tf):
self._connected = tf
@property
def check_bond_count(self):
'''Whether the bond count of an atom be checked.'''
return self._settings.settings().check_bond_count()
@check_bond_count.setter
def check_bond_count(self, tf):
self._settings.settings().set_check_bond_count(tf)
@property
def check_bond_polymeric(self):
'''Check whether the bond be polymeric.'''
return self._settings.settings().check_bond_polymeric()
@check_bond_polymeric.setter
def check_bond_polymeric(self, tf):
self._settings.settings().set_check_bond_polymeric(tf)
@property
def check_bond_type(self):
'''Whether the bond type be checked.'''
return self._settings.settings().check_bondtype()
@check_bond_type.setter
def check_bond_type(self, tf):
self._settings.settings().set_check_bondtype(tf)
@property
def check_charge(self):
'''Whether the atom charge be checked.'''
return self._settings.settings().check_charge()
@check_charge.setter
def check_charge(self, tf):
self._settings.settings().set_check_charge(tf)
@property
def check_element(self):
'''Whether the element be checked.'''
return self._settings.settings().check_element()
@check_element.setter
def check_element(self, tf):
self._settings.settings().set_check_element(tf)
@property
def check_hydrogen_count(self):
'''Whether the atom's hydrogen count be checked.'''
return self._settings.settings().check_hydrogen_count()
@check_hydrogen_count.setter
def check_hydrogen_count(self, tf):
self._settings.settings().set_check_hydrogen_count(tf)
@property
def ignore_hydrogens(self):
'''Whether the hydrogens be ignored.'''
return self._settings.settings().ignore_hydrogens()
@ignore_hydrogens.setter
def ignore_hydrogens(self, tf):
self._settings.settings().set_ignore_hydrogens(tf)
def __init__(self, settings=None):
if settings is None:
settings = MolecularDescriptors.MaximumCommonSubstructure.Settings()
self.settings = settings
[docs] def search(self, mol1, mol2, only_edges=False, search_step_limit = 'unlimited'):
'''Calculate the maximum common substructure between two molecules.
:param mol1, mol2: :class:`ccdc.molecule.Molecule` instances.
:param only_edges: bool. The search will find a maximal common substructure matching only the edges.
:param search_step_limit: positive integer or 'unlimited'. Controls the maximum number of steps the algorithm takes.
:returns: a pair of tuples, giving matched :class:`ccdc.molecule.Atom` and :class:`ccdc.molecule.Bond` instances.
:raises: ValueError with invalid input
Note: this function is computationally exponential, so will take a long time on large molecules.
'''
if type(search_step_limit) == type(int()):
if search_step_limit <= 0:
raise ValueError("search_step_limit must be a positive integer or 'unlimited' ")
elif search_step_limit != 'unlimited':
raise ValueError("search_step_limit must be a positive integer or 'unlimited' ")
if len(mol1.atoms) > len(mol2.atoms):
def first(x): return x.second
def second(x): return x.first
mol2, mol1 = mol1, mol2
else:
def first(x): return x.first
def second(x): return x.second
if self.settings.connected:
conn = SubstructureSearchLib.CONNECTED
else:
conn = SubstructureSearchLib.DISCONNECTED
xcrit = SubstructureSearchLib.mol_exact_match_as_match(self.settings._settings)
if only_edges:
mcsg = SubstructureSearchLib.MoleculeMCSG(
mol1._molecule, mol2._molecule, conn, xcrit
)
if search_step_limit != 'unlimited':
mcsg.set_search_step_limit(search_step_limit)
match = mcsg.maximum_common_edge_subgraph()
else:
xcrit = SubstructureSearchLib.mol_exact_match_as_match(self.settings._settings)
mcsg = SubstructureSearchLib.MoleculeMCSG(
mol1._molecule, mol2._molecule, conn, xcrit
)
if search_step_limit != 'unlimited':
mcsg.set_search_step_limit(search_step_limit)
match = mcsg.maximum_common_subgraph()
if match is None:
return (tuple(), tuple())
atoms = tuple(
(Atom(_atom=first(match.node_match(i))), Atom(_atom=second(match.node_match(i))))
for i in range(match.nnode_matches())
)
bonds = tuple(
(Bond(_bond=first(match.edge_match(i))), Bond(_bond=second(match.edge_match(i))))
for i in range(match.nedge_matches())
)
return atoms, bonds
[docs] class AtomDistanceSearch(object):
'''More rapid searching for atoms within a certain distance of a point.'''
def __init__(self, molecule):
'''Initialise the searcher.
:param molecule: a :class:`ccdc.molecule.Molecule` instance.
'''
#self._searcher = ChemistryLib.create_distance_search(molecule._molecule)
self._searcher = ChemistryLib.SpatialAtomMap(molecule._molecule, ChemistryLib.SpatialAtomMap.ORTHOGONAL)
[docs] def atoms_within_range(self, point, radius):
'''A tuple of all atoms within the given radius of the given point.'''
ats = self._searcher.atoms_within_sphere(point, radius)
return tuple(Atom(_atom=a) for _, a in ats)
[docs] class PrincipleAxesAlignedBox(object):
'''The bounding box of the molecule aligned on its principle axes.
The vectors of the box have lengths of the size of the box. The x_vector is
the major axis of the molecule, the y_vector the minor axis and the z_vector
the minimal axis of the molecule.
'''
def __init__(self, molecule):
if not molecule.all_atoms_have_sites:
raise RuntimeError('Some atoms do not have coordinates')
self._identifier = molecule.identifier
self._box_model = ChemicalAnalysisLib.MoleculeBoxModel(molecule._molecule)
@property
def aligned_molecule(self):
'''The molecule aligned along its principle axes, with centre at its centre of geometry.'''
return Molecule(self._identifier, _molecule=self._box_model.aligned_molecule())
@property
def x_vector(self):
'''The vector of the major axis of the box.'''
v = self._box_model.z_vector()
return GeometricDescriptors.Vector.from_points(
Coordinates(0, 0, 0), Coordinates(v.x(), v.y(), v.z())
)
@property
def y_vector(self):
'''The vector of the minor axis of the box.'''
v = self._box_model.y_vector()
return GeometricDescriptors.Vector.from_points(
Coordinates(0, 0, 0), Coordinates(v.x(), v.y(), v.z())
)
@property
def z_vector(self):
'''The vector of the minimal axis of the box.'''
v = self._box_model.x_vector()
return GeometricDescriptors.Vector.from_points(
Coordinates(0, 0, 0), Coordinates(v.x(), v.y(), v.z())
)
@property
def volume(self):
'''The volume of the box.'''
return self._box_model.box_volume()
[docs] class AdjacencyMatrixDescriptorCalculator(object):
'''Descriptor calculator for descriptors based on a molecule's adjacency matrix.'''
def __init__(self, molecule):
'''Initialise the calculator for a molecule.
:param molecule: a :class:`ccdc.molecule.Molecule` instance.
'''
self.mol = molecule
self._calculator = ChemicalAnalysisLib.AdjacencyMatrixDescriptorCalculator(molecule._molecule)
[docs] def self_returning_walk(self, k):
'''Return the number of walks of length k that start and end at the same atom.
See Handbook of Molecular Descriptors, page 384, "self-returning walk counts".
:param k: the number of steps to walk.
:returns: float
'''
return self._calculator.self_returning_walk(k).value_
[docs] def self_returning_walk_ln(self, k):
'''Return the logarithm of the number of walks of length k that start and end at the same atom.
See Handbook of Molecular Descriptors, page 384, "self-returning walk counts".
:param k: the number of steps to walk.
:returns: float
'''
return self._calculator.self_returning_walk_ln(k).value_
[docs] def topological_charge_autocorrelation_index(self, k):
'''Calculate the topological charge autocorrelation index
See https://pubs.acs.org/doi/pdf/10.1021/ci00019a008
:param k: the topological distance to measure across
:returns: float
'''
return self._calculator.topological_charge_autocorrelation_index(k).value_
[docs] class ConnectivityIndices(object):
'''Connectivitiy index descriptor calculations.'''
def __init__(self, molecule):
'''Initialise the calculator for a molecule.
:param molecule: a :class:`ccdc.molecule.Molecule` instance.
'''
self.mol = molecule
[docs] def connectivity_index(self, m):
'''Return the connectivity index of mth order.
See Handbook of Molecular Descriptors, page 85, "connectivity indices".
:param m: the path length evaluated.
:returns: float
'''
return ChemicalAnalysisLib.ConnectivityIndices.connectivity_index(self.mol._molecule, m).value_
[docs] def average_connectivity_index(self, m):
'''Return the average connectivity index of mth order.
See Handbook of Molecular Descriptors, page 85, "connectivity indices".
:param m: the path length evaluated.
:returns: float
'''
return ChemicalAnalysisLib.ConnectivityIndices.average_connectivity_index(self.mol._molecule, m).value_
[docs] class AtomPairDistanceDescriptorCalculator(object):
'''Atom pair distance descriptor calculations.
:param molecule: a :class:`ccdc.molecule.Molecule` instance.
'''
def __init__(self, molecule):
'''Initialise the calculator for a molecule.'''
self.mol = molecule
self.calculator = ChemicalAnalysisLib.AtomPairDistanceDescriptorCalculator(self.mol._molecule)
[docs] def element_pair_count(self, element_a, element_b, distance):
'''Return a count of the number of times a pair of elements appear with a specified minimum path length.
See Handbook of Molecular Descriptors, page 428, "substructure descriptors, atom pairs".
:param element_a: str. the first element name.
:param element_b: str. the second element name.
:param distance: int. the number of bonds between atoms of the specified the elements.
:returns: float
'''
return self.calculator.element_pair_count(ChemistryLib.Element(element_a), ChemistryLib.Element(element_b), distance).value_
[docs] class InChIGenerator(object):
'''Generate InChI molecular descriptors.
The original source of the InChI generation tool is available at https://www.inchi-trust.org/
See Stephen R Heller, Igor Pletnev, Stephen Stein and Dmitrii Tchekhovskoi, J. Cheminformatics, 2015, 7:23
https://doi.org/10.1186/s13321-015-0068-4
:param include_stereo: configure the generator to include stereochemistry (True, default) or ignore stereochemistry (False)
:param add_hydrogens: configure the generator to add hydrogens (True, default) or not add hydrogens (False)
'''
[docs] class InChI(object):
'''An InChI object with the following attributes:
:ivar success: a boolean to indicate if the InChI generation was successful
:ivar inchi: the InChI string
:ivar key: the InChI key
:ivar errors: a tuple of InChI generation errors
:ivar warnings: a tuple of InChI generation warnings
'''
def __init__(self, inchi_internal):
'''Initialise from an internal InChI object.'''
self.success = inchi_internal.success()
self.inchi = inchi_internal.inchi() if self.success else None
self.key = inchi_internal.inchi_key() if self.success else None
self.errors = inchi_internal.errors()
self.warnings = inchi_internal.warnings()
def __eq__(self, other):
if isinstance(other, MolecularDescriptors.InChIGenerator.InChI):
return (
self.success == other.success and
self.inchi == other.inchi and
self.key == other.key and
self.errors == other.errors and
self.warnings == other.warnings
)
else:
raise TypeError('%s is not an InChI instance' % other)
def __hash__(self):
return hash(self.key)
def __init__(self, include_stereo=True, add_hydrogens=True):
'''Initialise, setting options.'''
self._generator = FileFormatsLib.InChIGenerator()
self.include_stereo = include_stereo
self.add_hydrogens = add_hydrogens
[docs] def generate(self, structure, include_stereo=None, add_hydrogens=None):
'''Generate InChI.
:param structure: a :class:`ccdc.crystal.Crystal` or :class:`ccdc.molecule.Molecule` object
:param include_stereo: set to True or False to override generator's setting
:param add_hydrogens: set to True or False to override generator's setting
:return: a :class:`ccdc.descriptors.MolecularDescripts.InChIGenerator.InChI` instance
:raises: TypeError if the type of the input structure is invalid
'''
if isinstance(structure, Crystal):
mol = structure.molecule._molecule
elif isinstance(structure, Molecule):
mol = structure._molecule
else:
raise TypeError(f'Invalid type for InChIGenerator.generate: {structure.__class__.__name__}')
if include_stereo is not None:
self.include_stereo = include_stereo
if add_hydrogens is not None:
self.add_hydrogens = add_hydrogens
return MolecularDescriptors.InChIGenerator.InChI(self._generator.generate(mol))
@property
def include_stereo(self):
'''Whether stereo chemistry be considered.'''
return self._generator.stereochemistry_option()
@include_stereo.setter
def include_stereo(self, value):
if value:
self._generator.set_stereochemistry_option(self._generator.INCLUDE_STEREOCHEMISTRY)
else:
self._generator.set_stereochemistry_option(self._generator.IGNORE_STEREOCHEMISTRY)
@property
def add_hydrogens(self):
'''Whether the InChI generator should add missing hydrogens'''
return self._generator.add_hydrogens_option()
@add_hydrogens.setter
def add_hydrogens(self, value):
new_val = self._generator.ADD_HYDROGENS if value else self._generator.DONT_ADD_HYDROGENS
self._generator.set_add_hydrogens_option(new_val)
#################################################################################################
[docs]class GeometricDescriptors(object):
'''A namespace to hold geometric classes and functions.'''
[docs] @staticmethod
def point_distance(p0, p1):
'''The distance between two points.'''
return MathsLib.distance(p0, p1)
[docs] @staticmethod
def point_angle(p0, p1, p2):
'''The angle between three points.'''
return MathsLib.angle(p0, p1, p2).degrees()
[docs] @staticmethod
def point_torsion_angle(p0, p1, p2, p3):
'''The torsion angle between four points.'''
return MathsLib.torsion_angle(p0, p1, p2, p3).degrees()
[docs] class Vector(object):
'''A 3D vector.'''
def __init__(self, x, y, z):
self.x = x
self.y = y
self.z = z
def __getitem__(self, index):
if index in range(3):
return getattr(self, 'xyz'[index])
def __iter__(self):
yield self.x
yield self.y
yield self.z
def __repr__(self):
return 'Vector(%.3f, %.3f, %.3f)' % (self.x, self.y, self.z)
@property
def length(self):
'''The length of the vector.'''
return sum(self[i]**2 for i in range(3))**0.5
[docs] def cross(self, other):
'''Cross product.'''
v = MathsLib.vector_3d(self.x, self.y, self.z)
w = MathsLib.vector_3d(*tuple(other[i] for i in range(3)))
x = v.cross(w)
return GeometricDescriptors.Vector(x[0], x[1], x[2])
[docs] def dot(self, other):
'''Dot product.'''
return sum(self[i]*other[i] for i in range(3))
[docs] @staticmethod
def from_points(p0, p1):
'''Construct the vector from p0 to p1.
:param p0, p1: :class:`ccdc.molecule.Coordinates`
'''
return GeometricDescriptors.Vector(p1.x - p0.x, p1.y - p0.y, p1.z - p0.z)
[docs] class Plane(object):
'''A plane in 3D.'''
def __init__(self, vector, distance, _plane=None):
'''Construct a plane from a vector and a distance to the origin.
:param vector: :class:`ccdc.descriptors.GeometricDescriptors.Vector`
:param distance: float
'''
if _plane is None:
self._plane = MathsLib.Plane(
MathsLib.vector_3d(vector[0], vector[1], vector[2]), distance
)
else:
self._plane = _plane
def __repr__(self):
return 'Plane(normal=%s, distance=%.3f)' % (self.normal, self.distance)
@property
def normal(self):
'''The normal to the plane.'''
v = self._plane.normal()
return GeometricDescriptors.Vector(v.x(), v.y(), v.z())
@property
def distance(self):
'''The distance from the origin of the plane.'''
return self._plane.distance()
[docs] @staticmethod
def from_points(*points):
'''Construct a RMS fitted plane from points.'''
return GeometricDescriptors.Plane(None, None, _plane=MathsLib.Plane(points))
[docs] def point_distance(self, point):
'''The distance of the point to the plane.'''
return self._plane.distance(point)
[docs] def plane_distance(self, plane):
'''The shortest distance of the plane to another.'''
return self._plane.distance(plane._plane)
[docs] def plane_angle(self, plane):
'''The angle between the two planes.'''
return self._plane.angle(plane._plane).degrees()
[docs] def vector_angle(self, vector):
'''The angle between the plane and the vector.'''
return self._plane.angle(MathsLib.vector_3d(vector.x, vector.y, vector.z)).degrees()
@property
def plane_vector1(self):
'''A vector in the plane, normal to the plane's normal.'''
vec = self._plane.v1()
return GeometricDescriptors.Vector(vec.x(), vec.y(), vec.z())
@property
def plane_vector2(self):
'''A vector in the plane, normal to both the plane's normal and the plane's plane_vector1.'''
vec = self._plane.v2()
return GeometricDescriptors.Vector(vec.x(), vec.y(), vec.z())
[docs] class Sphere(object):
'''A sphere in 3D.'''
def __init__(self, centre, radius):
self.centre = centre
self.radius = radius
def __repr__(self):
return 'Sphere(%s radius %.2f)' % (str(self.centre), self.radius)
def __eq__(self, other):
return (
all(round(self.centre[i], 6) == round(other.centre[i], 6) for i in range(3)) and
round(self.radius, 6) == round(other.radius, 6)
)
[docs] class Quaternion(object):
'''A normalised quaternion suitable for expressing rotations in 3D space.
Quaternions are a convenient method for expressing a complex sequence of rotations
By default this constructs a unit quaternion
'''
def __init__(self, _quaternion = None):
if _quaternion is None:
self._quaternion = MathsLib.Quaternion()
else:
self._quaternion = _quaternion
[docs] @staticmethod
def from_dimensions(q0,q1,q2,q3):
''' create from 4 real numbers. The quaternion will be normalised to unit length.
:param q0,q1,q2,q3: the 4 dimensions of the axes 1,i,j and k
:raises ValueError: with invalid input (e.g. if the length of the quaternion is 0)
'''
try:
return GeometricDescriptors.Quaternion(_quaternion=MathsLib.Quaternion(q0,q1,q2,q3))
except Exception as e:
raise ValueError('Unable to construct Quaternion') from e
[docs] @staticmethod
def from_vector_and_angle(vector,angle, unit = 'degrees'):
'''
Construct from a vector and an angle
:param vector: a tuple or list of length 3 that represents a vector in 3D space
:param angle: a double that represents an angle (in default in degrees.)
:raise: ValueError for a bad input
'''
if len(vector) != 3:
raise ValueError("Quaternion must be constructed from a 3D vector")
mlunit = MathsLib.Angle.DEGREES
if unit == 'radians':
mlunit = MathsLib.Angle.RADIANS
try:
return GeometricDescriptors.Quaternion(
_quaternion=MathsLib.Quaternion(
MathsLib.vector_3d(vector[0],vector[1],vector[2]),
MathsLib.Angle(angle,mlunit)))
except Exception as e:
raise ValueError('Unable to construct Quaternion') from e
[docs] @staticmethod
def from_euler_angles(alpha,beta,gamma, unit = 'degrees' ):
'''
create a quaternion from a set of euler angles
'''
mlunit = MathsLib.Angle.DEGREES
if unit == 'radians':
mlunit = MathsLib.Angle.RADIANS
try:
return GeometricDescriptors.Quaternion(
_quaternion=MathsLib.Quaternion(
MathsLib.Angle(alpha,mlunit),
MathsLib.Angle(beta, mlunit),
MathsLib.Angle(gamma,mlunit)))
except Exception as e:
raise ValueError('Unable to construct Quaternion') from e
[docs] def copy(self):
''' Create a copy of this quaternion
'''
return GeometricDescriptors.Quaternion( _quaternion=MathsLib.Quaternion(self._quaternion) )
[docs] def square(self):
''' In-place squares this quaternion
'''
self._quaternion.square()
[docs] def invert(self):
''' In-place inverts this quaternion
'''
self._quaternion.invert()
[docs] def complex_conjugate(self):
''' In-place converts this quaternion to its complex conjugate
'''
self._quaternion.complex_conjugate()
[docs] def rotation_matrix(self):
''' Returns a rotation matrix that the quaternion currently describes
'''
return self._quaternion.rotation_matrix()
[docs] def rotate(self, object_to_rotate, cell = None):
''' Rotates a set of vectors in place by this quaternion
'''
try:
if isinstance(object_to_rotate, Molecule):
self._rotate_molecule(object_to_rotate._molecule, cell)
return object_to_rotate
elif isinstance(object_to_rotate, Crystal):
if cell is None:
cell = object_to_rotate._crystal.cell()
molecule = object_to_rotate.molecule
self._rotate_molecule(molecule,cell)
object_to_rotate.molecule = molecule
return object_to_rotate
elif isinstance(object_to_rotate, Atom):
self._rotate_atoms([object_to_rotate],cell)
elif isinstance(object_to_rotate, MathsLib.vector_3d):
self._quaternion.rotate(object_to_rotate)
elif isinstance(object_to_rotate[0], Atom):
self._rotate_atoms(object_to_rotate, cell)
elif isinstance(object_to_rotate[0], float) or isinstance(object_to_rotate[0], int):
mlv = MathsLib.vector_3d(object_to_rotate[0], object_to_rotate[1], object_to_rotate[2])
self._quaternion.rotate(mlv)
object_to_rotate[0] = mlv.x()
object_to_rotate[1] = mlv.y()
object_to_rotate[2] = mlv.z()
else:
for vector in object_to_rotate:
self.rotate(vector)
return object_to_rotate
except Exception as e:
raise ValueError('Unable to rotate object of type %s' % str(type(object_to_rotate))) from e
def _rotate_molecule(self, molecule, _cell):
''' Rotates a molecule. Rotation is applied to orthogonal coordinates.
If possible, fractional coordinates are then updated too.
'''
self._rotate_atoms(molecule.atoms, _cell)
def _rotate_atoms(self, atoms, _cell):
''' Rotates a set of atoms. Rotation is applied to orthogonal coordinates.
If possible, fractional coordinates are then updated too.
'''
raw_atoms = [atom._atom for atom in atoms]
if _cell is not None:
ChemicalAnalysisLib.apply_quaternion_to_atoms_with_cell( raw_atoms, self._quaternion, _cell)
else:
ChemicalAnalysisLib.apply_quaternion_to_atoms(raw_atoms, self._quaternion)
def __eq__(self,other):
return self._quaternion == other._quaternion
def __ne__(self,other):
return self._quaternion != other._quaternion
def __pos__(self):
return GeometricDescriptors.Quaternion(quaternion = +self._quaternion)
def __neg__(self):
return GeometricDescriptors.Quaternion(quaternion = -self._quaternion)
def __pow__(self,n):
return GeometricDescriptors.Quaternion( _quaternion = self._quaternion ** n )
def __mul__(self, other):
return GeometricDescriptors.Quaternion( _quaternion = self._quaternion * other._quaternion )
def __imul__(self, other):
self._quaternion *= other._quaternion
return self
def __str__(self):
return str(self._quaternion.as_string())
#################################################################################################
[docs]class CrystalDescriptors(object):
'''Namespace for crystallographic descriptors.'''
#################################################################################################
# Powder patterns
#################################################################################################
[docs] @nested_class('CrystalDescriptors')
class PowderPattern(object):
'''Represents a powder pattern.
The powder pattern class is available only to CSD-Materials and
CSD-Enterprise users.
'''
[docs] @nested_class('PowderPattern')
class Settings(object):
'''Settings which may be set for a Powder simulation.'''
_slit_type_dict = bidirectional_dict(
fixed=PackingSimilarityLib.PXRDSimulatorSettings.SlitType_FIXED_SLITS,
variable=PackingSimilarityLib.PXRDSimulatorSettings.SlitType_VARIABLE_SLITS,
)
def __init__(self, _settings=None):
self._settings = _settings if _settings is not None else PackingSimilarityLib.PXRDSimulatorSettings()
@staticmethod
def _get_wavelength_object(value):
if value is not None:
if isinstance(value, float) or isinstance(value, int):
return CrystalDescriptors.PowderPattern.Wavelength(value)
elif (isinstance(value, list) or isinstance(value, tuple)) and len(value) == 2:
return CrystalDescriptors.PowderPattern.Wavelength(value[0], value[1])
elif isinstance(value, CrystalDescriptors.PowderPattern.Wavelength):
return value
else:
raise TypeError(f'Cant set wavelength; type must be a float or a pair of floats: {value} was passed in')
return None
def _get_wavelength(self, index):
if self._settings.nwavelengths() <= index:
return None
return CrystalDescriptors.PowderPattern.Wavelength(_wavelength=self._settings.wavelength(index))
def _set_wavelength(self, index, value):
wavelengths = [self._settings.wavelength(i) for i in range(self._settings.nwavelengths())]
while len(wavelengths) <= index:
wavelengths.append(value)
wavelengths[index] = value
self._settings.set_wavelengths(wavelengths)
@property
def wavelength(self):
'''Set or get the primary wavelength
:param value: float or pair of floats (wavelength and scale factor) or another Wavelength object or
None to reset to the default
:return: Primary wavelength object (or None) for the simulation
'''
return self._get_wavelength(0)
@wavelength.setter
def wavelength(self, value):
w = self._get_wavelength_object(value)
if w is not None:
wavelength = w._wavelength
else:
default_settings = PackingSimilarityLib.PXRDSimulatorSettings()
wavelength = default_settings.wavelength(0)
self._set_wavelength(0, wavelength)
@property
def second_wavelength(self):
'''Set or get the secondary wavelength
:param value: float or pair of floats (wavelength and scale factor) or another Wavelength object or
None to remove the secondary wavelength
:return: Secondary wavelength object (or None) for the simulation
'''
return self._get_wavelength(1)
@second_wavelength.setter
def second_wavelength(self, value):
w = self._get_wavelength_object(value)
if w is not None:
self._set_wavelength(1, w._wavelength)
else:
self._settings.set_wavelengths([self.wavelength._wavelength])
@property
def include_hydrogens(self):
'''
Whether to include hydrogens in the simulation
:return: True or false
'''
return self._settings.include_hydrogens()
@include_hydrogens.setter
def include_hydrogens(self, value):
self._settings.set_include_hydrogens(bool(value))
@property
def deuterium_is_hydrogen(self):
'''
Whether to include treat deuterium as hydrogen
:return: True or false
'''
return self._settings.D_is_H()
@deuterium_is_hydrogen.setter
def deuterium_is_hydrogen(self, value):
self._settings.set_D_is_H(bool(value))
@property
def two_theta_minimum(self):
'''
Where to start the pattern simulation
:return: float representing the minimum 2-theta (in degrees)
'''
return self._settings.two_theta_start().degrees()
@two_theta_minimum.setter
def two_theta_minimum(self, value):
self._settings.set_two_theta_range(MathsLib.Angle(float(value), MathsLib.Angle.DEGREES), self._settings.two_theta_end())
@property
def two_theta_maximum(self):
'''
Where to end the pattern simulation
:return: float representing the maximum 2-theta (in degrees)
'''
return self._settings.two_theta_end().degrees()
@two_theta_maximum.setter
def two_theta_maximum(self, value):
self._settings.set_two_theta_range(self._settings.two_theta_start(), MathsLib.Angle(float(value), MathsLib.Angle.DEGREES))
@property
def two_theta_step(self):
'''
The step-size used in the pattern simulation
:return: float representing the step size (in degrees)
'''
return self._settings.two_theta_step().degrees()
@two_theta_step.setter
def two_theta_step(self, value):
self._settings.set_two_theta_step(MathsLib.Angle(float(value), MathsLib.Angle.DEGREES))
@property
def full_width_at_half_maximum(self):
'''
The the full width at half height of peaks to use in simulation
:return: float representing the full width at half height of peaks (in degrees)
'''
x = self._settings.peak_shape().peak_width()
y = PackingSimilarityLib.get_fwhm_as_constant_fwhm(x)
if y:
return y.width().degrees()
return None
@full_width_at_half_maximum.setter
def full_width_at_half_maximum(self, value):
fwhm = PackingSimilarityLib.ConstantFWHM(
MathsLib.Angle(float(value), MathsLib.Angle.DEGREES)
)
self._settings.peak_shape().set_peak_width(fwhm)
@property
def march_dollase_preferred_orientation(self):
'''Setting for march_dollase.
:return: a :class:`PXRDMatchOptimiser.Settings.PreferredOrientation` or None
The default value is None. This can be set with a tuple of (h, k, l, r).
'''
po = self._settings.preferred_orientation()
md = PackingSimilarityLib.perferred_orientation_function_as_march_dollase(po)
return md if md is None else CrystalDescriptors.PowderPattern.PreferredOrientation(_function=md)
@march_dollase_preferred_orientation.setter
def march_dollase_preferred_orientation(self, value):
# Shape of input is (h,k,l,value)
if not value:
self._settings.set_preferred_orientation(None)
elif isinstance(value, CrystalDescriptors.PowderPattern.PreferredOrientation):
self._settings.set_preferred_orientation(value._function)
else:
try:
function = CrystalDescriptors.PowderPattern.PreferredOrientation(values=value)
self._settings.set_preferred_orientation(function._function)
except (ValueError, IndexError, TypeError):
raise ValueError(f"expected one of None or a CrystalDescriptors.PowderPattern.PreferredOrientation or a tuple of form (int,int,int,float) - got {value}")
@property
def slit_type(self) -> str:
'''The type of slit to be simulated
This may be 'fixed' (default) or 'variable'.
:return: string representing type of slit to be simulated
'''
return CrystalDescriptors.PowderPattern.Settings._slit_type_dict.inverse_lookup(
self._settings.slit_type())
@slit_type.setter
def slit_type(self, value: str):
self._settings.set_slit_type(
CrystalDescriptors.PowderPattern.Settings._slit_type_dict[value.lower()])
@property
def fast_peak_shape(self) -> bool:
'''Whether to use a fast but less accurate peak shape calculation during simulation
The peak shape will be applied using fast fourier transform convolution of the peak shape function.
This is faster but less accurate than the default convolution method. The resulting peaks will tend to be wider.
:return: True or False
'''
return self._settings.fft_convolve()
@fast_peak_shape.setter
def fast_peak_shape(self, value: bool):
self._settings.set_fft_convolve(value)
[docs] @nested_class('PowderPattern')
class PreferredOrientation:
'''A preferred orientation for PXRD simulation.'''
def __init__(self, values=None, _function=None) -> None:
self._function = _function
if values and not _function:
try:
mi = ChemistryLib.MillerIndices(*[int(v) for v in values[:3]])
self._function = PackingSimilarityLib.MarchDollase(mi, float(values[3]))
except (ValueError, IndexError):
raise ValueError(f"expected a tuple of form (int,int,int,float) - got {values}")
def __str__(self):
if not self._function:
return 'None'
mi = self._function.miller_indices()
return f'{mi.h()},{mi.k()},{mi.l()},{self._function.r()}'
def __repr__(self):
return f'CrystalDescriptors.PowderPattern.PreferredOrientation(values=({self}))'
def __len__(self):
return 4 if self._function else 0
def __getitem__(self, index):
if not self._function:
raise IndexError('PreferredOrientation has no value')
if index == 0:
return self._function.miller_indices().h()
if index == 1:
return self._function.miller_indices().k()
if index == 2:
return self._function.miller_indices().l()
if index == 3:
return self._function.r()
if isinstance(index, slice):
# Get the start, stop, and step from the slice
return [self[i] for i in range(*index.indices(len(self)))]
raise IndexError('PreferredOrientation index out of range')
def __eq__(self, value) -> bool:
if not isinstance(value, CrystalDescriptors.PowderPattern.PreferredOrientation):
return tuple(self) == value
if (not self._function) != (not value._function):
return False
if not self._function:
return True
if self._function.miller_indices() != value._function.miller_indices():
return False
return self._function.r() == value._function.r()
@property
def h(self):
'''The miller indices h value of the preferred orientation.'''
return self._function.miller_indices().h()
@property
def k(self):
'''The miller indices k value of the preferred orientation.'''
return self._function.miller_indices().k()
@property
def l(self):
'''The miller indices l value of the preferred orientation.'''
return self._function.miller_indices().l()
@property
def r(self):
'''The March-Dollase r value of the preferred orientation.'''
return self._function.r()
[docs] @nested_class('PowderPattern')
class Wavelength(object):
'''Represents a wavelength for powder studies.
Some standard wavelengths - these are floats, not :class:`ccdc.descriptors.CrystalDescriptors.PowderPattern.Wavelength`
'''
Wavelength_Cu = DatabaseEntryLib.Wavelength.Wavelength_Cu
Wavelength_CuKa1 = DatabaseEntryLib.Wavelength.Wavelength_CuKa1
Wavelength_CuKa2 = DatabaseEntryLib.Wavelength.Wavelength_CuKa2
Wavelength_CuKb1 = DatabaseEntryLib.Wavelength.Wavelength_CuKb1
Wavelength_Cr = DatabaseEntryLib.Wavelength.Wavelength_Cr
Wavelength_CrKa1 = DatabaseEntryLib.Wavelength.Wavelength_CrKa1
Wavelength_CrKa2 = DatabaseEntryLib.Wavelength.Wavelength_CrKa2
Wavelength_Co = DatabaseEntryLib.Wavelength.Wavelength_Co
Wavelength_CoKa1 = DatabaseEntryLib.Wavelength.Wavelength_CoKa1
Wavelength_CoKa2 = DatabaseEntryLib.Wavelength.Wavelength_CoKa2
Wavelength_CoKb1 = DatabaseEntryLib.Wavelength.Wavelength_CoKb1
Wavelength_Mo = DatabaseEntryLib.Wavelength.Wavelength_Mo
Wavelength_MoKa1 = DatabaseEntryLib.Wavelength.Wavelength_MoKa1
Wavelength_MoKa2 = DatabaseEntryLib.Wavelength.Wavelength_MoKa2
Wavelength_MoKb1 = DatabaseEntryLib.Wavelength.Wavelength_MoKb1
Wavelength_Fe = DatabaseEntryLib.Wavelength.Wavelength_Fe
Wavelength_FeKa1 = DatabaseEntryLib.Wavelength.Wavelength_FeKa1
Wavelength_FeKa2 = DatabaseEntryLib.Wavelength.Wavelength_FeKa2
Wavelength_FeKb1 = DatabaseEntryLib.Wavelength.Wavelength_FeKb1
def __init__(self, wavelength=None, scale_factor=1.0, _wavelength=None):
if _wavelength is not None:
self._wavelength = _wavelength
else:
if wavelength is None:
wavelength = CrystalDescriptors.PowderPattern.Wavelength.Wavelength_CuKa1
self._wavelength = DatabaseEntryLib.Wavelength(wavelength, scale_factor)
@property
def wavelength(self):
'''The wavelength.'''
return self._wavelength.wavelength()
@wavelength.setter
def wavelength(self, w):
self._wavelength = DatabaseEntryLib.Wavelength(w, self.scale_factor)
@property
def scale_factor(self):
'''The scale factor of this Wavelength.'''
return self._wavelength.scale_factor()
@scale_factor.setter
def scale_factor(self, f):
self._wavelength = DatabaseEntryLib.Wavelength(self.wavelength, f)
[docs] @nested_class('PowderPattern')
class TickMark(object):
'''A tick mark in a simulated powder pattern.
'''
def __init__(self, _tick, _crystal=None):
'''Create from an internal tick mark object.'''
self._tick = _tick
self._crystal = _crystal
@property
def two_theta(self):
'''Two theta value of this tick.'''
return self._tick.two_theta().degrees()
@property
def is_systematically_absent(self):
'''Whether this tick mark is systematically absent.'''
return self._tick.is_systematically_absent()
@property
def miller_indices(self):
'''The Miller indices of this tick mark.'''
if self._crystal is not None:
mi = self._tick.miller_indices()
return self._crystal.miller_indices(
mi.h(), mi.k(), mi.l()
)
[docs] @staticmethod
def from_file(file_name, format=None, default_wavelength=None):
'''Create a CrystalDescriptors.PowderPattern from a file.
``format`` may take one of the following values:
- ``'xy'``: XY format (2 columns: 2theta and intensity)
- ``'xye'``: XYE format (3 columns: 2theta, intensity and ESD)
- ``'xrdml'``: Panalytical XRDML format
If ``format`` is ``None``, it will be deduced from the filename extension.
:param file_name: path to the file
:param format: string indicating the format to expect; if ``None`` will deduce from filename extension
:param default_wavelength: Default wavelength used if no wavelength found/parsed in file
'''
powderlib_format = 'unspecified' if format is None else format
if default_wavelength is None:
pattern = PackingSimilarityLib.PXRDPattern(file_name, powderlib_format)
else:
pattern = PackingSimilarityLib.PXRDPattern(file_name, powderlib_format, default_wavelength)
settings = CrystalDescriptors.PowderPattern.Settings()
if (pattern.warningMessage() != ""):
print(pattern.warningMessage())
settings.wavelength = CrystalDescriptors.PowderPattern.Wavelength(pattern.wavelength(0).wavelength(), pattern.wavelength(0).scale_factor())
settings.two_theta_minimum = pattern.two_theta_start()
settings.two_theta_maximum = pattern.two_theta_end()
settings.two_theta_step = pattern.two_theta_uniform_step_size()
return CrystalDescriptors.PowderPattern(pattern, settings)
[docs] @staticmethod
def from_xye_file(file_name, default_wavelength=None):
'''Create a CrystalDescriptors.PowderPattern from an xye file.
:param file_name: path to xye file
:param default_wavelength: Default wavelength used if no wavelength found/parsed in file
'''
return CrystalDescriptors.PowderPattern.from_file(
file_name,
format='xye',
default_wavelength=default_wavelength)
[docs] @staticmethod
def from_xy_file(file_name, default_wavelength=None):
'''Create a CrystalDescriptors.PowderPattern from an xy file.
:param file_name: path to xy file
:param default_wavelength: Default wavelength used if no wavelength found/parsed in file
'''
return CrystalDescriptors.PowderPattern.from_file(
file_name,
format='xy',
default_wavelength=default_wavelength)
[docs] @staticmethod
def from_xrdml_file(file_name, default_wavelength=None):
'''Create a CrystalDescriptors.PowderPattern from a Panalytical XRDML file.
See https://www.malvernpanalytical.com/en/products/category/software/x-ray-diffraction-software/data-collector
for details of the XRDML format.
:param file_name: path to XRDML file
:param default_wavelength: Default wavelength used if no wavelength found/parsed in file
'''
return CrystalDescriptors.PowderPattern.from_file(
file_name,
format='xrdml',
default_wavelength=default_wavelength)
[docs] @staticmethod
def from_crystal(crystal, settings=None):
'''Create a CrystalDescriptors.PowderPattern from a crystal.
:param crystal: :class:`ccdc.crystal.Crystal`
:param settings: :class:`ccdc.descriptors.CrystalDescriptors.PowderPattern.Settings`
'''
if settings is None:
settings = CrystalDescriptors.PowderPattern.Settings()
sim = PackingSimilarityLib.PXRDSimulator(crystal._crystal, settings._settings)
return CrystalDescriptors.PowderPattern(sim.simulate_profile(), settings, sim, crystal)
def __init__(self, _pattern, _settings=None, _simulation=None, _crystal=None):
'''Private: create from an internal PXRDPattern.'''
self._pattern = _pattern
self.settings = _settings
self._simulation = _simulation
self._crystal = _crystal
[docs] def resetWavelength(self, new_wavelength=None):
'''Reset the wavelength for an existing powder pattern
:param new_wavelength: New wavelength, if this is left blank then the wavelength is reset to 1.54056 Angstrom
'''
if new_wavelength is None:
self.settings.wavelength = CrystalDescriptors.PowderPattern.Wavelength(DatabaseEntryLib.Wavelength.Wavelength_CuKa1, 1.0)
else:
self.settings.wavelength = CrystalDescriptors.PowderPattern.Wavelength(new_wavelength, 1.0)
[docs] def similarity(self, other, width=2.0, use_esds=True):
'''Measure of match between this pattern and another.
This uses the cross-correlations described in `R. de Gelder, R. Wehrens, J.A. Hageman (2001) <i>J. Comp. Chem.</i> <b>22</b>:273-289. https://doi.org/10.1002/1096-987X(200102)22:3%3C273::AID-JCC1001%3E3.0.CO;2-0`_
:param other: :class:`ccdc.descriptors.CrystalDescriptors.PowderPattern`
:param width: width (in degrees) of the base of the triangle weight function
:param use_esds: Whether to use the powder pattern estimates of standard deviation on the counts in the calculation as weightings
:returns: float that represents the similarity of the two patterns
'''
if use_esds:
ue = PackingSimilarityLib.PXRDPattern.USE_ESDS
else:
ue = PackingSimilarityLib.PXRDPattern.DO_NOT_USE_ESDS
return PackingSimilarityLib.PXRDMatch(self._pattern, other._pattern, width, ue)
[docs] def write_xye_file(self, file_name):
'''Write a .xye format file.
:param file_name: output file name
'''
self._pattern.write_xy_or_xye_file(file_name, PackingSimilarityLib.PXRDPattern.USE_ESDS)
[docs] def write_xy_file(self, file_name):
'''Write a .xy format file.
The .xy format is the same as the .xye format except that the ESD column is omitted.
:param file_name: output file name
'''
self._pattern.write_xy_or_xye_file(file_name, PackingSimilarityLib.PXRDPattern.DO_NOT_USE_ESDS)
[docs] def write_raw_file(self, file_name):
'''Write a Bruker .raw file.
:param file_name: output file name
'''
self._pattern.write_raw_file(file_name)
[docs] def write_xrdml_file(self, file_name):
'''Write a Panalytical .xrdml format file.
:param file_name: output file name
'''
self._pattern.write_xrdml_file(file_name)
[docs] def integral(self, start=0.0, end=180.0):
'''The area under the curve.
:param start: float
:param end: float
:returns: float
'''
return self._pattern.integral(start, end)
@property
def two_theta(self):
'''The array of two_theta values.'''
return [
self._pattern.two_theta(i) for i in range(self._pattern.npoints())
]
@property
def intensity(self):
'''The array of intensity values.'''
return [
self._pattern.intensity(i) for i in range(self._pattern.npoints())
]
@property
def esd(self):
'''The array of esd values (Estimated Square Deviations).'''
return [
self._pattern.esd(i) for i in range(self._pattern.npoints())
]
@property
def tick_marks(self):
'''The array of tick marks if this is a simulated powder pattern.
:returns: list of :class:`ccdc.descriptors.CrystalDescriptors.PowderPattern.TickMark`
or ``None`` if this is not a simulated powder pattern.
'''
if self._simulation is not None:
self._ticks = self._simulation.tick_mark_list(self._simulation.INCLUDE_SYSTEMATIC_ABSENCES)
self._ticks.sort_on_two_theta()
return [
CrystalDescriptors.PowderPattern.TickMark(
self._ticks.tick_mark(i),
self._crystal
)
for i in range(self._ticks.size())
]
#################################################################################################
# Morphology
#################################################################################################
[docs] class Morphology(morphology.MorphologyBase):
def __init__(self, crystal=None):
warnings.warn('''This class is deprecated and will be removed in a later version.
Instead, please use the ccdc.morphology.BFDHMorphology class.''', DeprecationWarning)
super().__init__(crystal)
Facet = morphology.Facet
OrientedBoundingBox = morphology.OrientedBoundingBox
#################################################################################################
# Graph sets
#################################################################################################
[docs] class GraphSetSearch(object):
'''Finds the graph sets of a crystal.'''
[docs] class GraphSet(object):
'''An individual graph set.'''
_descriptor_dict = {
MotifSearchLib.GraphSet.SELF: 'self',
MotifSearchLib.GraphSet.DISCRETE: 'discrete',
MotifSearchLib.GraphSet.CHAIN: 'chain',
MotifSearchLib.GraphSet.RING: 'ring',
MotifSearchLib.GraphSet.NONE: 'none'
}
def __init__(self, _graph_set_atoms, _view):
self._graph_set_atoms = _graph_set_atoms
self._graph_set = _graph_set_atoms.graph_set()
self._view = _view
def __repr__(self):
ostr = MotifSearchLib.ostringstream()
self._graph_set.print_(ostr)
return ostr.str()
@property
def descriptor(self):
'''The descriptor of the graph set.'''
return CrystalDescriptors.GraphSetSearch.GraphSet._descriptor_dict[self._graph_set.designator()]
@property
def ndonors(self):
'''The number of donors involved in the graph set.'''
return self._graph_set.ndonors()
@property
def nacceptors(self):
'''The number of acceptors involved in the graph set.'''
return self._graph_set.nacceptors()
@property
def degree(self):
'''The degree of the graph set, i.e. the number of atoms involved.'''
return self._graph_set.degree()
@property
def edge_labels(self):
'''The edge labels of the graph set.
The labels are arbitrary letters identifying a unique hydrogen bond, separated by
'>' or '<' indicating the donor-acceptor direction.
'''
return self._graph_set.edge_labels()
@property
def nmolecules(self):
'''The number of molecules involved in the graph set.'''
return self._graph_set_atoms.nmolecules()
@property
def period(self):
'''The period of the graph set, i.e the number of hydrogen bonds in the repeat unit.
If the type of the graph set is not a chain or a ring this will be -1
'''
return self._graph_set_atoms.period()
@property
def label_set(self):
'''The set of hydrogen bond labels found in the graph set.'''
return set(self._graph_set_atoms.label_set())
@property
def hbonds(self):
'''The hydrogen bonds of the graph set.
:returns: a tuple of :class:`ccdc.crystal.Crystal.HBond` instances.
'''
res = []
for i in range(self._graph_set_atoms.nhbonds()):
hb = self._graph_set_atoms.hbond(i)
_contact = ChemistryLib.MolecularContact(
hb.from_molecule(), hb.from_atom(),
hb.to_molecule(), hb.to_atom(),
ChemistryLib.MolecularContact.HBOND_CONTACT
)
res.append(Crystal.HBond(self._view, _contact))
return tuple(res)
[docs] class Settings(object):
'''Configurable settings for the graph set analyser.'''
level = 2 #: deepest level to search. This is the number of different HBonds involved.
max_ring_size = 6 #: largest ring to search
max_chain_size = 4 #: longest chain to search
max_discrete_chain_size = 4 #: longest discrete chain to search
_inter_dict = bidirectional_dict(
intermolecular=ChemistryLib.HBondCriterion.INTERMOLECULAR,
intramolecular=ChemistryLib.HBondCriterion.INTRAMOLECULAR,
any=ChemistryLib.HBondCriterion.ANY,
)
def __init__(self, hbond_criterion=None):
if hbond_criterion is None:
hbond_criterion = Molecule.HBondCriterion()
self.hbond_criterion = hbond_criterion
@property
def distance_range(self):
'''Allowable distance range for a HBond to be formed.'''
return (
self.hbond_criterion._hbc.hbond_criterion().min_dist_tolerance(),
self.hbond_criterion._hbc.hbond_criterion().max_dist_tolerance()
)
@distance_range.setter
def distance_range(self, val):
self.hbond_criterion._hbc.hbond_criterion().set_min_dist_tolerance(min(val))
self.hbond_criterion._hbc.hbond_criterion().set_max_dist_tolerance(max(val))
@property
def angle_tolerance(self):
'''The tolerance of the HBond angle.'''
return self.hbond_criterion._hbc.hbond_criterion().angle_tolerance().degrees()
@angle_tolerance.setter
def angle_tolerance(self, val):
if val is None:
self.hbond_criterion._hbc.set_angle_option(
MotifSearchLib.GraphSetHBondCriterion.ANGLE_IS_NOT_CHECKED
)
else:
self.hbond_criterion._hbc.set_angle_option(
MotifSearchLib.GraphSetHBondCriterion.ANGLE_IS_CHECKED
)
self.hbond_criterion._hbc.hbond_criterion().set_angle_tolerance(
MathsLib.Angle(val, MathsLib.Angle.DEGREES)
)
@property
def vdw_corrected(self):
'''Whether the distance range is Van der Waals corrected.'''
return self.hbond_criterion._hbc.hbond_criterion().vdw_corrected()
@vdw_corrected.setter
def vdw_corrected(self, val):
self.hbond_criterion._hbc.hbond_criterion().set_vdw_corrected(val)
@property
def require_hydrogens(self):
'''Whether Hydrogens are required for the HBond.'''
return not self.hbond_criterion._hbc.hbond_criterion().allow_non_hydrogen_contacts()
@require_hydrogens.setter
def require_hydrogens(self, val):
if val:
self.hbond_criterion._hbc.set_distance_option(
MotifSearchLib.GraphSetHBondCriterion.HYDROGEN_TO_ACCEPTOR
)
else:
self.hbond_criterion._hbc.set_distance_option(
MotifSearchLib.GraphSetHBondCriterion.DONOR_TO_ACCEPTOR
)
self.hbond_criterion._hbc.hbond_criterion().set_allow_non_hydrogen_contacts(not val)
@property
def intermolecular(self):
'''Whether HBonds should be intermolecular, intramolecular, or any.'''
return self._inter_dict.inverse_lookup(
self.hbond_criterion._hbc.hbond_criterion().type()
)
@intermolecular.setter
def intermolecular(self, val):
self.hbond_criterion._hbc.hbond_criterion().set_type(
self._inter_dict.prefix_lookup(val)
)
@property
def path_length_range(self):
'''The shortest and longest bond-path separation for intramolecular contacts.'''
return (
self.hbond_criterion._hbc.hbond_criterion().minpath(),
self.hbond_criterion._hbc.hbond_criterion().maxpath()
)
@path_length_range.setter
def path_length_range(self, val):
self.hbond_criterion._hbc.hbond_criterion().set_minpath(min(val))
self.hbond_criterion._hbc.hbond_criterion().set_maxpath(max(val))
def __init__(self, settings=None):
'''Initialise the analyser.'''
if settings is None:
settings = CrystalDescriptors.GraphSetSearch.Settings()
self.settings = settings
self._analyser = MotifSearchLib.GraphSetAnalyser()
self._analyser.set_level(2)
self._analyser.set_hbond_criterion(self.settings.hbond_criterion._hbc)
[docs] def search(self, crystal):
'''Find all graph sets for the crystal subject to the constraints of the settings.
:param crystal: :class:`ccdc.crystal.Crystal` instance.
:returns: a tuple of :class:`ccdc.descriptors.CrystalDescriptors.GraphSetSearch.GraphSet` instances.
'''
self._analyser.set_level(self.settings.level)
self._analyser.set_max_ring_size(self.settings.max_ring_size)
self._analyser.set_max_chain_size(self.settings.max_chain_size)
self._analyser.set_max_discrete_size(self.settings.max_discrete_chain_size)
self._analyser.set_hbond_criterion(self.settings.hbond_criterion._hbc)
atoms = self._analyser.run_graph_set_analysis(crystal._crystal)
view = ChemistryLib.CrystalStructureView.instantiate(crystal._crystal)
return tuple(CrystalDescriptors.GraphSetSearch.GraphSet(a, view) for a in atoms)
#################################################################################################
# Hydrogen bond coordination
#################################################################################################
[docs] class HBondCoordination(object):
'''Calculate HBond coordination predictions.
The HBondCoordination class is available only to CSD-Materials and
CSD-Enterprise users.
'''
_telemetry = 0
[docs] class Settings(object):
'''Settings pertaining to the calculation of coordination predictions.'''
def __init__(self):
self._graph_set_search_settings = CrystalDescriptors.GraphSetSearch.Settings()
self._graph_set_search_settings.hbond_criterion.intermolecular = 'Any'
self._graph_set_search_settings.hbond_criterion.require_hydrogens = True
self._graph_set_search_settings.hbond_criterion.path_length_range = (3, 999)
self._graph_set_search_settings.hbond_criterion.vdw_corrected = True
self._graph_set_search_settings.hbond_criterion.distance_range = (-5, 0.0)
self._coordination_models_path = None
self._models_changed = False
if self.coordination_models_path is None:
self.coordination_models_path = os.path.abspath(
os.path.join(
os.path.dirname(__file__),
'parameter_files',
'hydrogen_bond_coordination_models'
)
)
@property
def hbond_criterion(self):
return self._graph_set_search_settings.hbond_criterion
@property
def coordination_models_path(self):
'''The directory in which the coordination models may be found.'''
return self._coordination_models_path
@coordination_models_path.setter
def coordination_models_path(self, path):
self._coordination_models_path = path
self._models_changed = True
[docs] class Predictions(object):
'''The predictions for HBonds coordinations.'''
Observation = collections.namedtuple('Observation', ['label', 'coordination_count', 'probability'])
def observation_repr(obs):
return "Observation(label='%s', coordination_count=%d, probability=%f)" % (obs.label, obs.coordination_count, obs.probability)
Observation.__repr__ = observation_repr
def __init__(self, crystal, _analysis, _predictions):
self.crystal = crystal
self._predictions = _predictions
self._donor_atoms = tuple(
Atom(_atom=hb.atom()) for hb in _analysis.observed_donor_atoms()
)
self._acceptor_atoms = tuple(
Atom(_atom=hb.atom()) for hb in _analysis.observed_acceptor_atoms()
)
@property
def is_valid(self):
'''Whether or not valid predictions were made.'''
return not all(len(p) <= 1 for p in self._predictions)
@property
def observed(self):
'''The predicted probabilities of observed HBonds.'''
if not self._predictions:
return None
obs = tuple(
CrystalDescriptors.HBondCoordination.Predictions.Observation(p[0][0], i - 1, float(p[i][0]))
for p in self._predictions
for i in range(1, len(p))
if abs(p[i][1]) == 1
)
return obs
[docs] def predictions_for_label(self, label, type='donor'):
'''All the predictions for the atom.
:returns: a pair: observed hbond coordination number, dictionary with key, hbond coordination number, value, predicted probability.
'''
if not self._predictions:
return None
ps = [p for p in self._predictions if p[0][0].startswith(label + ' ')]
if len(ps) == 1:
p = ps[0]
elif len(ps) == 0:
return None
elif len(ps) > 2:
raise RuntimeError('Ambiguous atom label %s' % label)
elif ps[0][0][0].endswith('(%s)' % type.lower()[0]):
p = ps[0]
else:
p = ps[1]
obs = {
i - 1: float(p[i][0])
for i in range(1, len(p))
}
observed = [i - 1 for i in range(1, len(p)) if abs(p[i][1]) == 1][0]
return (observed, obs)
def _functional_group_of_label(self, label, type='donor'):
ps = [p[0][0] for p in self._predictions if p[0][0].startswith(label + ' ')]
if len(ps) == 1:
p = ps[0]
elif len(ps) == 0:
return None
elif len(ps) > 2:
raise RuntimeError('Ambiguous atom label %s' % label)
elif ps[0].endswith('(%s)' % type.lower()[0]):
p = ps[0]
else:
p = ps[1]
return p
[docs] def functional_groups_of_hbond(self, hbond):
'''The functional group pertaining to a hydrogen-bonding atom.'''
ats = hbond.atoms
if len(ats) == 3:
if ats[1] in ats[0].neighbours:
donor = self._functional_group_of_label(ats[0].label, 'donor')
acc = self._functional_group_of_label(ats[-1].label, 'acceptor')
else:
donor = self._functional_group_of_label(ats[-1].label, 'donor')
acc = self._functional_group_of_label(ats[0].label, 'acceptor')
else:
# No H, how can I tell which is donor?
if ats[0].is_donor and not ats[-1].is_donor:
donor = self._functional_group_of_label(ats[0].label, 'donor')
acc = self._functional_group_of_label(ats[-1].label, 'acceptor')
elif ats[-1].is_donor and not ats[0].is_donor:
donor = self._functional_group_of_label(ats[-1].label, 'donor')
acc = self._functional_group_of_label(ats[0].label, 'acceptor')
else:
donor = self._functional_group_of_label(ats[0].label, 'donor')
acc = self._functional_group_of_label(ats[-1].label, 'acceptor')
return donor, acc
[docs] def to_csv(self, separator=','):
'''Format the predictions suitable for a csv file.
:param separator: a separation string, or ``None``.
:returns: if separator is ``None`` a tuple of lists of components, otherwise a separated string of components.'''
components = self.crystal.molecule.components
file_orders = [
set(a._atom.annotations().obtain_FileOrdering().file_order() for a in c.atoms)
for c in components
]
def one_line(l):
id_bits = l[0][0].split()
at_label = id_bits[0]
context = ' '.join(id_bits[2:-1])
da = id_bits[-1][1:-1]
if da == 'd':
possible_correct_atoms = [a for a in self._donor_atoms if a.label == at_label]
else:
possible_correct_atoms = [a for a in self._acceptor_atoms if a.label == at_label]
if len(possible_correct_atoms) == 1:
correct_atom = possible_correct_atoms[0]
else:
raise RuntimeError('Ambiguous label %s %s' % (
at_label, ' '.join(a.label for a in possible_correct_atoms)))
for i, c in enumerate(file_orders):
if correct_atom._atom.annotations().obtain_FileOrdering().file_order() in c:
residue = 'RES%d' % (i + 1)
break
else:
raise RuntimeError('Cannot find appropriate file order')
ret = [
self.crystal.identifier,
residue,
# at_label,
correct_atom.label,
context,
da,
]
# Extract the coordination values themselves (starting at the second element of l)
coordination_values = l[1:]
for x in coordination_values:
ret.extend(
(float(x[0]), abs(x[1]))
)
for i in range(len(coordination_values), 7):
ret.extend((0.0, 0))
if separator is None:
return ret
return separator.join(str(x) for x in ret)
if separator is None:
return (one_line(l) for l in self._predictions[1:])
return '\n'.join(one_line(l) for l in self._predictions[1:]) + '\n'
def __init__(self, settings=None, skip_telemetry=False):
'''Initialise the prediction calculator.
:param settings: :class:`ccdc.hbond_coordination.CrystalDescriptors.HBondCoordination.Settings` instance.
'''
if settings is None:
settings = CrystalDescriptors.HBondCoordination.Settings()
self.settings = settings
self._setup_calculator()
if not skip_telemetry:
if type(self)._telemetry == 0:
UtilitiesLib.ccdc_coordination_quick_view_telemetry()
type(self)._telemetry = 1
def _setup_calculator(self):
self._coordination_models = PackingSimilarityLib.CoordinationModels(
self.settings.coordination_models_path)
self._calculator = PackingSimilarityLib.DonorAcceptorPropensityCalculator(self._coordination_models)
[docs] def predict(self, crystal):
'''Calculate HBond coordination likelihoods for the crystal.
:returns: a :class:`ccdc.hbond_coordination.CrystalDescriptors.HBondCoordination.Predictions` instance.
'''
if crystal.has_disorder:
raise RuntimeError('Cannot calculate propensities for disordered crystals')
if self.settings._models_changed:
self._setup_calculator()
self.settings._models_changed = False
self.crystal = crystal
self.hbond_analysis = PackingSimilarityLib.create_analysis_from_structure(
crystal._crystal,
self._coordination_models,
self.settings.hbond_criterion._hbc.hbond_criterion())
self._coordination_propensities = self._calculator.calculate(self.hbond_analysis)
pairs = self.hbond_analysis.observed_hbonding_outcome(
self._coordination_propensities
)
grouping_calculator = PackingSimilarityLib.DonorAcceptorOutcomeGroupingCalculator()
grouping = grouping_calculator.grouping(
pairs, self.hbond_analysis, self._coordination_propensities
)
self.donor_atoms = tuple(
Atom(_atom=d.atom()) for d in self.hbond_analysis.observed_donor_atoms()
)
self.acceptor_atoms = tuple(
Atom(_atom=a.atom()) for a in self.hbond_analysis.observed_acceptor_atoms()
)
return CrystalDescriptors.HBondCoordination.Predictions(
self.crystal,
self.hbond_analysis,
PackingSimilarityLib.coordination_likelihoods_table_raw_data(
grouping_calculator, grouping, self._coordination_propensities
)
)
#################################################################################################
# Hydrogen bond propensities
#################################################################################################
[docs] class HBondPropensities(object):
'''Calculates HBond propensities.
.. TODO: all donors and acceptors matched, load regression from file (fitting_data.txt)
'''
_telemetry = 0
[docs] class Settings(object):
'''Pertaining to HBond propensity calculation.'''
def __init__(self):
'''NB: databases *MUST* be in the SQLite ASER format.'''
self.hbond_criterion = Molecule.HBondCriterion()
self.hbond_criterion.require_hydrogens = True
self.hbond_criterion.path_length_range = (4, 999)
self.working_directory = tempfile.mkdtemp()
self._databases = [io.EntryReader('CSD')]
self._identifier_list = UtilitiesLib.DatabaseEntryIdentifierList()
self.functional_groups_directory = str(Resources().get_functional_groups_dir())
@property
def databases(self):
'''The databases to be used for the prediction.
Note: the databases *MUST* be SQLite ASER databases for the moment.
'''
return self._databases
@databases.setter
def databases(self, dbs):
if isinstance(dbs, io._DatabaseReader):
if not isinstance(dbs._db, (
CSDSQLDatabaseLib.CSDSQLDatabase,
FileFormatsLib.CrystalStructureDatabasePool,
FileFormatsLib.CrystalStructureDatabase
)):
raise RuntimeError('One or more databases is NOT searchable.')
self._databases = [dbs]
else:
for d in dbs:
if isinstance(d._db, (
CSDSQLDatabaseLib.CSDSQLDatabase,
FileFormatsLib.CrystalStructureDatabasePool,
FileFormatsLib.CrystalStructureDatabase
)):
continue
raise RuntimeError('One or more databases is NOT searchable.')
self._databases = dbs
@property
def limit_identifier_list(self):
'''A list of identifiers to limit the search'''
return self._identifier_list
@limit_identifier_list.setter
def limit_identifier_list(self, identifiers):
self._identifier_list = identifiers
@property
def working_directory(self):
'''The working directory for the predictions.'''
return self._working_directory
@working_directory.setter
def working_directory(self, path):
path = path.replace('\\', '/')
if not os.path.exists(path):
os.makedirs(path)
self._working_directory = path
[docs] class FunctionalGroup(object):
'''A functional group capable of hydrogen bonding.'''
def __init__(self, _model_group):
self._model_group = _model_group
def __repr__(self):
return 'FunctionalGroup(%s)' % self.identifier
@property
def identifier(self):
'''The name of the functional group.'''
return self._model_group.name
[docs] def matches(self, molecule):
'''The substructure search matches of the functional group.'''
from ccdc.search import SubstructureSearch, QuerySubstructure
searcher = SubstructureSearch()
sub = QuerySubstructure(self._model_group.substructure)
searcher.add_substructure(sub)
hits = searcher.search(molecule)
return hits
[docs] class FittingData(object):
'''The collection of entries used for the prediction.'''
[docs] class FittingDataEntry(object):
'''An individual entry with associated matching data.'''
def __init__(self, _fitting_item):
self._fitting_item = _fitting_item
@property
def identifier(self):
'''The identifier of the fitting data item.'''
return self._fitting_item.identifier().str()
def __init__(self, _fitting_data=None, identifiers=None, databases=None):
if _fitting_data is None:
_fitting_data = PackingSimilarityLib.PropensityPredictionFittingData()
self._fitting_data = _fitting_data
#if identifiers is None:
# # All the ids from all the dbs?
# pass
#else:
# self._fitting_data.add_fitting_data_items(identifiers, [d._db for d in databases])
self._limit_to_identifiers = identifiers
self._databases = databases
def __len__(self):
if self._fitting_data:
return self._fitting_data.nitems()
elif self._limit_to_identifiers:
return self._limit_to_identifiers.size()
else:
return 0
[docs] def write(self, file_name):
'''Writes the fitting data to a file.'''
with io.CrystalWriter(file_name) as writer:
for i in range(len(self)):
item = self._fitting_data.item(i)
writer.write(Crystal(item.crystal_structure(), item.identifier().str()))
[docs] def nitems(self, functional_group=None):
'''How many items there are representing the functional group.'''
if functional_group is None:
return self._fitting_data.nitems()
else:
return self._fitting_data.nitems_with_group(functional_group._model_group)
def _identifiers(self):
if self._fitting_data:
ids = UtilitiesLib.DatabaseEntryIdentifierList()
for i in range(len(self)):
item = self._fitting_data.item(i)
ids.push_back(item.identifier())
return ids
elif self._limit_to_identifiers:
return self._limit_to_identifiers
else:
return UtilitiesLib.DatabaseEntryIdentifierList()
def identifiers(self):
if self._fitting_data:
return tuple(self._fitting_data.item(i).identifier().str() for i in range(len(self)))
else:
return tuple()
def databases(self):
if self._databases:
return self._databases
return list(
set(
[self._fitting_data.item(i).parent_database() for i in range(len(self))]))
[docs] @staticmethod
def from_file(file_name, databases):
'''Reads fitting data from a file.'''
_fitting_data = PackingSimilarityLib.PropensityPredictionFittingData()
with io.EntryReader(file_name) as reader:
ids = UtilitiesLib.DatabaseEntryIdentifierList()
if isinstance(reader, io._GCDReader):
ids.read_from_file(file_name)
else:
for c in reader:
ids.push_back(UtilitiesLib.DatabaseEntryIdentifier(c.identifier))
_fitting_data.add_fitting_data_items(ids, [reader._db])
return CrystalDescriptors.HBondPropensities.FittingData(_fitting_data=_fitting_data, identifiers=ids, databases=databases)
[docs] class HBondAtom(object):
'''Base class for HBondDonor and HBondAcceptor.'''
def __init__(self, _analysis):
self._analysis = _analysis
self.nnegative = self.npositive = None
self.exclude = False
@property
def accessible_surface_area(self):
'''The accessible surface area of the HBond atom.'''
return PackingSimilarityLib.atom_accessible_surface(self._analysis)
@property
def functional_group_identifier(self):
'''The identifier of the functional group for this atom.'''
return self._analysis.substructure_label()
@property
def nlone_pairs(self):
'''The number of lone pairs associated with this atom.'''
return PackingSimilarityLib.nlone_pairs(self._analysis.atom())
@property
def identifier(self):
'''The full identifier of this atom.'''
return self._analysis.label()
@property
def label(self):
'''The label of the atom in the original structure.'''
return self._analysis.atom().label()
@property
def atom(self):
'''The :class:`ccdc.molecule.Atom` of the HBondAtom.'''
return Atom(_atom=self._analysis.atom())
[docs] class HBondDonor(HBondAtom):
'''A potential donor atom.
This class will be augmented with the evidence found during match_fitting_data().
'''
def __init__(self, _analysis):
super(self.__class__, self).__init__(_analysis)
@property
def donor_atom_type(self):
'''A string representation of the atom's donor type.'''
hbc = Molecule.HBondCriterion()
return hbc.donor_atom_type(Atom(_atom=self._analysis.atom()))
def __repr__(self):
if self.nnegative is None:
return 'HBondDonor(%s) no evidence' % self.identifier
else:
return 'HBondDonor(%s) %d positive %d negative' % (self.identifier, self.npositive, self.nnegative)
[docs] class HBondAcceptor(HBondAtom):
'''A potental acceptor atom.
This class will be augmented with the evidence found during match_fitting_data().
'''
def __init__(self, _analysis):
super(self.__class__, self).__init__(_analysis)
@property
def acceptor_atom_type(self):
'''A string representation of the atom's acceptor type.'''
hbc = Molecule.HBondCriterion()
return hbc.acceptor_atom_type(Atom(_atom=self._analysis.atom()))
def __repr__(self):
if self.nnegative is None:
return 'HBondAcceptor(%s) no evidence' % self.identifier
else:
return 'HBondAcceptor(%s) %d positive %d negative' % (
self.identifier, self.npositive, self.nnegative)
def _find_donor(self, label):
for d in self.donors:
if d.label == label:
return d
def _find_acceptor(self, label):
for a in self.acceptors:
if a.label == label:
return a
def _find_hbond(self, donor_label, acceptor_label, intermolecular):
for hb in self.crystal_hbonds:
if not hb.intermolecular == intermolecular:
continue
if hb.atoms[1] in hb.atoms[0].neighbours:
if donor_label == hb.atoms[0].label and acceptor_label == hb.atoms[-1].label:
return hb
elif hb.atoms[1] in hb.atoms[-1].neighbours:
if donor_label == hb.atoms[-1].label and acceptor_label == hb.atoms[0].label:
return hb
def _find_propensity(self, d, a):
for p in self.inter_propensities:
if d.atom().label() == p.donor_label and a.atom().label() == p.acceptor_label:
return p
raise RuntimeError('No matching propensity')
[docs] class HBond(object):
'''A putative HBond in the propensity calculation.
:ivar self.donor: the :class:`CrystalDescriptors.HBondPropensities.HBondDonor` involved in this hbond.
:ivar self.acceptor: the :class:`CrystalDescriptors.HBondPropensities.HBondAcceptor` involved in this hbond.
:ivar self.propensity: the :class:`CrystalDescriptors.HBondPropensities.Propensity` of this hbond.
'''
def __init__(self, hbp, _outcome):
self._outcome = _outcome
self.donor = hbp._find_donor(_outcome[0].atom().label())
self.acceptor = hbp._find_acceptor(_outcome[1].atom().label())
self.propensity = hbp._find_propensity(_outcome[0], _outcome[1])
def __eq__(self, other):
return (
self.donor == other.donor and
self.acceptor == other.acceptor and
self.propensity == other.propensity
)
[docs] class HBondGrouping(object):
'''A grouping of interactions between donors and acceptors representing a possible hbond network.
This represents a point in the chart of Mercury's HBondPropensity wizard.
:ivar self.donors: a tuple of :class:`CrystalDescriptors.HBondPropensities.HBondDonor` with specified coordination outcomes for this grouping.
:ivar self.acceptors: a tuple of :class:`CrystalDescriptors.HBondPropensities.HBondAcceptor` with specified coordination outcomes for this grouping.
:ivar self.hbonds: a tuple of :class:`CrystalDescriptors.HBondPropensities.HBond` forming the hbonds in the network.
:ivar self.hbond_score: the average propensity value of hbonds.
:ivar self.coordination_score: the negative average coordination score of the donors and acceptors with these coordination outcomes.
'''
_donor_outcome_mapping = {
PackingSimilarityLib.DOES_NOT_DONATE: "does not donate",
PackingSimilarityLib.DONATES_ONCE: "donates once",
PackingSimilarityLib.DONATES_TWICE: "donates twice",
PackingSimilarityLib.DONATES_THREE_TIMES: "donates 3 times",
PackingSimilarityLib.DONATES_FOUR_TIMES: "donates 4 times",
PackingSimilarityLib.DONATES_FIVE_TIMES: "donates 5 times",
PackingSimilarityLib.DONATES_SIX_OR_MORE_TIMES: "donates >= 6 times"
}
_acceptor_outcome_mapping = {
PackingSimilarityLib.DOES_NOT_ACCEPT: "does not accept",
PackingSimilarityLib.ACCEPTS_ONCE: "accepts once",
PackingSimilarityLib.ACCEPTS_TWICE: "accepts twice",
PackingSimilarityLib.ACCEPTS_THREE_TIMES: "accepts 3 times",
PackingSimilarityLib.ACCEPTS_FOUR_TIMES: "accepts 4 times",
PackingSimilarityLib.ACCEPTS_FIVE_TIMES: "accepts 5 times",
PackingSimilarityLib.ACCEPTS_SIX_TIMES: "accepts 6 times",
PackingSimilarityLib.ACCEPTS_SEVEN_OR_MORE_TIMES: "accepts >= 7 times"
}
def __init__(self, hbond_propensities, _outcome):
self._find_propensity = hbond_propensities._find_propensity
self._outcome = _outcome
self.donors = tuple(
CrystalDescriptors.HBondPropensities.HBondDonor(d._analysis) for d in hbond_propensities.donors
)
self.acceptors = tuple(
CrystalDescriptors.HBondPropensities.HBondAcceptor(a._analysis) for a in
hbond_propensities.acceptors
)
self.hbonds = tuple(
CrystalDescriptors.HBondPropensities.HBond(hbond_propensities, _outcome) for _outcome in
self._outcome.pair_propensities()
)
if len(self.hbonds) == 0:
self.hbond_score = 0.0
else:
self.hbond_score = sum(hb.propensity.propensity for hb in self.hbonds) / len(self.hbonds)
observed = []
for hb in hbond_propensities.crystal_hbonds:
ats = hb.atoms
if ats[1] in ats[0].neighbours:
d = ats[0]
a = ats[-1]
else:
d = ats[-1]
a = ats[0]
observed.append((d.label, a.label))
donor_counts = collections.defaultdict(int)
acceptor_counts = collections.defaultdict(int)
for hb in self.hbonds:
d = hb.donor._analysis.atom().label()
a = hb.acceptor._analysis.atom().label()
donor_counts[d] += 1
acceptor_counts[a] += 1
hb.is_observed = (d, a) in observed
coord_score = 0.0
for d in self.donors:
for attr, value in d.__dict__.items():
if not attr.startswith('_'):
setattr(d, attr, value)
label = d._analysis.atom().label()
outcome = donor_counts[label]
preds_for_label = hbond_propensities.coordination_scores.predictions_for_label(label, 'donor')
if preds_for_label is None:
d_coord = 0.0
else:
d_coord = preds_for_label[1].get(outcome, 0.0)
d.outcome = outcome
d.outcome_label = self._donor_outcome_mapping[outcome]
d.coordination_propensity = d_coord
coord_score += d_coord
for a in self.acceptors:
for attr, value in a.__dict__.items():
if not attr.startswith('_'):
setattr(a, attr, value)
label = a._analysis.atom().label()
outcome = acceptor_counts[label]
preds_for_label = hbond_propensities.coordination_scores.predictions_for_label(label, 'acceptor')
if preds_for_label is None:
a_coord = 0.0
else:
a_coord = preds_for_label[1].get(outcome, 0.0)
a.outcome = outcome
a.outcome_label = self._acceptor_outcome_mapping[outcome]
a.coordination_propensity = a_coord
coord_score += a_coord
self.coordination_score = -1 * (coord_score / len(self.donors + self.acceptors))
def __repr__(self):
dons = ' '.join('%s: %s' % (d.atom, d.outcome) for d in self.donors)
accs = ' '.join('%s: %s' % (d.atom, d.outcome) for d in self.acceptors)
return 'Grouping(Donors: %s; Acceptors: %s)' % (dons, accs)
def __eq__(self, other):
return (
all(d.atom == o.atom for d, o in zip(self.donors, other.donors)) and
all(d.outcome == o.outcome for d, o in zip(self.donors, other.donors)) and
all(a.atom == o.atom for a, o in zip(self.acceptors, other.acceptors)) and
all(a.outcome == o.outcome for a, o in zip(self.acceptors, other.acceptors)) and
len(self.hbonds) == len(other.hbonds) and
all(hb == other_hb for hb, other_hb in zip(self.hbonds, other.hbonds))
)
[docs] class Model(object):
'''The logistic regression model.'''
[docs] class Parameter(object):
'''A named parameter of the regression.'''
def __init__(self, _crystal_structure_property):
self._crystal_structure_property = _crystal_structure_property
self.include = True
@property
def identifier(self):
'''The identifier of the parameter.'''
return self._crystal_structure_property.label()
[docs] def calculate(self, donor, acceptor):
'''The value of this property for the pair of atoms.
:parameter donor: `ccdc.hbond_coordination.CrystalDescriptors.HBondPropensities.HBondDonor` instance.
:parameter acceptor: `ccdc.hbond_coordination.CrystalDescriptors.HBondPropensities.HBondAcceptor` instance.
:returns: float
'''
hbprop = PackingSimilarityLib.CrystalStructurePropertyAsHyBondingProperty(
self._crystal_structure_property)
if hbprop is not None:
hbprop.update(donor._analysis, acceptor._analysis, PackingSimilarityLib.HyBondContactAnalysis())
else:
hbprop = PackingSimilarityLib.CrystalStructurePropertyAsMoleculeProperty(
self._crystal_structure_property)
hbprop.update(donor._analysis.atom().molecule())
return hbprop.value()
[docs] class Coefficient(object):
'''A coefficient of the regression model.'''
def __init__(self, _coefficient):
self._coefficient = _coefficient
@property
def identifier(self):
'''The identifier of the coefficient.'''
return self._coefficient.label()
@property
def confidence_interval(self):
'''The upper and lower bounds of the coefficient.'''
return (self._coefficient.coefficient_lower_bound(), self._coefficient.coefficient_upper_bound())
@property
def estimate(self):
'''The estimate of the coefficient.'''
return self._coefficient.coefficient_value()
@property
def is_baseline(self):
'''Whether or not the coefficient is used for the baseline calculation.'''
return self._coefficient.is_baseline()
@property
def standard_error(self):
'''Standard error of the coefficient.'''
if not self.is_baseline:
return self._coefficient.significance_statistics().standard_error_
@property
def z_value(self):
'''Z-value of the coefficient.'''
if not self.is_baseline:
return self._coefficient.significance_statistics().z_value_
@property
def p_value(self):
'''P-value of the coefficient.'''
if not self.is_baseline:
return self._coefficient.significance_statistics().pr_
@property
def significance_code(self):
'''A string representation of how significant the parameter is.
'***' for P-value < 0.01, '**' < 0.01. '*' < 0.05 and '.' < 0.1
'''
if not self.is_baseline:
return self._coefficient.significance_statistics().significance_code()
def __init__(self, _model):
self._model = _model
@property
def equation(self):
'''The regression equation.'''
coeffs = [self._model.coefficient(i) for i in range(self._model.ncoefficients())]
s = '%.2f + ' % coeffs[0].coefficient_value()
s += ' + '.join(
'%.2f*%s' % (c.coefficient_value(), c.label()) for c in coeffs[1:] if not c.is_baseline())
return s
@property
def coefficients(self):
'''The coefficients of the model.'''
return tuple(
CrystalDescriptors.HBondPropensities.Model.Coefficient(self._model.coefficient(i))
for i in range(self._model.ncoefficients())
)
@property
def area_under_roc_curve(self):
'''Area under the ROC curve.'''
return self._model.goodness_of_fit_data().area_under_roc_curve()
@property
def advice_comment(self):
'''A string representing the quality of the discrimination based on the ROC.'''
auc = self.area_under_roc_curve
if auc < 0.7:
return 'bad'
if auc < 0.75:
return 'poor'
if auc < 0.8:
return 'maybe acceptable'
if auc < 0.85:
return 'good'
if auc < 0.9:
return 'excellent'
return 'outstanding'
@property
def akaike_information_criterion(self):
'''The Akaike Information Criterion (AIC) of the model.'''
return self._model.goodness_of_fit_data().akaike_information_criterion()
@property
def null_deviance(self):
'''The null deviance of the model.'''
return self._model.goodness_of_fit_data().null_deviance()
@property
def null_deviance_degrees_of_freedom(self):
'''The degrees of freedom of the null deviance of the model.'''
return self._model.goodness_of_fit_data().null_deviance_degrees_of_freedom()
@property
def residual_deviance(self):
'''The residual deviance of the model.'''
return self._model.goodness_of_fit_data().residual_deviance()
@property
def residual_deviance_degrees_of_freedom(self):
'''The number of degrees of freedom of the residual deviance of the model.'''
return self._model.goodness_of_fit_data().residual_deviance_degrees_of_freedom()
@property
def log_likelihood(self):
'''The log likelihood of the model.'''
return self._model.goodness_of_fit_data().log_likelihood()
@property
def log_likelihood_test_p_value(self):
'''The P-value of the log likelihood of the model.'''
return self._model.goodness_of_fit_data().log_likelihood_test_p_value()
[docs] class Propensity(object):
'''Base class for inter- and intra-molecular propensity predictions.
:ivar self.donor: the :class:`CrystalDescriptors.HBondPropensities.HBondDonor` involved in this putative hbond.
:ivar self.acceptor: the :class:`CrystalDescriptors.HBondPropensities.HBondAcceptor` involved in this putative hbond.
:ivar self.hbond: the :class:`ccdc.crystal.Crystal.HBond` from the target structure if this hbond is observed.
'''
def __init__(self, hbp, _row):
self._row = _row
self._prediction = _row.pair_prediction()
self.donor = hbp._find_donor(self.donor_label)
self.acceptor = hbp._find_acceptor(self.acceptor_label)
self.hbond = hbp._find_hbond(self.donor_label, self.acceptor_label, self.is_intermolecular)
@property
def propensity(self):
'''The predicted value.'''
return self._prediction.prediction()
@property
def donor_label(self):
'''The label of the donor atom.'''
return self._prediction.pair().donor
@property
def acceptor_label(self):
'''The label of the acceptor atom.'''
return self._prediction.pair().acceptor
@property
def bounds(self):
'''The lower and upper bounds of the prediction.'''
return (
self._prediction.predicted_lower_bound(),
self._prediction.predicted_upper_bound()
)
@property
def uncertainty(self):
'''The uncertainty in the prediction.'''
return self._prediction.uncertainty()
@property
def predictive_error(self):
'''The error in the prediction.'''
return self._prediction.predictive_error()
@property
def scores(self):
'''The calculated values and statistics for the hbond prediction.'''
return {
qp[0]: qp[1]
for qp in [self._prediction.quant_props(i) for i in range(self._prediction.nquant_props())]
}
@property
def is_observed(self):
'''Whether the hbond is observed in the target structure.'''
return self._row.is_observed()
@property
def hbond_count(self):
'''The number of instances of the hbond observed in the target structure.'''
return self._row.hbond_count()
@property
def donor_rank(self):
'''The rank number of the donor.'''
return self._row.donor_rank()
@property
def acceptor_rank(self):
'''The rank number of the acceptor.'''
return self._row.acceptor_rank()
@property
def donor_component(self):
'''The component number of the donor in the target structure.'''
return self._row.donor_component()
@property
def acceptor_component(self):
'''The component number of the acceptor in the target structure.'''
return self._row.acceptor_component()
@property
def is_donor_bifurcated(self):
'''Whether the donor is bifurcated in the target structure.'''
return self._row.is_donor_bifurcated()
@property
def is_acceptor_bifurcated(self):
'''Whether the acceptor is bifurcated in the target structure.'''
return self._row.is_acceptor_bifurcated()
[docs] class InterPropensity(Propensity):
'''Predicted propensity for a single HBond.'''
@property
def is_intermolecular(self):
'''Whether or not the predicted propensity is for an intermolecular HBond.'''
return True
[docs] class IntraPropensity(Propensity):
'''Predicted propensity for an intramolecular HBond.'''
@property
def is_intermolecular(self):
'''Whether or not the predicted propensity is for an intermolecular HBond.'''
return False
def __init__(self, settings=None):
if settings is None:
settings = CrystalDescriptors.HBondPropensities.Settings()
self.settings = settings
self._model = PackingSimilarityLib.create_propensity_prediction_model(settings.working_directory,
settings.functional_groups_directory)
self.all_parameters = tuple(
CrystalDescriptors.HBondPropensities.Model.Parameter(p)
for p in PackingSimilarityLib.analysis_model_properties()
)
self._model.set_hbond_criterion(self.settings.hbond_criterion._hbc.hbond_criterion())
# Load the functional groups
self._model.functional_group_manager().set_directory(
self.settings.functional_groups_directory
)
self._substructures = self._model.functional_group_manager().substructures()
self._model.prediction().set_model_groups(self._substructures)
self._model.prediction().set_working_directory(settings.working_directory)
self._hbond_coordination = CrystalDescriptors.HBondCoordination(skip_telemetry=True)
if type(self)._telemetry == 0:
UtilitiesLib.ccdc_hydrogen_bond_propensity_telemetry()
type(self)._telemetry = 1
def _make_donors_and_acceptors(self, _csv):
hba = PackingSimilarityLib.create_bonding_analysis(
self._model.prediction(),
self._model.hbond_criterion())
hba.run_analysis(_csv)
donors = tuple(
CrystalDescriptors.HBondPropensities.HBondDonor(d) for d in hba.observed_donor_atoms()
)
acceptors = tuple(
CrystalDescriptors.HBondPropensities.HBondAcceptor(a) for a in hba.observed_acceptor_atoms()
)
return donors, acceptors
[docs] def hbond_atoms(self, crystal=None):
'''The HBondDonor and HBondAcceptor atoms of a crystal.
:param crystal: :class:`ccdc.crystal.Crystal` instance, or None, in which case the HBondAtoms of the target will be returned.
:returns: a pair of tuples of :class:`ccdc.descriptors.CrystalDescriptors.HBondPropensities.HBondDonor` and :class:`ccdc.descriptors.CrystalDescriptors.HBondPropensities.HBondAcceptor`.
'''
if crystal is None:
return self.donors, self.acceptors
_csv = ChemistryLib.CrystalStructureView.instantiate(crystal._crystal)
return self._make_donors_and_acceptors(_csv)
[docs] def set_target(self, crystal):
'''Sets a single target for the propensity calculation.
:param crystal: a :class:`ccdc.crystal.Crystal` instance.
'''
#self._model = PackingSimilarityLib.create_propensity_prediction_model(self.settings.working_directory,
# self.settings.functional_groups_directory)
#self._model.prediction().set_hbond_criterion(self.settings.hbond_criterion._hbc.hbond_criterion())
#self._model.functional_group_manager().read_groups()
#self._substructures = PackingSimilarityLib.get_substructures_from_connsers(
# self._model.functional_group_manager())
#self._model.prediction().set_model_groups(self._substructures)
self._model.prediction().clear_hbond_models()
self._model.prediction().set_working_directory(self.settings.working_directory)
self.settings.limit_identifier_list = UtilitiesLib.DatabaseEntryIdentifierList()
self._model.prediction().clear_regression_results()
self._model.clear_predicted_propensities()
self._called_make_fitting_data = False
self.crystal = crystal
self._csv = ChemistryLib.CrystalStructureView.instantiate(crystal._crystal)
self._target = PackingSimilarityLib.PropensityPredictionTarget(
crystal.identifier, crystal.identifier, self._csv
)
self._model.prediction().set_target(self._target)
func_groups = self._model.find_matching_groups(self._csv)
self._model.prediction().set_model_groups(func_groups)
self.functional_groups = tuple(
CrystalDescriptors.HBondPropensities.FunctionalGroup(g) for g in func_groups
)
self.donors, self.acceptors = self._make_donors_and_acceptors(self._csv)
self._hbond_coordination.settings._graph_set_search_settings.hbond_criterion = self.settings.hbond_criterion
self.coordination_scores = self._hbond_coordination.predict(crystal)
for d in self.donors:
obs = self.coordination_scores.predictions_for_label(d.label, 'donor')
if obs is None:
d.observed_coordination = 0.0
d.coordination_scores = {i: 0.0 for i in range(3)}
else:
d.observed_coordination, d.coordination_scores = obs
for a in self.acceptors:
obs = self.coordination_scores.predictions_for_label(a.label, 'acceptor')
if obs is None:
a.observed_coordination = 0.0
a.coordination_scores = {i: 0.0 for i in range(3)}
else:
a.observed_coordination, a.coordination_scores = obs
self.crystal_hbonds = self.crystal.hbonds(hbond_criterion=self.settings.hbond_criterion)
@staticmethod
def _get_as_dbhandle(db):
if isinstance(db, FileFormatsLib.CrystalStructureDatabasePool):
return DatabaseEntryLib.CrystalStructureDatabasePoolAsCrystalStructureDatabase(db)
return db
def _make_fitting_data_generator(self):
self._fitting_data_generator = PackingSimilarityLib.PropensityPredictionFittingDataGenerator()
self._fitting_data_generator.set_databases(
[self._get_as_dbhandle(db._db) for db in self.settings.databases])
self._fitting_data_generator.set_identifier_list(self.settings.limit_identifier_list)
self._fitting_data_generator.set_avoid_id(
UtilitiesLib.DatabaseEntryIdentifier(self._target.name())
)
self._fitting_data_generator.set_target_id(
UtilitiesLib.DatabaseEntryIdentifier(self._target.name())
)
self._fitting_data_generator.set_groups(
self._model.prediction().model_groups(),
self._model.prediction().groups_on_target(self._target, self._model.hbond_criterion())
)
[docs] def make_fitting_data(self):
''' Deprecated method.
Please use match_fitting_data or use CrystalDescriptors.HBondPropensities.FittingData.from_file
to limit the entries that are searched
returns an object that will cause all of the database entries to be searched
'''
self._make_fitting_data_generator()
self._fitting_data_generator.generate_fitting_data()
fd = CrystalDescriptors.HBondPropensities.FittingData(
_fitting_data=self._fitting_data_generator.fitting_data(),
identifiers=UtilitiesLib.DatabaseEntryIdentifierList(),
databases=self.settings.databases)
self._fitting_data = fd._fitting_data
self._model.prediction().set_master_data(fd._fitting_data)
self._called_make_fitting_data = True
return fd
@property
def fitting_data(self):
'''The fitting data.'''
_fd = None if not hasattr(self, '_fitting_data') else self._fitting_data
return CrystalDescriptors.HBondPropensities.FittingData(
_fitting_data=_fd,
identifiers=self.settings.limit_identifier_list,
databases=self.settings.databases)
@fitting_data.setter
def fitting_data(self, data):
self._fitting_data = data._fitting_data
self.settings.limit_identifier_list = data._identifiers()
self.settings.databases = data.databases()
self._model.prediction().set_fitting_data(self._fitting_data)
self._make_fitting_data_generator()
[docs] def show_fitting_data_counts(self, data=None):
'''Shows the matched counts for each functional group.'''
if data is None:
data = self.fitting_data
longest_name = max(len(g.identifier) for g in self.functional_groups)
print('%d structures' % len(data))
for g in self.functional_groups:
print('%*s: %5d (%s)' % (
longest_name, g.identifier, data.nitems(g), data.advice_comment(g))
)
[docs] def match_fitting_data(self, count=None, verbose=False):
'''Reduces fitting data down such that each functional group has at least the specified number of examples.'''
if count is None:
if not self._called_make_fitting_data:
self.make_fitting_data()
fitting_data_result = self._fitting_data_generator.fitting_data()
self._model.prediction().set_master_data(fitting_data_result)
best = PackingSimilarityLib.best_slider_position(self._model, True, 99)
self._model.prediction().refine_fitting_data(True, 99, best, self._model.hbond_criterion())
fitting_data_result = self._model.prediction().fitting_data()
else:
fitting_data_result = PackingSimilarityLib.PropensityPredictionFittingDataGenerator.full_generation_of_fitting_data(
[self._get_as_dbhandle(db._db) for db in self.settings.databases],
self.settings.limit_identifier_list,
UtilitiesLib.DatabaseEntryIdentifier(self._target.name()),
UtilitiesLib.DatabaseEntryIdentifier(self._target.name()),
self._model.prediction().model_groups(),
self._model.prediction().groups_on_target(self._target, self._model.hbond_criterion()),
count,
verbose
)
fitting_data_result = PackingSimilarityLib.create_subset(fitting_data_result, fitting_data_result.model_groups(), count)
self._fitting_data = fitting_data_result
self.show_fitting_data_counts()
[docs] def analyse_fitting_data(self):
'''Perform a hydrogen bond analysis of the fitting data.'''
pred = self._model.prediction()
bonding_analysis = PackingSimilarityLib.create_bonding_analysis(pred, self._model.hbond_criterion())
surveyor = PackingSimilarityLib.create_surveyor()
pred.set_fitting_data(self._fitting_data)
for i in range(self._fitting_data.nitems()):
self._fitting_data.item(i).analyse(bonding_analysis, surveyor)
self._find_donor_acceptor_counts()
def _find_donor_acceptor_counts(self):
'''Find true/false outcomes for the functional groups, excluding the poorly represented hbonds.'''
for d in self.donors + self.acceptors:
d.nnegative = d.npositive = None
d.exclude = False
data = self._model.prediction().fitting_data()
data.clear_exclusions()
don, acc, txt = PackingSimilarityLib.find_donor_and_acceptor_counts(data)
doing_donors = True
self._don_excludes = []
self._acc_excludes = []
for l in txt[1:]:
cat, at, pos, neg = l
if cat == 'Acceptor(s)':
doing_donors = False
pos = int(pos)
neg = int(neg)
if pos < 3 or neg < 3 or pos + neg < 3:
if doing_donors:
self._don_excludes.append(at)
else:
self._acc_excludes.append(at)
if at == 'other':
continue
if doing_donors:
candidates = self.donors
else:
candidates = self.acceptors
for x in candidates:
tag = x._analysis.atom_in_group_label()
if tag == at and x.npositive is None:
x.npositive = int(pos)
x.nnegative = int(neg)
break
html = [
'<table name="DonorAcceptorCounts" border="1" style=" border-color:#a0a0a4; border-style:dotted;" cellspacing="2" cellpadding="5">',
'<tr><td>Category</td><td>Label</td><td>True</td><td>False</td></tr>'
]
for x in self.donors + self.acceptors:
if x.npositive is None or x.nnegative is None:
x.exclude = True
continue
if x.npositive < 3 or x.nnegative < 3 or x.npositive + x.nnegative < 5:
x.exclude = True
html.append(
'<tr><td>%s</td><td>%s</td><td align="right">%d</td><td align="right">%d</td></tr>'
% ('Donor' if d in self.donors else 'Acceptor', x.identifier, x.npositive, x.nnegative)
)
self.training_stats_html = html
def _sort_out_exclusions(self):
data = self._model.prediction().fitting_data()
data.clear_exclusions()
for d in self._don_excludes:
data.add_exclusion(PackingSimilarityLib.DonorAcceptorPair(d, ''))
for a in self._acc_excludes:
data.add_exclusion(PackingSimilarityLib.DonorAcceptorPair('', a))
for d in self.donors:
if d.exclude:
data.add_exclusion(
PackingSimilarityLib.DonorAcceptorPair(d.identifier, '')
)
for a in self.acceptors:
if a.exclude:
data.add_exclusion(PackingSimilarityLib.DonorAcceptorPair('', a.identifier))
def _write_data_files(self):
'''Write all necessary data files.'''
# Training data
self._sort_out_exclusions()
data = self._model.prediction().fitting_data()
stats = data.spreadsheet_data()
stream = PackingSimilarityLib.ofstream(
str(os.path.join(self.settings.working_directory, '%s_training_dataset.csv' % self._target.name()))
)
PackingSimilarityLib.print_hbond_stats(stats, stream)
stream.close()
# Molecule
with io.MoleculeWriter(
os.path.join(self.settings.working_directory, 'prediction_molecule_%s.mol2' % self._target.name())
) as writer:
writer.write(self.crystal.molecule)
# fitting_data_.gcd
with open(os.path.join(self.settings.working_directory, 'fitting_data_%s.gcd' % self._target.name()),
'w') as writer:
writer.write('\n'.join(data.item(i).identifier().str() for i in range(data.nitems())))
# _counts.html
with open(os.path.join(self.settings.working_directory, '%s_counts.html' % self._target.name()),
'w') as writer:
writer.write('\n'.join(self.training_stats_html))
# Matching .con files
fgm = self._model.functional_group_manager()
model_groups = self._model.prediction().model_groups()
for g in model_groups:
name = g.name
for named_connser in fgm.connser_files():
if named_connser.name == name or named_connser.name == name.replace('_', ' '):
named_connser.connser_file.print_(os.path.join(self.settings.working_directory, '%s.con' % name))
break
else:
print('No matching connser file for %s' % name)
@property
def propensities(self):
'''The inter- and intra-propensities of the prediction.'''
return self.inter_propensities + self.intra_propensities
[docs] def calculate_propensities(self, crystal=None):
'''Apply the regression equation to a crystal.
:parameter crystal: :class:`ccdc.crystal.Crystal` instance or None. If None the target structure will be used.
'''
if crystal is None:
_target = self._target
else:
_csv = ChemistryLib.CrystalStructureView.instantiate(crystal._crystal)
_target = PackingSimilarityLib.PropensityPredictionTarget(crystal.identifier, _csv)
data = self._model.prediction().fitting_data()
props = self._model.predict_propensity(
_target, self.model._model
)
inter_rows = PackingSimilarityLib.PropensityTableModel(props, PackingSimilarityLib.INTER, data).rows()
self.inter_propensities = tuple(
CrystalDescriptors.HBondPropensities.InterPropensity(self, row)
for row in inter_rows
)
intra_rows = PackingSimilarityLib.PropensityTableModel(props, PackingSimilarityLib.INTRA, data).rows()
self.intra_propensities = tuple(
CrystalDescriptors.HBondPropensities.IntraPropensity(self, row)
for row in intra_rows
)
return self.propensities
def _count_hbond_groupings(self, min_donor_prob=0.0, min_acceptor_prob=0.0):
'''Counts the number of coordination possibilites, which correlates with the number of groupings.'''
d_preds = [
[c for c, v in self.coordination_scores.predictions_for_label(d.label, 'donor')[1].items() if v >= min_donor_prob]
for d in self.donors
]
a_preds = [
[c for c, v in self.coordination_scores.predictions_for_label(a.label, 'acceptor')[1].items() if v >= min_acceptor_prob]
for a in self.acceptors
]
max_acc_ct = sum(max(ap) for ap in a_preds)
def _possible_paths(donors, acceptors, d_ct):
if donors:
d, preds = donors[0]
for dc in [c for c in preds if d_ct + c <= max_acc_ct]:
for p in _possible_paths(donors[1:], acceptors, d_ct + dc):
yield [(d, dc)] + p
elif len(acceptors) > 1:
a, preds = acceptors[0]
for ac in [c for c in preds if 0 <= d_ct - c <= sum(max(ap) for z, ap in acceptors[1:])]:
for p in _possible_paths(donors, acceptors[1:], d_ct - ac):
yield [(a, ac)] + p
else:
a, preds = acceptors[0]
yield [(a, d_ct)]
for i, c in enumerate(_possible_paths(zip(self.donors, d_preds), zip(self.acceptors, a_preds), 0)):
pass
return i
[docs] def generate_hbond_groupings(self, min_donor_prob=None, min_acceptor_prob=None):
'''Generate all possible permutations of donors and acceptors to create all possible hbond groupings.'''
if min_donor_prob is None or min_acceptor_prob is None:
min_donor_prob = 0.1
min_acceptor_prob = 0.1
if not hasattr(self, 'propensities'):
self.calculate_propensities()
predicted_propensity = self._model.prediction().calculate_hbond_propensities(
self._target, self.model._model, self._model.hbond_criterion()
)
pair_preds = predicted_propensity.inter_predictions()
inter_predictions = tuple(pair_preds.prediction(i) for i in range(pair_preds.npredictions()))
intra_predictions = tuple(
predicted_propensity.intra_all_pair_prediction(i)
for i in range(predicted_propensity.nintra_all_pair_predictions())
)
self._intra_predictions = intra_predictions
analysis = self._hbond_coordination.hbond_analysis
coordination_propensities = self._hbond_coordination._coordination_propensities
self._hbond_coordination._calculator.update_propensities(coordination_propensities, analysis)
assessment = PackingSimilarityLib.CrystalHydrogenBondAssessment(
self._target.name(), analysis, inter_predictions, coordination_propensities,
min_donor_prob, min_acceptor_prob
)
self.outcomes = outcomes = [assessment.donor_acceptor_pairing(i) for i in
range(assessment.ndonor_acceptor_pairings())]
self.hbond_groupings = tuple(
CrystalDescriptors.HBondPropensities.HBondGrouping(self, outcome)
for outcome in outcomes
)
self._observed_outcomes = [assessment.observed_outcome(i) for i in range(assessment.nobserved_outcomes())]
if assessment.nobserved_outcomes() == 1:
self._target_hbond_grouping = CrystalDescriptors.HBondPropensities.HBondGrouping(self, self._observed_outcomes[0])
else:
self._target_hbond_grouping = None
return self.hbond_groupings
[docs] def target_hbond_grouping(self):
'''Which of the hbond groupings is of the target structure.'''
if self._target_hbond_grouping is not None:
return self._target_hbond_grouping
def _valid(hb):
d, a = hb.atoms[0], hb.atoms[-1]
x = self._find_donor(d.label)
y = self._find_acceptor(a.label)
if x is None or y is None:
x = self._find_donor(a.label)
y = self._find_acceptor(d.label)
return x.functional_group_identifier != 'other' and y.functional_group_identifier != 'other'
chbs = [
hb for hb in self.crystal_hbonds
if _valid(hb)
]
for g in self.hbond_groupings:
if len(g.hbonds) == len(chbs) and all(hb.is_observed for hb in g.hbonds):
return g
[docs] class PoreAnalyser(object):
'''Calculates Pore Analysis.
crystal is ccdc.crystal.Crystal
'''
_telemetry = 0
def __init__(self, crystal, settings=None):
self._crystal = crystal
if settings is None:
settings = CrystalDescriptors.PoreAnalyser.Settings()
self.settings = settings
self._flags = CrystalDescriptors.PoreAnalyser.Flags()
self.settings._flags = self._flags
self._pore_calculator = None
self._system_volume = self._crystal.cell_volume # vol of unit cell (A^3)
# cached results
self._system_mass = None
self._system_density = None
self._total_surface_area = None
self._total_surface_area_per_volume = None
self._total_surface_area_per_mass = None
self._network_accessible_surface_area = None
self._network_accessible_surface_area_per_volume = None
self._network_accessible_surface_area_per_mass = None
self._total_he_volume = None
self._network_accessible_he_volume = None
self._total_geom_volume = None
self._network_accessible_geom_volume = None
self._pore_limiting_diameter = None
self._max_pore_diameter = None
self._num_percolated_dimensions = None
if type(self)._telemetry == 0:
UtilitiesLib.ccdc_pore_analyser_telemetry()
type(self)._telemetry = 1
[docs] class Flags:
'''Flags for validlity of cached variables'''
def __init__(self):
self._calculator_is_valid = False
self.system_mass_valid = False
self.system_density_valid = False
self.total_surface_area_valid = False
self.total_surface_area_per_volume_valid = False
self.total_surface_area_per_mass_valid = False
self.network_accessible_surface_area_valid = False
self.network_accessible_surface_area_per_volume_valid = False
self.network_accessible_surface_area_per_mass_valid = False
self.total_he_volume_valid = False
self.network_accessible_he_volume_valid = False
self.total_geom_volume_valid = False
self.network_accessible_geom_volume_valid = False
self.pore_limiting_diameter_valid = False
self.max_pore_diameter_valid = False
self.num_percolated_dimensions_valid = False
def set_all_false(self):
self._calculator_is_valid = False
self.system_mass_valid = False
self.system_density_valid = False
self.total_surface_area_valid = False
self.total_surface_area_per_volume_valid = False
self.total_surface_area_per_mass_valid = False
self.network_accessible_surface_area_valid = False
self.network_accessible_surface_area_per_volume_valid = False
self.network_accessible_surface_area_per_mass_valid = False
self.total_he_volume_valid = False
self.network_accessible_he_volume_valid = False
self.total_geom_volume_valid = False
self.network_accessible_geom_volume_valid = False
self.pore_limiting_diameter_valid = False
self.max_pore_diameter_valid = False
self.num_percolated_dimensions_valid = False
@property
def calculator_is_valid(self):
'''grid spacing (A)'''
return self._calculator_is_valid
@calculator_is_valid.setter
def calculator_is_valid(self, value):
self._calculator_is_valid = value
if value is False:
self.set_all_false()
[docs] class Settings:
'''Settings for PoreAnalyser'''
def __init__(self):
self._grid_spacing = None # grid spacing (A)
self._he_probe_sigma = None # UFF LJ sigma (A)
self._he_probe_epsilon = None # UFF LJ epsilon/k (K)
self._n_probe_sigma = None # UFF LJ sigma (A)
self._samples_per_atom = None # sample size for surface area calculation
self._cutoff_distance = None # cut-off distance (A)
self._temperature = None # temperature (K)
self._flags = None
self.set_to_defaults()
[docs] def set_to_defaults(self):
'''set to default values'''
self.grid_spacing = 0.2
self.he_probe_sigma = 2.58
self.he_probe_epsilon = 10.22
self.n_probe_sigma = 3.314
self.samples_per_atom = 500
self.cutoff_distance = 12.8
self.temperature = 298
if self._flags is not None:
self._flags.set_all_false()
@property
def grid_spacing(self):
'''grid spacing (A)'''
return self._grid_spacing
@grid_spacing.setter
def grid_spacing(self, value):
self._grid_spacing = value
if self._flags is not None:
self._flags.calculator_is_valid = False
@property
def he_probe_sigma(self):
'''UFF L-J sigma for He probe (A)'''
return self._he_probe_sigma
@he_probe_sigma.setter
def he_probe_sigma(self, value):
self._he_probe_sigma = value
if self._flags is not None:
self._flags.calculator_is_valid = False
@property
def he_probe_epsilon(self):
'''UFF L-J epsilon/k for He probe (K)'''
return self._he_probe_epsilon
@he_probe_epsilon.setter
def he_probe_epsilon(self, value):
self._he_probe_epsilon = value
if self._flags is not None:
self._flags.calculator_is_valid = False
@property
def n_probe_sigma(self):
'''UFF L-J sigma for N probe (A)'''
return self._n_probe_sigma
@n_probe_sigma.setter
def n_probe_sigma(self, value):
self._n_probe_sigma = value
if self._flags is not None:
self._flags.calculator_is_valid = False
@property
def samples_per_atom(self):
'''Sample size for surface area calculation'''
return self._samples_per_atom
@samples_per_atom.setter
def samples_per_atom(self, value):
self._samples_per_atom = value
if self._flags is not None:
self._flags.calculator_is_valid = False
@property
def cutoff_distance(self):
'''Cut-off distance (A)'''
return self._cutoff_distance
@cutoff_distance.setter
def cutoff_distance(self, value):
self._cutoff_distance = value
if self._flags is not None:
self._flags.calculator_is_valid = False
@property
def temperature(self):
'''Temperature (K)'''
return self._temperature
@temperature.setter
def temperature(self, value):
self._temperature = value
if self._flags is not None:
self._flags._he_pore_volume_valid = False
self._flags._network_accessible_he_pore_volume_valid = False
@property
def system_volume(self):
'''Result: volume of unit cell (A^3)'''
return self._system_volume
@property
def system_mass(self):
'''Result: mass of unit cell (g/mol)'''
if not self._flags.system_mass_valid:
self._system_mass = self._calculate_system_mass()
self._flags.system_mass_valid = True
self._flags.system_density_valid = False
self._flags.total_surface_area_per_mass_valid = False
self._flags.network_accessible_surface_area_per_mass_valid = False
return self._system_mass
def _calculate_system_mass(self):
self._initialize_calculator()
return self._pore_calculator.calculate_crystal_structure_mass()
@property
def system_density(self):
'''Result: density (g/cm^3)'''
if not self._flags.system_density_valid:
self._system_density = self._calculate_system_density(self.system_mass, self.system_volume)
self._flags.density_valid = True
return self._system_density
def _calculate_system_density(self, mass, volume):
self._initialize_calculator()
return self._pore_calculator.calculate_crystal_structure_density(mass, volume)
@property
def total_surface_area(self):
'''Result: surface area (A^2)'''
if not self._flags.total_surface_area_valid:
self._total_surface_area = self._calculate_total_surface_area()
self._flags.total_surface_area_valid = True
self._flags.total_surface_area_per_volume_valid = False
return self._total_surface_area
def _calculate_total_surface_area(self):
self._initialize_calculator()
return self._pore_calculator.calculate_surface_area(self.settings.samples_per_atom)
@property
def total_surface_area_per_volume(self):
'''Result: surface area per volume (m^2/cm^3)'''
if not self._flags.total_surface_area_per_volume_valid:
self._total_surface_area_per_volume = self._calculate_surface_area_per_volume(
self.total_surface_area, self.system_volume)
self._flags.total_surface_area_per_volume_valid = True
self._flags.total_surface_area_per_mass_valid = False
return self._total_surface_area_per_volume
def _calculate_surface_area_per_volume(self, surface_area, volume):
self._initialize_calculator()
return self._pore_calculator.calculate_surface_area_per_vol(surface_area, volume)
@property
def total_surface_area_per_mass(self):
'''Result: surface area per mass (m^2/g)'''
if not self._flags.total_surface_area_per_mass_valid:
self._total_surface_area_per_mass = self._calculate_surface_area_per_mass(
self.total_surface_area_per_volume, self.system_mass, self.system_volume)
self._flags.total_surface_area_per_mass_valid = True
return self._total_surface_area_per_mass
def _calculate_surface_area_per_mass(self, surface_area_per_vol, mass, volume):
self._initialize_calculator()
return self._pore_calculator.calculate_surface_area_per_mass(surface_area_per_vol, mass, volume)
@property
def network_accessible_surface_area(self):
'''Result: network accessible surface area (A^2)'''
if not self._flags.network_accessible_surface_area_valid:
self._network_accessible_surface_area = self._calculate_network_accessible_surface_area()
self._flags.network_accessible_surface_area_valid = True
self._flags.network_accessible_surface_area_per_volume_valid = False
return self._network_accessible_surface_area
def _calculate_network_accessible_surface_area(self):
self._initialize_calculator()
return self._pore_calculator.calculate_network_accessible_surface_area(self.settings.samples_per_atom)
@property
def network_accessible_surface_area_per_volume(self):
'''Result: metwork accessible surface area per volume (m^2/cm^3)'''
if not self._flags.network_accessible_surface_area_per_volume_valid:
self._network_accessible_surface_area_per_volume = self._calculate_surface_area_per_volume(
self.network_accessible_surface_area, self.system_volume)
self._flags.network_accessible_surface_area_per_volume_valid = True
self._flags.network_accessible_surface_area_per_mass_valid = False
return self._network_accessible_surface_area_per_volume
@property
def network_accessible_surface_area_per_mass(self):
'''Result: network accessible surface area per mass (m^2/g)'''
if not self._flags.network_accessible_surface_area_per_mass_valid:
self._network_accessible_surface_area_per_mass = self._calculate_surface_area_per_mass(
self.network_accessible_surface_area_per_volume, self.system_mass, self.system_volume)
self._flags.network_accessible_surface_area_per_mass_valid = True
return self._network_accessible_surface_area_per_mass
@property
def total_helium_volume(self):
'''Result: He pore volume (A^3)'''
if not self._flags.total_he_volume_valid:
self._total_he_volume = self._calculate_total_helium_volume(self.settings.temperature, self.system_volume)
self._flags.he_pore_volume_valid = True
return self._total_he_volume
def _calculate_total_helium_volume(self, temperature, crystal_volume):
self._initialize_calculator()
return self._pore_calculator.calculate_helium_pore_volume(temperature, crystal_volume)
@property
def network_accessible_helium_volume(self):
'''Result: Network accessible He pore volume (A^3)'''
if not self._flags.network_accessible_he_volume_valid:
self._network_accessible_he_volume = self._calculate_network_accessible_helium_volume(
self.settings.temperature, self.system_volume)
self._flags.network_accessible_he_volume_valid = True
return self._network_accessible_he_volume
def _calculate_network_accessible_helium_volume(self, temperature, crystal_volume):
self._initialize_calculator()
return self._pore_calculator.calculate_network_accessible_helium_pore_volume(temperature, crystal_volume)
@property
def total_geometric_volume(self):
'''Result: geometric pore volume (A^3)'''
if not self._flags.total_geom_volume_valid:
self._total_geom_volume = self._calculate_total_geometric_volume(self.system_volume)
self._flags.total_geom_volume_valid = True
return self._total_geom_volume
def _calculate_total_geometric_volume(self, crystal_volume):
self._initialize_calculator()
return self._pore_calculator.calculate_geometric_pore_volume(crystal_volume)
@property
def network_accessible_geometric_volume(self):
'''Result: Network accessible geometric pore volume (A^3)'''
if not self._flags.network_accessible_geom_volume_valid:
self._network_accessible_geom_volume = self._calculate_network_accessible_geometric_volume(self.system_volume)
self._flags.network_accessible_geom_volume_valid = True
return self._network_accessible_geom_volume
def _calculate_network_accessible_geometric_volume(self, crystal_volume):
self._initialize_calculator()
return self._pore_calculator.calculate_network_accessible_geometric_pore_volume(crystal_volume)
@property
def pore_limiting_diameter(self):
'''Result: Pore limiting diameter (A)'''
if not self._flags.pore_limiting_diameter_valid:
self._pore_limiting_diameter = self._calculate_pore_limiting_diameter()
self._flags.pore_limiting_diameter_valid = True
return self._pore_limiting_diameter
def _calculate_pore_limiting_diameter(self):
self._initialize_calculator()
return self._pore_calculator.calculate_pore_limiting_diameter()
@property
def max_pore_diameter(self):
'''Result: Max pore diameter (A)'''
if not self._flags.max_pore_diameter_valid:
self._max_pore_diameter = self._calculate_max_pore_diameter()
self._flags.max_pore_diameter_valid = True
return self._max_pore_diameter
def _calculate_max_pore_diameter(self):
self._initialize_calculator()
return self._pore_calculator.calculate_maximum_pore_diameter()
@property
def num_percolated_dimensions(self):
'''Result: Number of percolated dimensions'''
if not self._flags.num_percolated_dimensions_valid:
self._num_percolated_dimensions = self._calculate_num_percolated_dimensions()
self._flags.num_percolated_dimensions_valid = True
return self._num_percolated_dimensions
def _calculate_num_percolated_dimensions(self):
self._initialize_calculator()
return self._pore_calculator.calculate_n_percolated_dimensions()
[docs] def convert_a3_to_cm3_per_g(self, volume):
'''Utility to convert cubic angstroms to cm^3 per g'''
self._initialize_calculator()
return self._pore_calculator.convert_cubic_angstoms_to_cubic_centimetres_per_gram(
volume, self.system_mass)
# Initialize new PoreCalculator if current is not valid.
# If already exists and is valid, do nothing
def _initialize_calculator(self):
if not self._flags.calculator_is_valid:
self._pore_calculator = PoreAnalyserLib.PoreCalculator(
self._crystal._crystal,
self.settings.grid_spacing,
self.settings.cutoff_distance,
self.settings.he_probe_sigma,
self.settings.he_probe_epsilon,
self.settings.n_probe_sigma
)
self._flags.calculator_is_valid = True
#####################################################################################
[docs]class StatisticalDescriptors(object):
'''A namespace holding statistical descriptors.'''
[docs] class RankStatistics(object):
'''Represents a ranked collection of values for which statistics can be derived.'''
def __init__(self, scores, activity_column=None):
'''Instantiate a RankStatistics object.
:param scores: Sorted list with descending scores containing active/inactive data.
:param activity_column: extractor for activity data in scores. If ``None``, scores
will be interpreted as an iterable of ``bool``s. If type is ``int`` or ``string``,
scores will be interpreted as an iterable of indexed/named collections where the
specified index is the activity ``bool``. Otherwise, use :meth:`operator.itemgetter' or
:meth:`operator.attrgetter`.
:raises: ValueError if the scores list is empty.
'''
if len(scores) == 0:
raise ValueError('scores list must not be empty.')
self._scores = scores
self.activity_column = activity_column
self._score_count = len(scores)
@property
def activity_column(self):
'''Get extractor for active/inactive classification from scores data.'''
return self._activity_column
@activity_column.setter
def activity_column(self, activity_column):
'''Set extractor for active/inactive classification from scores data
:param activity_column: extractor for activity data in scores. If ``None``, scores
will be interpreted as an iterable of ``bool``s. If type is ``int`` or ``string``,
scores will be interpreted as an iterable of indexed/named collections where the
specified index is the activity ``bool``. Otherwise, use :meth:`operator.itemgetter' or
:meth:`operator.attrgetter`.
'''
if activity_column is None:
self._activity_column = lambda x: x
elif type(activity_column) is int:
self._activity_column = operator.itemgetter(activity_column)
elif isinstance(activity_column, str):
self._activity_column = operator.attrgetter(activity_column)
else:
self._activity_column = activity_column
[docs] def ROC(self):
'''Calculate receiver operating characteristic (ROC) curve.
:returns: list, list - True positive rate, False positive rate
'''
true_positive_rate = [] # True positive rate = TP/(TP+FN) = Sensitivity
false_positive_rate = [] # False positive rate = FP/(TN+FP) = Fall-out
actives_count = 0
inactives_count = 0
for i in range(self._score_count):
if self._activity_column(self._scores[i]):
actives_count += 1
else:
inactives_count += 1
true_positive_rate.append(float(actives_count))
false_positive_rate.append(float(inactives_count))
if actives_count > 0:
true_positive_rate = [tp / actives_count for tp in true_positive_rate]
if inactives_count > 0:
false_positive_rate = [fp / inactives_count for fp in false_positive_rate]
return true_positive_rate, false_positive_rate
[docs] def AUC(self):
'''Calculate the area under the ROC curve.
:returns: Area under the ROC curve.'''
tpr, fpr = self.ROC()
auc = 0
for i in range(0, self._score_count - 1):
auc += (fpr[i + 1] - fpr[i]) * (tpr[i + 1] + tpr[i])
return auc / 2
def _RIE(self, alpha):
active_count = 0
sum_exp = 0
for i in range(self._score_count):
active = self._activity_column(self._scores[i])
if active:
active_count += 1
sum_exp += math.exp(-(alpha * (i + 1)) / self._score_count)
if active_count == 0:
rie = 0.0
else:
denom = 1.0 / self._score_count * ((1.0 - math.exp(-alpha)) / (math.exp(alpha / self._score_count) - 1.0))
rie = sum_exp / float(active_count * denom)
return rie, active_count
[docs] def RIE(self, alpha=0.0):
'''Calculate robust initial enhancement (RIE) as defined in:
Sheridan R.P., Singh S.B., Fluder E.M., Kearsley S.K.,
"Protocols for Bridging the Peptide to Nonpeptide Gap in Topological Similarity Searches"
J. Chem. Inf. Comp. Sci. 41:1395-1406 (2001).
:param alpha: exponential weighting factor
:raises: ValueError if alpha is less than or equal to 0.0
'''
if alpha <= 0.0:
raise ValueError('alpha parameter must be greater than zero.')
rie, active_count = self._RIE(float(alpha))
return rie
[docs] def BEDROC(self, alpha=0.0):
'''Calculate Boltzmann-Enhanced Discrimination of ROC (BEDROC) as defined in:
Truchon J., Bayly C.I.,
"Evaluating Virtual Screening Methods: Good and Bad Metric for the "Early Recognition" Problem"
J. Chem. Inf. Model. 47:488-508 (2007).
:param alpha: exponential weighting factor.
:raises: ValueError if alpha is less than or equal to 0.0.
'''
if alpha <= 0.0:
raise ValueError('alpha parameter must be greater than zero.')
rie, active_count = self._RIE(float(alpha))
if active_count <= 0.0:
bedroc = 0.0
else:
ratio = 1.0 * active_count / self._score_count
rie_max = (1.0 - math.exp(-alpha * ratio)) / (ratio * (1.0 - math.exp(-alpha)))
rie_min = (1.0 - math.exp(alpha * ratio)) / (ratio * (1.0 - math.exp(alpha)))
if rie_max == rie_min:
bedroc = 1.0
else:
bedroc = (rie - rie_min) / (rie_max - rie_min)
return bedroc
def _find_position(self, fraction):
if fraction < 0.0 or fraction > 1.0:
raise ValueError('fraction parameter must be within interval [0,1]')
return int(math.ceil(self._score_count * fraction))
[docs] def EF(self, fraction=0.0):
'''Calculate enrichment factor (EF) at the specified fraction.
:param fraction: position within data for which enrichment factor is to be determined.
:raises: ValueError if fraction is not within interval [0,1]
'''
position = self._find_position(fraction)
found_active_count = 0
for i in range(position):
if self._activity_column(self._scores[i]):
found_active_count += 1
if found_active_count == 0:
return 0.0
active_count = found_active_count
for i in range(position, self._score_count):
if self._activity_column(self._scores[i]):
active_count += 1
return found_active_count / (active_count * fraction)
[docs] def PPV(self, fraction=0.0):
'''Calculate precision or positive predictive value (PPV) at the specified fraction.
:param fraction: position within data for which precision is to be determined.
:raises: ValueError if fraction is not within interval [0,1]
'''
position = self._find_position(fraction)
active_count = 0
for i in range(position):
if self._activity_column(self._scores[i]):
active_count += 1
return float(active_count) / (position + 1)
[docs] def ACC(self, fraction=0.0):
'''Calculate accuracy metric (ACC) at the specified fraction.
ACC = (TP+TN) / (TP+FP+TN+FN)
:param fraction: position within data for which accuracy metric is to be determined.
:raises: ValueError if fraction is not within interval [0,1]
'''
position = min(self._find_position(fraction), self._score_count - 1)
true_positives = []
false_negatives = []
actives_count = 0
inactives_count = 0
for i in range(self._score_count):
if self._activity_column(self._scores[i]):
actives_count += 1
else:
inactives_count += 1
true_positives.append(float(actives_count))
false_negatives.append(float(inactives_count))
return (true_positives[position] + inactives_count - false_negatives[position])\
/ (actives_count + inactives_count)
#################################################################################################