Source code for ccdc.descriptors

#
# This code is Copyright (C) 2015 The Cambridge Crystallographic Data Centre
# (CCDC) of 12 Union Road, Cambridge CB2 1EZ, UK and a proprietary work of CCDC.
# This code may not be used, reproduced, translated, modified, disassembled or
# copied, except in accordance with a valid licence agreement with CCDC and may
# not be disclosed or redistributed in any form, either in whole or in part, to
# any third party. All copies of this code made in accordance with a valid
# licence agreement as referred to above must contain this copyright notice.
#
# No representations, warranties, or liabilities are expressed or implied in the
# supply of this code by CCDC, its servants or agents, except where such
# exclusion or limitation is prohibited, void or unenforceable under governing
# law.
#
'''
The :mod:`ccdc.descriptors` module contains classes for calculating descriptors.

The main classes in the :mod:`ccdc.descriptors` module are:

- :class:`ccdc.descriptors.MolecularDescriptors`.
- :class:`ccdc.descriptors.GeometricDescriptors`.
- :class:`ccdc.descriptors.CrystalDescriptors.PowderPattern`.
- :class:`ccdc.descriptors.CrystalDescriptors.Morphology`.
- :class:`ccdc.descriptors.CrystalDescriptors.GraphSetSearch`.
- :class:`ccdc.descriptors.CrystalDescriptors.HBondCoordination`.
- :class:`ccdc.descriptors.CrystalDescriptors.HBondPropensities`.
- :class:`ccdc.descriptors.StatisticalDescriptors`.
'''

import collections
import csv
import math
import operator
import os
import warnings
import tempfile

warnings.simplefilter('always', DeprecationWarning)

from ccdc.molecule import Molecule, Atom, Bond, Coordinates
from ccdc.crystal import Crystal
from ccdc.utilities import Logger, bidirectional_dict, nested_class, Resources
from ccdc import io
from ccdc import morphology

from ccdc.utilities import _private_importer
with _private_importer() as pi:
    pi.import_ccdc_module('ChemistryLib')
    pi.import_ccdc_module('ChemicalAnalysisLib')
    pi.import_ccdc_module('MathsLib')
    pi.import_ccdc_module('SubstructureSearchLib')
    pi.import_ccdc_module('MotifSearchLib')
    pi.import_ccdc_module('PackingSimilarityLib')
    pi.import_ccdc_module('UtilitiesLib')
    pi.import_ccdc_module('DatabaseEntryLib')
    pi.import_ccdc_module('FileFormatsLib')
    pi.import_ccdc_module('CSDSQLDatabaseLib')
    pi.import_ccdc_module('AnnotationsLib')
    pi.import_ccdc_module('PoreAnalyserLib')
    pi.import_ccdc_module('LigandOverlayLib')



[docs]
class MolecularDescriptors(object):
    '''Namespace for descriptors of a molecular nature.'''

[docs]
    @staticmethod
    def bond_length(bond):
        '''The length of a bond.

        :param bond: :class:`ccdc.molecule.Bond`
        :returns: float, or ``None`` if an atom of the bond has no coordinates
        '''
        if bond._bond.atom1().site() and bond._bond.atom2().site():
            return ChemistryLib.bond_length(
                bond._bond.molecule(),
                bond._bond.atom1().index(),
                bond._bond.atom2().index()
            )



[docs]
    @staticmethod
    def atom_distance(a, b):
        '''Distance between two atom irrespective their parent molecules.

        :param a: :class:`ccdc.molecule.Atom`
        :param b: :class:`ccdc.molecule.Atom`
        :returns: float or ``None`` if one of the atoms has no coordinates
        '''
        if a._atom.site() and b._atom.site():
            return MathsLib.distance(a.coordinates, b.coordinates)



[docs]
    @staticmethod
    def atom_angle(a, b, c):
        '''Angle subtended by three arbitrary atoms.

        :param a: :class:`ccdc.molecule.Atom`
        :param b: :class:`ccdc.molecule.Atom`
        :param c: :class:`ccdc.molecule.Atom`
        :returns: float - the angle in degrees or ``None`` if one of the atoms has no coordinates
        '''
        if a._atom.site() and b._atom.site() and c._atom.site():
            return MathsLib.angle(
                a.coordinates,
                b.coordinates,
                c.coordinates
            ).degrees()



[docs]
    @staticmethod
    def atom_torsion_angle(a, b, c, d):
        '''Plane angle subtended by the triples abc and bcd.

        :param a: :class:`ccdc.molecule.Atom`
        :param b: :class:`ccdc.molecule.Atom`
        :param c: :class:`ccdc.molecule.Atom`
        :param d: :class:`ccdc.molecule.Atom`
        :returns: float - the angle in degrees or ``None`` if one of the atoms has no coordinates
        '''
        if a._atom.site() and b._atom.site() and c._atom.site() and d._atom.site():
            return MathsLib.torsion_angle(
                a.coordinates,
                b.coordinates,
                c.coordinates,
                d.coordinates
            ).degrees()


    @staticmethod
    def _set_atom_torsion_angle(a, b, c, d, degrees):
        '''Set a torsion angle.

        :param a: :class:`ccdc.molecule.Atom`
        :param b: :class:`ccdc.molecule.Atom`
        :param c: :class:`ccdc.molecule.Atom`
        :param d: :class:`ccdc.molecule.Atom`
        :param degrees: float - the required torsion angle in degrees
        '''
        if degrees > 180.:
            degrees = 360. - degrees
        ChemicalAnalysisLib.set_torsion_angle(a._atom, b._atom, c._atom, d._atom, degrees)


[docs]
    @staticmethod
    def atom_centroid(*atoms):
        '''Define the centroid of the specified atoms.'''
        coords = [a.coordinates for a in atoms]
        if None in coords:
            raise RuntimeError('An atom has no coordinates')
        x, y, z = zip(*coords)
        centroid = Coordinates(sum(x)/len(coords), sum(y)/len(coords), sum(z)/len(coords))
        return centroid



[docs]
    @staticmethod
    def atom_vector(atom0, atom1):
        '''Define the vector from atom0 to atom1.

        :param atom0: :class:`ccdc.molecule.Atom`
        :param atom1: :class:`ccdc.molecule.Atom`
        :returns: :class:`GeometricDescriptors.Vector`
        :raises: RuntimeError if either atom has no coordinates.
        '''
        coords = [atom0.coordinates, atom1.coordinates]
        if None in coords:
            raise RuntimeError('An atom has no coordinates')
        return GeometricDescriptors.Vector.from_points(coords[0], coords[1])



[docs]
    @staticmethod
    def atom_plane(*atoms):
        '''Define a plane from the coordinates of the atoms.

        :param atoms: there must be at least three :class:`ccdc.molecule.Atom` in the arguments.
        '''
        return GeometricDescriptors.Plane.from_points(
            *tuple(a.coordinates for a in atoms)
        )



[docs]
    @staticmethod
    def ring_centroid(ring):
        '''The centroid of the ring's atoms.

        :param ring: :class:`ccdc.molecule.Molecule.Ring`
        '''
        return MolecularDescriptors.atom_centroid(*ring.atoms)



[docs]
    @staticmethod
    def ring_plane(ring):
        '''The plane of the ring's atoms.

        :param ring: :class:`ccdc.molecule.Molecule.Ring`
        '''
        return MolecularDescriptors.atom_plane(*ring.atoms)



[docs]
    @staticmethod
    def point_group_analysis(mol):
        '''Return Schoenflies notation of the point group symmetry of a molecule.

        The point group symmetry is returned as a tuple of:

        - order (e.g. 1)
        - symbol (e.g. 'C1')
        - description (e.g. 'Objects in this point group have no symmetry.')

        :param mol: :class:`ccdc.molecule.Molecule`
        :returns: (int, str, str)
        '''
        pga = ChemicalAnalysisLib.MoleculePointGroupAssigner(
                ChemicalAnalysisLib.RadialAtomProximity(0.5)
        )
        try:
            pg = pga.assign(mol._molecule)
            if not pga.assignment_succeeded():
                return 0, 'Unknown', 'point group assignment did not conclude'
            return pg.order(), pg.symbol(), pg.description()
        except RuntimeError as e:
            logger = Logger()
            logger.warning('molecule.point_group_analysis %s: %s' % (mol.identifier, e))
            return 0, 'Unknown', 'point group assignment failed'


    @staticmethod
    def _make_subset(mol, atoms):
        '''Private: make an internal subset.'''
        sub = ChemistryLib.MoleculeSubset.instantiate(mol._molecule)
        sub.select_all()
        sub.select_atom_indices_from_current([a.index for a in atoms])
        return sub


[docs]
    @nested_class('MolecularDescriptors')
    class Overlay(object):
        '''Overlays two molecules'''

        def __init__(self, mol1, mol2, atoms=None, invert=False, rotate_torsions=False, with_symmetry=True, match_elements=True):
            '''Overlay mol2 on mol1

            :param mol1: a :class:`ccdc.molecule.Molecule` instance
            :param mol2: a :class:`ccdc.molecule.Molecule` instance
            :param atoms: a list of pairs of atoms to use in the overlay, or None
            for all atoms to be used
            :param invert: allow inversions in the overlay
            :param rotate_torsions: allow torsional rotations when overlaying
            :param with_symmetry: take account of symmetry when overlaying atoms
            :param match_elements: require that elements match in the two molecules
            '''
            m2 = mol1.copy()
            m1 = mol2.copy()
            if atoms is not None:
                # Got to get the matching atoms
                def _match_atom(mol, at):
                    def element_match(at1, at2):
                        return match_elements is False or at1.atomic_symbol == at2.atomic_symbol

                    l = [a for a in mol.atoms if element_match(a, at) and str(a.coordinates) == str(at.coordinates)]
                    if len(l) == 1:
                        return l[0]
                    else:
                        # Fall back on match by index
                        if element_match(mol.atoms[at.index], at):
                            return mol.atoms[at.index]
                        raise RuntimeError('Atom %s could not be matched' % at)

                inp1, inp2 = zip(*atoms)
                ats1 = [_match_atom(m1, a) for a in inp2]
                ats2 = [_match_atom(m2, a) for a in inp1]
                if with_symmetry:
                    sub1 = MolecularDescriptors._make_subset(m1, ats1)
                    sub2 = MolecularDescriptors._make_subset(m2, ats2)
            if invert:
                invert = ChemicalAnalysisLib.MoleculeOverlay.NO_INVERT_AND_INVERT
            else:
                invert = ChemicalAnalysisLib.MoleculeOverlay.NO_INVERT
            if rotate_torsions:
                rotate = ChemicalAnalysisLib.MoleculeOverlay.FLEXIBLE_TORSIONS
            else:
                rotate = ChemicalAnalysisLib.MoleculeOverlay.FIXED_TORSIONS
            if match_elements:
                match_elements = ChemicalAnalysisLib.MoleculeOverlay.MATCH_ELEMENTS
            else:
                match_elements = ChemicalAnalysisLib.MoleculeOverlay.NO_MATCH_ELEMENTS
            if atoms is None:
                ov = ChemicalAnalysisLib.MoleculeOverlay(
                    m1._molecule,
                    m2._molecule,
                    invert,
                    rotate,
                    match_elements
                )
                ov.fit_geometry().apply()
            else:
                if with_symmetry:
                    ov = ChemicalAnalysisLib.MoleculeOverlay(
                        sub1,
                        sub2,
                        invert,
                        rotate,
                        match_elements
                    )
                    if hasattr(m1, '_cell'):
                        cell = m1._cell
                    else:
                        cell = ChemistryLib.Cell()
                    ov.fit_geometry(cell, m1._molecule).apply()
                else:
                    ov = ChemicalAnalysisLib.MoleculeOverlay(
                        m1._molecule,
                        m2._molecule,
                        list(zip([a._atom for a in ats1], [a._atom for a in ats2])),
                        invert,
                        rotate,
                        match_elements
                    )
                    ov.fit_geometry().apply()

            self._ov = ov
            self._molecule = m1
            self._transformation = Molecule.Transformation(_transformation=ov.transformation())

            self._atom_match = []
            best_match = ov.best_match()
            for i in range(best_match.nnode_matches()):
                atom_pair = best_match.node_match(i)
                self._atom_match.append((mol1.atoms[atom_pair[1].index()], mol2.atoms[atom_pair[0].index()]))

        @property
        def molecule(self):
            '''Returns input molecule mol2 transformed to overlay onto mol1'''
            return self._molecule

        @property
        def rmsd(self):
            '''Returns RMSD between the two overlaid molecules'''
            return self._ov.rms()

        @property
        def rmsd_tanimoto(self):
            '''Returns Tanimoto RMSD between the two overlaid molecules'''
            return self._ov.rms_tanimoto()

        @property
        def transformation(self):
            '''Returns Molecule.Transformation object required to overlay mol2 over mol1'''
            return self._transformation

        @property
        def max_distance(self):
            '''Returns the maximum distance between two equivalent atoms in the overlay (Angstroms)'''
            return self._ov.max_distance()

        @property
        def atom_match(self):
            '''Returns pairs of atoms from mol1 and mol2 matched in the overlay'''
            return self._atom_match




[docs]
    @staticmethod
    def overlay_rmsds_and_transformation(mol1, mol2, atoms=None, invert=False, rotate_torsions=False, with_symmetry=True):
        '''Overlay mol2 on mol1 and return properties of the overlay.
        Deprecated and replaced with
        :class:`ccdc.MolecularDescriptors.Overlay`

        :param mol1: a :class:`ccdc.molecule.Molecule` instance
        :param mol2: a :class:`ccdc.molecule.Molecule` instance
        :param atoms: a list of pairs of atoms to use in the overlay, or None
                      for all atoms to be used
        :param invert: allow inversions in the overlay
        :param rotate_torsions: allow torsional rotations when overlaying
        :param with_symmetry: take account of symmetry when overlaying atoms
        :returns: a tuple containing a :class:`ccdc.molecule.Molecule` instance which is a copy of
                 mol2 overlaid on mol1 as entry 0, the rmsd as entry 1, the Tanimoto rmsd as entry 2
                 and the overlay transformation as entry 3
        '''

        warnings.warn('''This attribute is deprecated and will be removed in a later version.
Instead, please use MolecularDescriptors.Overlay()''', DeprecationWarning)

        overlay = MolecularDescriptors.Overlay(mol1, mol2, atoms, invert, rotate_torsions, with_symmetry)

        return overlay.molecule, overlay.rmsd, overlay.rmsd_tanimoto, overlay.transformation




[docs]
    @staticmethod
    def overlay(mol1, mol2, atoms=None, invert=False, rotate_torsions=False, with_symmetry=True):
        '''Overlay mol2 on mol1.
        Deprecated and replaced with
        :class:`ccdc.MolecularDescriptors.Overlay`

        :param mol1: a :class:`ccdc.molecule.Molecule` instance
        :param mol2: a :class:`ccdc.molecule.Molecule` instance
        :param atoms: a list of pairs of atoms to use in the overlay, or None
                      for all atoms to be used
        :param invert: allow inversions in the overlay
        :param rotate_torsions: allow torsional rotations when overlaying
        :param with_symmetry: take account of symmetry when overlaying atoms
        :returns: a :class:`ccdc.molecule.Molecule` instance which is a copy of
                  mol2 overlaid on mol1

        Note: if with_symmetry is true, and matching atoms are provided, then the matching atoms need to form a connected structure.
        '''

        warnings.warn('''This attribute is deprecated and will be removed in a later version.
Instead, please use overlay = MolecularDescriptors.Overlay(), overlay.molecule''', DeprecationWarning)

        ov = MolecularDescriptors.Overlay(mol1, mol2, atoms, invert, rotate_torsions, with_symmetry)

        return ov.molecule



[docs]
    @staticmethod
    def rmsd(mol1, mol2, atoms=None, overlay=False, exclude_hydrogens=True, with_symmetry=True):
        '''Return the RMSD of two molecules.

        Both molecules should have the same atoms if ``atoms`` is ``None``.

        :param atoms: a list of pairs :class:`ccdc.molecule.Atom` or ``None``
        :param overlay: Whether to overlay the molecules before calculating RMSD
        :param exclude_hydrogens: Whether all-atom or heavy atom RMSD should be calculated
        :param with_symmetry: Whether to allow symmetrical matches
        :returns: float
        '''
        if overlay:
            the_overlay = MolecularDescriptors.Overlay(mol1, mol2, atoms, with_symmetry=with_symmetry)
            mol2 = the_overlay.molecule
        if with_symmetry:
            symm = ChemicalAnalysisLib.WITH_SYMMETRY
        else:
            symm = ChemicalAnalysisLib.NO_SYMMETRY
        if atoms is None:
            if exclude_hydrogens:
                ats1 = [a for a in mol1.atoms if a.atomic_symbol not in ['H', 'D', 'U']]
                ats2 = [a for a in mol2.atoms if a.atomic_symbol not in ['H', 'D', 'U']]
                atoms = list(zip(ats1, ats2))
        else:
            if with_symmetry:
                ats1, ats2 = zip(*atoms)
                orig_ats2 = mol2.atoms
                ats2 = [Atom(_atom=orig_ats2[a.index]._atom) for a in ats2]
            else:
                tot = 0.0
                for a, b in atoms:
                    ap = a.coordinates
                    bp = b.coordinates
                    tot += ((ap.x - bp.x)**2 + (ap.y - bp.y)**2 + (ap.z - bp.z)**2)
                return (tot/len(atoms))**0.5
        if atoms is None:
            return ChemicalAnalysisLib.RMSD(mol1._molecule, mol2._molecule, symm)
        else:
            sub1 = MolecularDescriptors._make_subset(mol1, ats1)
            sub2 = MolecularDescriptors._make_subset(mol2, ats2)
            return ChemicalAnalysisLib.RMSD(sub1, sub2, symm)



[docs]
    class MaximumCommonSubstructure(object):
        '''Identifies the maximum common substructure of two molecules.'''

[docs]
        class Settings(object):
            '''Settings for the MCS calculation.'''
            def __init__(self):
                '''Initialises the settings.'''
                self._settings = SubstructureSearchLib.MoleculeExactMatchCriteria()
                self.connected = True

            @property
            def connected(self):
                '''Whether substructure should be connected.

                Note that finding disconnected maximal substructures is a lot slower than finding connected.
                '''
                return self._connected

            @connected.setter
            def connected(self, tf):
                self._connected = tf

            @property
            def check_bond_count(self):
                '''Whether the bond count of an atom be checked.'''
                return self._settings.settings().check_bond_count()

            @check_bond_count.setter
            def check_bond_count(self, tf):
                self._settings.settings().set_check_bond_count(tf)

            @property
            def check_bond_polymeric(self):
                '''Check whether the bond be polymeric.'''
                return self._settings.settings().check_bond_polymeric()

            @check_bond_polymeric.setter
            def check_bond_polymeric(self, tf):
                self._settings.settings().set_check_bond_polymeric(tf)

            @property
            def check_bond_type(self):
                '''Whether the bond type be checked.'''
                return self._settings.settings().check_bondtype()

            @check_bond_type.setter
            def check_bond_type(self, tf):
                self._settings.settings().set_check_bondtype(tf)

            @property
            def check_charge(self):
                '''Whether the atom charge be checked.'''
                return self._settings.settings().check_charge()

            @check_charge.setter
            def check_charge(self, tf):
                self._settings.settings().set_check_charge(tf)

            @property
            def check_element(self):
                '''Whether the element be checked.'''
                return self._settings.settings().check_element()

            @check_element.setter
            def check_element(self, tf):
                self._settings.settings().set_check_element(tf)

            @property
            def check_hydrogen_count(self):
                '''Whether the atom's hydrogen count be checked.'''
                return self._settings.settings().check_hydrogen_count()

            @check_hydrogen_count.setter
            def check_hydrogen_count(self, tf):
                self._settings.settings().set_check_hydrogen_count(tf)

            @property
            def ignore_hydrogens(self):
                '''Whether the hydrogens be ignored.'''
                return self._settings.settings().ignore_hydrogens()

            @ignore_hydrogens.setter
            def ignore_hydrogens(self, tf):
                self._settings.settings().set_ignore_hydrogens(tf)


        def __init__(self, settings=None):
            if settings is None:
                settings = MolecularDescriptors.MaximumCommonSubstructure.Settings()
            self.settings = settings


[docs]
        def search(self, mol1, mol2, only_edges=False, search_step_limit = 'unlimited'):
            '''Calculate the maximum common substructure between two molecules.

            :param mol1, mol2: :class:`ccdc.molecule.Molecule` instances.
            :param only_edges: bool.  The search will find a maximal common substructure matching only the edges.
            :param search_step_limit: positive integer or 'unlimited'. Controls the maximum number of steps the algorithm takes.
            :returns: a pair of tuples, giving matched :class:`ccdc.molecule.Atom` and :class:`ccdc.molecule.Bond` instances.
            :raises: ValueError with invalid input

            Note: this function is computationally exponential, so will take a long time on large molecules.
            '''
            if type(search_step_limit) == type(int()):
                if search_step_limit <= 0:
                    raise ValueError("search_step_limit must be a positive integer or 'unlimited' ")
            elif search_step_limit != 'unlimited':
                raise ValueError("search_step_limit must be a positive integer or 'unlimited' ")

            if len(mol1.atoms) > len(mol2.atoms):
                def first(x): return x.second
                def second(x): return x.first
                mol2, mol1 = mol1, mol2
            else:
                def first(x): return x.first
                def second(x): return x.second
            if self.settings.connected:
                conn = SubstructureSearchLib.CONNECTED
            else:
                conn = SubstructureSearchLib.DISCONNECTED
            xcrit = SubstructureSearchLib.mol_exact_match_as_match(self.settings._settings)
            if only_edges:
                mcsg = SubstructureSearchLib.MoleculeMCSG(
                    mol1._molecule, mol2._molecule, conn, xcrit
                )
                if search_step_limit != 'unlimited':
                    mcsg.set_search_step_limit(search_step_limit)
                match = mcsg.maximum_common_edge_subgraph()
            else:
                xcrit = SubstructureSearchLib.mol_exact_match_as_match(self.settings._settings)
                mcsg = SubstructureSearchLib.MoleculeMCSG(
                    mol1._molecule, mol2._molecule, conn, xcrit
                )
                if search_step_limit != 'unlimited':
                    mcsg.set_search_step_limit(search_step_limit)
                match = mcsg.maximum_common_subgraph()
            if match is None:
                return (tuple(), tuple())

            atoms = tuple(
                (Atom(_atom=first(match.node_match(i))), Atom(_atom=second(match.node_match(i))))
                for i in range(match.nnode_matches())
            )
            bonds = tuple(
                (Bond(_bond=first(match.edge_match(i))), Bond(_bond=second(match.edge_match(i))))
                for i in range(match.nedge_matches())
            )
            return atoms, bonds




[docs]
    class AtomDistanceSearch(object):
        '''More rapid searching for atoms within a certain distance of a point.'''
        def __init__(self, molecule):
            '''Initialise the searcher.

            :param molecule: a :class:`ccdc.molecule.Molecule` instance.
            '''
            #self._searcher = ChemistryLib.create_distance_search(molecule._molecule)
            self._searcher = ChemistryLib.SpatialAtomMap(molecule._molecule, ChemistryLib.SpatialAtomMap.ORTHOGONAL)


[docs]
        def atoms_within_range(self, point, radius):
            '''A tuple of all atoms within the given radius of the given point.'''
            ats = self._searcher.atoms_within_sphere(point, radius)
            return tuple(Atom(_atom=a) for _, a in ats)




[docs]
    class PrincipleAxesAlignedBox(object):
        '''The bounding box of the molecule aligned on its principle axes.

        The vectors of the box have lengths of the size of the box.  The x_vector is
        the major axis of the molecule, the y_vector the minor axis and the z_vector
        the minimal axis of the molecule.
        '''
        def __init__(self, molecule):
            if not molecule.all_atoms_have_sites:
                raise RuntimeError('Some atoms do not have coordinates')
            self._identifier = molecule.identifier
            self._box_model = ChemicalAnalysisLib.MoleculeBoxModel(molecule._molecule)

        @property
        def aligned_molecule(self):
            '''The molecule aligned along its principle axes, with centre at its centre of geometry.'''
            return Molecule(self._identifier, _molecule=self._box_model.aligned_molecule())

        @property
        def x_vector(self):
            '''The vector of the major axis of the box.'''
            v = self._box_model.z_vector()
            return GeometricDescriptors.Vector.from_points(
                Coordinates(0, 0, 0), Coordinates(v.x(), v.y(), v.z())
            )

        @property
        def y_vector(self):
            '''The vector of the minor axis of the box.'''
            v = self._box_model.y_vector()
            return GeometricDescriptors.Vector.from_points(
                Coordinates(0, 0, 0), Coordinates(v.x(), v.y(), v.z())
            )

        @property
        def z_vector(self):
            '''The vector of the minimal axis of the box.'''
            v = self._box_model.x_vector()
            return GeometricDescriptors.Vector.from_points(
                Coordinates(0, 0, 0), Coordinates(v.x(), v.y(), v.z())
            )

        @property
        def volume(self):
            '''The volume of the box.'''
            return self._box_model.box_volume()




[docs]
    class AdjacencyMatrixDescriptorCalculator(object):
        '''Descriptor calculator for descriptors based on a molecule's adjacency matrix.'''
        def __init__(self, molecule):
            '''Initialise the calculator for a molecule.

            :param molecule: a :class:`ccdc.molecule.Molecule` instance.
            '''
            self.mol = molecule
            self._calculator = ChemicalAnalysisLib.AdjacencyMatrixDescriptorCalculator(molecule._molecule)


[docs]
        def self_returning_walk(self, k):
            '''Return the number of walks of length k that start and end at the same atom.
            See Handbook of Molecular Descriptors, page 384, "self-returning walk counts".

            :param k: the number of steps to walk.
            :returns: float
            '''
            return self._calculator.self_returning_walk(k).value_



[docs]
        def self_returning_walk_ln(self, k):
            '''Return the logarithm of the number of walks of length k that start and end at the same atom.
            See Handbook of Molecular Descriptors, page 384, "self-returning walk counts".

            :param k: the number of steps to walk.
            :returns: float
            '''
            return self._calculator.self_returning_walk_ln(k).value_



[docs]
        def topological_charge_autocorrelation_index(self, k):
            '''Calculate the topological charge autocorrelation index
            See https://pubs.acs.org/doi/pdf/10.1021/ci00019a008

            :param k: the topological distance to measure across
            :returns: float
            '''
            return self._calculator.topological_charge_autocorrelation_index(k).value_





[docs]
    class ConnectivityIndices(object):
        '''Connectivitiy index descriptor calculations.'''
        def __init__(self, molecule):
            '''Initialise the calculator for a molecule.

            :param molecule: a :class:`ccdc.molecule.Molecule` instance.
            '''
            self.mol = molecule


[docs]
        def connectivity_index(self, m):
            '''Return the connectivity index of mth order.
            See Handbook of Molecular Descriptors, page 85, "connectivity indices".

            :param m: the path length evaluated.
            :returns: float
            '''
            return ChemicalAnalysisLib.ConnectivityIndices.connectivity_index(self.mol._molecule, m).value_



[docs]
        def average_connectivity_index(self, m):
            '''Return the average connectivity index of mth order.
            See Handbook of Molecular Descriptors, page 85, "connectivity indices".

            :param m: the path length evaluated.
            :returns: float
            '''
            return ChemicalAnalysisLib.ConnectivityIndices.average_connectivity_index(self.mol._molecule, m).value_





[docs]
    class AtomPairDistanceDescriptorCalculator(object):
        '''Atom pair distance descriptor calculations.

        :param molecule: a :class:`ccdc.molecule.Molecule` instance.
        '''
        def __init__(self, molecule):
            '''Initialise the calculator for a molecule.'''
            self.mol = molecule
            self.calculator = ChemicalAnalysisLib.AtomPairDistanceDescriptorCalculator(self.mol._molecule)


[docs]
        def element_pair_count(self, element_a, element_b, distance):
            '''Return a count of the number of times a pair of elements appear with a specified minimum path length.
            See Handbook of Molecular Descriptors, page 428, "substructure descriptors, atom pairs".

            :param element_a: str. the first element name.
            :param element_b: str. the second element name.
            :param distance: int. the number of bonds between atoms of the specified the elements.
            :returns: float
            '''
            return self.calculator.element_pair_count(ChemistryLib.Element(element_a), ChemistryLib.Element(element_b), distance).value_





[docs]
    class InChIGenerator(object):
        '''Generate InChI molecular descriptors.

        The original source of the InChI generation tool is available at https://www.inchi-trust.org/

        See Stephen R Heller, Igor Pletnev, Stephen Stein and Dmitrii Tchekhovskoi, J. Cheminformatics, 2015, 7:23
        https://doi.org/10.1186/s13321-015-0068-4

        :param include_stereo: configure the generator to include stereochemistry (True, default) or ignore stereochemistry (False)
        :param add_hydrogens: configure the generator to add hydrogens (True, default) or not add hydrogens (False)
        '''

[docs]
        class InChI(object):
            '''An InChI object with the following attributes:

                :ivar success: a boolean to indicate if the InChI generation was successful
                :ivar inchi: the InChI string
                :ivar key: the InChI key
                :ivar errors: a tuple of InChI generation errors
                :ivar warnings: a tuple of InChI generation warnings

            '''
            def __init__(self, inchi_internal):
                '''Initialise from an internal InChI object.'''
                self.success = inchi_internal.success()
                self.inchi = inchi_internal.inchi() if self.success else None
                self.key = inchi_internal.inchi_key() if self.success else None
                self.errors = inchi_internal.errors()
                self.warnings = inchi_internal.warnings()

            def __eq__(self, other):
                if isinstance(other, MolecularDescriptors.InChIGenerator.InChI):
                    return (
                        self.success == other.success and
                        self.inchi == other.inchi and
                        self.key == other.key and
                        self.errors == other.errors and
                        self.warnings == other.warnings
                    )
                else:
                    raise TypeError('%s is not an InChI instance' % other)

            def __hash__(self):
                return hash(self.key)


        def __init__(self, include_stereo=True, add_hydrogens=True):
            '''Initialise, setting options.'''
            self._generator = FileFormatsLib.InChIGenerator()
            self.include_stereo = include_stereo
            self.add_hydrogens = add_hydrogens


[docs]
        def generate(self, structure, include_stereo=None, add_hydrogens=None):
            '''Generate InChI.

            :param structure: a :class:`ccdc.crystal.Crystal` or :class:`ccdc.molecule.Molecule` object
            :param include_stereo: set to True or False to override generator's setting
            :param add_hydrogens: set to True or False to override generator's setting
            :return: a :class:`ccdc.descriptors.MolecularDescripts.InChIGenerator.InChI` instance
            :raises: TypeError if the type of the input structure is invalid
            '''
            if isinstance(structure, Crystal):
                mol = structure.molecule._molecule
            elif isinstance(structure, Molecule):
                mol = structure._molecule
            else:
                raise TypeError(f'Invalid type for InChIGenerator.generate: {structure.__class__.__name__}')

            if include_stereo is not None:
                self.include_stereo = include_stereo
            if add_hydrogens is not None:
                self.add_hydrogens = add_hydrogens

            return MolecularDescriptors.InChIGenerator.InChI(self._generator.generate(mol))


        @property
        def include_stereo(self):
            '''Whether stereo chemistry be considered.'''
            return self._generator.stereochemistry_option()

        @include_stereo.setter
        def include_stereo(self, value):
            if value:
                self._generator.set_stereochemistry_option(self._generator.INCLUDE_STEREOCHEMISTRY)
            else:
                self._generator.set_stereochemistry_option(self._generator.IGNORE_STEREOCHEMISTRY)

        @property
        def add_hydrogens(self):
            '''Whether the InChI generator should add missing hydrogens'''
            return self._generator.add_hydrogens_option()

        @add_hydrogens.setter
        def add_hydrogens(self, value):
            new_val = self._generator.ADD_HYDROGENS if value else self._generator.DONT_ADD_HYDROGENS
            self._generator.set_add_hydrogens_option(new_val)



[docs]
    class LigandOverlay(object):
        '''A class for overlaying ligands and generating pharmacophores.'''


[docs]
        @nested_class('LigandOverlay')
        class LigandOverlaySolution:
            '''A class to represent an individual solution from a ligand overlay.'''
            def __init__(self, ligands: dict[str: Molecule], pharmacophore: Molecule, volume: float, hbond: float, hydrophobe: float):
                '''Initialise a LigandOverlaySolution object.

                :param ligands: a dictionary of the overlaid ligands, with the identifier as the key
                :param pharmacophore: a pharmacophore represented as a molecule
                :param volume: Measures the union volume of the overlaid ligands
                :param hbond: Measures how well hydrogen-bonding groups (donors and acceptors) in different ligands align
                :param hydrophobe: Evaluates the alignment of hydrophobic groups (like aromatic rings or alkyl chains).
                '''
                self._ligands = ligands
                self._pharmacophore = pharmacophore
                self._volume = volume
                self._hbond = hbond
                self._hydrophobe = hydrophobe

            @property
            def overlaid_ligands(self) -> dict[str: Molecule]:
                '''Returns a dictionary of the overlaid ligands, with the identifier as the key.'''
                return self._ligands

            @property
            def pharmacophore(self) -> Molecule:
                '''Returns a pharmacophore represented as a molecule.'''
                return self._pharmacophore

            @property
            def volume(self) -> float:
                '''Returns the union volume of the overlaid ligands.'''
                return self._volume

            @property
            def hbond(self) -> float:
                '''Hydrogen Bond Score: Returns how well hydrogen-bonding groups (donors and acceptors) in different ligands align.'''
                return self._hbond

            @property
            def hydrophobe(self) -> float:
                '''Hydrophobe Coplanarity: Returns the alignment of hydrophobic groups (like aromatic rings or alkyl chains)'''
                return self._hydrophobe


        def __init__(self, project_directory: str):
            '''Initialise a LigandOverlay object.

            :param project_directory: directory in which to write the output files
            '''
            self._project_directory = project_directory
            self._ligands = []


[docs]
        def add_ligands(self, ligands: list[Molecule]) -> None:
            '''Adds ligands to be overlaid.

            :param ligands: a list of Molecule to be added
            '''
            self._ligands.extend(ligands)



[docs]
        def conformer_file_name(self, index: int, file_name: str) -> str:
            """Generates a formatted filename with an index prefix, the first 8 characters of the ligand identifier and a '.mol2' extension."""
            return f"{index+1:02d}_{os.path.splitext(os.path.basename(file_name))[0][:8]}.mol2"



[docs]
        def overlay_ligands(self) -> list[LigandOverlaySolution]:
            '''Overlays the ligands that have been added.

            :return: a list of LigandOverlaySolution
            '''
            model = LigandOverlayLib.PharmacophoreModel()
            model.create(str(Resources().get_ligand_overlay_dir()), self._project_directory, '')

            for index, ligand in enumerate(self._ligands):
                ligand_filename = os.path.join(self._project_directory, f'{ligand.identifier}.mol2')
                with io.MoleculeWriter(ligand_filename) as writer:
                    writer.write(ligand)
                    writer.close()
                model.add_input_file(ligand.identifier, ligand_filename, self.conformer_file_name(index, ligand_filename), False)

            model.overlay_ligands(True)

            # Find the latest job directory in the project directory (format: "job<number>")
            job_dirs = []
            for item in os.listdir(self._project_directory):
                item_path = os.path.join(self._project_directory, item)
                if os.path.isdir(item_path) and item.startswith("job") and item[3:].isdigit():
                    job_dirs.append(item)

            if not job_dirs:
                raise FileNotFoundError("No job directories found in the project directory")

            job_dirs.sort(key=lambda x: int(x[3:]))
            latest_job_dir = job_dirs[-1]

            job_directory = os.path.join(self._project_directory, latest_job_dir)
            scores_file = os.path.join(job_directory, 'scores.csv')
            pharmacophore_dir = os.path.join(job_directory, 'pharmacophores')

            with open(scores_file) as csv_file:
                csv_reader = csv.DictReader(csv_file)
                results = []
                for row in csv_reader:
                    volume = float(row['volume'])
                    hydrophobe = float(row['hydrophobe'])
                    hbond = float(row['hbond'])
                    ligand_file = row['molecule_file']
                    with io.EntryReader(ligand_file) as entry_reader:
                        ligands = {entry.molecule.identifier: entry.molecule for entry in entry_reader}

                    ligand_filename = os.path.basename(ligand_file)
                    pharmacophore_filename = ligand_filename.replace("solution_", "solution_pharm_")
                    pharmacophore_file = os.path.join(pharmacophore_dir, pharmacophore_filename)

                    with io.EntryReader(pharmacophore_file) as ph_reader:
                        pharmacophore_molecule = ph_reader[0].molecule

                    results.append(MolecularDescriptors.LigandOverlay.LigandOverlaySolution(ligands, pharmacophore_molecule, volume, hbond, hydrophobe))

            return results





#################################################################################################


[docs]
class GeometricDescriptors(object):
    '''A namespace to hold geometric classes and functions.'''


[docs]
    @staticmethod
    def point_distance(p0, p1):
        '''The distance between two points.'''
        return MathsLib.distance(p0, p1)



[docs]
    @staticmethod
    def point_angle(p0, p1, p2):
        '''The angle between three points.'''
        return MathsLib.angle(p0, p1, p2).degrees()



[docs]
    @staticmethod
    def point_torsion_angle(p0, p1, p2, p3):
        '''The torsion angle between four points.'''
        return MathsLib.torsion_angle(p0, p1, p2, p3).degrees()



[docs]
    class Vector(object):
        '''A 3D vector.'''
        def __init__(self, x, y, z):
            self.x = x
            self.y = y
            self.z = z

        def __getitem__(self, index):
            if index in range(3):
                return getattr(self, 'xyz'[index])

        def __iter__(self):
            yield self.x
            yield self.y
            yield self.z

        def __repr__(self):
            return 'Vector(%.3f, %.3f, %.3f)' % (self.x, self.y, self.z)

        @property
        def length(self):
            '''The length of the vector.'''
            return sum(self[i]**2 for i in range(3))**0.5


[docs]
        def cross(self, other):
            '''Cross product.'''
            v = MathsLib.vector_3d(self.x, self.y, self.z)
            w = MathsLib.vector_3d(*tuple(other[i] for i in range(3)))
            x = v.cross(w)
            return GeometricDescriptors.Vector(x[0], x[1], x[2])



[docs]
        def dot(self, other):
            '''Dot product.'''
            return sum(self[i]*other[i] for i in range(3))



[docs]
        @staticmethod
        def from_points(p0, p1):
            '''Construct the vector from p0 to p1.

            :param p0, p1: :class:`ccdc.molecule.Coordinates`
            '''
            return GeometricDescriptors.Vector(p1.x - p0.x, p1.y - p0.y, p1.z - p0.z)




[docs]
    class Plane(object):
        '''A plane in 3D.'''
        def __init__(self, vector, distance, _plane=None):
            '''Construct a plane from a vector and a distance to the origin.

            :param vector: :class:`ccdc.descriptors.GeometricDescriptors.Vector`
            :param distance: float
            '''
            if _plane is None:
                self._plane = MathsLib.Plane(
                    MathsLib.vector_3d(vector[0], vector[1], vector[2]), distance
                )
            else:
                self._plane = _plane

        def __repr__(self):
            return 'Plane(normal=%s, distance=%.3f)' % (self.normal, self.distance)

        @property
        def normal(self):
            '''The normal to the plane.'''
            v = self._plane.normal()
            return GeometricDescriptors.Vector(v.x(), v.y(), v.z())

        @property
        def distance(self):
            '''The distance from the origin of the plane.'''
            return self._plane.distance()


[docs]
        @staticmethod
        def from_points(*points):
            '''Construct a RMS fitted plane from points.'''
            return GeometricDescriptors.Plane(None, None, _plane=MathsLib.Plane(points))



[docs]
        def point_distance(self, point):
            '''The distance of the point to the plane.'''
            return self._plane.distance(point)



[docs]
        def plane_distance(self, plane):
            '''The shortest distance of the plane to another.'''
            return self._plane.distance(plane._plane)



[docs]
        def plane_angle(self, plane):
            '''The angle between the two planes.'''
            return self._plane.angle(plane._plane).degrees()



[docs]
        def vector_angle(self, vector):
            '''The angle between the plane and the vector.'''
            return self._plane.angle(MathsLib.vector_3d(vector.x, vector.y, vector.z)).degrees()


        @property
        def plane_vector1(self):
            '''A vector in the plane, normal to the plane's normal.'''
            vec = self._plane.v1()
            return GeometricDescriptors.Vector(vec.x(), vec.y(), vec.z())

        @property
        def plane_vector2(self):
            '''A vector in the plane, normal to both the plane's normal and the plane's plane_vector1.'''
            vec = self._plane.v2()
            return GeometricDescriptors.Vector(vec.x(), vec.y(), vec.z())




[docs]
    class Sphere(object):
        '''A sphere in 3D.'''

        def __init__(self, centre, radius):
            self.centre = centre
            self.radius = radius

        def __repr__(self):
            return 'Sphere(%s radius %.2f)' % (str(self.centre), self.radius)

        def __eq__(self, other):
            return (
                all(round(self.centre[i], 6) == round(other.centre[i], 6) for i in range(3)) and
                round(self.radius, 6) == round(other.radius, 6)
            )



[docs]
    class Quaternion(object):
        '''A normalised quaternion suitable for expressing rotations in 3D space.

           Quaternions are a convenient method for expressing a complex sequence of rotations

           By default this constructs a unit quaternion
        '''
        def __init__(self, _quaternion = None):
            if _quaternion is None:
                self._quaternion = MathsLib.Quaternion()
            else:
                self._quaternion = _quaternion


[docs]
        @staticmethod
        def from_dimensions(q0,q1,q2,q3):
            ''' create from 4 real numbers. The quaternion will be normalised to unit length.
                :param q0,q1,q2,q3: the 4 dimensions of the axes 1,i,j and k
                :raises ValueError: with invalid input (e.g. if the length of the quaternion is 0)
            '''
            try:
                return GeometricDescriptors.Quaternion(_quaternion=MathsLib.Quaternion(q0,q1,q2,q3))
            except Exception as e:
                raise ValueError('Unable to construct Quaternion') from e



[docs]
        @staticmethod
        def from_vector_and_angle(vector,angle, unit = 'degrees'):
            '''
            Construct from a vector and an angle
            :param vector: a tuple or list of length 3 that represents a vector in 3D space
            :param angle: a double that represents an angle (in default in degrees.)
            :raise: ValueError for a bad input
            '''

            if len(vector) != 3:
                raise ValueError("Quaternion must be constructed from a 3D vector")

            mlunit = MathsLib.Angle.DEGREES
            if unit == 'radians':
               mlunit = MathsLib.Angle.RADIANS
            try:
                return GeometricDescriptors.Quaternion(
                    _quaternion=MathsLib.Quaternion(
                    MathsLib.vector_3d(vector[0],vector[1],vector[2]),
                    MathsLib.Angle(angle,mlunit)))
            except Exception as e:
                raise ValueError('Unable to construct Quaternion') from e




[docs]
        @staticmethod
        def from_euler_angles(alpha,beta,gamma, unit = 'degrees' ):
            '''
            create a quaternion from a set of euler angles
            '''
            mlunit = MathsLib.Angle.DEGREES
            if unit == 'radians':
               mlunit = MathsLib.Angle.RADIANS
            try:
                return GeometricDescriptors.Quaternion(
                    _quaternion=MathsLib.Quaternion(
                    MathsLib.Angle(alpha,mlunit),
                    MathsLib.Angle(beta, mlunit),
                    MathsLib.Angle(gamma,mlunit)))
            except Exception as e:
                raise ValueError('Unable to construct Quaternion') from e



[docs]
        def copy(self):
            ''' Create a copy of this quaternion
            '''
            return GeometricDescriptors.Quaternion( _quaternion=MathsLib.Quaternion(self._quaternion) )



[docs]
        def square(self):
            ''' In-place squares this quaternion
            '''
            self._quaternion.square()



[docs]
        def invert(self):
            ''' In-place inverts this quaternion
            '''
            self._quaternion.invert()



[docs]
        def complex_conjugate(self):
            ''' In-place converts this quaternion to its complex conjugate
            '''
            self._quaternion.complex_conjugate()



[docs]
        def rotation_matrix(self):
            ''' Returns a rotation matrix that the quaternion currently describes
            '''
            return self._quaternion.rotation_matrix()



[docs]
        def rotate(self, object_to_rotate, cell = None):
            ''' Rotates a set of vectors in place by this quaternion
            '''
            try:
                if isinstance(object_to_rotate, Molecule):
                    self._rotate_molecule(object_to_rotate._molecule, cell)
                    return object_to_rotate
                elif isinstance(object_to_rotate, Crystal):
                    if cell is None:
                        cell = object_to_rotate._crystal.cell()
                    molecule = object_to_rotate.molecule
                    self._rotate_molecule(molecule,cell)
                    object_to_rotate.molecule = molecule
                    return object_to_rotate
                elif isinstance(object_to_rotate, Atom):
                    self._rotate_atoms([object_to_rotate],cell)
                elif isinstance(object_to_rotate, MathsLib.vector_3d):
                    self._quaternion.rotate(object_to_rotate)
                elif isinstance(object_to_rotate[0], Atom):
                    self._rotate_atoms(object_to_rotate, cell)
                elif isinstance(object_to_rotate[0], float) or isinstance(object_to_rotate[0], int):
                    mlv = MathsLib.vector_3d(object_to_rotate[0], object_to_rotate[1], object_to_rotate[2])
                    self._quaternion.rotate(mlv)
                    object_to_rotate[0] = mlv.x()
                    object_to_rotate[1] = mlv.y()
                    object_to_rotate[2] = mlv.z()
                else:
                    for vector in object_to_rotate:
                        self.rotate(vector)

                return object_to_rotate

            except Exception as e:
                raise ValueError('Unable to rotate object of type %s' % str(type(object_to_rotate))) from e


        def _rotate_molecule(self, molecule, _cell):
            ''' Rotates a molecule. Rotation is applied to orthogonal coordinates.

                If possible, fractional coordinates are then updated too.
            '''
            self._rotate_atoms(molecule.atoms, _cell)

        def _rotate_atoms(self, atoms, _cell):
            ''' Rotates a set of atoms. Rotation is applied to orthogonal coordinates.

                If possible, fractional coordinates are then updated too.
            '''
            raw_atoms = [atom._atom for atom in atoms]
            if _cell is not None:
                ChemicalAnalysisLib.apply_quaternion_to_atoms_with_cell( raw_atoms, self._quaternion, _cell)
            else:
                ChemicalAnalysisLib.apply_quaternion_to_atoms(raw_atoms, self._quaternion)

        def __eq__(self,other):
            return self._quaternion == other._quaternion

        def __ne__(self,other):
            return self._quaternion != other._quaternion

        def __pos__(self):
            return GeometricDescriptors.Quaternion(quaternion = +self._quaternion)

        def __neg__(self):
            return GeometricDescriptors.Quaternion(quaternion = -self._quaternion)

        def __pow__(self,n):
            return GeometricDescriptors.Quaternion( _quaternion = self._quaternion ** n )

        def __mul__(self, other):
            return GeometricDescriptors.Quaternion( _quaternion = self._quaternion * other._quaternion )

        def __imul__(self, other):
            self._quaternion *= other._quaternion
            return self

        def __str__(self):
            return str(self._quaternion.as_string())




#################################################################################################


[docs]
class CrystalDescriptors(object):
    '''Namespace for crystallographic descriptors.'''

#################################################################################################
#   Powder patterns
#################################################################################################

[docs]
    @nested_class('CrystalDescriptors')
    class PowderPattern(object):
        '''Represents a powder pattern.

        The powder pattern class is available only to CSD-Materials and
              CSD-Enterprise users.
        '''


[docs]
        @nested_class('PowderPattern')
        class Settings(object):
            '''Settings which may be set for a Powder simulation.'''

            _slit_type_dict = bidirectional_dict(
                fixed=PackingSimilarityLib.PXRDSimulatorSettings.SlitType_FIXED_SLITS,
                variable=PackingSimilarityLib.PXRDSimulatorSettings.SlitType_VARIABLE_SLITS,
            )

            def __init__(self, _settings=None):
                self._settings = _settings if _settings is not None else PackingSimilarityLib.PXRDSimulatorSettings()

            @staticmethod
            def _get_wavelength_object(value):
                if value is not None:
                    if isinstance(value, float) or isinstance(value, int):
                        return CrystalDescriptors.PowderPattern.Wavelength(value)
                    elif (isinstance(value, list) or isinstance(value, tuple)) and len(value) == 2:
                        return CrystalDescriptors.PowderPattern.Wavelength(value[0], value[1])
                    elif isinstance(value, CrystalDescriptors.PowderPattern.Wavelength):
                        return value
                    else:
                        raise TypeError(f'Cant set wavelength; type must be a float or a pair of floats: {value} was passed in')
                return None

            def _get_wavelength(self, index):
                if self._settings.nwavelengths() <= index:
                    return None
                return CrystalDescriptors.PowderPattern.Wavelength(_wavelength=self._settings.wavelength(index))

            def _set_wavelength(self, index, value):
                wavelengths = [self._settings.wavelength(i) for i in range(self._settings.nwavelengths())]
                while len(wavelengths) <= index:
                    wavelengths.append(value)
                wavelengths[index] = value
                self._settings.set_wavelengths(wavelengths)

            @property
            def wavelength(self):
                '''Set or get the primary wavelength

                :param value: float or pair of floats (wavelength and scale factor) or another Wavelength object or
                              None to reset to the default
                :return: Primary wavelength object (or None) for the simulation
                '''
                return self._get_wavelength(0)

            @wavelength.setter
            def wavelength(self, value):
                w = self._get_wavelength_object(value)
                if w is not None:
                    wavelength = w._wavelength
                else:
                    default_settings = PackingSimilarityLib.PXRDSimulatorSettings()
                    wavelength = default_settings.wavelength(0)
                self._set_wavelength(0, wavelength)

            @property
            def second_wavelength(self):
                '''Set or get the secondary wavelength

                :param value: float or pair of floats (wavelength and scale factor) or another Wavelength object or
                             None to remove the secondary wavelength
                :return: Secondary wavelength object (or None) for the simulation
                '''
                return self._get_wavelength(1)

            @second_wavelength.setter
            def second_wavelength(self, value):
                w = self._get_wavelength_object(value)
                if w is not None:
                    self._set_wavelength(1, w._wavelength)
                else:
                    self._settings.set_wavelengths([self.wavelength._wavelength])

            @property
            def include_hydrogens(self):
                '''
                Whether to include hydrogens in the simulation
                :return: True or false
                '''
                return self._settings.include_hydrogens()

            @include_hydrogens.setter
            def include_hydrogens(self, value):
                self._settings.set_include_hydrogens(bool(value))

            @property
            def deuterium_is_hydrogen(self):
                '''
                Whether to include treat deuterium as hydrogen
                :return: True or false
                '''
                return self._settings.D_is_H()

            @deuterium_is_hydrogen.setter
            def deuterium_is_hydrogen(self, value):
                self._settings.set_D_is_H(bool(value))

            @property
            def two_theta_minimum(self):
                '''
                Where to start the pattern simulation
                :return: float representing the minimum 2-theta (in degrees)
                '''
                return self._settings.two_theta_start().degrees()

            @two_theta_minimum.setter
            def two_theta_minimum(self, value):
                self._settings.set_two_theta_range(MathsLib.Angle(float(value), MathsLib.Angle.DEGREES), self._settings.two_theta_end())

            @property
            def two_theta_maximum(self):
                '''
                Where to end the pattern simulation
                :return: float representing the maximum 2-theta (in degrees)
                '''
                return self._settings.two_theta_end().degrees()

            @two_theta_maximum.setter
            def two_theta_maximum(self, value):
                self._settings.set_two_theta_range(self._settings.two_theta_start(), MathsLib.Angle(float(value), MathsLib.Angle.DEGREES))

            @property
            def two_theta_step(self):
                '''
                The step-size used in the pattern simulation
                :return: float representing the step size (in degrees)
                '''
                return self._settings.two_theta_step().degrees()

            @two_theta_step.setter
            def two_theta_step(self, value):
                self._settings.set_two_theta_step(MathsLib.Angle(float(value), MathsLib.Angle.DEGREES))

            @property
            def full_width_at_half_maximum(self):
                '''
                The the full width at half height of peaks to use in simulation
                :return: float representing the full width at half height of peaks (in degrees)
                '''
                x = self._settings.peak_shape().peak_width()
                y = PackingSimilarityLib.get_fwhm_as_constant_fwhm(x)
                if y:
                    return y.width().degrees()
                return None

            @full_width_at_half_maximum.setter
            def full_width_at_half_maximum(self, value):
                fwhm = PackingSimilarityLib.ConstantFWHM(
                    MathsLib.Angle(float(value), MathsLib.Angle.DEGREES)
                )
                self._settings.peak_shape().set_peak_width(fwhm)

            @property
            def march_dollase_preferred_orientation(self):
                '''Setting for march_dollase.

                :return: a :class:`PXRDMatchOptimiser.Settings.PreferredOrientation` or None

                The default value is None. This can be set with a tuple of (h, k, l, r).
                '''
                po = self._settings.preferred_orientation()
                md = PackingSimilarityLib.perferred_orientation_function_as_march_dollase(po)
                return md if md is None else CrystalDescriptors.PowderPattern.PreferredOrientation(_function=md)

            @march_dollase_preferred_orientation.setter
            def march_dollase_preferred_orientation(self, value):
                # Shape of input is (h,k,l,value)
                if not value:
                    self._settings.set_preferred_orientation(None)
                elif isinstance(value, CrystalDescriptors.PowderPattern.PreferredOrientation):
                    self._settings.set_preferred_orientation(value._function)
                else:
                    try:
                        function = CrystalDescriptors.PowderPattern.PreferredOrientation(values=value)
                        self._settings.set_preferred_orientation(function._function)
                    except (ValueError, IndexError, TypeError):
                        raise ValueError(f"expected one of None or a CrystalDescriptors.PowderPattern.PreferredOrientation or a tuple of form (int,int,int,float) - got {value}")

            @property
            def slit_type(self) -> str:
                '''The type of slit to be simulated

                This may be 'fixed' (default) or 'variable'.

                :return: string representing type of slit to be simulated
                '''
                return CrystalDescriptors.PowderPattern.Settings._slit_type_dict.inverse_lookup(
                    self._settings.slit_type())

            @slit_type.setter
            def slit_type(self, value: str):
                self._settings.set_slit_type(
                    CrystalDescriptors.PowderPattern.Settings._slit_type_dict[value.lower()])

            @property
            def fast_peak_shape(self) -> bool:
                '''Whether to use a fast but less accurate peak shape calculation during simulation

                The peak shape will be applied using fast fourier transform convolution of the peak shape function.
                This is faster but less accurate than the default convolution method. The resulting peaks will tend to be wider.

                :return: True or False
                '''
                return self._settings.fft_convolve()

            @fast_peak_shape.setter
            def fast_peak_shape(self, value: bool):
                self._settings.set_fft_convolve(value)



[docs]
        @nested_class('PowderPattern')
        class PreferredOrientation:
            '''A preferred orientation for PXRD simulation.'''
            def __init__(self, values=None, _function=None) -> None:
                self._function = _function
                if values and not _function:
                    try:
                        mi = ChemistryLib.MillerIndices(*[int(v) for v in values[:3]])
                        self._function = PackingSimilarityLib.MarchDollase(mi, float(values[3]))
                    except (ValueError, IndexError):
                        raise ValueError(f"expected a tuple of form (int,int,int,float) - got {values}")

            def __str__(self):
                if not self._function:
                    return 'None'
                mi = self._function.miller_indices()
                return f'{mi.h()},{mi.k()},{mi.l()},{self._function.r()}'

            def __repr__(self):
                return f'CrystalDescriptors.PowderPattern.PreferredOrientation(values=({self}))'

            def __len__(self):
                return 4 if self._function else 0

            def __getitem__(self, index):
                if not self._function:
                    raise IndexError('PreferredOrientation has no value')
                if index == 0:
                    return self._function.miller_indices().h()
                if index == 1:
                    return self._function.miller_indices().k()
                if index == 2:
                    return self._function.miller_indices().l()
                if index == 3:
                    return self._function.r()
                if isinstance(index, slice):
                    # Get the start, stop, and step from the slice
                    return [self[i] for i in range(*index.indices(len(self)))]
                raise IndexError('PreferredOrientation index out of range')

            def __eq__(self, value) -> bool:
                if not isinstance(value, CrystalDescriptors.PowderPattern.PreferredOrientation):
                    return tuple(self) == value
                if (not self._function) != (not value._function):
                    return False
                if not self._function:
                    return True
                if self._function.miller_indices() != value._function.miller_indices():
                    return False
                return self._function.r() == value._function.r()

            @property
            def h(self):
                '''The miller indices h value of the preferred orientation.'''
                return self._function.miller_indices().h()

            @property
            def k(self):
                '''The miller indices k value of the preferred orientation.'''
                return self._function.miller_indices().k()

            @property
            def l(self):
                '''The miller indices l value of the preferred orientation.'''
                return self._function.miller_indices().l()

            @property
            def r(self):
                '''The March-Dollase r value of the preferred orientation.'''
                return self._function.r()



[docs]
        @nested_class('PowderPattern')
        class Wavelength(object):
            '''Represents a wavelength for powder studies.

            Some standard wavelengths - these are floats, not :class:`ccdc.descriptors.CrystalDescriptors.PowderPattern.Wavelength`
            '''
            Wavelength_Cu    = DatabaseEntryLib.Wavelength.Wavelength_Cu
            Wavelength_CuKa1 = DatabaseEntryLib.Wavelength.Wavelength_CuKa1
            Wavelength_CuKa2 = DatabaseEntryLib.Wavelength.Wavelength_CuKa2
            Wavelength_CuKb1 = DatabaseEntryLib.Wavelength.Wavelength_CuKb1
            Wavelength_Cr    = DatabaseEntryLib.Wavelength.Wavelength_Cr
            Wavelength_CrKa1 = DatabaseEntryLib.Wavelength.Wavelength_CrKa1
            Wavelength_CrKa2 = DatabaseEntryLib.Wavelength.Wavelength_CrKa2
            Wavelength_Co    = DatabaseEntryLib.Wavelength.Wavelength_Co
            Wavelength_CoKa1 = DatabaseEntryLib.Wavelength.Wavelength_CoKa1
            Wavelength_CoKa2 = DatabaseEntryLib.Wavelength.Wavelength_CoKa2
            Wavelength_CoKb1 = DatabaseEntryLib.Wavelength.Wavelength_CoKb1
            Wavelength_Mo    = DatabaseEntryLib.Wavelength.Wavelength_Mo
            Wavelength_MoKa1 = DatabaseEntryLib.Wavelength.Wavelength_MoKa1
            Wavelength_MoKa2 = DatabaseEntryLib.Wavelength.Wavelength_MoKa2
            Wavelength_MoKb1 = DatabaseEntryLib.Wavelength.Wavelength_MoKb1
            Wavelength_Fe    = DatabaseEntryLib.Wavelength.Wavelength_Fe
            Wavelength_FeKa1 = DatabaseEntryLib.Wavelength.Wavelength_FeKa1
            Wavelength_FeKa2 = DatabaseEntryLib.Wavelength.Wavelength_FeKa2
            Wavelength_FeKb1 = DatabaseEntryLib.Wavelength.Wavelength_FeKb1

            def __init__(self, wavelength=None, scale_factor=1.0, _wavelength=None):
                if _wavelength is not None:
                    self._wavelength = _wavelength
                else:
                    if wavelength is None:
                        wavelength = CrystalDescriptors.PowderPattern.Wavelength.Wavelength_CuKa1
                    self._wavelength = DatabaseEntryLib.Wavelength(wavelength, scale_factor)

            @property
            def wavelength(self):
                '''The wavelength.'''
                return self._wavelength.wavelength()

            @wavelength.setter
            def wavelength(self, w):
                self._wavelength = DatabaseEntryLib.Wavelength(w, self.scale_factor)

            @property
            def scale_factor(self):
                '''The scale factor of this Wavelength.'''
                return self._wavelength.scale_factor()

            @scale_factor.setter
            def scale_factor(self, f):
                self._wavelength = DatabaseEntryLib.Wavelength(self.wavelength, f)



[docs]
        @nested_class('PowderPattern')
        class TickMark(object):
            '''A tick mark in a simulated powder pattern.
            '''
            def __init__(self, _tick, _crystal=None):
                '''Create from an internal tick mark object.'''
                self._tick = _tick
                self._crystal = _crystal

            @property
            def two_theta(self):
                '''Two theta value of this tick.'''
                return self._tick.two_theta().degrees()

            @property
            def is_systematically_absent(self):
                '''Whether this tick mark is systematically absent.'''
                return self._tick.is_systematically_absent()

            @property
            def miller_indices(self):
                '''The Miller indices of this tick mark.'''
                if self._crystal is not None:
                    mi = self._tick.miller_indices()
                    return self._crystal.miller_indices(
                        mi.h(), mi.k(), mi.l()
                    )



[docs]
        @staticmethod
        def from_file(file_name, format=None, default_wavelength=None):
            '''Create a CrystalDescriptors.PowderPattern from a file.

            ``format`` may take one of the following values:
            - ``'xy'``: XY format (2 columns: 2theta and intensity)
            - ``'xye'``: XYE format (3 columns: 2theta, intensity and ESD)
            - ``'xrdml'``: Panalytical XRDML format
            If ``format`` is ``None``, it will be deduced from the filename extension.

            :param file_name: path to the file
            :param format: string indicating the format to expect; if ``None`` will deduce from filename extension
            :param default_wavelength: Default wavelength used if no wavelength found/parsed in file
            '''

            powderlib_format = 'unspecified' if format is None else format
            if default_wavelength is None:
                pattern = PackingSimilarityLib.PXRDPattern(file_name, powderlib_format)
            else:
                pattern = PackingSimilarityLib.PXRDPattern(file_name, powderlib_format, default_wavelength)

            settings = CrystalDescriptors.PowderPattern.Settings()

            if (pattern.warningMessage() != ""):
                print(pattern.warningMessage())

            settings.wavelength = CrystalDescriptors.PowderPattern.Wavelength(pattern.wavelength(0).wavelength(), pattern.wavelength(0).scale_factor())
            settings.two_theta_minimum = pattern.two_theta_start()
            settings.two_theta_maximum = pattern.two_theta_end()
            settings.two_theta_step = pattern.two_theta_uniform_step_size()

            return CrystalDescriptors.PowderPattern(pattern, settings)



[docs]
        @staticmethod
        def from_xye_file(file_name, default_wavelength=None):
            '''Create a CrystalDescriptors.PowderPattern from an xye file.

            :param file_name: path to xye file
            :param default_wavelength: Default wavelength used if no wavelength found/parsed in file
            '''
            return CrystalDescriptors.PowderPattern.from_file(
                file_name,
                format='xye',
                default_wavelength=default_wavelength)



[docs]
        @staticmethod
        def from_xy_file(file_name, default_wavelength=None):
            '''Create a CrystalDescriptors.PowderPattern from an xy file.

            :param file_name: path to xy file
            :param default_wavelength: Default wavelength used if no wavelength found/parsed in file
            '''
            return CrystalDescriptors.PowderPattern.from_file(
                file_name,
                format='xy',
                default_wavelength=default_wavelength)



[docs]
        @staticmethod
        def from_xrdml_file(file_name, default_wavelength=None):
            '''Create a CrystalDescriptors.PowderPattern from a Panalytical XRDML file.

            See https://www.malvernpanalytical.com/en/products/category/software/x-ray-diffraction-software/data-collector
            for details of the XRDML format.

            :param file_name: path to XRDML file
            :param default_wavelength: Default wavelength used if no wavelength found/parsed in file
            '''
            return CrystalDescriptors.PowderPattern.from_file(
                file_name,
                format='xrdml',
                default_wavelength=default_wavelength)



[docs]
        @staticmethod
        def from_crystal(crystal, settings=None):
            '''Create a CrystalDescriptors.PowderPattern from a crystal.

            :param crystal: :class:`ccdc.crystal.Crystal`
            :param settings: :class:`ccdc.descriptors.CrystalDescriptors.PowderPattern.Settings`
            '''
            if settings is None:
                settings = CrystalDescriptors.PowderPattern.Settings()
            sim = PackingSimilarityLib.PXRDSimulator(crystal._crystal, settings._settings)
            return CrystalDescriptors.PowderPattern(sim.simulate_profile(), settings, sim, crystal)


        def __init__(self, _pattern, _settings=None, _simulation=None, _crystal=None):
            '''Private: create from an internal PXRDPattern.'''
            self._pattern = _pattern
            self.settings = _settings
            self._simulation = _simulation
            self._crystal = _crystal


[docs]
        def resetWavelength(self, new_wavelength=None):
            '''Reset the wavelength for an existing powder pattern

            :param new_wavelength: New wavelength, if this is left blank then the wavelength is reset to 1.54056 Angstrom
            '''
            if new_wavelength is None:
                self.settings.wavelength = CrystalDescriptors.PowderPattern.Wavelength(DatabaseEntryLib.Wavelength.Wavelength_CuKa1, 1.0)
            else:
                self.settings.wavelength = CrystalDescriptors.PowderPattern.Wavelength(new_wavelength, 1.0)



[docs]
        def similarity(self, other, width=2.0, use_esds=True):
            '''Measure of match between this pattern and another. 

            This uses the cross-correlations described in R. de Gelder, R. Wehrens, J.A. Hageman (2001)
            <i>J. Comp. Chem.</i> <b>22</b>:273-289. `https://doi.org/10.1002/1096-987X(200102)22:3%3C273::AID-JCC1001%3E3.0.CO;2-0`_

            :param other: :class:`ccdc.descriptors.CrystalDescriptors.PowderPattern`
            :param width: width (in degrees) of the base of the triangle weight function
            :param use_esds: Whether to use the powder pattern estimates of standard deviation on the counts in the calculation as weightings

            :returns: float that represents the similarity of the two patterns
            '''
            if use_esds:
                ue = PackingSimilarityLib.PXRDPattern.USE_ESDS
            else:
                ue = PackingSimilarityLib.PXRDPattern.DO_NOT_USE_ESDS

            return PackingSimilarityLib.PXRDMatch(self._pattern, other._pattern, width, ue)



[docs]
        def write_xye_file(self, file_name):
            '''Write a .xye format file.

            :param file_name: output file name
            '''
            self._pattern.write_xy_or_xye_file(file_name, PackingSimilarityLib.PXRDPattern.USE_ESDS)



[docs]
        def write_xy_file(self, file_name):
            '''Write a .xy format file.

            The .xy format is the same as the .xye format except that the ESD column is omitted.

            :param file_name: output file name
            '''
            self._pattern.write_xy_or_xye_file(file_name, PackingSimilarityLib.PXRDPattern.DO_NOT_USE_ESDS)



[docs]
        def write_raw_file(self, file_name):
            '''Write a Bruker .raw file.

            :param file_name: output file name
            '''
            self._pattern.write_raw_file(file_name)



[docs]
        def write_xrdml_file(self, file_name):
            '''Write a Panalytical .xrdml format file.

            :param file_name: output file name
            '''
            self._pattern.write_xrdml_file(file_name)



[docs]
        def integral(self, start=0.0, end=180.0):
            '''The area under the curve.

            :param start: float
            :param end: float
            :returns: float
            '''
            return self._pattern.integral(start, end)


        @property
        def two_theta(self):
            '''The array of two_theta values.'''
            return [
                self._pattern.two_theta(i) for i in range(self._pattern.npoints())
            ]

        @property
        def intensity(self):
            '''The array of intensity values.'''
            return [
                self._pattern.intensity(i) for i in range(self._pattern.npoints())
            ]

        @property
        def esd(self):
            '''The array of esd values (Estimated Square Deviations).'''
            return [
                self._pattern.esd(i) for i in range(self._pattern.npoints())
            ]

        @property
        def tick_marks(self):
            '''The array of tick marks if this is a simulated powder pattern.

            :returns: list of :class:`ccdc.descriptors.CrystalDescriptors.PowderPattern.TickMark`
                     or ``None`` if this is not a simulated powder pattern.
            '''
            if self._simulation is not None:
                self._ticks = self._simulation.tick_mark_list(self._simulation.INCLUDE_SYSTEMATIC_ABSENCES)
                self._ticks.sort_on_two_theta()
                return [
                    CrystalDescriptors.PowderPattern.TickMark(
                        self._ticks.tick_mark(i),
                        self._crystal
                    )
                    for i in range(self._ticks.size())
                ]



#################################################################################################
#   Morphology
#################################################################################################


[docs]
    class Morphology(morphology.MorphologyBase):
        def __init__(self, crystal=None):
            warnings.warn('''This class is deprecated and will be removed in a later version.
Instead, please use the ccdc.morphology.BFDHMorphology class.''', DeprecationWarning)
            super().__init__(crystal)

        Facet = morphology.Facet
        OrientedBoundingBox = morphology.OrientedBoundingBox



#################################################################################################
#   Graph sets
#################################################################################################


[docs]
    class GraphSetSearch(object):
        '''Finds the graph sets of a crystal.'''

[docs]
        class GraphSet(object):
            '''An individual graph set.'''
            _descriptor_dict = {
                MotifSearchLib.GraphSet.SELF: 'self',
                MotifSearchLib.GraphSet.DISCRETE: 'discrete',
                MotifSearchLib.GraphSet.CHAIN: 'chain',
                MotifSearchLib.GraphSet.RING: 'ring',
                MotifSearchLib.GraphSet.NONE: 'none'
            }
            def __init__(self, _graph_set_atoms, _view):
                self._graph_set_atoms = _graph_set_atoms
                self._graph_set = _graph_set_atoms.graph_set()
                self._view = _view

            def __repr__(self):
                ostr = MotifSearchLib.ostringstream()
                self._graph_set.print_(ostr)
                return ostr.str()

            @property
            def descriptor(self):
                '''The descriptor of the graph set.'''
                return CrystalDescriptors.GraphSetSearch.GraphSet._descriptor_dict[self._graph_set.designator()]

            @property
            def ndonors(self):
                '''The number of donors involved in the graph set.'''
                return self._graph_set.ndonors()

            @property
            def nacceptors(self):
                '''The number of acceptors involved in the graph set.'''
                return self._graph_set.nacceptors()

            @property
            def degree(self):
                '''The degree of the graph set, i.e. the number of atoms involved.'''
                return self._graph_set.degree()

            @property
            def edge_labels(self):
                '''The edge labels of the graph set.

                The labels are arbitrary letters identifying a unique hydrogen bond, separated by
                '>' or '<' indicating the donor-acceptor direction.
                '''
                return self._graph_set.edge_labels()

            @property
            def nmolecules(self):
                '''The number of molecules involved in the graph set.'''
                return self._graph_set_atoms.nmolecules()

            @property
            def period(self):
                '''The period of the graph set, i.e the number of hydrogen bonds in the repeat unit.

                If the type of the graph set is not a chain or a ring this will be -1
                '''
                return self._graph_set_atoms.period()

            @property
            def label_set(self):
                '''The set of hydrogen bond labels found in the graph set.'''
                return set(self._graph_set_atoms.label_set())

            @property
            def hbonds(self):
                '''The hydrogen bonds of the graph set.

                :returns: a tuple of :class:`ccdc.crystal.Crystal.HBond` instances.
                '''
                res = []
                for i in range(self._graph_set_atoms.nhbonds()):
                    hb = self._graph_set_atoms.hbond(i)
                    _contact = ChemistryLib.MolecularContact(
                        hb.from_molecule(), hb.from_atom(),
                        hb.to_molecule(), hb.to_atom(),
                        ChemistryLib.MolecularContact.HBOND_CONTACT
                    )
                    res.append(Crystal.HBond(self._view, _contact))
                return tuple(res)



[docs]
        class Settings(object):
            '''Configurable settings for the graph set analyser.'''
            level = 2 #: deepest level to search.  This is the number of different HBonds involved.
            max_ring_size = 6 #: largest ring to search
            max_chain_size = 4 #: longest chain to search
            max_discrete_chain_size = 4 #: longest discrete chain to search
            _inter_dict = bidirectional_dict(
                intermolecular=ChemistryLib.HBondCriterion.INTERMOLECULAR,
                intramolecular=ChemistryLib.HBondCriterion.INTRAMOLECULAR,
                any=ChemistryLib.HBondCriterion.ANY,
            )

            def __init__(self, hbond_criterion=None):
                if hbond_criterion is None:
                    hbond_criterion = Molecule.HBondCriterion()
                self.hbond_criterion = hbond_criterion

            @property
            def distance_range(self):
                '''Allowable distance range for a HBond to be formed.'''
                return (
                    self.hbond_criterion._hbc.hbond_criterion().min_dist_tolerance(),
                    self.hbond_criterion._hbc.hbond_criterion().max_dist_tolerance()
                )

            @distance_range.setter
            def distance_range(self, val):
                self.hbond_criterion._hbc.hbond_criterion().set_min_dist_tolerance(min(val))
                self.hbond_criterion._hbc.hbond_criterion().set_max_dist_tolerance(max(val))


            @property
            def angle_tolerance(self):
                '''The tolerance of the HBond angle.'''
                return self.hbond_criterion._hbc.hbond_criterion().angle_tolerance().degrees()

            @angle_tolerance.setter
            def angle_tolerance(self, val):
                if val is None:
                    self.hbond_criterion._hbc.set_angle_option(
                        MotifSearchLib.GraphSetHBondCriterion.ANGLE_IS_NOT_CHECKED
                    )
                else:
                    self.hbond_criterion._hbc.set_angle_option(
                        MotifSearchLib.GraphSetHBondCriterion.ANGLE_IS_CHECKED
                    )
                    self.hbond_criterion._hbc.hbond_criterion().set_angle_tolerance(
                        MathsLib.Angle(val, MathsLib.Angle.DEGREES)
                    )

            @property
            def vdw_corrected(self):
                '''Whether the distance range is Van der Waals corrected.'''
                return self.hbond_criterion._hbc.hbond_criterion().vdw_corrected()

            @vdw_corrected.setter
            def vdw_corrected(self, val):
                self.hbond_criterion._hbc.hbond_criterion().set_vdw_corrected(val)

            @property
            def require_hydrogens(self):
                '''Whether Hydrogens are required for the HBond.'''
                return not self.hbond_criterion._hbc.hbond_criterion().allow_non_hydrogen_contacts()

            @require_hydrogens.setter
            def require_hydrogens(self, val):
                if val:
                    self.hbond_criterion._hbc.set_distance_option(
                        MotifSearchLib.GraphSetHBondCriterion.HYDROGEN_TO_ACCEPTOR
                    )
                else:
                    self.hbond_criterion._hbc.set_distance_option(
                        MotifSearchLib.GraphSetHBondCriterion.DONOR_TO_ACCEPTOR
                    )
                self.hbond_criterion._hbc.hbond_criterion().set_allow_non_hydrogen_contacts(not val)

            @property
            def intermolecular(self):
                '''Whether HBonds should be intermolecular, intramolecular, or any.'''
                return self._inter_dict.inverse_lookup(
                    self.hbond_criterion._hbc.hbond_criterion().type()
                )

            @intermolecular.setter
            def intermolecular(self, val):
                self.hbond_criterion._hbc.hbond_criterion().set_type(
                    self._inter_dict.prefix_lookup(val)
                )

            @property
            def path_length_range(self):
                '''The shortest and longest bond-path separation for intramolecular contacts.'''
                return (
                    self.hbond_criterion._hbc.hbond_criterion().minpath(),
                    self.hbond_criterion._hbc.hbond_criterion().maxpath()
                )

            @path_length_range.setter
            def path_length_range(self, val):
                self.hbond_criterion._hbc.hbond_criterion().set_minpath(min(val))
                self.hbond_criterion._hbc.hbond_criterion().set_maxpath(max(val))


        def __init__(self, settings=None):
            '''Initialise the analyser.'''
            if settings is None:
                settings = CrystalDescriptors.GraphSetSearch.Settings()
            self.settings = settings
            self._analyser = MotifSearchLib.GraphSetAnalyser()
            self._analyser.set_level(2)
            self._analyser.set_hbond_criterion(self.settings.hbond_criterion._hbc)


[docs]
        def search(self, crystal):
            '''Find all graph sets for the crystal subject to the constraints of the settings.

            :param crystal: :class:`ccdc.crystal.Crystal` instance.
            :returns: a tuple of :class:`ccdc.descriptors.CrystalDescriptors.GraphSetSearch.GraphSet` instances.
            '''
            self._analyser.set_level(self.settings.level)
            self._analyser.set_max_ring_size(self.settings.max_ring_size)
            self._analyser.set_max_chain_size(self.settings.max_chain_size)
            self._analyser.set_max_discrete_size(self.settings.max_discrete_chain_size)
            self._analyser.set_hbond_criterion(self.settings.hbond_criterion._hbc)
            atoms = self._analyser.run_graph_set_analysis(crystal._crystal)
            view = ChemistryLib.CrystalStructureView.instantiate(crystal._crystal)
            return tuple(CrystalDescriptors.GraphSetSearch.GraphSet(a, view) for a in atoms)



#################################################################################################
#   Hydrogen bond coordination
#################################################################################################


[docs]
    class HBondCoordination(object):
        '''Calculate HBond coordination predictions.

        The HBondCoordination class is available only to CSD-Materials and
        CSD-Enterprise users.
        '''
        _telemetry = 0


[docs]
        class Settings(object):
            '''Settings pertaining to the calculation of coordination predictions.'''

            def __init__(self):
                self._graph_set_search_settings = CrystalDescriptors.GraphSetSearch.Settings()

                self._graph_set_search_settings.hbond_criterion.intermolecular = 'Any'
                self._graph_set_search_settings.hbond_criterion.require_hydrogens = True
                self._graph_set_search_settings.hbond_criterion.path_length_range = (3, 999)
                self._graph_set_search_settings.hbond_criterion.vdw_corrected = True
                self._graph_set_search_settings.hbond_criterion.distance_range = (-5, 0.0)
                self._coordination_models_path = None
                self._models_changed = False
                if self.coordination_models_path is None:
                    self.coordination_models_path = os.path.abspath(
                        os.path.join(
                            os.path.dirname(__file__),
                            'parameter_files',
                            'hydrogen_bond_coordination_models'
                        )
                    )

            @property
            def hbond_criterion(self):
                return self._graph_set_search_settings.hbond_criterion

            @property
            def coordination_models_path(self):
                '''The directory in which the coordination models may be found.'''
                return self._coordination_models_path

            @coordination_models_path.setter
            def coordination_models_path(self, path):
                self._coordination_models_path = path
                self._models_changed = True



[docs]
        class Predictions(object):
            '''The predictions for HBonds coordinations.'''
            Observation = collections.namedtuple('Observation', ['label', 'coordination_count', 'probability'])
            def observation_repr(obs):
                return "Observation(label='%s', coordination_count=%d, probability=%f)" % (obs.label, obs.coordination_count, obs.probability)
            Observation.__repr__ = observation_repr

            def __init__(self, crystal, _analysis, _predictions):
                self.crystal = crystal
                self._predictions = _predictions
                self._donor_atoms = tuple(
                    Atom(_atom=hb.atom()) for hb in _analysis.observed_donor_atoms()
                )
                self._acceptor_atoms = tuple(
                    Atom(_atom=hb.atom()) for hb in _analysis.observed_acceptor_atoms()
                )

            @property
            def is_valid(self):
                '''Whether or not valid predictions were made.'''
                return not all(len(p) <= 1 for p in self._predictions)

            @property
            def observed(self):
                '''The predicted probabilities of observed HBonds.'''
                if not self._predictions:
                    return None
                obs = tuple(
                    CrystalDescriptors.HBondCoordination.Predictions.Observation(p[0][0], i - 1, float(p[i][0]))
                    for p in self._predictions
                    for i in range(1, len(p))
                    if abs(p[i][1]) == 1
                )
                return obs


[docs]
            def predictions_for_label(self, label, type='donor'):
                '''All the predictions for the atom.

                :returns: a pair: observed hbond coordination number, dictionary with key, hbond coordination number, value, predicted probability.
                '''
                if not self._predictions:
                    return None
                ps = [p for p in self._predictions if p[0][0].startswith(label + ' ')]
                if len(ps) == 1:
                    p = ps[0]
                elif len(ps) == 0:
                    return None
                elif len(ps) > 2:
                    raise RuntimeError('Ambiguous atom label %s' % label)
                elif ps[0][0][0].endswith('(%s)' % type.lower()[0]):
                    p = ps[0]
                else:
                    p = ps[1]
                obs = {
                    i - 1: float(p[i][0])
                    for i in range(1, len(p))
                }
                observed = [i - 1 for i in range(1, len(p)) if abs(p[i][1]) == 1][0]
                return (observed, obs)


            def _functional_group_of_label(self, label, type='donor'):
                ps = [p[0][0] for p in self._predictions if p[0][0].startswith(label + ' ')]
                if len(ps) == 1:
                    p = ps[0]
                elif len(ps) == 0:
                    return None
                elif len(ps) > 2:
                    raise RuntimeError('Ambiguous atom label %s' % label)
                elif ps[0].endswith('(%s)' % type.lower()[0]):
                    p = ps[0]
                else:
                    p = ps[1]
                return p


[docs]
            def functional_groups_of_hbond(self, hbond):
                '''The functional group pertaining to a hydrogen-bonding atom.'''
                ats = hbond.atoms
                if len(ats) == 3:
                    if ats[1] in ats[0].neighbours:
                        donor = self._functional_group_of_label(ats[0].label, 'donor')
                        acc = self._functional_group_of_label(ats[-1].label, 'acceptor')
                    else:
                        donor = self._functional_group_of_label(ats[-1].label, 'donor')
                        acc = self._functional_group_of_label(ats[0].label, 'acceptor')
                else:
                    # No H, how can I tell which is donor?
                    if ats[0].is_donor and not ats[-1].is_donor:
                        donor = self._functional_group_of_label(ats[0].label, 'donor')
                        acc = self._functional_group_of_label(ats[-1].label, 'acceptor')
                    elif ats[-1].is_donor and not ats[0].is_donor:
                        donor = self._functional_group_of_label(ats[-1].label, 'donor')
                        acc = self._functional_group_of_label(ats[0].label, 'acceptor')
                    else:
                        donor = self._functional_group_of_label(ats[0].label, 'donor')
                        acc = self._functional_group_of_label(ats[-1].label, 'acceptor')
                return donor, acc



[docs]
            def to_csv(self, separator=','):
                '''Format the predictions suitable for a csv file.

                :param separator: a separation string, or ``None``.
                :returns: if separator is ``None`` a tuple of lists of components, otherwise a separated string of components.'''
                components = self.crystal.molecule.components
                file_orders = [
                    set(a._atom.annotations().obtain_FileOrdering().file_order() for a in c.atoms)
                    for c in components
                ]

                def one_line(l):
                    id_bits = l[0][0].split()
                    at_label = id_bits[0]
                    context = ' '.join(id_bits[2:-1])
                    da = id_bits[-1][1:-1]
                    if da == 'd':
                        possible_correct_atoms = [a for a in self._donor_atoms if a.label == at_label]
                    else:
                        possible_correct_atoms = [a for a in self._acceptor_atoms if a.label == at_label]
                    if len(possible_correct_atoms) == 1:
                        correct_atom = possible_correct_atoms[0]
                    else:
                        raise RuntimeError('Ambiguous label %s %s' % (
                            at_label, ' '.join(a.label for a in possible_correct_atoms)))
                    for i, c in enumerate(file_orders):
                        if correct_atom._atom.annotations().obtain_FileOrdering().file_order() in c:
                            residue = 'RES%d' % (i + 1)
                            break
                    else:
                        raise RuntimeError('Cannot find appropriate file order')
                    ret = [
                        self.crystal.identifier,
                        residue,
                        # at_label,
                        correct_atom.label,
                        context,
                        da,
                    ]

                    # Extract the coordination values themselves (starting at the second element of l)
                    coordination_values = l[1:]
                    for x in coordination_values:
                        ret.extend(
                            (float(x[0]), abs(x[1]))
                        )
                    for i in range(len(coordination_values), 7):
                        ret.extend((0.0, 0))
                    if separator is None:
                        return ret
                    return separator.join(str(x) for x in ret)

                if separator is None:
                    return (one_line(l) for l in self._predictions[1:])
                return '\n'.join(one_line(l) for l in self._predictions[1:]) + '\n'



        def __init__(self, settings=None, skip_telemetry=False):
            '''Initialise the prediction calculator.

            :param settings: :class:`ccdc.hbond_coordination.CrystalDescriptors.HBondCoordination.Settings` instance.
            '''
            if settings is None:
                settings = CrystalDescriptors.HBondCoordination.Settings()
            self.settings = settings
            self._setup_calculator()
            if not skip_telemetry:
                if type(self)._telemetry == 0:
                    UtilitiesLib.ccdc_coordination_quick_view_telemetry()
                    type(self)._telemetry = 1

        def _setup_calculator(self):
            self._coordination_models = PackingSimilarityLib.CoordinationModels(
                self.settings.coordination_models_path)
            self._calculator = PackingSimilarityLib.DonorAcceptorPropensityCalculator(self._coordination_models)


[docs]
        def predict(self, crystal):
            '''Calculate HBond coordination likelihoods for the crystal.

            :returns: a :class:`ccdc.hbond_coordination.CrystalDescriptors.HBondCoordination.Predictions` instance.
            '''
            if crystal.has_disorder:
                raise RuntimeError('Cannot calculate propensities for disordered crystals')
            if self.settings._models_changed:
                self._setup_calculator()
                self.settings._models_changed = False
            self.crystal = crystal
            self.hbond_analysis = PackingSimilarityLib.create_analysis_from_structure(
                crystal._crystal,
                self._coordination_models,
                self.settings.hbond_criterion._hbc.hbond_criterion())
            self._coordination_propensities = self._calculator.calculate(self.hbond_analysis)
            pairs = self.hbond_analysis.observed_hbonding_outcome(
                self._coordination_propensities
            )
            grouping_calculator = PackingSimilarityLib.DonorAcceptorOutcomeGroupingCalculator()
            grouping = grouping_calculator.grouping(
                pairs, self.hbond_analysis, self._coordination_propensities
            )
            self.donor_atoms = tuple(
                Atom(_atom=d.atom()) for d in self.hbond_analysis.observed_donor_atoms()
            )
            self.acceptor_atoms = tuple(
                Atom(_atom=a.atom()) for a in self.hbond_analysis.observed_acceptor_atoms()
            )
            return CrystalDescriptors.HBondCoordination.Predictions(
                self.crystal,
                self.hbond_analysis,
                PackingSimilarityLib.coordination_likelihoods_table_raw_data(
                    grouping_calculator, grouping, self._coordination_propensities
                )
            )




#################################################################################################
#   Hydrogen bond propensities
#################################################################################################


[docs]
    class HBondPropensities(object):
        '''Calculates HBond propensities.

        .. TODO: all donors and acceptors matched, load regression from file (fitting_data.txt)
        '''
        _telemetry = 0


[docs]
        class Settings(object):
            '''Pertaining to HBond propensity calculation.'''

            def __init__(self):
                '''NB: databases *MUST* be in the CSD SQLite format.'''
                self.hbond_criterion = Molecule.HBondCriterion()
                self.hbond_criterion.require_hydrogens = True
                self.hbond_criterion.path_length_range = (4, 999)
                self.working_directory = tempfile.mkdtemp()
                self._databases = [io.EntryReader('CSD')]
                self._identifier_list = UtilitiesLib.DatabaseEntryIdentifierList()
                self.functional_groups_directory = str(Resources().get_functional_groups_dir())

            @property
            def databases(self):
                '''The databases to be used for the prediction.

                Note: the databases *MUST* be CSD SQLite databases for the moment.
                '''
                return self._databases

            @databases.setter
            def databases(self, dbs):
                if isinstance(dbs, io._DatabaseReader):
                    if not isinstance(dbs._db, (
                            CSDSQLDatabaseLib.CSDSQLDatabase,
                            FileFormatsLib.CrystalStructureDatabasePool,
                            FileFormatsLib.CrystalStructureDatabase
                    )):
                        raise RuntimeError('One or more databases is NOT searchable.')
                    self._databases = [dbs]
                else:
                    for d in dbs:
                        if isinstance(d._db, (
                                CSDSQLDatabaseLib.CSDSQLDatabase,
                                FileFormatsLib.CrystalStructureDatabasePool,
                                FileFormatsLib.CrystalStructureDatabase
                        )):
                            continue
                        raise RuntimeError('One or more databases is NOT searchable.')
                    self._databases = dbs

            @property
            def limit_identifier_list(self):
                '''A list of identifiers to limit the search'''
                return self._identifier_list

            @limit_identifier_list.setter
            def limit_identifier_list(self, identifiers):
                self._identifier_list = identifiers

            @property
            def working_directory(self):
                '''The working directory for the predictions.'''
                return self._working_directory

            @working_directory.setter
            def working_directory(self, path):
                path = path.replace('\\', '/')
                if not os.path.exists(path):
                    os.makedirs(path)
                self._working_directory = path



[docs]
        class FunctionalGroup(object):
            '''A functional group capable of hydrogen bonding.'''

            def __init__(self, _model_group):
                self._model_group = _model_group

            def __repr__(self):
                return 'FunctionalGroup(%s)' % self.identifier

            @property
            def identifier(self):
                '''The name of the functional group.'''
                return self._model_group.name


[docs]
            def matches(self, molecule):
                '''The substructure search matches of the functional group.'''
                from ccdc.search import SubstructureSearch, QuerySubstructure
                searcher = SubstructureSearch()
                sub = QuerySubstructure(self._model_group.substructure)
                searcher.add_substructure(sub)
                hits = searcher.search(molecule)
                return hits




[docs]
        class FittingData(object):
            '''The collection of entries used for the prediction.'''


[docs]
            class FittingDataEntry(object):
                '''An individual entry with associated matching data.'''

                def __init__(self, _fitting_item):
                    self._fitting_item = _fitting_item

                @property
                def identifier(self):
                    '''The identifier of the fitting data item.'''
                    return self._fitting_item.identifier().str()


            def __init__(self, _fitting_data=None, identifiers=None, databases=None):
                if _fitting_data is None:
                    _fitting_data = PackingSimilarityLib.PropensityPredictionFittingData()
                self._fitting_data = _fitting_data
                #if identifiers is None:
                #    # All the ids from all the dbs?
                #    pass
                #else:
                #    self._fitting_data.add_fitting_data_items(identifiers, [d._db for d in databases])
                self._limit_to_identifiers = identifiers
                self._databases = databases

            def __len__(self):
                if self._fitting_data:
                    return self._fitting_data.nitems()
                elif self._limit_to_identifiers:
                    return self._limit_to_identifiers.size()
                else:
                    return 0


[docs]
            def write(self, file_name):
                '''Writes the fitting data to a file.'''
                with io.CrystalWriter(file_name) as writer:
                    for i in range(len(self)):
                        item = self._fitting_data.item(i)
                        writer.write(Crystal(item.crystal_structure(), item.identifier().str()))



[docs]
            def nitems(self, functional_group=None):
                '''How many items there are representing the functional group.'''
                if functional_group is None:
                    return self._fitting_data.nitems()
                else:
                    return self._fitting_data.nitems_with_group(functional_group._model_group)



[docs]
            def advice_comment(self, functional_group=None):
                '''A string indicating whether or not there are enough data for propensity predictions.

                Note: when first made the fitting data has not performed substructure matching, so results for
                particular groups will be inappropriately bad.  Results will be valid after :meth:`ccdc.hbond_coordination.CrystalDescriptors.HBondPropensities.match_fitting_data` has been called.
                '''
                if functional_group is None:
                    return self._fitting_data.advice_comment()
                else:
                    return self._fitting_data.advice_comment(functional_group._model_group)


            def _identifiers(self):
                if self._fitting_data:
                    ids = UtilitiesLib.DatabaseEntryIdentifierList()
                    for i in range(len(self)):
                        item = self._fitting_data.item(i)
                        ids.push_back(item.identifier())
                    return ids
                elif self._limit_to_identifiers:
                    return self._limit_to_identifiers
                else:
                    return UtilitiesLib.DatabaseEntryIdentifierList()

            def identifiers(self):
                if self._fitting_data:
                    return tuple(self._fitting_data.item(i).identifier().str() for i in range(len(self)))
                else:
                    return tuple()

            def databases(self):
                if self._databases:
                    return self._databases
                return list(
                    set(
                        [self._fitting_data.item(i).parent_database() for i in range(len(self))]))


[docs]
            @staticmethod
            def from_file(file_name, databases):
                '''Reads fitting data from a file.'''
                _fitting_data = PackingSimilarityLib.PropensityPredictionFittingData()
                with io.EntryReader(file_name) as reader:
                    ids = UtilitiesLib.DatabaseEntryIdentifierList()
                    if isinstance(reader, io._GCDReader):
                        ids.read_from_file(file_name)
                    else:
                        for c in reader:
                            ids.push_back(UtilitiesLib.DatabaseEntryIdentifier(c.identifier))
                    _fitting_data.add_fitting_data_items(ids, [reader._db])
                return CrystalDescriptors.HBondPropensities.FittingData(_fitting_data=_fitting_data, identifiers=ids, databases=databases)




[docs]
        class HBondAtom(object):
            '''Base class for HBondDonor and HBondAcceptor.'''

            def __init__(self, _analysis):
                self._analysis = _analysis
                self.nnegative = self.npositive = None
                self.exclude = False

            @property
            def accessible_surface_area(self):
                '''The accessible surface area of the HBond atom.'''
                return PackingSimilarityLib.atom_accessible_surface(self._analysis)

            @property
            def functional_group_identifier(self):
                '''The identifier of the functional group for this atom.'''
                return self._analysis.substructure_label()

            @property
            def nlone_pairs(self):
                '''The number of lone pairs associated with this atom.'''
                return ChemistryLib.nlone_pairs(self._analysis.atom())

            @property
            def identifier(self):
                '''The full identifier of this atom.'''
                return self._analysis.label()

            @property
            def label(self):
                '''The label of the atom in the original structure.'''
                return self._analysis.atom().label()

            @property
            def atom(self):
                '''The :class:`ccdc.molecule.Atom` of the HBondAtom.'''
                return Atom(_atom=self._analysis.atom())



[docs]
        class HBondDonor(HBondAtom):
            '''A potential donor atom.

            This class will be augmented with the evidence found during match_fitting_data().
            '''

            def __init__(self, _analysis):
                super(self.__class__, self).__init__(_analysis)

            @property
            def donor_atom_type(self):
                '''A string representation of the atom's donor type.'''
                hbc = Molecule.HBondCriterion()
                return hbc.donor_atom_type(Atom(_atom=self._analysis.atom()))

            def __repr__(self):
                if self.nnegative is None:
                    return 'HBondDonor(%s) no evidence' % self.identifier
                else:
                    return 'HBondDonor(%s) %d positive %d negative' % (self.identifier, self.npositive, self.nnegative)



[docs]
        class HBondAcceptor(HBondAtom):
            '''A potental acceptor atom.

            This class will be augmented with the evidence found during match_fitting_data().
            '''

            def __init__(self, _analysis):
                super(self.__class__, self).__init__(_analysis)

            @property
            def acceptor_atom_type(self):
                '''A string representation of the atom's acceptor type.'''
                hbc = Molecule.HBondCriterion()
                return hbc.acceptor_atom_type(Atom(_atom=self._analysis.atom()))

            def __repr__(self):
                if self.nnegative is None:
                    return 'HBondAcceptor(%s) no evidence' % self.identifier
                else:
                    return 'HBondAcceptor(%s) %d positive %d negative' % (
                    self.identifier, self.npositive, self.nnegative)


        def _find_donor(self, label):
            for d in self.donors:
                if d.label == label:
                    return d

        def _find_acceptor(self, label):
            for a in self.acceptors:
                if a.label == label:
                    return a

        def _find_hbond(self, donor_label, acceptor_label, intermolecular):
            for hb in self.crystal_hbonds:
                if not hb.intermolecular == intermolecular:
                    continue

                if hb.atoms[1] in hb.atoms[0].neighbours:
                    if donor_label == hb.atoms[0].label and acceptor_label == hb.atoms[-1].label:
                        return hb
                elif hb.atoms[1] in hb.atoms[-1].neighbours:
                    if donor_label == hb.atoms[-1].label and acceptor_label == hb.atoms[0].label:
                        return hb

        def _find_propensity(self, d, a):
            for p in self.inter_propensities:
                if d.atom().label() == p.donor_label and a.atom().label() == p.acceptor_label:
                    return p
            raise RuntimeError('No matching propensity')


[docs]
        class HBond(object):
            '''A putative HBond in the propensity calculation.

            :ivar self.donor: the :class:`CrystalDescriptors.HBondPropensities.HBondDonor` involved in this hbond.
            :ivar self.acceptor: the :class:`CrystalDescriptors.HBondPropensities.HBondAcceptor` involved in this hbond.
            :ivar self.propensity: the :class:`CrystalDescriptors.HBondPropensities.Propensity` of this hbond.
            '''

            def __init__(self, hbp, _outcome):
                self._outcome = _outcome
                self.donor = hbp._find_donor(_outcome[0].atom().label())
                self.acceptor = hbp._find_acceptor(_outcome[1].atom().label())
                self.propensity = hbp._find_propensity(_outcome[0], _outcome[1])

            def __eq__(self, other):
                return (
                    self.donor == other.donor and
                    self.acceptor == other.acceptor and
                    self.propensity == other.propensity
                )



[docs]
        class HBondGrouping(object):
            '''A grouping of interactions between donors and acceptors representing a possible hbond network.

            This represents a point in the chart of Mercury's HBondPropensity wizard.

            :ivar self.donors: a tuple of :class:`CrystalDescriptors.HBondPropensities.HBondDonor` with specified coordination outcomes for this grouping.
            :ivar self.acceptors: a tuple of :class:`CrystalDescriptors.HBondPropensities.HBondAcceptor` with specified coordination outcomes for this grouping.
            :ivar self.hbonds: a tuple of :class:`CrystalDescriptors.HBondPropensities.HBond` forming the hbonds in the network.
            :ivar self.hbond_score: the average propensity value of hbonds.
            :ivar self.coordination_score: the negative average coordination score of the donors and acceptors with these coordination outcomes.
            '''
            _donor_outcome_mapping = {
                PackingSimilarityLib.DOES_NOT_DONATE: "does not donate",
                PackingSimilarityLib.DONATES_ONCE: "donates once",
                PackingSimilarityLib.DONATES_TWICE: "donates twice",
                PackingSimilarityLib.DONATES_THREE_TIMES: "donates 3 times",
                PackingSimilarityLib.DONATES_FOUR_TIMES: "donates 4 times",
                PackingSimilarityLib.DONATES_FIVE_TIMES: "donates 5 times",
                PackingSimilarityLib.DONATES_SIX_OR_MORE_TIMES: "donates >= 6 times"
            }

            _acceptor_outcome_mapping = {
                PackingSimilarityLib.DOES_NOT_ACCEPT: "does not accept",
                PackingSimilarityLib.ACCEPTS_ONCE: "accepts once",
                PackingSimilarityLib.ACCEPTS_TWICE: "accepts twice",
                PackingSimilarityLib.ACCEPTS_THREE_TIMES: "accepts 3 times",
                PackingSimilarityLib.ACCEPTS_FOUR_TIMES: "accepts 4 times",
                PackingSimilarityLib.ACCEPTS_FIVE_TIMES: "accepts 5 times",
                PackingSimilarityLib.ACCEPTS_SIX_TIMES: "accepts  6 times",
                PackingSimilarityLib.ACCEPTS_SEVEN_OR_MORE_TIMES: "accepts >= 7 times"
            }

            def __init__(self, hbond_propensities, _outcome):
                self._find_propensity = hbond_propensities._find_propensity
                self._outcome = _outcome
                self.donors = tuple(
                    CrystalDescriptors.HBondPropensities.HBondDonor(d._analysis) for d in hbond_propensities.donors
                )
                self.acceptors = tuple(
                    CrystalDescriptors.HBondPropensities.HBondAcceptor(a._analysis) for a in
                    hbond_propensities.acceptors
                )
                self.hbonds = tuple(
                    CrystalDescriptors.HBondPropensities.HBond(hbond_propensities, _outcome) for _outcome in
                    self._outcome.pair_propensities()
                )
                if len(self.hbonds) == 0:
                    self.hbond_score = 0.0
                else:
                    self.hbond_score = sum(hb.propensity.propensity for hb in self.hbonds) / len(self.hbonds)
                observed = []
                for hb in hbond_propensities.crystal_hbonds:
                    ats = hb.atoms
                    if ats[1] in ats[0].neighbours:
                        d = ats[0]
                        a = ats[-1]
                    else:
                        d = ats[-1]
                        a = ats[0]
                    observed.append((d.label, a.label))
                donor_counts = collections.defaultdict(int)
                acceptor_counts = collections.defaultdict(int)
                for hb in self.hbonds:
                    d = hb.donor._analysis.atom().label()
                    a = hb.acceptor._analysis.atom().label()
                    donor_counts[d] += 1
                    acceptor_counts[a] += 1
                    hb.is_observed = (d, a) in observed
                coord_score = 0.0
                for d in self.donors:
                    for attr, value in d.__dict__.items():
                        if not attr.startswith('_'):
                            setattr(d, attr, value)
                    label = d._analysis.atom().label()
                    outcome = donor_counts[label]
                    preds_for_label = hbond_propensities.coordination_scores.predictions_for_label(label, 'donor')
                    if preds_for_label is None:
                        d_coord = 0.0
                    else:
                        d_coord = preds_for_label[1].get(outcome, 0.0)
                    d.outcome = outcome
                    d.outcome_label = self._donor_outcome_mapping[outcome]
                    d.coordination_propensity = d_coord
                    coord_score += d_coord
                for a in self.acceptors:
                    for attr, value in a.__dict__.items():
                        if not attr.startswith('_'):
                            setattr(a, attr, value)
                    label = a._analysis.atom().label()
                    outcome = acceptor_counts[label]
                    preds_for_label = hbond_propensities.coordination_scores.predictions_for_label(label, 'acceptor')
                    if preds_for_label is None:
                        a_coord = 0.0
                    else:
                        a_coord = preds_for_label[1].get(outcome, 0.0)
                    a.outcome = outcome
                    a.outcome_label = self._acceptor_outcome_mapping[outcome]
                    a.coordination_propensity = a_coord
                    coord_score += a_coord
                self.coordination_score = -1 * (coord_score / len(self.donors + self.acceptors))

            def __repr__(self):
                dons = ' '.join('%s: %s' % (d.atom, d.outcome) for d in self.donors)
                accs = ' '.join('%s: %s' % (d.atom, d.outcome) for d in self.acceptors)
                return 'Grouping(Donors: %s; Acceptors: %s)' % (dons, accs)

            def __eq__(self, other):
                return (
                    all(d.atom == o.atom for d, o in zip(self.donors, other.donors)) and
                    all(d.outcome == o.outcome for d, o in zip(self.donors, other.donors)) and
                    all(a.atom == o.atom for a, o in zip(self.acceptors, other.acceptors)) and
                    all(a.outcome == o.outcome for a, o in zip(self.acceptors, other.acceptors)) and
                    len(self.hbonds) == len(other.hbonds) and
                    all(hb == other_hb for hb, other_hb in zip(self.hbonds, other.hbonds))
                )



[docs]
        class Model(object):
            '''The logistic regression model.'''


[docs]
            class Parameter(object):
                '''A named parameter of the regression.'''

                def __init__(self, _crystal_structure_property):
                    self._crystal_structure_property = _crystal_structure_property
                    self.include = True

                @property
                def identifier(self):
                    '''The identifier of the parameter.'''
                    return self._crystal_structure_property.label()


[docs]
                def calculate(self, donor, acceptor):
                    '''The value of this property for the pair of atoms.

                    :parameter donor: `ccdc.hbond_coordination.CrystalDescriptors.HBondPropensities.HBondDonor` instance.
                    :parameter acceptor: `ccdc.hbond_coordination.CrystalDescriptors.HBondPropensities.HBondAcceptor` instance.
                    :returns: float
                    '''
                    hbprop = PackingSimilarityLib.CrystalStructurePropertyAsHyBondingProperty(
                        self._crystal_structure_property)
                    if hbprop is not None:
                        hbprop.update(donor._analysis, acceptor._analysis, PackingSimilarityLib.HyBondContactAnalysis())
                    else:
                        hbprop = PackingSimilarityLib.CrystalStructurePropertyAsMoleculeProperty(
                            self._crystal_structure_property)
                        hbprop.update(donor._analysis.atom().molecule())
                    return hbprop.value()




[docs]
            class Coefficient(object):
                '''A coefficient of the regression model.'''

                def __init__(self, _coefficient):
                    self._coefficient = _coefficient

                @property
                def identifier(self):
                    '''The identifier of the coefficient.'''
                    return self._coefficient.label()

                @property
                def confidence_interval(self):
                    '''The upper and lower bounds of the coefficient.'''
                    return (self._coefficient.coefficient_lower_bound(), self._coefficient.coefficient_upper_bound())

                @property
                def estimate(self):
                    '''The estimate of the coefficient.'''
                    return self._coefficient.coefficient_value()

                @property
                def is_baseline(self):
                    '''Whether or not the coefficient is used for the baseline calculation.'''
                    return self._coefficient.is_baseline()

                @property
                def standard_error(self):
                    '''Standard error of the coefficient.'''
                    if not self.is_baseline:
                        return self._coefficient.significance_statistics().standard_error_

                @property
                def z_value(self):
                    '''Z-value of the coefficient.'''
                    if not self.is_baseline:
                        return self._coefficient.significance_statistics().z_value_

                @property
                def p_value(self):
                    '''P-value of the coefficient.'''
                    if not self.is_baseline:
                        return self._coefficient.significance_statistics().pr_

                @property
                def significance_code(self):
                    '''A string representation of how significant the parameter is.

                    '***' for P-value < 0.01, '**' < 0.01. '*' < 0.05 and '.' < 0.1
                    '''
                    if not self.is_baseline:
                        return self._coefficient.significance_statistics().significance_code()


            def __init__(self, _model):
                self._model = _model

            @property
            def equation(self):
                '''The regression equation.'''
                coeffs = [self._model.coefficient(i) for i in range(self._model.ncoefficients())]
                s = '%.2f + ' % coeffs[0].coefficient_value()
                s += ' + '.join(
                    '%.2f*%s' % (c.coefficient_value(), c.label()) for c in coeffs[1:] if not c.is_baseline())
                return s

            @property
            def coefficients(self):
                '''The coefficients of the model.'''
                return tuple(
                    CrystalDescriptors.HBondPropensities.Model.Coefficient(self._model.coefficient(i))
                    for i in range(self._model.ncoefficients())
                )

            @property
            def area_under_roc_curve(self):
                '''Area under the ROC curve.'''
                return self._model.goodness_of_fit_data().area_under_roc_curve()

            @property
            def advice_comment(self):
                '''A string representing the quality of the discrimination based on the ROC.'''
                auc = self.area_under_roc_curve
                if auc < 0.7:
                    return 'bad'
                if auc < 0.75:
                    return 'poor'
                if auc < 0.8:
                    return 'maybe acceptable'
                if auc < 0.85:
                    return 'good'
                if auc < 0.9:
                    return 'excellent'
                return 'outstanding'

            @property
            def akaike_information_criterion(self):
                '''The Akaike Information Criterion (AIC) of the model.'''
                return self._model.goodness_of_fit_data().akaike_information_criterion()

            @property
            def null_deviance(self):
                '''The null deviance of the model.'''
                return self._model.goodness_of_fit_data().null_deviance()

            @property
            def null_deviance_degrees_of_freedom(self):
                '''The degrees of freedom of the null deviance of the model.'''
                return self._model.goodness_of_fit_data().null_deviance_degrees_of_freedom()

            @property
            def residual_deviance(self):
                '''The residual deviance of the model.'''
                return self._model.goodness_of_fit_data().residual_deviance()

            @property
            def residual_deviance_degrees_of_freedom(self):
                '''The number of degrees of freedom of the residual deviance of the model.'''
                return self._model.goodness_of_fit_data().residual_deviance_degrees_of_freedom()

            @property
            def log_likelihood(self):
                '''The log likelihood of the model.'''
                return self._model.goodness_of_fit_data().log_likelihood()

            @property
            def log_likelihood_test_p_value(self):
                '''The P-value of the log likelihood of the model.'''
                return self._model.goodness_of_fit_data().log_likelihood_test_p_value()



[docs]
        class Propensity(object):
            '''Base class for inter- and intra-molecular propensity predictions.

            :ivar self.donor: the :class:`CrystalDescriptors.HBondPropensities.HBondDonor` involved in this putative hbond.
            :ivar self.acceptor: the :class:`CrystalDescriptors.HBondPropensities.HBondAcceptor` involved in this putative hbond.
            :ivar self.hbond: the :class:`ccdc.crystal.Crystal.HBond` from the target structure if this hbond is observed.
            '''

            def __init__(self, hbp, _row):
                self._row = _row
                self._prediction = _row.pair_prediction()
                self.donor = hbp._find_donor(self.donor_label)
                self.acceptor = hbp._find_acceptor(self.acceptor_label)
                self.hbond = hbp._find_hbond(self.donor_label, self.acceptor_label, self.is_intermolecular)

            @property
            def propensity(self):
                '''The predicted value.'''
                return self._prediction.prediction()

            @property
            def donor_label(self):
                '''The label of the donor atom.'''
                return self._prediction.pair().donor

            @property
            def acceptor_label(self):
                '''The label of the acceptor atom.'''
                return self._prediction.pair().acceptor

            @property
            def bounds(self):
                '''The lower and upper bounds of the prediction.'''
                return (
                    self._prediction.predicted_lower_bound(),
                    self._prediction.predicted_upper_bound()
                )

            @property
            def uncertainty(self):
                '''The uncertainty in the prediction.'''
                return self._prediction.uncertainty()

            @property
            def predictive_error(self):
                '''The error in the prediction.'''
                return self._prediction.predictive_error()

            @property
            def scores(self):
                '''The calculated values and statistics for the hbond prediction.'''
                return {
                    qp[0]: qp[1]
                    for qp in [self._prediction.quant_props(i) for i in range(self._prediction.nquant_props())]
                }

            @property
            def is_observed(self):
                '''Whether the hbond is observed in the target structure.'''
                return self._row.is_observed()

            @property
            def hbond_count(self):
                '''The number of instances of the hbond observed in the target structure.'''
                return self._row.hbond_count()

            @property
            def donor_rank(self):
                '''The rank number of the donor.'''
                return self._row.donor_rank()

            @property
            def acceptor_rank(self):
                '''The rank number of the acceptor.'''
                return self._row.acceptor_rank()

            @property
            def donor_component(self):
                '''The component number of the donor in the target structure.'''
                return self._row.donor_component()

            @property
            def acceptor_component(self):
                '''The component number of the acceptor in the target structure.'''
                return self._row.acceptor_component()

            @property
            def is_donor_bifurcated(self):
                '''Whether the donor is bifurcated in the target structure.'''
                return self._row.is_donor_bifurcated()

            @property
            def is_acceptor_bifurcated(self):
                '''Whether the acceptor is bifurcated in the target structure.'''
                return self._row.is_acceptor_bifurcated()




[docs]
        class InterPropensity(Propensity):
            '''Predicted propensity for a single HBond.'''

            @property
            def is_intermolecular(self):
                '''Whether or not the predicted propensity is for an intermolecular HBond.'''
                return True



[docs]
        class IntraPropensity(Propensity):
            '''Predicted propensity for an intramolecular HBond.'''

            @property
            def is_intermolecular(self):
                '''Whether or not the predicted propensity is for an intermolecular HBond.'''
                return False


        def __init__(self, settings=None):
            if settings is None:
                settings = CrystalDescriptors.HBondPropensities.Settings()
            self.settings = settings
            self._model = PackingSimilarityLib.create_propensity_prediction_model(settings.working_directory,
                                                                                  settings.functional_groups_directory)
            self.all_parameters = tuple(
                CrystalDescriptors.HBondPropensities.Model.Parameter(p)
                for p in PackingSimilarityLib.analysis_model_properties()
            )

            self._model.set_hbond_criterion(self.settings.hbond_criterion._hbc.hbond_criterion())
            # Load the functional groups
            self._model.functional_group_manager().set_directory(
                self.settings.functional_groups_directory
            )
            self._substructures = self._model.functional_group_manager().substructures()
            self._model.prediction().set_model_groups(self._substructures)
            self._model.prediction().set_working_directory(settings.working_directory)
            self._hbond_coordination = CrystalDescriptors.HBondCoordination(skip_telemetry=True)
            if type(self)._telemetry == 0:
                UtilitiesLib.ccdc_hydrogen_bond_propensity_telemetry()
                type(self)._telemetry = 1

        def _make_donors_and_acceptors(self, _csv):
            hba = PackingSimilarityLib.create_bonding_analysis(
                self._model.prediction(),
                self._model.hbond_criterion())
            hba.run_analysis(_csv)
            donors = tuple(
                CrystalDescriptors.HBondPropensities.HBondDonor(d) for d in hba.observed_donor_atoms()
            )
            acceptors = tuple(
                CrystalDescriptors.HBondPropensities.HBondAcceptor(a) for a in hba.observed_acceptor_atoms()
            )
            return donors, acceptors


[docs]
        def hbond_atoms(self, crystal=None):
            '''The HBondDonor and HBondAcceptor atoms of a crystal.

            :param crystal: :class:`ccdc.crystal.Crystal` instance, or None, in which case the HBondAtoms of the target will be returned.
            :returns: a pair of tuples of :class:`ccdc.descriptors.CrystalDescriptors.HBondPropensities.HBondDonor` and :class:`ccdc.descriptors.CrystalDescriptors.HBondPropensities.HBondAcceptor`.
            '''
            if crystal is None:
                return self.donors, self.acceptors
            _csv = ChemistryLib.CrystalStructureView.instantiate(crystal._crystal)
            return self._make_donors_and_acceptors(_csv)



[docs]
        def set_target(self, crystal):
            '''Sets a single target for the propensity calculation.

            :param crystal: a :class:`ccdc.crystal.Crystal` instance.
            '''
            #self._model = PackingSimilarityLib.create_propensity_prediction_model(self.settings.working_directory,
            #                                                                      self.settings.functional_groups_directory)
            #self._model.prediction().set_hbond_criterion(self.settings.hbond_criterion._hbc.hbond_criterion())
            #self._model.functional_group_manager().read_groups()
            #self._substructures = PackingSimilarityLib.get_substructures_from_connsers(
            #    self._model.functional_group_manager())
            #self._model.prediction().set_model_groups(self._substructures)
            self._model.prediction().clear_hbond_models()

            self._model.prediction().set_working_directory(self.settings.working_directory)
            self.settings.limit_identifier_list = UtilitiesLib.DatabaseEntryIdentifierList()
            self._model.prediction().clear_regression_results()
            self._model.clear_predicted_propensities()
            self._called_make_fitting_data = False
            self.crystal = crystal
            self._csv = ChemistryLib.CrystalStructureView.instantiate(crystal._crystal)
            self._target = PackingSimilarityLib.PropensityPredictionTarget(
                crystal.identifier, crystal.identifier, self._csv
            )
            self._model.prediction().set_target(self._target)
            func_groups = self._model.find_matching_groups(self._csv)
            self._model.prediction().set_model_groups(func_groups)
            self.functional_groups = tuple(
                CrystalDescriptors.HBondPropensities.FunctionalGroup(g) for g in func_groups
            )
            self.donors, self.acceptors = self._make_donors_and_acceptors(self._csv)
            self._hbond_coordination.settings._graph_set_search_settings.hbond_criterion = self.settings.hbond_criterion
            self.coordination_scores = self._hbond_coordination.predict(crystal)
            for d in self.donors:
                obs = self.coordination_scores.predictions_for_label(d.label, 'donor')
                if obs is None:
                    d.observed_coordination = 0.0
                    d.coordination_scores = {i: 0.0 for i in range(3)}
                else:
                    d.observed_coordination, d.coordination_scores = obs
            for a in self.acceptors:
                obs = self.coordination_scores.predictions_for_label(a.label, 'acceptor')
                if obs is None:
                    a.observed_coordination = 0.0
                    a.coordination_scores = {i: 0.0 for i in range(3)}
                else:
                    a.observed_coordination, a.coordination_scores = obs

            self.crystal_hbonds = self.crystal.hbonds(hbond_criterion=self.settings.hbond_criterion)


        @staticmethod
        def _get_as_dbhandle(db):
            if isinstance(db, FileFormatsLib.CrystalStructureDatabasePool):
                return DatabaseEntryLib.CrystalStructureDatabasePoolAsCrystalStructureDatabase(db)
            return db

        def _make_fitting_data_generator(self):
            self._fitting_data_generator = PackingSimilarityLib.PropensityPredictionFittingDataGenerator()
            self._fitting_data_generator.set_databases(
                [self._get_as_dbhandle(db._db) for db in self.settings.databases])
            self._fitting_data_generator.set_identifier_list(self.settings.limit_identifier_list)
            self._fitting_data_generator.set_avoid_id(
                UtilitiesLib.DatabaseEntryIdentifier(self._target.name())
            )
            self._fitting_data_generator.set_target_id(
                UtilitiesLib.DatabaseEntryIdentifier(self._target.name())
            )
            self._fitting_data_generator.set_groups(
                self._model.prediction().model_groups(),
                self._model.prediction().groups_on_target(self._target, self._model.hbond_criterion())
            )


[docs]
        def make_fitting_data(self):
            ''' Deprecated method.
            Please use match_fitting_data or use CrystalDescriptors.HBondPropensities.FittingData.from_file
            to limit the entries that are searched

            returns an object that will cause all of the database entries to be searched
            '''
            self._make_fitting_data_generator()
            self._fitting_data_generator.generate_fitting_data()
            fd = CrystalDescriptors.HBondPropensities.FittingData(
                _fitting_data=self._fitting_data_generator.fitting_data(),
                identifiers=UtilitiesLib.DatabaseEntryIdentifierList(),
                databases=self.settings.databases)
            self._fitting_data = fd._fitting_data
            self._model.prediction().set_master_data(fd._fitting_data)
            self._called_make_fitting_data = True
            return fd


        @property
        def fitting_data(self):
            '''The fitting data.'''
            _fd = None if not hasattr(self, '_fitting_data') else self._fitting_data
            return CrystalDescriptors.HBondPropensities.FittingData(
                _fitting_data=_fd,
                identifiers=self.settings.limit_identifier_list,
                databases=self.settings.databases)

        @fitting_data.setter
        def fitting_data(self, data):
            self._fitting_data = data._fitting_data
            self.settings.limit_identifier_list = data._identifiers()
            self.settings.databases = data.databases()
            self._model.prediction().set_fitting_data(self._fitting_data)
            self._make_fitting_data_generator()


[docs]
        def show_fitting_data_counts(self, data=None):
            '''Shows the matched counts for each functional group.'''
            if data is None:
                data = self.fitting_data
            longest_name = max(len(g.identifier) for g in self.functional_groups)
            print('%d structures' % len(data))
            for g in self.functional_groups:
                print('%*s: %5d (%s)' % (
                    longest_name, g.identifier, data.nitems(g), data.advice_comment(g))
                      )



[docs]
        def match_fitting_data(self, count=None, verbose=False):
            '''Reduces fitting data down such that each functional group has at least the specified number of examples.'''
            if count is None:
                if not self._called_make_fitting_data:
                    self.make_fitting_data()
                fitting_data_result = self._fitting_data_generator.fitting_data()
                self._model.prediction().set_master_data(fitting_data_result)
                best = PackingSimilarityLib.best_slider_position(self._model, True, 99)
                self._model.prediction().refine_fitting_data(True, 99, best, self._model.hbond_criterion())
                fitting_data_result = self._model.prediction().fitting_data()
            else:
                fitting_data_result = PackingSimilarityLib.PropensityPredictionFittingDataGenerator.full_generation_of_fitting_data(
                    [self._get_as_dbhandle(db._db) for db in self.settings.databases],
                    self.settings.limit_identifier_list,
                    UtilitiesLib.DatabaseEntryIdentifier(self._target.name()),
                    UtilitiesLib.DatabaseEntryIdentifier(self._target.name()),
                    self._model.prediction().model_groups(),
                    self._model.prediction().groups_on_target(self._target, self._model.hbond_criterion()),
                    count,
                    verbose
                )
                fitting_data_result = PackingSimilarityLib.create_subset(fitting_data_result, fitting_data_result.model_groups(), count)
            self._fitting_data = fitting_data_result
            self.show_fitting_data_counts()



[docs]
        def analyse_fitting_data(self):
            '''Perform a hydrogen bond analysis of the fitting data.'''
            pred = self._model.prediction()
            bonding_analysis = PackingSimilarityLib.create_bonding_analysis(pred, self._model.hbond_criterion())
            surveyor = PackingSimilarityLib.create_surveyor()
            pred.set_fitting_data(self._fitting_data)

            for i in range(self._fitting_data.nitems()):
                self._fitting_data.item(i).analyse(bonding_analysis, surveyor)
            self._find_donor_acceptor_counts()


        def _find_donor_acceptor_counts(self):
            '''Find true/false outcomes for the functional groups, excluding the poorly represented hbonds.'''

            for d in self.donors + self.acceptors:
                d.nnegative = d.npositive = None
                d.exclude = False
            data = self._model.prediction().fitting_data()
            data.clear_exclusions()
            don, acc, txt = PackingSimilarityLib.find_donor_and_acceptor_counts(data)
            doing_donors = True
            self._don_excludes = []
            self._acc_excludes = []
            for l in txt[1:]:
                cat, at, pos, neg = l
                if cat == 'Acceptor(s)':
                    doing_donors = False
                pos = int(pos)
                neg = int(neg)
                if pos < 3 or neg < 3 or pos + neg < 3:
                    if doing_donors:
                        self._don_excludes.append(at)
                    else:
                        self._acc_excludes.append(at)
                if at == 'other':
                    continue
                if doing_donors:
                    candidates = self.donors
                else:
                    candidates = self.acceptors
                for x in candidates:
                    tag = x._analysis.atom_in_group_label()
                    if tag == at and x.npositive is None:
                        x.npositive = int(pos)
                        x.nnegative = int(neg)
                        break
            html = [
                '<table name="DonorAcceptorCounts" border="1" style=" border-color:#a0a0a4; border-style:dotted;" cellspacing="2" cellpadding="5">',
                '<tr><td>Category</td><td>Label</td><td>True</td><td>False</td></tr>'
            ]
            for x in self.donors + self.acceptors:
                if x.npositive is None or x.nnegative is None:
                    x.exclude = True
                    continue
                if x.npositive < 3 or x.nnegative < 3 or x.npositive + x.nnegative < 5:
                    x.exclude = True
                html.append(
                    '<tr><td>%s</td><td>%s</td><td align="right">%d</td><td align="right">%d</td></tr>'
                    % ('Donor' if d in self.donors else 'Acceptor', x.identifier, x.npositive, x.nnegative)
                )
            self.training_stats_html = html

        def _sort_out_exclusions(self):
            data = self._model.prediction().fitting_data()
            data.clear_exclusions()
            for d in self._don_excludes:
                data.add_exclusion(PackingSimilarityLib.DonorAcceptorPair(d, ''))
            for a in self._acc_excludes:
                data.add_exclusion(PackingSimilarityLib.DonorAcceptorPair('', a))
            for d in self.donors:
                if d.exclude:
                    data.add_exclusion(
                        PackingSimilarityLib.DonorAcceptorPair(d.identifier, '')
                    )
            for a in self.acceptors:
                if a.exclude:
                    data.add_exclusion(PackingSimilarityLib.DonorAcceptorPair('', a.identifier))

        def _write_data_files(self):
            '''Write all necessary data files.'''
            # Training data
            self._sort_out_exclusions()
            data = self._model.prediction().fitting_data()
            stats = data.spreadsheet_data()
            stream = PackingSimilarityLib.ofstream(
                str(os.path.join(self.settings.working_directory, '%s_training_dataset.csv' % self._target.name()))
            )
            PackingSimilarityLib.print_hbond_stats(stats, stream)
            stream.close()
            # Molecule
            with io.MoleculeWriter(
                    os.path.join(self.settings.working_directory, 'prediction_molecule_%s.mol2' % self._target.name())
            ) as writer:
                writer.write(self.crystal.molecule)
            # fitting_data_.gcd
            with open(os.path.join(self.settings.working_directory, 'fitting_data_%s.gcd' % self._target.name()),
                      'w') as writer:
                writer.write('\n'.join(data.item(i).identifier().str() for i in range(data.nitems())))
            # _counts.html
            with open(os.path.join(self.settings.working_directory, '%s_counts.html' % self._target.name()),
                      'w') as writer:
                writer.write('\n'.join(self.training_stats_html))
            # Matching .con files
            fgm = self._model.functional_group_manager()
            model_groups = self._model.prediction().model_groups()
            for g in model_groups:
                name = g.name
                for named_connser in fgm.connser_files():
                    if named_connser.name == name or named_connser.name == name.replace('_', ' '):
                        named_connser.connser_file.print_(os.path.join(self.settings.working_directory, '%s.con' % name))
                        break
                else:
                    print('No matching connser file for %s' % name)


[docs]
        def perform_regression(self):
            '''Performs the logistic regression.'''
            self._write_data_files()
            pred = self._model.prediction()
            # Need to be able to choose them
            pred.set_model_parameters(
                tuple(p._crystal_structure_property for p in self.all_parameters if p.include)
            )
            pars = pred.r_model_parameters()

            regression = PackingSimilarityLib.RLogisticRegression()
            if regression.path_to_r() == "" or not os.path.exists(regression.path_to_r()):
                path_to_r = str(Resources().get_r_path())
                regression.set_path_to_r(path_to_r)
            results = regression.run(pred)
            if not results:
                raise RuntimeError('Regression could not be performed. Look in %s for details.' % os.path.join(
                    pred.working_directory(), 'logistic_regression.R'))
            self.model = CrystalDescriptors.HBondPropensities.Model(_model=results)
            return self.model


        @property
        def propensities(self):
            '''The inter- and intra-propensities of the prediction.'''
            return self.inter_propensities + self.intra_propensities


[docs]
        def calculate_propensities(self, crystal=None):
            '''Apply the regression equation to a crystal.

            :parameter crystal: :class:`ccdc.crystal.Crystal` instance or None.  If None the target structure will be used.
            '''
            if crystal is None:
                _target = self._target
            else:
                _csv = ChemistryLib.CrystalStructureView.instantiate(crystal._crystal)
                _target = PackingSimilarityLib.PropensityPredictionTarget(crystal.identifier, _csv)
            data = self._model.prediction().fitting_data()
            props = self._model.predict_propensity(
                _target, self.model._model
            )
            inter_rows = PackingSimilarityLib.PropensityTableModel(props, PackingSimilarityLib.INTER, data).rows()
            self.inter_propensities = tuple(
                CrystalDescriptors.HBondPropensities.InterPropensity(self, row)
                for row in inter_rows
            )
            intra_rows = PackingSimilarityLib.PropensityTableModel(props, PackingSimilarityLib.INTRA, data).rows()
            self.intra_propensities = tuple(
                CrystalDescriptors.HBondPropensities.IntraPropensity(self, row)
                for row in intra_rows
            )

            return self.propensities


        def _count_hbond_groupings(self, min_donor_prob=0.0, min_acceptor_prob=0.0):
            '''Counts the number of coordination possibilites, which correlates with the number of groupings.'''
            d_preds = [
                [c for c, v in self.coordination_scores.predictions_for_label(d.label, 'donor')[1].items() if v >= min_donor_prob]
                for d in self.donors
            ]
            a_preds = [
                [c for c, v in self.coordination_scores.predictions_for_label(a.label, 'acceptor')[1].items() if v >= min_acceptor_prob]
                for a in self.acceptors
            ]
            max_acc_ct = sum(max(ap) for ap in a_preds)
            def _possible_paths(donors, acceptors, d_ct):
                if donors:
                    d, preds = donors[0]
                    for dc in [c for c in preds if d_ct + c <= max_acc_ct]:
                        for p in _possible_paths(donors[1:], acceptors, d_ct + dc):
                            yield [(d, dc)] + p
                elif len(acceptors) > 1:
                    a, preds = acceptors[0]
                    for ac in [c for c in preds if 0 <= d_ct - c <= sum(max(ap) for z, ap in acceptors[1:])]:
                        for p in _possible_paths(donors, acceptors[1:], d_ct - ac):
                            yield [(a, ac)] + p
                else:
                    a, preds = acceptors[0]
                    yield [(a, d_ct)]

            for i, c in enumerate(_possible_paths(zip(self.donors, d_preds), zip(self.acceptors, a_preds), 0)):
                pass
            return i


[docs]
        def generate_hbond_groupings(self, min_donor_prob=None, min_acceptor_prob=None):
            '''Generate all possible permutations of donors and acceptors to create all possible hbond groupings.'''
            if min_donor_prob is None or min_acceptor_prob is None:
                min_donor_prob = 0.1
                min_acceptor_prob = 0.1

            if not hasattr(self, 'propensities'):
                self.calculate_propensities()
            predicted_propensity = self._model.prediction().calculate_hbond_propensities(
                self._target, self.model._model, self._model.hbond_criterion()
            )
            pair_preds = predicted_propensity.inter_predictions()
            inter_predictions = tuple(pair_preds.prediction(i) for i in range(pair_preds.npredictions()))
            intra_predictions = tuple(
                predicted_propensity.intra_all_pair_prediction(i)
                for i in range(predicted_propensity.nintra_all_pair_predictions())
            )
            self._intra_predictions = intra_predictions
            analysis = self._hbond_coordination.hbond_analysis
            coordination_propensities = self._hbond_coordination._coordination_propensities
            self._hbond_coordination._calculator.update_propensities(coordination_propensities, analysis)
            assessment = PackingSimilarityLib.CrystalHydrogenBondAssessment(
                self._target.name(), analysis, inter_predictions, coordination_propensities,
                min_donor_prob, min_acceptor_prob
            )
            self.outcomes = outcomes = [assessment.donor_acceptor_pairing(i) for i in
                                        range(assessment.ndonor_acceptor_pairings())]
            self.hbond_groupings = tuple(
                CrystalDescriptors.HBondPropensities.HBondGrouping(self, outcome)
                for outcome in outcomes
            )
            self._observed_outcomes = [assessment.observed_outcome(i) for i in range(assessment.nobserved_outcomes())]
            if assessment.nobserved_outcomes() == 1:
                self._target_hbond_grouping = CrystalDescriptors.HBondPropensities.HBondGrouping(self, self._observed_outcomes[0])
            else:
                self._target_hbond_grouping = None
            return self.hbond_groupings



[docs]
        def target_hbond_grouping(self):
            '''Which of the hbond groupings is of the target structure.'''
            if self._target_hbond_grouping is not None:
                return self._target_hbond_grouping
            def _valid(hb):
                d, a = hb.atoms[0], hb.atoms[-1]
                x = self._find_donor(d.label)
                y = self._find_acceptor(a.label)
                if x is None or y is None:
                    x = self._find_donor(a.label)
                    y = self._find_acceptor(d.label)
                return x.functional_group_identifier != 'other' and y.functional_group_identifier != 'other'

            chbs = [
                hb for hb in self.crystal_hbonds
                if _valid(hb)
            ]
            for g in self.hbond_groupings:
                if len(g.hbonds) == len(chbs) and all(hb.is_observed for hb in g.hbonds):
                    return g




[docs]
    class PoreAnalyser(object):
        '''Calculates Pore Analysis.
        crystal is ccdc.crystal.Crystal
        '''
        _telemetry = 0

        def __init__(self, crystal, settings=None, _calculated=None):
            self._crystal = crystal

            if settings is None:
                settings = CrystalDescriptors.PoreAnalyser.Settings()
            self.settings = settings

            self._flags = CrystalDescriptors.PoreAnalyser.Flags()
            self.settings._flags = self._flags

            self._pore_calculator = None

            self._system_volume = self._crystal.cell_volume  # vol of unit cell (A^3)

            # cached results
            self._system_mass = None
            self._system_density = None
            self._total_surface_area = None
            self._total_surface_area_per_volume = None
            self._total_surface_area_per_mass = None
            self._network_accessible_surface_area = None
            self._network_accessible_surface_area_per_volume = None
            self._network_accessible_surface_area_per_mass = None
            self._total_he_volume = None
            self._network_accessible_he_volume = None
            self._total_geom_volume = None
            self._network_accessible_geom_volume = None
            self._pore_limiting_diameter = None
            self._max_pore_diameter = None
            self._num_percolated_dimensions = None

            if _calculated is not None:
                self._total_surface_area = _calculated.total_surface_area()
                self._total_geom_volume = _calculated.geometric_pore_volume()
                self._pore_limiting_diameter = _calculated.pore_limiting_diameter()
                self._max_pore_diameter = _calculated.maximum_pore_diameter()
                self._num_percolated_dimensions = _calculated.num_percolated_dimensions()
                self._flags.total_surface_area_valid = True
                self._flags.total_geom_volume_valid = True
                self._flags.pore_limiting_diameter_valid = True
                self._flags.max_pore_diameter_valid = True
                self._flags.num_percolated_dimensions_valid = True

            if type(self)._telemetry == 0:
                UtilitiesLib.ccdc_pore_analyser_telemetry()
                type(self)._telemetry = 1


[docs]
        class Flags:
            '''Flags for validlity of cached variables'''

            def __init__(self):
                self.set_all(False)

            def set_all(self, value):
                self._calculator_is_valid = value
                self.system_mass_valid = value
                self.system_density_valid = value
                self.total_surface_area_valid = value
                self.total_surface_area_per_volume_valid = value
                self.total_surface_area_per_mass_valid = value
                self.network_accessible_surface_area_valid = value
                self.network_accessible_surface_area_per_volume_valid = value
                self.network_accessible_surface_area_per_mass_valid = value
                self.total_he_volume_valid = value
                self.network_accessible_he_volume_valid = value
                self.total_geom_volume_valid = value
                self.network_accessible_geom_volume_valid = value
                self.pore_limiting_diameter_valid = value
                self.max_pore_diameter_valid = value
                self.num_percolated_dimensions_valid = value

            @property
            def calculator_is_valid(self):
                '''grid spacing (A)'''
                return self._calculator_is_valid

            @calculator_is_valid.setter
            def calculator_is_valid(self, value):
                self._calculator_is_valid = value
                if value is False:
                    self.set_all(False)



[docs]
        class Settings:
            '''Settings for PoreAnalyser'''
            def __init__(self):
                self._grid_spacing = None  # grid spacing (A)
                self._he_probe_sigma = None  # UFF LJ sigma (A)
                self._he_probe_epsilon = None  # UFF LJ epsilon/k (K)
                self._n_probe_sigma = None  # UFF LJ sigma (A)
                self._samples_per_atom = None  # sample size for surface area calculation
                self._cutoff_distance = None  # cut-off distance (A)
                self._temperature = None  # temperature (K)

                self._flags = None
                self.set_to_defaults()


[docs]
            def set_to_defaults(self):
                '''set to default values'''
                self.grid_spacing = 0.2
                self.he_probe_sigma = 2.58
                self.he_probe_epsilon = 10.22
                self.n_probe_sigma = 3.314
                self.samples_per_atom = 500
                self.cutoff_distance = 12.8
                self.temperature = 298

                if self._flags is not None:
                    self._flags.set_all(False)


            @property
            def grid_spacing(self):
                '''grid spacing (A)'''
                return self._grid_spacing

            @grid_spacing.setter
            def grid_spacing(self, value):
                self._grid_spacing = value
                if self._flags is not None:
                    self._flags.calculator_is_valid = False

            @property
            def he_probe_sigma(self):
                '''UFF L-J sigma for He probe (A)'''
                return self._he_probe_sigma

            @he_probe_sigma.setter
            def he_probe_sigma(self, value):
                self._he_probe_sigma = value
                if self._flags is not None:
                    self._flags.calculator_is_valid = False

            @property
            def he_probe_epsilon(self):
                '''UFF L-J epsilon/k for He probe (K)'''
                return self._he_probe_epsilon

            @he_probe_epsilon.setter
            def he_probe_epsilon(self, value):
                self._he_probe_epsilon = value
                if self._flags is not None:
                    self._flags.calculator_is_valid = False

            @property
            def n_probe_sigma(self):
                '''UFF L-J sigma for N probe (A)'''
                return self._n_probe_sigma

            @n_probe_sigma.setter
            def n_probe_sigma(self, value):
                self._n_probe_sigma = value
                if self._flags is not None:
                    self._flags.calculator_is_valid = False

            @property
            def samples_per_atom(self):
                '''Sample size for surface area calculation'''
                return self._samples_per_atom

            @samples_per_atom.setter
            def samples_per_atom(self, value):
                self._samples_per_atom = value
                if self._flags is not None:
                    self._flags.calculator_is_valid = False

            @property
            def cutoff_distance(self):
                '''Cut-off distance (A)'''
                return self._cutoff_distance

            @cutoff_distance.setter
            def cutoff_distance(self, value):
                self._cutoff_distance = value
                if self._flags is not None:
                    self._flags.calculator_is_valid = False

            @property
            def temperature(self):
                '''Temperature (K)'''
                return self._temperature

            @temperature.setter
            def temperature(self, value):
                self._temperature = value
                if self._flags is not None:
                    self._flags._he_pore_volume_valid = False
                    self._flags._network_accessible_he_pore_volume_valid = False


        @property
        def system_volume(self):
            '''Result: volume of unit cell (A^3)'''
            return self._system_volume

        @property
        def system_mass(self):
            '''Result: mass of unit cell (g/mol)'''
            if not self._flags.system_mass_valid:
                self._system_mass = self._calculate_system_mass()
                self._flags.system_mass_valid = True
                self._flags.system_density_valid = False
                self._flags.total_surface_area_per_mass_valid = False
                self._flags.network_accessible_surface_area_per_mass_valid = False
            return self._system_mass

        def _calculate_system_mass(self):
            self._initialize_calculator()
            return self._pore_calculator.calculate_crystal_structure_mass()

        @property
        def system_density(self):
            '''Result: density (g/cm^3)'''
            if not self._flags.system_density_valid:
                self._system_density = self._calculate_system_density(self.system_mass, self.system_volume)
                self._flags.density_valid = True
            return self._system_density

        def _calculate_system_density(self, mass, volume):
            self._initialize_calculator()
            return self._pore_calculator.calculate_crystal_structure_density(mass, volume)

        @property
        def total_surface_area(self):
            '''Result: surface area (A^2)'''
            if not self._flags.total_surface_area_valid:
                self._total_surface_area = self._calculate_total_surface_area()
                self._flags.total_surface_area_valid = True
                self._flags.total_surface_area_per_volume_valid = False
            return self._total_surface_area

        def _calculate_total_surface_area(self):
            self._initialize_calculator()
            return self._pore_calculator.calculate_surface_area(self.settings.samples_per_atom)

        @property
        def total_surface_area_per_volume(self):
            '''Result: surface area per volume (m^2/cm^3)'''
            if not self._flags.total_surface_area_per_volume_valid:
                self._total_surface_area_per_volume = self._calculate_surface_area_per_volume(
                    self.total_surface_area, self.system_volume)
                self._flags.total_surface_area_per_volume_valid = True
                self._flags.total_surface_area_per_mass_valid = False
            return self._total_surface_area_per_volume

        def _calculate_surface_area_per_volume(self, surface_area, volume):
            self._initialize_calculator()
            return self._pore_calculator.calculate_surface_area_per_vol(surface_area, volume)

        @property
        def total_surface_area_per_mass(self):
            '''Result: surface area per mass (m^2/g)'''
            if not self._flags.total_surface_area_per_mass_valid:
                self._total_surface_area_per_mass = self._calculate_surface_area_per_mass(
                    self.total_surface_area_per_volume, self.system_mass, self.system_volume)
                self._flags.total_surface_area_per_mass_valid = True
            return self._total_surface_area_per_mass

        def _calculate_surface_area_per_mass(self, surface_area_per_vol, mass, volume):
            self._initialize_calculator()
            return self._pore_calculator.calculate_surface_area_per_mass(surface_area_per_vol, mass, volume)

        @property
        def network_accessible_surface_area(self):
            '''Result: network accessible surface area (A^2)'''
            if not self._flags.network_accessible_surface_area_valid:
                self._network_accessible_surface_area = self._calculate_network_accessible_surface_area()
                self._flags.network_accessible_surface_area_valid = True
                self._flags.network_accessible_surface_area_per_volume_valid = False
            return self._network_accessible_surface_area

        def _calculate_network_accessible_surface_area(self):
            self._initialize_calculator()
            return self._pore_calculator.calculate_network_accessible_surface_area(self.settings.samples_per_atom)

        @property
        def network_accessible_surface_area_per_volume(self):
            '''Result: metwork accessible surface area per volume (m^2/cm^3)'''
            if not self._flags.network_accessible_surface_area_per_volume_valid:
                self._network_accessible_surface_area_per_volume = self._calculate_surface_area_per_volume(
                    self.network_accessible_surface_area, self.system_volume)
                self._flags.network_accessible_surface_area_per_volume_valid = True
                self._flags.network_accessible_surface_area_per_mass_valid = False
            return self._network_accessible_surface_area_per_volume

        @property
        def network_accessible_surface_area_per_mass(self):
            '''Result: network accessible surface area per mass (m^2/g)'''
            if not self._flags.network_accessible_surface_area_per_mass_valid:
                self._network_accessible_surface_area_per_mass = self._calculate_surface_area_per_mass(
                    self.network_accessible_surface_area_per_volume, self.system_mass, self.system_volume)
                self._flags.network_accessible_surface_area_per_mass_valid = True
            return self._network_accessible_surface_area_per_mass

        @property
        def total_helium_volume(self):
            '''Result: He pore volume (A^3)'''
            if not self._flags.total_he_volume_valid:
                self._total_he_volume = self._calculate_total_helium_volume(self.settings.temperature, self.system_volume)
                self._flags.he_pore_volume_valid = True
            return self._total_he_volume

        def _calculate_total_helium_volume(self, temperature, crystal_volume):
            self._initialize_calculator()
            return self._pore_calculator.calculate_helium_pore_volume(temperature, crystal_volume)

        @property
        def network_accessible_helium_volume(self):
            '''Result: Network accessible He pore volume (A^3)'''
            if not self._flags.network_accessible_he_volume_valid:
                self._network_accessible_he_volume = self._calculate_network_accessible_helium_volume(
                    self.settings.temperature, self.system_volume)
                self._flags.network_accessible_he_volume_valid = True
            return self._network_accessible_he_volume

        def _calculate_network_accessible_helium_volume(self, temperature, crystal_volume):
            self._initialize_calculator()
            return self._pore_calculator.calculate_network_accessible_helium_pore_volume(temperature, crystal_volume)

        @property
        def total_geometric_volume(self):
            '''Result: geometric pore volume (A^3)'''
            if not self._flags.total_geom_volume_valid:
                self._total_geom_volume = self._calculate_total_geometric_volume(self.system_volume)
                self._flags.total_geom_volume_valid = True
            return self._total_geom_volume

        def _calculate_total_geometric_volume(self, crystal_volume):
            self._initialize_calculator()
            return self._pore_calculator.calculate_geometric_pore_volume(crystal_volume)

        @property
        def network_accessible_geometric_volume(self):
            '''Result: Network accessible geometric pore volume (A^3)'''
            if not self._flags.network_accessible_geom_volume_valid:
                self._network_accessible_geom_volume = self._calculate_network_accessible_geometric_volume(self.system_volume)
                self._flags.network_accessible_geom_volume_valid = True
            return self._network_accessible_geom_volume

        def _calculate_network_accessible_geometric_volume(self, crystal_volume):
            self._initialize_calculator()
            return self._pore_calculator.calculate_network_accessible_geometric_pore_volume(crystal_volume)

        @property
        def pore_limiting_diameter(self):
            '''Result: Pore limiting diameter (A)'''
            if not self._flags.pore_limiting_diameter_valid:
                self._pore_limiting_diameter = self._calculate_pore_limiting_diameter()
                self._flags.pore_limiting_diameter_valid = True
            return self._pore_limiting_diameter

        def _calculate_pore_limiting_diameter(self):
            self._initialize_calculator()
            return self._pore_calculator.calculate_pore_limiting_diameter()

        @property
        def max_pore_diameter(self):
            '''Result: Max pore diameter (A)'''
            if not self._flags.max_pore_diameter_valid:
                self._max_pore_diameter = self._calculate_max_pore_diameter()
                self._flags.max_pore_diameter_valid = True
            return self._max_pore_diameter

        def _calculate_max_pore_diameter(self):
            self._initialize_calculator()
            return self._pore_calculator.calculate_maximum_pore_diameter()

        @property
        def num_percolated_dimensions(self):
            '''Result: Number of percolated dimensions'''
            if not self._flags.num_percolated_dimensions_valid:
                self._num_percolated_dimensions = self._calculate_num_percolated_dimensions()
                self._flags.num_percolated_dimensions_valid = True
            return self._num_percolated_dimensions

        def _calculate_num_percolated_dimensions(self):
            self._initialize_calculator()
            return self._pore_calculator.calculate_n_percolated_dimensions()


[docs]
        def convert_a3_to_cm3_per_g(self, volume):
            '''Utility to convert cubic angstroms to cm^3 per g'''
            self._initialize_calculator()
            return self._pore_calculator.convert_cubic_angstoms_to_cubic_centimetres_per_gram(
                volume, self.system_mass)


        # Initialize new PoreCalculator if current is not valid.
        # If already exists and is valid, do nothing
        def _initialize_calculator(self):
            if not self._flags.calculator_is_valid:
                self._pore_calculator = PoreAnalyserLib.PoreCalculator(
                        self._crystal._crystal,
                        self.settings.grid_spacing,
                        self.settings.cutoff_distance,
                        self.settings.he_probe_sigma,
                        self.settings.he_probe_epsilon,
                        self.settings.n_probe_sigma
                )
                self._flags.calculator_is_valid = True

        def _get_calculated(self):
            '''Return a copy of the calculated values for storage'''
            props = DatabaseEntryLib.PoreAnalysisProperties()
            props.set_total_surface_area(self.total_surface_area)
            props.set_geometric_pore_volume(self.total_geometric_volume)
            props.set_pore_limiting_diameter(self.pore_limiting_diameter)
            props.set_maximum_pore_diameter(self.max_pore_diameter)
            props.set_num_percolated_dimensions(self.num_percolated_dimensions)
            return props




#####################################################################################


[docs]
class StatisticalDescriptors(object):
    '''A namespace holding statistical descriptors.'''

[docs]
    class RankStatistics(object):
        '''Represents a ranked collection of values for which statistics can be derived.'''

        def __init__(self, scores, activity_column=None):
            '''Instantiate a RankStatistics object.

            :param scores: Sorted list with descending scores containing active/inactive data.
            :param activity_column: extractor for activity data in scores.  If ``None``, scores
            will be interpreted as an iterable of ``bool``s. If type is ``int`` or ``string``,
            scores will be interpreted as an iterable of indexed/named collections where the
            specified index is the activity ``bool``. Otherwise, use :meth:`operator.itemgetter' or
            :meth:`operator.attrgetter`.

            :raises: ValueError if the scores list is empty.
            '''
            if len(scores) == 0:
                raise ValueError('scores list must not be empty.')

            self._scores = scores
            self.activity_column = activity_column
            self._score_count = len(scores)

        @property
        def activity_column(self):
            '''Get extractor for active/inactive classification from scores data.'''
            return self._activity_column

        @activity_column.setter
        def activity_column(self, activity_column):
            '''Set extractor for active/inactive classification from scores data

            :param activity_column: extractor for activity data in scores.  If ``None``, scores
            will be interpreted as an iterable of ``bool``s. If type is ``int`` or ``string``,
            scores will be interpreted as an iterable of indexed/named collections where the
            specified index is the activity ``bool``. Otherwise, use :meth:`operator.itemgetter' or
            :meth:`operator.attrgetter`.
            '''
            if activity_column is None:
                self._activity_column = lambda x: x
            elif type(activity_column) is int:
                self._activity_column = operator.itemgetter(activity_column)
            elif isinstance(activity_column, str):
                self._activity_column = operator.attrgetter(activity_column)
            else:
                self._activity_column = activity_column


[docs]
        def ROC(self):
            '''Calculate receiver operating characteristic (ROC) curve.

            :returns: list, list - True positive rate, False positive rate
            '''
            true_positive_rate = []   # True positive rate = TP/(TP+FN) = Sensitivity
            false_positive_rate = []   # False positive rate = FP/(TN+FP) = Fall-out
            actives_count = 0
            inactives_count = 0

            for i in range(self._score_count):
                if self._activity_column(self._scores[i]):
                    actives_count += 1
                else:
                    inactives_count += 1
                true_positive_rate.append(float(actives_count))
                false_positive_rate.append(float(inactives_count))

            if actives_count > 0:
                true_positive_rate = [tp / actives_count for tp in true_positive_rate]
            if inactives_count > 0:
                false_positive_rate = [fp / inactives_count for fp in false_positive_rate]

            return true_positive_rate, false_positive_rate



[docs]
        def AUC(self):
            '''Calculate the area under the ROC curve.

            :returns: Area under the ROC curve.'''
            tpr, fpr = self.ROC()

            auc = 0
            for i in range(0, self._score_count - 1):
                auc += (fpr[i + 1] - fpr[i]) * (tpr[i + 1] + tpr[i])

            return auc / 2


        def _RIE(self, alpha):
            active_count = 0
            sum_exp = 0

            for i in range(self._score_count):
                active = self._activity_column(self._scores[i])
                if active:
                    active_count += 1
                    sum_exp += math.exp(-(alpha * (i + 1)) / self._score_count)

            if active_count == 0:
                rie = 0.0
            else:
                denom = 1.0 / self._score_count * ((1.0 - math.exp(-alpha)) / (math.exp(alpha / self._score_count) - 1.0))
                rie = sum_exp / float(active_count * denom)

            return rie, active_count


[docs]
        def RIE(self, alpha=0.0):
            '''Calculate robust initial enhancement (RIE) as defined in:

            Sheridan R.P., Singh S.B., Fluder E.M., Kearsley S.K.,
            "Protocols for Bridging the Peptide to Nonpeptide Gap in Topological Similarity Searches"
            J. Chem. Inf. Comp. Sci. 41:1395-1406 (2001).

            :param alpha: exponential weighting factor

            :raises: ValueError if alpha is less than or equal to 0.0
            '''
            if alpha <= 0.0:
                raise ValueError('alpha parameter must be greater than zero.')

            rie, active_count = self._RIE(float(alpha))
            return rie



[docs]
        def BEDROC(self, alpha=0.0):
            '''Calculate Boltzmann-Enhanced Discrimination of ROC (BEDROC) as defined in:

            Truchon J., Bayly C.I.,
            "Evaluating Virtual Screening Methods: Good and Bad Metric for the "Early Recognition" Problem"
            J. Chem. Inf. Model. 47:488-508 (2007).

            :param alpha: exponential weighting factor.
            :raises: ValueError if alpha is less than or equal to 0.0.
            '''
            if alpha <= 0.0:
                raise ValueError('alpha parameter must be greater than zero.')

            rie, active_count = self._RIE(float(alpha))

            if active_count <= 0.0:
                bedroc = 0.0
            else:
                ratio = 1.0 * active_count / self._score_count
                rie_max = (1.0 - math.exp(-alpha * ratio)) / (ratio * (1.0 - math.exp(-alpha)))
                rie_min = (1.0 - math.exp(alpha * ratio)) / (ratio * (1.0 - math.exp(alpha)))

                if rie_max == rie_min:
                    bedroc = 1.0
                else:
                    bedroc = (rie - rie_min) / (rie_max - rie_min)

            return bedroc


        def _find_position(self, fraction):
            if fraction < 0.0 or fraction > 1.0:
                raise ValueError('fraction parameter must be within interval [0,1]')

            return int(math.ceil(self._score_count * fraction))


[docs]
        def EF(self, fraction=0.0):
            '''Calculate enrichment factor (EF) at the specified fraction.

            :param fraction: position within data for which enrichment factor is to be determined.

            :raises: ValueError if fraction is not within interval [0,1]
            '''
            position = self._find_position(fraction)

            found_active_count = 0
            for i in range(position):
                if self._activity_column(self._scores[i]):
                    found_active_count += 1

            if found_active_count == 0:
                return 0.0

            active_count = found_active_count
            for i in range(position, self._score_count):
                if self._activity_column(self._scores[i]):
                    active_count += 1

            return found_active_count / (active_count * fraction)



[docs]
        def PPV(self, fraction=0.0):
            '''Calculate precision or positive predictive value (PPV) at the specified fraction.

            :param fraction: position within data for which precision is to be determined.

            :raises: ValueError if fraction is not within interval [0,1]
            '''
            position = self._find_position(fraction)

            active_count = 0

            for i in range(position):
                if self._activity_column(self._scores[i]):
                    active_count += 1

            return float(active_count) / (position + 1)



[docs]
        def ACC(self, fraction=0.0):
            '''Calculate accuracy metric (ACC) at the specified fraction.

            ACC = (TP+TN) / (TP+FP+TN+FN)

            :param fraction: position within data for which accuracy metric is to be determined.

            :raises: ValueError if fraction is not within interval [0,1]
            '''
            position = min(self._find_position(fraction), self._score_count - 1)

            true_positives = []
            false_negatives = []
            actives_count = 0
            inactives_count = 0

            for i in range(self._score_count):
                if self._activity_column(self._scores[i]):
                    actives_count += 1
                else:
                    inactives_count += 1
                true_positives.append(float(actives_count))
                false_negatives.append(float(inactives_count))

            return (true_positives[position] + inactives_count - false_negatives[position])\
                   / (actives_count + inactives_count)




#################################################################################################