Source code for ccdc.conformer

#
# This code is Copyright (C) 2015 The Cambridge Crystallographic Data Centre
# (CCDC) of 12 Union Road, Cambridge CB2 1EZ, UK and a proprietary work of CCDC.
# This code may not be used, reproduced, translated, modified, disassembled or
# copied, except in accordance with a valid licence agreement with CCDC and may
# not be disclosed or redistributed in any form, either in whole or in part, to
# any third party. All copies of this code made in accordance with a valid
# licence agreement as referred to above must contain this copyright notice.
#
# No representations, warranties, or liabilities are expressed or implied in the
# supply of this code by CCDC, its servants or agents, except where such
# exclusion or limitation is prohibited, void or unenforceable under governing
# law.
#
'''
The :mod:`ccdc.conformer` module contains classes concerned with molecular
conformations.

The three main classes of the :mod:`ccdc.conformer` module are:

- :class:`ccdc.conformer.MoleculeMinimiser`
- :class:`ccdc.conformer.ConformerGenerator`
- :class:`ccdc.conformer.GeometryAnalyser`

A :class:`ccdc.conformer.MoleculeMinimiser` instance can be used to optimise the bond
distances and valence angles of a 3D input molecule using the
:func:`ccdc.conformer.MoleculeMinimiser.minimise` function::

    from ccdc.conformer import MoleculeMinimiser
    molecule_minimiser = MoleculeMinimiser()
    minimised_mol = molecule_minimiser.minimise(mol)


A :class:`ccdc.conformer.ConformerGenerator` instance can be used to generate a set of
conformers for an input molecule using the
:func:`ccdc.conformer.ConformerGenerator.generate` function::

    from ccdc.conformer import ConformerGenerator
    from ccdc.io import MoleculeWriter
    conformer_generator = ConformerGenerator()
    conformers = conformer_generator.generate(mol)
    with MoleculeWriter('conformers.mol2') as mol_writer:
        for c in conformers:
            mol_writer.write(c.molecule)


A :class:`ccdc.conformer.GeometryAnalyser` instance can be used to analyse the geometry
of an input molecule using a knowledge-based library of intramolecular
geometries based on the CSD.

The :class:`ccdc.conformer.GeometryAnalyser` class contains nested classes:

- :class:`ccdc.conformer.GeometryAnalyser.Settings`
- :class:`ccdc.conformer.GeometryAnalyser.Analysis`
- :class:`ccdc.conformer.GeometryAnalyser.AnalysisHit`

The :func:`ccdc.conformer.GeometryAnalyser.analyse_molecule` function can be used to validate
the complete geometry of a given query structure.

>>> from ccdc.io import EntryReader
>>> csd_reader = EntryReader('CSD')
>>> yigpio01 = csd_reader.molecule('YIGPIO01')

>>> from ccdc.conformer import GeometryAnalyser
>>> analysis_engine = GeometryAnalyser()
>>> checked_mol = analysis_engine.analyse_molecule(yigpio01)
>>> for tor in checked_mol.analysed_torsions:
...     if tor.unusual:
...         print('%s: %d %.2f' % (', '.join(tor.atom_labels), tor.nhits, tor.local_density)) # doctest: +SKIP
...
C36, C12, C11, N1: 72 2.78
O4, C31, N5, C24: 3743 3.55
O5, C31, N5, C24: 3736 3.93
O5, C32, C33, S1: 108 1.85
O5, C32, C33, C34: 73 4.11
'''
##########################################################################

import os

import warnings
warnings.filterwarnings('always', '.*deprecated.*', DeprecationWarning, '.*', 0)


from ccdc.entry import Entry
from ccdc.molecule import Molecule

from ccdc.descriptors import MolecularDescriptors as MD
from ccdc.io import _CSDDatabaseLocator, EntryReader, csd_directory
from ccdc.utilities import Logger, nested_class, bidirectional_dict

from ccdc.utilities import _private_importer
with _private_importer() as pi:
    pi.import_ccdc_module('ChemistryLib')
    pi.import_ccdc_module('MogulAnalysisLib')
    pi.import_ccdc_module('UtilitiesLib')
    pi.import_ccdc_module('VirtualScreening')
    pi.import_ccdc_module('ConformerGeneratorLib')
    pi.import_ccdc_module('AnnotationsLib')

##########################################################################

class _DataRecordExtractor(object):
    '''Private: extract relevant fields from a data record.'''
    def __init__(self, _dr):
        self._dr = _dr
        self._cache = dict()

    def _get_getter(self, type):
        '''Checked find of accessor.'''
        try:
            return getattr(VirtualScreening, 'get_properties_' + type)
        except RuntimeError:
            raise KeyError('Invalid type for data record %s' % type)

    def _get_full_getter(self, type):
        '''Checked find of accessor.'''
        try:
            return getattr(VirtualScreening, 'get_properties_and_names_' + type)
        except RuntimeError:
            raise KeyError('Invalid type for data record %s' % type)

    def get(self, key, type):
        '''Dict like interface.'''
        if not key in self._cache:
            getter = self._get_getter(type)
            try:
                val = getter(self._dr, key)
            except RuntimeError:
                raise KeyError('Invalid key for data record %s' % key)
            if not val:
                self._cache[key] = None
            elif len(val) == 1:
                self._cache[key] = val[0].property()
            else:
                self._cache[key] = tuple(v.property() for v in val)
        return self._cache[key]

    def get_all(self, type):
        getter = self._get_full_getter(type)
        return getter(self._dr)

    def __str__(self):
        '''Print the whole thing.'''
        def iprint(*args, **kw):
            indent = kw.get('indent', 0)
            s = '%s%s' % (' '*indent, ' '.join(map(str, args)))
            return s

        l = [
            iprint(self._dr.identifier()),
        ]
        x = list(iprint('%s: %d' % (n, p.property()), indent=2) for n, p in self.get_all('int'))
        if x:
            l.append(iprint('int:'))
            l.extend(x)
        x = list(iprint('%s: %.3f' % (n, p.property()), indent=2) for n, p in self.get_all('double'))
        if x:
            l.append(iprint('double:'))
            l.extend(x)
        x = list(iprint('%s: %s' % (n, p.property()), indent=2) for n, p in self.get_all('QString'))
        if x:
            l.append(iprint('string:'))
            l.extend(x)
        x = list(iprint('%s:' % n, indent=2) for n, p in self.get_all('HMolecule'))
        if x:
            l.append(iprint('molecule:'))
            l.extend(x)
        x = list(iprint('%s: %d' % (n, len(p.property())), indent=2) for n, p in self.get_all('Hvector_HMolecule'))
        if x:
            l.append(iprint('vector_molecule:'))
            l.extend(x)
        x = list(iprint('%s' % n, indent=2) for n, p in self.get_all('HCrystalStructure'))
        if x:
            l.append(iprint('crystal:'))
            l.extend(x)
        x = list(iprint('%s' % n, indent=2) for n, p in self.get_all('Hvector_int'))
        if x:
            l.append(iprint('vector_int:'))
            l.extend(x)
        x = list(iprint('%s: %d' % (n, len(p.property())), indent=2) for n, p in self.get_all('Hvector_double'))
        if x:
            l.append(iprint('vector_double:'))
            l.extend(x)
        x = list(iprint('%s:' % n, indent=2) for n, p in self.get_all('HQImage'))
        if x:
            l.append(iprint('QImage:'))
            l.extend(x)
        x = list(iprint('%s: %d' % (n, len(p.property())), indent=2) for n, p in self.get_all('Hvector_HQImage'))
        if x:
            l.append(iprint('vector_QImage:'))
            l.extend(x)
        return '\n'.join(l)

##########################################################################

[docs]class ConformerHit(object):
    '''An individual conformer.'''
    def __init__(self, mol, parent):
        self.molecule = mol
        self.parent = parent

[docs]    def rmsd(self, wrt='original', reference=None, exclude_hydrogens=True):
        '''Return the RMSD of this conformer with respect to a reference, the original or the minimised molecule.

        :param reference: ``None`` or a CSD molecule object. If not ``None``, the rmsd is measured with respect to this reference
        :param wrt: either 'original' or 'minimised'. This is ignored if a reference molecule is passed in
        :param exclude_hydrogens: boolean
        :return: float
        '''
        if reference is not None:
            target = reference
        elif wrt.lower().startswith('orig'):
            target = self.parent.original_molecule
        else:
            target = self.parent.minimised_molecule
        if target is None:
            return None
        return MD.rmsd(target, self.molecule, exclude_hydrogens=exclude_hydrogens, overlay=True)

    @property
    def probability(self):
        '''Probability associated with this conformer.'''
        props = self.molecule._molecule.annotations().find_ConformerProperties()
        if props:
            return props.probability()

    @property
    def normalised_score(self):
        '''Normalised score associated with this conformer (0 = best, 1 = worst).'''
        props = self.molecule._molecule.annotations().find_ConformerProperties()
        if props:
            return props.normalised_score()

class DefaultConformerParameterFileLocator:
    '''Dependency injection for the location of a conformer parameter file'''
    def locate(self):
        return _CSDDatabaseLocator.get_conformer_parameter_file_location()

[docs]class ConformerHitList(object):
    '''A conformer generator result.'''
    def __init__(self, identifier, _dr):
        '''Initialiser.

        :param identifier: identifier of input molecule
        :param _dr: a private result object from a conformer generation

        '''
        self._de = _DataRecordExtractor(_dr)
        self.identifier = identifier
        mols = self._de.get('conf_gen.pass.conformers', 'Hvector_HMolecule')
        self.hits = [
            ConformerHit(Molecule(identifier, _molecule=m), self) for m in mols
        ]

    def __iter__(self):
        '''Iterator over hits.'''
        return iter(self.hits)

    def __getitem__(self, inx):
        '''List like interface.'''
        return self.hits[inx]

    def __len__(self):
        '''Length of conformer list.'''
        return len(self.hits)

    @property
    def sampling_limit_reached(self):
        '''Whether the internal sampling limit as been reached.'''
        return bool(self._de.get('conf_gen.pass.max_conf_reached', 'int'))

    @property
    def n_rotamers_with_no_observations(self):
        '''Number of rotamers for which no crystallographic data is available.'''
        return self._de.get('conf_gen.pass.n_zero_obs_rotamers', 'int')

    @property
    def rotamers_with_no_observations(self):
        '''The list of bonds for which the CSD was unable to provide enough input data.'''
        input_molecule = Molecule(self.identifier, _molecule=self._de.get('conf_gen.pass.input_molecule', 'HMolecule'))
        rotamers_with_no_observations = []
        for b in input_molecule.bonds:
            rbd = b._bond.annotations().find_RotatableBondWithDistribution()
            if rbd:
                dist = rbd.distribution()
                if dist:
                    lib = dist.library_version_name()
                    if lib == 'null':
                        rotamers_with_no_observations.append(b)
        return rotamers_with_no_observations

    @property
    def rotamers(self):
        '''The rotamers considered by the generator.'''
        input_molecule = Molecule(self.identifier, _molecule=self._de.get('conf_gen.pass.input_molecule', 'HMolecule'))
        rotamers = []
        for b in input_molecule.bonds:
            rbd = b._bond.annotations().find_RotatableBondWithDistribution()
            if rbd:
                dist = rbd.distribution()
                if dist:
                    rotamers.append(b)
        return rotamers

    @property
    def n_flexible_rings_with_no_observations(self):
        '''Number of flexible rings for which no crystallographic data is available.'''
        return self._de.get('conf_gen.pass.n_zero_template_rings', 'int')

    @property
    def flexible_rings(self):
        '''The flexible rings considered by the generator.'''
        input_molecule = Molecule(self.identifier, _molecule=self._de.get('conf_gen.pass.input_molecule', 'HMolecule'))
        flexibles = []
        for r in input_molecule.rings:
            annos = [a._atom.annotations().find_ReplaceableSiteNode() for a in r.atoms]
            if all(annos) and all(a.rigid_template_collection().type() == 0 for a in annos):
                flexibles.append(r)
        return flexibles

    @property
    def max_log_probability(self):
        '''Maximum log probability.'''
        return self._de.get('conf_gen.pass.max_log_prob_conf_theory', 'double')

    @property
    def min_log_probability(self):
        '''Minimum log probability.'''
        return self._de.get('conf_gen.pass.min_log_prob_conf_theory', 'double')

    @property
    def original_molecule(self):
        '''The input molecule.'''
        return Molecule(self.identifier, _molecule=self._de.get('reader.pass.molecule', 'HMolecule'))

    @property
    def minimised_molecule(self):
        '''The minimised molecule from which conformers were generated.'''
        x = self._de.get('minimiser.pass.minimised_molecule', 'HMolecule')
        if x is not None:
            return Molecule(self.identifier, _molecule=x)

    @property
    def n_rotamers_sampled(self):
        '''The number of rotamers sampled by the generator.

        This may be smaller than the number of rotamers in the input molecule if there are no data in the CSD for the rotamer.
        '''
        return self._de.get('conf_gen.pass.n_tors', 'int')

    @property
    def n_flexible_rings_sampled(self):
        '''The number of flexible rings sampled by the generator.

        This may be smaller than the number of rings in the input molecule if there are no data in the CSD for the ring.
        '''
        return self._de.get('conf_gen.pass.n_ring', 'int')

    @property
    def n_rotamers_in_molecule(self):
        '''The number of rotamers in the molecule.'''
        return self._de.get('conf_gen.pass.n_tors_mol', 'int')

    @property
    def n_matched_rotamers(self):
        '''Rotamers which have been matched in the fragment_library.txt parameter file.'''
        return self._de.get('conf_gen.pass.n_tors_fragment', 'int')

    @property
    def n_flexible_rings_in_molecule(self):
        '''The number of flexible rings in the molecule.'''
        return self._de.get('conf_gen.pass.n_ring_mol', 'int')

    @property
    def distributions_pruned(self):
        '''Whether or not the geometry distributions were pruned in order to perform an exhaustive search.'''
        return bool(self._de.get('conf_gen.pass.dists_pruned', 'int'))


    #@property
    #def cluster_atom_rmsd(self):
    #    '''The atom rmsd used for clustering.'''
    #    return self._de.get('conf_gen.pass.cluster_atom_rmsd', 'double')

    #@property
    #def cluster_torsion_dissimilarity(self):
    #    '''The torsion dissimilarity used for clustering.'''
    #    return self._de.get('conf_gen.pass.cluster_torsion_dissimilarity', 'double')

    #@property
    #def cluster_n_conformers_sampled(self):
    #    '''The number of conformers sampled before clustering.'''
    #    return self._de.get('conf_gen.pass.n_conf_gen', 'int')

    #@property
    #def cluster_n_conformers_generated(self):
    #    '''The number of conformers generated after clustering.'''
    #    return self._de.get('conf_gen.pass.n_conf_gen_clust', 'int')

##########################################################################

[docs]class MoleculeMinimiser(object):
    '''Minimises a single or a list of molecules.'''

    def __init__(self, nthreads=1, parameter_locator=DefaultConformerParameterFileLocator()):
        '''Initialise the Minimiser.

        :param nthreads: number of threads on which to run the minimiser
        '''
        parameter_files = parameter_locator.locate()
        self._minimiser = VirtualScreening.MinimiseMoleculeWorkFlow(
            parameter_files, nthreads
        )

[docs]    def minimise(self, mol):
        '''Return a minimised copy of the input molecule.

        This makes use of the Tripos force field functional forms.

        However, where available equilibrium bond distances and valence angles
        are parameterised using data obtained from CSD distributions.

        :param mol: :class:`ccdc.molecule.Molecule`
        :returns: :class:`ccdc.molecule.Molecule`
        '''
        if isinstance(mol, Molecule):
            dr = self._minimiser.minimise_molecules([mol._molecule])
            if dr:
                de = _DataRecordExtractor(dr[0])
                if de.get('minimiser.pass.success', 'int'):
                    return Molecule(mol.identifier, _molecule=de.get('minimiser.pass.minimised_molecule', 'HMolecule'))
        else:
            mols = [m for m in mol if m.all_atoms_have_sites]
            mins = self._minimiser.minimise_molecules([m._molecule for m in mols])
            return [
                Molecule(
                    mols[i].identifier,
                    _DataRecordExtractor(dr).get('minimiser.pass.minimised_molecule', 'HMolecule')
                )
                for i, dr in enumerate(mins)
            ]


##########################################################################

[docs]class ConformerSettings(object):
    '''Settings for conformer generation.

    Any settings that are set to ``None`` will be set to the system defaults.
    '''
    #: Maximum number of conformers to generate.
    max_conformers = None
    #: Number of unusual torsions allowed per confomer.
    max_unusual_torsions = None
    # Definition of unusual as a probability between 0 and 1.
    #unusual_torsion_threshold = None
    #: Whether or not to superimpose to a common reference.
    superimpose_conformers_onto_reference = None
    #: Whether or not to reject input molecules with missing hydrogen atoms.
    reject_missing_hydrogen = False
    # Whether to use the input conformation for defining the torsion distributions
    _use_input_torsion_distributions = None

    @property
    def normalised_score_threshold(self):
        warnings.warn('''This attribute is deprecated and will be removed.''', DeprecationWarning)
        return self._normalised_score_threshold

    @normalised_score_threshold.setter
    def normalised_score_threshold(self, value):
        warnings.warn('''This attribute is deprecated and will be removed.''', DeprecationWarning)
        self._normalised_score_threshold = value

    def __init__(self):
        self._normalised_score_threshold = None


[docs]class ConformerGenerator(object):
    '''Generates conformers for a single or a list of molecules.

    This functionality is available only under licenced conditions.
    Please contact support@ccdc.cam.ac.uk for details.
    '''
    _telemetry = 0

    def __init__(self, settings=None, skip_minimisation=False, nthreads=1, parameter_locator=DefaultConformerParameterFileLocator()):
        '''Initialise the ConformerGenerator.

        :param settings: a :class:`ccdc.conformer.ConformerSettings` instance or ``None``.
        If ``None`` a default settings will be constructed
        :param skip_minimisation: whether an input molecule be minimised or not
        :param nthreads: number of threads on which to run the generation
        '''
        ConformerGeneratorLib.licence_check()
        self.logger = Logger()
        if settings is None:
            settings = ConformerSettings()
        self.settings = settings
        parameter_files = parameter_locator.locate()

        if parameter_files is None:
            raise RuntimeError("Conformer parameter files not found")

        self._conformer_generator = ConformerGeneratorLib.GenerateConformersWorkFlow(
            parameter_files, skip_minimisation, nthreads
        )
        if self.settings.max_conformers is None:
            self.settings.max_conformers = self._conformer_generator.maximum_number_of_clustered_conformers()
        if self.settings.max_unusual_torsions is None:
            self.settings.max_unusual_torsions = self._conformer_generator.max_unusual_torsions()
        if self.settings.superimpose_conformers_onto_reference is None:
            self.settings.superimpose_conformers_onto_reference = self._conformer_generator.superimpose_conformers_onto_reference()
        if self.settings._use_input_torsion_distributions is None:
            self.settings._use_input_torsion_distributions = self._conformer_generator.use_input_torsion_distribution()

        if type(self)._telemetry == 0:
            UtilitiesLib.ccdc_conformer_generation_telemetry()
            type(self)._telemetry = 1

[docs]    @staticmethod
    def lock_torsion(bond):
        '''Specify that a particular torsion should not be changed when generating conformers of its molecule.

        If the bond is in a ring, the whole ring will be locked.

        :parameter bond: a :class:`ccdc.molecule.Bond` instance.
        '''
        annos = bond._bond.annotations()
        annos.set_LockedTorsionAnnotation(AnnotationsLib.LockedTorsionAnnotation())


[docs]    def generate(self, mols):
        '''Generate conformers for supplied molecule(s).

        :param mols: a :class:`ccdc.molecule.Molecule` or a list of :class:`ccdc.molecule.Molecule`
        :returns: a :class:`ccdc.conformer.ConformerHitList` or a list of
                  :class:`ccdc.conformer.ConformerHitList` instances

        Note that missing hydrogen atoms will be added to conformers in order to generate 3D coordinates,
        unless :attr:`ccdc.conformer.ConformerSettings.reject_missing_hydrogen` is set in which case
        None is returned.
        '''
        if self.settings.max_conformers is not None:
            self._conformer_generator.set_maximum_number_of_clustered_conformers(
                self.settings.max_conformers
            )
        if self.settings.max_unusual_torsions is not None:
            self._conformer_generator.set_max_unusual_torsions(
                self.settings.max_unusual_torsions
            )
        if self.settings.superimpose_conformers_onto_reference is not None:
            self._conformer_generator.set_superimpose_conformers_onto_reference(
                self.settings.superimpose_conformers_onto_reference
            )

        if self.settings._use_input_torsion_distributions is not None:
            self._conformer_generator.set_use_input_torsion_distribution(
                self.settings._use_input_torsion_distributions
            )


        if isinstance(mols, Molecule):
            if len(mols.components) > 1:
                self.logger.warning('Conformers not generated for discontiguous molecules.')
                return None
            mols._molecule.annotations().set_DatabaseEntryIdentifier(
                UtilitiesLib.DatabaseEntryIdentifier(mols.identifier)
            )
            if not self._check_missing_h(mols):
                self.logger.warning('Conformers not generated for molecules missing hydrogen atoms.')
                return None
            od = self._conformer_generator.generate_conformers([mols._molecule])
            if od:
                return ConformerHitList(mols.identifier, od[0])
        else:
            mols = [m for m in mols if len(m.components) == 1]
            for m in mols:
                m._molecule.annotations().set_DatabaseEntryIdentifier(
                    UtilitiesLib.DatabaseEntryIdentifier(m.identifier)
                )
                if not self._check_missing_h(m):
                    self.logger.warning('Conformers not generated for molecules missing hydrogen atoms.')
                    return None

            od = self._conformer_generator.generate_conformers(
                [m._molecule for m in mols]
            )
            des = [
                _DataRecordExtractor(dr) for dr in od
            ]
            hit_lists = sorted([
                (de.get('reader.pass.molecule_id', 'int'), ConformerHitList('', de._dr))
                for de in des
            ])
            for ident, hl in hit_lists:
                hl.identifier = mols[ident].identifier
                for c in hl.hits:
                    c.identifier = hl.identifier
                    c.molecule.identifier = hl.identifier
            return [hl for ident, hl in hit_lists]

    def _check_missing_h(self, mol):
        if not self.settings.reject_missing_hydrogen:
            return True
        test_mol = mol.copy()
        test_mol.add_hydrogens(mode='missing', add_sites=False)
        return len(mol.atoms) == len(test_mol.atoms)

##########################################################################

[docs]def _mogul_version():
    '''The version of mogul being used.'''
    return MogulAnalysisLib.MogulVersions().version()

########################################################################

[docs]class GeometryAnalyser(object):
    '''The geometry analysis engine.'''

[docs]    @nested_class('GeometryAnalyser')
    class Settings(object):
        '''Controls the operation of the geometry analyser.'''
        _solvent_filters = {
            MogulAnalysisLib.FilterParameters.INCLUDE_SOLVENTS  :   'include_solvent',
            MogulAnalysisLib.FilterParameters.EXCLUDE_SOLVENTS  :   'exclude_solvent',
            MogulAnalysisLib.FilterParameters.SOLVENTS_ONLY     :   'only_solvent',
        }

        _organometallic_filters = {
            MogulAnalysisLib.FilterParameters.ORGANIC_AND_ORGANOMETALLIC    :   'all',
            MogulAnalysisLib.FilterParameters.EXCLUDE_ORGANOMETALLIC        :   'organics_only',
            MogulAnalysisLib.FilterParameters.EXCLUDE_ORGANIC               :   'metalorganics_only',

        }

        _rfactor_filters = {
            MogulAnalysisLib.RFactorDetector.RFACTOR_RANGE :   'Not covered',
            MogulAnalysisLib.RFactorDetector.RFACTOR_NULL  :   'No category',
            MogulAnalysisLib.RFactorDetector.RFACTOR_5     :   '< 5%',
            MogulAnalysisLib.RFactorDetector.RFACTOR_7_5   :   '< 7.5%',
            MogulAnalysisLib.RFactorDetector.RFACTOR_10    :   '< 10%',
            MogulAnalysisLib.RFactorDetector.RFACTOR_ANY   :   'any',
        }

[docs]        @nested_class('GeometryAnalyser.Settings')
        class GeometrySettings(object):
            '''Settings for a particular fragment type.

            In other words settings that are applied to one of the below:

            - Bond distances
            - Valence angles
            - Torsion angles
            - Ring RMSDs

            '''
            _classification_measures = bidirectional_dict([
                (MogulAnalysisLib.ResultsClassifierSettings.LOCAL_DENSITY, 'Local density'),
                (MogulAnalysisLib.ResultsClassifierSettings.NEAREST_OBSERVATION, 'Nearest observation'),
                (MogulAnalysisLib.ResultsClassifierSettings.Z_SCORE, 'Z-score'),
                (MogulAnalysisLib.ResultsClassifierSettings.MEAN_DISTANCE, 'Mean distance')
            ])

            def __init__(self, identifier, type, settings):
                self._frag_params = MogulAnalysisLib.MogulFragParameters()
                self.identifier = identifier
                self._type = type
                self._settings = settings
                self._analyse = True
                self.min_obs_generalised = 15
                self.min_obs_exact = 15
                if identifier == 'torsion':
                    self.min_obs_generalised = 40
                    self.min_obs_exact = 40
                self.min_relevance = 0.75
                self._frag_params.set_selection_mode( self._frag_params.SELECT_BEST )

[docs]            def summary(self):
                '''Return a summary the settings as a string.'''
                if self.classification_measure == 'Local density':
                    interval = ' interval = %.2f' % self.local_density_tolerance
                else:
                    interval = ''
                s = [
                    'Type: %s' % self.identifier,
                    '\tAnalyse: %s' % self._analyse,
                    '\tClassification measure: %s' % self.classification_measure,
                    '\tClassification measure threshold: %.2f%s' % (
                        self.classification_measure_threshold, interval
                    ),
                    '\tMin. Obs. Generalised: %d' % self.min_obs_generalised,
                    '\tMin. Obs. Exact: %d' % self.min_obs_exact,
                    '\tMin. Relevance: %.2f' % self.min_relevance,
                    '\tFew hits threshold: %d' % self.few_hits_threshold,
                ]
                return '\n'.join(s)

            @property
            def analyse(self):
                '''Whether to analyse this fragment type.'''
                return self._analyse

            @analyse.setter
            def analyse(self, value):
                self._analyse = value

            @property
            def classification_measure(self):
                '''How to measure whether an observation is unusual.'''
                return self._classification_measures[self._settings.classification_measure(self._type)]

            @classification_measure.setter
            def classification_measure(self, value):
                try:
                    m = self._classification_measures.inverse_lookup(value)
                    self._settings.set_classification_measure(
                        self._type, m, self._settings.classification_measure_threshold(self._type, m)
                    )
                except RuntimeError:
                    raise TypeError('The classification measure %s is inappropriate for %s' %
                        (value, self.identifier)
                    )

            @property
            def classification_measure_threshold(self):
                '''The value at which an observation will be found to be unusual.'''
                return self._settings.classification_measure_threshold(self._type)

            @classification_measure_threshold.setter
            def classification_measure_threshold(self, value):
                self._settings.set_classification_measure_threshold(self._type, self._classification_measures.inverse_lookup(self.classification_measure), value)

            @property
            def min_obs_exact(self):
                '''Minimum acceptable size of an exact distribution.

                If there is no distribution containing at least this number of
                observations the geometry analyser will perform a generalised search according to
                the criteria specified by other settings.

                '''
                return self._frag_params.min_obs_exact()

            @min_obs_exact.setter
            def min_obs_exact(self, value):
                self._frag_params.set_min_obs_exact(value)

            @property
            def min_obs_generalised(self):
                '''Minimum number of observations that the geometry analyser should try to find.

                If this is 0 then generalised searches will never be performed.

                Similarly, if generalisation has been turned off this setting will
                not have an effect.

                '''
                return self._frag_params.min_obs_generalised()

            @min_obs_generalised.setter
            def min_obs_generalised(self, value):
                self._frag_params.set_min_obs_generalised(value)

            @property
            def min_relevance(self):
                '''Relevance criterion for a generalised hit to be accepted.

                The geometry analyser determines how similar a fragment is to the query by
                calculating a relevance value. The min_relevance setting tells
                the geometry analyser to accept, in a generalised search, only fragments whose
                relevance is equal to or greater than this threshold.

                '''
                return self._frag_params.relevance_threshold()

            @min_relevance.setter
            def min_relevance(self, value):
                self._frag_params.set_relevance_threshold(value)

            @property
            def zscore_threshold(self):
                '''Z-score threshold used to classify bonds and angles as unusual.

                Note that the z-score is irrelevant for torsions and rings.

                '''
                return self._settings.classification_measure_threshold(
                    self._type, MogulAnalysisLib.ResultsClassifierSettings.Z_SCORE
                )

            @zscore_threshold.setter
            def zscore_threshold(self, value):
                self._settings.set_classification_measure_threshold(
                    self._type, MogulAnalysisLib.ResultsClassifierSettings.Z_SCORE, value
                )

            @property
            def local_density_threshold(self):
                '''Local density threshold used to classify torsions and rings as unusual.

                Note that the local density is irrelevant for bonds and angles.

                '''
                return self._settings.classification_measure_threshold(
                    self._type, MogulAnalysisLib.ResultsClassifierSettings.LOCAL_DENSITY
                )

            @local_density_threshold.setter
            def local_density_threshold(self, value):
                self._settings.set_classification_measure_threshold(
                    self._type, MogulAnalysisLib.ResultsClassifierSettings.LOCAL_DENSITY, value
                )

            @property
            def local_density_tolerance(self):
                '''The local density tolerance.'''
                return self._settings.local_density_interval(self._type)

            @local_density_tolerance.setter
            def local_density_tolerance(self, value):
                self._settings.set_local_density_interval(self._type, value)

            @property
            def few_hits_threshold(self):
                '''Threshold below which a distribution is considered to have too few hits.'''
                return self._settings.minimum_observations_threshold(self._type)

            @few_hits_threshold.setter
            def few_hits_threshold(self, value):
                self._settings.set_minimum_observations_threshold(self._type, value)

        def __init__(self):
            '''Construct the default geometry analyser settings.'''
            self._parameters = MogulAnalysisLib.MogulSearchParameters()
            self._settings = MogulAnalysisLib.ResultsClassifierSettings(
                MogulAnalysisLib.ResultsClassifierSettings.INSTRUCTION_FILE
            )
            self.generalisation = True
            self.bond    = GeometryAnalyser.Settings.GeometrySettings('bond', MogulAnalysisLib.BondLengths, self._settings)
            self.angle   = GeometryAnalyser.Settings.GeometrySettings('angle', MogulAnalysisLib.ValenceAngles, self._settings)
            self.torsion = GeometryAnalyser.Settings.GeometrySettings('torsion', MogulAnalysisLib.TorsionAngles, self._settings)
            self.ring    = GeometryAnalyser.Settings.GeometrySettings('ring', MogulAnalysisLib.Rings, self._settings)
            self._parameters.set_frag_parameters(
                MogulAnalysisLib.BondLengths, self.bond._frag_params
            )
            self._parameters.set_frag_parameters(
                MogulAnalysisLib.ValenceAngles, self.angle._frag_params
            )
            self._parameters.set_frag_parameters(
                MogulAnalysisLib.TorsionAngles, self.torsion._frag_params
            )
            self._parameters.set_frag_parameters(
                MogulAnalysisLib.Rings, self.ring._frag_params
            )
            self._filters = MogulAnalysisLib.FilterParameters()
            self._parameters.set_filters(self._filters)

[docs]        def summary(self):
            '''Return a summary the settings as a string.'''
            s = [
                'Generalisation: %s' % self.generalisation,
                'Impose upper limits: %s' % self.impose_upper_limits,
                'Filter rfactor: %s' % self.rfactor_filter,
                'Filter heaviest element: %s' % self.heaviest_element,
                'Filter solvent: %s' % self.solvent_filter,
                'Filter organometallic: %s' % self.organometallic_filter,
                'Filter powder: %s' % self.powder_filter,
                self.bond.summary(),
                self.angle.summary(),
                self.torsion.summary(),
                self.ring.summary(),
            ]
            return '\n'.join(s)

        @property
        def generalisation(self):
            '''Setting determining if searches should be generalised or not.'''
            return self._parameters.generalise()

        @generalisation.setter
        def generalisation(self, tf):
            self._parameters.set_generalise(tf)

        @property
        def impose_upper_limits(self):
            '''Whether there an upper limit imposed on generalised searches or not.

            This setting tells the geometry analyser whether or not to limit the number of levels
            traversed for generalised searches. Occasionally the geometry analyser can take a very
            long time to identify similar fragments when performing a generalised
            search. Limiting the number of levels traversed will reduce the
            chances of this happening but may also result n fewer hits being
            found.

            '''
            return self._parameters.impose_upper_level_limits()

        @impose_upper_limits.setter
        def impose_upper_limits(self, tf):
            self._parameters.set_impose_upper_level_limits(tf)

        @property
        def rfactor_filter(self):
            '''Filter on R-factor.

            Note that there are only four possible settings for this option:

            - 0.05
            - 0.075
            - 0.1
            - any

            However you can set the filter using any value and the appropriate
            filter will be selected. Note that if the value supplied is greater
            than 0.1 this means that the R-factor filter will be set to ``None``.
            If you set the filter to ``None`` or 'any' the filter will also be set to ``None``.
            '''
            return self._rfactor_filters[self._filters.rfactor()]

        @rfactor_filter.setter
        def rfactor_filter(self, val):
            if val is None:
                val = 'any'
            if isinstance(val, str):
                s = val.replace(' ', '')
                f = 1.0
            else:
                f = float(val)
                s = ''
            if s.lower() == 'any':
                self._filters.set_rfactor(MogulAnalysisLib.RFactorDetector.RFACTOR_ANY)
            elif 0.0 <= f <= 0.05 or s == '<5%':
                self._filters.set_rfactor(MogulAnalysisLib.RFactorDetector.RFACTOR_5)
            elif 0.05 < f <= 0.075 or s == '<7.5%':
                self._filters.set_rfactor(MogulAnalysisLib.RFactorDetector.RFACTOR_7_5)
            elif 0.075 < f <= 0.1 or s == '<10%':
                self._filters.set_rfactor(MogulAnalysisLib.RFactorDetector.RFACTOR_10)
            else:
                self._filters.set_rfactor(MogulAnalysisLib.RFactorDetector.RFACTOR_ANY)

        @property
        def heaviest_element(self):
            '''Filter on heaviest element.

            This setting tells the geometry analyser to ignore hits from CSD structures that have
            elements heavier than that for a specified atomic symbol.

            The atomic symbol is case sensitive.
            '''
            i = self._filters.heaviest_element()
            return ChemistryLib.Element.ccdc_code_to_atomic_symbol(
                ChemistryLib.Element.atomic_number_to_ccdc_code(i)
            )

        @heaviest_element.setter
        def heaviest_element(self, atomic_symbol):
            i = ChemistryLib.Element.ccdc_code_to_atomic_number(
                ChemistryLib.Element.atomic_symbol_to_ccdc_code(atomic_symbol)
            )
            self._filters.set_heaviest_element(i)

        @property
        def solvent_filter(self):
            '''Configure how solvents and non-solvents should be filtered.

            This setting instructs the geometry analyser to ignore fragments depending on whether
            they are from solvent or non-solvent molecules.

            There are three possible options for this setting:

            - 'include_solvent'
            - 'exclude_solvent'
            - 'only_solvent'

            '''
            return self._solvent_filters[self._filters.solvent()]

        @solvent_filter.setter
        def solvent_filter(self, val):
            if val.lower().startswith('inc'):
                self._filters.set_solvent(MogulAnalysisLib.FilterParameters.INCLUDE_SOLVENTS)
            elif val.lower().startswith('exc'):
                self._filters.set_solvent(MogulAnalysisLib.FilterParameters.EXCLUDE_SOLVENTS)
            elif val.lower().startswith('only'):
                self._filters.set_solvent(MogulAnalysisLib.FilterParameters.SOLVENTS_ONLY)
            else:
                raise TypeError('solvent_filter should be one of %s' % ', '.join(self._solvent_filters.values()))

        @property
        def organometallic_filter(self):
            '''Configure how organometallic and organic hits should be filtered.

            This setting instructs the geometry analyser to ignore fragments depending on whether
            they are from organic or organometallic structures.

            There are three possible options for this setting:

            - 'all'
            - 'metalorganics_only'
            - 'organics_only'

            '''
            return self._organometallic_filters[self._filters.organometallic()]

        @organometallic_filter.setter
        def organometallic_filter(self, val):
            if val.lower().startswith('all'):
                self._filters.set_organometallic(MogulAnalysisLib.FilterParameters.ORGANIC_AND_ORGANOMETALLIC)
            elif 'metal' in val.lower():
                self._filters.set_organometallic(MogulAnalysisLib.FilterParameters.EXCLUDE_ORGANIC)
            elif 'organ' in val.lower():
                self._filters.set_organometallic(MogulAnalysisLib.FilterParameters.EXCLUDE_ORGANOMETALLIC)
            else:
                raise TypeError('organometallic filter should be one of %s' % ', '.join(self._organometallic_filters.values()))

        @property
        def powder_filter(self):
            '''Configure whether or not powder structures be filtered.

            This setting instructs the geometry analyser to ignore if set to True or retain
            if set to False, fragments from powder study analyses.
            '''
            return not self._filters.allow_powder()

        @powder_filter.setter
        def powder_filter(self, val):
            self._filters.set_allow_powder(not val)

########################################################################

[docs]    class AnalysisHit(object):
        '''A single geometry analysis hit fragment.

        In other words one of the observations that make up the geometry analysis
        distribution.

        '''
        def __init__(self, refcode, source, value, _analysis, _distrib, _index):
            '''A proxy for a hit.'''
            self.refcode = refcode
            self._source = source
            self.value = value
            self._analysis = _analysis
            self._distrib = _distrib
            self._index = _index
            self._atoms = None
            self._labels = None
            self._entry = None
            self._crystal = None
            self._molecule = None

        @property
        def identifier(self):
            '''The identifier of the hit.'''
            return self.refcode

        @property
        def source_name(self):
            '''The name of the source of the hit.'''
            return self._source.name()

        @property
        def entry(self):
            '''The hit entry.'''
            if self._entry is None:
                db = self._source.source_database()
                if db is not None:
                    self._entry = Entry(_entry=db.entry(UtilitiesLib.DatabaseEntryIdentifier(self.refcode)))
            return self._entry

        @property
        def crystal(self):
            '''The hit crystal.'''
            if self._crystal is None:
                e = self.entry
                if e is not None:
                    self._crystal = e.crystal
            return self._crystal

        @property
        def molecule(self):
            '''The hit molecule.'''
            if self._molecule is None:
                e = self.entry
                if e is not None:
                    self._molecule = e.molecule
            return self._molecule

        @property
        def similarity_score(self):
            '''The similarity of the matched fragment to the analysed fragment.

            This will be 1.0 for an exact match, and a lower value for a generalised match.
            '''
            return self._distrib.score()

        def _make_fragment(self):
            '''Lazy construction of the fragment.'''
            frag = self._analysis.fragment(self._distrib, self._index)
            if frag is not None:
                self._labels = frag.labels().split()
            else:
                self._labels = ''

        @property
        def atoms(self):
            '''The atoms of a hit.'''
            if self._atoms is None:
                def _find_atom(a):
                    try:
                        return self.molecule.atom(a.label())
                    except RuntimeError:
                        for at in self.molecule.atoms:
                            if at.label == a.label():
                                orth = a.site().orth()
                                coords = at.coordinates
                                if (
                                        round(orth.x(), 4) == round(coords[0], 4) and
                                        round(orth.y(), 4) == round(coords[1], 4) and
                                        round(orth.z(), 4) == round(coords[2], 4)
                                   ):
                                    return at
                        raise RuntimeError('Cannot find atom in molecule')
                frag = self._analysis.fragment(self._distrib, self._index)
                ats = [frag.atom(i) for i in range(frag.natoms())]
                self._atoms = [_find_atom(at) for at in ats]
            return self._atoms

        @property
        def atom_indices(self):
            '''The indices of the matched atoms in the hit molecule.'''
            return [a.index for a in self.atoms]

        @property
        def atom_labels(self):
            '''The labels of the matched atoms in the hit molecule.'''
            return [a.label for a in self.atoms]

        @property
        def bond_length(self):
            '''The bond length of the hit fragment.

            :raises: TypeError if the hit is not for a bond length
            '''
            if len(self.atoms) == 2:
                return self.value
            raise TypeError('This is not a bond length hit')

        @property
        def valence_angle(self):
            '''The valence angle of the hit fragment.

            :raises: TypeError if the hit is not for a valence angle

            '''
            if len(self.atoms) == 3:
                return self.value
            raise TypeError('This is not a valence angle hit')

        @property
        def torsion_angle(self):
            '''The absolute value of the torsion angle of the hit fragment.
            The sign of a torsion angle calculated from a CSD entry is often arbitrary. For
            example, if the CSD entry is centrosymmetric, for every torsion angle with a positive
            sign there is, elsewhere in the unit cell, a symmetry-equivalent torsion with a
            negative sign. Consequently, only the absolute values of torsion angles are used.

            :raises: TypeError if the hit is not for a torsion angle

            '''
            if len(self.atoms) == 4:
                return self.value
            raise TypeError('This is not a torsion hit')

########################################################################

[docs]    @nested_class('GeometryAnalyser')
    class Analysis(object):
        '''A single geometric analysis for a specific bond, angle, torsion or ring feature.
        '''
        type_to_name = {
            MogulAnalysisLib.BondLengths    : 'bond',
            MogulAnalysisLib.ValenceAngles  : 'angle',
            MogulAnalysisLib.TorsionAngles  : 'torsion',
            MogulAnalysisLib.Rings          : 'ring'
        }

        def __init__(self, analysis, mol, classification, settings, siteless):
            '''A proxy for a toolkit Mogul result.'''
            self._search_output = analysis
            self._statistics = analysis.statistics()
            self.classification = classification
            self._settings = settings
            inxs = [s.index for s in siteless]
            self.atom_indices = [self._search_output.fragment().atom(i).index() for i in range(self._search_output.fragment().natoms())]
            for i in self.atom_indices:
                if i in inxs:
                    self._invalidate()
                    break
            else:
                self._valid = True

        def _invalidate(self):
            '''Private: invalidate the hit.'''
            self._valid = False
            try:
                self.classification = self.classification[self.classification.index('('):]
            except ValueError:
                pass

        @property
        def generalised(self):
            '''Whether or not the analysis for this fragment resulted from a generalised search.'''
            return self._search_output.message() not in [
                'Found required number of exact hits',
                'Exact search completed'
            ]

        @property
        def unusual(self):
            '''Check if the geometric feature is unusual or not.

            If the enough_hits and few_hits parameters are set to True (default
            behaviour) this function will return True if the geometric feature is
            classified as unusual.

            If the few_hits parameter is set to False this function will only
            return True if the geometric feature is unusual and there are enough
            hits to support this claim.

            If the enough_hits parameter is set to False this function will only
            return True if the geometric feature is unusual and there is not
            enough hits to support this claim.

            If both the enough_hits and few_hits parameter are set to False then
            this function will always return False.

            '''
            if self._valid:
                return self.classification.startswith('Unusual')

        @property
        def few_hits(self):
            '''Whether there be enough hits for a sound judgement.'''
            return 'few hits' in self.classification

        @property
        def enough_hits(self):
            '''Whether there be enough hits for a sound judgement.'''
            return 'enough hits' in self.classification

        @property
        def no_hits(self):
            '''Whether the fragment has no data within the CSD.'''
            return self.classification == 'No hits'

        # Fragment
        @property
        def fragment_label(self):
            '''Underscore separated string of atom labels.'''
            return self._search_output.fragment().labels().strip().replace(' ', '_')

        @property
        def atom_labels(self):
            '''The labels of atoms in the reference fragment.'''
            return self._search_output.fragment().labels().strip().split()

        @property
        def value(self):
            '''Geometric value represented by the reference fragment.'''
            if self._valid and self._search_output.fragment().valid_value():
                return self._search_output.fragment().geometric_value()

        @property
        def type(self):
            '''The type of geometric feature represented by this result.

            In other words was this
            :class:`ccdc.conformer.GeometryAnalyser.Analysis` derived from a
            bond, angle, torsion or ring analysis.

            '''
            return self.type_to_name[self._search_output.fragment().scan_type()]

        # Statistics

        @property
        def mean(self):
            '''The mean of the distribution.'''
            return self._statistics.mean()

        @property
        def nhits(self):
            '''The number of hits in the distribution.'''
            return self._statistics.number()

        @property
        def minimum(self):
            '''The minimum of the distribution.'''
            return self._statistics.minimum()

        @property
        def maximum(self):
            '''The maximum of the distribution.'''
            return self._statistics.maximum()

        @property
        def median(self):
            '''The median of the distribution.'''
            return self._statistics.median()

        @property
        def standard_deviation(self):
            '''The standard deviation of the distribution.'''
            return self._statistics.standard_deviation()

        @property
        def upper_quartile(self):
            '''The upper quartile of the distribution.'''
            return self._statistics.upper_quartile()

        @property
        def lower_quartile(self):
            '''The lower quartile of the distribution.'''
            return self._statistics.lower_quartile()

[docs]        def percentile(self, p):
            '''Return the percentile of the observed value.

            :raises: TypeError if the value (p) is not between 0 and 1.

            '''
            if p < 0 or p > 1:
                raise TypeError('The percentile value %f must be 0 <= p <= 1' % p)
            return self._statistics.percentile(p)

        @property
        def z_score(self):
            '''Return the Z-score of the observed value.
            '''
            if self.value is not None and self.type not in ['torsion', 'ring']:
                return self._statistics.z_score(self.value)

        @property
        def d_min(self):
            '''Return the distance to the nearest observed value.

            If rawscore is not specified, the geometric value of the query
            fragment will be used.

            '''
            if self.value is not None:
                return self._statistics.d_min(self.value)

        @property
        def local_density(self):
            '''Local density of the distribution around the query value.'''
            if self.type == 'ring':
                value = 0.0
            elif self.value is None:
                return None
            else:
                value = self.value
            return self._statistics.interval_probability(
                value,
                self._settings.local_density_interval(
                    self._search_output.fragment().scan_type()
                )
            ) * 100.0

        # Omitted shannon_interval_probability and interval_probability for now

[docs]        def histogram(self, bin_size=None, minimum=None, maximum=None):
            '''Return the histogram of the distribution as a tuple of integers.

            This function puts the distribution values into bins according to the
            criteria specified.

            :param bin_size: defaults to (maximum - minimum)/40 if set to None
            :param minimum: The minimum value of the distribution range.  If None, defaults to 0 for torsions, or the
                minimum value in the distribution (or the query fragment value if smaller) for other fragment types
            :param maximum: The maximum value of the histogram range.  If None, defaults to 180 for torsions, or the
                maximum value in the distribution (or the query fragment value if larger) for other fragment types
            :returns: tuple of integers

            '''
            if self.value is None:
                value = 0.0
            else:
                value = self.value

            # Default range for torsions is fixed at 0-180, rather than based on the distribution as for other types
            is_torsion = (self._search_output.fragment().scan_type() == MogulAnalysisLib.TorsionAngles)
            if minimum is None:
                minimum = 0.0 if is_torsion else min(value, self.minimum)
            if maximum is None:
                maximum = 180.0 if is_torsion else max(value, self.maximum)
            if bin_size is None:
                bin_size = (maximum - minimum)/40.
            binning = MogulAnalysisLib.MogulDistributionBinning(
                self.distribution, bin_size, minimum, maximum
            )
            return binning.bin_occupancies()

        @property
        def distribution(self):
            '''List of numeric values found by the search.'''
            return self._search_output.statistics().values()

        @property
        def hits(self):
            '''List of :class:`ccdc.conformer.GeometryAnalyser.AnalysisHit` instances found by the search.

            Note that the features below can be extracted from an :class:`ccdc.conformer.GeometryAnalyser.AnalysisHit`:

            - :attr:`ccdc.conformer.GeometryAnalyser.AnalysisHit.molecule`
            - :attr:`ccdc.conformer.GeometryAnalyser.AnalysisHit.atom_indices`
            - :attr:`ccdc.conformer.GeometryAnalyser.AnalysisHit.atom_labels`
            - ``value`` of the geometric feature in the hit

            For more information see the
            :class:`ccdc.conformer.GeometryAnalyser.AnalysisHit`
            documentation.

            '''
            result = []
            for d in self._search_output.all_results():
                for i in range(d.nhits()):
                    h = d.mogul_hit_element(i)
                    result.append(
                        GeometryAnalyser.AnalysisHit(
                            h.refcode_name().strip(),
                            h.source_library(),
                            h.value(),
                            self._search_output,
                            d,
                            i
                        )
                    )
            return result

        @property
        def hit_identifiers(self):
            '''List of molecule identifiers of the hits in the distribution.'''
            result = []
            for d in self._search_output.all_results():
                result.extend(x.strip() for x in d.refcode_names())
            return result

        @property
        def hit_molecules(self):
            '''The list of molecules hit by this result.'''
            return [
                h.molecule for h in self.hits
            ]

########################################################################

    def __init__(self, settings=None, databases=None, ignore_updates=False):
        '''The geometry analysis engine

        The optional list of databases defaults to the CSD. If provided it can
        include the CSD, and the paths of "mogul.path" files in Mogul data.

        Example:

        >>> analyser = GeometryAnalyser(databases=['CSD', '/data/1/mogul.path']) # doctest: +SKIP

        :param settings: Optional settings override
        :param databases: An optional list of databases
        :param ignore_updates: Set to True to only set up main CSD database
        '''
        if settings is None:
            self.settings = GeometryAnalyser.Settings()
        else:
            self.settings = settings
        mogul_setup = MogulAnalysisLib.MogulMainSetup()
        mogul_setup.retrieve(mogul_setup.IGNORE_USER_LIBRARIES_FROM_QSETTINGS)

        if databases is None:
            databases = ['CSD']

        self._database_files = []
        for database in databases:
            if database.upper() == 'CSD':
                self._setup_databases_csd(mogul_setup, ignore_updates)
            else:
                self._setup_database_user(mogul_setup, database)

        self._check_databases()

        '''Initialise the mogul instance with an optional settings instance.'''
        if len(self._database_files) == 1:
            self._data_library = MogulAnalysisLib.MogulCompositeDataLibrary(
                self._database_files[0]
            )
        else:
            vec = MogulAnalysisLib.vector_HMogulSingleSourceDataLibrary(len(self._database_files))
            for i, s in enumerate(self._database_files):
                vec[i] = s
            self._data_library = MogulAnalysisLib.MogulCompositeDataLibrary(
                vec
            )
        self._searcher = MogulAnalysisLib.MogulUniversalSearch(
            self._data_library,
            self.settings._parameters
        )
        self._classifier = MogulAnalysisLib.ResultsClassifier()
        self._altered = []

    def _setup_databases_csd(self, mogul_setup, ignore_updates):
        '''Set up the CSD databases

        :param mogul_setup: A Mogul setup object
        :param ignore_updates: Set to True to only set up main CSD database
        :return: None
        '''
        rdr = EntryReader('csd')
        if isinstance(rdr.file_name, list):
            dbs = rdr.file_name
        else:
            dbs = [rdr.file_name]
        for fname in dbs:
            db = EntryReader(fname)
            base = os.path.splitext(os.path.basename(fname))[0]
            possible = os.path.join(mogul_setup.mogul_data(), base)
            if possible.endswith('_ASER'):
                possible = possible[:-5]
            if os.path.exists(possible):
                if not ignore_updates:
                    df = MogulAnalysisLib.MogulDataFiles(
                        mogul_setup.fragment_types(),
                        possible,
                        db._db,
                        base
                    )
                    self._database_files.append(df)
            else:
                df = MogulAnalysisLib.MogulDataFiles(
                    mogul_setup.fragment_types(),
                    mogul_setup.mogul_data(),
                    db._db,
                    'CSD'
                )
                self._database_files.append(df)

    def _setup_database_user(self, mogul_setup, mogul_path):
        '''Set up a user-provided database

        :param mogul_setup: A Mogul setup object
        :param mogul_path: A mogul.path file
        :return: None
        '''
        mogul_dir = os.path.dirname(mogul_path)
        mpf = MogulAnalysisLib.MogulPathFile(mogul_path)
        database = mpf.aser_path()
        name = mpf.name()

        if database and os.path.exists(database):
            db_file = EntryReader(database)._db
            if not name:
                name = os.path.splitext(os.path.basename(database))[0]
        else:
            db_file = None
            name = 'No sqlite format database'
            logger = Logger()
            logger.warning(
                    f'Unable to locate sqlite database from file: {mogul_path}'
                    )
            if self.settings.ring.analyse:
                logger.warning(
                        'Turning ring analysis off as no sqlite database found.'
                        )
                self.settings.ring.analyse = False
        df = MogulAnalysisLib.MogulDataFiles(mogul_setup.fragment_types(),
                                             mogul_dir, db_file, name)
        self._database_files.append(df)

    @property
    def database_files_path(self):
        '''The directory of the databases

        :returns: a list of str
        '''
        return [os.path.normpath(d.path()) for d in self._database_files]

    @property
    def database_files_name(self):
        '''The name of the databases, for example ['CSD', 'Sep23_ASER']

        :returns: a list of str
        '''
        return [d.name() for d in self._database_files]

    @property
    def database_files_source_db_file_name(self):
        '''The file name of the source databases

        :returns: a list of str
        '''
        source_file_names = []
        for database in self._database_files:
            src_db = database.source_database()
            if src_db:
                source_file_names.append(os.path.normpath(src_db.file_name()))
            else:
                source_file_names.append(None)
        return source_file_names

    def _check_databases(self):
        for d in self._database_files:
            if not os.path.exists(d.path()) or not os.path.isdir(d.path()):
                raise RuntimeError('Database is not a valid CSD format database %s' % d.path())

    def __del__(self):
        '''Tidy up.'''
        self._searcher = None
        self._data_library = None

[docs]    def fragment_identifier(self, fragment):
        '''The unique identifier of a particular type of fragment.

        This is a string encoding the molecular environment of a fragment.

        :param fragment: an instance of :class:`ccdc.conformer.GeometryAnalyser.Analysis`
        :returns: a string of four numbers separated by colons.
        '''
        frag_id = '_%s_fragment_identifier' % fragment.type
        scan_type = fragment._search_output.fragment().scan_type()
        if not hasattr(self, frag_id):
            setattr(self, frag_id,
                MogulAnalysisLib.FragmentIdentifier(
                    self._data_library.tree(MogulAnalysisLib.MogulFragmentTypeAndVersion(scan_type)), scan_type
                )
            )
        return getattr(self, frag_id).identifier(fragment._search_output.fragment())

    def _one_search(self, mol, ty):
        '''PRIVATE: implementation of a single search.'''
        self._searcher.make_fragments(mol._molecule, ty)
        search_output = self._searcher.search_all_fragments()
        result = list()
        for i, r in enumerate(search_output):
            which = self._classifier.classification(r, self.settings._settings)
            if which.startswith('Excluded'):
                continue
            result.append(GeometryAnalyser.Analysis(r, mol, which, self.settings._settings, self._altered))
        return result

[docs]    def analyse_molecule(self, mol, _max_atoms_to_analyse=999):
        '''Perform a geometry analysis of the whole molecule.

        :params mol: :class:`ccdc.molecule.Molecule` to be analysed
        :returns: :class:`ccdc.molecule.Molecule` augmented with analysis data
        '''
        num_heavy_atoms = len(mol.heavy_atoms)
        if _max_atoms_to_analyse and num_heavy_atoms > _max_atoms_to_analyse:
            raise RuntimeError('Too many atoms for geometry analysis ({} > {})'.format(
                num_heavy_atoms, _max_atoms_to_analyse))

        result = mol.copy()
        if len(set(a.label for a in result.heavy_atoms)) < num_heavy_atoms:
            result.normalise_labels()

        self._altered = []
        if not result.all_atoms_have_sites:
            for a in result.atoms:
                if a.coordinates is None:
                    a.coordinates = [0, 0, 0]
                    self._altered.append(a)

        self.settings._parameters.set_filters(self.settings._filters)
        self._searcher = MogulAnalysisLib.MogulUniversalSearch(
            self._data_library,
            self.settings._parameters
        )
        if self.settings.bond.analyse:
            result.analysed_bonds = self._one_search(result, MogulAnalysisLib.BondLengths)
        if self.settings.angle.analyse:
            result.analysed_angles = self._one_search(result, MogulAnalysisLib.ValenceAngles)
        if self.settings.torsion.analyse:
            result.analysed_torsions = self._one_search(result, MogulAnalysisLib.TorsionAngles)
        if self.settings.ring.analyse:
            try:
                result.analysed_rings = self._one_search(result, MogulAnalysisLib.Rings)
            except RuntimeError:
                result.analysed_rings = []
        for a in self._altered:
            a.coordinates = None
        if all(abs(a.coordinates.z) < 1e-7 for a in result.heavy_atoms if a.coordinates is not None):
            if hasattr(result, 'analysed_bonds'):
                for h in result.analysed_bonds:
                    h._invalidate()
            if hasattr(result, 'analysed_angles'):
                for h in result.analysed_angles:
                    h._invalidate()
            if hasattr(result, 'analysed_torsions'):
                for h in result.analysed_torsions:
                    h._invalidate()
            if hasattr(result, 'analysed_rings'):
                result.analysed_rings = []
        return result

    def _fragment_search(self, ty, mol, atoms):
        self.settings._parameters.set_filters(self.settings._filters)
        self._altered = []
        for a in atoms:
            if a.coordinates is None:
                a.coordinates = [0, 0, 0]
                self._altered.append(a)

        nfrags = self._searcher.make_fragments(
            mol._molecule,
            ty,
            [a._atom for a in atoms]
        )
        if nfrags == 0:
            raise TypeError('The atoms %s do not form a valid mogul %s' % (
                ' '.join(a.label for a in atoms),
                GeometryAnalyser.Analysis.type_to_name[ty]
            ))
        return self._searcher.fragment_search(0)

    def _analyse_fragment(self, ty, atoms):
        '''Private: analyse a single fragment.'''
        mol = Molecule('MOGUL', atoms[0]._atom.molecule())
        search_output = self._fragment_search(ty, mol, atoms)
        classification = self._classifier.classification(search_output, self.settings._settings)
        result = GeometryAnalyser.Analysis(search_output, mol, classification, self.settings._settings, self._altered)
        for a in self._altered:
            a.coordinates = None
        return result

[docs]    def analyse_bond(self, a, b):
        '''Perform a geometry analysis on a single bond.

        :params a: :class:`ccdc.molecule.Atom`
        :params b: :class:`ccdc.molecule.Atom`
        :returns: :class:`ccdc.conformer.GeometryAnalyser.Analysis`
        :raises: TypeError if the atoms supplied do not form a covalent bond
        '''
        return self._analyse_fragment(MogulAnalysisLib.BondLengths, [a, b])

[docs]    def analyse_angle(self, a, b, c):
        '''Perform a geometry analysis on a single valence angle.

        :params a: :class:`ccdc.molecule.Atom`
        :params b: :class:`ccdc.molecule.Atom`
        :params c: :class:`ccdc.molecule.Atom`
        :returns: :class:`ccdc.conformer.GeometryAnalyser.Analysis`
        :raises: TypeError if the atoms supplied do not make up a bonded angle
        '''
        return self._analyse_fragment(MogulAnalysisLib.ValenceAngles, [a, b, c])

[docs]    def analyse_torsion(self, a, b, c, d):
        '''Perform a geometry analysis on a single torsion angle.

        :params a: :class:`ccdc.molecule.Atom`
        :params b: :class:`ccdc.molecule.Atom`
        :params c: :class:`ccdc.molecule.Atom`
        :params d: :class:`ccdc.molecule.Atom`
        :returns: :class:`ccdc.conformer.GeometryAnalyser.Analysis`
        :raises: TypeError if the atoms supplied do not make up a bonded torsion
        '''
        return self._analyse_fragment(MogulAnalysisLib.TorsionAngles, [a, b, c, d])

[docs]    def analyse_ring(self, *ats):
        '''Perform a geometry analysis on a single ring.

        :params `*ats`: :class:`ccdc.molecule.Atom` instances that make up the ring
        :returns: :class:`ccdc.conformer.GeometryAnalyser.Analysis`
        :raises: TypeError if the atoms supplied do not make up a ring
        '''
        return self._analyse_fragment(MogulAnalysisLib.Rings, ats)

########################################################################