Source code for ccdc.search

#
# This code is Copyright (C) 2015 The Cambridge Crystallographic Data Centre
# (CCDC) of 12 Union Road, Cambridge CB2 1EZ, UK and a proprietary work of CCDC.
# This code may not be used, reproduced, translated, modified, disassembled or
# copied, except in accordance with a valid licence agreement with CCDC and may
# not be disclosed or redistributed in any form, either in whole or in part, to
# any third party. All copies of this code made in accordance with a valid
# licence agreement as referred to above must contain this copyright notice.
#
# No representations, warranties, or liabilities are expressed or implied in the
# supply of this code by CCDC, its servants or agents, except where such
# exclusion or limitation is prohibited, void or unenforceable under governing
# law.
#
'''
The :mod:`ccdc.search` module provides various search classes.

The main classes of the :mod:`ccdc.search` module are:

- :class:`ccdc.search.TextNumericSearch`
- :class:`ccdc.search.SubstructureSearch`
- :class:`ccdc.search.SimilaritySearch`
- :class:`ccdc.search.ReducedCellSearch`
- :class:`ccdc.search.CombinedSearch`

These all inherit from the base class :class:`ccdc.search.Search`. The base
:class:`ccdc.search.Search` contains nested classes defining basic search hits
and settings:

- :class:`ccdc.search.Search.SearchHit`
- :class:`ccdc.search.Search.Settings`

The base class :class:`ccdc.search.Search` also contains the
:func:`ccdc.search.Search.search` function which is used to search the CSD.

All the searches except :class:`ccdc.search.TextNumericSearch` also support
searching of the following additional data sources:

- a Python list of identifiers
- a molecule file path
- a :mod:`ccdc.io` reader
- an individual :class:`ccdc.molecule.Molecule`
- an individual :class:`ccdc.crystal.Crystal`
- a list of molecules, crystals or entries

The :class:`ccdc.search.TextNumericSearch` can only sensibly be applied to
a crystal structure database, which is the CSD by default or a :class:`ccdc.io.EntryReader`
opened on a database file.

The :func:`ccdc.search.Search.search` returns a list of
:class:`ccdc.search.Search.SearchHit` instances. Some of the searches make use
of more specific search hit classes, namely:

- :class:`ccdc.search.TextNumericSearch.TextNumericHit`
- :class:`ccdc.search.SubstructureSearch.SubstructureHit`
- :class:`ccdc.search.SimilaritySearch.SimilarityHit`

Most of the searches return simple Python lists of search hits. However,
a search carried out using a :class:`ccdc.search.SubstructureSearch` returns a
:class:`ccdc.search.SubstructureSearch.SubstructureHitList`, which contains a
:func:`ccdc.search.SubstructureSearch.SubstructureHitList.superimpose` function for superimposing
all the hits on the first instance in the list.

To illustrate some of the searches let us first get an aspirin molecule.

>>> from ccdc.io import EntryReader
>>> csd_reader = EntryReader('CSD')
>>> mol = csd_reader.molecule('ACSALA')

Text numeric searching.

>>> from ccdc.search import TextNumericSearch
>>> text_numeric_search = TextNumericSearch()
>>> text_numeric_search.add_compound_name('aspirin')
>>> hits = text_numeric_search.search()
>>> len(hits)
102

Substructure searching.

>>> from ccdc.search import MoleculeSubstructure, SubstructureSearch
>>> substructure = MoleculeSubstructure(mol)
>>> substructure_search = SubstructureSearch()
>>> _ = substructure_search.add_substructure(substructure)
>>> hits = substructure_search.search()
>>> len(hits)
66

Similarity searching.

>>> from ccdc.search import SimilaritySearch
>>> similarity_search = SimilaritySearch(mol)
>>> hits = similarity_search.search()
>>> len(hits)
113

Reduced cell searching.

>>> from ccdc.search import ReducedCellSearch
>>> crystal = csd_reader.crystal('ACSALA')
>>> query = ReducedCellSearch.CrystalQuery(crystal)
>>> reduced_cell_searcher = ReducedCellSearch(query)
>>> hits = reduced_cell_searcher.search()
>>> len(hits)
17

Combined searches.

>>> from ccdc.search import CombinedSearch
>>> combined_search = CombinedSearch(similarity_search & -text_numeric_search)
>>> hits = combined_search.search()
>>> len(hits)
33

'''
###########################################################################

import sys
import os
import math
import re
import collections
import operator
import warnings
warnings.filterwarnings('always', '.*deprecated.*', DeprecationWarning, '.*', 0)

from ccdc import molecule, io
from ccdc.entry import Entry
from ccdc.crystal import Crystal
from ccdc.io import (
    _CSDDatabaseLocator, _DatabaseReader, CrystalReader,
    EntryReader
)
from ccdc.descriptors import MolecularDescriptors, GeometricDescriptors
from ccdc import utilities

from ccdc import maxint32

from ccdc.utilities import _private_importer
with _private_importer() as pi:
    pi.import_ccdc_module('UtilitiesLib')
    pi.import_ccdc_module('MathsLib')
    pi.import_ccdc_module('ChemistryLib')
    pi.import_ccdc_module('SubstructureSearchLib')
    pi.import_ccdc_module('DatabaseEntryLib')
    pi.import_ccdc_module('CSDSQLDatabaseLib')
    pi.import_ccdc_module('MotifSearchLib')
    pi.import_ccdc_module('ProteinLib')
    pi.import_ccdc_module('FileFormatsLib')
    pi.import_ccdc_module('AnnotationsLib')
    pi.import_ccdc_module('SolubilityPlatformLib')

###########################################################################
#   Queries
###########################################################################


def _decode_condition(r):
    '''PRIVATE: work out the condition from the argument.'''
    if isinstance(r, (int, float)):
        crit = SubstructureSearchLib.EqualTo(r)
    elif isinstance(r, (tuple, list)):
        a = r[0]
        if isinstance(a, (int, float)):
            if len(r) == 2 and isinstance(r[1], (int, float)):
                crit = SubstructureSearchLib.InclusiveRange(min(a, r[1]), max(a, r[1]))
            else:
                raise TypeError('Invalid type for condition %s' % r)
        elif isinstance(a, str):
            op = a.strip()
            if op == '==':
                crit = SubstructureSearchLib.EqualTo(r[1])
            elif op == '>':
                crit = SubstructureSearchLib.Greater(r[1])
            elif op == '<':
                crit = SubstructureSearchLib.Less(r[1])
            elif op == '>=':
                crit = SubstructureSearchLib.GreaterEqual(r[1])
            elif op == '<=':
                crit = SubstructureSearchLib.LessEqual(r[1])
            elif op == '!=':
                crit = SubstructureSearchLib.NotEqualTo(r[1])
            elif op == 'in':
                crit = SubstructureSearchLib.OneOf(r[1])
            else:
                raise TypeError('Invalid operator for condition %s' % r)
        else:
            raise TypeError('Invalid value for condition %s' % r)
    return crit

def _constraint_property(which, doc, nullary=False):
    '''Private: make a property from a class.'''
    return property(
        lambda x: x._get_constraint(which),
        lambda x, value, nullary=nullary: x._set_constraint(which, value, nullary=nullary),
        None,
        doc
    )

[docs]class QueryAtom(object):
    '''Atom used to define a substructure search.

    A QueryAtom can be used to represent a single atom type or a set of atom
    types. A QueryAtom can also have additional constraints imposed on it, for
    example that it should be aromatic.

    Let us create a query atom representing an oxygen atom.

    >>> query_atom = QueryAtom('O')
    >>> print(query_atom)
    QueryAtom(O)

    Suppose that we wanted the query atom to be either a carbon or a nitrogen
    atom.

    >>> query_atom = QueryAtom(['C', 'N'])
    >>> print(query_atom)
    QueryAtom(C, N)

    It is possible to add further constraints on a QueryAtom. For example,
    we can insist that it should be aromatic.

    >>> query_atom.aromatic = True
    >>> print(query_atom.aromatic)
    AtomAromaticConstraint: 1
    >>> print(query_atom)
    QueryAtom(C, N)[atom aromaticity: equal to 1]

    See :ref:`query_atoms` for further details.

    '''

    def __init__(self, atomic_symbol='', _substructure_atom=None):
        '''Initialiser.

        :param atomic_symbol: an atomic symbol, a list or tuple of atomic symbols, or a
            :class:`ccdc.molecule.Atom` whose element is used.  If empty, the atom is
            created without any element restriction.
        :param _substructure_atom: private; an existing substructure atom to wrap.
            When given, atomic_symbol is ignored.

        The resulting QueryAtom will match any of the provided symbols.
        '''
        if _substructure_atom is not None:
            self._substructure_atom = _substructure_atom
            return

        if not atomic_symbol:
            self._substructure_atom = SubstructureSearchLib.SubstructureAtom()
            return

        if isinstance(atomic_symbol, (list, tuple)):
            # The first symbol creates the atom, the rest widen what it matches.
            first, others = atomic_symbol[0], atomic_symbol[1:]
            atom = SubstructureSearchLib.SubstructureAtom(ChemistryLib.Element(first))
            for symbol in others:
                atom.add_element(ChemistryLib.Element(symbol))
        elif isinstance(atomic_symbol, molecule.Atom):
            atom = SubstructureSearchLib.SubstructureAtom(
                atomic_symbol._atom.element()  # pylint: disable=E1103
            )
        else:
            atom = SubstructureSearchLib.SubstructureAtom(
                ChemistryLib.Element(atomic_symbol)
            )
        self._substructure_atom = atom

    def __str__(self):
        '''String representation of a QueryAtom.

        The element symbols are listed in parentheses (none when the atom matches
        any element), followed by the constraints, if any, in square brackets.

            >>> q = QueryAtom(['C', 'N'])
            >>> print(q)
            QueryAtom(C, N)
        '''
        atom = self._substructure_atom
        if atom.matches_any_element():
            symbols = []
        else:
            symbols = [
                atom.element(i).atomic_symbol() for i in range(atom.nelements())
            ]
        text = 'QueryAtom(' + ', '.join(symbols) + ')'

        count = atom.nconstraints()
        if count:
            descriptions = [
                str(atom.constraint(i)).strip('\n') for i in range(count)
            ]
            text += '[' + ', '.join(descriptions) + ']'
        # Tabs in the constraint descriptions are flattened to single spaces.
        return text.replace('\t', ' ')

    __repr__ = __str__

    def __eq__(self, other):
        '''Compare the wrapped substructure atoms; anything that is not a QueryAtom is unequal.'''
        if not isinstance(other, QueryAtom):
            return False
        return self._substructure_atom == other._substructure_atom

    def __ne__(self, other):
        '''Return the negation of ``self == other``.'''
        are_equal = self == other
        return not are_equal

    @property
    def index(self):
        '''Index of this atom in a substructure.

        ``None`` is returned when the underlying atom raises ``RuntimeError`` on
        being asked for its index, as in the first example below where the atom
        has not yet been added to a substructure.

        >>> atom = QueryAtom(['C', 'N'])
        >>> print(atom.index)
        None
        >>> substructure = QuerySubstructure()
        >>> _ = substructure.add_atom(atom)
        >>> print(atom.index)
        0
        '''
        try:
            position = self._substructure_atom.index()
        except RuntimeError:
            return None
        return position

    def _get_constraint(self, which, boolean=False):
        '''Private: get a printable representation of a constraint.

        :param which: the constraint class to look up.
        :param boolean: unused.
        :returns: ``'<constraint class name>: <detail>'`` or ``None`` if the atom has
            no constraint of that type.
        '''
        atom = self._substructure_atom
        probe = which()
        if not atom.has_constraint_of_type(probe):
            return None

        found = atom.constraint_of_type(probe)
        # Two constraint types expose their setting through a dedicated accessor
        # rather than through condition().
        if which == SubstructureSearchLib.AtomHas3DSiteConstraint:
            detail = found.get_site_option()
        elif which == SubstructureSearchLib.AtomLabelConstraint:
            detail = found.regular_expression()
        else:
            detail = found.condition()
        return '%s: %s' % (which.__name__, detail)

    def _set_constraint(self, which, value, nullary=False):
        '''Private: replace the constraint of the given type.

        Any existing constraint of the type is removed first; if value is None
        nothing is put in its place.

        :param which: the constraint class.
        :param value: None, a boolean, or a condition accepted by ``_decode_condition``.
        :param nullary: if True the constraint is constructed without a condition
            and value only decides whether it is added at all.
        '''
        atom = self._substructure_atom
        if atom.has_constraint_of_type(which()):
            atom.remove_constraints_of_type(which())
        if value is None:
            return

        if which == SubstructureSearchLib.AtomHas3DSiteConstraint:
            wanted = bool(value)
            new_constraint = which()
            new_constraint.set_site_option(wanted)
        elif which == SubstructureSearchLib.AtomLabelConstraint:
            new_constraint = which()
            new_constraint.set_regular_expression(value)
        elif nullary:
            new_constraint = which()
        else:
            if value in (True, False):
                condition = SubstructureSearchLib.EqualTo(value)
            else:
                condition = _decode_condition(value)
            new_constraint = which(condition)
        atom.add_constraint(new_constraint)

    # Constraint properties.  Each one is built by _constraint_property, so:
    #   - reading it returns the string produced by _get_constraint
    #     ('<constraint class name>: <detail>'), or None when the atom has no
    #     constraint of that type;
    #   - assigning None removes the constraint;
    #   - assigning anything else replaces the constraint via _set_constraint.

    # nullary=True: _set_constraint constructs this constraint without a
    # condition, so every non-None value (False included) adds it.
    acceptor = _constraint_property(
        SubstructureSearchLib.AtomAcceptorTypeConstraint,
        '''Constraint specifying whether or not the QueryAtom is an acceptor.

        >>> a = QueryAtom(['C', 'N'])
        >>> a.acceptor = True
        >>> print(a)
        QueryAtom(C, N)[AtomAcceptorTypeConstraint]
        ''',
        nullary=True
    )
    aromatic = _constraint_property(
        SubstructureSearchLib.AtomAromaticConstraint,
        '''Constraint specifying whether or not the QueryAtom is aromatic.

        >>> a = QueryAtom(['C', 'N'])
        >>> a.aromatic = True
        >>> print(a)
        QueryAtom(C, N)[atom aromaticity: equal to 1]
        '''
    )
    # nullary=True: as for acceptor, any non-None value adds the constraint.
    donor = _constraint_property(
        SubstructureSearchLib.AtomDonorTypeConstraint,
        '''Constraint specifying whether or not the QueryAtom is a donor.

        >>> a = QueryAtom(['C', 'N'])
        >>> a.donor = True
        >>> print(a)
        QueryAtom(C, N)[AtomDonorTypeConstraint]
        ''',
        nullary=True
    )
    cyclic = _constraint_property(
        SubstructureSearchLib.AtomCyclicityConstraint,
        '''Constraint specifying whether or not the QueryAtom is part of a cycle.

        >>> a = QueryAtom(['C', 'N'])
        >>> a.cyclic = True
        >>> print(a)
        QueryAtom(C, N)[atom cyclicity: equal to 1]
        '''
    )
    # The numeric constraints below take any condition understood by
    # _decode_condition: a number, a (low, high) range or an (operator, operand) pair.
    formal_charge = _constraint_property(
        SubstructureSearchLib.AtomFormalChargeConstraint,
        '''Constraint specifying the formal charge on the QueryAtom.

        >>> a = QueryAtom(['C', 'N'])
        >>> a.formal_charge = ('in', [-1, 1])
        >>> print(a)
        QueryAtom(C, N)[charge: one of -1, 1]
        '''
    )
    formal_valency = _constraint_property(
        SubstructureSearchLib.AtomFormalValencyConstraint,
        '''Constraint specifying the formal valency of the QueryAtom.

        >>> a = QueryAtom(['C', 'N'])
        >>> a.formal_valency = ('>', 3)
        >>> print(a)
        QueryAtom(C, N)[atom valency: greater than 3]
        '''
    )
    cyclic_bonds = _constraint_property(
        SubstructureSearchLib.AtomNCyclicBondsConstraint,
        '''Constraint specifying the number of cyclic bonds of the QueryAtom.

        >>> a = QueryAtom(['C', 'N'])
        >>> a.cyclic_bonds = ('!=', 4)
        >>> print(a)
        QueryAtom(C, N)[number of cyclic bonds:not equal to 4]
        '''
    )
    smallest_ring = _constraint_property(
        SubstructureSearchLib.AtomSmallestRingConstraint,
        '''Constraint specifying the size of the smallest ring the QueryAtom forms part of.

        >>> a = QueryAtom(['C', 'N'])
        >>> a.smallest_ring = (5, 6)
        >>> print(a)
        QueryAtom(C, N)[atom smallest ring: in range 5 to 6]
        '''
    )
    num_bonds = _constraint_property(
        SubstructureSearchLib.AtomNBondsConstraint,
        '''Constraint specifying the number of bonds the QueryAtom may have.

        >>> a = QueryAtom(['C', 'N'])
        >>> a.num_bonds = ('<=', 3)
        >>> print(a)
        QueryAtom(C, N)[number of connected atoms: less than or equal to 3]
        '''
    )
    num_hydrogens = _constraint_property(
        SubstructureSearchLib.AtomNHydrogensConstraint,
        '''Constraint specifying the number of hydrogens the QueryAtom may have.

        >>> a = QueryAtom(['C', 'N'])
        >>> a.num_hydrogens = 1
        >>> print(a)
        QueryAtom(C, N)[hydrogen count, including deuterium: equal to 1]
        '''
    )
    unfused_unbridged_ring = _constraint_property(
        SubstructureSearchLib.AtomUnfusedUnbridgedRingConstraint,
        '''Constraint specifying whether or not the QueryAtom is part of an unfused and unbridged ring.

        >>> a = QueryAtom(['C', 'N'])
        >>> a.unfused_unbridged_ring = True
        >>> print(a)
        QueryAtom(C, N)[atom unfused/unbridged ring: equal to 1]
        '''
    )
    nimplicit_hydrogens = _constraint_property(
        SubstructureSearchLib.AtomNImplicitHydrogensConstraint,
        '''Constraint specifying a count of implicit hydrogens.

        >>> a = QueryAtom(['C', 'N'])
        >>> a.nimplicit_hydrogens = 0
        >>> print(a)
        QueryAtom(C, N)[implicit hydrogen count: equal to 0]
        '''
    )
    # _set_constraint special-cases AtomHas3DSiteConstraint before it looks at
    # nullary, so the flag has no effect here: the assigned value is coerced
    # with bool() and passed to set_site_option().
    has_3d_coordinates = _constraint_property(
        SubstructureSearchLib.AtomHas3DSiteConstraint,
        '''Constraint specifying that the atom has 3d coordinates.

        >>> a = QueryAtom(['C', 'N'])
        >>> a.has_3d_coordinates = True
        >>> print(a)
        QueryAtom(C, N)[atom must have 3D site]
        ''',
        nullary=True
    )
    # _set_constraint special-cases AtomLabelConstraint as well: the assigned
    # value is passed to set_regular_expression() and nullary is not consulted.
    label_match  = _constraint_property(
        SubstructureSearchLib.AtomLabelConstraint,
        '''Constraint specifying that the atom label must match a regular expression.

        >>> a = QueryAtom(['C'])
        >>> a.label_match = '^C12$'
        >>> print(a)
        QueryAtom(C)[atom label must match regular expression with pattern: ^C12$]
        ''',
        nullary=True
    )

    @property
    def chirality(self):
        '''Constraint specifying the chirality around an atom.

        The return value will either be None or a tuple of 4 QueryAtoms in clockwise order.
        None is returned when the chirality record obtained for this atom is not
        centred on this atom.

        >>> s = SMARTSSubstructure("FC(I)O[C@](S)(P)H")
        >>> s.atoms[1].chirality is None
        True
        >>> s.atoms[4].chirality
        (QueryAtom(O)[atom aromaticity: equal to 0], QueryAtom(H), QueryAtom(P)[atom aromaticity: equal to 0], QueryAtom(S)[atom aromaticity: equal to 0])
        '''
        centre = self._substructure_atom
        record = SubstructureSearchLib.get_chirality(centre)
        if not (record.atom() == centre):
            return None
        return tuple(
            QueryAtom(_substructure_atom=neighbour)
            for neighbour in record.ordered_bound_atoms()
        )

    @chirality.setter
    def chirality(self, chirality):
        '''Constraint specifying the chirality around an atom.

        The set value may be None to clear a chirality constraint, or a tuple of 4 ordered atoms and
        optionally a string 'clockwise' (the assumed default) or 'anticlockwise' specifying the chiral relationship.

        :raises: RuntimeError if the relationship string is not recognised, or if the
            value does not consist of exactly 4 QueryAtoms.

        >>> s = SMARTSSubstructure("O[C@](I)(F)H")
        >>> s.atoms[1].chirality = None
        >>> s.atoms[1].chirality is None
        True
        >>> s.atoms[1].chirality = (s.atoms[0],s.atoms[2],s.atoms[3],s.atoms[4])
        >>> s.atoms[1].chirality
        (QueryAtom(O)[atom aromaticity: equal to 0], QueryAtom(I), QueryAtom(F), QueryAtom(H))
        '''
        if chirality is None:
            SubstructureSearchLib.remove_chirality(self._substructure_atom)
            return

        if len(chirality) >= 5:
            sense = chirality[4]
            if sense == "anticlockwise":
                # Swapping the last two atoms expresses the anticlockwise
                # ordering as the clockwise ordering the library call takes.
                chirality = (chirality[0], chirality[1], chirality[3], chirality[2])
            elif sense != "clockwise":
                raise RuntimeError("Chirality description must be 'clockwise' or 'anticlockwise'")
            chirality = chirality[0:4]

        # The length is checked as well as the types: a shorter sequence of
        # QueryAtoms would otherwise fail below with an unhelpful IndexError.
        if len(chirality) != 4 or not all(isinstance(atom, QueryAtom) for atom in chirality):
            raise RuntimeError("QueryAtom chirality must be set to 4 QueryAtoms or None")

        atoms = [a._substructure_atom for a in chirality]
        SubstructureSearchLib.set_chirality(self._substructure_atom, atoms[0], atoms[1], atoms[2], atoms[3])


[docs]    def add_connected_element_count(self, atomic_symbols, count):
        '''Set the number of connected elements constraint.

        Constraint to define the number of times the QueryAtom should be
        connected to atoms with elements defined in the atomic_symbols list.

        :param atomic_symbols: atomic symbol or list of atomic symbols.
        :param count: see :ref:`conditions` for details.

        >>> a = QueryAtom(['C', 'N'])
        >>> a.add_connected_element_count(['F', 'Cl'], 2)
        >>> print(a)
        QueryAtom(C, N)[count connected elements equal to 2 from [F,Cl]]
        '''
        x = ChemistryLib.ElementSet()
        if isinstance(atomic_symbols, list):
            for s in atomic_symbols:
                x.add_element(ChemistryLib.Element(s))
        else:
            x.add_element(ChemistryLib.Element(atomic_symbols))
        if isinstance(count, list):
            cond = SubstructureSearchLib.InclusiveRange(count[0], count[1])
        else:
            cond = SubstructureSearchLib.EqualTo(count)
        constraint = SubstructureSearchLib.AtomConnectedElementCountConstraint(x, cond)
        if self._substructure_atom.has_constraint_of_type(constraint):
            self._substructure_atom.remove_constraints_of_type(constraint)
        self._substructure_atom.add_constraint(constraint)

[docs]    def add_protein_atom_type_constraint(self, *types):
        '''Add a constraint that an atom be in one of the protein atom types.

        This is of use only when searching a protein structure.

        :param `*types`: one or more of 'AMINO_ACID', 'LIGAND', 'COFACTOR', 'WATER', 'METAL',
            'NUCLEOTIDE', 'UNKNOWN'. Any case-insensitive, unique prefix may be used.

        >>> a = QueryAtom('Zn')
        >>> a.add_protein_atom_type_constraint('Ligand', 'Metal')
        >>> print(a)
        QueryAtom(Zn)[protein substructure type : one of 1, 3]
        '''
        _type_dict = utilities.bidirectional_dict(
            AMINO_ACID=AnnotationsLib.ProteinSubstructureData.AMINOACID,
            LIGAND=AnnotationsLib.ProteinSubstructureData.LIGAND,
            COFACTOR=AnnotationsLib.ProteinSubstructureData.COFACTOR,
            WATER=AnnotationsLib.ProteinSubstructureData.WATER,
            METAL=AnnotationsLib.ProteinSubstructureData.METAL,
            NUCLEOTIDE=AnnotationsLib.ProteinSubstructureData.NUCLEOTIDE,
            UNKNOWN=AnnotationsLib.ProteinSubstructureData.UNKNOWN
        )
        indices = [_type_dict.prefix_lookup(t) for t in types]
        if len(indices) == 1:
            cond = SubstructureSearchLib.EqualTo(indices[0])
        else:
            cond = SubstructureSearchLib.OneOf(indices)
        self._substructure_atom.add_constraint(ProteinLib.ProteinSubstructureTypeAtomConstraint(cond))

[docs]class QueryBond(object):
    '''Bond used to define a substructure search.

    A QueryBond can be used to represent a single bond type or a set of bond
    types. A QueryBond can also have additional constraints imposed on it, for
    example that it should be cyclic.

    Let us create a QueryBond that will match any bond type.

    >>> query_bond = QueryBond()
    >>> print(query_bond)  # doctest: +NORMALIZE_WHITESPACE
    QueryBond(Unknown, Single, Double, Triple,
              Quadruple, Aromatic, Delocalised, Pi)

    To create a more specific QueryBond we need to specify some bond types.

    >>> from ccdc.molecule import Bond
    >>> single_bond = Bond.BondType('Single')
    >>> double_bond = Bond.BondType('Double')
    >>> query_bond = QueryBond(single_bond)
    >>> print(query_bond)
    QueryBond(Single)
    >>> query_bond = QueryBond([single_bond, double_bond])
    >>> print(query_bond)  # doctest: +NORMALIZE_WHITESPACE
    QueryBond(Single, Double)

    Finally, let us set a constraint for the bond to be cyclic.

    >>> query_bond.cyclic = True
    >>> print(query_bond)
    QueryBond(Single, Double)[bond cyclicity: equal to 1]

    >>> print(query_bond.cyclic)
    BondCyclicityConstraint: 1

    '''
    def __init__(self, bond_type=None, _substructure_bond=None):
        '''Initialise a QueryBond.

        :param bond_type: may be None, for a :class:`QueryBond` that will match any bond, a
                          :class:`ccdc.molecule.Bond.BondType` instance which will match only that
                          bond type, a string representation which will match only that bond type,
                          'any' that will match any bond, or a list or tuple of
                          :class:`ccdc.molecule.Bond.BondType` or string representations which
                          will match any of those specified.  An empty list or tuple is
                          treated like None.
        :param _substructure_bond: private; an existing substructure bond to wrap.
                          When given, bond_type is ignored.
        '''
        def _library_type(candidate):
            '''Return the wrapped bond type for a type name or a BondType.'''
            if isinstance(candidate, str):
                candidate = molecule.Bond.BondType(candidate)
            return candidate._bond_type

        make_bond = SubstructureSearchLib.SubstructureBond

        if _substructure_bond is not None:
            self._substructure_bond = _substructure_bond
        elif bond_type is None:
            self._substructure_bond = make_bond()
        elif isinstance(bond_type, (list, tuple)):
            if bond_type:
                # The first type creates the bond, the rest widen what it matches.
                self._substructure_bond = make_bond(_library_type(bond_type[0]))
                for extra in bond_type[1:]:
                    self._substructure_bond.add_type(_library_type(extra))
            else:
                self._substructure_bond = make_bond()
        elif isinstance(bond_type, str):
            if bond_type.lower() == 'any':
                self._substructure_bond = make_bond()
            else:
                self._substructure_bond = make_bond(_library_type(bond_type))
        else:
            self._substructure_bond = make_bond(bond_type._bond_type)

    def __str__(self):
        '''String representation of a QueryBond.

        The bond types are listed in parentheses, followed by the constraints,
        if any, in square brackets.

        >>> b = QueryBond(['Single', 'Double'])
        >>> print(b)
        QueryBond(Single, Double)
        '''
        bond = self._substructure_bond
        type_names = [
            str(molecule.Bond.BondType(bond.type(i))) for i in range(bond.ntypes())
        ]
        text = 'QueryBond(' + ', '.join(type_names) + ')'

        count = bond.nconstraints()
        if count:
            descriptions = [str(bond.constraint(i)) for i in range(count)]
            text += '[' + ', '.join(descriptions) + ']'
        return text

    __repr__ = __str__

    @property
    def atoms(self):
        '''A list of the two QueryAtoms of the bond, if it is in a substructure, or ``None``.

        ``None`` is returned when the underlying bond raises ``RuntimeError`` on
        being asked for its atoms.

        >>> s = QuerySubstructure()
        >>> c = s.add_atom(QueryAtom('C'))
        >>> n = s.add_atom(QueryAtom('N'))
        >>> b = QueryBond(['Single', 'Double'])
        >>> _ = s.add_bond(b, c, n)
        >>> print(b)
        QueryBond(Single, Double)
        >>> print('%s, %s' % (b.atoms[0], b.atoms[1]))
        QueryAtom(C), QueryAtom(N)
        '''
        try:
            first = self._substructure_bond.atom1()
            second = self._substructure_bond.atom2()
        except RuntimeError:
            return None
        return [
            QueryAtom(_substructure_atom=first),
            QueryAtom(_substructure_atom=second)
        ]

    def _get_constraint(self, which):
        '''Private: get a string representation of a bond constraint.

        :param which: the constraint class to look up.
        :returns: ``'<constraint class name>: <condition>'`` or ``None`` if the bond
            has no constraint of that type.
        '''
        bond = self._substructure_bond
        probe = which()
        if not bond.has_constraint_of_type(probe):
            return None
        condition = bond.constraint_of_type(probe).condition()
        return '%s: %s' % (which.__name__, condition)

    def _set_constraint(self, which, value, nullary=False):
        '''Private: replace the bond constraint of the given type.

        Any existing constraint of the type is removed first; if value is None
        nothing is put in its place.

        :param which: the constraint class.
        :param value: None, a boolean, or a condition accepted by ``_decode_condition``.
        :param nullary: if True, value is used as an exact-match condition without decoding.
        '''
        bond = self._substructure_bond
        if bond.has_constraint_of_type(which()):
            bond.remove_constraints_of_type(which())
        if value is None:
            return

        exact = nullary or value in (True, False)
        if exact:
            condition = SubstructureSearchLib.EqualTo(value)
        else:
            condition = _decode_condition(value)
        bond.add_constraint(which(condition))

    # Constraint properties.  Each one is built by _constraint_property, so:
    #   - reading it returns the string produced by _get_constraint
    #     ('<constraint class name>: <condition>'), or None when the bond has no
    #     constraint of that type;
    #   - assigning None removes the constraint;
    #   - assigning True/False sets an exact-match condition, and any other
    #     value is decoded by _decode_condition (see _set_constraint).
    cyclic = _constraint_property(
        SubstructureSearchLib.BondCyclicityConstraint,
        '''Constraint specifying whether or not the :class:`QueryBond` is part of a cycle.

        >>> b = QueryBond('Single')
        >>> b.cyclic = True
        >>> print(b)
        QueryBond(Single)[bond cyclicity: equal to 1]
        '''
    )

    # The example adds the bond to a substructure before constraining its length.
    bond_length = _constraint_property(
        SubstructureSearchLib.BondLengthConstraint,
        '''Constraint specifying the length of the bond.

        >>> b = QueryBond('Single')
        >>> c1 = QueryAtom('C')
        >>> c2 = QueryAtom('C')
        >>> s = QuerySubstructure()
        >>> _ = s.add_atom(c1)
        >>> _ = s.add_atom(c2)
        >>> _ = s.add_bond(b, c1, c2)
        >>> b.bond_length = ('>', 1.6)
        >>> print(b)
        QueryBond(Single)[bond length: greater than 1.6]
        '''
    )

    bond_polymeric = _constraint_property(
        SubstructureSearchLib.BondPolymericConstraint,
        '''Constraint specifying whether or not the :class:`QueryBond` is polymeric.

        >>> b = QueryBond('Single')
        >>> b.bond_polymeric = True
        >>> print(b)
        QueryBond(Single)[bond polymeric: equal to 1]
        '''
    )

    bond_smallest_ring = _constraint_property(
        SubstructureSearchLib.BondSmallestRingConstraint,
        '''Constraint specifying the smallest ring the bond should be a part of.

        >>> b = QueryBond('Aromatic')
        >>> b.bond_smallest_ring = 5
        >>> print(b)
        QueryBond(Aromatic)[bond smallest ring: equal to 5]
        '''
    )

    bond_unfused_unbridged_ring = _constraint_property(
        SubstructureSearchLib.BondUnfusedUnbridgedRingConstraint,
        '''Constraint specifying whether or not the :class:`QueryBond` is part of an unfused and unbridged ring.

        >>> b = QueryBond('Single')
        >>> b.bond_unfused_unbridged_ring = True
        >>> print(b)
        QueryBond(Single)[bond unfused/unbridged ring: equal to 1]
        '''
    )

    @property
    def stereochemistry(self):
        r'''Constraint specifying the stereochemistry around a double bond.

        The return value will either be None or a tuple of 2 QueryAtoms and one of 'cis' or 'trans'.
        None is returned when the stereochemistry record obtained for this bond
        refers to a different bond, or carries neither the E nor the Z flag.

        >>> s = SMARTSSubstructure(r"I/C=C\F")
        >>> s.bonds[1].stereochemistry
        (QueryAtom(I), QueryAtom(F), 'cis')
        '''
        bond = self._substructure_bond
        record = SubstructureSearchLib.get_stereochemistry(bond)
        if not bond.is_same_bond(record.bond()):
            return None

        flag = record.stereochemistry()
        if flag == SubstructureSearchLib.EZStereoChemistryFlag_E_STEREOCHEMISTRY:
            label = "trans"
        elif flag == SubstructureSearchLib.EZStereoChemistryFlag_Z_STEREOCHEMISTRY:
            label = "cis"
        else:
            return None

        first = QueryAtom(_substructure_atom=record.adjacent_to_first())
        second = QueryAtom(_substructure_atom=record.adjacent_to_second())
        return (first, second, label)

    @stereochemistry.setter
    def stereochemistry(self, stereo):
        '''Set a stereochemistry constraint on a bond.

        The set value may be None to remove stereochemistry, or a tuple of 2 atoms adjacent to the bond's atoms and a string either 'cis' or 'trans'

        :raises: RuntimeError if the string is neither 'cis' nor 'trans'.

        >>> s = SMARTSSubstructure(R"IC=CF")
        >>> s.bonds[1].stereochemistry = (s.atoms[0], s.atoms[3], 'trans')
        >>> s.bonds[1].stereochemistry
        (QueryAtom(I), QueryAtom(F), 'trans')
        '''
        bond = self._substructure_bond
        if stereo is None:
            SubstructureSearchLib.remove_stereochemistry(bond)
            return

        first_neighbour, second_neighbour, label = stereo
        # 'cis' corresponds to the Z flag and 'trans' to the E flag.
        if label == "cis":
            ez_flag = SubstructureSearchLib.EZStereoChemistryFlag_Z_STEREOCHEMISTRY
        elif label == "trans":
            ez_flag = SubstructureSearchLib.EZStereoChemistryFlag_E_STEREOCHEMISTRY
        else:
            raise RuntimeError("stereochemistry flag must be either 'cis' or 'trans'")

        record = SubstructureSearchLib.SubstructureEZStereoChemistry(
            ez_flag,
            bond,
            first_neighbour._substructure_atom,
            second_neighbour._substructure_atom
        )
        SubstructureSearchLib.set_stereochemistry(record)


###########################################################################

class QuerySubstructure(object):
    '''Class to define and run substructure searches.

    As an example let us set up a QuerySubstructure for a carbonyl (C=O).

    >>> from ccdc.molecule import Bond
    >>> double_bond = Bond.BondType('Double')
    >>> substructure_query = QuerySubstructure()
    >>> query_atom1 = substructure_query.add_atom('C')
    >>> query_atom2 = substructure_query.add_atom('O')
    >>> query_bond = substructure_query.add_bond(double_bond, query_atom1, query_atom2)

    '''
    def __init__(self, _substructure=None):
        '''Create a substructure.

        If the _substructure parameter is set it should be a
        SubstructureSearchLib.Substructure.
        '''
        if _substructure is None:
            _substructure = SubstructureSearchLib.Substructure.instantiate()
        self._substructure = _substructure
        self._reset_search_state()

    def _reset_search_state(self):
        '''Private: reset the searcher, measurements, constraints and geometric objects to their initial values.'''
        self._searcher = None
        self.measurements = []
        self._constraints = None
        self._geometric_constraints = None
        self._geometric_objects = None

    def clear(self):
        '''Restart the query.

        A fresh, empty substructure replaces the current one and all the
        per-query state set up by the constructor is reset.
        '''
        self._substructure = SubstructureSearchLib.Substructure.instantiate()
        self._reset_search_state()

    def add_atom(self, atom):
        '''Add an atom to the substructure.

        :param atom: may be a QueryAtom separately constructed, an atom of a
                     molecule, or an atomic symbol.
        :returns: :class:`QueryAtom`

        >>> q = QuerySubstructure()
        >>> a = q.add_atom(QueryAtom(['N', 'O']))
        >>> print(a)
        QueryAtom(N, O)
        '''
        if isinstance(atom, QueryAtom):
            at = atom
        elif isinstance(atom, molecule.Atom):
            # Only the element of a molecule atom is carried into the query.
            at = QueryAtom(atom.atomic_symbol)
        else:
            at = QueryAtom(atom)
        self._substructure.add(at._substructure_atom)
        return at

    @property
    def atoms(self):
        '''The query atoms in the substructure.

        >>> q = QuerySubstructure()
        >>> _ = q.add_atom(QueryAtom('C'))
        >>> _ = q.add_atom(QueryAtom(['O', 'N']))
        >>> atoms = q.atoms
        >>> print('%s, %s' % (atoms[0], atoms[1]))
        QueryAtom(C), QueryAtom(N, O)
        '''
        return [
            QueryAtom(_substructure_atom=self._substructure.atom(i))
            for i in range(self._substructure.natoms())
        ]

    def _resolve_bond_atom(self, atom):
        '''Private: turn an add_bond() atom argument into a QueryAtom that belongs to this substructure.'''
        if atom is None:
            # None stands for "any atom".
            atom = QueryAtom()
        if isinstance(atom, (molecule.Atom, str)):
            atom = self.add_atom(atom)
        if atom.index is None:
            # A QueryAtom without an index has not been added to the substructure yet.
            atom = self.add_atom(atom)
        return atom

    def add_bond(self, bond, atom1=None, atom2=None):
        '''Add a bond to the substructure.

        :param bond: may be a :class:`QueryBond`, a
                     :class:`ccdc.molecule.Bond.BondType`, a
                     :class:`ccdc.molecule.Bond`, a string or an int.
        :param atom1: :class:`QueryAtom` or ``None`` for any atom
        :param atom2: :class:`QueryAtom` or ``None`` for any atom
        :returns: :class:`QueryBond`
        :raises: TypeError if an improper bond argument is supplied

        Atoms given as a :class:`ccdc.molecule.Atom` or an atomic symbol, and
        QueryAtoms which have no index yet, are added to the substructure first.

        >>> s = QuerySubstructure()
        >>> c = s.add_atom(QueryAtom('C'))
        >>> o1 = s.add_atom(QueryAtom('O'))
        >>> o2 = s.add_atom(QueryAtom('O'))
        >>> h = s.add_atom(QueryAtom('H'))
        >>> _ = s.add_bond(QueryBond('Double'), c, o1)
        >>> _ = s.add_bond(QueryBond('Single'), c, o2)
        >>> _ = s.add_bond(QueryBond('Single'), o2, h)
        '''
        if isinstance(bond, molecule.Bond.BondType):
            sub_bond = SubstructureSearchLib.SubstructureBond(bond._bond_type)
            bond = QueryBond(_substructure_bond=sub_bond)
        elif isinstance(bond, molecule.Bond):
            sub_bond = SubstructureSearchLib.SubstructureBond(bond.bond_type._bond_type)
            bond = QueryBond(_substructure_bond=sub_bond)
        elif isinstance(bond, QueryBond):
            pass
        elif isinstance(bond, str):
            if bond.lower() == 'any':
                bond = QueryBond()
            else:
                ty = molecule.Bond.BondType(bond)._bond_type
                sub_bond = SubstructureSearchLib.SubstructureBond(ty)
                bond = QueryBond(_substructure_bond=sub_bond)
        elif isinstance(bond, int):
            ty = ChemistryLib.BondType(bond)
            sub_bond = SubstructureSearchLib.SubstructureBond(ty)
            bond = QueryBond(_substructure_bond=sub_bond)
        else:
            # Wrap in a 1-tuple so that a tuple argument is reported rather than
            # being unpacked by the % operator.
            raise TypeError('Improper argument to add_bond(%s)' % (bond,))
        # atom1 is resolved (and, if necessary, added) before atom2.
        atom1 = self._resolve_bond_atom(atom1)
        atom2 = self._resolve_bond_atom(atom2)
        self._substructure.add(
            bond._substructure_bond, atom1.index, atom2.index
        )
        return bond

    @property
    def bonds(self):
        '''The bonds in the substructure.

        >>> s = QuerySubstructure()
        >>> b = s.add_bond('Single', QueryAtom('C'), QueryAtom('F'))
        >>> bonds = s.bonds
        >>> print(bonds[0])
        QueryBond(Single)
        '''
        return [
            QueryBond(_substructure_bond=self._substructure.bond(i))
            for i in range(self._substructure.nbonds())
        ]

    def write_xml(self, file_name):
        '''Write an XML representation of the substructure.
        Deprecated.

        :param file_name: path to XML file
        '''
        warnings.warn('''This method is deprecated and will be removed in a later version.''', DeprecationWarning)
        w = SubstructureSearchLib.XMLSubstructureWriter()
        ostr = UtilitiesLib.ofstream(file_name)
        try:
            opts = SubstructureSearchLib.XMLSubstructureOptions()
            w.write(
                self._substructure,
                opts,
                SubstructureSearchLib.XMLSubstructureWriter.SUBSTRUCTURE_SEARCH,
                ostr
            )
        finally:
            # Close the stream even when writing fails.
            ostr.close()

    def match_atom(self, atom, query_atom=None):
        '''Whether or not the given atom matches the query_atom in the given context.

        :param atom: a :class:`ccdc.molecule.Atom` instance.
        :param query_atom: a :class:`ccdc.search.QueryAtom` instance or ``None``.  If ``None``, the first atom of the substructure will be used.
        :returns: bool

        >>> s = QuerySubstructure()
        >>> _ = s.add_bond('Single', QueryAtom('Cl'), QueryAtom('C'))
        >>> mol = EntryReader('csd').molecule('AABHTZ')
        >>> s.match_atom(mol.atom('Cl1'))
        True
        >>> s.match_atom(mol.atom('C1'))
        False
        >>> s.match_atom(mol.atom('C1'), s.atoms[1])
        True
        '''
        if query_atom is None:
            index = 0
        else:
            index = query_atom.index
        matcher = SubstructureSearchLib.SubstructureMoleculeGraphSearch(
            self._substructure,
            SubstructureSearchLib.SubstructureMoleculeMatchCriteria()
        )
        # The mapping passed to find_matches pairs the query atom index with the molecule atom index.
        ct = matcher.find_matches(
            atom._atom.molecule(),
            {index: atom.index}
        )
        return bool(ct)

    def nmatch_molecule(self, molecule):
        '''Returns number of query matches within the specified molecule.

        The count is the number of atoms of the molecule for which
        :meth:`match_atom` is true, i.e. which match the first query atom.

        :param molecule: a :class:`ccdc.molecule.Molecule` instance.
        :returns: integer

        >>> s = QuerySubstructure()
        >>> _ = s.add_bond('Single', QueryAtom('Cl'), QueryAtom('C'))
        >>> mol = EntryReader('csd').molecule('AABHTZ')
        >>> s.nmatch_molecule(mol)
        2
        '''
        return sum(1 for a in molecule.atoms if self.match_atom(a))

    def match_molecule(self, molecule):
        '''Whether or not the query matches the specified molecule.

        :param molecule: a :class:`ccdc.molecule.Molecule` instance.
        :returns: bool

        >>> s = QuerySubstructure()
        >>> _ = s.add_bond('Double', QueryAtom('C'), QueryAtom('O'))
        >>> mol = EntryReader('csd').molecule('AABHTZ')
        >>> s.match_molecule(mol)
        True
        '''
        matcher = SubstructureSearchLib.SubstructureMoleculeGraphSearch(
            self._substructure,
            SubstructureSearchLib.SubstructureMoleculeMatchCriteria()
        )
        return bool(matcher.find_matches(molecule._molecule))

###################################################################################

class SMARTSSubstructure(QuerySubstructure):
    '''A substructure query built from a SMARTS string.

    Let us create a ketone SMARTSSubstructure as an example.

    >>> smarts_query = SMARTSSubstructure("[CD4][CD3](=[OD1])[CD4]")
    >>> print(smarts_query.smarts)
    [CD4][CD3](=[OD1])[CD4]

    There is a minor extension to Daylight SMARTS to allow the representation of
    quadruple, delocalised and pi bonds, using the characters '_', '"' and '|' respectively.

    There is a second minor extension to allow easy access to the indices of the atoms.

    >>> query = SMARTSSubstructure("[#6:0]([#7]-H)[#8:1][#6:2]")
    >>> print(query.label_to_atom_index(0))
    0
    >>> print(query.label_to_atom_index(1))
    3
    '''
    def __init__(self, smarts):
        '''Initialise a SMARTS query with a string.'''
        self._reader = SubstructureSearchLib.SMARTSSubstructureReader()
        # Going through the property setter parses the string and stores the
        # resulting substructure, which is then handed to the base initialiser.
        self.smarts = smarts
        QuerySubstructure.__init__(self, _substructure=self._substructure)

    @property
    def smarts(self):
        '''The SMARTS string.'''
        return self._smarts

    @smarts.setter
    def smarts(self, smarts):
        '''Store the string, rebuild the substructure from it and empty the measurements.'''
        self._smarts = smarts
        self._substructure = self._reader.substructure(self._smarts)
        self.measurements = []

    def label_to_atom_index(self, label):
        '''Translate a SMARTS label into the appropriate substructure atom index

        :param label: the label; it is converted to a string before the lookup.
        :raises: KeyError if the reader has no atom for the label
        '''
        labelled_atom = self._reader.label_to_atom(str(label))
        if labelled_atom:
            return labelled_atom.index()
        raise KeyError(f"No atom with label {label}")

###################################################################################

class MoleculeSubstructure(QuerySubstructure):
    '''Make a substructure query from an entire molecule.

    Can be used to search for exact matches of a molecule when appropriate num_bonds or
    add_connected_element_count constraints are set on the QueryAtoms. Furthermore if
    hydrogen atoms have been removed from the molecule used to initialise the
    MoleculeSubstructure it can be used to find hits that match the heavy
    atoms as a substructure.

    :param mol: :class:`ccdc.molecule.Molecule`
    :param match_stereochemistry: Should the substructure constrain target stereochemistry to match the input molecule's stereochemistry?
    :raises: TypeError if the passed in molecule has multiple components since multi-component molecule substructure searches are not supported. The components should be added as separate substructures.

    >>> mol = EntryReader('csd').molecule('AABHTZ')
    >>> sub = MoleculeSubstructure(mol)
    '''
    def __init__(self, mol, match_stereochemistry=False):
        '''Build the substructure from the single-component molecule ``mol``.'''
        # Multi-component molecules are rejected before anything is built.
        if len(mol.components) > 1:
            raise TypeError('Multi-component molecule substructures are not supported')

        if match_stereochemistry:
            stereo_mode = SubstructureSearchLib.Substructure.MATCH_STEREOCHEMISTRY
        else:
            stereo_mode = SubstructureSearchLib.Substructure.NO_STEREOCHEMISTRY

        QuerySubstructure.__init__(
            self,
            _substructure=SubstructureSearchLib.Substructure.instantiate(mol._molecule, stereo_mode),
        )

###################################################################################

class ConnserSubstructure(QuerySubstructure):
    '''Read a Conquest query language file.'''
    # A Connser file must contain '*CONN' (in any case) somewhere in its text.
    required_content = re.compile(r'\*CONN', re.IGNORECASE)

    def __init__(self, file_name, _conn=None):
        '''Read the file.

        :param file_name: path to the Connser file
        :raises: IOError if the file cannot be read or if it is empty or if it does not contain '*CONN'

        When the private ``_conn`` argument is given the file is not read and
        the substructure is taken from ``_conn`` instead.
        '''
        if _conn is None:
            try:
                f = open(file_name)
            except Exception as exc:
                # Any failure to open is reported as IOError, but KeyboardInterrupt
                # and SystemExit are no longer swallowed as they were by a bare except.
                raise IOError('File cannot be read: %s' % file_name) from exc
            with f:
                # The context manager closes the file even if reading fails.
                txt = f.read()
            if not txt or self.required_content.search(txt) is None:
                raise IOError('File is not a connser file: %s' % file_name)
            self._conn = SubstructureSearchLib.ConnserFile(file_name)
            self.name = os.path.splitext(os.path.basename(file_name))[0]
        else:
            self._conn = _conn
            self.name = 'string'
        substructure = self._conn.substructure()
        QuerySubstructure.__init__(self, _substructure=substructure)

    def interaction_library_contact_atoms(self):
        '''Provide the list of indexes of atoms into the substructure (optionally) defined in the ConnSer query
        for generating the data in the CCDC interaction library

        The list of indexes are into the list of substructure atoms with the associated substructure

        see :mod:`ccdc.interactions` for more information on the interaction library
        '''
        return self._conn.isostar_contact_atom_indexes()

    @staticmethod
    def from_string(text):
        '''Create a substructure from a textual representation of a Connser file.

        :param text: the content of a Connser file; it is converted with ``str()`` before being read.
        :returns: :class:`ConnserSubstructure` whose ``name`` is ``'string'``
        '''
        _conn = SubstructureSearchLib.ConnserFile()
        stream = UtilitiesLib.istringstream(str(text))
        _conn.read(stream)
        return ConnserSubstructure('string', _conn=_conn)

###########################################################################

class XMLSubstructure(QuerySubstructure):
    '''A :class:`ccdc.search.QuerySubstructure` read from an XML file. Deprecated.'''
    def __init__(self, fname):
        '''Load the first substructure found in an XML formatted file.
        Deprecated.

        :param fname: path to XML file
        :raises: IOError if the file does not exist
        '''
        # The deprecation warning is issued before the path is checked.
        warnings.warn('''This class is deprecated and will be removed in a later version.''', DeprecationWarning)
        file_is_present = os.path.exists(fname)
        if not file_is_present:
            raise IOError('The file %s does not exist' % fname)
        QuerySubstructure.__init__(self)
        xml_reader = SubstructureSearchLib.XMLSubstructureReader()
        xml_reader.load(fname)
        # Replace the empty substructure created by the base initialiser.
        self._substructure = xml_reader.substructure(0)

###########################################################################
#   Searches
###########################################################################


class Search(object):
    '''Common base class for searches'''
    class Settings(object):
        '''Base class for search settings.'''
        def __init__(self, _settings=None):
            '''Wrap ``_settings``, or a newly created generic settings object when it is ``None``.'''
            if _settings is None:
                _settings = CSDSQLDatabaseLib.GenericCrystalStructureDatabaseSearchSettings()
            self._settings = _settings

        def __str__(self):
            '''List every property defined directly on this class with its current value.'''
            lines = [
                'Settings(',
                '\n'.join(
                    '\t%s = %s' % (name, getattr(self, name))
                    for name, member in self.__class__.__dict__.items()
                    if isinstance(member, property)
                ),
                ')'
            ]
            return '\n'.join(lines)

        def _has_filter_set(self):
            '''Private.'''
            # NOTE(review): no_errors is not consulted here -- confirm whether
            # that omission is intentional.
            return (
                self.has_3d_coordinates or self.no_disorder or self.no_powder or
                self.only_organic or self.only_organometallic or self.max_r_factor < 10000.0 or
                self.not_polymeric or self.no_metals or self.must_have_elements or self.must_not_have_elements or
                self.no_ions
            )

        @property
        def has_3d_coordinates(self):
            '''Constrain hits to have 3d coordinates.'''
            return self._settings.has_3d_coordinates()

        @has_3d_coordinates.setter
        def has_3d_coordinates(self, value):
            self._settings.set_has_3d_coordinates(value)

        @property
        def no_disorder(self):
            '''Constrain hits to have no disorder.

            The value will be False (no filtering), 'Non-hydrogen' (filter structures with heavy atom disorder)
            or 'All' (filter structures with any disordered atoms).

            When setting, any false value turns filtering off, the string 'all'
            (in any case) selects 'All', and any other true value selects
            'Non-hydrogen'.
            '''
            d = {
                self._settings.DISORDER_NOT_FILTERED: False,
                self._settings.NO_NON_HYDROGEN_DISORDER: 'Non-hydrogen',
                self._settings.NO_DISORDER: 'All'
            }
            return d[self._settings.disorder()]

        @no_disorder.setter
        def no_disorder(self, value):
            if not value:
                self._settings.set_disorder(self._settings.DISORDER_NOT_FILTERED)
            elif isinstance(value, str) and value.lower() == 'all':
                self._settings.set_disorder(self._settings.NO_DISORDER)
            else:
                self._settings.set_disorder(self._settings.NO_NON_HYDROGEN_DISORDER)

        @property
        def no_powder(self):
            '''Constrain hits not to be powder studies.'''
            return self._settings.powder() == self._settings.DOESNT_CONTAIN_POWDER_DIFFRACTION_DATA

        @no_powder.setter
        def no_powder(self, value):
            self._settings.set_powder(
                self._settings.DOESNT_CONTAIN_POWDER_DIFFRACTION_DATA if value else
                self._settings.POWDER_NOT_FILTERED)

        @property
        def only_organic(self):
            '''Constrain hits to be organic compounds.'''
            return self._settings.only_organic()

        @only_organic.setter
        def only_organic(self, value):
            self._settings.set_only_organic(value)

        @property
        def only_organometallic(self):
            '''Constrain hits to be only organometallic compounds.'''
            return self._settings.only_organometallic()

        @only_organometallic.setter
        def only_organometallic(self, value):
            self._settings.set_only_organometallic(value)

        @property
        def max_r_factor(self):
            '''Constrain the hits to have an R-factor less than this.

            The R-factor will be expressed as a percentage.'''
            return self._settings.max_rfactor()

        @max_r_factor.setter
        def max_r_factor(self, value):
            self._settings.set_max_rfactor(value)

        @property
        def no_errors(self):
            '''Constrain the hits to have no suppressed errors.'''
            return self._settings.no_errors()

        @no_errors.setter
        def no_errors(self, value):
            self._settings.set_no_errors(value)

        @property
        def not_polymeric(self):
            '''Constrain the hits not to be polymeric structures.'''
            return self._settings.not_polymeric()

        @not_polymeric.setter
        def not_polymeric(self, value):
            self._settings.set_not_polymeric(value)

        @property
        def no_metals(self):
            '''Constrain the hits not to have a metal atom.'''
            return self._settings.no_metals()

        @no_metals.setter
        def no_metals(self, value):
            self._settings.set_no_metals(value)

        @property
        def no_ions(self):
            """Constrain the hits not to have a residue with a formal charge.
            The hits may include zwitterions.
            """
            return self._settings.no_charged_residues()

        @no_ions.setter
        def no_ions(self, tf):
            self._settings.set_no_charged_residues(tf)

        @property
        def must_have_elements(self):
            '''Elements which must be present in a hit.

            The elements will be presented as a list of atomic symbols.

            >>> settings = Search.Settings()
            >>> settings.must_have_elements = ['C', 'N', 'O', 'S']
            >>> print(settings.must_have_elements)
            [C (6), N (7), O (8), S (16)]

            '''
            es = self._settings.must_have()
            return es.elements()

        @must_have_elements.setter
        def must_have_elements(self, value):
            # Each item is converted to an element exactly once; an invalid item
            # raises here, before the settings are modified.
            es = ChemistryLib.ElementSet()
            es.add_elements([ChemistryLib.Element(x) for x in value])
            self._settings.set_must_have(es)

        @property
        def must_not_have_elements(self):
            '''Elements which must not be present in a hit.

            The elements will be presented as a list of symbols.

            >>> settings = Search.Settings()
            >>> settings.must_not_have_elements = ['S', 'P', 'K']
            >>> print(settings.must_not_have_elements)
            [P (15), S (16), K (19)]
            '''
            return self._settings.must_not_have().elements()

        @must_not_have_elements.setter
        def must_not_have_elements(self, value):
            es = ChemistryLib.ElementSet()
            es.add_elements([ChemistryLib.Element(x) for x in value])
            self._settings.set_must_not_have(es)

        @property
        def max_hit_structures(self):
            '''The number of structures which may be returned from a search.'''
            return self._settings.maximum_hits_limit()

        @max_hit_structures.setter
        def max_hit_structures(self, value):
            '''Set the number of structures to be returned.

            If set to 0, all hits will be returned.
            '''
            self._settings.set_maximum_hits_limit(value)

        def test(self, argument):
            '''Test that the argument satisfies the requirements of the settings instance.

            :param argument: a :class:`ccdc.entry.Entry`, :class:`ccdc.crystal.Crystal`
                             or :class:`ccdc.molecule.Molecule` instance.
            :returns: bool

            >>> entry = EntryReader('csd').entry('AABHTZ')
            >>> settings = Search.Settings()
            >>> settings.test(entry)
            True
            >>> settings.only_organometallic = True
            >>> settings.test(entry)
            False
            '''
            if isinstance(argument, Entry):
                return CSDSQLDatabaseLib.test_entry_settings_constraints(
                    self._settings, argument._entry
                )
            elif isinstance(argument, Crystal):
                # A crystal is tested through its molecule; a crystal whose
                # molecule cannot be obtained fails the test.
                try:
                    argument = argument.molecule
                except (RuntimeError, TypeError):
                    return False
            return CSDSQLDatabaseLib.test_molecule_settings_constraints(
                self._settings, argument._molecule
            )

    class SearchHit(object):
        '''Base class for search hits.

        Provides access to molecules, crystals and entries.
        '''
        def __init__(self, identifier,
                     _database=None, _entry=None, _crystal=None, _molecule=None, _binary_database=None):
            '''Initialise.

            When a ``_database`` or ``_binary_database`` is supplied, any
            ``_entry``, ``_crystal`` or ``_molecule`` passed in is discarded.
            '''
            if _database is not None or _binary_database is not None:
                _entry = _crystal = _molecule = None
            self._identifier = identifier
            self._database = _database
            self._entry = _entry
            self._crystal = _crystal
            self._molecule = _molecule
            self._binary_database = _binary_database

        @property
        def identifier(self):
            '''The string identifier of the hit.'''
            return self._identifier

        @identifier.setter
        def identifier(self, value):
            self._identifier = value

        @property
        def entry(self):
            '''The entry corresponding to a search hit.

            ``None`` if the hit holds no entry, database, molecule or crystal.
            '''
            if self._entry is not None:  # pylint: disable=E0203
                return self._entry
            if self._database is not None:  # pylint: disable=E1101
                return self._database.entry(self.identifier)
            if self._binary_database is not None:
                return Entry(self._binary_database.entry(UtilitiesLib.DatabaseEntryIdentifier(self.identifier)))
            if self._molecule is not None:
                return Entry.from_molecule(self._molecule)  # pylint: disable=E1101
            if self._crystal is not None:
                return Entry.from_molecule(self._crystal.molecule)  # pylint: disable=E1101
            return None

        @property
        def crystal(self):
            '''The crystal corresponding to a search hit.

            ``None`` if the hit holds no crystal, database, entry or molecule.
            '''
            if self._crystal is not None:
                return self._crystal
            if self._database is not None:
                return self._database.crystal(self.identifier)
            if self._binary_database is not None:
                return self.entry.crystal
            if self._entry is not None:
                return self._entry.crystal
            if self._molecule is not None:
                return Entry.from_molecule(self._molecule).crystal
            return None

        @property
        def molecule(self):
            '''The molecule corresponding to a search hit.

            ``None`` if the hit holds no molecule, database, entry or crystal.
            '''
            if self._molecule is not None:
                return self._molecule
            if self._database is not None:
                return self._database.molecule(self.identifier)
            if self._binary_database is not None:
                return self.entry.molecule
            if self._entry is not None:
                return self._entry.molecule
            if self._crystal is not None:
                return self._crystal.molecule
            return None

    def __init__(self, settings=None):
        '''This class is abstract.'''

    def search(self, database=None, max_hit_structures=None, max_hits_per_structure=None):
        '''Perform a search.

        :param database: what to search. ``None`` or ``'CSD'`` searches an
                         ``EntryReader('CSD')``; a database reader is searched
                         directly; any other string is opened with ``CrystalReader``;
                         a molecule, crystal or entry is searched individually; a list
                         or tuple of strings is passed to ``EntryReader``; any other
                         list or tuple has each item searched in turn and the hits
                         concatenated. An empty list or tuple gives no hits.
        :param max_hit_structures: if not ``None``, stored in the settings as the limit on hit structures.
        :param max_hits_per_structure: if not ``None``, stored in the settings as the
                                       limit on hits per structure, provided the
                                       settings have a ``max_hits_per_structure`` attribute.
        :returns: the result of the ``_search_reader``, ``_search_molecule``,
                  ``_search_crystal`` or ``_search_entry`` method used (these are
                  not defined on this base class), or a list of hits for a list or tuple.
        :raises: TypeError if the database argument is of an unsupported type
        '''
        self.settings._settings.reset_hits()
        # Decide this before touching the settings: assigning the attribute first
        # would make the check below always succeed.
        has_per_structure_limit = hasattr(self.settings, 'max_hits_per_structure')
        if max_hit_structures is not None or max_hits_per_structure is not None:
            # Giving either limit clears both before the supplied one(s) are applied.
            self.settings.max_hit_structures = 0
            if has_per_structure_limit:
                self.settings.max_hits_per_structure = 0
        if max_hit_structures is not None:
            self.settings.max_hit_structures = max_hit_structures
        if max_hits_per_structure is not None and has_per_structure_limit:
            self.settings.max_hits_per_structure = max_hits_per_structure

        if database is None or database == 'CSD':
            return self._search_reader(EntryReader('CSD'))
        if isinstance(database, _DatabaseReader):
            return self._search_reader(database)
        if isinstance(database, str):
            return self._search_reader(CrystalReader(database))
        if isinstance(database, molecule.Molecule):
            return self._search_molecule(database)
        if isinstance(database, Crystal):
            return self._search_crystal(database)
        if isinstance(database, Entry):
            return self._search_entry(database)
        if isinstance(database, (list, tuple)):
            # iterable - could be a database pool, or a gcd list or a list of mol/cryst/ent
            if not database:
                # Nothing to search; avoids indexing into an empty sequence.
                return []
            if isinstance(database[0], str):
                return self._search_reader(EntryReader(database))
            return [h for x in database for h in self.search(x)]
        raise TypeError('Cannot search this database: %s' % database)


[docs]class SimilaritySearch(Search):
    '''Class to define and run similarity searches.'''
[docs]    class Settings(Search.Settings):
        coeffs = utilities.bidirectional_dict(
            dice=SubstructureSearchLib.DICE,
            tanimoto=SubstructureSearchLib.TANIMOTO,
        )
        _sort_order = utilities.bidirectional_dict(
            value_order=SubstructureSearchLib.VALUE_ORDER,
            alphabetic_order=SubstructureSearchLib.ALPHABETIC_ORDER
        )
        '''Settings for a similarity search.'''
        def __init__(self, threshold=0.7, coefficient='tanimoto', _settings=None):
            if _settings is None:
                if threshold is None:
                    threshold = 0.7
                if coefficient is None:
                    coefficient = 'tanimoto'
                _settings = CSDSQLDatabaseLib.SimilaritySearchSettings(
                    self.coeffs.prefix_lookup(coefficient), threshold
                )
            self._settings = _settings
            super(SimilaritySearch.Settings, self).__init__(_settings=self._settings)

        @property
        def threshold(self):
            '''The similarity threshold to apply.

            This is a value between 0.0 and 1.0.
            '''
            return self._settings.threshold()

        @threshold.setter
        def threshold(self, value):
            self._settings.set_threshold(value)

        @property
        def coefficient(self):
            '''This should be either 'dice' or 'tanimoto', the default.
            '''
            return self.coeffs.inverse_lookup(self._settings.coefficient())

        @coefficient.setter
        def coefficient(self, value):
            if isinstance(value, str):
                self._settings.set_coefficient(self.coeffs.prefix_lookup(value))
            else:
                self._settings.set_coefficient(value)

        @property
        def sort_order(self):
            '''The order in which hits will be sorted.

            THis should be either 'alphabetic' or 'value', the default.
            '''
            return self._sort_order.inverse_lookup(self._settings.sort_order)

        @sort_order.setter
        def sort_order(self, value):
            self._settings.set_sort_order(self._sort_order.prefix_lookup(value))

[docs]    class SimilarityHit(Search.SearchHit):
        '''A search hit recording the similarity measure.

        The SimilarityHit instance will give access to the identifier of the hit, the value of the similarity to the query molecule,
        the entry, crystal or molecule of the hit.
        '''
        def __init__(self, similarity, identifier,
                     _database=None, _entry=None, _crystal=None, _molecule=None, _binary_database=None):
            Search.SearchHit.__init__(
                self, identifier,
                _database=_database, _entry=_entry, _crystal=_crystal, _molecule=_molecule,
                _binary_database=_binary_database)
            self.similarity = similarity
            self.identifier = identifier

    def __init__(self, mol=None, threshold=0.7, coefficient='tanimoto', settings=None):
        '''Set up the settings and the query molecule.

        :param mol: :class:`ccdc.molecule.Molecule` or :class:`ccdc.search.QuerySubstructure`
        :param threshold: float (0.0 to 1.0)
        :param coefficient: one of 'tanimoto' or 'dice'
        :param settings: a ready-made settings object; when given, ``threshold``
                         and ``coefficient`` are ignored.
        '''
        self.settings = (
            SimilaritySearch.Settings(threshold, coefficient)
            if settings is None else settings
        )
        # Assigning through the property also prepares the fingerprint data.
        self.molecule = mol

    @property
    def molecule(self):
        '''The query molecule.'''
        return self._molecule

    @molecule.setter
    def molecule(self, mol):
        '''Store the query and build its fingerprint and substructure.

        ``None`` only clears the substructure, so a query has to be supplied
        later. A :class:`QuerySubstructure` is fingerprinted from its own
        substructure; anything else is treated as a molecule.
        '''
        self._molecule = mol
        if mol is None:
            self._substructure = None
            return

        from_query = isinstance(mol, QuerySubstructure)
        if from_query:
            self._fp = SubstructureSearchLib.ChemicalFingerprintBuilderSubstructure()
            source = mol._substructure
        else:
            self._fp = SubstructureSearchLib.ChemicalFingerprintBuilderMolecule()
            source = mol._molecule
        self._sp = self._fp.similarity_fingerprint(source)
        if from_query:
            self._substructure = source
        else:
            self._substructure = SubstructureSearchLib.Substructure.instantiate(source)

[docs]    @staticmethod
    def from_xml(xml):
        '''Create a SimilaritySearch from an XML representation.

        :param xml: XML string
        '''
        stream = UtilitiesLib.istringstream(xml)
        reader = SubstructureSearchLib.XMLSubstructureReader()
        reader.load(stream)
        try:
            coeff = SimilaritySearch.Settings.coeffs.inverse_lookup(
                reader.options().similarity_coefficient()
            )
        except RuntimeError:
            coeff = None
        try:
            thresh = reader.options().similarity_threshold()
        except RuntimeError:
            thresh = None
        q = QuerySubstructure(_substructure=reader.substructure(0))
        return SimilaritySearch(q, thresh, coeff)

[docs]    @staticmethod
    def from_xml_file(file_name):
        '''Create a SimilaritySearch from an XML file.

        :param file_name: path to XML file

        :raises: IOError when the file does not exist
        '''
        if not os.path.exists(file_name):
            raise IOError('The file %s does not exist' % file_name)
        with open(file_name) as f:
            return SimilaritySearch.from_xml(f.read())

[docs]    def read_xml(self, xml):
        '''Read a query from an an XML representation.

        :param xml: XML string
        '''
        stream = UtilitiesLib.istringstream(xml)
        reader = SubstructureSearchLib.XMLSubstructureReader()
        reader.load(stream)
        try:
            self.settings.coefficient = SimilaritySearch.Settings.coeffs.inverse_lookup(
                reader.options().similarity_coefficient()
            )
        except RuntimeError:
            pass
        try:
            self.settings.threshold = reader.options().similarity_threshold()
        except RuntimeError:
            pass
        sub = reader.substructure(0)
        self._substructure = sub
        self._molecule = None

[docs]    def read_xml_file(self, file_name):
        '''Read an XML file into the similarity searcher.

        :param file_name: path to XML file
        :raises: IOError if the file cannot be read
        '''
        if not os.path.exists(file_name):
            raise IOError('The file %s does not exist' % file_name)
        with open(file_name) as f:
            self.read_xml(f.read())

    @property
    def threshold(self):
        '''The similarity threshold to use (held on the settings object).'''
        settings = self.settings
        return settings.threshold

    @threshold.setter
    def threshold(self, value):
        '''Store a new threshold on the settings object.'''
        settings = self.settings
        settings.threshold = value

    @property
    def coefficient(self):
        '''Which coefficient to use when determining similarity (held on the settings object).'''
        settings = self.settings
        return settings.coefficient

    @coefficient.setter
    def coefficient(self, value):
        settings = self.settings
        settings.coefficient = value

[docs]    def search_molecule(self, mol):
        '''Search a molecule.

        This can be used to determine a similarity coefficient against the
        given molecule.

        :param mol: :class:`ccdc.molecule.Molecule`
        :returns: :class:`SimilaritySearch.SimilarityHit`

        >>> csd = EntryReader('csd')
        >>> ibuprofen = csd.molecule('HXACAN')
        >>> searcher = SimilaritySearch(ibuprofen)
        >>> hit = searcher.search_molecule(csd.molecule('IBPRAC'))
        >>> print(round(hit.similarity, 3))
        0.161
        '''
        fp = self._fp.similarity_fingerprint(mol._molecule)
        if self.settings.coefficient.lower() == 'dice':
            coeff = self._sp.dice(fp)
        else:
            coeff = self._sp.tanimoto(fp)
        return SimilaritySearch.SimilarityHit(coeff, mol.identifier, _molecule=mol)

    def _search_reader(self, reader):
        '''Private: run the similarity search against a database reader.

        The reader's similarity searcher is created on first use and cached on
        the reader. When filters are set on the settings, the hit limit is
        lifted for the underlying search and applied here instead, counting only
        hits whose entry passes ``settings.test``.

        :param reader: a database reader exposing ``_db``
        :returns: list of :class:`SimilaritySearch.SimilarityHit`
        '''
        self.settings._settings.reset_hits()
        if not hasattr(reader, '_similarity_searcher'):
            reader._similarity_searcher = reader._db.searcher_factory().similarity_searcher()

        if not self.settings._has_filter_set():
            results = reader._similarity_searcher.search(self._substructure, self.settings._settings)
            return [
                SimilaritySearch.SimilarityHit(r.similarity(), r.identifier().str(), _binary_database=reader._db)
                for r in results
            ]

        max_hits = self.settings.max_hit_structures
        hits = []
        try:
            if max_hits != maxint32:
                # Filtering happens afterwards, so the raw search must not be truncated.
                self.settings.max_hit_structures = maxint32
            results = reader._similarity_searcher.search(self._substructure, self.settings._settings)
            for r in results:
                if max_hits and len(hits) >= max_hits:
                    break
                h = SimilaritySearch.SimilarityHit(
                    r.similarity(), r.identifier().str(), _binary_database=reader._db)
                if self.settings.test(h.entry):
                    hits.append(h)
        finally:
            # Restore the caller's limit even if the search or a filter test raises,
            # so the settings object is never left with the temporary value.
            self.settings.max_hit_structures = max_hits
        return hits

    def _search_entry(self, entry):
        '''Private: search the molecule of an entry that passes the settings test.

        Returns an empty list when the entry is filtered out or when reading its
        molecule raises TypeError.
        '''
        if not self.settings.test(entry):
            return []
        try:
            mol = entry.molecule
        except TypeError:
            return []
        return self._search_molecule(mol)

    def _search_crystal(self, crystal):
        '''Private: search the molecule of a crystal that passes the settings test.

        Returns an empty list when the crystal is filtered out or when reading
        its molecule raises TypeError.
        '''
        if not self.settings.test(crystal):
            return []
        try:
            mol = crystal.molecule
        except TypeError:
            return []
        return self._search_molecule(mol)

    def _search_molecule(self, mol):
        '''Private: return a one-element hit list if the molecule is similar enough.

        The molecule must pass the settings test and reach the similarity
        threshold; otherwise an empty list is returned.
        '''
        if not self.settings.test(mol):
            return []
        candidate_fp = self._fp.similarity_fingerprint(mol._molecule)
        # Anything other than 'dice' falls back to the Tanimoto coefficient.
        if self.settings.coefficient.lower() == 'dice':
            score = self._sp.dice(candidate_fp)
        else:
            score = self._sp.tanimoto(candidate_fp)
        if score < self.settings.threshold:
            return []
        return [SimilaritySearch.SimilarityHit(score, mol.identifier, _molecule=mol)]

###################################################################################

[docs]class TextNumericSearch(Search):
    '''Class to define and run text/numeric searches in a crystal structure database.

    It is possible to add one or more criteria for the query to match.

    >>> text_numeric_query = TextNumericSearch()
    >>> text_numeric_query.add_compound_name('aspirin')
    >>> text_numeric_query.add_citation(year=[2011, 2013])
    >>> for hit in text_numeric_query.search(max_hit_structures=3):
    ...     print(hit.identifier)
    ...
    ACSALA19
    ACSALA20
    ACSALA21

    A human-readable representation of the queries may be obtained:

    >>> print(', '.join(q for q in text_numeric_query.queries))
    Compound name aspirin anywhere , Journal year in range 2011-2013
    '''
    # Names of the text match modes accepted by the text criteria, mapped to the
    # corresponding DatabaseEntryLib match-type constants.
    modes = utilities.bidirectional_dict(
        anywhere=DatabaseEntryLib.ANYWHERE,
        exact=DatabaseEntryLib.EXACT_WORD,
        separate=DatabaseEntryLib.EXACT_SPACE_SEPARATED_WORD,
        is_null=DatabaseEntryLib.IS_NULL,
        not_null=DatabaseEntryLib.NOT_NULL,
        start_of_word=DatabaseEntryLib.START_OF_WORD,
        start=DatabaseEntryLib.STARTS_WITH,
    )

    # Display names of the numeric fields, used when formatting numeric queries.
    _numeric_fields = dict((
        (DatabaseEntryLib.CCDC_JOURNAL_CODEN, 'Journal identifier'),
        (DatabaseEntryLib.JOURNAL_YEAR, 'Journal year'),
        (DatabaseEntryLib.CCDC_DEPOSITION_NUMBER, 'CCDC number'),
        (DatabaseEntryLib.PREDICTED_SEMICONDUCTOR_DYNAMIC_DISORDER, 'Predicted semiconductor dynamic disorder'),
        (DatabaseEntryLib.PREDICTED_SEMICONDUCTOR_SINGLET_STATE_1_ENERGY, 'Predicted semiconductor singlet state 1 energy'),
        (DatabaseEntryLib.PREDICTED_SEMICONDUCTOR_SINGLET_STATE_2_ENERGY, 'Predicted semiconductor singlet state 2 energy'),
        (DatabaseEntryLib.PREDICTED_SEMICONDUCTOR_TRIPLET_STATE_1_ENERGY, 'Predicted semiconductor triplet state 1 energy'),
        (DatabaseEntryLib.PREDICTED_SEMICONDUCTOR_TRIPLET_STATE_2_ENERGY, 'Predicted semiconductor triplet state 2 energy'),
        (DatabaseEntryLib.PREDICTED_SEMICONDUCTOR_REORGANIZATION_ENERGY, 'Predicted semiconductor hole reorganization energy'),
        (DatabaseEntryLib.PREDICTED_SEMICONDUCTOR_TRANSFER_INTEGRAL, 'Predicted semiconductor transfer integral'),
        (DatabaseEntryLib.PREDICTED_SEMICONDUCTOR_HOMO_LUMO_GAP, 'Predicted semiconductor HOMO-LUMO gap'),
        (DatabaseEntryLib.PREDICTED_SEMICONDUCTOR_SINGLET_STATE_1_OSCILLATOR_STRENGTH, 'Predicted semiconductor singlet state 1 oscillator strength'),
        (DatabaseEntryLib.PREDICTED_SEMICONDUCTOR_SINGLET_STATE_2_OSCILLATOR_STRENGTH, 'Predicted semiconductor singlet state 2 oscillator strength'),
    ))

    # Display names of the text fields, used when formatting text queries.
    _text_fields = dict((
        (DatabaseEntryLib.ALL_TEXT, 'All text'),
        (DatabaseEntryLib.ANALOGUES, 'Analogues'),
        (DatabaseEntryLib.AUTHOR_NAME, 'Author'),
        (DatabaseEntryLib.BIOACTIVITY, 'Bioactivity'),
        (DatabaseEntryLib.COLOR, 'Color'),
        (DatabaseEntryLib.COMPOUND_NAME, 'Compound name'),
        (DatabaseEntryLib.DISORDER, 'Disorder'),
        (DatabaseEntryLib.DOI, 'DOI'),
        (DatabaseEntryLib.HABIT, 'Habit'),
        (DatabaseEntryLib.JDS_DEPOSITION_NUMBER, 'JDS deposition number'),
        (DatabaseEntryLib.JOURNAL_PAGE, 'Journal page'),
        (DatabaseEntryLib.JOURNAL_VOLUME, 'Journal volume'),
        (DatabaseEntryLib.PEPTIDE_SEQUENCE, 'Peptide sequence'),
        (DatabaseEntryLib.PHASE_TRANSITIONS, 'Phase transitions'),
        (DatabaseEntryLib.POLYMORPH, 'Polymorph'),
        (DatabaseEntryLib.RECRYSTALLISATION_SOLVENT, 'Recrystallisation solvent'),
        (DatabaseEntryLib.REFCODE, 'All refcodes'),
        (DatabaseEntryLib.MAIN_REFCODE_ONLY, 'Refcode'),
        (DatabaseEntryLib.SOURCE, 'Source'),
        (DatabaseEntryLib.SPACEGROUP_NAME, 'Spacegroup'),
        (DatabaseEntryLib.SYNONYMS, 'Synonyms'),
        (DatabaseEntryLib.HEAT_CAPACITY_NOTES, 'Heat capacity notes'),
        (DatabaseEntryLib.HEAT_OF_FUSION_NOTES, 'Heat of fusion notes'),
        (DatabaseEntryLib.SOLUBILITY_NOTES, 'Solubility notes'),

    ))

[docs]    class TextNumericSearchSettings(Search.Settings):
        '''Settings for a text/numeric search.

        No settings apart from those provided by the base class are required.
        '''
        # NOTE(review): TextNumericSearch.__init__ instantiates
        # TextNumericSearch.Settings rather than this class -- confirm which is intended.

[docs]    class TextNumericHit(Search.SearchHit):
        '''Hit from a TextNumericSearch.'''
        def __init__(self, identifier, _db):
            '''Store identifier and database'''
            Search.SearchHit.__init__(self, identifier, _binary_database=_db)

    def __init__(self, settings=None):
        '''Set up an empty text/numeric query.

        :param settings: settings object to use; a fresh
            ``TextNumericSearch.Settings()`` is created when omitted
        '''
        self.settings = TextNumericSearch.Settings() if settings is None else settings
        self.clear()
        # Filled on first access of the journals property.
        self._journal_list = None

[docs]    def clear(self):
        '''Restart a search.'''
        self._search = DatabaseEntryLib.CrystalStructureDatabaseTextNumericSearch()

    def _text_query(self, field, txt, mode='anywhere', ignore_non_alpha_num=False):
        '''Private: build a text query and add it to the search.

        Nothing is added when ``txt`` is empty, unless the mode is 'is_null' or
        'not_null'. The mode name is matched case-insensitively.
        '''
        match_mode = mode.lower()
        if not txt and match_mode not in ('is_null', 'not_null'):
            return
        text_query = DatabaseEntryLib.CrystalStructureDatabaseTextSearchQuery(
            field, self.modes[match_mode], txt
        )
        text_query.set_option(DatabaseEntryLib.IGNORE_NON_ALPHABETIC_CHARS, ignore_non_alpha_num)
        self._search.add_query(text_query)

    def _numeric_query(self, field, value):
        '''Private: build a numeric query and add it to the search.

        A list or tuple is treated as an inclusive range over its first two
        items; any other value must be matched exactly.
        '''
        if isinstance(value, (list, tuple)):
            condition = SubstructureSearchLib.InclusiveRange(value[0], value[1])
        else:
            condition = SubstructureSearchLib.EqualTo(value)
        numeric_query = DatabaseEntryLib.CrystalStructureDatabaseNumericSearchQuery(field, condition)
        self._search.add_query(numeric_query)

    def _text_queries(self):
        '''Private: the tuple of formatted text queries.

        Each item reads '<field name> <value> <mode> <option>', where the option
        part is empty unless the query's option 0 is set.
        '''
        def describe(query):
            field_name = self._text_fields[query.field()]
            text = query.value()
            mode_name = self.modes.inverse_lookup(query.match_type())
            # NOTE(review): option 0 is presumably IGNORE_NON_ALPHABETIC_CHARS -- confirm.
            if query.is_option_set(0):
                option = 'ignore non-alphanumeric'
            else:
                option = ''
            return '%s %s %s %s' % (field_name, text, mode_name, option)

        return tuple(describe(query) for query in self._search.text_queries())

    def _numeric_queries(self):
        '''Private: the tuple of formatted numeric queries.

        Each item reads '<field name> <condition name> <condition>'. Fields that
        have no entry in ``_numeric_fields`` (for example those added by the
        private ``_add_*`` helpers) are shown with a generic label instead of
        raising KeyError.
        '''
        def format_query(q):
            field = q.field()
            label = self._numeric_fields.get(field, 'Numeric field %s' % field)
            condition = q.condition()
            return '%s %s %s' % (label, condition.name(), condition)
        return tuple(
            format_query(q) for q in self._search.numeric_queries()
        )

    @property
    def queries(self):
        '''The current set of queries for this search, text queries first.

        >>> tns = TextNumericSearch()
        >>> tns.add_all_text('ibuprofen')
        >>> tns.add_author('Haisa')
        >>> print('; '.join(str(q).strip() for q in tns.queries))
        All text ibuprofen anywhere; Author Haisa anywhere
        '''
        text_part = self._text_queries()
        numeric_part = self._numeric_queries()
        return text_part + numeric_part

[docs]    def add_all_text(self, txt, mode='anywhere', ignore_non_alpha_num=False):
        '''Search for text anywhere in the entry.'''
        self._text_query(DatabaseEntryLib.ALL_TEXT, txt, mode=mode, ignore_non_alpha_num=ignore_non_alpha_num)

[docs]    def add_analogue(self, analogue, mode='anywhere', ignore_non_alpha_num=False):
        '''Search for an analogue.'''
        self._text_query(DatabaseEntryLib.ANALOGUES, analogue, mode=mode, ignore_non_alpha_num=ignore_non_alpha_num)

[docs]    def add_author(self, author, mode='anywhere', ignore_non_alpha_num=False):
        '''Search for an author.'''
        self._text_query(DatabaseEntryLib.AUTHOR_NAME, author, mode=mode, ignore_non_alpha_num=ignore_non_alpha_num)

[docs]    def add_bioactivity(self, activity, mode='anywhere', ignore_non_alpha_num=False):
        '''Search for a particular bio-activity.'''
        self._text_query(DatabaseEntryLib.BIOACTIVITY, activity, mode=mode, ignore_non_alpha_num=ignore_non_alpha_num)

[docs]    def add_color(self, color, mode='anywhere', ignore_non_alpha_num=False):
        '''Search for a particular colour.'''
        self._text_query(DatabaseEntryLib.COLOR, color, mode=mode, ignore_non_alpha_num=ignore_non_alpha_num)

[docs]    def add_compound_name(self, compound_name, mode='anywhere', ignore_non_alpha_num=False):
        '''Search for a compound name.

        The search checks the content both of
        :attr:`ccdc.entry.Entry.chemical_name` and
        :attr:`ccdc.entry.Entry.synonyms`.

        To illustrate this let us have a look at the CSD entry ``ABABEM``.

        >>> from ccdc.io import EntryReader
        >>> entry_reader = EntryReader('CSD')
        >>> ababem = entry_reader.entry('ABABEM')
        >>> print(ababem.chemical_name)
        Tetrahydro[1,3,4]thiadiazolo[3,4-a]pyridazine-1,3-dione
        >>> print(ababem.synonyms[0])
        8-Thia-1,6-diazabicyclo[4.3.0]nonane-7,9-dione

        The text ``azabicyclo[4.3.0]nonane`` is only found in the synonym. Let
        us search for it using a compound name search.

        >>> from ccdc.search import TextNumericSearch
        >>> query = TextNumericSearch()
        >>> query.add_compound_name('azabicyclo[4.3.0]nonane')
        >>> hits = query.search()

        Finally let us assert that we have found ``ABABEM``.

        >>> assert(u'ABABEM' in [h.identifier for h in hits])

        '''
        self._text_query(DatabaseEntryLib.COMPOUND_NAME, compound_name,
                         mode=mode, ignore_non_alpha_num=ignore_non_alpha_num)

[docs]    def add_disorder(self, disorder, mode='anywhere', ignore_non_alpha_num=False):
        '''Search for a disorder comment.'''
        self._text_query(DatabaseEntryLib.DISORDER, disorder, mode=mode, ignore_non_alpha_num=ignore_non_alpha_num)

[docs]    def add_doi(self, doi, mode='anywhere', ignore_non_alpha_num=False):
        '''Search for a DOI.'''
        self._text_query(DatabaseEntryLib.DOI, doi, mode=mode, ignore_non_alpha_num=ignore_non_alpha_num)

[docs]    def add_habit(self, habit, mode='anywhere', ignore_non_alpha_num=False):
        '''Search for a particular habit.'''
        self._text_query(DatabaseEntryLib.HABIT, habit, mode=mode, ignore_non_alpha_num=ignore_non_alpha_num)

[docs]    def add_peptide_sequence(self, peptide_sequence, mode='anywhere', ignore_non_alpha_num=False):
        '''Search for a peptide sequence.'''
        self._text_query(DatabaseEntryLib.PEPTIDE_SEQUENCE, peptide_sequence,
                         mode=mode, ignore_non_alpha_num=ignore_non_alpha_num)

[docs]    def add_phase_transition(self, phase_transition, mode='anywhere', ignore_non_alpha_num=False):
        '''Search for a phase transition.'''
        self._text_query(DatabaseEntryLib.PHASE_TRANSITIONS, phase_transition,
                         mode=mode, ignore_non_alpha_num=ignore_non_alpha_num)

[docs]    def add_polymorph(self, polymorph, mode='anywhere', ignore_non_alpha_num=False):
        '''Search for polymorph information.'''
        self._text_query(DatabaseEntryLib.POLYMORPH, polymorph, mode=mode, ignore_non_alpha_num=ignore_non_alpha_num)

[docs]    def add_solvent(self, solvent, mode='anywhere', ignore_non_alpha_num=False):
        '''Search for a solvent.'''
        self._text_query(DatabaseEntryLib.RECRYSTALLISATION_SOLVENT, solvent,
                         mode=mode, ignore_non_alpha_num=ignore_non_alpha_num)

[docs]    def add_identifier(self, refcode, mode='anywhere', ignore_non_alpha_num=False):
        '''Search for a refcode.'''
        self._text_query(DatabaseEntryLib.MAIN_REFCODE_ONLY, refcode,
                         mode=mode, ignore_non_alpha_num=ignore_non_alpha_num)

[docs]    def add_all_identifiers(self, refcode, mode='anywhere', ignore_non_alpha_num=False):
        '''Search for an identifier, including previous identifiers.

        >>> from ccdc.search import TextNumericSearch
        >>> query = TextNumericSearch()
        >>> query.add_all_identifiers('DABHUJ')
        >>> hits = query.search()
        >>> print(hits[0].identifier)
        ACPRET03
        >>> print(hits[0].entry.previous_identifier)
        DABHUJ

        '''
        self._text_query(DatabaseEntryLib.REFCODE, refcode, mode=mode, ignore_non_alpha_num=ignore_non_alpha_num)

[docs]    def add_source(self, source, mode='anywhere', ignore_non_alpha_num=False):
        '''Search for a source.

        >>> from ccdc.search import TextNumericSearch
        >>> searcher = TextNumericSearch()
        >>> searcher.add_source('toad')
        >>> hits = searcher.search(max_hit_structures=5)
        >>> for h in hits:
        ...     print('%-8s: %s' % (h.identifier, h.entry.source))
        ...
        CUXYAV  : Ch'an Su (dried venom of Chinese toad)
        EWAWUW  : isolated from the eggs of toad Bufo bufo gargarizans
        EWAXAD  : isolated from the eggs of toad Bufo bufo gargarizans
        FIFDUT  : dried venom of Chinese toad Ch'an Su
        FIFFAB  : dried venom of Chinese toad Ch'an Su


        '''
        self._text_query(DatabaseEntryLib.SOURCE, source, mode=mode, ignore_non_alpha_num=ignore_non_alpha_num)

[docs]    def add_spacegroup_symbol(self, spacegroup_symbol, mode='anywhere', ignore_non_alpha_num=False):
        '''Search for a spacegroup symbol or any alias of that symbol.'''
        self._text_query(DatabaseEntryLib.SPACEGROUP_NAME, spacegroup_symbol, mode=mode, ignore_non_alpha_num=ignore_non_alpha_num)

[docs]    def add_synonym(self, synonym, mode='anywhere', ignore_non_alpha_num=False):
        '''Search for a synonym.'''
        self._text_query(DatabaseEntryLib.SYNONYMS, synonym, mode=mode, ignore_non_alpha_num=ignore_non_alpha_num)

[docs]    def add_citation(self, author='', journal='', volume=None, year=None, first_page=None,
                     ignore_non_alpha_num=False, _coden=None):
        '''Search for a citation.

        Note: the journal parameter requires the CSD to be present in order to translate the journal name to a coden identifier.
        If the CSD is not present, but an alternative database is, use the alternative database's journals dict to look up a
        coden identifier and specify the _coden parameter in this function.'''
        if author:
            self.add_author(author)
        coden = None
        if _coden is not None:
            coden = _coden
        elif journal:
            coden = self.journals.get(journal, None)
            if coden is None:
                raise NameError('The journal %s could not be found' % journal)
        if coden is not None:
            self._numeric_query(DatabaseEntryLib.CCDC_JOURNAL_CODEN, coden)
        if volume is not None:
            self._text_query(DatabaseEntryLib.JOURNAL_VOLUME, str(volume), 'exact',
                             ignore_non_alpha_num=ignore_non_alpha_num)
        if year is not None:
            self._numeric_query(DatabaseEntryLib.JOURNAL_YEAR, year)
        if first_page is not None:
            self._text_query(DatabaseEntryLib.JOURNAL_PAGE, str(first_page), 'exact',
                             ignore_non_alpha_num=ignore_non_alpha_num)

[docs]    def add_ccdc_number(self, value):
        '''Search for a particular or a range of CCDC deposition numbers.

        >>> from ccdc.search import TextNumericSearch
        >>> searcher = TextNumericSearch()
        >>> searcher.add_ccdc_number(241370)
        >>> hits = searcher.search()
        >>> len(hits)
        1
        >>> entry = hits[0].entry
        >>> print('%s %s' % (entry.identifier, entry.ccdc_number))
        ABEBUF 241370
        >>> searcher.clear()
        >>> searcher.add_ccdc_number((241368, 241372))
        >>> hits = searcher.search()
        >>> print(len(hits))
        3
        >>> for hit in hits:
        ...     print('%s %s' % (hit.identifier, hit.entry.ccdc_number))
        ...
        ABEBUF 241370
        BIBZIW 241371
        BIMGEK 241372

        '''
        self._numeric_query(DatabaseEntryLib.CCDC_DEPOSITION_NUMBER, value)

[docs]    def add_heat_capacity_notes(self, heat_capacity_notes, mode='anywhere', ignore_non_alpha_num=False):
        '''Search for heat capacity notes.'''
        SolubilityPlatformLib.SolventData(heat_capacity_notes, 0)
        self._text_query(DatabaseEntryLib.HEAT_CAPACITY_NOTES, heat_capacity_notes, mode=mode, ignore_non_alpha_num=ignore_non_alpha_num)

[docs]    def add_heat_of_fusion_notes(self, heat_of_fusion_notes, mode='anywhere', ignore_non_alpha_num=False):
        '''Search for heat of fusion notes.'''
        SolubilityPlatformLib.SolventData(heat_of_fusion_notes, 0)
        self._text_query(DatabaseEntryLib.HEAT_OF_FUSION_NOTES, heat_of_fusion_notes, mode=mode, ignore_non_alpha_num=ignore_non_alpha_num)

[docs]    def add_solubility_notes(self, solubility_notes, mode='anywhere', ignore_non_alpha_num=False):
        '''Search for solubility notes.'''
        SolubilityPlatformLib.SolventData(solubility_notes, 0)
        self._text_query(DatabaseEntryLib.SOLUBILITY_NOTES, solubility_notes, mode=mode, ignore_non_alpha_num=ignore_non_alpha_num)

    def _add_fiz_depostion_number(self, value):
        '''Private: add a numeric criterion on the FIZ deposition number field.'''
        field = DatabaseEntryLib.FIZ_DEPOSITION_NUMBER
        self._numeric_query(field, value)

    def _add_csd_accession_date(self, value):
        '''Private: add a numeric criterion on the CSD accession date field.'''
        field = DatabaseEntryLib.CSD_ACCESSION_DATE
        self._numeric_query(field, value)

    def _add_csd_modification_date(self, value):
        '''Private: add a numeric criterion on the CSD modification date field.'''
        field = DatabaseEntryLib.CSD_MODIFICATION_DATE
        self._numeric_query(field, value)

    def _add_entry_insertion_time(self, value):
        '''Private: add a numeric criterion on the entry insertion time field.'''
        field = DatabaseEntryLib.ENTRY_INSERTION_TIME
        self._numeric_query(field, value)

[docs]    def add_predicted_semiconductor_dynamic_disorder(self, value):
        '''Search for predicted semiconductor dynamic disorder.

        See :attr:`ccdc.entry.SemiconductorPredictedProperties.dynamic_disorder`
        '''
        self._numeric_query(DatabaseEntryLib.PREDICTED_SEMICONDUCTOR_DYNAMIC_DISORDER, value)

[docs]    def add_predicted_semiconductor_singlet_state_1_energy(self, value):
        '''Search for predicted semiconductor singlet state 1 energy.

        See :attr:`ccdc.entry.SemiconductorPredictedProperties.singlet_state_1_energy`
        '''
        self._numeric_query(DatabaseEntryLib.PREDICTED_SEMICONDUCTOR_SINGLET_STATE_1_ENERGY, value)

[docs]    def add_predicted_semiconductor_singlet_state_2_energy(self, value):
        '''Search for predicted semiconductor singlet state 2 energy.

        See :attr:`ccdc.entry.SemiconductorPredictedProperties.singlet_state_2_energy`
        '''
        self._numeric_query(DatabaseEntryLib.PREDICTED_SEMICONDUCTOR_SINGLET_STATE_2_ENERGY, value)

[docs]    def add_predicted_semiconductor_triplet_state_1_energy(self, value):
        '''Search for predicted semiconductor triplet state 1 energy.

        See :attr:`ccdc.entry.SemiconductorPredictedProperties.triplet_state_1_energy`
        '''
        self._numeric_query(DatabaseEntryLib.PREDICTED_SEMICONDUCTOR_TRIPLET_STATE_1_ENERGY, value)

[docs]    def add_predicted_semiconductor_triplet_state_2_energy(self, value):
        '''Search for predicted semiconductor triplet state 2 energy.

        See :attr:`ccdc.entry.SemiconductorPredictedProperties.triplet_state_2_energy`
        '''
        self._numeric_query(DatabaseEntryLib.PREDICTED_SEMICONDUCTOR_TRIPLET_STATE_2_ENERGY, value)

[docs]    def add_predicted_semiconductor_hole_reorganization_energy(self, value):
        '''Search for predicted semiconductor hole reorganization energy.

        See :attr:`ccdc.entry.SemiconductorPredictedProperties.hole_reorganization_energy`
        '''
        self._numeric_query(DatabaseEntryLib.PREDICTED_SEMICONDUCTOR_REORGANIZATION_ENERGY, value)

[docs]    def add_predicted_semiconductor_transfer_integral(self, value):
        '''Search for predicted semiconductor transfer integral.

        See :attr:`ccdc.entry.SemiconductorPredictedProperties.transfer_integral`
        '''
        self._numeric_query(DatabaseEntryLib.PREDICTED_SEMICONDUCTOR_TRANSFER_INTEGRAL, value)

[docs]    def add_predicted_semiconductor_homo_lumo_gap(self, value):
        '''Search for predicted semiconductor HOMO-LUMO gap.

        See :attr:`ccdc.entry.SemiconductorPredictedProperties.homo_lumo_gap`
        '''
        self._numeric_query(DatabaseEntryLib.PREDICTED_SEMICONDUCTOR_HOMO_LUMO_GAP, value)

[docs]    def add_predicted_semiconductor_singlet_state_1_oscillator_strength(self, value):
        '''Search for predicted semiconductor singlet state 1 oscillator strength.

        See :attr:`ccdc.entry.SemiconductorPredictedProperties.singlet_state_1_oscillator_strength`
        '''
        self._numeric_query(DatabaseEntryLib.PREDICTED_SEMICONDUCTOR_SINGLET_STATE_1_OSCILLATOR_STRENGTH, value)

[docs]    def add_predicted_semiconductor_singlet_state_2_oscillator_strength(self, value):
        '''Search for predicted semiconductor singlet state 2 oscillator strength.

        See :attr:`ccdc.entry.SemiconductorPredictedProperties.singlet_state_2_oscillator_strength`
        '''
        self._numeric_query(DatabaseEntryLib.PREDICTED_SEMICONDUCTOR_SINGLET_STATE_2_OSCILLATOR_STRENGTH, value)

[docs]    def is_journal_valid(self, journal):
        '''Check the validity of a specified journal name in the CSD.

        This requires the CSD to be present.

        :param journal: str, journal name'''
        return self.journals.get(journal) is not None

    @property
    def journals(self):
        '''A dictionary of journal name : ccdc code number for journals in the CSD.

        The dictionary is built on first access and reused afterwards.

        This requires the CSD to be present.
        '''
        if self._journal_list is not None:
            return self._journal_list
        location = _CSDDatabaseLocator.get_binary_csd_location()
        database = CSDSQLDatabaseLib.CSDSQLDatabase(location)
        self._journal_list = dict(
            (journal.name(), journal.ccdc_coden())
            for journal in database.journal_list_info().journal_list()
        )
        return self._journal_list

    def _search_reader(self, reader):
        '''Private: run the text/numeric search against a database reader.

        The reader's text/numeric searcher is created on first use and cached on
        the reader. When filters are set on the settings, the hit limit of the
        underlying search is lifted and applied here instead, counting only hits
        whose entry passes ``settings.test``.

        :raises: NotImplementedError if the reader's database cannot provide a
            text/numeric searcher
        '''
        self._search.settings().hits_limit_manager().reset_hits()
        if not hasattr(reader, '_text_numeric_searcher'):
            try:
                reader._text_numeric_searcher = reader._db.searcher_factory().text_numeric_searcher()
            except (RuntimeError, NameError, AttributeError):
                # Deliberately ignored: the check below reports the missing searcher.
                pass
        if not hasattr(reader, '_text_numeric_searcher'):
            raise NotImplementedError('This database does not support TextNumericSearch')

        if not self.settings._has_filter_set():
            self._search.settings().set_maximum_hits_limit(self.settings.max_hit_structures)
            identifiers = CSDSQLDatabaseLib.text_numeric_search(reader._text_numeric_searcher, self._search)
            return [TextNumericSearch.TextNumericHit(identifier, reader._db) for identifier in identifiers]

        max_hits = self.settings.max_hit_structures
        # Filtering happens afterwards, so the raw search must not be truncated.
        self._search.settings().set_maximum_hits_limit(maxint32)
        identifiers = CSDSQLDatabaseLib.text_numeric_search(reader._text_numeric_searcher, self._search)
        self.settings.max_hit_structures = max_hits
        accepted = []
        for identifier in identifiers:
            hit = TextNumericSearch.TextNumericHit(identifier, reader._db)
            if not self.settings.test(hit.entry):
                continue
            accepted.append(hit)
            if max_hits and len(accepted) >= max_hits:
                break
        return accepted

    def _search_entry(self, entry):
        '''Private: not supported; a text/numeric search cannot be applied to a single entry.'''
        message = 'TextNumericSearch of an entry'
        raise NotImplementedError(message)

    def _search_crystal(self, crystal):
        '''Private: not supported; a text/numeric search cannot be applied to a single crystal.'''
        message = 'TextNumericSearch of a crystal'
        raise NotImplementedError(message)

    def _search_molecule(self, molecule):
        '''Private: not supported; a text/numeric search cannot be applied to a single molecule.'''
        message = 'TextNumericSearch of a molecule'
        raise NotImplementedError(message)

[docs]    @staticmethod
    def from_xml(xml):
        '''Create a TextNumericSearch from XML.

        :param xml: XML string
        '''
        stream = UtilitiesLib.istringstream(xml)
        parser = DatabaseEntryLib.TextNumericSearchXMLParser()
        tns = TextNumericSearch()
        tns._search = parser.parse(stream)
        return tns

[docs]    @staticmethod
    def from_xml_file(file_name):
        '''Create a TextNumericSearch from an XML file.

        :param file_name: path to XML file

        :raises: IOError when the file does not exist
        '''
        if not os.path.exists(file_name):
            raise IOError('The file %s does not exist' % file_name)
        with open(file_name) as f:
            return TextNumericSearch.from_xml(f.read())

[docs]    def read_xml(self, xml):
        '''Read a query from XML.

        :param xml: XML string
        '''
        stream = UtilitiesLib.istringstream(xml)
        parser = DatabaseEntryLib.TextNumericSearchXMLParser()
        self._search = parser.parse(stream)

[docs]    def read_xml_file(self, file_name):
        '''Read a text numeric search from an XML file.

        :param file_name: path to XML file
        :raises: IOError if the file cannot be read
        '''
        if not os.path.exists(file_name):
            raise IOError('The file %s does not exist' % file_name)
        with open(file_name) as f:
            self.read_xml(f.read())

###########################################################################


[docs]class SubstructureSearch(Search):
    '''Query crystal structures for interactions.'''
    _telemetry = 0

[docs]    class Settings(Search.Settings):
        '''Settings appropriate to a substructure search.'''

        _enantiomer_match_type_dict = utilities.bidirectional_dict(
            NEVER=MotifSearchLib.EnantiomerSensitiveConstraint.NEVER,
            SPACEGROUP_DEPENDENT=MotifSearchLib.EnantiomerSensitiveConstraint.SPACEGROUP_DEPENDENT,
            ALWAYS=MotifSearchLib.EnantiomerSensitiveConstraint.ALWAYS,
        )

        def __init__(self, max_hit_structures=None, max_hits_per_structure=None):
            settings = CSDSQLDatabaseLib.CrystalStructureDatabaseMotifSearchSettings()
            settings.set_match_mode(CSDSQLDatabaseLib.CrystalStructureDatabaseMotifSearchSettings.MATCH_3D_CRYSTAL_ONLY)
            if max_hit_structures is not None:
                settings.set_maximum_hits_limit(max_hit_structures)
            if max_hits_per_structure is None:
                settings.set_maximum_hits_per_structure(0)
            else:
                settings.set_maximum_hits_per_structure(max_hits_per_structure)
            Search.Settings.__init__(self, _settings=settings)
            self._match_enantiomers = MotifSearchLib.EnantiomerSensitiveConstraint.NEVER

        @property
        def max_hits_per_structure(self):
            '''Maximum number of hits per structure.'''
            return self._settings.maximum_hits_per_structure()

        @max_hits_per_structure.setter
        def max_hits_per_structure(self, value):
            self._settings.set_maximum_hits_per_structure(value)

        @property
        def match_enantiomers(self):
            '''Enantiomer matching behavior

            The value will be one of 'NEVER' meaning enantiomers are never checked, 'SPACEGROUP_DEPENDENT' meaning enantiomers are checked
            if the crystal's spacegroup implies the presence of enantiomers, or 'ALWAYS' meaning enantiomers are always checked.
            '''
            return SubstructureSearch.Settings._enantiomer_match_type_dict.inverse_lookup(self._match_enantiomers)

        @match_enantiomers.setter
        def match_enantiomers(self, value):
            self._match_enantiomers = SubstructureSearch.Settings._enantiomer_match_type_dict[value]


[docs]    class HitProcessor(object):
        '''Override this class to provide your own add_hit() method.

        This class allows a search to process hits as they are found by
        the search class, rather than waiting until all hits are found before
        allowing access to them, a procedure which may well run out of memory
        for very general searches.
        '''
[docs]        def search(self, searcher, database=None):
            '''Searches the database with the substructure search.

            :param searcher: a :class:`ccdc.search.SubstructureSearch` instance.
            :param database: a :class:`ccdc.io.EntryReader` instance. If not specified the CSD will be searched.

            For each hit found, :meth:`ccdc.Search.SubstructureSearch.HitProcessor.add_hit` will be
            called with a :class:`ccdc.search.SubstructureSearch.SubstructureHit` instance.
            '''
            self._cancelled = False
            self.searcher = searcher
            self.searcher._add_enantiomer_consistency()
            if database is None:
                database = EntryReader('csd')
            self.database = database
            if not hasattr(self.database, '_motif_searcher'):
                try:
                    self.database._motif_searcher = self.database._db.searcher_factory().motif_searcher()
                except (RuntimeError, AttributeError):
                    pass
            if hasattr(self.database, '_motif_searcher'):
                self.database._motif_searcher.progress_monitor().reset()
                results_writer = CSDSQLDatabaseLib.PythonResultsWriter(self)
                self.database._motif_searcher.search(
                    self.searcher._motif, self.searcher.settings._settings, results_writer
                )
            else:
                # there used to be fallback code here, but now we expect to always support motif search on any database
                raise NotImplementedError("Substructure search is not implemented on this database type")

        def __call__(self, **kw):
            '''Private: this method will be called from the search.'''
            if 'max_hits_reached' in kw:
                self.cancel()
                #print('Max hits reached')
            elif 'match' in kw:
                h = SubstructureSearch.SubstructureHit._from_match(
                        kw['match'], self.searcher, _binary_database=self.database._db
                )
                self.add_hit(h)
            elif 'hit' in kw:
                self.add_hit(kw['hit'])
            else:
                raise RuntimeError('Unknown keyword in __call__', kw)

[docs]        def cancel(self):
            '''Cancels the search.'''
            try:
                self.database._motif_searcher.progress_monitor().cancel()
            except AttributeError:
                pass
            self._cancelled = True

[docs]        def add_hit(self, hit):
            '''Override this to provide your own hit processing.'''
            raise NotImplementedError('add_hit() must be implemented.')


    class _MotifMatchHit(Search.SearchHit):
        '''A hit with motif match results.

        Holds the underlying motif match object and exposes the matched
        components, atoms, substructures and symmetry operators of the hit.
        '''

        def __init__(self, identifier, match=None,
                     _database=None, _entry=None, _crystal=None, _molecule=None, _binary_database=None):
            '''Initialise the hit.

            :param identifier: identifier of the structure containing the match.
            :param match: the underlying motif match object, or None.

            The underscore-prefixed arguments are private and are forwarded to the base class.
            '''
            # When either kind of database is supplied, the explicit entry,
            # crystal and molecule arguments are discarded before forwarding.
            if _database is not None or _binary_database is not None:
                _entry = _crystal = _molecule = None
            super(SubstructureSearch._MotifMatchHit, self).__init__(
                identifier,
                _database=_database, _entry=_entry, _crystal=_crystal,
                _molecule=_molecule, _binary_database=_binary_database)
            self._motif_match = match

        def match_components(self):
            '''
            Return the molecular components containing the atoms matched by the search.

            :returns: list of :class:`ccdc.molecule.Molecule`
            '''
            csv = ChemistryLib.CrystalStructureView.instantiate(self.crystal._crystal)
            ss = MotifSearchLib.MotifSearchStructure(csv)
            # One molecule is created per substructure match and the results are
            # passed through a set before being returned as a list.
            match_mols = {
                molecule.Molecule(
                    '%02d' % i,
                    _molecule=ss.molecule(self._motif_match.substructure_match(i)).create_editable_molecule()
                )
                for i in range(self._motif_match.nsubstructure_matches())
            }
            return list(match_mols)

        def match_atoms(self, indices=False):
            '''
            Return the atoms matched by the substructure.

            :param indices: Whether to return atom indices instead of :class:`ccdc.molecule.Atom` instances
            :returns: list of :class:`ccdc.molecule.Atom` instances or atom indices
            :raises: RuntimeError if a matched atom cannot be located in the hit molecule

            The atoms returned will all be in the asymmetric unit, so directly measuring constraints and measurements from
            these atoms will not give the correct results if a symmetry-generated copy was involved in the match.  See
            :meth:`ccdc.search.SubstructureSearch.SubstructureHit.match_symmetry_operators` for a way to determine if this is the case.

            The result is computed on the first call and cached on the hit.
            '''
            if not hasattr(self, '_real_indices'):
                csv = ChemistryLib.CrystalStructureView.instantiate(self.crystal._crystal)
                ss = MotifSearchLib.MotifSearchStructure(csv)
                match_atoms = []
                mol = self.molecule

                def _matches(a, b, depth=0):
                    '''Whether API atom a corresponds to underlying atom b.'''
                    if a.coordinates is None:
                        if b.site() is None:
                            # match labels here and first neighbours
                            if depth >= 2:
                                return True
                            return a.label == b.label() and (len(a.neighbours) == 0 or len(b.get_neighbours()) == 0 or _matches(a.neighbours[0], b.get_neighbours()[0], depth+1))
                        else:
                            return False
                    else:
                        if b.site() is None:
                            return False
                        else:
                            # Both atoms have coordinates: compare label and
                            # orthogonal coordinates rounded to 3 decimal places.
                            return (a.label == b.label() and
                                    round(a.coordinates.x, 3) == round(b.site().orth().x(), 3) and
                                    round(a.coordinates.y, 3) == round(b.site().orth().y(), 3) and
                                    round(a.coordinates.z, 3) == round(b.site().orth().z(), 3)
                                )

                def _site_text(at):
                    '''Text for the error message; tolerates atoms that have no site.'''
                    site = at.site()
                    return 'None' if site is None else str(site.orth())

                for j in range(self._motif_match.nsubstructure_matches()):
                    sub_matches = []
                    for i in range(len(self._motif_match.substructure_match(j).atom_match())):
                        _atom = ss.atom(self._motif_match, j, i)
                        _base = csv.base_atom(_atom)
                        # try same index first
                        added = False
                        if _atom.index() < len(mol.atoms):
                            a = mol.atoms[_atom.index()]
                            if a not in sub_matches and _matches(a, _base):
                                sub_matches.append(a)
                                added = True
                        if not added:
                            # Fall back to scanning every atom of the molecule.
                            for a in mol.atoms:
                                if a not in sub_matches and _matches(a, _base):
                                    sub_matches.append(a)
                                    break
                            else:
                                # Atoms without a site are a handled case in _matches(), so the
                                # message must not dereference a missing site.
                                raise RuntimeError('No matching atom??? %s %s - %s %s in %s' % (_atom.label(), _site_text(_atom), _base.label(), _site_text(_base), self.identifier))
                    match_atoms += sub_matches
                self._match_atoms = match_atoms
                self._real_indices = tuple(a.index for a in self._match_atoms)
            if indices:
                return tuple(self._real_indices)
            else:
                return self._match_atoms

        def match_substructures(self):
            '''Returns each substructure of the hit as a molecule with the bonds and atoms of the hit.

            The symmetry operations of the hit will be applied to the molecules, so measurement and
            constraints will be appropriate to the hit.

            :returns: tuple of :class:`ccdc.molecule.Molecule`, one for each substructure of the hit
            with the bonds and atoms of the hit
            '''
            csv = ChemistryLib.CrystalStructureView.instantiate(self.crystal._crystal)
            mss = MotifSearchLib.MotifSearchStructure(csv)
            at_matches = [self._motif_match.substructure_match(i).atom_match() for i in range(self._motif_match.nsubstructure_matches())]
            at_addrs = [[self._motif_match.atom_address(i, j) for j in range(len(at_matches[i]))] for i in range(len(at_matches))]
            ats = [[mss.atom(aa) for aa in addrs] for addrs in at_addrs]
            api_ats = [[molecule.Atom(_atom=a) for a in row] for row in ats]
            # Each molecule starts as an editable copy of the molecule that owns
            # the first matched atom of the substructure.
            api_mols = [molecule.Molecule(self.identifier, _molecule=row[0].molecule().create_editable_molecule()) for row in ats]

            def _matching_ats(a, b):
                '''Whether two API atoms agree on symbol, label and printed coordinates.'''
                return (
                    a.atomic_symbol == b.atomic_symbol and
                    a.label == b.label and
                    str(a.coordinates) == str(b.coordinates)
                )

            for i, m in enumerate(api_mols):
                m._molecule.reorder_atoms([a.index() for a in ats[i]])
                # Strip every atom that is not one of the matched atoms.
                m.remove_atoms(a for a in m.atoms if not any(_matching_ats(a, b) for b in api_ats[i]))
            return tuple(api_mols)

        def match_symmetry_operators(self):
            '''The symmetry operators required to form the match.

            :returns: a list of symmetry operators in the order of the matched atoms.

            Each operator is given as a string; an empty string is used where no
            symmetry relation was obtained for the atom.
            '''
            crystal = self.crystal
            ats = self.match_atoms()
            csv = ChemistryLib.CrystalStructureView.instantiate(crystal._crystal)
            mss = MotifSearchLib.MotifSearchStructure(csv)
            motif_match = self._motif_match

            def _get_symmop(a):
                '''Get the appropriate symmop.'''
                # Convert the flat atom position into (substructure, position
                # within that substructure).
                z = a
                sub = 0
                while True:
                    subm = motif_match.substructure_match(sub)
                    if z >= len(subm.atom_match()):
                        z -= len(subm.atom_match())
                        sub += 1
                    else:
                        break
                at = mss.atom(motif_match, sub, z)
                base = csv.base_asymmetric_unit_atom(at)
                op = ChemistryLib.atom_atom_symmetry_relation(crystal._crystal, base, at)
                return op.to_string() if op else ''

            return [_get_symmop(i) for i in range(len(ats))]


[docs]    class SubstructureHit(_MotifMatchHit):
        '''A hit from a substructure search.'''
        def __init__(self, identifier, match=None, search_structure=None, query=None,
                     _database=None, _entry=None, _crystal=None, _molecule=None, _binary_database=None):
            super(SubstructureSearch.SubstructureHit, self).__init__(
                identifier, match,
                _database=_database, _entry=_entry, _crystal=_crystal,
                _molecule=_molecule, _binary_database=_binary_database)
            self._disorder_dealt_with = False
            self._search = query
            if match is not None:
                #self._make_geometric_objects()
                self._measure_measurements()
                self.query = query
            self._geometric_objects = None

        @staticmethod
        def _from_match(m, search, _binary_database=None, _database=None, _entry=None, _crystal=None, _molecule=None):
            '''Private: construct a SubstructureHit from a match object.'''
            h = SubstructureSearch.SubstructureHit(
                m.identifier().str(), m.data().motif_match(), query=search,
                _binary_database=_binary_database, _database=_database, _entry=_entry,
                _crystal=_crystal, _molecule=_molecule
            )
            return h

        def _make_geometric_object(self, obj, search_structure):
            '''PRIVATE: make a geometric object.'''
            if isinstance(obj, (SubstructureSearchLib.ConstraintAtomPoint,
                                SubstructureSearchLib.ConstraintCentroidPoint,
                                SubstructureSearchLib.ConstraintDummyPoint,
                                SubstructureSearchLib.ConstraintPoint)):
                p0 = self._motif_match.get_point(MotifSearchLib.Object_as_Point(obj), search_structure)
                return molecule.Coordinates(p0.x(), p0.y(), p0.z())
            elif isinstance(obj, SubstructureSearchLib.ConstraintPlane):
                p = GeometricDescriptors.Plane(
                    None, None, _plane=self._motif_match.get_plane(obj, search_structure)
                )
                return p
            elif isinstance(obj, SubstructureSearchLib.ConstraintVector):
                vec = self._motif_match.get_vector(obj, search_structure)
                p = GeometricDescriptors.Vector(vec.x(), vec.y(), vec.z())
                return p
            elif isinstance(obj, SubstructureSearchLib.ConstraintAtomGroup):
                _csv = ChemistryLib.CrystalStructureView.instantiate(self.crystal._crystal)
                _mss = MotifSearchLib.MotifSearchStructure(_csv)
                mgsm = MotifSearchLib.MotifGeometricSearchMatch(self._motif_match, _mss)
                atoms = obj.atoms(mgsm)
                return tuple(molecule.Atom(_atom=a) for a in atoms)
            raise NotImplementedError('Have not implemented geometric object %s' % obj)

        def _make_geometric_objects(self):
            '''PRIVATE: make all the geometric objects.'''
            if not self._search.geometric_objects:
                self._geometric_objects = {}
                return
            _csv = ChemistryLib.CrystalStructureView.instantiate(self.crystal._crystal)
            _mss = MotifSearchLib.MotifSearchStructure(_csv)
            self._geometric_objects = {
                name : self._make_geometric_object(obj, _mss)
                for name, obj in self._search.geometric_objects.items()
            }

        @property
        def geometric_objects(self):
            if self._geometric_objects is None:
                self._make_geometric_objects()
            return self._geometric_objects

        def _measure_measurements(self):
            '''PRIVATE: make all the measurements.'''
            self.measurements = dict()
            self.constraints = dict()
            for i in range(self._motif_match.nparameters()):
                mp = self._motif_match.parameter_value(i)
                if mp.parameter().name() in self._search.measurements:
                    self.measurements[mp.parameter().name()] = mp.value()
                else:
                    self.constraints[mp.parameter().name()] = mp.value()

[docs]        def measurement_atoms(self, name):
            '''The atoms involved in a measurement.

            :param name: the name of the measurement.
            :returns: a tuple of :class:`ccdc.molecule.Atom` instances.

            The atoms will be returned in an arbitrary order.  All atoms involved in the measurement will be present,
            so for example a centroid-centroid distance measurement will produce the atoms of both centroids.
            '''
            con = self._search.measurements[name]
            _csv = ChemistryLib.CrystalStructureView.instantiate(self.crystal._crystal)
            _mss = MotifSearchLib.MotifSearchStructure(_csv)
            mgsm = MotifSearchLib.MotifGeometricSearchMatch(self._motif_match, _mss)
            res = con.test(mgsm)
            ats = res.get_atoms()
            return tuple(molecule.Atom(_atom=a) for a in ats)

[docs]        def constraint_atoms(self, name):
            '''The atoms from which the constraint was defined.

            :param name: the name of the constraint.
            :returns: a tuple of :class:`ccdc.molecule.Atom` instances.

            The atoms will be returned in an arbitrary order.  All atoms involved in defining the constraint will be returned.
            '''
            con = self._search.constraints.get(name, self._search.contacts[name])
            _csv = ChemistryLib.CrystalStructureView.instantiate(self.crystal._crystal)
            _mss = MotifSearchLib.MotifSearchStructure(_csv)
            mgsm = MotifSearchLib.MotifGeometricSearchMatch(self._motif_match, _mss)
            if isinstance(con, SubstructureSearchLib.SubstructureContact):
                sub1 = con.substruct_a()
                at1 = con.atom_a()
                sub2 = con.substruct_b()
                at2 = con.atom_b()
                addr1 = self._motif_match.atom_address(sub1, at1)
                addr2 = self._motif_match.atom_address(sub2, at2)
                return molecule.Atom(_atom=_mss.atom(addr1)), molecule.Atom(_atom=_mss.atom(addr2))
            elif isinstance(con, SubstructureSearchLib.SubstructureObjectContact):
                obj1 = con.object_a()
                obj2 = con.object_b()
                return tuple(a for a in self._geometric_object_atoms(obj1.label())) + \
                       tuple(a for a in self._geometric_object_atoms(obj2.label()))
            res = con.test(mgsm)
            ats = res.get_atoms()
            return tuple(molecule.Atom(_atom=a) for a in ats)

        def _geometric_object_atoms(self, name):
            '''PRIVATE: the matched atoms of a constraint object.'''
            _csv = ChemistryLib.CrystalStructureView.instantiate(self.crystal._crystal)
            _mss = MotifSearchLib.MotifSearchStructure(_csv)
            mgsm = MotifSearchLib.MotifGeometricSearchMatch(self._motif_match, _mss)
            atoms = self._search.geometric_objects[name].atoms(mgsm)
            return tuple(molecule.Atom(_atom=a) for a in atoms)

[docs]        def centroid_atoms(self, name):
            '''The atoms from which the centroid is derived.'''
            return self._geometric_object_atoms(name)

[docs]        def dummy_point_atoms(self, name):
            '''The atoms from which the dummy point was defined.'''
            return self._geometric_object_atoms(name)

[docs]        def group_atoms(self, name):
            '''The atoms from which the group was defined.'''
            return self._geometric_object_atoms(name)

[docs]        def vector_atoms(self, name):
            '''The atoms from which the vector was defined.'''
            return self._geometric_object_atoms(name)

[docs]        def plane_atoms(self, name):
            '''The atoms from which the plane was defined.'''
            return self._geometric_object_atoms(name)

        ### Object names

        _constraint_types = dict(
            PlaneAngleConstraint=SubstructureSearchLib.GeometricConstraint_as_PlaneAngleConstraint,
            PointAngleConstraint=SubstructureSearchLib.GeometricConstraint_as_PointAngleConstraint,
            PointDistanceConstraint=SubstructureSearchLib.GeometricConstraint_as_PointDistanceConstraint,
            PointPlaneDistanceConstraint=SubstructureSearchLib.GeometricConstraint_as_PointPlaneDistanceConstraint,
            PointTorsionConstraint=SubstructureSearchLib.GeometricConstraint_as_PointTorsionConstraint,
            VectorAngleConstraint=SubstructureSearchLib.GeometricConstraint_as_VectorAngleConstraint,
            VectorPlaneAngleConstraint=SubstructureSearchLib.GeometricConstraint_as_VectorPlaneAngleConstraint,
            Atom3DPropertyConstraint=SubstructureSearchLib.GeometricConstraint_as_Atom3DPropertyConstraint,
            ConstantValueConstraint=SubstructureSearchLib.GeometricConstraint_as_ConstantValueConstraint,
            TransformConstraint=SubstructureSearchLib.GeometricConstraint_as_TransformConstraint,
            UnaryTransformConstraint=SubstructureSearchLib.GeometricConstraint_as_UnaryTransformConstraint,
            BinaryTransformConstraint=SubstructureSearchLib.GeometricConstraint_as_BinaryTransformConstraint,
        )

        @staticmethod
        def _find_objects(constraint):
            real_con = SubstructureSearch.SubstructureHit._constraint_types[constraint.class_name()](constraint)
            if real_con.class_name() == 'UnaryTransformConstraint':
                return SubstructureSearch.SubstructureHit._find_objects(real_con.sub_constraint())
            if real_con.class_name() == 'BinaryTransformConstraint':
                return SubstructureSearch.SubstructureHit._find_objects(real_con.sub_constraint1()) + SubstructureSearch.SubstructureHit._find_objects(real_con.sub_constraint2())
            if real_con.class_name() == 'ConstantValueConstraint':
                return ()
            objs = tuple(real_con.objects(i) for i in range(real_con.nobjects()))
            return objs

        def _object_name(self, _object):
            n = _object.label()
            if ':' in n:
                bits = n.split(':')
                if len(bits) == 2 and all(x.isdigit() for b in bits for x in b):
                    sub_inx = int(bits[0])
                    at_inx = int(bits[1])
                    substructs = self.match_substructures()
                    while at_inx >= len(substructs[sub_inx].atoms):
                        at_inx -= len(substructs[sub_inx].atoms)
                        sub_inx += 1
                    at = self.match_substructures()[sub_inx].atoms[at_inx]
                    return at
            return _object.label()

[docs]        def measurement_objects(self, measurement):
            '''A tuple of object names and atoms from which the measurement was taken.

            :param measurement: the string name of the measurement.
            :returns: a tuple of geometric object names or atoms.
            '''
            _constraint = self._search.measurements[measurement]
            return tuple(self._object_name(obj) for obj in self._find_objects(_constraint))

[docs]        def constraint_objects(self, constraint):
            '''A tuple of object names and atoms from which the constraint was defined.'''
            _constraint = self._search.constraints.get(constraint, self._search.contacts.get(constraint))
            if _constraint is None:
                raise IndexError('The constraint %s could not be found' % constraint)
            return tuple(self._object_name(obj) for obj in self._find_objects(_constraint))

        def _geometric_object_objects(self, _obj):
            return tuple(self._object_name(_obj.objects(i)) for i in range(_obj.nobjects()))

[docs]        def centroid_objects(self, name):
            '''The geometric object names and atoms from which the centroid was defined.'''
            return self._geometric_object_objects(self._search.geometric_objects[name])

[docs]        def dummy_point_objects(self, name):
            '''The geometric object names and atoms from which the dummy point was defined.'''
            return self._geometric_object_objects(self._search.geometric_objects[name])

[docs]        def group_objects(self, name):
            '''The geometric object names and atoms from which the group was defined.'''
            return self._geometric_object_objects(self._search.geometric_objects[name])

[docs]        def vector_objects(self, name):
            '''The geometric object names and atoms from which the vector was defined.'''
            return self._geometric_object_objects(self._search.geometric_objects[name])

[docs]        def plane_objects(self, name):
            '''The geometric object names and atoms from which the plane was defined.'''
            return self._geometric_object_objects(self._search.geometric_objects[name])


[docs]    class SubstructureHitList(list):
        '''List of hits from a :class:`ccdc.search.SubstructureSearch`'''
[docs]        def superimpose(self):
            '''Superimpose all matched molecules on their query atoms

            Just superimpose on first substructure
            '''
            ret = []
            if self:
                for i in range(len(self)):
                    if self[i].match_atoms():
                        inx0 = self[i].match_atoms()
                        mol0 = self[i].molecule.copy()
                        got_one = i
                        break
                else:
                    raise RuntimeError('No structure has matching atoms')
                ret.append(self[got_one].molecule.copy())
                for h in self[got_one+1:]:
                    inx1 = h.match_atoms()
                    mol1 = h.molecule.copy()
                    overlay = MolecularDescriptors.Overlay(mol0, mol1, atoms=zip(inx0, inx1))
                    ret.append(overlay.molecule)
                return ret

[docs]        def write_c2m_file(self, file_name):
            '''Write a ConQuest to Mercury interchange file.

            This file allows substructure search results to be read into the data analysis package
            of Mercury.

            :param file_name: file to which the data will be written.
            '''
            if not self:
                raise RuntimeError('No hits to write')

            def split_by_id():
                '''Split the hit list by identifier.'''
                parts = []
                identifier = None
                for h in self:
                    if h.identifier != identifier:
                        identifier = h.identifier
                        parts.append([])
                    parts[-1].append(h)
                return parts

            def make_atom(h, i, a, done, output):
                '''Make a tag representing an individual atom.'''
                motif_match = h._motif_match
                crystal = h.crystal
                csv = ChemistryLib.CrystalStructureView.instantiate(crystal._crystal)
                mss = MotifSearchLib.MotifSearchStructure(csv)
                # find the right substructure_match
                z = i
                sub = 0
                while True:
                    subm = motif_match.substructure_match(sub)
                    if z >= len(subm.atom_match()):
                        z -= len(subm.atom_match())
                        sub += 1
                    else:
                        break
                # Now subm is the right match, z is the right index in the match
                fo = a._atom.annotations().obtain_FileOrdering().file_order()
                subm.translation()
                at = mss.atom(motif_match, sub, z)
                base = csv.base_asymmetric_unit_atom(at)
                ChemistryLib.atom_atom_symmetry_relation(crystal._crystal, base, at)
                if fo in done:
                    fo = at.annotations().obtain_FileOrdering().file_order()
                done.add(fo)
                output.write('<atom id="%d" substructure_index="%d" aser_index="%d"/>\n' % (i, i, fo))

            def make_atoms(h, output):
                '''Make the atoms tag.'''
                output.write('<atoms>\n')
                s = set()
                for i, a in enumerate(h.match_atoms()):
                    if a._atom.annotations().obtain_FileOrdering().file_order() != 0:
                        make_atom(h, i, a, s, output)
                output.write('</atoms>\n')

            def make_absolute_index(motif_match, sub, a):
                '''Make the atom index absolute.'''
                atinx = a
                for i in range(sub):
                    atinx += len(motif_match.substructure_match(i).atom_match())
                return atinx

            def make_measure(h, name, value, output):
                '''Make a parameter tag for a measurement.'''
                motif_match = h._motif_match
                cc = h._search.measurements[name]
                if cc.label() == name:
                    objects = [cc.objects(i) for i in range(cc.nobjects())]
                    pts = [SubstructureSearchLib.Object_as_AtomPoint(o) for o in objects]
                    sub_at_inxs = [(p.substructure_index(), p.atom_index()) for p in pts]
                    at_inxs = [make_absolute_index(motif_match, s, a) for s, a in sub_at_inxs]
                    atom_inxs = ','.join('%d' % a for a in at_inxs)
                    if cc.nobjects() == 2:
                        flavour = 'distance'
                    elif cc.nobjects() == 3:
                        flavour = 'angle'
                    elif cc.nobjects() == 4:
                        flavour = 'torsion'
                    else:
                        raise NotImplementedError('Need the other constraints ' + str(type(cc)) + ' ' + name)
                    output.write('<parameter name="%s" type="%s" value="%.3f">\n' % (name, flavour, value))
                    output.write('<atom_ids>%s</atom_ids>\n' % atom_inxs)
                    output.write('</parameter>\n')

            def _get_atom_inxs(h, ct, name):
                sub_atoms = [a for s in h.match_substructures() for a in s.atoms]
                con = h._search.constraints[name]
                pts = [SubstructureSearchLib.Object_as_AtomPoint(con.objects(i)) for i in range(ct)]
                inxs = [make_absolute_index(h._motif_match, p.substructure_index(), p.atom_index()) for p in pts]
                return inxs

            def make_constraint(h, name, value, output):
                '''Write the <parameter> element for one constraint or contact of a hit.

                :param h: the hit being written.
                :param name: key of the constraint in ``h._search.constraints``.
                :param value: the value found for the constraint, written to three decimal places.
                :param output: object whose ``write`` method receives the XML text.
                :raises AttributeError: if the query has no constraint called ``name``.
                :raises NotImplementedError: if the constraint is neither a substructure contact
                    nor a point distance, angle or torsion constraint.
                '''
                motif_match = h._motif_match
                known = h._search.constraints
                if name not in known:
                    raise AttributeError('No constraint named %s' % name)
                cc = known[name]
                if isinstance(cc, SubstructureSearchLib.SubstructureContact):
                    flavour = 'contact'
                    atom_inxs = [
                        make_absolute_index(motif_match, cc.substruct_a(), cc.atom_a()),
                        make_absolute_index(motif_match, cc.substruct_b(), cc.atom_b()),
                    ]
                else:
                    # Constraint class name -> (parameter type, number of atoms involved).
                    point_kinds = {
                        'PointAngleConstraint': ('angle', 3),
                        'PointTorsionConstraint': ('torsion', 4),
                        'PointDistanceConstraint': ('distance', 2),
                    }
                    found = point_kinds.get(cc.class_name())
                    if found is None:
                        raise NotImplementedError('Need the other constraints')
                    flavour, count = found
                    atom_inxs = _get_atom_inxs(h, count, name)
                output.write('<parameter name="%s" type="%s" value="%.3f">\n' % (name, flavour, value))
                output.write('<atom_ids>%s</atom_ids>\n' % (','.join('%d' % a for a in atom_inxs)))
                output.write('</parameter>\n')

            def make_params(h, output):
                '''Write the <parameters> element of a hit.

                One parameter is written for each entry of ``h.measurements``, followed by one
                for each entry of ``h.constraints``.
                '''
                output.write('<parameters>\n')
                for label, measured in h.measurements.items():
                    make_measure(h, label, measured, output)
                for label, constrained in h.constraints.items():
                    make_constraint(h, label, constrained, output)
                output.write('</parameters>\n')

            def make_fragment(h, inx, output):
                '''Write the <fragment> element of one hit.

                :param h: the hit to write.
                :param inx: zero-based position of the hit; written one-based as fragment_index.
                :param output: object whose ``write`` method receives the XML text.
                '''
                fragment_number = inx + 1
                output.write(
                    '<fragment type="3d_hit_fragment" fragment_index="%d" query_index="1">\n' % fragment_number
                )
                make_atoms(h, output)
                make_params(h, output)
                # NOTE(review): clears the molecule held on the hit once it has been written,
                # presumably to limit memory use over many hits - confirm.
                h._molecule = None
                output.write('</fragment>\n')

            def make_fragments(p, output):
                '''Write the <fragments> element holding one fragment for each hit in ``p``.'''
                output.write('<fragments>\n')
                for position, hit in enumerate(p):
                    make_fragment(hit, position, output)
                output.write('</fragments>\n')

            def make_match(p, output):
                '''Write the <match> element for a group of hits.

                The identifier and the database location are taken from the first hit.

                :param p: a non-empty sequence of hits.
                :param output: object whose ``write`` method receives the XML text.
                '''
                h = p[0]
                # Only a database reached through h._database can supply an inf file.
                csd_inf = None
                if h._database:
                    db = h._database._real_database(h.identifier)
                    csd_loc = db.file_name
                    if hasattr(db, 'inf_file'):
                        csd_inf = db.inf_file().full_path()
                else:
                    db = h._binary_database
                    if db is None:
                        csd_loc = ""
                    else:
                        try:
                            csd_loc = db.file_name()
                        except (AttributeError, RuntimeError):
                            # Fall back on asking the database where the entry came from.
                            try:
                                csd_loc = db._db.source_database_name(
                                    UtilitiesLib.DatabaseEntryIdentifier(h.identifier)
                                )
                            except Exception:
                                # Best effort only: an unknown location is written as empty, but
                                # KeyboardInterrupt and SystemExit are allowed to propagate.
                                csd_loc = ""
                            else:
                                csd_loc = csd_loc.replace('_ASER', '')

                output.write('<match identifier="%s">\n' % h.identifier)
                output.write('<database>%s</database>\n' % csd_loc)
                if csd_inf:
                    output.write('<inffile>%s</inffile>\n' % csd_inf)
                make_fragments(p, output)
                output.write('</match>\n')

            def make_tree(parts, output):
                '''Write the complete <search_results> document.

                :param parts: the groups of hits, each written as one <match> element.
                :param output: object with ``write`` and ``writelines`` methods receiving the XML text.

                The active hit is the first hit of ``self`` in the enclosing scope.
                '''
                output.writelines((
                    '<search_results version="2.0">\n',
                    '<search_label>search1</search_label>\n',
                    '<search_id>123456</search_id>\n',
                ))
                output.write('<active_hit>%s</active_hit>\n' % self[0].identifier)
                output.write('<action>analyse</action>\n')
                for group in parts:
                    make_match(group, output)
                output.write('</search_results>\n')

            parts = split_by_id()

            if not file_name.endswith('.c2m'):
                file_name += '.c2m'

            with open(file_name, 'w') as writer:
                make_tree(parts, writer)

    def __init__(self, settings=None):
        '''Set up an empty query.

        :param settings: the settings to search with; when None a default
            ``SubstructureSearch.Settings()`` is created.
        '''
        self._motif = MotifSearchLib.Motif('')
        self.substructures = []
        self.measurements = {}
        self.constraints = {}
        self.contacts = {}
        self.geometric_objects = {}
        self.settings = SubstructureSearch.Settings() if settings is None else settings
        # The telemetry call is guarded by a class-level flag so it is not repeated
        # once the flag has been set.
        cls = type(self)
        if cls._telemetry == 0:
            UtilitiesLib.ccdc_motif_search_telemetry()
            cls._telemetry = 1

[docs]    def add_substructure(self, substructure):
        '''Add a substructure.

        Disconnected substructures may be accepted if the first substructure is contiguous at the start.
        Multiple substructures may be added as a result.

        :param substructure: :class:`ccdc.search.QuerySubstructure`.
        :returns: the index of the first substructure added.
        '''
        if isinstance(substructure, ConnserSubstructure):
            self._conn_motif = _motif = MotifSearchLib.create_motif(substructure._conn)
            self._motif = _motif
            if len(self.substructures) > 0:
                # Need to relocate the new substructure(s)
                raise NotImplementedError('Need to relocate substructures')
            # Currently needed, but I'm not sure if it's still relevant
            inxs = list(range(_motif.nsubstructures()))
            for i in range(_motif.nsubstructures()):
                self.substructures.append(QuerySubstructure(_substructure=_motif.substructure(i)))

            _conn = substructure._conn
            possibles = []
            for i in range(_conn.n_geometric_constraints()):
                c = _conn.geometric_constraint(i)
                if not c.label().startswith('?'):
                    self._motif.add_motif_parameter(MotifSearchLib.MotifGeometricConstraintParameter(c))
                cond = c.condition()
                if cond.name() == 'always false':
                    possibles.append(c)
                else:
                    self.constraints[c.label()] = c
                for inx in range(c.nobjects()):
                    obj = c.objects(inx)
                    object_types = dict(
                        ConstraintAtomGroup=SubstructureSearchLib.Object_as_AtomGroup,
                        ConstraintPlane=SubstructureSearchLib.Object_as_Plane,
                        ConstraintAtomPoint=SubstructureSearchLib.Object_as_AtomPoint,
                        ConstraintCentroidPoint=SubstructureSearchLib.Object_as_CentroidPoint,
                        ConstraintDummyPoint=SubstructureSearchLib.Object_as_DummyPoint,
                        ConstraintVector=SubstructureSearchLib.Object_as_Vector,
                    )
                    if ':' not in obj.label():
                        self.geometric_objects[obj.label()] = object_types.get(obj.class_name(), lambda x:x)(obj)
            contacts = _conn.contacts()
            for c, p in zip(contacts, possibles):
                clone = p.clone()
                cond = SubstructureSearchLib.InclusiveRange(c.criterion().min(), c.criterion().max())
                clone.set_condition(cond)
                self.constraints[p.label()] = clone
            for p in possibles[len(contacts):]:
                self.measurements[p.label()] = p
            for i in range(_motif.n_objects_contacts()):
                oc = _motif.object_contact(i)
                a = oc.object_a()
                if a.class_name() != 'ConstraintAtomPoint':
                    if ':' not in a.label():
                        self.geometric_objects[a.label()] = SubstructureSearchLib.Object_as_Point(a)
                b = oc.object_b()
                if b.class_name() != 'ConstraintAtomPoint':
                    if ':' not in b.label():
                        self.geometric_objects[b.label()] = SubstructureSearchLib.Object_as_Point(b)
                dist_con = SubstructureSearchLib.PointDistanceConstraint(
                    SubstructureSearchLib.Object_as_Point(a), SubstructureSearchLib.Object_as_Point(b),
                    SubstructureSearchLib.AlwaysTrue(), 'CONT%d' % (i+1)
                )
                self._motif.add_motif_parameter(MotifSearchLib.MotifGeometricConstraintParameter(dist_con))
                self.constraints['CONT%d' % (i+1)] = dist_con
                #self.constraints['%s_%s_CONTACT' % (a.label(), b.label())] = oc
                #self._motif.add_motif_parameter(MotifSearchLib.MotifGeometricConstraintParameter(oc))
            for i in range(_motif.nobjects()):
                obj = self.geometric_object(i)
                if ':' not in obj.label():
                    self.geometric_objects[obj.label()] = obj
            return inxs

        else:
            sizes = [substructure._substructure.natoms()]
            self.substructures.append(substructure)
            inxs = [self._motif.add_substructure(substructure._substructure)]
            if len(inxs) == 1:
                return inxs[0]
            else:
                return inxs

    def _point_must_have_site(self, sub_id, atom_id):
        '''Require 3D coordinates of a query atom unless it already has a 3D site constraint.

        :param sub_id: index into ``self.substructures``.
        :param atom_id: index into the ``atoms`` of that substructure.
        '''
        query_atom = self.substructures[sub_id].atoms[atom_id]
        site_constraint = SubstructureSearchLib.AtomHas3DSiteConstraint()
        if query_atom._substructure_atom.has_constraint_of_type(site_constraint):
            return
        query_atom.has_3d_coordinates = True

    def _args_to_points(self, required, args, require_3d=True):
        '''Convert point specifications into constraint objects.

        Each specification may be:

        - two consecutive ints, substructure_index then atom_index (old style);
        - a tuple or list (substructure_index, atom_index) (new style);
        - the name of an entry of ``self.geometric_objects``; an atom group is replaced by
          the centroid of the group.

        :param required: the number of points that must result, or 0 for no check.
        :param args: the sequence of specifications.
        :param require_3d: whether atoms named by index must be made to require 3D coordinates.
        :returns: a list of constraint objects, one per specification.
        :raises TypeError: if a specification is of any other type.
        :raises AssertionError: if ``required`` is non-zero and differs from the number of points.
        '''
        i = 0
        points = []
        while i < len(args):
            a = args[i]
            if isinstance(a, int):
                # Old style
                points.append(SubstructureSearchLib.ConstraintAtomPoint(a, args[i+1]))
                if require_3d:
                    self._point_must_have_site(a, args[i+1])
                i += 2
            elif isinstance(a, (tuple, list)):
                # New style
                points.append(SubstructureSearchLib.ConstraintAtomPoint(a[0], a[1]))
                if require_3d:
                    self._point_must_have_site(a[0], a[1])
                i += 1
            elif isinstance(a, str):
                g = self.geometric_objects[a]
                if isinstance(g, SubstructureSearchLib.ConstraintAtomGroup):
                    points.append(g.centroid_from_group())
                else:
                    points.append(g)
                i += 1
            else:
                raise TypeError('Invalid type for a point')
        # Raised explicitly rather than with an assert statement so that the check is still made
        # when Python runs with -O; the exception type is unchanged for existing callers.
        if required != 0 and required != len(points):
            raise AssertionError('Expected %d points but got %d' % (required, len(points)))
        return points

    ##### GeometricObjects #####

[docs]    def add_centroid(self, name, *args):
        '''Adds a centroid to the substructure search.

        :param name: the name by which the centroid will be accessed.
        :param `*args`: the points or geometric objects from which to define the centroid.

        Each arg may be either a pair (substructure_index, atom_index) or the name of a geometric object.
        There must be at least two such arguments.

        >>> query = SubstructureSearch()
        >>> _ = query.add_substructure(SMARTSSubstructure('C(=O)O'))
        >>> _ = query.add_substructure(SMARTSSubstructure('N(-H)H'))
        >>> query.add_centroid('CENT1', (0, 0), (0, 1), (0, 2))
        >>> query.add_centroid('CENT2', (1, 0), (1, 1), (1, 2))
        >>> query.add_centroid('CENT3', 'CENT1', 'CENT2')
        '''
        points = self._args_to_points(0, args)
        centroid = SubstructureSearchLib.ConstraintCentroidPoint(points, name)
        self.geometric_objects[name] = centroid
        self._motif.add_object(centroid)

[docs]    def add_dummy_point(self, name, distance, *args):
        '''Creates a dummy point along a vector.

        :param name: the name by which this point will be accessed.
        :param distance: the distance along the vector subtentended by the two points.
        :param `*args`: two points specified as (substructure_index, atom_index) or the name of
            another geometric object.

        >>> query = SubstructureSearch()
        >>> _ = query.add_substructure(SMARTSSubstructure('C(=O)O'))
        >>> _ = query.add_substructure(SMARTSSubstructure('N(-H)H'))
        >>> query.add_centroid('CENT1', (0, 0), (0, 1), (0, 2))
        >>> query.add_dummy_point('DUM1', 2.0, 'CENT1', (1, 1))
        '''
        points = self._args_to_points(2, args)
        dummy = SubstructureSearchLib.ConstraintDummyPoint(points[0], points[1], distance, name)
        self.geometric_objects[name] = dummy
        self._motif.add_object(dummy)

[docs]    def add_group(self, name, *args):
        '''Creates a group of matched atoms.

        :param name: the name by which this group will be accessed.
        :param `*args`: pairs, (substructure_index, atom_index) defining the atoms of the group.
        >>> query = SubstructureSearch()
        >>> _ = query.add_substructure(SMARTSSubstructure('C(=O)O'))
        >>> _ = query.add_substructure(SMARTSSubstructure('N(-H)H'))
        >>> query.add_group('GP1', (0, 0), (0, 1), (0, 2))
        '''
        points = self._args_to_points(0, args)
        group = SubstructureSearchLib.ConstraintAtomGroup(points, name)
        self.geometric_objects[name] = group
        self._motif.add_object(group)

[docs]    def add_vector(self, name, *args):
        '''Add a vector.

        :param name: the name by which the vector will be accessed.
        :param `*args`: two point specifications as (substructure_index, atom_index) or the name of
            another geometric object.

        >>> query = SubstructureSearch()
        >>> _ = query.add_substructure(SMARTSSubstructure('C(=O)O'))
        >>> _ = query.add_substructure(SMARTSSubstructure('N(-H)H'))
        >>> query.add_centroid('CENT1', (0, 0), (0, 1), (0, 2))
        >>> query.add_vector('VEC1', 'CENT1', (1, 2))
        '''
        points = self._args_to_points(2, args)
        vec = SubstructureSearchLib.ConstraintVector(points[0], points[1], name)
        self._motif.add_object(vec)
        self.geometric_objects[name] = vec

[docs]    def add_plane(self, name, *args):
        '''Add a plane.

        :param name: the name by which the plane will be accessed.
        :param `*args`: at least two point specifications in the form (substructure_index,
            atom_index) or the name of another geometric object.

        >>> query = SubstructureSearch()
        >>> _ = query.add_substructure(SMARTSSubstructure('C(=O)O'))
        >>> _ = query.add_substructure(SMARTSSubstructure('N(-H)H'))
        >>> query.add_plane('PLANE1', (0, 0), (0, 1), (0, 2))
        >>> query.add_plane('PLANE2', (1, 0), (1, 1), (1, 2))
        '''
        points = self._args_to_points(0, args)
        plane = SubstructureSearchLib.ConstraintPlane(points, name)
        self._motif.add_object(plane)
        self.geometric_objects[name] = plane

    ##### Measurements #####


[docs]    def add_distance_measurement(self, name, *args):
        '''Add a distance measurement.

        >>> query = SubstructureSearch()
        >>> _ = query.add_substructure(SMARTSSubstructure('C(=O)O'))
        >>> _ = query.add_substructure(SMARTSSubstructure('N(-H)H'))
        >>> query.add_centroid('CENT1', (0, 0), (0, 1), (0, 2))
        >>> query.add_centroid('CENT2', (1, 0), (1, 1), (1, 2))
        >>> query.add_distance_measurement('DIST1', (0, 0), 'CENT2')
        '''

        points = self._args_to_points(2, args)
        constraint = SubstructureSearchLib.PointDistanceConstraint(
            points[0], points[1], SubstructureSearchLib.AlwaysTrue(), name
        )
        self.measurements[name] = constraint
        self._motif.add_motif_parameter(MotifSearchLib.MotifGeometricConstraintParameter(constraint))

[docs]    def add_angle_measurement(self, name, *args):
        '''Add an angle measurement.

        >>> query = SubstructureSearch()
        >>> _ = query.add_substructure(SMARTSSubstructure('C(=O)O'))
        >>> _ = query.add_substructure(SMARTSSubstructure('N(-H)H'))
        >>> query.add_centroid('CENT1', (0, 0), (0, 1), (0, 2))
        >>> query.add_centroid('CENT2', (1, 0), (1, 1), (1, 2))
        >>> query.add_angle_measurement('ANG1', (0, 0), (1, 1), (1, 0))
        '''

        points = self._args_to_points(3, args)
        constraint = SubstructureSearchLib.PointAngleConstraint(
            points[0], points[1], points[2], SubstructureSearchLib.AlwaysTrue(), name
        )
        self.measurements[name] = constraint
        self._motif.add_motif_parameter(MotifSearchLib.MotifGeometricConstraintParameter(constraint))

[docs]    def add_torsion_angle_measurement(self, name, *args):
        '''Add a torsion angle measurement.

        >>> query = SubstructureSearch()
        >>> _ = query.add_substructure(SMARTSSubstructure('C(=O)O'))
        >>> _ = query.add_substructure(SMARTSSubstructure('N(-H)H'))
        >>> query.add_centroid('CENT1', (0, 0), (0, 1), (0, 2))
        >>> query.add_centroid('CENT2', (1, 0), (1, 1), (1, 2))
        >>> query.add_torsion_angle_measurement('ANG1', (0, 0), (0, 1), (1, 1), (1, 0))
        '''
        points = self._args_to_points(4, args)
        constraint = SubstructureSearchLib.PointTorsionConstraint(
            points[0], points[1], points[2], points[3], SubstructureSearchLib.AlwaysTrue(), name
        )
        self.measurements[name] = constraint
        self._motif.add_motif_parameter(MotifSearchLib.MotifGeometricConstraintParameter(constraint))

[docs]    def add_vector_angle_measurement(self, name, *args):
        '''Add a vector angle measurement.

        >>> query = SubstructureSearch()
        >>> _ = query.add_substructure(SMARTSSubstructure('C(=O)O'))
        >>> _ = query.add_substructure(SMARTSSubstructure('N(-H)H'))
        >>> query.add_vector('VEC1', (0, 1), (1, 2))
        >>> query.add_vector('VEC2', (0, 2), (1, 1))
        >>> query.add_vector_angle_measurement('ANG1', 'VEC1', 'VEC2')
        '''
        points = self._args_to_points(2, args)
        constraint = SubstructureSearchLib.VectorAngleConstraint(
            points[0], points[1], SubstructureSearchLib.AlwaysTrue(), name
        )
        self.measurements[name] = constraint
        self._motif.add_motif_parameter(MotifSearchLib.MotifGeometricConstraintParameter(constraint))

[docs]    def add_plane_angle_measurement(self, name, *args):
        '''Add a plane angle measurement.

        >>> query = SubstructureSearch()
        >>> _ = query.add_substructure(SMARTSSubstructure('C(=O)O'))
        >>> _ = query.add_substructure(SMARTSSubstructure('N(-H)H'))
        >>> query.add_plane('PLANE1', (0, 0), (0, 1), (0, 2))
        >>> query.add_plane('PLANE2', (1, 0), (1, 1), (1, 2))
        >>> query.add_plane_angle_measurement('PA1', 'PLANE1', 'PLANE2')
        '''
        points = self._args_to_points(2, args)
        constraint = SubstructureSearchLib.PlaneAngleConstraint(
            points[0], points[1], SubstructureSearchLib.AlwaysTrue(), name
        )
        self.measurements[name] = constraint
        self._motif.add_motif_parameter(MotifSearchLib.MotifGeometricConstraintParameter(constraint))

[docs]    def add_point_plane_distance_measurement(self, name, *args):
        '''Add point plane distance measurement.

        >>> query = SubstructureSearch()
        >>> _ = query.add_substructure(SMARTSSubstructure('C(=O)O'))
        >>> _ = query.add_substructure(SMARTSSubstructure('N(-H)H'))
        >>> query.add_centroid('CENT1', (0, 0), (0, 1), (0, 2))
        >>> query.add_plane('PLANE2', (1, 0), (1, 1), (1, 2))
        >>> query.add_point_plane_distance_measurement('PP1', 'CENT1', 'PLANE2')
        '''
        points = self._args_to_points(2, args)
        constraint = SubstructureSearchLib.PointPlaneDistanceConstraint(
            points[0], points[1], SubstructureSearchLib.AlwaysTrue(), name, SubstructureSearchLib.PointPlaneDistanceConstraint.ABSOLUTE
        )
        self.measurements[name] = constraint
        self._motif.add_motif_parameter(MotifSearchLib.MotifGeometricConstraintParameter(constraint))

[docs]    def add_vector_plane_angle_measurement(self, name, *args):
        '''Add a vector plane angle measurement.

        >>> query = SubstructureSearch()
        >>> _ = query.add_substructure(SMARTSSubstructure('C(=O)O'))
        >>> _ = query.add_substructure(SMARTSSubstructure('N(-H)H'))
        >>> query.add_vector('VEC1', (0, 1), (1, 2))
        >>> query.add_plane('PLANE2', (1, 0), (1, 1), (1, 2))
        >>> query.add_vector_plane_angle_measurement('ANG1', 'VEC1', 'PLANE2')
        '''
        points = self._args_to_points(2, args)
        constraint = SubstructureSearchLib.VectorPlaneAngleConstraint(
            points[0], points[1], SubstructureSearchLib.AlwaysTrue(), name
        )
        self.measurements[name] = constraint
        self._motif.add_motif_parameter(MotifSearchLib.MotifGeometricConstraintParameter(constraint))

[docs]    def add_atom_property_measurement(self, name, *args, **kw):
        '''Add an atom property measurement.

        :param name: the name by which this measurement will be accessed.
        :param `*args`: a pair, (substructure_index, atom_index) specifying the atom to measure.
        :param which: one of TotalCoordinationNumber, AtomicNumber, VdwRadius, CovalentRadius

        >>> query = SubstructureSearch()
        >>> substructure = QuerySubstructure()
        >>> _ = substructure.add_atom(['C', 'N'])
        >>> _ = query.add_substructure(substructure)
        >>> query.add_atom_property_measurement('ATOM1', (0, 0), which='AtomicNumber')
        '''
        _which_dic = utilities.bidirectional_dict(
            TotalCoordinationNumber=SubstructureSearchLib.Atom3DPropertyConstraint.TotalCoordinationNumber,
            AtomicNumber=SubstructureSearchLib.Atom3DPropertyConstraint.AtomicNumber,
            VdwRadius=SubstructureSearchLib.Atom3DPropertyConstraint.VdwRadius,
            CovalentRadius=SubstructureSearchLib.Atom3DPropertyConstraint.CovalentRadius,
        )

        points = self._args_to_points(1, args, require_3d=False)
        which = _which_dic.prefix_lookup(kw['which'])
        constraint = SubstructureSearchLib.Atom3DPropertyConstraint(
            points[0], which, SubstructureSearchLib.AlwaysTrue(), name
        )
        self.measurements[name] = constraint
        self._motif.add_motif_parameter(MotifSearchLib.MotifGeometricConstraintParameter(constraint))

[docs]    def add_constant_value_measurement(self, name, value):
        '''Add a constant value.

        :param name: the name by which this constant will be accessed.
        :param value: a float.

        >>> query = SubstructureSearch()
        >>> substructure = QuerySubstructure()
        >>> _ = substructure.add_atom(['C', 'N'])
        >>> _ = query.add_substructure(substructure)
        >>> query.add_constant_value_measurement('PI', 3.14159)
        '''
        constraint = SubstructureSearchLib.ConstantValueConstraint(value)
        constraint.set_label(name)
        self.measurements[name] = constraint
        self._motif.add_motif_parameter(MotifSearchLib.MotifGeometricConstraintParameter(constraint))

[docs]    def add_unary_transform_measurement(self, name, which, arg):
        '''Add a mathematical operation.

        :param name: name by which the result will be accessed.
        :param which: one of 'ABS', 'LOG', 'LOG10', 'EXP', 'COS', 'SIN', 'TAN', 'ACOS', 'ASIN', 'ATAN', 'FLOOR', 'ROUND', 'SQRT', 'NEG'.
        :param arg: the name of the measurement or constraint to which to apply the function.

        >>> query = SubstructureSearch()
        >>> _ = query.add_substructure(SMARTSSubstructure('C(=O)O'))
        >>> _ = query.add_substructure(SMARTSSubstructure('N(-H)H'))
        >>> query.add_vector('VEC1', (0, 1), (1, 2))
        >>> query.add_vector('VEC2', (0, 2), (1, 1))
        >>> query.add_vector_angle_measurement('ANG1', 'VEC1', 'VEC2')
        >>> query.add_unary_transform_measurement('ABS_ANGLE', 'ABS', 'ANG1')
        '''
        _unary_transform_dic = utilities.bidirectional_dict(
            ABS=SubstructureSearchLib.UnaryTransformConstraint.ABS,
            LOG=SubstructureSearchLib.UnaryTransformConstraint.LOG,
            LOG10=SubstructureSearchLib.UnaryTransformConstraint.LOG10,
            EXP=SubstructureSearchLib.UnaryTransformConstraint.EXP,
            COS=SubstructureSearchLib.UnaryTransformConstraint.COS,
            SIN=SubstructureSearchLib.UnaryTransformConstraint.SIN,
            TAN=SubstructureSearchLib.UnaryTransformConstraint.TAN,
            ACOS=SubstructureSearchLib.UnaryTransformConstraint.ACOS,
            ASIN=SubstructureSearchLib.UnaryTransformConstraint.ASIN,
            ATAN=SubstructureSearchLib.UnaryTransformConstraint.ATAN,
            FLOOR=SubstructureSearchLib.UnaryTransformConstraint.INT,
            ROUND=SubstructureSearchLib.UnaryTransformConstraint.NINT,
            SQRT=SubstructureSearchLib.UnaryTransformConstraint.SQRT,
            NEG=SubstructureSearchLib.UnaryTransformConstraint.NEG,
            #ASSI=SubstructureSearchLib.UnaryTransformConstraint.ASSI
        )
        kind = _unary_transform_dic.prefix_lookup(which)
        c = self.measurements.get(arg, self.constraints.get(arg, self.contacts.get(arg, None)))
        if c is None:
            raise TypeError('No measurement or constraint for %s' % arg)
        constraint = SubstructureSearchLib.UnaryTransformConstraint(
            kind, c, SubstructureSearchLib.AlwaysTrue(), name
        )
        self.measurements[name] = constraint
        self._motif.add_motif_parameter(MotifSearchLib.MotifGeometricConstraintParameter(constraint))

[docs]    def add_binary_transform_measurement(self, name, which, arg1, arg2):
        '''Add a binary mathematical operation.

        :param name: the name by which this value will be accessed.
        :param which: one of 'MAX', 'MIN', 'ADD', 'SUBTRACT', 'MULTIPLY', 'DIVIDE', 'POW', 'RSIN', 'RCOS'.
        :param arg1, arg2: the name of a measurement to be used as arguments to the operator.

        >>> query = SubstructureSearch()
        >>> _ = query.add_substructure(SMARTSSubstructure('C(=O)O'))
        >>> _ = query.add_substructure(SMARTSSubstructure('N(-H)H'))
        >>> query.add_vector('VEC1', (0, 1), (1, 2))
        >>> query.add_vector('VEC2', (0, 2), (1, 1))
        >>> query.add_vector_angle_measurement('ANG1', 'VEC1', 'VEC2')
        >>> query.add_constant_value_measurement('D2R', 180/3.14159)
        >>> query.add_binary_transform_measurement('IN_RADIANS', 'MUL', 'ANG1', 'D2R')
        '''
        _binary_transform_dic = utilities.bidirectional_dict(
            MAX=SubstructureSearchLib.BinaryTransformConstraint.MAX,
            MIN=SubstructureSearchLib.BinaryTransformConstraint.MIN,
            ADD=SubstructureSearchLib.BinaryTransformConstraint.ADD,
            #SUM=SubstructureSearchLib.BinaryTransformConstraint.SUM, ???
            SUBTRACT=SubstructureSearchLib.BinaryTransformConstraint.SUBTRACT,
            MULTIPLY=SubstructureSearchLib.BinaryTransformConstraint.MULTIPLY,
            DIVIDE=SubstructureSearchLib.BinaryTransformConstraint.DIVIDE,
            POW=SubstructureSearchLib.BinaryTransformConstraint.POW,
            RSIN=SubstructureSearchLib.BinaryTransformConstraint.RSIN,
            RCOS=SubstructureSearchLib.BinaryTransformConstraint.RCOS
        )
        kind = _binary_transform_dic.prefix_lookup(which)
        c1 = self.measurements.get(arg1, self.constraints.get(arg1, self.contacts.get(arg1, None)))
        if c1 is None:
            raise TypeError('No measurement or constraint for %s' % arg1)
        c2 = self.measurements.get(arg2, self.constraints.get(arg2, self.contacts.get(arg2, None)))
        constraint = SubstructureSearchLib.BinaryTransformConstraint(
            kind, c1, c2, SubstructureSearchLib.AlwaysTrue(), name
        )
        self.measurements[name] = constraint
        self._motif.add_motif_parameter(MotifSearchLib.MotifGeometricConstraintParameter(constraint))


    def _add_constraint(self, name, r):
        '''Turn an existing measurement into a constraint.

        :param name: key of the measurement in ``self.measurements``.
        :param r: the condition, in a form accepted by ``_decode_condition``.
        :raises KeyError: if there is no measurement called ``name``.
        '''
        constraint = self.measurements[name]
        crit = _decode_condition(r)
        constraint.set_condition(crit)
        # The measurement is only removed once the condition has been applied, so that an
        # invalid condition does not lose the measurement.
        del self.measurements[name]
        self.constraints[name] = constraint
        self._motif.add_object_constraint(constraint)

    ##### Constraints #####

[docs]    def add_distance_constraint(self, name, *args, **kw):
        '''Add a distance constraint.

        If the range starts with a string operator, or if either point is given by the name of a
        geometric object, a point distance constraint is created and stored in ``constraints``;
        ``type`` and ``vdw_corrected`` are not used in that case.  Otherwise a contact between the
        two atoms is created and stored in ``contacts``.

        :param name: the name of this constraint.
        :param `*args`: specifications of two points either as pairs (substructure_index, atom_index) or
            as names of geometric objects, followed by the range.  The range may in turn be followed
            positionally by the type (a string) and then by vdw_corrected (a bool).
        :param range: a condition, either as a pair of floats or a pair (operator, value) where operator may be
            '==', '>', '<', '>=', '<=', '!=' or a pair ('in', list(values)).  It is given
            positionally, after the points.
        :param type: a string; one starting with 'intra' (in any case) requires the distance to be within
            a molecule, one starting with 'any' allows either, and anything else, including the default
            'inter', requires an intermolecular distance.
        :param vdw_corrected: whether the distance range should be relative to the Van der Waals
            radii of the atoms involved.
        :raises TypeError: if the range is not a list or tuple.

        >>> query = SubstructureSearch()
        >>> _ = query.add_substructure(SMARTSSubstructure('C(=O)O'))
        >>> _ = query.add_substructure(SMARTSSubstructure('N(-H)H'))
        >>> query.add_distance_constraint('DIST1', (0, 1), (1, 1), (-5, 0), vdw_corrected=True, type='any')
        >>> query.add_distance_constraint('DIST2', (0, 2), (1, 2), ('<=', 3.0), vdw_corrected=True, type='any')
        '''
        kind = kw.get('type', 'inter')
        vdw_corrected = kw.get('vdw_corrected', False)
        # Peel optional trailing positional arguments off the end of args: first a bool
        # (vdw_corrected), then a string (type).  off ends up as the negative offset of the range.
        off = 1
        if isinstance(args[-off], bool):
            vdw_corrected = args[-off]
            off += 1
        if isinstance(args[-off], str):
            kind = args[-off]
            off += 1
        if kind.lower().startswith('intra'):
            which = ChemistryLib.ContactCriterion.INTRAMOLECULAR
        elif kind.lower().startswith('any'):
            which = ChemistryLib.ContactCriterion.ANY
        else:
            which = ChemistryLib.ContactCriterion.INTERMOLECULAR
        r = args[-off]
        # Everything before the range must describe exactly two points.
        points = self._args_to_points(2, args[:-off])

        if isinstance(r, (list, tuple)):
            if isinstance(r[0], str) or any(isinstance(a, str) for a in args[:-off]):
                # An operator condition, or a point given by name: use a point distance constraint.
                cond = _decode_condition(r)
                constraint = SubstructureSearchLib.PointDistanceConstraint(points[0], points[1], cond, name)
                self._motif.add_object_constraint(constraint)
                self.constraints[name] = constraint
                self._motif.add_motif_parameter(MotifSearchLib.MotifGeometricConstraintParameter(constraint))
            else:
                # A numeric range between two atoms: use a contact with a distance criterion.
                crit = SubstructureSearchLib.InterAtomicDistanceCriterion(min(r), max(r), which, vdw_corrected)
                # NOTE(review): presumably the bond path between the atoms must be between 3 and
                # 999 bonds long - confirm against the criterion's documentation.
                crit.set_min_path_length(3)
                crit.set_max_path_length(999)
                # Recover the raw (substructure, atom) indices of the two atoms from args, since
                # the contact takes indices rather than point objects.  Each atom is either a
                # pair or two consecutive ints.
                i = 0
                while i < len(args):
                    a = args[i]
                    if isinstance(a, (list, tuple)):
                        if i == 0:
                            sub_inx1 = a[0]
                            at_inx1 = a[1]
                            i += 1
                        else:
                            sub_inx2 = a[0]
                            at_inx2 = a[1]
                            break
                    elif isinstance(a, int):
                        if i == 0:
                            sub_inx1 = a
                            at_inx1 = args[i+1]
                            i += 2
                        else:
                            sub_inx2 = a
                            at_inx2 = args[i+1]
                            break
                constraint = SubstructureSearchLib.SubstructureContact(sub_inx1, at_inx1, sub_inx2, at_inx2, crit)
                self._motif.add_contact(constraint)
                self.contacts[name] = constraint
                self._motif.add_motif_parameter(MotifSearchLib.MotifDistanceParameter(name, sub_inx1, at_inx1, sub_inx2, at_inx2))
        else:
            raise TypeError('Invalid value for condition {}'.format(r))

[docs]    def add_angle_constraint(self, name, *args):
        '''Add an angle constraint.

        :param name: by which the constraint will be accessed.
        :param `*args`: three instances either of a pair (substructure_index, atom_index) or of names
            of geometric objects.
        :param range: as for :meth:`ccdc.search.SubstructureSearch.add_distance_constraint`

        >>> query = SubstructureSearch()
        >>> _ = query.add_substructure(SMARTSSubstructure('C(=O)O'))
        >>> _ = query.add_substructure(SMARTSSubstructure('N(-H)H'))
        >>> query.add_centroid('CENT1', (0, 0), (0, 1), (0, 2))
        >>> query.add_centroid('CENT2', (1, 0), (1, 1), (1, 2))
        >>> query.add_angle_constraint('ANG1', (0, 0), (1, 1), (1, 0), ('>=', 120))
        '''
        self.add_angle_measurement(name, *args[:-1])
        self._add_constraint(name, args[-1])

[docs]    def add_torsion_angle_constraint(self, name, *args):
        '''Add a torsion angle constraint.

        :param name: the name by which this constraint is accessed.
        :param `*args`: as for :meth:`ccdc.search.SubstructureSearch.add_distance_constraint`

        >>> query = SubstructureSearch()
        >>> _ = query.add_substructure(SMARTSSubstructure('C(=O)O'))
        >>> _ = query.add_substructure(SMARTSSubstructure('N(-H)H'))
        >>> query.add_centroid('CENT1', (0, 0), (0, 1), (0, 2))
        >>> query.add_centroid('CENT2', (1, 0), (1, 1), (1, 2))
        >>> query.add_torsion_angle_constraint('ANG1', (0, 0), (0, 1), (1, 1), (1, 0), (120, 180))
        '''
        self.add_torsion_angle_measurement(name, *args[:-1])
        self._add_constraint(name, args[-1])

[docs]    def add_vector_angle_constraint(self, name, *args):
        '''Add a vector angle constraint.

        >>> query = SubstructureSearch()
        >>> _ = query.add_substructure(SMARTSSubstructure('C(=O)O'))
        >>> _ = query.add_substructure(SMARTSSubstructure('N(-H)H'))
        >>> query.add_vector('VEC1', (0, 1), (1, 2))
        >>> query.add_vector('VEC2', (0, 2), (1, 1))
        >>> query.add_vector_angle_constraint('ANG1', 'VEC1', 'VEC2', (0, 60))
        '''
        self.add_vector_angle_measurement(name, *args[:-1])
        self._add_constraint(name, args[-1])

[docs]    def add_plane_angle_constraint(self, name, *args):
        '''Add a plane angle constraint.

        >>> query = SubstructureSearch()
        >>> _ = query.add_substructure(SMARTSSubstructure('C(=O)O'))
        >>> _ = query.add_substructure(SMARTSSubstructure('N(-H)H'))
        >>> query.add_plane('PLANE1', (0, 0), (0, 1), (0, 2))
        >>> query.add_plane('PLANE2', (1, 0), (1, 1), (1, 2))
        >>> query.add_plane_angle_constraint('PA1', 'PLANE1', 'PLANE2', (-10, 10))
        '''
        self.add_plane_angle_measurement(name, *args[:-1])
        self._add_constraint(name, args[-1])

[docs]    def add_point_plane_distance_constraint(self, name, *args):
        '''Add a point plane distance constraint.

        >>> query = SubstructureSearch()
        >>> _ = query.add_substructure(SMARTSSubstructure('C(=O)O'))
        >>> _ = query.add_substructure(SMARTSSubstructure('N(-H)H'))
        >>> query.add_centroid('CENT1', (0, 0), (0, 1), (0, 2))
        >>> query.add_plane('PLANE2', (1, 0), (1, 1), (1, 2))
        >>> query.add_point_plane_distance_constraint('PP1', 'CENT1', 'PLANE2', ('<', 5))
        '''
        self.add_point_plane_distance_measurement(name, *args[:-1])
        self._add_constraint(name, args[-1])

[docs]    def add_vector_plane_angle_constraint(self, name, *args):
        '''Add a vector plane angle constraint.

        >>> query = SubstructureSearch()
        >>> _ = query.add_substructure(SMARTSSubstructure('C(=O)O'))
        >>> _ = query.add_substructure(SMARTSSubstructure('N(-H)H'))
        >>> query.add_vector('VEC1', (0, 1), (1, 2))
        >>> query.add_plane('PLANE2', (1, 0), (1, 1), (1, 2))
        >>> query.add_vector_plane_angle_constraint('ANG1', 'VEC1', 'PLANE2', ('>', 90))
        '''
        self.add_vector_plane_angle_measurement(name, *args[:-1])
        self._add_constraint(name, args[-1])

[docs]    def add_atom_property_constraint(self, name, *args, **kw):
        '''Add an atom property constraint.

        >>> query = SubstructureSearch()
        >>> _ = query.add_substructure(SMARTSSubstructure('[*H1]'))
        >>> query.add_atom_property_constraint('ATOM1', (0, 0), ('in', [7, 8]), which='AtomicNumber')
        '''
        self.add_atom_property_measurement(name, *args[:-1], **kw)
        self._add_constraint(name, args[-1])

[docs]    def add_unary_transform_constraint(self, name, *args):
        '''Add an arithmetical calculation constraint.

        >>> query = SubstructureSearch()
        >>> _ = query.add_substructure(SMARTSSubstructure('C(=O)O'))
        >>> _ = query.add_substructure(SMARTSSubstructure('N(-H)H'))
        >>> query.add_vector('VEC1', (0, 1), (1, 2))
        >>> query.add_vector('VEC2', (0, 2), (1, 1))
        >>> query.add_vector_angle_measurement('ANG1', 'VEC1', 'VEC2')
        >>> query.add_unary_transform_constraint('ABS_ANGLE', 'ABS', 'ANG1', (0, 10))
        '''
        self.add_unary_transform_measurement(name, *args[:-1])
        self._add_constraint(name, args[-1])

[docs]    def add_binary_transform_constraint(self, name, which, *args):
        '''Add a binary arithmetical calculation constraint.

        >>> query = SubstructureSearch()
        >>> _ = query.add_substructure(SMARTSSubstructure('C(=O)O'))
        >>> _ = query.add_substructure(SMARTSSubstructure('N(-H)H'))
        >>> query.add_vector('VEC1', (0, 1), (1, 2))
        >>> query.add_vector('VEC2', (0, 2), (1, 1))
        >>> query.add_vector_angle_measurement('ANG1', 'VEC1', 'VEC2')
        >>> query.add_constant_value_measurement('D2R', 180/3.14159)
        >>> query.add_binary_transform_constraint('IN_RADIANS', 'MUL', 'ANG1', 'D2R', (-1, 1))
        '''
        self.add_binary_transform_measurement(name, which, args[0], args[1])
        self._add_constraint(name, args[2])

[docs]    @staticmethod
    def from_xml(xml):
        '''Create a substructure search from XML. Deprecated.

        :param xml: XML string
        '''
        s = SubstructureSearch()
        s.read_xml(xml)
        return s

[docs]    @staticmethod
    def from_xml_file(file_name):
        '''Create a substructure search from an XML file. Deprecated.

        :param file_name: path to XML file

        :raises: IOError when the file does not exist
        '''
        if not os.path.exists(file_name):
            raise IOError('The file %s does not exist' % file_name)
        with open(file_name) as f:
            return SubstructureSearch.from_xml(f.read())

[docs]    def read_xml(self, xml):
        '''Read search query from XML. Deprecated.

        :param xml: XML string
        '''
        warnings.warn('''This method is deprecated and will be removed in a later version.''', DeprecationWarning)
        rdr = CSDSQLDatabaseLib.XMLMotifReader()
        stream = UtilitiesLib.istringstream(xml)
        rdr.load(stream)
        for i in range(rdr.nmolecules()):
            self._xml_motif = motif = rdr.motif(i)
            # merge this motif with ours
            for j in range(motif.nsubstructures()):
                self.add_substructure(QuerySubstructure(motif.substructure(j)))
            for j in range(motif.n_object_constraints()):
                obj = motif.object_constraint(j)
                if obj.class_name() == 'PointTorsionConstraint':
                    c = obj.condition()
                    p0 = SubstructureSearchLib.Object_as_Point(obj.objects(0))
                    p1 = SubstructureSearchLib.Object_as_Point(obj.objects(1))
                    p2 = SubstructureSearchLib.Object_as_Point(obj.objects(2))
                    p3 = SubstructureSearchLib.Object_as_Point(obj.objects(3))
                    obj = SubstructureSearchLib.PointTorsionConstraint(
                        p0, p1, p2, p3, SubstructureSearchLib.AlwaysTrue(), obj.label()
                    )
                    obj.set_condition(c)
                self.constraints[obj.label()] = obj
                self._motif.add_object_constraint(obj)
                self._motif.add_motif_parameter(MotifSearchLib.MotifGeometricConstraintParameter(obj))
            for j in range(motif.nconstraints()):
                con = motif.constraint(j)
                for f, tag in [
                        (MotifSearchLib.motif_constraint_as_angle_constraint, 'ANGLE'),
                        (MotifSearchLib.motif_constraint_as_combined_constraint, 'COMBINED'),
                        (MotifSearchLib.motif_constraint_as_contact_order_constraint, 'CONTACT_ORDER'),
                        (MotifSearchLib.motif_constraint_as_discrete_chain_constraint, 'DISCRETE_CHAIN'),
                        (MotifSearchLib.motif_constraint_as_hydrogen_bond_angle_present_constraint, 'HBOND_ANGLE_PRESENT'),
                        (MotifSearchLib.motif_constraint_as_hydrogen_bond_constraint, 'HBOND'),
                        (MotifSearchLib.motif_constraint_as_nunique_contacts_constraint, 'NUNIQUE_CONTACTS'),
                        (MotifSearchLib.motif_constraint_as_not_present_constraint, 'NOT_PRESENT'),
                        (MotifSearchLib.motif_constraint_as_shortest_path_constraint, 'SHORTEST_PATH'),
                        (MotifSearchLib.motif_constraint_as_torsion_constraint, 'TORSION'),
                        (MotifSearchLib.motif_constraint_as_translation_constraint, 'TRANSLATION'),
                        (MotifSearchLib.motif_constraint_as_unique_atoms_constraint, 'UNIQUE_ATOMS'),
                ]:
                    c = f(con)
                    if c:
                        self.constraints['%s_%d' % (tag, len(self.constraints)+1)] = c
                self._motif.add_motif_constraint(con)
                self._motif.add_motif_parameter(MotifSearchLib.MotifGeometricConstraintParameter(con))
            for j in range(motif.ncontacts()):
                con = motif.motif_contact(j)
                self.contacts['CONTACT_%d' % j] = con
                self._motif.add_contact(con)
                s1 = con.substruct_a()
                a1 = con.atom_a()
                s2 = con.substruct_b()
                a2 = con.atom_b()
                self._motif.add_motif_parameter(MotifSearchLib.MotifDistanceParameter('CONTACT_%d' % j, s1, a1, s2, a2))
            for j in range(motif.nparameters()):
                par = motif.parameter(i)
                self._motif.add_parameter(par)
                for f, tag in [
                        (MotifSearchLib.motif_parameter_as_distance_parameter, 'DISTANCE'),
                        (MotifSearchLib.motif_parameter_as_angle_parameter, 'ANGLE'),
                        (MotifSearchLib.motif_parameter_as_torsion_parameter, 'TORSION')
                ]:
                    p = f(par)
                    if p:
                        self.measurements[p.name()] = p
                        self._motif.add_motif_parameter(p)

[docs]    def read_xml_file(self, file_name):
        '''Read search parameters from an XML file. Deprecated.

        :param file_name: path to XML file
        :raises: IOError if the file cannot be read
        '''
        if not os.path.exists(file_name):
            raise IOError('The file %s does not exist' % file_name)
        with open(file_name) as f:
            self.read_xml(f.read())

    def _add_enantiomer_consistency(self):
        '''Private: pass the ``_match_enantiomers`` setting on to this search's motif.'''
        consistency = MotifSearchLib.MotifTorsionInversionConsistencyConstraint
        consistency.add_to_motif(self._motif, self.settings._match_enantiomers)

    def _search_reader(self, database):
        if self._motif.nsubstructures() == 0:
            raise TypeError('No substructures to search')
        self._add_enantiomer_consistency()
        if not hasattr(database, '_motif_searcher'):
            try:
                database._motif_searcher = database._db.searcher_factory().motif_searcher()
            except (RuntimeError, NameError, AttributeError):
                pass
        if hasattr(database, '_motif_searcher'):
            results_writer = CSDSQLDatabaseLib.CrystalStructureDatabaseSearchVectorResultsWriter()
            hits = database._motif_searcher.search(self._motif, self.settings._settings, results_writer)
            hit_list = SubstructureSearch.SubstructureHitList(
                SubstructureSearch.SubstructureHit._from_match(x, self, _binary_database=database._db)
                for x in results_writer.matches()
            )
            if database.__class__.__name__ == 'MoleculeReader':
                def skip_suppressed_hit(hit):
                    try:
                        ats = hit.match_atoms()
                        return False
                    except RuntimeError:
                        return True
                hit_list = SubstructureSearch.SubstructureHitList(
                    hit for hit in hit_list if not skip_suppressed_hit(hit)
                )
            return hit_list
        else:
            # there used to be fallback code here, but now we expect to always support motif search on any database
            raise NotImplementedError("Substructure search is not implemented on this database type")

    def _search_entry(self, entry, _database=None):
        if self._motif.nsubstructures() == 0:
            raise TypeError('No substructures to search')
        if not CSDSQLDatabaseLib.test_entry_settings_constraints(self.settings._settings, entry._entry):
            return []
        self._add_enantiomer_consistency()
        if entry._entry.chemical_diagram_views() is None:
            generator = ChemistryLib.ChemicalDiagramGenerator()
            diagram = generator.create_chemical_diagram(entry.molecule._molecule)
            views = ChemistryLib.ChemicalDiagramViews2D(diagram)
            entry._entry.set_chemical_diagram_views(views)
        results_writer = CSDSQLDatabaseLib.CrystalStructureDatabaseSearchVectorResultsWriter()
        csd = EntryReader('csd')
        if hasattr(csd, '_component_dbs'):
            db = csd._component_dbs.values()[-1]
        else:
            db = csd._db
        _substructure_searcher = db.searcher_factory().substructure_searcher()
        _substructure_searcher = CSDSQLDatabaseLib.CSDSQLSubstructureSearcher(_substructure_searcher)
        if CSDSQLDatabaseLib.CSDSQLSubstructureSearcher_valid(_substructure_searcher):
            _substructure_searcher.search_entry(
                entry._entry, self._motif, self.settings._settings, results_writer
            )
            return SubstructureSearch.SubstructureHitList(
                SubstructureSearch.SubstructureHit._from_match(x, self, _entry=entry)
                for x in results_writer.matches()
            )
        else:
            return self._search_crystal(entry.crystal, _database=_database)

    def _search_crystal(self, crystal, _database=None):
        if self._motif.nsubstructures() == 0:
            raise TypeError('No substructures to search')
        try:
            if not CSDSQLDatabaseLib.test_molecule_settings_constraints(self.settings._settings,
                                                                        crystal.molecule._molecule):
                return []
        except TypeError:
            if self.settings.has_3d_coordinates:
                return []
        self._add_enantiomer_consistency()
        view = ChemistryLib.CrystalStructureView.instantiate(crystal._crystal)
        searcher = MotifSearchLib.MotifSearch()
        searcher.set_limit(self.settings.max_hits_per_structure)
        try:
            res = searcher.search(view, self._motif)
        except RuntimeError as e:
            if 'Too many steps' in str(e):
                raise RuntimeError(
                    'The crystal search failed: probably due to an over-complex substructure or target molecule.\n'
                    'Try restricting the number of hits with max_hits_per_structure or reducing the complexity '
                    'of the substructure.'
                )
            else:
                raise RuntimeError('Crystal search failed with: %s' % e)
        if _database is not None:
            _crystal = None
        else:
            _crystal = crystal
        hits = SubstructureSearch.SubstructureHitList(
            SubstructureSearch.SubstructureHit(
                crystal.identifier, m, searcher.search_structure(), self,
                _crystal=_crystal, _database=_database) for m in res)
        for h in hits:
            h._crystal = crystal
        return hits

    def _search_molecule(self, molecule, _database=None):
        '''Private: run this substructure search against a single molecule.

        The molecule is wrapped in a crystal with a default cell and searched with
        :meth:`_search_crystal`; each resulting hit is then pointed back at the
        molecule and has its crystal cleared.

        :param molecule: the molecule to search
        :param _database: private: passed through to :meth:`_search_crystal`
        :returns: a list of hits; empty if the molecule fails the settings constraints
        '''
        passes = CSDSQLDatabaseLib.test_molecule_settings_constraints(self.settings._settings, molecule._molecule)
        if not passes:
            return []

        # Remember any existing cell so that it can be put back after the search.
        saved_cell = getattr(molecule, '_cell', None)
        self._add_enantiomer_consistency()
        molecule._cell = ChemistryLib.Cell()
        wrapper = Entry.from_molecule(molecule).crystal
        wrapper._crystal.set_cell(ChemistryLib.Cell(), ChemistryLib.CrystalStructure.KEEP_ORTHOGONAL_COORDINATES)
        found = self._search_crystal(wrapper, _database=_database)
        if saved_cell is not None:
            molecule._cell = saved_cell

        for hit in found:
            hit._molecule = molecule
            hit._crystal = None
        return found

###########################################################################
#   Reduced cell search
###########################################################################


class ReducedCellSearch(Search):
    '''Provide reduced cell searches.'''

    @utilities.nested_class('ReducedCellSearch')
    class Settings(Search.Settings):
        '''Settings appropriate to a reduced cell search.'''
        def __init__(self, _settings=None):
            '''Initialise settings.

            :param _settings: private: an existing internal settings object to wrap. When ``None``
                a fresh internal settings object is created and ``max_hits_per_structure`` is set to 1.
            '''
            if _settings is None:
                self._settings = CSDSQLDatabaseLib.CrystalStructureDatabaseReducedCellSearchSettings()
                Search.Settings.__init__(self, self._settings)
                self.max_hits_per_structure = 1
            else:
                self._settings = _settings

        def reset(self):
            '''Reset to default values.'''
            self._settings = CSDSQLDatabaseLib.CrystalStructureDatabaseReducedCellSearchSettings()
            Search.Settings.__init__(self, self._settings)
            self.max_hits_per_structure = 1

        @property
        def percent_length_tolerance(self):
            '''The cell length tolerance as a percentage of the longest cell dimension.'''
            return self._settings.percent_length_tolerance()

        @percent_length_tolerance.setter
        def percent_length_tolerance(self, val):
            '''Set the percent length tolerance.'''
            self._settings.set_percent_length_tolerance(val)

        @property
        def absolute_angle_tolerance(self):
            '''The absolute angle tolerance.'''
            return self._settings.absolute_angle_tolerance()

        @absolute_angle_tolerance.setter
        def absolute_angle_tolerance(self, val):
            '''Set the absolute angle tolerance.'''
            self._settings.set_absolute_angle_tolerance(val)

        @property
        def is_normalised(self):
            '''Whether the input cell is normalised.'''
            return self._settings.is_normalised()

        @is_normalised.setter
        def is_normalised(self, val):
            '''Set the is_normalised property.'''
            self._settings.set_is_normalised(val)

    @utilities.nested_class('ReducedCellSearch')
    class Query(object):
        '''Base query.'''
        def __init__(self, lengths=None, angles=None, lattice_centring=None):
            '''Initialise with cell lengths, cell angles and the lattice centring.

            :param lengths: the three cell lengths
            :param angles: the three cell angles, in degrees
            :param lattice_centring: the lattice centring, either as a string or as a value
                understood by the spacegroup centring text map
            '''
            self.lengths = lengths
            self.angles = angles
            self.lattice_centring = lattice_centring

        def _get_query(self, settings=None):
            '''Private: return an internal query object.

            :param settings: the :class:`ReducedCellSearch.Settings` to use; defaults are used if ``None``
            '''
            if settings is None:
                settings = ReducedCellSearch.Settings()
            if isinstance(self.lattice_centring, str):
                centring = self.lattice_centring
            else:
                etm = ChemistryLib.Spacegroup.centring_text()
                centring = etm.text(self.lattice_centring)

            # Only the first letter of the centring text selects the spacegroup.
            k = centring[0].upper()
            if k == 'R':
                sp = ChemistryLib.Spacegroup('R3', ChemistryLib.Spacegroup.UNKNOWN_SYSTEM)
            else:
                sp = ChemistryLib.Spacegroup(k + '1')
            cell = ChemistryLib.Cell(
                self.lengths[0], self.lengths[1], self.lengths[2],
                MathsLib.Angle(self.angles[0], MathsLib.Angle.DEGREES),
                MathsLib.Angle(self.angles[1], MathsLib.Angle.DEGREES),
                MathsLib.Angle(self.angles[2], MathsLib.Angle.DEGREES),
                sp
            )
            return CSDSQLDatabaseLib.CrystalStructureDatabaseReducedCellSearch(
                cell, settings._settings
            )

    @utilities.nested_class('ReducedCellSearch')
    class CrystalQuery(Query):
        '''Reduced cell query from a crystal.'''
        def __init__(self, crystal):
            '''Initialise from the cell lengths, cell angles and lattice centring of a crystal.'''
            super(ReducedCellSearch.CrystalQuery, self).__init__(
                crystal.cell_lengths,
                crystal.cell_angles,
                crystal.lattice_centring
            )

    @utilities.nested_class('ReducedCellSearch')
    class XMLQuery(Query):
        '''Reduced cell query from an XML representation.'''
        def __init__(self, xml):
            '''Initialise from xml.

            :param xml: XML string
            '''
            parser = CSDSQLDatabaseLib.ReducedCellSearchXMLParser()
            self._query = parser.parse(xml)

        def _get_query(self, settings=None):
            '''Private: return the underlying internal query. The settings argument is ignored.'''
            return self._query

    @utilities.nested_class('ReducedCellSearch')
    class XMLFileQuery(XMLQuery):
        '''Reduced cell query from a file name.'''
        def __init__(self, file_name):
            '''Initialise from a file name.'''
            with open(file_name) as f:
                super(ReducedCellSearch.XMLFileQuery, self).__init__(f.read())

    def __init__(self, query=None, settings=None):
        '''Initialise with optional query and settings.'''
        self.query = query
        if settings is None:
            settings = self.Settings()
        self.settings = settings

    @staticmethod
    def from_xml(xml):
        '''Construct a reduced cell search from an XML representation.

        The length and angle tolerances of the new search are taken from the parsed XML.

        :param xml: XML string
        '''
        stream = UtilitiesLib.istringstream(xml)
        parser = CSDSQLDatabaseLib.ReducedCellSearchXMLParser()
        q = parser.parse(stream)
        rcs = ReducedCellSearch(query=ReducedCellSearch.XMLQuery(xml))
        rcs.settings.percent_length_tolerance = q.settings().percent_length_tolerance()
        rcs.settings.absolute_angle_tolerance = q.settings().absolute_angle_tolerance()
        return rcs

    @staticmethod
    def from_xml_file(file_name):
        '''Construct a reduced cell search from an XML file.

        :param file_name: path to XML file

        :raises: IOError when the file does not exist
        '''
        if not os.path.exists(file_name):
            raise IOError('The file %s does not exist' % file_name)
        with open(file_name) as f:
            return ReducedCellSearch.from_xml(f.read())

    def read_xml(self, xml):
        '''Read XML into this ReducedCellSearch.

        The query is replaced and the length and angle tolerances are taken from the parsed XML.

        :param xml: XML string
        '''
        self.set_query(ReducedCellSearch.XMLQuery(xml))
        parser = CSDSQLDatabaseLib.ReducedCellSearchXMLParser()
        q = parser.parse(xml)
        self.settings.percent_length_tolerance = q.settings().percent_length_tolerance()
        self.settings.absolute_angle_tolerance = q.settings().absolute_angle_tolerance()

    def read_xml_file(self, file_name):
        '''Read an XML file into this ReducedCellSearch.

        :param file_name: path to XML file
        :raises: IOError if the file cannot be read
        '''
        if not os.path.exists(file_name):
            raise IOError('The file %s does not exist' % file_name)
        with open(file_name) as f:
            self.read_xml(f.read())

    def set_query(self, query):
        '''Set the query.'''
        self.query = query
        self._search = self.query._get_query(self.settings)

    def compare_cells(self, r0, r1):
        '''Compare two reduced cells.

        Lengths must agree to within ``percent_length_tolerance`` percent of the longest
        dimension of ``r0``; angles must agree to within ``absolute_angle_tolerance`` degrees.

        :param r0: the first reduced cell, an instance of :class:`ccdc.crystal.Crystal.ReducedCell`
        :param r1: the second reduced cell similarly
        :returns: boolean
        '''
        if isinstance(r0, Crystal.ReducedCell):
            r0 = r0._reduced_cell
        if isinstance(r1, Crystal.ReducedCell):
            r1 = r1._reduced_cell
        len_tol = (self.settings.percent_length_tolerance/100.) * max(r0.a(), r0.b(), r0.c())
        ang_tol = self.settings.absolute_angle_tolerance

        def _compare_values(v0, v1, tol):
            '''Private: test value difference lies within tolerance.'''
            return abs(v0 - v1) <= tol

        return (
            _compare_values(r0.a(), r1.a(), len_tol) and
            _compare_values(r0.b(), r1.b(), len_tol) and
            _compare_values(r0.c(), r1.c(), len_tol) and
            _compare_values(r0.alpha().degrees(), r1.alpha().degrees(), ang_tol) and
            _compare_values(r0.beta().degrees(), r1.beta().degrees(), ang_tol) and
            _compare_values(r0.gamma().degrees(), r1.gamma().degrees(), ang_tol)
        )

    def _search_reader(self, database):
        '''Search a database.

        :param database: the reader to search
        :returns: a list of :class:`ccdc.search.Search.SearchHit`
        :raises: TypeError if the search has no query
        '''
        if not self.query:
            raise TypeError('The search has no query.')
        if not hasattr(database, '_reduced_cell_searcher'):
            try:
                database._reduced_cell_searcher = database._db.searcher_factory().reduced_cell_searcher()
            except (RuntimeError, NameError, AttributeError):
                # Best effort: without a searcher the crystals are tested one-by-one below.
                pass
        if hasattr(database, '_reduced_cell_searcher'):
            if self.settings._has_filter_set():
                # The filters are applied here, after the cell search, so the cap on hit
                # structures is lifted for the cell search and enforced on the filtered list.
                max_hits = self.settings.max_hit_structures
                self.settings.max_hit_structures = maxint32
                try:
                    hits = database._reduced_cell_searcher.search(self.query._get_query(self.settings))
                    ret = []
                    for h in hits:
                        r = Search.SearchHit(h, _database=database)
                        if self.settings.test(r.entry):
                            ret.append(r)
                            if max_hits and len(ret) >= max_hits:
                                break
                finally:
                    # Always put the caller's cap back, even if the search raised.
                    self.settings.max_hit_structures = max_hits
            else:
                hits = database._reduced_cell_searcher.search(self.query._get_query(self.settings))
                ret = [Search.SearchHit(h, _database=database) for h in hits]
        else:
            # Have to do it one-by-one
            ret = []
            for c in database.crystals():
                # _search_crystal applies the settings filters itself.
                ret.extend(self._search_crystal(c))
                if self.settings.max_hit_structures and len(ret) >= self.settings.max_hit_structures:
                    break
            for r in ret:
                r._crystal = None
                r._database = database
        return ret

    def _search_molecule(self, mol):
        '''Molecules don't have cells, so always fails.'''
        return []

    def _search_crystal(self, crystal):
        '''Test the query against a single crystal.

        :returns: a one-element list holding the hit if the crystal's reduced cell matches the
            query cell and the crystal passes any filters that are set, otherwise an empty list
        '''
        red = ChemistryLib.ReducedCell(crystal._crystal.cell())
        que = self.query._get_query(self.settings)
        que_red = que.query_cell()
        if self.compare_cells(que_red, red):
            ret = [Search.SearchHit(crystal.identifier)]
            ret[0]._crystal = crystal
            if self.settings._has_filter_set():
                if self.settings.test(crystal):
                    return ret
                else:
                    return []
            return ret
        return []

###########################################################################
#   Combined search
###########################################################################

[docs]class CombinedSearch(Search):
    '''Boolean combinations of other searches.

    TextNumericSearch, SubstructureSearch, SimilaritySearch and ReducedCellSearch can be combined using and, or and not
    to provide a combined search.

    >>> csd = io.EntryReader('csd')
    >>> tns = TextNumericSearch()
    >>> tns.add_compound_name('Aspirin')
    >>> sub_search = SubstructureSearch()
    >>> _ = sub_search.add_substructure(SMARTSSubstructure('C(=O)OH'))
    >>> rcs = ReducedCellSearch(ReducedCellSearch.CrystalQuery(csd.crystal('ACSALA')))
    >>> combi_search = CombinedSearch(tns & (-rcs | -sub_search))
    >>> hits = combi_search.search()
    >>> print(len(hits))
    89

    '''
[docs]    class Settings(Search.Settings):
        '''Settings appropriate to a combined search.'''
        def __init__(self):
            super(self.__class__, self).__init__()

[docs]    class CombinedHit(Search.SearchHit):
        '''A hit from a combined search.'''
        def __init__(self, identifier, _database=None, _entry=None, _crystal=None, _molecule=None):
            '''Initialise the hit with empty result collections.

            :param identifier: the identifier of the hit structure
            :param _database: private: passed to the base search hit
            :param _entry: private: passed to the base search hit
            :param _crystal: private: passed to the base search hit
            :param _molecule: private: passed to the base search hit
            '''
            # Name the class explicitly: super(self.__class__, self) resolves to this same
            # __init__ again when called on a subclass instance and so recurses forever.
            super(CombinedSearch.CombinedHit, self).__init__(
                identifier, _database=_database, _entry=_entry, _crystal=_crystal, _molecule=_molecule
            )
            self.measurements = dict()
            self.constraints = dict()
            self.geometric_objects = dict()
            self.similarities = dict()
            # The substructure hits that contributed to this combined hit.
            self._subhits = list()

        @staticmethod
        def _from_similarity_hit(identifier, comparators, similarities, _database=None):
            '''Private: build a CombinedHit carrying similarity values.

            Each comparator is paired with the similarity at the same position; the
            similarity is stored as a float under the comparator.
            '''
            combined = CombinedSearch.CombinedHit(identifier, _database=_database)
            scores = dict(
                (comparator, float(similarity))
                for comparator, similarity in zip(comparators, similarities)
            )
            combined.similarities.update(scores)
            return combined

        @staticmethod
        def _from_search_hit(search_hit):
            '''Private: build a CombinedHit from a TextNumericSearch or ReducedCellSearch hit.

            Only the identifier and the database of the original hit are carried over.
            '''
            identifier = search_hit.identifier
            database = search_hit._database
            return CombinedSearch.CombinedHit(identifier, database)

        @staticmethod
        def _from_substructure_hit(sub_hit, _database):
            '''Make a CombinedHit from a SubstructureSearchHit.'''
            hit = CombinedSearch.CombinedHit(sub_hit.identifier, _database)
            hit.measurements.update(sub_hit.measurements)
            hit.constraints.update(sub_hit.constraints)
            hit.geometric_objects.update(sub_hit.geometric_objects)
            sub_hit._database = _database
            sub_hit._entry = sub_hit._crystal = sub_hit._molecule = None
            hit._subhits.append(sub_hit)
            return hit

        def _merge(self, hit):
            '''Merge another hit into here.'''
            self.measurements.update(hit.measurements)
            self.constraints.update(hit.constraints)
            self.geometric_objects.update(hit.geometric_objects)
            self.similarities.update(hit.similarities)
            self._subhits.extend(hit._subhits)

        def copy(self):
            hit = CombinedSearch.CombinedHit(self.identifier, _database=self._database)
            hit.measurements.update(self.measurements)
            hit.constraints.update(self.constraints)
            hit.geometric_objects.update(self.geometric_objects)
            hit.similarities.update(self.similarities)
            hit._subhits = self._subhits[:]
            return hit

        def measurement_atoms(self, name):
            hs = [h for h in self._subhits if name in h.measurements]
            if hs:
                return hs[-1].measurement_atoms[name]

        def constraint_atoms(self, name):
            hs = [h for h in self._subhits if name in h.constraints]
            if hs:
                return hs[-1].constraint_atoms[name]

        def _geometric_object_atoms(self, name):
            hs = [h for h in self._subhits if name in h.geometric_objects]
            if hs:
                return hs[-1].geometric_objects[name]

        def centroid_atoms(self, name):
            return self._geometric_object_atoms(name)
        def dummy_point_atoms(self, name):
            return self._geometric_object_atoms(name)
        def group_atoms(self, name):
            return self._geometric_object_atoms(name)
        def vector_atoms(self, name):
            return self._geometric_object_atoms(name)
        def plane_atoms(self, name):
            return self._geometric_object_atoms(name)

        def match_components(self):
            return [m for h in self._subhits for m in h.match_components()]

        def match_atoms(self, indices=False):
            return [a for h in self._subhits for a in h.match_atoms(indices=indices)]

        def match_substructures(self):
            return [m for h in self._subhits for m in h.match_components()]

        def match_symmetry_operators(self):
            return [m for h in self._subhits for m in h.match_symmetry_operators()]

    def __init__(self, expression, settings=None):
        '''Set up a combined search from an expression of searches.

        :param expression: anything :meth:`_make_node` accepts - a single search or a combination node.
        :param settings: a :class:`CombinedSearch.Settings` instance, or ``None`` for default settings.
        '''
        self.settings = CombinedSearch.Settings() if settings is None else settings
        root = self._make_node(expression)
        self._node = root
        # The searcher and limit tables are the ones carried by the root node.
        self._searcher_dict = root._searcher_dict
        self._limit_dict = root._limit_dict

    @staticmethod
    def _make_node(other):
        '''Private: create a combined search node.

        :param other: a TextNumericSearch, SubstructureSearch, ReducedCellSearch or SimilaritySearch, or an existing combination node.
        :returns: a node carrying two extra attributes: ``_searcher_dict``, mapping ``str(search)`` to the search, and ``_limit_dict``.
        :raises TypeError: if ``other`` is none of the supported kinds.

        Substructure and similarity nodes are wrapped in a MatchMutatorNode
        that tags each match with ``str(other)``, so that the originating
        search can be found again when hits are built.
        '''
        if isinstance(other, CSDSQLDatabaseLib.Node) and not isinstance(
            other, (TextNumericSearch, SubstructureSearch, ReducedCellSearch, SimilaritySearch)
        ):
            # An existing node is reused; it is given copies of its own tables.
            _node = other
            _node._searcher_dict = other._searcher_dict.copy()
            _node._limit_dict = other._limit_dict.copy()
            return _node

        if isinstance(other, TextNumericSearch):
            _node = CSDSQLDatabaseLib.TextNumericSearchNode(other._search)
        elif isinstance(other, SubstructureSearch):
            _node = CSDSQLDatabaseLib.MotifNode(
                CSDSQLDatabaseLib.pair_motif_settings(other._motif, other.settings._settings)
            )
            adder = CSDSQLDatabaseLib.MatchStringDataItemAdder('substructure_search', str(other))
            _node = CSDSQLDatabaseLib.MatchMutatorNode(adder, _node)
        elif isinstance(other, ReducedCellSearch):
            _node = CSDSQLDatabaseLib.ReducedCellNode(other.query._get_query(other.settings))
        elif isinstance(other, SimilaritySearch):
            _node = CSDSQLDatabaseLib.SimilaritySearchNode(
                CSDSQLDatabaseLib.pair_substructure_simsettings(other._substructure, other.settings._settings)
            )
            adder = CSDSQLDatabaseLib.MatchStringDataItemAdder('similarity_search', str(other))
            _node = CSDSQLDatabaseLib.MatchMutatorNode(adder, _node)
        else:
            # Interpolate the type into the message; it used to be passed as a
            # second exception argument and never formatted.
            raise TypeError('Not appropriate for a combined search %s' % type(other))

        # Every freshly built leaf starts with itself as the only searcher and
        # with no limits.
        _node._searcher_dict = collections.OrderedDict([(str(other), other)])
        _node._limit_dict = {}
        return _node

    def __and__(self, other):
        '''Conjoin this with another search.

        :param other: anything :meth:`_make_node` accepts.
        :returns: this search, whose node now requires both operands.
        '''
        rhs = self._make_node(other)
        current = self._node
        if current is not None:
            self._node = CSDSQLDatabaseLib.AndNode(current, rhs)
        else:
            self._node = rhs
        # Take over the searchers and limits of the right hand side.
        self._searcher_dict.update(rhs._searcher_dict)
        self._limit_dict.update(rhs._limit_dict)
        return self

    def __iand__(self, other):
        '''In-place conjunction.

        :param other: anything :meth:`_make_node` accepts.
        :returns: this search, so that ``search &= other`` leaves ``search`` bound to it.
        '''
        _node = self._make_node(other)
        if self._node is None:
            self._node = _node
        else:
            self._node = CSDSQLDatabaseLib.AndNode(self._node, _node)
        self._searcher_dict.update(_node._searcher_dict)
        self._limit_dict.update(_node._limit_dict)
        # An in-place operator must return its result; without this the
        # augmented assignment rebinds the target name to None.
        return self

    def __or__(self, other):
        '''Disjoin this with another search.

        :param other: anything :meth:`_make_node` accepts.
        :returns: this search, whose node now accepts either operand.
        '''
        _node = self._make_node(other)
        if self._node is None:
            self._node = _node
        else:
            # _make_hits picks the side of an OrNode from the match's
            # 'disjunct' parameter and then steps through a wrapping node with
            # child(), so both sides are tagged here in the same way as the
            # | operator installed by _monkey_patch tags them.
            first_mutator = CSDSQLDatabaseLib.MatchStringDataItemAdder('disjunct', 'first')
            first = CSDSQLDatabaseLib.MatchMutatorNode(first_mutator, self._node)
            second_mutator = CSDSQLDatabaseLib.MatchStringDataItemAdder('disjunct', 'second')
            second = CSDSQLDatabaseLib.MatchMutatorNode(second_mutator, _node)
            self._node = CSDSQLDatabaseLib.OrNode(first, second)
        self._searcher_dict.update(_node._searcher_dict)
        self._limit_dict.update(_node._limit_dict)

        return self

    def __ior__(self, other):
        '''In-place disjunction.

        :param other: anything :meth:`_make_node` accepts.
        :returns: this search, so that ``search |= other`` leaves ``search`` bound to it.
        '''
        _node = self._make_node(other)
        if self._node is None:
            self._node = _node
        else:
            # _make_hits picks the side of an OrNode from the match's
            # 'disjunct' parameter and then steps through a wrapping node with
            # child(), so both sides are tagged here in the same way as the
            # | operator installed by _monkey_patch tags them.
            first_mutator = CSDSQLDatabaseLib.MatchStringDataItemAdder('disjunct', 'first')
            first = CSDSQLDatabaseLib.MatchMutatorNode(first_mutator, self._node)
            second_mutator = CSDSQLDatabaseLib.MatchStringDataItemAdder('disjunct', 'second')
            second = CSDSQLDatabaseLib.MatchMutatorNode(second_mutator, _node)
            self._node = CSDSQLDatabaseLib.OrNode(first, second)
        self._searcher_dict.update(_node._searcher_dict)
        self._limit_dict.update(_node._limit_dict)
        # An in-place operator must return its result; without this the
        # augmented assignment rebinds the target name to None.
        return self

    def __neg__(self):
        '''Negate this search.

        :returns: this search, whose node is now wrapped in a NotNode.
        :raises TypeError: if there is no node to negate.
        '''
        inner = self._node
        if inner is None:
            raise TypeError('No searches to negate')
        negated = CSDSQLDatabaseLib.NotNode(inner)
        # The negated node shares the tables of the node it wraps.
        negated._searcher_dict = inner._searcher_dict
        negated._limit_dict = inner._limit_dict
        self._node = negated
        return self

    @staticmethod
    def _monkey_patch(extra=None):
        '''Private: ensure relevant classes have combination methods.

        Installs ``__neg__``, ``__and__`` and ``__or__`` on the search classes
        and on the combination node classes, so that expressions such as
        ``a & (-b | c)`` build a node tree.

        :param extra: optional iterable of further classes to patch.
        '''
        def negate(s):
            '''Wrap ``s`` in a NotNode sharing its tables.'''
            _node = CombinedSearch._make_node(s)
            ret = CSDSQLDatabaseLib.NotNode(_node)
            ret._searcher_dict = _node._searcher_dict
            ret._limit_dict = _node._limit_dict
            return ret
        def conjoin(s, t):
            '''Build an AndNode carrying the merged tables of ``s`` and ``t``.'''
            _s = CombinedSearch._make_node(s)
            _t = CombinedSearch._make_node(t)
            ret = CSDSQLDatabaseLib.AndNode(_s, _t)
            ret._searcher_dict = _s._searcher_dict.copy()
            ret._searcher_dict.update(_t._searcher_dict)
            # Copy before merging, as for the searcher table, so the left
            # operand's own limit table is not altered.
            ret._limit_dict = _s._limit_dict.copy()
            ret._limit_dict.update(_t._limit_dict)
            return ret
        def disjoin(s, t):
            '''Build an OrNode whose sides are tagged 'first' and 'second'.

            The tags are added to each match as the 'disjunct' parameter,
            which _make_hits reads to decide which side produced the match.
            '''
            _s = CombinedSearch._make_node(s)
            first_mutator = CSDSQLDatabaseLib.MatchStringDataItemAdder('disjunct', 'first')
            _sm = CSDSQLDatabaseLib.MatchMutatorNode(first_mutator, _s)
            _sm._searcher_dict = _s._searcher_dict
            _sm._limit_dict = _s._limit_dict
            _t = CombinedSearch._make_node(t)
            second_mutator = CSDSQLDatabaseLib.MatchStringDataItemAdder('disjunct', 'second')
            _tm = CSDSQLDatabaseLib.MatchMutatorNode(second_mutator, _t)
            _tm._searcher_dict = _t._searcher_dict
            _tm._limit_dict = _t._limit_dict
            ret = CSDSQLDatabaseLib.OrNode(_sm, _tm)
            ret._searcher_dict = _s._searcher_dict.copy()
            ret._searcher_dict.update(_t._searcher_dict)
            # Copy before merging so the left operand's limit table (shared
            # with its mutator wrapper above) is not altered.
            ret._limit_dict = _s._limit_dict.copy()
            ret._limit_dict.update(_t._limit_dict)
            return ret
        if extra is None:
            extra = []
        for cl in (
                TextNumericSearch, SimilaritySearch, SubstructureSearch, ReducedCellSearch,
                CSDSQLDatabaseLib.AndNode, CSDSQLDatabaseLib.OrNode, CSDSQLDatabaseLib.NotNode,
                CSDSQLDatabaseLib.EntryLimitNode, CSDSQLDatabaseLib.MatchMutatorNode,
            ) + tuple(extra):
            cl.__neg__ = negate
            cl.__and__ = conjoin
            cl.__or__ = disjoin

    @staticmethod
    def _which_node(node):
        '''Private: convert a generic node to its concrete node class.

        Each ``Node_as_*`` converter is tried in turn and the first result
        that is not ``None`` is returned.

        :raises NotImplementedError: if no converter recognises the node.
        '''
        converters = (
            CSDSQLDatabaseLib.Node_as_NotNode,
            CSDSQLDatabaseLib.Node_as_AndNode,
            CSDSQLDatabaseLib.Node_as_OrNode,
            CSDSQLDatabaseLib.Node_as_MatchMutatorNode,
            CSDSQLDatabaseLib.Node_as_TextNumericSearchNode,
            CSDSQLDatabaseLib.Node_as_MotifNode,
            CSDSQLDatabaseLib.Node_as_ReducedCellNode,
            # Node_as_FormulaSearchNode is deliberately not tried here.
            CSDSQLDatabaseLib.Node_as_SimilaritySearchNode,
            CSDSQLDatabaseLib.Node_as_EntryLimitNode,
        )
        for convert in converters:
            concrete = convert(node)
            if concrete is not None:
                return concrete
        raise NotImplementedError('Unknown Node type %s' % type(node))

    #@staticmethod
    def _show_node(self, n, indent=0):
        '''For debugging: render a node tree as nested, indented text.

        :param n: the node to render.
        :param indent: number of spaces placed before this node's lines; children are rendered two spaces further in.
        :returns: the rendering, as a string.
        :raises NotImplementedError: if the node is not one of the kinds handled here.
        '''
        pad = ' '*indent
        _n = CombinedSearch._which_node(n)
        if isinstance(_n, CSDSQLDatabaseLib.NotNode):
            return '%sNot(\n%s\n%s)' % (pad, self._show_node(_n.child(), indent+2), pad)
        if isinstance(_n, CSDSQLDatabaseLib.OrNode):
            return '%sOr(\n%s,\n%s\n%s)' % (
                pad, self._show_node(_n.left(), indent+2), self._show_node(_n.right(), indent+2), pad
            )
        if isinstance(_n, CSDSQLDatabaseLib.AndNode):
            return '%sAnd(\n%s,\n%s\n%s)' % (
                pad, self._show_node(_n.left(), indent+2), self._show_node(_n.right(), indent+2), pad
            )
        if isinstance(_n, CSDSQLDatabaseLib.MatchMutatorNode):
            mmm = CSDSQLDatabaseLib.Mutator_as_MatchStringDataItemAdder(_n.mutator())
            # Only similarity mutators are labelled, with the identifier of
            # the query molecule of the search they refer to.
            ident = (
                self._searcher_dict[mmm.value()].molecule.identifier
                if mmm.key() == 'similarity_search' else ''
            )
            return '%sMutate(%s=%s(%s)\n%s\n%s)' % (
                pad, mmm.key(), mmm.value(), ident, self._show_node(_n.child(), indent+2), pad
            )
        if isinstance(_n, CSDSQLDatabaseLib.EntryLimitNode):
            return '%sLimit(\n%s\n%s)' % (pad, self._show_node(_n.child(), indent+2), pad)
        if isinstance(_n, CSDSQLDatabaseLib.TextNumericSearchNode):
            return '%sText()' % (pad)
        if isinstance(_n, CSDSQLDatabaseLib.SimilaritySearchNode):
            return '%sSimilarity()' % (pad)
        if isinstance(_n, CSDSQLDatabaseLib.MotifNode):
            return '%sMotif()' % (pad)
        if isinstance(_n, CSDSQLDatabaseLib.ReducedCellNode):
            return '%sReduced()' % (pad)
        raise NotImplementedError('WTF? %s' % type(_n))

    def _make_hits(self, match, node, _database=None, pars=None):
        '''The hits from an individual match.

        Walks the node tree alongside the parameters recorded on the match
        and builds the corresponding :class:`CombinedSearch.CombinedHit` list.

        :param match: the match reported by the searcher.
        :param node: the node of the search tree to interpret the match against.
        :param _database: the reader the hits belong to; also used to load the crystal for substructure hits.
        :param pars: the match parameters; read from the match when ``None`` and passed unchanged to recursive calls.
        :returns: a list of hits.
        :raises RuntimeError: if the parameters do not fit the node tree.
        :raises NotImplementedError: for node kinds with no hit construction.
        '''
        identifier = match.identifier().str()
        if pars is None:
            pars = match.data().parameters()
        n = self._which_node(node)
        if isinstance(n, CSDSQLDatabaseLib.NotNode):
            # A negation contributes nothing but the identifier.
            return [CombinedSearch.CombinedHit(identifier, _database=_database)]
        elif isinstance(n, CSDSQLDatabaseLib.OrNode):
            # Which side matched is recorded by the 'disjunct' mutators that
            # wrap the two sides of the OrNode.
            if 'disjunct' not in pars:
                raise RuntimeError('No disjunct in pars')
            # pop() consumes the tag, so the shared pars is altered here.
            which = pars['disjunct'].pop()
            side = n.left() if which == 'first' else n.right()
            # Step through the mutator wrapper to the real operand.
            side = self._which_node(side).child()
            return self._make_hits(match, side, _database=_database, pars=pars)
        elif isinstance(n, CSDSQLDatabaseLib.AndNode):
            # Cartesian product.
            left_hits = self._make_hits(match, n.left(), _database=_database, pars=pars)
            right_hits = self._make_hits(match, n.right(), _database=_database, pars=pars)
            result = []
            for l in left_hits:
                for r in right_hits:
                    h = l.copy()
                    h._merge(r)
                    result.append(h)
            return result
        elif isinstance(n, CSDSQLDatabaseLib.TextNumericSearchNode):
            return [CombinedSearch.CombinedHit(identifier, _database=_database)]
        elif isinstance(n, CSDSQLDatabaseLib.ReducedCellNode):
            return [CombinedSearch.CombinedHit(identifier, _database=_database)]
        elif isinstance(n, CSDSQLDatabaseLib.SimilaritySearchNode):
            # This won't happen - guarded by the MatchMutatorNode
            raise RuntimeError('SimilaritySearchNode: this cannot happen')
        elif isinstance(n, CSDSQLDatabaseLib.MatchMutatorNode):
            mmm = CSDSQLDatabaseLib.Mutator_as_MatchStringDataItemAdder(n.mutator())
            key = mmm.key()
            # Dispatch on this mutator's own key first. Testing pars first sent
            # a similarity mutator down to its SimilaritySearchNode whenever
            # the match also carried substructure parameters.
            if key == 'substructure_search' and 'substructure_search' in pars:
                searcher = self._searcher_dict[mmm.value()]
                cry = _database.crystal(identifier)
                # Re-run the substructure search on the one crystal to get the
                # per-match detail.
                return [
                    CombinedSearch.CombinedHit._from_substructure_hit(h, _database)
                    for h in searcher._search_crystal(cry)
                ]
            if key == 'similarity_search' and 'similarity_search' in pars:
                assert len(pars['similarity']) == len(pars['similarity_search'])
                searchers = [self._searcher_dict[x] for x in pars['similarity_search']]
                ids = [s.molecule.identifier for s in searchers]
                vals = [float(x) for x in pars['similarity']]
                try:
                    return [CombinedSearch.CombinedHit._from_similarity_hit(identifier, ids, vals, _database=_database)]
                except Exception:
                    # Show the offending subtree and parameters, then re-raise.
                    print(self._show_node(n), pars)
                    raise
            if 'substructure_search' in pars or 'similarity_search' in pars or 'entry_limit' in pars:
                # Some other mutator (a limit or a disjunct tag): look inside.
                return self._make_hits(match, n.child(), _database=_database, pars=pars)
            raise RuntimeError('Unexpected item in the bagging area\n%s' % ('\n'.join('%s: %s' % (k, v) for k, v in pars.items())))
        elif isinstance(n, CSDSQLDatabaseLib.MotifNode):
            raise RuntimeError('MotifNode: this cannot happen')
        elif isinstance(n, CSDSQLDatabaseLib.FormulaSearchNode):
            # Not implemented in the API yet
            raise NotImplementedError('No FormulaSearch in the API')
        elif isinstance(n, CSDSQLDatabaseLib.EntryLimitNode):
            return self._make_hits(match, n.child(), _database=_database, pars=pars)
        else:
            raise NotImplementedError('No implementation for %s' % type(n))

[docs]    @staticmethod
    def max_hit_structures(other, count):
        '''Limit the number of hits found by a combination search.

        :param other: a combination of searches.
        :param count: maximum number of hits to find.
        '''
        _other = CombinedSearch._make_node(other)
        _node = CSDSQLDatabaseLib.EntryLimitNode(_other, count)
        _node._searcher_dict = _other._searcher_dict
        s = str(_node)
        mutator = CSDSQLDatabaseLib.MatchStringDataItemAdder('entry_limit', s)
        _node = CSDSQLDatabaseLib.MatchMutatorNode(mutator, _node)
        _node._searcher_dict = { str(other) : other }
        _node._limit_dict = { s : count }
        return _node

    def _search_reader(self, database=None):
        '''Run the combined search over a database reader.

        :param database: the reader to search; ``io.EntryReader('csd')`` is opened when ``None``.
        :returns: a list of :class:`CombinedSearch.CombinedHit`.
        '''
        if database is None:
            database = io.EntryReader('csd')
        subset_db = FileFormatsLib.CrystalStructureDatabaseAsCrystalStructureDatabaseSubset(database._db)
        if subset_db and isinstance(database._underlying_file_name, list):
            # A subset spread over several files: rebuild it as a pool holding
            # one subset per file. Each identifier is assigned to the first
            # file that contains it.
            ids_to_db = collections.defaultdict(list)
            sub_dbs = [io.EntryReader(f) for f in database._underlying_file_name]
            for ident in sorted(database.identifier(i) for i in range(len(database))):
                for x, d in enumerate(sub_dbs):
                    if d._db.identifier_exists(UtilitiesLib.DatabaseEntryIdentifier(ident)):
                        ids_to_db[x].append(ident)
                        break
            new_pool = FileFormatsLib.CrystalStructureDatabasePool()
            for k, v in sorted(ids_to_db.items(), key=operator.itemgetter(1)):
                sub_db = FileFormatsLib.CrystalStructureDatabaseSubset(v, sub_dbs[k]._db)
                s = FileFormatsLib.CrystalStructureDatabaseSubsetAsCrystalStructureDatabase(sub_db)
                new_pool.append(s)
            searcher = new_pool.searcher_factory().combination_searcher()
        else:
            searcher = database._db.searcher_factory().combination_searcher()
        searcher.set_search_definition_node(self._node)
        class ResultsWriter(object):
            '''Callback collecting hits as the searcher reports matches.'''
            def __init__(self, searcher, _database=None):
                self.searcher = searcher
                self._database = _database
                self.hits = []
                self._matches = []
                self.current_id = None
                # Count down on a private copy, so the limits stored on the
                # search are still intact the next time it is run.
                self._limit_dict = dict(searcher._limit_dict)

            @staticmethod
            def _is_suppressed(hit):
                '''True if the hit's match atoms cannot be retrieved.'''
                try:
                    hit.match_atoms()
                except RuntimeError:
                    return True
                return False

            def __call__(self, **kw):
                match = kw['match']
                identifier = match.identifier().str()
                # Hits for an identifier are built once, on its first match.
                if identifier == self.current_id:
                    return
                pars = match.data().parameters()
                if 'entry_limit' in pars:
                    for p in pars['entry_limit']:
                        self._limit_dict[p] -= 1
                        if self._limit_dict[p] < 0:
                            return
                self.current_id = identifier
                new_hits = self.searcher._make_hits(match, self.searcher._node, _database=self._database)
                if self._database.__class__.__name__ == 'MoleculeReader':
                    # Drop hits whose match atoms raise RuntimeError.
                    new_hits = [h for h in new_hits if not self._is_suppressed(h)]
                self.hits.extend(new_hits)

        rw = ResultsWriter(self, _database=database)
        results_writer = CSDSQLDatabaseLib.PythonResultsWriter(rw)
        searcher.search(results_writer)
        return rw.hits

    def _search_entry(self, entry):
        '''Combined searches cannot be applied to a single entry.

        :raises NotImplementedError: always.
        '''
        message = 'Combined searches not implemented for an entry'
        raise NotImplementedError(message)

    def _search_crystal(self, crystal):
        '''Combined searches cannot be applied to a single crystal.

        :raises NotImplementedError: always.
        '''
        message = 'Combined searches not implemented for a crystal'
        raise NotImplementedError(message)

    def _search_molecule(self, molecule):
        '''Combined searches cannot be applied to a single molecule.

        :raises NotImplementedError: always.
        '''
        message = 'Combined searches not implemented for a molecule'
        raise NotImplementedError(message)

# Install the combination operators (&, | and unary -) on the search and node
# classes as soon as this module is loaded.
CombinedSearch._monkey_patch()