#
# This code is Copyright (C) 2015 The Cambridge Crystallographic Data Centre
# (CCDC) of 12 Union Road, Cambridge CB2 1EZ, UK and a proprietary work of CCDC.
# This code may not be used, reproduced, translated, modified, disassembled or
# copied, except in accordance with a valid licence agreement with CCDC and may
# not be disclosed or redistributed in any form, either in whole or in part, to
# any third party. All copies of this code made in accordance with a valid
# licence agreement as referred to above must contain this copyright notice.
#
# No representations, warranties, or liabilities are expressed or implied in the
# supply of this code by CCDC, its servants or agents, except where such
# exclusion or limitation is prohibited, void or unenforceable under governing
# law.
#
'''
The :mod:`ccdc.search` module provides various search classes.
The main classes of the :mod:`ccdc.search` module are:
- :class:`ccdc.search.TextNumericSearch`
- :class:`ccdc.search.SubstructureSearch`
- :class:`ccdc.search.SimilaritySearch`
- :class:`ccdc.search.ReducedCellSearch`
- :class:`ccdc.search.CombinedSearch`
These all inherit from the base class :class:`ccdc.search.Search`. The base
:class:`ccdc.search.Search` contains nested classes defining basic search hits
and settings:
- :class:`ccdc.search.Search.SearchHit`
- :class:`ccdc.search.Search.Settings`
The base class :class:`ccdc.search.Search` also contains the
:func:`ccdc.search.Search.search` function which is used to search the CSD.
All the searches except :class:`ccdc.search.TextNumericSearch` also support
searching of the following additional data sources:
- a Python list of identifiers
- a molecule file path
- a :mod:`ccdc.io` reader
- an individual :class:`ccdc.molecule.Molecule`
- an individual :class:`ccdc.crystal.Crystal`
- a list of molecules, crystals or entries
The :class:`ccdc.search.TextNumericSearch` can only sensibly be applied to
a crystal structure database, which is the CSD by default or a :class:`ccdc.io.EntryReader`
opened on a database file.
The :func:`ccdc.search.Search.search` returns a list of
:class:`ccdc.search.Search.SearchHit` instances. Some of the searches make use
of more specific search hit classes, namely:
- :class:`ccdc.search.TextNumericSearch.TextNumericHit`
- :class:`ccdc.search.SubstructureSearch.SubstructureHit`
- :class:`ccdc.search.SimilaritySearch.SimilarityHit`
Most of the searches return simple Python lists of search hits. However,
a search carried out using a :class:`ccdc.search.SubstructureSearch` returns a
:class:`ccdc.search.SubstructureSearch.SubstructureHitList`, which contains a
:func:`ccdc.search.SubstructureSearch.SubstructureHitList.superimpose` function for superimposing
all the hits on the first instance in the list.
To illustrate some of the searches let us first get an aspirin molecule.
>>> from ccdc.io import EntryReader
>>> csd_reader = EntryReader('CSD')
>>> mol = csd_reader.molecule('ACSALA')
Text numeric searching.
>>> from ccdc.search import TextNumericSearch
>>> text_numeric_search = TextNumericSearch()
>>> text_numeric_search.add_compound_name('aspirin')
>>> hits = text_numeric_search.search()
>>> len(hits)
102
Substructure searching.
>>> from ccdc.search import MoleculeSubstructure, SubstructureSearch
>>> substructure = MoleculeSubstructure(mol)
>>> substructure_search = SubstructureSearch()
>>> _ = substructure_search.add_substructure(substructure)
>>> hits = substructure_search.search()
>>> len(hits)
66
Similarity searching.
>>> from ccdc.search import SimilaritySearch
>>> similarity_search = SimilaritySearch(mol)
>>> hits = similarity_search.search()
>>> len(hits)
113
Reduced cell searching.
>>> from ccdc.search import ReducedCellSearch
>>> crystal = csd_reader.crystal('ACSALA')
>>> query = ReducedCellSearch.CrystalQuery(crystal)
>>> reduced_cell_searcher = ReducedCellSearch(query)
>>> hits = reduced_cell_searcher.search()
>>> len(hits)
17
Combined searches.
>>> from ccdc.search import CombinedSearch
>>> combined_search = CombinedSearch(similarity_search & -text_numeric_search)
>>> hits = combined_search.search()
>>> len(hits)
33
'''
###########################################################################
import sys
import os
import math
import re
import collections
import operator
import warnings
warnings.filterwarnings('always', '.*deprecated.*', DeprecationWarning, '.*', 0)
from ccdc import molecule, io
from ccdc.entry import Entry
from ccdc.crystal import Crystal
from ccdc.io import (
_CSDDatabaseLocator, _DatabaseReader, CrystalReader,
EntryReader
)
from ccdc.descriptors import MolecularDescriptors, GeometricDescriptors
from ccdc import utilities
from ccdc import maxint32
from ccdc.utilities import _private_importer
with _private_importer() as pi:
pi.import_ccdc_module('UtilitiesLib')
pi.import_ccdc_module('MathsLib')
pi.import_ccdc_module('ChemistryLib')
pi.import_ccdc_module('SubstructureSearchLib')
pi.import_ccdc_module('DatabaseEntryLib')
pi.import_ccdc_module('CSDSQLDatabaseLib')
pi.import_ccdc_module('MotifSearchLib')
pi.import_ccdc_module('ProteinLib')
pi.import_ccdc_module('FileFormatsLib')
pi.import_ccdc_module('AnnotationsLib')
pi.import_ccdc_module('SolubilityPlatformLib')
###########################################################################
# Queries
###########################################################################
def _decode_condition(r):
    '''PRIVATE: work out the condition from the argument.

    Accepted forms of ``r``:

    * a number (``int`` or ``float``): an equality condition;
    * a two-element tuple or list of numbers: an inclusive range, the bounds
      being accepted in either order;
    * a tuple or list whose first element is one of the operator strings
      ``'=='``, ``'>'``, ``'<'``, ``'>='``, ``'<='``, ``'!='`` or ``'in'``
      (surrounding whitespace is ignored) and whose second element is the
      operand.

    :param r: the condition specification.
    :returns: the ``SubstructureSearchLib`` condition object built from ``r``.
    :raises: TypeError if ``r`` is not one of the accepted forms.
    '''
    # Operator string -> name of the SubstructureSearchLib condition class.
    # Names are resolved lazily so validation happens before any library call.
    operators = {
        '==': 'EqualTo',
        '>': 'Greater',
        '<': 'Less',
        '>=': 'GreaterEqual',
        '<=': 'LessEqual',
        '!=': 'NotEqualTo',
        'in': 'OneOf',
    }

    if isinstance(r, (int, float)):
        return SubstructureSearchLib.EqualTo(r)

    # In every message below the argument is wrapped in a 1-tuple: a bare
    # tuple on the right of ``%`` would be unpacked as several format
    # arguments and hide the real error behind a formatting failure.
    if not isinstance(r, (tuple, list)) or not r:
        raise TypeError('Invalid value for condition %s' % (r,))

    first = r[0]
    if isinstance(first, (int, float)):
        if len(r) == 2 and isinstance(r[1], (int, float)):
            return SubstructureSearchLib.InclusiveRange(
                min(first, r[1]), max(first, r[1])
            )
        raise TypeError('Invalid type for condition %s' % (r,))

    if isinstance(first, str):
        class_name = operators.get(first.strip())
        # An operator without an operand is as unusable as an unknown one.
        if class_name is None or len(r) < 2:
            raise TypeError('Invalid operator for condition %s' % (r,))
        return getattr(SubstructureSearchLib, class_name)(r[1])

    raise TypeError('Invalid value for condition %s' % (r,))
def _constraint_property(which, doc, nullary=False):
    '''Private: build a property that exposes one constraint type.

    Reading the property calls the owner's ``_get_constraint(which)``;
    assigning to it calls the owner's ``_set_constraint(which, value,
    nullary=nullary)``. No deleter is installed.

    :param which: the constraint class handed to the owner's accessors.
    :param doc: docstring given to the resulting property.
    :param nullary: forwarded unchanged to ``_set_constraint`` on assignment.
    :returns: a :class:`property` object.
    '''
    def _read(owner):
        return owner._get_constraint(which)

    def _write(owner, value, nullary=nullary):
        return owner._set_constraint(which, value, nullary=nullary)

    return property(fget=_read, fset=_write, fdel=None, doc=doc)
class QueryAtom(object):
    '''Atom used to define a substructure search.

    A QueryAtom can be used to represent a single atom type or a set of atom
    types. A QueryAtom can also have additional constraints imposed on it, for
    example that it should be aromatic.

    Let us create a query atom representing an oxygen atom.

    >>> query_atom = QueryAtom('O')
    >>> print(query_atom)
    QueryAtom(O)

    Suppose that we wanted the query atom to be either a carbon or a nitrogen
    atom.

    >>> query_atom = QueryAtom(['C', 'N'])
    >>> print(query_atom)
    QueryAtom(C, N)

    It is possible to add further constraints on a QueryAtom. For example,
    we can insist that it should be aromatic.

    >>> query_atom.aromatic = True
    >>> print(query_atom.aromatic)
    AtomAromaticConstraint: 1
    >>> print(query_atom)
    QueryAtom(C, N)[atom aromaticity: equal to 1]

    See :ref:`query_atoms` for further details.
    '''

    def __init__(self, atomic_symbol='', _substructure_atom=None):
        '''Initialiser.

        :param atomic_symbol: an atomic symbol, a list or tuple of atomic
            symbols, or a :class:`ccdc.molecule.Atom` whose element is used.
            With a list or tuple the resulting QueryAtom will match any of the
            provided symbols. A false value (the default) creates an atom with
            no element restriction.
        :param _substructure_atom: private; an existing
            ``SubstructureSearchLib.SubstructureAtom`` to wrap. When given,
            ``atomic_symbol`` is ignored.
        '''
        if _substructure_atom is not None:
            self._substructure_atom = _substructure_atom
        elif not atomic_symbol:
            self._substructure_atom = SubstructureSearchLib.SubstructureAtom()
        elif isinstance(atomic_symbol, (list, tuple)):
            # The first symbol seeds the atom; the rest are added as alternatives.
            self._substructure_atom = SubstructureSearchLib.SubstructureAtom(
                ChemistryLib.Element(atomic_symbol[0])
            )
            for sym in atomic_symbol[1:]:
                self._substructure_atom.add_element(ChemistryLib.Element(sym))
        elif isinstance(atomic_symbol, molecule.Atom):
            self._substructure_atom = SubstructureSearchLib.SubstructureAtom(
                atomic_symbol._atom.element()  # pylint: disable=E1103
            )
        else:
            self._substructure_atom = SubstructureSearchLib.SubstructureAtom(
                ChemistryLib.Element(atomic_symbol)
            )

    def __str__(self):
        '''String representation of a QueryAtom.

        The element symbols are listed in parentheses (nothing is listed when
        the atom matches any element), followed by the constraints, if any, in
        square brackets.

        >>> q = QueryAtom(['C', 'N'])
        >>> print(q)
        QueryAtom(C, N)
        '''
        atom = self._substructure_atom
        parts = ['QueryAtom(']
        if not atom.matches_any_element():
            parts.append(', '.join(
                atom.element(i).atomic_symbol() for i in range(atom.nelements())
            ))
        parts.append(')')
        if atom.nconstraints():
            parts.append('[')
            parts.append(', '.join(
                str(atom.constraint(i)).strip('\n')
                for i in range(atom.nconstraints())
            ))
            parts.append(']')
        # Tabs in the constraint descriptions are flattened to single spaces.
        return ''.join(parts).replace('\t', ' ')

    __repr__ = __str__

    def __eq__(self, other):
        '''Return True if other is a QueryAtom whose underlying substructure atom compares equal.'''
        return isinstance(other, QueryAtom) and self._substructure_atom == other._substructure_atom

    def __ne__(self, other):
        '''Inequality for atoms.'''
        return not self == other

    @property
    def index(self):
        '''Index of this atom in a substructure.

        ``None`` is returned when the underlying atom raises ``RuntimeError``
        on being asked for its index, as in the first example below where the
        atom has not been added to a substructure.

        >>> atom = QueryAtom(['C', 'N'])
        >>> print(atom.index)
        None
        >>> substructure = QuerySubstructure()
        >>> _ = substructure.add_atom(atom)
        >>> print(atom.index)
        0
        '''
        try:
            return self._substructure_atom.index()
        except RuntimeError:
            return None

    def _get_constraint(self, which, boolean=False):
        '''Private: get a printable representation of a constraint.

        :param which: the constraint class to look for.
        :param boolean: unused; kept so existing callers are unaffected.
        :returns: ``'<constraint class name>: <value>'`` or ``None`` if the
            atom carries no constraint of that type.
        '''
        ty = which()
        if not self._substructure_atom.has_constraint_of_type(ty):
            return None
        k = self._substructure_atom.constraint_of_type(ty)
        # Two constraint types expose their value through a dedicated accessor
        # rather than through condition().
        if which == SubstructureSearchLib.AtomHas3DSiteConstraint:
            cond = k.get_site_option()
        elif which == SubstructureSearchLib.AtomLabelConstraint:
            cond = k.regular_expression()
        else:
            cond = k.condition()
        return '%s: %s' % (which.__name__, cond)

    def _set_constraint(self, which, value, nullary=False):
        '''Private: set a constraint. Removes the constraint if value is None.

        Any existing constraint of the same type is removed first, so setting
        a constraint always replaces rather than accumulates.

        :param which: the constraint class to set.
        :param value: ``None`` to remove the constraint, otherwise the value
            used to build it (a bool, or anything ``_decode_condition``
            accepts; a regular expression for the label constraint).
        :param nullary: if True the constraint is constructed without a
            condition, whatever the (non-None) value is.
        '''
        if self._substructure_atom.has_constraint_of_type(which()):
            self._substructure_atom.remove_constraints_of_type(which())
        if value is None:
            return
        if which == SubstructureSearchLib.AtomHas3DSiteConstraint:
            constraint = which()
            constraint.set_site_option(bool(value))
        elif which == SubstructureSearchLib.AtomLabelConstraint:
            constraint = which()
            constraint.set_regular_expression(value)
        elif nullary:
            constraint = which()
        elif value in (True, False):
            constraint = which(SubstructureSearchLib.EqualTo(value))
        else:
            constraint = which(_decode_condition(value))
        self._substructure_atom.add_constraint(constraint)

    # Nullary: _set_constraint adds this constraint for any non-None value
    # (False included); assign None to remove it.
    acceptor = _constraint_property(
        SubstructureSearchLib.AtomAcceptorTypeConstraint,
        '''Constraint specifying whether or not the QueryAtom is an acceptor.

        >>> a = QueryAtom(['C', 'N'])
        >>> a.acceptor = True
        >>> print(a)
        QueryAtom(C, N)[AtomAcceptorTypeConstraint]
        ''',
        nullary=True
    )

    aromatic = _constraint_property(
        SubstructureSearchLib.AtomAromaticConstraint,
        '''Constraint specifying whether or not the QueryAtom is aromatic.

        >>> a = QueryAtom(['C', 'N'])
        >>> a.aromatic = True
        >>> print(a)
        QueryAtom(C, N)[atom aromaticity: equal to 1]
        '''
    )

    # Nullary: _set_constraint adds this constraint for any non-None value
    # (False included); assign None to remove it.
    donor = _constraint_property(
        SubstructureSearchLib.AtomDonorTypeConstraint,
        '''Constraint specifying whether or not the QueryAtom is a donor.

        >>> a = QueryAtom(['C', 'N'])
        >>> a.donor = True
        >>> print(a)
        QueryAtom(C, N)[AtomDonorTypeConstraint]
        ''',
        nullary=True
    )

    cyclic = _constraint_property(
        SubstructureSearchLib.AtomCyclicityConstraint,
        '''Constraint specifying whether or not the QueryAtom is part of a cycle.

        >>> a = QueryAtom(['C', 'N'])
        >>> a.cyclic = True
        >>> print(a)
        QueryAtom(C, N)[atom cyclicity: equal to 1]
        '''
    )

    formal_charge = _constraint_property(
        SubstructureSearchLib.AtomFormalChargeConstraint,
        '''Constraint specifying the formal charge on the QueryAtom.

        >>> a = QueryAtom(['C', 'N'])
        >>> a.formal_charge = ('in', [-1, 1])
        >>> print(a)
        QueryAtom(C, N)[charge: one of -1, 1]
        '''
    )

    formal_valency = _constraint_property(
        SubstructureSearchLib.AtomFormalValencyConstraint,
        '''Constraint specifying the formal valency of the QueryAtom.

        >>> a = QueryAtom(['C', 'N'])
        >>> a.formal_valency = ('>', 3)
        >>> print(a)
        QueryAtom(C, N)[atom valency: greater than 3]
        '''
    )

    cyclic_bonds = _constraint_property(
        SubstructureSearchLib.AtomNCyclicBondsConstraint,
        '''Constraint specifying the number of cyclic bonds of the QueryAtom.

        >>> a = QueryAtom(['C', 'N'])
        >>> a.cyclic_bonds = ('!=', 4)
        >>> print(a)
        QueryAtom(C, N)[number of cyclic bonds:not equal to 4]
        '''
    )

    smallest_ring = _constraint_property(
        SubstructureSearchLib.AtomSmallestRingConstraint,
        '''Constraint specifying the size of the smallest ring the QueryAtom forms part of.

        >>> a = QueryAtom(['C', 'N'])
        >>> a.smallest_ring = (5, 6)
        >>> print(a)
        QueryAtom(C, N)[atom smallest ring: in range 5 to 6]
        '''
    )

    num_bonds = _constraint_property(
        SubstructureSearchLib.AtomNBondsConstraint,
        '''Constraint specifying the number of bonds the QueryAtom may have.

        >>> a = QueryAtom(['C', 'N'])
        >>> a.num_bonds = ('<=', 3)
        >>> print(a)
        QueryAtom(C, N)[number of connected atoms: less than or equal to 3]
        '''
    )

    num_hydrogens = _constraint_property(
        SubstructureSearchLib.AtomNHydrogensConstraint,
        '''Constraint specifying the number of hydrogens the QueryAtom may have.

        >>> a = QueryAtom(['C', 'N'])
        >>> a.num_hydrogens = 1
        >>> print(a)
        QueryAtom(C, N)[hydrogen count, including deuterium: equal to 1]
        '''
    )

    unfused_unbridged_ring = _constraint_property(
        SubstructureSearchLib.AtomUnfusedUnbridgedRingConstraint,
        '''Constraint specifying whether or not the QueryAtom is part of an unfused and unbridged ring.

        >>> a = QueryAtom(['C', 'N'])
        >>> a.unfused_unbridged_ring = True
        >>> print(a)
        QueryAtom(C, N)[atom unfused/unbridged ring: equal to 1]
        '''
    )

    nimplicit_hydrogens = _constraint_property(
        SubstructureSearchLib.AtomNImplicitHydrogensConstraint,
        '''Constraint specifying a count of implicit hydrogens.

        >>> a = QueryAtom(['C', 'N'])
        >>> a.nimplicit_hydrogens = 0
        >>> print(a)
        QueryAtom(C, N)[implicit hydrogen count: equal to 0]
        '''
    )

    # The nullary flag is not consulted for this constraint type:
    # _set_constraint handles it in a dedicated branch that stores bool(value).
    has_3d_coordinates = _constraint_property(
        SubstructureSearchLib.AtomHas3DSiteConstraint,
        '''Constraint specifying that the atom has 3d coordinates.

        >>> a = QueryAtom(['C', 'N'])
        >>> a.has_3d_coordinates = True
        >>> print(a)
        QueryAtom(C, N)[atom must have 3D site]
        ''',
        nullary=True
    )

    # The nullary flag is not consulted for this constraint type:
    # _set_constraint handles it in a dedicated branch that stores the pattern.
    label_match = _constraint_property(
        SubstructureSearchLib.AtomLabelConstraint,
        '''Constraint specifying that the atom label must match a regular expression.

        >>> a = QueryAtom(['C'])
        >>> a.label_match = '^C12$'
        >>> print(a)
        QueryAtom(C)[atom label must match regular expression with pattern: ^C12$]
        ''',
        nullary=True
    )

    @property
    def chirality(self):
        '''Constraint specifying the chirality around an atom.

        The return value will either be None or a tuple of 4 QueryAtoms in clockwise order.

        >>> s = SMARTSSubstructure("FC(I)O[C@](S)(P)H")
        >>> s.atoms[1].chirality is None
        True
        >>> s.atoms[4].chirality
        (QueryAtom(O)[atom aromaticity: equal to 0], QueryAtom(H), QueryAtom(P)[atom aromaticity: equal to 0], QueryAtom(S)[atom aromaticity: equal to 0])
        '''
        rs = SubstructureSearchLib.get_chirality(self._substructure_atom)
        # The record only counts when it is centred on this very atom.
        if rs.atom() == self._substructure_atom:
            return tuple(
                QueryAtom(_substructure_atom=a) for a in rs.ordered_bound_atoms()
            )
        return None

    @chirality.setter
    def chirality(self, chirality):
        '''Constraint specifying the chirality around an atom.

        The set value may be None to clear a chirality constraint, or a tuple of 4 ordered atoms and
        optionally a string 'clockwise' (the assumed default) or 'anticlockwise' specifying the chiral relationship.

        :raises: RuntimeError if the direction string is not recognised, or if
            the atoms are not exactly four :class:`QueryAtom` instances.

        >>> s = SMARTSSubstructure("O[C@](I)(F)H")
        >>> s.atoms[1].chirality = None
        >>> s.atoms[1].chirality is None
        True
        >>> s.atoms[1].chirality = (s.atoms[0],s.atoms[2],s.atoms[3],s.atoms[4])
        >>> s.atoms[1].chirality
        (QueryAtom(O)[atom aromaticity: equal to 0], QueryAtom(I), QueryAtom(F), QueryAtom(H))
        '''
        if chirality is None:
            SubstructureSearchLib.remove_chirality(self._substructure_atom)
            return
        if len(chirality) >= 5:
            if chirality[4] == "anticlockwise":
                # Swapping the last two atoms turns the anticlockwise
                # description into the clockwise one that is stored.
                chirality = (chirality[0], chirality[1], chirality[3], chirality[2])
            elif chirality[4] != "clockwise":
                raise RuntimeError("Chirality description must be 'clockwise' or 'anticlockwise'")
            chirality = chirality[0:4]
        # Check the count as well as the types: fewer than four atoms would
        # otherwise fail below with an IndexError instead of this message.
        if len(chirality) != 4 or not all(isinstance(atom, QueryAtom) for atom in chirality):
            raise RuntimeError("QueryAtom chirality must be set to 4 QueryAtoms or None")
        atoms = [a._substructure_atom for a in chirality]
        SubstructureSearchLib.set_chirality(self._substructure_atom, atoms[0], atoms[1], atoms[2], atoms[3])

    def add_connected_element_count(self, atomic_symbols, count):
        '''Set the number of connected elements constraint.

        Constraint to define the number of times the QueryAtom should be
        connected to atoms with elements defined in the atomic_symbols list.

        :param atomic_symbols: atomic symbol or list or tuple of atomic symbols.
        :param count: a number for an exact count, a list ``[low, high]`` for
            an inclusive range, or a condition tuple such as ``('>', 1)``; see
            :ref:`conditions` for details.

        >>> a = QueryAtom(['C', 'N'])
        >>> a.add_connected_element_count(['F', 'Cl'], 2)
        >>> print(a)
        QueryAtom(C, N)[count connected elements equal to 2 from [F,Cl]]
        '''
        elements = ChemistryLib.ElementSet()
        # Tuples are accepted alongside lists, as in the initialiser.
        if isinstance(atomic_symbols, (list, tuple)):
            symbols = atomic_symbols
        else:
            symbols = [atomic_symbols]
        for symbol in symbols:
            elements.add_element(ChemistryLib.Element(symbol))

        if isinstance(count, list):
            cond = SubstructureSearchLib.InclusiveRange(count[0], count[1])
        elif isinstance(count, tuple):
            # Condition tuples go through the shared decoder so the
            # documented condition syntax works here too.
            cond = _decode_condition(count)
        else:
            cond = SubstructureSearchLib.EqualTo(count)

        constraint = SubstructureSearchLib.AtomConnectedElementCountConstraint(elements, cond)
        # Replace, rather than add to, any constraint of this type already present.
        if self._substructure_atom.has_constraint_of_type(constraint):
            self._substructure_atom.remove_constraints_of_type(constraint)
        self._substructure_atom.add_constraint(constraint)

    def add_protein_atom_type_constraint(self, *types):
        '''Add a constraint that an atom be in one of the protein atom types.

        This is of use only when searching a protein structure.

        :param `*types`: one or more of 'AMINO_ACID', 'LIGAND', 'COFACTOR', 'WATER', 'METAL',
            'NUCLEOTIDE', 'UNKNOWN'. Any case-insensitive, unique prefix may be used.

        >>> a = QueryAtom('Zn')
        >>> a.add_protein_atom_type_constraint('Ligand', 'Metal')
        >>> print(a)
        QueryAtom(Zn)[protein substructure type : one of 1, 3]
        '''
        _type_dict = utilities.bidirectional_dict(
            AMINO_ACID=AnnotationsLib.ProteinSubstructureData.AMINOACID,
            LIGAND=AnnotationsLib.ProteinSubstructureData.LIGAND,
            COFACTOR=AnnotationsLib.ProteinSubstructureData.COFACTOR,
            WATER=AnnotationsLib.ProteinSubstructureData.WATER,
            METAL=AnnotationsLib.ProteinSubstructureData.METAL,
            NUCLEOTIDE=AnnotationsLib.ProteinSubstructureData.NUCLEOTIDE,
            UNKNOWN=AnnotationsLib.ProteinSubstructureData.UNKNOWN
        )
        indices = [_type_dict.prefix_lookup(t) for t in types]
        # A single type becomes an equality test, several a one-of test.
        if len(indices) == 1:
            cond = SubstructureSearchLib.EqualTo(indices[0])
        else:
            cond = SubstructureSearchLib.OneOf(indices)
        self._substructure_atom.add_constraint(ProteinLib.ProteinSubstructureTypeAtomConstraint(cond))
class QueryBond(object):
    '''Bond used to define a substructure search.

    A QueryBond can be used to represent a single bond type or a set of bond
    types. A QueryBond can also have additional constraints imposed on it, for
    example that it should be cyclic.

    Let us create a QueryBond that will match any bond type.

    >>> query_bond = QueryBond()
    >>> print(query_bond) # doctest: +NORMALIZE_WHITESPACE
    QueryBond(Unknown, Single, Double, Triple,
              Quadruple, Aromatic, Delocalised, Pi)

    To create a more specific QueryBond we need to specify some bond types.

    >>> from ccdc.molecule import Bond
    >>> single_bond = Bond.BondType('Single')
    >>> double_bond = Bond.BondType('Double')
    >>> query_bond = QueryBond(single_bond)
    >>> print(query_bond)
    QueryBond(Single)
    >>> query_bond = QueryBond([single_bond, double_bond])
    >>> print(query_bond) # doctest: +NORMALIZE_WHITESPACE
    QueryBond(Single, Double)

    Finally, let us set a constraint for the bond to be cyclic.

    >>> query_bond.cyclic = True
    >>> print(query_bond)
    QueryBond(Single, Double)[bond cyclicity: equal to 1]
    >>> print(query_bond.cyclic)
    BondCyclicityConstraint: 1
    '''

    def __init__(self, bond_type=None, _substructure_bond=None):
        '''Initialise a QueryBond.

        :param bond_type: may be None, for a :class:`QueryBond` that will match any bond, a
            :class:`ccdc.molecule.Bond.BondType` instance which will match only that
            bond type, a string representation which will match only that bond type,
            'any' that will match any bond, or a list of
            :class:`ccdc.molecule.Bond.BondType` which will match any of those
            specified.
        :param _substructure_bond: private; an existing
            ``SubstructureSearchLib.SubstructureBond`` to wrap. When given,
            ``bond_type`` is ignored.
        '''
        if _substructure_bond is not None:
            self._substructure_bond = _substructure_bond
            return

        def native(spec):
            # A string names a bond type; anything else is taken to be a
            # BondType wrapper carrying the underlying type.
            if isinstance(spec, str):
                return molecule.Bond.BondType(spec)._bond_type
            return spec._bond_type

        make_bond = SubstructureSearchLib.SubstructureBond
        if bond_type is None:
            self._substructure_bond = make_bond()
        elif isinstance(bond_type, (list, tuple)):
            if len(bond_type):
                # The first entry seeds the bond, the others become alternatives.
                self._substructure_bond = make_bond(native(bond_type[0]))
                for extra in bond_type[1:]:
                    self._substructure_bond.add_type(native(extra))
            else:
                self._substructure_bond = make_bond()
        elif isinstance(bond_type, str) and bond_type.lower() == 'any':
            self._substructure_bond = make_bond()
        else:
            self._substructure_bond = make_bond(native(bond_type))

    def __str__(self):
        '''String representation of a QueryBond.

        The bond types are listed in parentheses, followed by the constraints,
        if any, in square brackets.

        >>> b = QueryBond(['Single', 'Double'])
        >>> print(b)
        QueryBond(Single, Double)
        '''
        bond = self._substructure_bond
        text = 'QueryBond(' + ', '.join(
            str(molecule.Bond.BondType(bond.type(i))) for i in range(bond.ntypes())
        ) + ')'
        if bond.nconstraints():
            text += '[' + ', '.join(
                str(bond.constraint(i)) for i in range(bond.nconstraints())
            ) + ']'
        return text

    __repr__ = __str__

    @property
    def atoms(self):
        '''A list of the two QueryAtoms of the bond, if it is in a substructure, or ``None``.

        ``None`` results when fetching the atoms raises ``RuntimeError``.

        >>> s = QuerySubstructure()
        >>> c = s.add_atom(QueryAtom('C'))
        >>> n = s.add_atom(QueryAtom('N'))
        >>> b = QueryBond(['Single', 'Double'])
        >>> _ = s.add_bond(b, c, n)
        >>> print(b)
        QueryBond(Single, Double)
        >>> print('%s, %s' % (b.atoms[0], b.atoms[1]))
        QueryAtom(C), QueryAtom(N)
        '''
        try:
            first = QueryAtom(_substructure_atom=self._substructure_bond.atom1())
            second = QueryAtom(_substructure_atom=self._substructure_bond.atom2())
        except RuntimeError:
            return None
        return [first, second]

    def _get_constraint(self, which):
        '''Private: get a string representation of a bond constraint.

        :param which: the constraint class to look for.
        :returns: ``'<constraint class name>: <condition>'`` or ``None`` if
            the bond carries no constraint of that type.
        '''
        probe = which()
        if not self._substructure_bond.has_constraint_of_type(probe):
            return None
        found = self._substructure_bond.constraint_of_type(probe)
        return '%s: %s' % (which.__name__, found.condition())

    def _set_constraint(self, which, value, nullary=False):
        '''Private: set a bond constraint.

        Any existing constraint of the same type is removed first; a value of
        ``None`` therefore just clears the constraint.

        :param which: the constraint class to set.
        :param value: ``None``, a bool, or anything ``_decode_condition`` accepts.
        :param nullary: if True the value is used directly in an equality condition.
        '''
        if self._substructure_bond.has_constraint_of_type(which()):
            self._substructure_bond.remove_constraints_of_type(which())
        if value is None:
            return
        use_equality = nullary or value in (True, False)
        if use_equality:
            condition = SubstructureSearchLib.EqualTo(value)
        else:
            condition = _decode_condition(value)
        self._substructure_bond.add_constraint(which(condition))

    cyclic = _constraint_property(
        SubstructureSearchLib.BondCyclicityConstraint,
        '''Constraint specifying whether or not the :class:`QueryBond` is part of a cycle.

        >>> b = QueryBond('Single')
        >>> b.cyclic = True
        >>> print(b)
        QueryBond(Single)[bond cyclicity: equal to 1]
        '''
    )

    bond_length = _constraint_property(
        SubstructureSearchLib.BondLengthConstraint,
        '''Constraint specifying the length of the bond.

        >>> b = QueryBond('Single')
        >>> c1 = QueryAtom('C')
        >>> c2 = QueryAtom('C')
        >>> s = QuerySubstructure()
        >>> _ = s.add_atom(c1)
        >>> _ = s.add_atom(c2)
        >>> _ = s.add_bond(b, c1, c2)
        >>> b.bond_length = ('>', 1.6)
        >>> print(b)
        QueryBond(Single)[bond length: greater than 1.6]
        '''
    )

    bond_polymeric = _constraint_property(
        SubstructureSearchLib.BondPolymericConstraint,
        '''Constraint specifying whether or not the :class:`QueryBond` is polymeric.

        >>> b = QueryBond('Single')
        >>> b.bond_polymeric = True
        >>> print(b)
        QueryBond(Single)[bond polymeric: equal to 1]
        '''
    )

    bond_smallest_ring = _constraint_property(
        SubstructureSearchLib.BondSmallestRingConstraint,
        '''Constraint specifying the smallest ring the bond should be a part of.

        >>> b = QueryBond('Aromatic')
        >>> b.bond_smallest_ring = 5
        >>> print(b)
        QueryBond(Aromatic)[bond smallest ring: equal to 5]
        '''
    )

    bond_unfused_unbridged_ring = _constraint_property(
        SubstructureSearchLib.BondUnfusedUnbridgedRingConstraint,
        '''Constraint specifying whether or not the :class:`QueryBond` is part of an unfused and unbridged ring.

        >>> b = QueryBond('Single')
        >>> b.bond_unfused_unbridged_ring = True
        >>> print(b)
        QueryBond(Single)[bond unfused/unbridged ring: equal to 1]
        '''
    )

    @property
    def stereochemistry(self):
        r'''Constraint specifying the stereochemistry around a double bond.

        The return value will either be None or a tuple of 2 QueryAtoms and one of 'cis' or 'trans'.

        >>> s = SMARTSSubstructure(r"I/C=C\F")
        >>> s.bonds[1].stereochemistry
        (QueryAtom(I), QueryAtom(F), 'cis')
        '''
        ez = SubstructureSearchLib.get_stereochemistry(self._substructure_bond)
        # The record only counts when it describes this very bond.
        if not self._substructure_bond.is_same_bond(ez.bond()):
            return None
        if ez.stereochemistry() == SubstructureSearchLib.EZStereoChemistryFlag_E_STEREOCHEMISTRY:
            label = "trans"
        elif ez.stereochemistry() == SubstructureSearchLib.EZStereoChemistryFlag_Z_STEREOCHEMISTRY:
            label = "cis"
        else:
            return None
        near_first = QueryAtom(_substructure_atom=ez.adjacent_to_first())
        near_second = QueryAtom(_substructure_atom=ez.adjacent_to_second())
        return (near_first, near_second, label)

    @stereochemistry.setter
    def stereochemistry(self, stereo):
        '''Set a stereochemistry constraint on a bond.

        The set value may be None to remove stereochemistry, or a tuple of 2 atoms adjacent to the bond's atoms and a string either 'cis' or 'trans'

        :raises: RuntimeError if the string is neither 'cis' nor 'trans'.

        >>> s = SMARTSSubstructure(R"IC=CF")
        >>> s.bonds[1].stereochemistry = (s.atoms[0], s.atoms[3], 'trans')
        >>> s.bonds[1].stereochemistry
        (QueryAtom(I), QueryAtom(F), 'trans')
        '''
        if stereo is None:
            SubstructureSearchLib.remove_stereochemistry(self._substructure_bond)
            return
        near_first, near_second, label = stereo
        if label == "cis":
            native_flag = SubstructureSearchLib.EZStereoChemistryFlag_Z_STEREOCHEMISTRY
        elif label == "trans":
            native_flag = SubstructureSearchLib.EZStereoChemistryFlag_E_STEREOCHEMISTRY
        else:
            raise RuntimeError("stereochemistry flag must be either 'cis' or 'trans'")
        record = SubstructureSearchLib.SubstructureEZStereoChemistry(
            native_flag,
            self._substructure_bond,
            near_first._substructure_atom,
            near_second._substructure_atom
        )
        SubstructureSearchLib.set_stereochemistry(record)
###########################################################################
[docs]class QuerySubstructure(object):
'''Class to define and run substructure searches.
As an example let us set up a QuerySubstructure for a carbonyl (C=O).
>>> from ccdc.molecule import Bond
>>> double_bond = Bond.BondType('Double')
>>> substructure_query = QuerySubstructure()
>>> query_atom1 = substructure_query.add_atom('C')
>>> query_atom2 = substructure_query.add_atom('O')
>>> query_bond = substructure_query.add_bond(double_bond, query_atom1, query_atom2)
'''
def __init__(self, _substructure=None):
    '''Create a substructure.

    :param _substructure: private; a ``SubstructureSearchLib.Substructure``
        to wrap. When omitted a fresh, empty one is instantiated.

    The searcher, the constraint caches and the geometric object cache all
    start out as ``None``; ``measurements`` starts as an empty list.
    '''
    self._substructure = (
        SubstructureSearchLib.Substructure.instantiate()
        if _substructure is None
        else _substructure
    )
    self._searcher = None
    self.measurements = []
    self._constraints = self._geometric_constraints = self._geometric_objects = None
def clear(self):
    '''Restart the query.

    A new, empty underlying substructure is instantiated and the searcher,
    measurements and cached constraints and geometric objects are reset to
    the values the initialiser gives them.
    '''
    self._substructure = SubstructureSearchLib.Substructure.instantiate()
    self.measurements = []
    for cached in (
        '_searcher',
        '_constraints',
        '_geometric_constraints',
        '_geometric_objects',
    ):
        setattr(self, cached, None)
def add_atom(self, atom):
    '''Add an atom to the substructure.

    :param atom: may be a QueryAtom separately constructed, an atom of a
        molecule, or an atomic symbol.
    :returns: :class:`QueryAtom`

    A :class:`QueryAtom` argument is added and returned as is; for a
    molecule atom a new QueryAtom is built from its atomic symbol; any other
    argument is handed to the QueryAtom initialiser unchanged.

    >>> q = QuerySubstructure()
    >>> a = q.add_atom(QueryAtom(['N', 'O']))
    >>> print(a)
    QueryAtom(N, O)
    '''
    if isinstance(atom, QueryAtom):
        query_atom = atom
    else:
        symbol = atom.atomic_symbol if isinstance(atom, molecule.Atom) else atom
        query_atom = QueryAtom(symbol)
    self._substructure.add(query_atom._substructure_atom)
    return query_atom
@property
def atoms(self):
    '''The query atoms in the substructure.

    A new list of :class:`QueryAtom` wrappers, one per atom of the
    underlying substructure and in index order, is built on every access.

    >>> q = QuerySubstructure()
    >>> _ = q.add_atom(QueryAtom('C'))
    >>> _ = q.add_atom(QueryAtom(['O', 'N']))
    >>> atoms = q.atoms
    >>> print('%s, %s' % (atoms[0], atoms[1]))
    QueryAtom(C), QueryAtom(N, O)
    '''
    substructure = self._substructure
    wrapped = []
    for position in range(substructure.natoms()):
        wrapped.append(QueryAtom(_substructure_atom=substructure.atom(position)))
    return wrapped
def add_bond(self, bond, atom1=None, atom2=None):
    '''Add a bond to the substructure.

    :param bond: may be a :class:`QueryBond`, a
        :class:`ccdc.molecule.Bond.BondType`, a
        :class:`ccdc.molecule.Bond`, a string or an int. The string 'any'
        (in any case) gives a bond matching any bond type.
    :param atom1: :class:`QueryAtom` or ``None`` for any atom. A
        :class:`ccdc.molecule.Atom` or an atomic symbol is also accepted and
        is added to the substructure first.
    :param atom2: :class:`QueryAtom` or ``None`` for any atom; treated like
        ``atom1``.
    :returns: :class:`QueryBond`
    :raises: TypeError if an improper bond argument is supplied

    Atoms that are not yet part of a substructure (their ``index`` is
    ``None``) are added before the bond is created.

    >>> s = QuerySubstructure()
    >>> c = s.add_atom(QueryAtom('C'))
    >>> o1 = s.add_atom(QueryAtom('O'))
    >>> o2 = s.add_atom(QueryAtom('O'))
    >>> h = s.add_atom(QueryAtom('H'))
    >>> _ = s.add_bond(QueryBond('Double'), c, o1)
    >>> _ = s.add_bond(QueryBond('Single'), c, o2)
    >>> _ = s.add_bond(QueryBond('Single'), o2, h)
    '''
    def wrap(native_type):
        # Build a QueryBond around a new substructure bond of the given type.
        return QueryBond(
            _substructure_bond=SubstructureSearchLib.SubstructureBond(native_type)
        )

    def resolve(atom):
        # Turn an atom argument into a QueryAtom that belongs to the substructure.
        if atom is None:
            atom = QueryAtom()
        if isinstance(atom, (molecule.Atom, str)):
            atom = self.add_atom(atom)
        if atom.index is None:
            atom = self.add_atom(atom)
        return atom

    # The bond is resolved first so that an improper bond argument is
    # rejected before any atom is added to the substructure.
    if isinstance(bond, molecule.Bond.BondType):
        bond = wrap(bond._bond_type)
    elif isinstance(bond, molecule.Bond):
        bond = wrap(bond.bond_type._bond_type)
    elif isinstance(bond, QueryBond):
        pass
    elif isinstance(bond, str):
        if bond.lower() == 'any':
            bond = QueryBond()
        else:
            bond = wrap(molecule.Bond.BondType(bond)._bond_type)
    elif isinstance(bond, int):
        bond = wrap(ChemistryLib.BondType(bond))
    else:
        # Wrapped in a 1-tuple so that a tuple argument is reported rather
        # than being unpacked as several format arguments.
        raise TypeError('Improper argument to add_bond(%s)' % (bond,))

    atom1 = resolve(atom1)
    atom2 = resolve(atom2)
    self._substructure.add(
        bond._substructure_bond, atom1.index, atom2.index
    )
    return bond
@property
def bonds(self):
    '''The bonds in the substructure.

    A new list of :class:`QueryBond` wrappers, one per bond of the
    underlying substructure and in index order, is built on every access.

    >>> s = QuerySubstructure()
    >>> b = s.add_bond('Single', QueryAtom('C'), QueryAtom('F'))
    >>> bonds = s.bonds
    >>> print(bonds[0])
    QueryBond(Single)
    '''
    substructure = self._substructure
    wrapped = []
    for position in range(substructure.nbonds()):
        wrapped.append(QueryBond(_substructure_bond=substructure.bond(position)))
    return wrapped
def write_xml(self, file_name):
    '''Write an XML representation of the substructure.

    Deprecated.

    :param file_name: path to the XML file to be written
    '''
    warnings.warn('''This method is deprecated and will be removed in a later version.''', DeprecationWarning)
    writer = SubstructureSearchLib.XMLSubstructureWriter()
    options = SubstructureSearchLib.XMLSubstructureOptions()
    ostr = UtilitiesLib.ofstream(file_name)
    try:
        writer.write(
            self._substructure,
            options,
            SubstructureSearchLib.XMLSubstructureWriter.SUBSTRUCTURE_SEARCH,
            ostr
        )
    finally:
        # Close the stream even when writing fails so the handle is not leaked.
        ostr.close()
def match_atom(self, atom, query_atom=None):
    '''Whether or not the given atom matches the query_atom in the given context.

    The substructure is matched against the molecule owning ``atom`` with
    the chosen query atom pinned onto ``atom``.

    :param atom: a :class:`ccdc.molecule.Atom` instance.
    :param query_atom: a :class:`ccdc.search.QueryAtom` instance or ``None``. If ``None``, the first atom of the substructure will be used.
    :returns: bool

    >>> s = QuerySubstructure()
    >>> _ = s.add_bond('Single', QueryAtom('Cl'), QueryAtom('C'))
    >>> mol = EntryReader('csd').molecule('AABHTZ')
    >>> s.match_atom(mol.atom('Cl1'))
    True
    >>> s.match_atom(mol.atom('C1'))
    False
    >>> s.match_atom(mol.atom('C1'), s.atoms[1])
    True
    '''
    query_index = 0 if query_atom is None else query_atom.index
    searcher = SubstructureSearchLib.SubstructureMoleculeGraphSearch(
        self._substructure,
        SubstructureSearchLib.SubstructureMoleculeMatchCriteria()
    )
    target = atom._atom.molecule()
    # Maps the query atom index onto the index of the atom it must match.
    pinned = {query_index: atom.index}
    found = searcher.find_matches(target, pinned)
    return bool(found)
def nmatch_molecule(self, molecule):
    '''Returns number of query matches within the specified molecule.

    Every atom of the molecule is tested with :meth:`match_atom` (using its
    default query atom) and the atoms for which it succeeds are counted.

    :param molecule: a :class:`ccdc.molecule.Molecule` instance.
    :returns: integer

    >>> s = QuerySubstructure()
    >>> _ = s.add_bond('Single', QueryAtom('Cl'), QueryAtom('C'))
    >>> mol = EntryReader('csd').molecule('AABHTZ')
    >>> s.nmatch_molecule(mol)
    2
    '''
    count = 0
    for candidate in molecule.atoms:
        if self.match_atom(candidate):
            count += 1
    return count
def match_molecule(self, molecule):
    '''Whether or not the query matches the specified molecule.

    :param molecule: a :class:`ccdc.molecule.Molecule` instance.
    :returns: bool, ``True`` when the matcher reports at least one match

    >>> s = QuerySubstructure()
    >>> _ = s.add_bond('Double', QueryAtom('C'), QueryAtom('O'))
    >>> mol = EntryReader('csd').molecule('AABHTZ')
    >>> s.match_molecule(mol)
    True
    '''
    searcher = SubstructureSearchLib.SubstructureMoleculeGraphSearch(
        self._substructure,
        SubstructureSearchLib.SubstructureMoleculeMatchCriteria()
    )
    found = searcher.find_matches(molecule._molecule)
    return bool(found)
###################################################################################
class SMARTSSubstructure(QuerySubstructure):
    '''Make a substructure from a SMARTS string.

    Let us create a ketone SMARTSSubstructure as an example.

    >>> smarts_query = SMARTSSubstructure("[CD4][CD3](=[OD1])[CD4]")
    >>> print(smarts_query.smarts)
    [CD4][CD3](=[OD1])[CD4]

    There is a minor extension to Daylight SMARTS to allow the representation of
    quadruple, delocalised and pi bonds, using the characters '_', '"' and '|' respectively.

    There is a second minor extension to allow easy access to the indices of the atoms.

    >>> query = SMARTSSubstructure("[#6:0]([#7]-H)[#8:1][#6:2]")
    >>> print(query.label_to_atom_index(0))
    0
    >>> print(query.label_to_atom_index(1))
    3
    '''

    def __init__(self, smarts):
        '''Initialise a SMARTS query with a string.

        :param smarts: the SMARTS string to parse
        '''
        self._reader = SubstructureSearchLib.SMARTSSubstructureReader()
        # Assigning through the property parses the string and sets _substructure.
        self.smarts = smarts
        parsed = self._substructure
        QuerySubstructure.__init__(self, _substructure=parsed)

    @property
    def smarts(self):
        '''The SMARTS string.

        Assigning a new string re-parses it, replaces the underlying
        substructure and resets ``measurements`` to an empty list.
        '''
        return self._smarts

    @smarts.setter
    def smarts(self, smarts):
        '''Ensure _substructure is updated.'''
        self._smarts = smarts
        self._substructure = self._reader.substructure(self._smarts)
        self.measurements = []

    def label_to_atom_index(self, label):
        '''Translate a SMARTS label into the appropriate substructure atom index.

        :param label: the label; it is converted with ``str`` before lookup
        :returns: the index of the labelled atom
        :raises: KeyError if the reader returns nothing truthy for the label
        '''
        labelled = self._reader.label_to_atom(str(label))
        if labelled:
            return labelled.index()
        raise KeyError(f"No atom with label {label}")
###################################################################################
class MoleculeSubstructure(QuerySubstructure):
    '''Make a substructure query from an entire molecule.

    Can be used to search for exact matches of a molecule when appropriate num_bonds or
    add_connected_element_count constraints are set on the QueryAtoms. Furthermore if
    hydrogen atoms have been removed from the molecule used to initialise the
    MoleculeSubstructure it can be used to find hits that match the heavy
    atoms as a substructure.

    :param mol: :class:`ccdc.molecule.Molecule`
    :param match_stereochemistry: Should the substructure constrain target stereochemistry to match the input molecule's stereochemistry?
    :raises: TypeError if the passed in molecule has multiple components since multi-component molecule substructure searches are not supported. The components should be added as separate substructures.

    >>> mol = EntryReader('csd').molecule('AABHTZ')
    >>> sub = MoleculeSubstructure(mol)
    '''

    def __init__(self, mol, match_stereochemistry=False):
        '''Initialise a MoleculeSubstructure with a molecule.'''
        if len(mol.components) > 1:
            raise TypeError('Multi-component molecule substructures are not supported')
        if match_stereochemistry:
            stereo_mode = SubstructureSearchLib.Substructure.MATCH_STEREOCHEMISTRY
        else:
            stereo_mode = SubstructureSearchLib.Substructure.NO_STEREOCHEMISTRY
        built = SubstructureSearchLib.Substructure.instantiate(mol._molecule, stereo_mode)
        QuerySubstructure.__init__(self, _substructure=built)
###################################################################################
class ConnserSubstructure(QuerySubstructure):
    '''Read a Conquest query language file.'''

    # Text that must occur (in any case) somewhere in a Connser file.
    required_content = re.compile(r'\*CONN', re.IGNORECASE)

    def __init__(self, file_name, _conn=None):
        '''Read the file.

        :param file_name: path to the Connser file
        :param _conn: private; an already loaded Connser object. When given, the file is not read.
        :raises: IOError if the file cannot be read or if it is empty or if it does not contain '*CONN'
        '''
        if _conn is None:
            try:
                f = open(file_name)
            except (OSError, TypeError, ValueError) as exc:
                # Only failures to open the path are translated; interpreter-level
                # exceptions such as KeyboardInterrupt are no longer swallowed.
                raise IOError('File cannot be read: %s' % file_name) from exc
            with f:
                txt = f.read()
            if not txt or self.required_content.search(txt) is None:
                raise IOError('File is not a connser file: %s' % file_name)
            self._conn = SubstructureSearchLib.ConnserFile(file_name)
            self.name = os.path.splitext(os.path.basename(file_name))[0]
        else:
            self._conn = _conn
            self.name = 'string'
        substructure = self._conn.substructure()
        QuerySubstructure.__init__(self, _substructure=substructure)

    @staticmethod
    def from_string(text):
        '''Create a substructure from a textual representation of a Connser file.

        :param text: the Connser text; it is converted with ``str`` before reading
        :returns: :class:`ConnserSubstructure` whose ``name`` is ``'string'``
        '''
        _conn = SubstructureSearchLib.ConnserFile()
        stream = UtilitiesLib.istringstream(str(text))
        _conn.read(stream)
        return ConnserSubstructure('string', _conn=_conn)
###########################################################################
class XMLSubstructure(QuerySubstructure):
    '''A :class:`ccdc.search.QuerySubstructure` read from an XML file. Deprecated.'''

    def __init__(self, fname):
        '''Initialise from an XML formatted file.

        Deprecated.

        :param fname: path to XML file
        :raises: IOError if the file does not exist
        '''
        warnings.warn('''This class is deprecated and will be removed in a later version.''', DeprecationWarning)
        if os.path.exists(fname):
            QuerySubstructure.__init__(self)
            xml_reader = SubstructureSearchLib.XMLSubstructureReader()
            xml_reader.load(fname)
            # Only the first substructure in the file is used.
            self._substructure = xml_reader.substructure(0)
        else:
            raise IOError('The file %s does not exist' % fname)
###########################################################################
# Searches
###########################################################################
[docs]class Search(object):
'''Common base class for searches'''
class Settings(object):
    '''Base class for search settings.

    Each filter is a property that reads from and writes to the wrapped
    settings object held in ``_settings``.
    '''

    def __init__(self, _settings=None):
        '''Initialise.

        :param _settings: private; an existing underlying settings object. A default one is created when ``None``.
        '''
        if _settings is None:
            _settings = CSDSQLDatabaseLib.GenericCrystalStructureDatabaseSearchSettings()
        self._settings = _settings

    def __str__(self):
        '''List the properties defined directly on this class with their current values.'''
        property_lines = '\n'.join(
            '\t%s = %s' % (name, getattr(self, name))
            for name, member in self.__class__.__dict__.items()
            if isinstance(member, property)
        )
        return '\n'.join(['Settings(', property_lines, ')'])

    def _has_filter_set(self):
        '''Private: whether any of the filtering settings is active.

        ``no_errors`` is included so that a settings object whose only active
        filter is ``no_errors`` is still treated as filtering.
        '''
        return (
            self.has_3d_coordinates or self.no_disorder or self.no_errors or self.no_powder or
            self.only_organic or self.only_organometallic or self.max_r_factor < 10000.0 or
            self.not_polymeric or self.no_metals or self.must_have_elements or self.must_not_have_elements or
            self.no_ions
        )

    @property
    def has_3d_coordinates(self):
        '''Constrain hits to have 3d coordinates.'''
        return self._settings.has_3d_coordinates()

    @has_3d_coordinates.setter
    def has_3d_coordinates(self, value):
        self._settings.set_has_3d_coordinates(value)

    @property
    def no_disorder(self):
        '''Constrain hits to have no disorder.

        The value will be False (no filtering), 'Non-hydrogen' (filter structures with heavy atom disorder)
        or 'All' (filter structures with any disordered atoms).

        When assigning, a false value turns filtering off, the string 'all'
        (in any case) selects 'All' and any other true value selects
        'Non-hydrogen'.
        '''
        d = {
            self._settings.DISORDER_NOT_FILTERED: False,
            self._settings.NO_NON_HYDROGEN_DISORDER: 'Non-hydrogen',
            self._settings.NO_DISORDER: 'All'
        }
        return d[self._settings.disorder()]

    @no_disorder.setter
    def no_disorder(self, value):
        if not value:
            self._settings.set_disorder(self._settings.DISORDER_NOT_FILTERED)
        elif isinstance(value, str) and value.lower() == 'all':
            self._settings.set_disorder(self._settings.NO_DISORDER)
        else:
            self._settings.set_disorder(self._settings.NO_NON_HYDROGEN_DISORDER)

    @property
    def no_powder(self):
        '''Constrain hits not to be powder studies.'''
        return self._settings.powder() == self._settings.DOESNT_CONTAIN_POWDER_DIFFRACTION_DATA

    @no_powder.setter
    def no_powder(self, value):
        self._settings.set_powder(
            self._settings.DOESNT_CONTAIN_POWDER_DIFFRACTION_DATA if value else
            self._settings.POWDER_NOT_FILTERED)

    @property
    def only_organic(self):
        '''Constrain hits to be organic compounds.'''
        return self._settings.only_organic()

    @only_organic.setter
    def only_organic(self, value):
        self._settings.set_only_organic(value)

    @property
    def only_organometallic(self):
        '''Constrain hits to be only organometallic compounds.'''
        return self._settings.only_organometallic()

    @only_organometallic.setter
    def only_organometallic(self, value):
        self._settings.set_only_organometallic(value)

    @property
    def max_r_factor(self):
        '''Constrain the hits to have an R-factor less than this.

        The R-factor will be expressed as a percentage.'''
        return self._settings.max_rfactor()

    @max_r_factor.setter
    def max_r_factor(self, value):
        self._settings.set_max_rfactor(value)

    @property
    def no_errors(self):
        '''Constrain the hits to have no suppressed errors.'''
        return self._settings.no_errors()

    @no_errors.setter
    def no_errors(self, value):
        self._settings.set_no_errors(value)

    @property
    def not_polymeric(self):
        '''Constrain the hits not to be polymeric structures.'''
        return self._settings.not_polymeric()

    @not_polymeric.setter
    def not_polymeric(self, value):
        self._settings.set_not_polymeric(value)

    @property
    def no_metals(self):
        '''Constrain the hits not to have a metal atom.'''
        return self._settings.no_metals()

    @no_metals.setter
    def no_metals(self, value):
        self._settings.set_no_metals(value)

    @property
    def no_ions(self):
        """Constrain the hits not to have a residue with a formal charge.

        The hits may include zwitterions.
        """
        return self._settings.no_charged_residues()

    @no_ions.setter
    def no_ions(self, tf):
        self._settings.set_no_charged_residues(tf)

    @property
    def must_have_elements(self):
        '''Elements which must be present in a hit.

        The elements will be presented as a list of atomic symbols.

        >>> settings = Search.Settings()
        >>> settings.must_have_elements = ['C', 'N', 'O', 'S']
        >>> print(settings.must_have_elements)
        [C (6), N (7), O (8), S (16)]
        '''
        return self._settings.must_have().elements()

    @must_have_elements.setter
    def must_have_elements(self, value):
        es = ChemistryLib.ElementSet()
        es.add_elements([ChemistryLib.Element(x) for x in value])
        self._settings.set_must_have(es)

    @property
    def must_not_have_elements(self):
        '''Elements which must not be present in a hit.

        The elements will be presented as a list of symbols.

        >>> settings = Search.Settings()
        >>> settings.must_not_have_elements = ['S', 'P', 'K']
        >>> print(settings.must_not_have_elements)
        [P (15), S (16), K (19)]
        '''
        return self._settings.must_not_have().elements()

    @must_not_have_elements.setter
    def must_not_have_elements(self, value):
        es = ChemistryLib.ElementSet()
        es.add_elements([ChemistryLib.Element(x) for x in value])
        self._settings.set_must_not_have(es)

    @property
    def max_hit_structures(self):
        '''The number of structures which may be returned from a search.

        If set to 0, all hits will be returned.
        '''
        return self._settings.maximum_hits_limit()

    @max_hit_structures.setter
    def max_hit_structures(self, value):
        self._settings.set_maximum_hits_limit(value)

    def test(self, argument):
        '''Test that the argument satisfies the requirements of the settings instance.

        :param argument: a :class:`ccdc.entry.Entry`, :class:`ccdc.crystal.Crystal`
            or :class:`ccdc.molecule.Molecule` instance.
        :returns: bool

        >>> entry = EntryReader('csd').entry('AABHTZ')
        >>> settings = Search.Settings()
        >>> settings.test(entry)
        True
        >>> settings.only_organometallic = True
        >>> settings.test(entry)
        False
        '''
        if isinstance(argument, Entry):
            return CSDSQLDatabaseLib.test_entry_settings_constraints(
                self._settings, argument._entry
            )
        elif isinstance(argument, Crystal):
            # A crystal is tested through its molecule; a crystal that cannot
            # supply one fails the test.
            try:
                argument = argument.molecule
            except (RuntimeError, TypeError):
                return False
        return CSDSQLDatabaseLib.test_molecule_settings_constraints(
            self._settings, argument._molecule
        )
class SearchHit(object):
    '''Base class for search hits.

    Provides access to molecules, crystals and entries.
    '''

    def __init__(self, identifier,
                 _database=None, _entry=None, _crystal=None, _molecule=None, _binary_database=None):
        '''Initialise.

        When either database argument is supplied, any entry, crystal or
        molecule passed alongside it is discarded.
        '''
        backed_by_database = not (_database is None and _binary_database is None)
        if backed_by_database:
            _entry = None
            _crystal = None
            _molecule = None
        self._identifier = identifier
        self._database = _database
        self._binary_database = _binary_database
        self._entry = _entry
        self._crystal = _crystal
        self._molecule = _molecule

    @property
    def identifier(self):
        '''The string identifier of the hit.'''
        return self._identifier

    @identifier.setter
    def identifier(self, value):
        self._identifier = value

    @property
    def entry(self):
        '''The entry corresponding to a search hit.

        Sources are tried in order: stored entry, database, binary database,
        stored molecule, stored crystal. ``None`` results if none is available.
        '''
        found = None
        if self._entry is not None:  # pylint: disable=E0203
            found = self._entry
        elif self._database is not None:  # pylint: disable=E1101
            found = self._database.entry(self.identifier)
        elif self._binary_database is not None:
            key = UtilitiesLib.DatabaseEntryIdentifier(self.identifier)
            found = Entry(self._binary_database.entry(key))
        elif self._molecule is not None:
            found = Entry.from_molecule(self._molecule)  # pylint: disable=E1101
        elif self._crystal is not None:
            found = Entry.from_molecule(self._crystal.molecule)  # pylint: disable=E1101
        return found

    @property
    def crystal(self):
        '''The crystal corresponding to a search hit.

        Sources are tried in order: stored crystal, database, binary database
        (through :attr:`entry`), stored entry, stored molecule. ``None``
        results if none is available.
        '''
        found = None
        if self._crystal is not None:
            found = self._crystal
        elif self._database is not None:
            found = self._database.crystal(self.identifier)
        elif self._binary_database is not None:
            found = self.entry.crystal
        elif self._entry is not None:
            found = self._entry.crystal
        elif self._molecule is not None:
            found = Entry.from_molecule(self._molecule).crystal
        return found

    @property
    def molecule(self):
        '''The molecule corresponding to a search hit.

        Sources are tried in order: stored molecule, database, binary database
        (through :attr:`entry`), stored entry, stored crystal. ``None``
        results if none is available.
        '''
        found = None
        if self._molecule is not None:
            found = self._molecule
        elif self._database is not None:
            found = self._database.molecule(self.identifier)
        elif self._binary_database is not None:
            found = self.entry.molecule
        elif self._entry is not None:
            found = self._entry.molecule
        elif self._crystal is not None:
            found = self._crystal.molecule
        return found
def __init__(self, settings=None):
'''This class is abstract.'''
def search(self, database=None, max_hit_structures=None, max_hits_per_structure=None):
    '''Perform a search.

    :param database: what to search. ``None`` or ``'CSD'`` searches the CSD;
        otherwise a database reader, a file path, a molecule, a crystal, an
        entry, or a list or tuple of identifiers or of any of these.
    :param max_hit_structures: if not ``None``, stored in the settings before searching.
    :param max_hits_per_structure: if not ``None``, stored in the settings
        before searching, provided the settings have a ``max_hits_per_structure`` attribute.
    :returns: the hits produced by the search; an empty list for an empty list or tuple.
    :raises: TypeError if the database argument is of an unsupported type.
    '''
    self.settings._settings.reset_hits()
    # Not every settings class has a per-structure limit, so never create one.
    has_per_structure_limit = hasattr(self.settings, 'max_hits_per_structure')
    if max_hit_structures is not None or max_hits_per_structure is not None:
        # Clear both limits first so only the limits given in this call apply.
        self.settings.max_hit_structures = 0
        if has_per_structure_limit:
            self.settings.max_hits_per_structure = 0
    if max_hit_structures is not None:
        self.settings.max_hit_structures = max_hit_structures
    if max_hits_per_structure is not None and has_per_structure_limit:
        self.settings.max_hits_per_structure = max_hits_per_structure
    if database is None or database == 'CSD':
        return self._search_reader(EntryReader('CSD'))
    if isinstance(database, _DatabaseReader):
        return self._search_reader(database)
    if isinstance(database, str):
        return self._search_reader(CrystalReader(database))
    if isinstance(database, molecule.Molecule):
        return self._search_molecule(database)
    if isinstance(database, Crystal):
        return self._search_crystal(database)
    if isinstance(database, Entry):
        return self._search_entry(database)
    if isinstance(database, (list, tuple)):
        # iterable - could be a database pool, or a gcd list or a list of mol/cryst/ent
        if not database:
            # Nothing to search; avoids an IndexError on database[0].
            return []
        if isinstance(database[0], str):
            return self._search_reader(EntryReader(database))
        return [h for x in database for h in self.search(x)]
    raise TypeError('Cannot search this database: %s' % database)
[docs]class SimilaritySearch(Search):
'''Class to define and run similarity searches.'''
class Settings(Search.Settings):
    '''Settings for a similarity search.'''

    # Coefficient names mapped to the corresponding library constants.
    coeffs = utilities.bidirectional_dict(
        dice=SubstructureSearchLib.DICE,
        tanimoto=SubstructureSearchLib.TANIMOTO,
    )
    # Sort-order names mapped to the corresponding library constants.
    _sort_order = utilities.bidirectional_dict(
        value_order=SubstructureSearchLib.VALUE_ORDER,
        alphabetic_order=SubstructureSearchLib.ALPHABETIC_ORDER
    )

    def __init__(self, threshold=0.7, coefficient='tanimoto', _settings=None):
        '''Initialise.

        :param threshold: similarity threshold; ``None`` is treated as 0.7
        :param coefficient: coefficient name; ``None`` is treated as 'tanimoto'
        :param _settings: private; an existing underlying settings object.
            When given, ``threshold`` and ``coefficient`` are not used.
        '''
        if _settings is None:
            if threshold is None:
                threshold = 0.7
            if coefficient is None:
                coefficient = 'tanimoto'
            _settings = CSDSQLDatabaseLib.SimilaritySearchSettings(
                self.coeffs.prefix_lookup(coefficient), threshold
            )
        self._settings = _settings
        super(SimilaritySearch.Settings, self).__init__(_settings=self._settings)

    @property
    def threshold(self):
        '''The similarity threshold to apply.

        This is a value between 0.0 and 1.0.
        '''
        return self._settings.threshold()

    @threshold.setter
    def threshold(self, value):
        self._settings.set_threshold(value)

    @property
    def coefficient(self):
        '''This should be either 'dice' or 'tanimoto', the default.

        A string is looked up in ``coeffs`` when assigned; any other value is
        passed to the underlying settings unchanged.
        '''
        return self.coeffs.inverse_lookup(self._settings.coefficient())

    @coefficient.setter
    def coefficient(self, value):
        if isinstance(value, str):
            self._settings.set_coefficient(self.coeffs.prefix_lookup(value))
        else:
            self._settings.set_coefficient(value)

    @property
    def sort_order(self):
        '''The order in which hits will be sorted.

        This should be either 'alphabetic' or 'value', the default.
        '''
        # The accessor must be called, as for threshold() and coefficient();
        # passing the bound method itself can never match a known constant.
        return self._sort_order.inverse_lookup(self._settings.sort_order())

    @sort_order.setter
    def sort_order(self, value):
        self._settings.set_sort_order(self._sort_order.prefix_lookup(value))
class SimilarityHit(Search.SearchHit):
    '''A search hit recording the similarity measure.

    The SimilarityHit instance will give access to the identifier of the hit, the value of the similarity to the query molecule,
    the entry, crystal or molecule of the hit.
    '''

    def __init__(self, similarity, identifier,
                 _database=None, _entry=None, _crystal=None, _molecule=None, _binary_database=None):
        '''Store the similarity value together with the usual hit data.'''
        Search.SearchHit.__init__(
            self,
            identifier,
            _database=_database,
            _entry=_entry,
            _crystal=_crystal,
            _molecule=_molecule,
            _binary_database=_binary_database,
        )
        self.similarity = similarity
        self.identifier = identifier
def __init__(self, mol=None, threshold=0.7, coefficient='tanimoto', settings=None):
    '''Save the threshold and instantiate the databases.

    :param mol: :class:`ccdc.molecule.Molecule` or :class:`ccdc.search.QuerySubstructure`
    :param threshold: float (0.0 to 1.0)
    :param coefficient: one of 'tanimoto' or 'dice'
    :param settings: a settings object to use as is; when given, ``threshold`` and ``coefficient`` are ignored
    '''
    self.settings = (
        SimilaritySearch.Settings(threshold, coefficient) if settings is None else settings
    )
    self.molecule = mol
@property
def molecule(self):
'''The query molecule.'''
return self._molecule
@molecule.setter
def molecule(self, mol):
self._molecule = mol
if mol is None:
# then it'll have to be provided later
self._substructure = None
elif isinstance(mol, QuerySubstructure):
self._fp = SubstructureSearchLib.ChemicalFingerprintBuilderSubstructure()
self._sp = self._fp.similarity_fingerprint(mol._substructure)
self._substructure = mol._substructure
else:
self._fp = SubstructureSearchLib.ChemicalFingerprintBuilderMolecule()
self._sp = self._fp.similarity_fingerprint(mol._molecule)
self._substructure = SubstructureSearchLib.Substructure.instantiate(mol._molecule)
@staticmethod
def from_xml(xml):
    '''Create a SimilaritySearch from an XML representation.

    The coefficient and threshold are taken from the XML options; either is
    left as ``None`` when reading it raises a RuntimeError.

    :param xml: XML string
    :returns: :class:`SimilaritySearch`
    '''
    xml_reader = SubstructureSearchLib.XMLSubstructureReader()
    xml_reader.load(UtilitiesLib.istringstream(xml))
    coeff = None
    try:
        coeff = SimilaritySearch.Settings.coeffs.inverse_lookup(
            xml_reader.options().similarity_coefficient()
        )
    except RuntimeError:
        pass
    thresh = None
    try:
        thresh = xml_reader.options().similarity_threshold()
    except RuntimeError:
        pass
    query = QuerySubstructure(_substructure=xml_reader.substructure(0))
    return SimilaritySearch(query, thresh, coeff)
[docs] @staticmethod
def from_xml_file(file_name):
'''Create a SimilaritySearch from an XML file.
:param file_name: path to XML file
:raises: IOError when the file does not exist
'''
if not os.path.exists(file_name):
raise IOError('The file %s does not exist' % file_name)
with open(file_name) as f:
return SimilaritySearch.from_xml(f.read())
def read_xml(self, xml):
    '''Read a query from an XML representation.

    The coefficient and threshold of the settings are updated from the XML
    options; a RuntimeError while reading either leaves that setting unchanged.
    The query substructure and its fingerprint are replaced, and
    :attr:`molecule` becomes ``None``.

    :param xml: XML string
    '''
    stream = UtilitiesLib.istringstream(xml)
    reader = SubstructureSearchLib.XMLSubstructureReader()
    reader.load(stream)
    try:
        self.settings.coefficient = SimilaritySearch.Settings.coeffs.inverse_lookup(
            reader.options().similarity_coefficient()
        )
    except RuntimeError:
        # NOTE(review): presumably raised when the XML has no such option - confirm.
        pass
    try:
        self.settings.threshold = reader.options().similarity_threshold()
    except RuntimeError:
        pass
    sub = reader.substructure(0)
    # Rebuild the fingerprints as the molecule setter does for a
    # QuerySubstructure; otherwise search_molecule would keep comparing
    # against the previous query (or fail if none was ever set).
    builder = SubstructureSearchLib.ChemicalFingerprintBuilderSubstructure()
    fingerprint = builder.similarity_fingerprint(sub)
    self._fp = builder
    self._sp = fingerprint
    self._substructure = sub
    self._molecule = None
def read_xml_file(self, file_name):
    '''Read an XML file into the similarity searcher.

    :param file_name: path to XML file
    :raises: IOError if the file does not exist
    '''
    if os.path.exists(file_name):
        with open(file_name) as xml_file:
            xml_text = xml_file.read()
            self.read_xml(xml_text)
        return
    raise IOError('The file %s does not exist' % file_name)
@property
def threshold(self):
'''The similarity threshold to use.'''
return self.settings.threshold
@threshold.setter
def threshold(self, value):
'''Sets the value of threshold.'''
self.settings.threshold = value
@property
def coefficient(self):
'''Which coefficient to use when determining similarity.'''
return self.settings.coefficient
@coefficient.setter
def coefficient(self, value):
self.settings.coefficient = value
def search_molecule(self, mol):
    '''Search a molecule.

    This can be used to determine a similarity coefficient against the
    given molecule. A hit is always returned; the threshold of the
    settings is not applied here.

    :param mol: :class:`ccdc.molecule.Molecule`
    :returns: :class:`SimilaritySearch.SimilarityHit`

    >>> csd = EntryReader('csd')
    >>> query_mol = csd.molecule('HXACAN')
    >>> searcher = SimilaritySearch(query_mol)
    >>> hit = searcher.search_molecule(csd.molecule('IBPRAC'))
    >>> print(round(hit.similarity, 3))
    0.161
    '''
    target_fp = self._fp.similarity_fingerprint(mol._molecule)
    use_dice = self.settings.coefficient.lower() == 'dice'
    # Any coefficient other than 'dice' is scored as Tanimoto.
    score = self._sp.dice(target_fp) if use_dice else self._sp.tanimoto(target_fp)
    return SimilaritySearch.SimilarityHit(score, mol.identifier, _molecule=mol)
def _search_reader(self, reader):
    '''Private: run the similarity search against a database reader.

    The reader's similarity searcher is created on first use and cached on
    the reader. When a filter is set, each result is checked against the
    settings and collection stops once ``max_hit_structures`` hits are kept.

    :param reader: a database reader providing ``_db``
    :returns: list of :class:`SimilaritySearch.SimilarityHit`
    '''
    self.settings._settings.reset_hits()
    if not hasattr(reader, '_similarity_searcher'):
        reader._similarity_searcher = reader._db.searcher_factory().similarity_searcher()
    if not self.settings._has_filter_set():
        results = reader._similarity_searcher.search(self._substructure, self.settings._settings)
        return [
            SimilaritySearch.SimilarityHit(r.similarity(), r.identifier().str(), _binary_database=reader._db)
            for r in results
        ]
    # The limit is lifted while searching because results are filtered here
    # afterwards; it is re-applied by hand in the loop below.
    max_hits = self.settings.max_hit_structures
    if max_hits != maxint32:
        self.settings.max_hit_structures = maxint32
    hits = []
    try:
        results = reader._similarity_searcher.search(self._substructure, self.settings._settings)
        for r in results:
            if max_hits and len(hits) >= max_hits:
                break
            h = SimilaritySearch.SimilarityHit(
                r.similarity(), r.identifier().str(), _binary_database=reader._db)
            if self.settings.test(h.entry):
                hits.append(h)
    finally:
        # Restore the caller's limit even if searching or filtering raised.
        self.settings.max_hit_structures = max_hits
    return hits
def _search_entry(self, entry):
if self.settings.test(entry):
try:
mol = entry.molecule
except TypeError:
return []
return self._search_molecule(mol)
return []
def _search_crystal(self, crystal):
if self.settings.test(crystal):
try:
mol = crystal.molecule
except TypeError:
return []
return self._search_molecule(mol)
return []
def _search_molecule(self, mol):
if self.settings.test(mol):
fp = self._fp.similarity_fingerprint(mol._molecule)
if self.settings.coefficient.lower() == 'dice':
coeff = self._sp.dice(fp)
else:
coeff = self._sp.tanimoto(fp)
if coeff >= self.settings.threshold:
return [SimilaritySearch.SimilarityHit(coeff, mol.identifier, _molecule=mol)]
return []
###################################################################################
[docs]class TextNumericSearch(Search):
'''Class to define and run text/numeric searches in a crystal structure database.
It is possible to add one or more criterion for the query to match.
>>> text_numeric_query = TextNumericSearch()
>>> text_numeric_query.add_compound_name('aspirin')
>>> text_numeric_query.add_citation(year=[2011, 2013])
>>> for hit in text_numeric_query.search(max_hit_structures=3):
... print(hit.identifier)
...
ACSALA19
ACSALA20
ACSALA21
A human-readable representation of the queries may be obtained:
>>> print(', '.join(q for q in text_numeric_query.queries))
Compound name aspirin anywhere , Journal year in range 2011-2013
'''
# Text matching modes, by name, mapped to the DatabaseEntryLib constants.
modes = utilities.bidirectional_dict(
    anywhere=DatabaseEntryLib.ANYWHERE,
    exact=DatabaseEntryLib.EXACT_WORD,
    separate=DatabaseEntryLib.EXACT_SPACE_SEPARATED_WORD,
    is_null=DatabaseEntryLib.IS_NULL,
    not_null=DatabaseEntryLib.NOT_NULL,
    start_of_word=DatabaseEntryLib.START_OF_WORD,
    start=DatabaseEntryLib.STARTS_WITH,
)
# Labels for the numeric fields, used when formatting numeric queries.
_numeric_fields = {
    DatabaseEntryLib.CCDC_JOURNAL_CODEN: 'Journal identifier',
    DatabaseEntryLib.JOURNAL_YEAR: 'Journal year',
    DatabaseEntryLib.CCDC_DEPOSITION_NUMBER: 'CCDC number',
    DatabaseEntryLib.PREDICTED_SEMICONDUCTOR_DYNAMIC_DISORDER: 'Predicted semiconductor dynamic disorder',
    DatabaseEntryLib.PREDICTED_SEMICONDUCTOR_SINGLET_STATE_1_ENERGY: 'Predicted semiconductor singlet state 1 energy',
    DatabaseEntryLib.PREDICTED_SEMICONDUCTOR_SINGLET_STATE_2_ENERGY: 'Predicted semiconductor singlet state 2 energy',
    DatabaseEntryLib.PREDICTED_SEMICONDUCTOR_TRIPLET_STATE_1_ENERGY: 'Predicted semiconductor triplet state 1 energy',
    DatabaseEntryLib.PREDICTED_SEMICONDUCTOR_TRIPLET_STATE_2_ENERGY: 'Predicted semiconductor triplet state 2 energy',
    DatabaseEntryLib.PREDICTED_SEMICONDUCTOR_REORGANIZATION_ENERGY: 'Predicted semiconductor hole reorganization energy',
    DatabaseEntryLib.PREDICTED_SEMICONDUCTOR_TRANSFER_INTEGRAL: 'Predicted semiconductor transfer integral',
    DatabaseEntryLib.PREDICTED_SEMICONDUCTOR_HOMO_LUMO_GAP: 'Predicted semiconductor HOMO-LUMO gap',
    DatabaseEntryLib.PREDICTED_SEMICONDUCTOR_SINGLET_STATE_1_OSCILLATOR_STRENGTH: 'Predicted semiconductor singlet state 1 oscillator strength',
    DatabaseEntryLib.PREDICTED_SEMICONDUCTOR_SINGLET_STATE_2_OSCILLATOR_STRENGTH: 'Predicted semiconductor singlet state 2 oscillator strength',
}
# Labels for the text fields, used when formatting text queries.
_text_fields = {
    DatabaseEntryLib.ALL_TEXT: 'All text',
    DatabaseEntryLib.ANALOGUES: 'Analogues',
    DatabaseEntryLib.AUTHOR_NAME: 'Author',
    DatabaseEntryLib.BIOACTIVITY: 'Bioactivity',
    DatabaseEntryLib.COLOR: 'Color',
    DatabaseEntryLib.COMPOUND_NAME: 'Compound name',
    DatabaseEntryLib.DISORDER: 'Disorder',
    DatabaseEntryLib.DOI: 'DOI',
    DatabaseEntryLib.HABIT: 'Habit',
    DatabaseEntryLib.JDS_DEPOSITION_NUMBER: 'JDS deposition number',
    DatabaseEntryLib.JOURNAL_PAGE: 'Journal page',
    DatabaseEntryLib.JOURNAL_VOLUME: 'Journal volume',
    DatabaseEntryLib.PEPTIDE_SEQUENCE: 'Peptide sequence',
    DatabaseEntryLib.PHASE_TRANSITIONS: 'Phase transitions',
    DatabaseEntryLib.POLYMORPH: 'Polymorph',
    DatabaseEntryLib.RECRYSTALLISATION_SOLVENT: 'Recrystallisation solvent',
    DatabaseEntryLib.REFCODE: 'All refcodes',
    DatabaseEntryLib.MAIN_REFCODE_ONLY: 'Refcode',
    DatabaseEntryLib.SOURCE: 'Source',
    DatabaseEntryLib.SPACEGROUP_NAME: 'Spacegroup',
    DatabaseEntryLib.SYNONYMS: 'Synonyms',
    DatabaseEntryLib.HEAT_CAPACITY_NOTES: 'Heat capacity notes',
    DatabaseEntryLib.HEAT_OF_FUSION_NOTES: 'Heat of fusion notes',
    DatabaseEntryLib.SOLUBILITY_NOTES: 'Solubility notes',
}
class TextNumericSearchSettings(Search.Settings):
    '''Settings for a text/numeric search.

    No settings apart from those provided by the base class required.
    '''
class TextNumericHit(Search.SearchHit):
    '''Hit from a TextNumericSearch.'''

    def __init__(self, identifier, _db):
        '''Store identifier and database.

        :param identifier: the identifier of the hit
        :param _db: private; passed to the base class as its binary database
        '''
        Search.SearchHit.__init__(
            self,
            identifier,
            _binary_database=_db,
        )
def __init__(self, settings=None):
    '''Initialise a text-numeric query.

    :param settings: the settings to use; a default ``TextNumericSearch.Settings`` is created when ``None``
    '''
    self.settings = TextNumericSearch.Settings() if settings is None else settings
    # Start with an empty set of queries.
    self.clear()
    self._journal_list = None
def clear(self):
    '''Restart a search.

    The underlying search object is replaced by a new one, so queries
    added so far are dropped.
    '''
    fresh_search = DatabaseEntryLib.CrystalStructureDatabaseTextNumericSearch()
    self._search = fresh_search
def _text_query(self, field, txt, mode='anywhere', ignore_non_alpha_num=False):
'''Private: construct a text query.'''
lower_mode = mode.lower()
if txt or lower_mode == 'is_null' or lower_mode == 'not_null':
query = DatabaseEntryLib.CrystalStructureDatabaseTextSearchQuery(
field,
self.modes[lower_mode],
txt
)
query.set_option(DatabaseEntryLib.IGNORE_NON_ALPHABETIC_CHARS, ignore_non_alpha_num)
self._search.add_query(query)
def _numeric_query(self, field, value):
    '''Private: construct a numeric query.

    A list or tuple is treated as an inclusive range from its first to its
    second element; any other value must be matched exactly.
    '''
    if isinstance(value, (list, tuple)):
        lower, upper = value[0], value[1]
        condition = SubstructureSearchLib.InclusiveRange(lower, upper)
    else:
        condition = SubstructureSearchLib.EqualTo(value)
    self._search.add_query(
        DatabaseEntryLib.CrystalStructureDatabaseNumericSearchQuery(field, condition)
    )
def _text_queries(self):
'''Private: the tuple of formatted text queries.'''
def format_query(q):
return '%s %s %s %s' % (
self._text_fields[q.field()],
q.value(),
self.modes.inverse_lookup(q.match_type()),
'ignore non-alphanumeric' if q.is_option_set(0) else ''
)
return tuple(
format_query(q) for q in self._search.text_queries()
)
def _numeric_queries(self):
'''Private: the tuple of formatted numeric queries.'''
def format_query(q):
return '%s %s %s' % (self._numeric_fields[q.field()], q.condition().name(), q.condition())
return tuple(
format_query(q) for q in self._search.numeric_queries()
)
@property
def queries(self):
    '''The current set of queries for this search.

    The formatted text queries come first, followed by the numeric ones.

    >>> tns = TextNumericSearch()
    >>> tns.add_all_text('ibuprofen')
    >>> tns.add_author('Haisa')
    >>> print('; '.join(str(q).strip() for q in tns.queries))
    All text ibuprofen anywhere; Author Haisa anywhere
    '''
    text_part = self._text_queries()
    numeric_part = self._numeric_queries()
    return text_part + numeric_part
def add_all_text(self, txt, mode='anywhere', ignore_non_alpha_num=False):
    '''Search for text anywhere in the entry.

    :param txt: the text to look for
    :param mode: name of the match mode (default ``'anywhere'``)
    :param ignore_non_alpha_num: value for the query's ignore-non-alphabetic-characters option
    '''
    field = DatabaseEntryLib.ALL_TEXT
    self._text_query(field, txt, mode=mode, ignore_non_alpha_num=ignore_non_alpha_num)
def add_analogue(self, analogue, mode='anywhere', ignore_non_alpha_num=False):
    '''Search for an analogue.

    :param analogue: the analogue text to look for
    :param mode: name of the match mode (default ``'anywhere'``)
    :param ignore_non_alpha_num: value for the query's ignore-non-alphabetic-characters option
    '''
    field = DatabaseEntryLib.ANALOGUES
    self._text_query(field, analogue, mode=mode, ignore_non_alpha_num=ignore_non_alpha_num)
def add_author(self, author, mode='anywhere', ignore_non_alpha_num=False):
    '''Search for an author.

    :param author: the author name text to look for
    :param mode: name of the match mode (default ``'anywhere'``)
    :param ignore_non_alpha_num: value for the query's ignore-non-alphabetic-characters option
    '''
    field = DatabaseEntryLib.AUTHOR_NAME
    self._text_query(field, author, mode=mode, ignore_non_alpha_num=ignore_non_alpha_num)
def add_bioactivity(self, activity, mode='anywhere', ignore_non_alpha_num=False):
    '''Search for a particular bio-activity.

    :param activity: the bio-activity text to look for
    :param mode: name of the match mode (default ``'anywhere'``)
    :param ignore_non_alpha_num: value for the query's ignore-non-alphabetic-characters option
    '''
    field = DatabaseEntryLib.BIOACTIVITY
    self._text_query(field, activity, mode=mode, ignore_non_alpha_num=ignore_non_alpha_num)
def add_color(self, color, mode='anywhere', ignore_non_alpha_num=False):
    '''Search for a particular colour.

    :param color: the colour text to look for
    :param mode: name of the match mode (default ``'anywhere'``)
    :param ignore_non_alpha_num: value for the query's ignore-non-alphabetic-characters option
    '''
    field = DatabaseEntryLib.COLOR
    self._text_query(field, color, mode=mode, ignore_non_alpha_num=ignore_non_alpha_num)
def add_compound_name(self, compound_name, mode='anywhere', ignore_non_alpha_num=False):
    '''Search for a compound name.

    The search checks the content both of
    :attr:`ccdc.entry.Entry.chemical_name` and
    :attr:`ccdc.entry.Entry.synonyms`.

    To illustrate this let us have a look at the CSD entry ``ABABEM``.

    >>> from ccdc.io import EntryReader
    >>> entry_reader = EntryReader('CSD')
    >>> ababem = entry_reader.entry('ABABEM')
    >>> print(ababem.chemical_name)
    Tetrahydro[1,3,4]thiadiazolo[3,4-a]pyridazine-1,3-dione
    >>> print(ababem.synonyms[0])
    8-Thia-1,6-diazabicyclo[4.3.0]nonane-7,9-dione

    The text ``azabicyclo[4.3.0]nonane`` is only found in the synonym. Let
    us search for it using a compound name search.

    >>> from ccdc.search import TextNumericSearch
    >>> query = TextNumericSearch()
    >>> query.add_compound_name('azabicyclo[4.3.0]nonane')
    >>> hits = query.search()

    Finally let us assert that we have found ``ABABEM``.

    >>> assert(u'ABABEM' in [h.identifier for h in hits])

    :param compound_name: the compound name text to look for
    :param mode: name of the match mode (default ``'anywhere'``)
    :param ignore_non_alpha_num: value for the query's ignore-non-alphabetic-characters option
    '''
    field = DatabaseEntryLib.COMPOUND_NAME
    self._text_query(
        field, compound_name, mode=mode, ignore_non_alpha_num=ignore_non_alpha_num
    )
def add_disorder(self, disorder, mode='anywhere', ignore_non_alpha_num=False):
    '''Search for a disorder comment.

    :param disorder: the disorder comment text to look for
    :param mode: name of the match mode (default ``'anywhere'``)
    :param ignore_non_alpha_num: value for the query's ignore-non-alphabetic-characters option
    '''
    field = DatabaseEntryLib.DISORDER
    self._text_query(field, disorder, mode=mode, ignore_non_alpha_num=ignore_non_alpha_num)
def add_doi(self, doi, mode='anywhere', ignore_non_alpha_num=False):
    '''Search for a DOI.

    :param doi: the DOI text to look for
    :param mode: name of the match mode (default ``'anywhere'``)
    :param ignore_non_alpha_num: value for the query's ignore-non-alphabetic-characters option
    '''
    field = DatabaseEntryLib.DOI
    self._text_query(field, doi, mode=mode, ignore_non_alpha_num=ignore_non_alpha_num)
def add_habit(self, habit, mode='anywhere', ignore_non_alpha_num=False):
    '''Search for a particular habit.

    :param habit: the habit text to look for
    :param mode: name of the match mode (default ``'anywhere'``)
    :param ignore_non_alpha_num: value for the query's ignore-non-alphabetic-characters option
    '''
    field = DatabaseEntryLib.HABIT
    self._text_query(field, habit, mode=mode, ignore_non_alpha_num=ignore_non_alpha_num)
def add_peptide_sequence(self, peptide_sequence, mode='anywhere', ignore_non_alpha_num=False):
    '''Search for a peptide sequence.

    :param peptide_sequence: the peptide sequence text to look for
    :param mode: name of the match mode (default ``'anywhere'``)
    :param ignore_non_alpha_num: value for the query's ignore-non-alphabetic-characters option
    '''
    field = DatabaseEntryLib.PEPTIDE_SEQUENCE
    self._text_query(
        field, peptide_sequence, mode=mode, ignore_non_alpha_num=ignore_non_alpha_num
    )
def add_phase_transition(self, phase_transition, mode='anywhere', ignore_non_alpha_num=False):
    '''Search for a phase transition.

    :param phase_transition: the phase transition text to look for
    :param mode: name of the match mode (default ``'anywhere'``)
    :param ignore_non_alpha_num: value for the query's ignore-non-alphabetic-characters option
    '''
    field = DatabaseEntryLib.PHASE_TRANSITIONS
    self._text_query(
        field, phase_transition, mode=mode, ignore_non_alpha_num=ignore_non_alpha_num
    )
def add_polymorph(self, polymorph, mode='anywhere', ignore_non_alpha_num=False):
    '''Search for polymorph information.

    :param polymorph: the polymorph text to look for
    :param mode: name of the match mode (default ``'anywhere'``)
    :param ignore_non_alpha_num: value for the query's ignore-non-alphabetic-characters option
    '''
    field = DatabaseEntryLib.POLYMORPH
    self._text_query(field, polymorph, mode=mode, ignore_non_alpha_num=ignore_non_alpha_num)
def add_solvent(self, solvent, mode='anywhere', ignore_non_alpha_num=False):
    '''Search for a solvent.

    The query is made on the recrystallisation solvent field.

    :param solvent: the solvent text to look for
    :param mode: name of the match mode (default ``'anywhere'``)
    :param ignore_non_alpha_num: value for the query's ignore-non-alphabetic-characters option
    '''
    field = DatabaseEntryLib.RECRYSTALLISATION_SOLVENT
    self._text_query(field, solvent, mode=mode, ignore_non_alpha_num=ignore_non_alpha_num)
def add_identifier(self, refcode, mode='anywhere', ignore_non_alpha_num=False):
    '''Search for a refcode.

    See :meth:`add_all_identifiers` to include previous identifiers as well.

    :param refcode: the refcode text to look for
    :param mode: name of the match mode (default ``'anywhere'``)
    :param ignore_non_alpha_num: value for the query's ignore-non-alphabetic-characters option
    '''
    field = DatabaseEntryLib.MAIN_REFCODE_ONLY
    self._text_query(field, refcode, mode=mode, ignore_non_alpha_num=ignore_non_alpha_num)
def add_all_identifiers(self, refcode, mode='anywhere', ignore_non_alpha_num=False):
    '''Search for an identifier, including previous identifiers.

    >>> from ccdc.search import TextNumericSearch
    >>> query = TextNumericSearch()
    >>> query.add_all_identifiers('DABHUJ')
    >>> hits = query.search()
    >>> print(hits[0].identifier)
    ACPRET03
    >>> print(hits[0].entry.previous_identifier)
    DABHUJ

    :param refcode: the identifier text to look for
    :param mode: name of the match mode (default ``'anywhere'``)
    :param ignore_non_alpha_num: value for the query's ignore-non-alphabetic-characters option
    '''
    field = DatabaseEntryLib.REFCODE
    self._text_query(field, refcode, mode=mode, ignore_non_alpha_num=ignore_non_alpha_num)
def add_source(self, source, mode='anywhere', ignore_non_alpha_num=False):
    '''Search for a source.

    >>> from ccdc.search import TextNumericSearch
    >>> searcher = TextNumericSearch()
    >>> searcher.add_source('toad')
    >>> hits = searcher.search(max_hit_structures=5)
    >>> for h in hits:
    ...     print('%-8s: %s' % (h.identifier, h.entry.source))
    ...
    CUXYAV  : Ch'an Su (dried venom of Chinese toad)
    EWAWUW  : isolated from the eggs of toad Bufo bufo gargarizans
    EWAXAD  : isolated from the eggs of toad Bufo bufo gargarizans
    FIFDUT  : dried venom of Chinese toad Ch'an Su
    FIFFAB  : dried venom of Chinese toad Ch'an Su

    :param source: the source text to look for
    :param mode: name of the match mode (default ``'anywhere'``)
    :param ignore_non_alpha_num: value for the query's ignore-non-alphabetic-characters option
    '''
    field = DatabaseEntryLib.SOURCE
    self._text_query(field, source, mode=mode, ignore_non_alpha_num=ignore_non_alpha_num)
def add_spacegroup_symbol(self, spacegroup_symbol, mode='anywhere', ignore_non_alpha_num=False):
    '''Search for a spacegroup symbol or any alias of that symbol.

    :param spacegroup_symbol: the spacegroup symbol text to look for
    :param mode: name of the match mode (default ``'anywhere'``)
    :param ignore_non_alpha_num: value for the query's ignore-non-alphabetic-characters option
    '''
    field = DatabaseEntryLib.SPACEGROUP_NAME
    self._text_query(
        field, spacegroup_symbol, mode=mode, ignore_non_alpha_num=ignore_non_alpha_num
    )
def add_synonym(self, synonym, mode='anywhere', ignore_non_alpha_num=False):
    '''Search for a synonym.

    :param synonym: the synonym text to look for
    :param mode: name of the match mode (default ``'anywhere'``)
    :param ignore_non_alpha_num: value for the query's ignore-non-alphabetic-characters option
    '''
    field = DatabaseEntryLib.SYNONYMS
    self._text_query(field, synonym, mode=mode, ignore_non_alpha_num=ignore_non_alpha_num)
def add_citation(self, author='', journal='', volume=None, year=None, first_page=None,
                 ignore_non_alpha_num=False, _coden=None):
    '''Search for a citation.

    Only the parts of the citation that are supplied contribute a query.

    Note: the journal parameter requires the CSD to be present in order to translate the
    journal name to a coden identifier. If the CSD is not present, but an alternative
    database is, use the alternative database's journals dict to look up a coden
    identifier and specify the _coden parameter in this function.

    :param author: author name text; skipped when empty
    :param journal: journal name, translated to a coden through :attr:`journals`
    :param volume: journal volume, converted to text and matched exactly
    :param year: publication year, or a two-element list/tuple giving an inclusive range
    :param first_page: first page, converted to text and matched exactly
    :param ignore_non_alpha_num: applied to the author, volume and first page text queries
    :param _coden: private: journal coden; when given, ``journal`` is not looked up
    :raises: NameError if ``journal`` is not a key of :attr:`journals`
    '''
    if author:
        # Forward the flag so the author query honours it like the other text parts.
        self.add_author(author, ignore_non_alpha_num=ignore_non_alpha_num)

    coden = _coden
    if coden is None and journal:
        coden = self.journals.get(journal, None)
        if coden is None:
            raise NameError('The journal %s could not be found' % journal)
    if coden is not None:
        self._numeric_query(DatabaseEntryLib.CCDC_JOURNAL_CODEN, coden)

    if volume is not None:
        self._text_query(DatabaseEntryLib.JOURNAL_VOLUME, str(volume), 'exact',
                         ignore_non_alpha_num=ignore_non_alpha_num)
    if year is not None:
        self._numeric_query(DatabaseEntryLib.JOURNAL_YEAR, year)
    if first_page is not None:
        self._text_query(DatabaseEntryLib.JOURNAL_PAGE, str(first_page), 'exact',
                         ignore_non_alpha_num=ignore_non_alpha_num)
def add_ccdc_number(self, value):
    '''Search for a particular or a range of CCDC deposition numbers.

    >>> from ccdc.search import TextNumericSearch
    >>> searcher = TextNumericSearch()
    >>> searcher.add_ccdc_number(241370)
    >>> hits = searcher.search()
    >>> len(hits)
    1
    >>> entry = hits[0].entry
    >>> print('%s %s' % (entry.identifier, entry.ccdc_number))
    ABEBUF 241370
    >>> searcher.clear()
    >>> searcher.add_ccdc_number((241368, 241372))
    >>> hits = searcher.search()
    >>> print(len(hits))
    3
    >>> for hit in hits:
    ...     print('%s %s' % (hit.identifier, hit.entry.ccdc_number))
    ...
    ABEBUF 241370
    BIBZIW 241371
    BIMGEK 241372

    :param value: a single deposition number, or a two-element list/tuple
        giving an inclusive range
    '''
    field = DatabaseEntryLib.CCDC_DEPOSITION_NUMBER
    self._numeric_query(field, value)
def add_heat_capacity_notes(self, heat_capacity_notes, mode='anywhere', ignore_non_alpha_num=False):
    '''Search for heat capacity notes.

    :param heat_capacity_notes: the notes text to look for
    :param mode: name of the match mode (default ``'anywhere'``)
    :param ignore_non_alpha_num: value for the query's ignore-non-alphabetic-characters option
    '''
    # NOTE(review): the constructed object is discarded; presumably the call is made
    # for a side effect of constructing it - confirm before removing.
    SolubilityPlatformLib.SolventData(heat_capacity_notes, 0)
    field = DatabaseEntryLib.HEAT_CAPACITY_NOTES
    self._text_query(
        field, heat_capacity_notes, mode=mode, ignore_non_alpha_num=ignore_non_alpha_num
    )
def add_heat_of_fusion_notes(self, heat_of_fusion_notes, mode='anywhere', ignore_non_alpha_num=False):
    '''Search for heat of fusion notes.

    :param heat_of_fusion_notes: the notes text to look for
    :param mode: name of the match mode (default ``'anywhere'``)
    :param ignore_non_alpha_num: value for the query's ignore-non-alphabetic-characters option
    '''
    # NOTE(review): the constructed object is discarded; presumably the call is made
    # for a side effect of constructing it - confirm before removing.
    SolubilityPlatformLib.SolventData(heat_of_fusion_notes, 0)
    field = DatabaseEntryLib.HEAT_OF_FUSION_NOTES
    self._text_query(
        field, heat_of_fusion_notes, mode=mode, ignore_non_alpha_num=ignore_non_alpha_num
    )
def add_solubility_notes(self, solubility_notes, mode='anywhere', ignore_non_alpha_num=False):
    '''Search for solubility notes.

    :param solubility_notes: the notes text to look for
    :param mode: name of the match mode (default ``'anywhere'``)
    :param ignore_non_alpha_num: value for the query's ignore-non-alphabetic-characters option
    '''
    # NOTE(review): the constructed object is discarded; presumably the call is made
    # for a side effect of constructing it - confirm before removing.
    SolubilityPlatformLib.SolventData(solubility_notes, 0)
    field = DatabaseEntryLib.SOLUBILITY_NOTES
    self._text_query(
        field, solubility_notes, mode=mode, ignore_non_alpha_num=ignore_non_alpha_num
    )
def _add_fiz_depostion_number(self, value):
    '''Private: add a numeric query on the FIZ deposition number field.

    :param value: a single value, or a two-element list/tuple giving an inclusive range
    '''
    field = DatabaseEntryLib.FIZ_DEPOSITION_NUMBER
    self._numeric_query(field, value)
def _add_csd_accession_date(self, value):
    '''Private: add a numeric query on the CSD accession date field.

    :param value: a single value, or a two-element list/tuple giving an inclusive range
    '''
    field = DatabaseEntryLib.CSD_ACCESSION_DATE
    self._numeric_query(field, value)
def _add_csd_modification_date(self, value):
    '''Private: add a numeric query on the CSD modification date field.

    :param value: a single value, or a two-element list/tuple giving an inclusive range
    '''
    field = DatabaseEntryLib.CSD_MODIFICATION_DATE
    self._numeric_query(field, value)
def _add_entry_insertion_time(self, value):
    '''Private: add a numeric query on the entry insertion time field.

    :param value: a single value, or a two-element list/tuple giving an inclusive range
    '''
    field = DatabaseEntryLib.ENTRY_INSERTION_TIME
    self._numeric_query(field, value)
def add_predicted_semiconductor_dynamic_disorder(self, value):
    '''Search for predicted semiconductor dynamic disorder.

    See :attr:`ccdc.entry.SemiconductorPredictedProperties.dynamic_disorder`

    :param value: a single value, or a two-element list/tuple giving an inclusive range
    '''
    field = DatabaseEntryLib.PREDICTED_SEMICONDUCTOR_DYNAMIC_DISORDER
    self._numeric_query(field, value)
def add_predicted_semiconductor_singlet_state_1_energy(self, value):
    '''Search for predicted semiconductor singlet state 1 energy.

    See :attr:`ccdc.entry.SemiconductorPredictedProperties.singlet_state_1_energy`

    :param value: a single value, or a two-element list/tuple giving an inclusive range
    '''
    field = DatabaseEntryLib.PREDICTED_SEMICONDUCTOR_SINGLET_STATE_1_ENERGY
    self._numeric_query(field, value)
def add_predicted_semiconductor_singlet_state_2_energy(self, value):
    '''Search for predicted semiconductor singlet state 2 energy.

    See :attr:`ccdc.entry.SemiconductorPredictedProperties.singlet_state_2_energy`

    :param value: a single value, or a two-element list/tuple giving an inclusive range
    '''
    field = DatabaseEntryLib.PREDICTED_SEMICONDUCTOR_SINGLET_STATE_2_ENERGY
    self._numeric_query(field, value)
def add_predicted_semiconductor_triplet_state_1_energy(self, value):
    '''Search for predicted semiconductor triplet state 1 energy.

    See :attr:`ccdc.entry.SemiconductorPredictedProperties.triplet_state_1_energy`

    :param value: a single value, or a two-element list/tuple giving an inclusive range
    '''
    field = DatabaseEntryLib.PREDICTED_SEMICONDUCTOR_TRIPLET_STATE_1_ENERGY
    self._numeric_query(field, value)
def add_predicted_semiconductor_triplet_state_2_energy(self, value):
    '''Search for predicted semiconductor triplet state 2 energy.

    See :attr:`ccdc.entry.SemiconductorPredictedProperties.triplet_state_2_energy`

    :param value: a single value, or a two-element list/tuple giving an inclusive range
    '''
    field = DatabaseEntryLib.PREDICTED_SEMICONDUCTOR_TRIPLET_STATE_2_ENERGY
    self._numeric_query(field, value)
def add_predicted_semiconductor_hole_reorganization_energy(self, value):
    '''Search for predicted semiconductor hole reorganization energy.

    See :attr:`ccdc.entry.SemiconductorPredictedProperties.hole_reorganization_energy`

    :param value: a single value, or a two-element list/tuple giving an inclusive range
    '''
    field = DatabaseEntryLib.PREDICTED_SEMICONDUCTOR_REORGANIZATION_ENERGY
    self._numeric_query(field, value)
def add_predicted_semiconductor_transfer_integral(self, value):
    '''Search for predicted semiconductor transfer integral.

    See :attr:`ccdc.entry.SemiconductorPredictedProperties.transfer_integral`

    :param value: a single value, or a two-element list/tuple giving an inclusive range
    '''
    field = DatabaseEntryLib.PREDICTED_SEMICONDUCTOR_TRANSFER_INTEGRAL
    self._numeric_query(field, value)
def add_predicted_semiconductor_homo_lumo_gap(self, value):
    '''Search for predicted semiconductor HOMO-LUMO gap.

    See :attr:`ccdc.entry.SemiconductorPredictedProperties.homo_lumo_gap`

    :param value: a single value, or a two-element list/tuple giving an inclusive range
    '''
    field = DatabaseEntryLib.PREDICTED_SEMICONDUCTOR_HOMO_LUMO_GAP
    self._numeric_query(field, value)
def add_predicted_semiconductor_singlet_state_1_oscillator_strength(self, value):
    '''Search for predicted semiconductor singlet state 1 oscillator strength.

    See :attr:`ccdc.entry.SemiconductorPredictedProperties.singlet_state_1_oscillator_strength`

    :param value: a single value, or a two-element list/tuple giving an inclusive range
    '''
    field = DatabaseEntryLib.PREDICTED_SEMICONDUCTOR_SINGLET_STATE_1_OSCILLATOR_STRENGTH
    self._numeric_query(field, value)
def add_predicted_semiconductor_singlet_state_2_oscillator_strength(self, value):
    '''Search for predicted semiconductor singlet state 2 oscillator strength.

    See :attr:`ccdc.entry.SemiconductorPredictedProperties.singlet_state_2_oscillator_strength`

    :param value: a single value, or a two-element list/tuple giving an inclusive range
    '''
    field = DatabaseEntryLib.PREDICTED_SEMICONDUCTOR_SINGLET_STATE_2_OSCILLATOR_STRENGTH
    self._numeric_query(field, value)
def is_journal_valid(self, journal):
    '''Check the validity of a specified journal name in the CSD.

    This requires the CSD to be present.

    :param journal: str, journal name
    :returns: ``True`` when :attr:`journals` holds a coden for the name, else ``False``
    '''
    coden = self.journals.get(journal)
    return coden is not None
@property
def journals(self):
    '''A dictionary of journal name : ccdc code number for journals in the CSD.

    This requires the CSD to be present. The dictionary is built on first
    access and then reused.
    '''
    cached = self._journal_list
    if cached is not None:
        return cached
    csd_location = _CSDDatabaseLocator.get_binary_csd_location()
    csd = CSDSQLDatabaseLib.CSDSQLDatabase(csd_location)
    table = {}
    for info in csd.journal_list_info().journal_list():
        table[info.name()] = info.ccdc_coden()
    self._journal_list = table
    return table
def _search_reader(self, reader):
    '''Private: run the text-numeric search against a database reader.

    :param reader: the database reader to search; a text-numeric searcher is cached
        on it as ``_text_numeric_searcher``
    :returns: list of :class:`TextNumericSearch.TextNumericHit`
    :raises: NotImplementedError if the reader's database cannot provide a searcher
    '''
    self._search.settings().hits_limit_manager().reset_hits()

    if not hasattr(reader, '_text_numeric_searcher'):
        try:
            factory = reader._db.searcher_factory()
            reader._text_numeric_searcher = factory.text_numeric_searcher()
        except (RuntimeError, NameError, AttributeError):
            # Failure is reported just below as an unsupported database.
            pass
    if not hasattr(reader, '_text_numeric_searcher'):
        raise NotImplementedError('This database does not support TextNumericSearch')

    if not self.settings._has_filter_set():
        # No filters: the underlying search can apply the hit limit itself.
        self._search.settings().set_maximum_hits_limit(self.settings.max_hit_structures)
        ids = CSDSQLDatabaseLib.text_numeric_search(reader._text_numeric_searcher, self._search)
        return [TextNumericSearch.TextNumericHit(x, reader._db) for x in ids]

    # Filters are applied here, so lift the limit on the underlying search and
    # enforce max_hit_structures while filtering.
    max_hits = self.settings.max_hit_structures
    self._search.settings().set_maximum_hits_limit(maxint32)
    ids = CSDSQLDatabaseLib.text_numeric_search(reader._text_numeric_searcher, self._search)
    self.settings.max_hit_structures = max_hits
    hits = []
    for identifier in ids:
        hit = TextNumericSearch.TextNumericHit(identifier, reader._db)
        if not self.settings.test(hit.entry):
            continue
        hits.append(hit)
        if max_hits and len(hits) >= max_hits:
            break
    return hits
def _search_entry(self, entry):
    '''Private: not supported; always raises NotImplementedError.'''
    message = 'TextNumericSearch of an entry'
    raise NotImplementedError(message)
def _search_crystal(self, crystal):
    '''Private: not supported; always raises NotImplementedError.'''
    message = 'TextNumericSearch of a crystal'
    raise NotImplementedError(message)
def _search_molecule(self, molecule):
    '''Private: not supported; always raises NotImplementedError.'''
    message = 'TextNumericSearch of a molecule'
    raise NotImplementedError(message)
@staticmethod
def from_xml(xml):
    '''Create a TextNumericSearch from XML.

    :param xml: XML string
    :returns: a new :class:`TextNumericSearch` whose query was parsed from ``xml``
    '''
    tns = TextNumericSearch()
    tns.read_xml(xml)
    return tns
@staticmethod
def from_xml_file(file_name):
    '''Create a TextNumericSearch from an XML file.

    :param file_name: path to XML file
    :returns: a new :class:`TextNumericSearch` built from the file's content
    :raises: IOError when the file does not exist
    '''
    if not os.path.exists(file_name):
        raise IOError('The file %s does not exist' % file_name)
    with open(file_name) as handle:
        xml_text = handle.read()
    return TextNumericSearch.from_xml(xml_text)
def read_xml(self, xml):
    '''Read a query from XML.

    The parsed query replaces this search's current query.

    :param xml: XML string
    '''
    xml_parser = DatabaseEntryLib.TextNumericSearchXMLParser()
    self._search = xml_parser.parse(UtilitiesLib.istringstream(xml))
def read_xml_file(self, file_name):
    '''Read a text numeric search from an XML file.

    :param file_name: path to XML file
    :raises: IOError if the file cannot be read
    '''
    if not os.path.exists(file_name):
        raise IOError('The file %s does not exist' % file_name)
    with open(file_name) as handle:
        xml_text = handle.read()
    self.read_xml(xml_text)
###########################################################################
[docs]class SubstructureSearch(Search):
'''Query crystal structures for interactions.'''
_telemetry = 0
class Settings(Search.Settings):
    '''Settings appropriate to a substructure search.'''

    # Two-way mapping between the public option names and the underlying
    # enantiomer constraint values.
    _enantiomer_match_type_dict = utilities.bidirectional_dict(
        NEVER=MotifSearchLib.EnantiomerSensitiveConstraint.NEVER,
        SPACEGROUP_DEPENDENT=MotifSearchLib.EnantiomerSensitiveConstraint.SPACEGROUP_DEPENDENT,
        ALWAYS=MotifSearchLib.EnantiomerSensitiveConstraint.ALWAYS,
    )

    def __init__(self, max_hit_structures=None, max_hits_per_structure=None):
        '''Create the settings.

        :param max_hit_structures: limit on the number of hit structures; the underlying
            default is kept when ``None``
        :param max_hits_per_structure: limit on hits per structure; ``None`` sets 0
        '''
        settings_type = CSDSQLDatabaseLib.CrystalStructureDatabaseMotifSearchSettings
        native = settings_type()
        native.set_match_mode(settings_type.MATCH_3D_CRYSTAL_ONLY)
        if max_hit_structures is not None:
            native.set_maximum_hits_limit(max_hit_structures)
        # NOTE(review): 0 presumably means "no per-structure limit" - confirm.
        per_structure = 0 if max_hits_per_structure is None else max_hits_per_structure
        native.set_maximum_hits_per_structure(per_structure)
        Search.Settings.__init__(self, _settings=native)
        self._match_enantiomers = MotifSearchLib.EnantiomerSensitiveConstraint.NEVER

    @property
    def max_hits_per_structure(self):
        '''Maximum number of hits per structure.'''
        limit = self._settings.maximum_hits_per_structure()
        return limit

    @max_hits_per_structure.setter
    def max_hits_per_structure(self, value):
        self._settings.set_maximum_hits_per_structure(value)

    @property
    def match_enantiomers(self):
        '''Enantiomer matching behavior

        The value will be one of 'NEVER' meaning enantiomers are never checked,
        'SPACEGROUP_DEPENDENT' meaning enantiomers are checked if the crystal's spacegroup
        implies the presence of enantiomers, or 'ALWAYS' meaning enantiomers are always checked.
        '''
        names = SubstructureSearch.Settings._enantiomer_match_type_dict
        return names.inverse_lookup(self._match_enantiomers)

    @match_enantiomers.setter
    def match_enantiomers(self, value):
        names = SubstructureSearch.Settings._enantiomer_match_type_dict
        self._match_enantiomers = names[value]
class HitProcessor(object):
    '''Override this class to provide your own add_hit() method.

    This class allows a search to process hits as they are found by
    the search class, rather than waiting until all hits are found before
    allowing access to them, a procedure which may well run out of memory
    for very general searches.
    '''

    def search(self, searcher, database=None):
        '''Searches the database with the substructure search.

        :param searcher: a :class:`ccdc.search.SubstructureSearch` instance.
        :param database: a :class:`ccdc.io.EntryReader` instance. If not specified the CSD will be searched.
        :raises: NotImplementedError if the database cannot provide a motif searcher.

        For each hit found, :meth:`ccdc.Search.SubstructureSearch.HitProcessor.add_hit` will be
        called with a :class:`ccdc.search.SubstructureSearch.SubstructureHit` instance.
        '''
        self._cancelled = False
        self.searcher = searcher
        self.searcher._add_enantiomer_consistency()
        self.database = EntryReader('csd') if database is None else database

        if not hasattr(self.database, '_motif_searcher'):
            try:
                factory = self.database._db.searcher_factory()
                self.database._motif_searcher = factory.motif_searcher()
            except (RuntimeError, AttributeError):
                # Failure is reported just below as an unsupported database.
                pass
        if not hasattr(self.database, '_motif_searcher'):
            # there used to be fallback code here, but now we expect to always support motif search on any database
            raise NotImplementedError("Substructure search is not implemented on this database type")

        motif_searcher = self.database._motif_searcher
        motif_searcher.progress_monitor().reset()
        # The writer is handed this processor; results arrive through __call__.
        results_writer = CSDSQLDatabaseLib.PythonResultsWriter(self)
        motif_searcher.search(
            self.searcher._motif, self.searcher.settings._settings, results_writer
        )

    def __call__(self, **kw):
        '''Private: this method will be called from the search.

        Recognised keywords, checked in this order: ``max_hits_reached`` cancels the
        search, ``match`` is converted to a hit and passed to :meth:`add_hit`, and
        ``hit`` is passed to :meth:`add_hit` unchanged.
        '''
        if 'max_hits_reached' in kw:
            self.cancel()
            return
        if 'match' in kw:
            hit = SubstructureSearch.SubstructureHit._from_match(
                kw['match'], self.searcher, _binary_database=self.database._db
            )
            self.add_hit(hit)
            return
        if 'hit' in kw:
            self.add_hit(kw['hit'])
            return
        raise RuntimeError('Unknown keyword in __call__', kw)

    def cancel(self):
        '''Cancels the search.'''
        try:
            monitor = self.database._motif_searcher.progress_monitor()
            monitor.cancel()
        except AttributeError:
            # Nothing to cancel when no database or searcher is attached.
            pass
        self._cancelled = True

    def add_hit(self, hit):
        '''Override this to provide your own hit processing.'''
        raise NotImplementedError('add_hit() must be implemented.')
class _MotifMatchHit(Search.SearchHit):
    '''A hit with motif match results.'''

    def __init__(self, identifier, match=None,
                 _database=None, _entry=None, _crystal=None, _molecule=None, _binary_database=None):
        '''Store the motif match alongside the base hit data.

        :param identifier: identifier of the hit structure
        :param match: the underlying motif match object
        '''
        if _database is not None or _binary_database is not None:
            # When a database is supplied, directly supplied structure objects are dropped.
            _entry = _crystal = _molecule = None
        super(SubstructureSearch._MotifMatchHit, self).__init__(
            identifier,
            _database=_database, _entry=_entry, _crystal=_crystal,
            _molecule=_molecule, _binary_database=_binary_database)
        self._motif_match = match

    def match_components(self):
        '''
        Return the molecular components containing the atoms matched by the search.

        :returns: list of :class:`ccdc.molecule.Molecule`
        '''
        csv = ChemistryLib.CrystalStructureView.instantiate(self.crystal._crystal)
        ss = MotifSearchLib.MotifSearchStructure(csv)
        match_mols = set([
            molecule.Molecule(
                '%02d' % i,
                _molecule=ss.molecule(self._motif_match.substructure_match(i)).create_editable_molecule()
            )
            for i in range(self._motif_match.nsubstructure_matches())
        ])
        return list(match_mols)

    def match_atoms(self, indices=False):
        '''
        Return the atoms matched by the substructure.

        :param indices: Whether to return atom indices instead of :class:`ccdc.molecule.Atom` instances
        :returns: list of :class:`ccdc.molecule.Atom` instances or atom indices
        :raises: RuntimeError if a matched atom cannot be located in the hit molecule

        The atoms returned will all be in the asymmetric unit, so directly measuring constraints and measurements from
        these atoms will not give the correct results if a symmetry-generated copy was involved in the match. See
        :meth:`ccdc.search.SubstructureSearch.SubstructureHit.match_symmetry_operators` for a way to determine if this is the case.
        '''
        # The result is computed once and cached on the hit.
        if not hasattr(self, '_real_indices'):
            csv = ChemistryLib.CrystalStructureView.instantiate(self.crystal._crystal)
            ss = MotifSearchLib.MotifSearchStructure(csv)
            match_atoms = []
            mol = self.molecule

            def _matches(a, b, depth=0):
                '''Whether API atom a corresponds to underlying atom b.'''
                if a.coordinates is None:
                    if b.site() is None:
                        # match labels here and first neighbours
                        if depth >= 2:
                            return True
                        return a.label == b.label() and (
                            len(a.neighbours) == 0 or
                            len(b.get_neighbours()) == 0 or
                            _matches(a.neighbours[0], b.get_neighbours()[0], depth + 1)
                        )
                    else:
                        return False
                else:
                    if b.site() is None:
                        return False
                    else:
                        # Compare label and orthogonal coordinates to 3 decimal places.
                        return (a.label == b.label() and
                                round(a.coordinates.x, 3) == round(b.site().orth().x(), 3) and
                                round(a.coordinates.y, 3) == round(b.site().orth().y(), 3) and
                                round(a.coordinates.z, 3) == round(b.site().orth().z(), 3)
                                )

            def _site_text(at):
                '''Describe an atom's position for diagnostics; atoms may have no site.'''
                site = at.site()
                return 'None' if site is None else str(site.orth())

            for j in range(self._motif_match.nsubstructure_matches()):
                sub_matches = []
                for i in range(len(self._motif_match.substructure_match(j).atom_match())):
                    _atom = ss.atom(self._motif_match, j, i)
                    _base = csv.base_atom(_atom)
                    # try same index first
                    added = False
                    if _atom.index() < len(mol.atoms):
                        a = mol.atoms[_atom.index()]
                        if a not in sub_matches and _matches(a, _base):
                            sub_matches.append(a)
                            added = True
                    if not added:
                        # Fall back to scanning every atom of the molecule.
                        for a in mol.atoms:
                            if a not in sub_matches and _matches(a, _base):
                                sub_matches.append(a)
                                break
                        else:
                            raise RuntimeError('No matching atom??? %s %s - %s %s in %s' % (
                                _atom.label(), _site_text(_atom),
                                _base.label(), _site_text(_base),
                                self.identifier))
                match_atoms += sub_matches
            self._match_atoms = match_atoms
            self._real_indices = tuple(a.index for a in self._match_atoms)
        if indices:
            return tuple(self._real_indices)
        else:
            return self._match_atoms

    def match_substructures(self):
        '''Returns each substructure of the hit as a molecule with the bonds and atoms of the hit.

        The symmetry operations of the hit will be applied to the molecules, so measurement and
        constraints will be appropriate to the hit.

        :returns: tuple of :class:`ccdc.molecule.Molecule`, one for each substructure of the hit
            with the bonds and atoms of the hit
        '''
        csv = ChemistryLib.CrystalStructureView.instantiate(self.crystal._crystal)
        mss = MotifSearchLib.MotifSearchStructure(csv)
        at_matches = [
            self._motif_match.substructure_match(i).atom_match()
            for i in range(self._motif_match.nsubstructure_matches())
        ]
        at_addrs = [
            [self._motif_match.atom_address(i, j) for j in range(len(at_matches[i]))]
            for i in range(len(at_matches))
        ]
        ats = [[mss.atom(aa) for aa in l] for l in at_addrs]
        api_ats = [[molecule.Atom(_atom=a) for a in l] for l in ats]
        api_mols = [
            molecule.Molecule(self.identifier, _molecule=l[0].molecule().create_editable_molecule())
            for l in ats
        ]

        def _matching_ats(a, b):
            '''Whether two API atoms agree on symbol, label and printed coordinates.'''
            return (
                a.atomic_symbol == b.atomic_symbol and
                a.label == b.label and
                str(a.coordinates) == str(b.coordinates)
            )

        for i, m in enumerate(api_mols):
            m._molecule.reorder_atoms([a.index() for a in ats[i]])
            # Keep only the atoms that belong to this substructure match.
            m.remove_atoms(a for a in m.atoms if not any(_matching_ats(a, b) for b in api_ats[i]))
        return tuple(api_mols)

    def match_symmetry_operators(self):
        '''The symmetry operators required to form the match.

        :returns: a list of symmetry operators in the order of the matched atoms.
            An empty string is given where no relating operator is found.
        '''
        crystal = self.crystal
        ats = self.match_atoms()
        csv = ChemistryLib.CrystalStructureView.instantiate(crystal._crystal)
        mss = MotifSearchLib.MotifSearchStructure(csv)
        motif_match = self._motif_match

        def _get_symmop(a):
            '''Get the appropriate symmop.'''
            # Convert the flat matched-atom index into (substructure, atom) indices.
            z = a
            sub = 0
            while True:
                subm = motif_match.substructure_match(sub)
                if z >= len(subm.atom_match()):
                    z -= len(subm.atom_match())
                    sub += 1
                else:
                    break
            at = mss.atom(motif_match, sub, z)
            base = csv.base_asymmetric_unit_atom(at)
            op = ChemistryLib.atom_atom_symmetry_relation(crystal._crystal, base, at)
            if op:
                symmop = op.to_string()
            else:
                symmop = ''
            return symmop

        symmops = [_get_symmop(i) for i in range(len(ats))]
        return symmops
[docs] class SubstructureHit(_MotifMatchHit):
'''A hit from a substructure search.'''
def __init__(self, identifier, match=None, search_structure=None, query=None,
             _database=None, _entry=None, _crystal=None, _molecule=None, _binary_database=None):
    '''Create a substructure hit.

    :param identifier: identifier of the hit structure
    :param match: the underlying motif match; when given, the hit's measurements and
        constraints are read from it
    :param search_structure: not used by this method
    :param query: the search that produced the hit
    '''
    super(SubstructureSearch.SubstructureHit, self).__init__(
        identifier, match,
        _database=_database, _entry=_entry, _crystal=_crystal,
        _molecule=_molecule, _binary_database=_binary_database)
    self._disorder_dealt_with = False
    self._search = query
    self.query = query
    # Geometric objects are built on first access of ``geometric_objects``.
    self._geometric_objects = None
    if match is not None:
        self._measure_measurements()
@staticmethod
def _from_match(m, search, _binary_database=None, _database=None, _entry=None, _crystal=None, _molecule=None):
    '''Private: construct a SubstructureHit from a match object.

    :param m: match object providing ``identifier()`` and ``data().motif_match()``
    :param search: the search that produced the match
    :returns: :class:`SubstructureSearch.SubstructureHit`
    '''
    identifier = m.identifier().str()
    motif_match = m.data().motif_match()
    return SubstructureSearch.SubstructureHit(
        identifier, motif_match, query=search,
        _binary_database=_binary_database, _database=_database, _entry=_entry,
        _crystal=_crystal, _molecule=_molecule
    )
def _make_geometric_object(self, obj, search_structure):
    '''PRIVATE: make a geometric object.

    :param obj: the constraint object definition
    :param search_structure: the motif search structure the match refers to
    :returns: coordinates for point objects, a plane or vector descriptor for planes
        and vectors, or a tuple of atoms for an atom group
    :raises: NotImplementedError for any other kind of object
    '''
    point_kinds = (
        SubstructureSearchLib.ConstraintAtomPoint,
        SubstructureSearchLib.ConstraintCentroidPoint,
        SubstructureSearchLib.ConstraintDummyPoint,
        SubstructureSearchLib.ConstraintPoint,
    )
    if isinstance(obj, point_kinds):
        as_point = MotifSearchLib.Object_as_Point(obj)
        located = self._motif_match.get_point(as_point, search_structure)
        return molecule.Coordinates(located.x(), located.y(), located.z())

    if isinstance(obj, SubstructureSearchLib.ConstraintPlane):
        native_plane = self._motif_match.get_plane(obj, search_structure)
        return GeometricDescriptors.Plane(None, None, _plane=native_plane)

    if isinstance(obj, SubstructureSearchLib.ConstraintVector):
        direction = self._motif_match.get_vector(obj, search_structure)
        return GeometricDescriptors.Vector(direction.x(), direction.y(), direction.z())

    if isinstance(obj, SubstructureSearchLib.ConstraintAtomGroup):
        # Atom groups are resolved against a structure built from the hit's own crystal.
        _csv = ChemistryLib.CrystalStructureView.instantiate(self.crystal._crystal)
        _mss = MotifSearchLib.MotifSearchStructure(_csv)
        mgsm = MotifSearchLib.MotifGeometricSearchMatch(self._motif_match, _mss)
        return tuple(molecule.Atom(_atom=a) for a in obj.atoms(mgsm))

    raise NotImplementedError('Have not implemented geometric object %s' % obj)
def _make_geometric_objects(self):
    '''PRIVATE: make all the geometric objects.

    Fills ``self._geometric_objects`` with one item per geometric object of the
    search, keyed by name; it is an empty dictionary when the search has none.
    '''
    definitions = self._search.geometric_objects
    if not definitions:
        self._geometric_objects = {}
        return
    _csv = ChemistryLib.CrystalStructureView.instantiate(self.crystal._crystal)
    _mss = MotifSearchLib.MotifSearchStructure(_csv)
    built = {}
    for name, obj in definitions.items():
        built[name] = self._make_geometric_object(obj, _mss)
    self._geometric_objects = built
@property
def geometric_objects(self):
    '''The geometric objects of the hit, keyed by name.

    They are built on first access and then reused.
    '''
    not_built_yet = self._geometric_objects is None
    if not_built_yet:
        self._make_geometric_objects()
    return self._geometric_objects
def _measure_measurements(self):
    '''PRIVATE: make all the measurements.

    Sets ``self.measurements`` and ``self.constraints``: each parameter value of the
    motif match goes to ``measurements`` when its name is one of the search's
    measurements, otherwise to ``constraints``.
    '''
    measured = dict()
    constrained = dict()
    self.measurements = measured
    self.constraints = constrained
    for position in range(self._motif_match.nparameters()):
        param_value = self._motif_match.parameter_value(position)
        name = param_value.parameter().name()
        target = measured if name in self._search.measurements else constrained
        target[name] = param_value.value()
def measurement_atoms(self, name):
    '''The atoms involved in a measurement.

    :param name: the name of the measurement.
    :returns: a tuple of :class:`ccdc.molecule.Atom` instances.

    The atoms will be returned in an arbitrary order. All atoms involved in the measurement will be present,
    so for example a centroid-centroid distance measurement will produce the atoms of both centroids.
    '''
    measurement = self._search.measurements[name]
    _csv = ChemistryLib.CrystalStructureView.instantiate(self.crystal._crystal)
    _mss = MotifSearchLib.MotifSearchStructure(_csv)
    mgsm = MotifSearchLib.MotifGeometricSearchMatch(self._motif_match, _mss)
    # Testing the measurement against the match yields the atoms it used.
    involved = measurement.test(mgsm).get_atoms()
    return tuple(molecule.Atom(_atom=a) for a in involved)
def constraint_atoms(self, name):
    '''The atoms from which the constraint was defined.

    :param name: the name of the constraint; looked up among the search's
        constraints first and then among its contacts.
    :returns: a tuple of :class:`ccdc.molecule.Atom` instances.
    :raises: KeyError if the name is neither a constraint nor a contact.

    The atoms will be returned in an arbitrary order. All atoms involved in defining the constraint will be returned.
    '''
    # Only consult the contacts when the name is not a constraint: passing
    # contacts[name] as the default of get() would evaluate it eagerly and raise
    # KeyError for every constraint that is not also a contact.
    con = self._search.constraints.get(name)
    if con is None:
        con = self._search.contacts[name]
    _csv = ChemistryLib.CrystalStructureView.instantiate(self.crystal._crystal)
    _mss = MotifSearchLib.MotifSearchStructure(_csv)
    mgsm = MotifSearchLib.MotifGeometricSearchMatch(self._motif_match, _mss)
    if isinstance(con, SubstructureSearchLib.SubstructureContact):
        # An atom-atom contact: resolve each end to its matched atom.
        addr1 = self._motif_match.atom_address(con.substruct_a(), con.atom_a())
        addr2 = self._motif_match.atom_address(con.substruct_b(), con.atom_b())
        return molecule.Atom(_atom=_mss.atom(addr1)), molecule.Atom(_atom=_mss.atom(addr2))
    elif isinstance(con, SubstructureSearchLib.SubstructureObjectContact):
        # An object-object contact: the atoms defining both geometric objects.
        obj1 = con.object_a()
        obj2 = con.object_b()
        return tuple(self._geometric_object_atoms(obj1.label())) + \
            tuple(self._geometric_object_atoms(obj2.label()))
    res = con.test(mgsm)
    ats = res.get_atoms()
    return tuple(molecule.Atom(_atom=a) for a in ats)
def _geometric_object_atoms(self, name):
    '''PRIVATE: the matched atoms of a constraint object.

    :param name: the name of one of the search's geometric objects
    :returns: a tuple of :class:`ccdc.molecule.Atom` instances
    '''
    definition = self._search.geometric_objects[name]
    _csv = ChemistryLib.CrystalStructureView.instantiate(self.crystal._crystal)
    _mss = MotifSearchLib.MotifSearchStructure(_csv)
    mgsm = MotifSearchLib.MotifGeometricSearchMatch(self._motif_match, _mss)
    return tuple(molecule.Atom(_atom=a) for a in definition.atoms(mgsm))
def centroid_atoms(self, name):
    '''The atoms from which the centroid is derived.

    :param name: the name of the centroid.
    :returns: a tuple of :class:`ccdc.molecule.Atom` instances.
    '''
    atoms = self._geometric_object_atoms(name)
    return atoms
def dummy_point_atoms(self, name):
    '''The atoms from which the dummy point was defined.

    :param name: the name of the dummy point.
    :returns: a tuple of :class:`ccdc.molecule.Atom` instances.
    '''
    atoms = self._geometric_object_atoms(name)
    return atoms
def group_atoms(self, name):
    '''The atoms from which the group was defined.

    :param name: the name of the group.
    :returns: a tuple of :class:`ccdc.molecule.Atom` instances.
    '''
    atoms = self._geometric_object_atoms(name)
    return atoms
def vector_atoms(self, name):
    '''The atoms from which the vector was defined.

    :param name: the name of the vector.
    :returns: a tuple of :class:`ccdc.molecule.Atom` instances.
    '''
    atoms = self._geometric_object_atoms(name)
    return atoms
def plane_atoms(self, name):
    '''The atoms from which the plane was defined.

    :param name: the name of the plane.
    :returns: a tuple of :class:`ccdc.molecule.Atom` instances.
    '''
    atoms = self._geometric_object_atoms(name)
    return atoms
### Object names
# Maps a constraint's class name to the function that converts a generic
# geometric constraint into that concrete constraint type.
_constraint_types = {
    'PlaneAngleConstraint': SubstructureSearchLib.GeometricConstraint_as_PlaneAngleConstraint,
    'PointAngleConstraint': SubstructureSearchLib.GeometricConstraint_as_PointAngleConstraint,
    'PointDistanceConstraint': SubstructureSearchLib.GeometricConstraint_as_PointDistanceConstraint,
    'PointPlaneDistanceConstraint': SubstructureSearchLib.GeometricConstraint_as_PointPlaneDistanceConstraint,
    'PointTorsionConstraint': SubstructureSearchLib.GeometricConstraint_as_PointTorsionConstraint,
    'VectorAngleConstraint': SubstructureSearchLib.GeometricConstraint_as_VectorAngleConstraint,
    'VectorPlaneAngleConstraint': SubstructureSearchLib.GeometricConstraint_as_VectorPlaneAngleConstraint,
    'Atom3DPropertyConstraint': SubstructureSearchLib.GeometricConstraint_as_Atom3DPropertyConstraint,
    'ConstantValueConstraint': SubstructureSearchLib.GeometricConstraint_as_ConstantValueConstraint,
    'TransformConstraint': SubstructureSearchLib.GeometricConstraint_as_TransformConstraint,
    'UnaryTransformConstraint': SubstructureSearchLib.GeometricConstraint_as_UnaryTransformConstraint,
    'BinaryTransformConstraint': SubstructureSearchLib.GeometricConstraint_as_BinaryTransformConstraint,
}
@staticmethod
def _find_objects(constraint):
    '''Collect the geometric objects a constraint is built from.

    The constraint is first converted to its concrete class through
    ``_constraint_types``.  Transform constraints are unwrapped recursively,
    a constant value contributes no objects, and any other constraint
    contributes ``objects(0) .. objects(nobjects() - 1)``.

    :param constraint: a constraint exposing ``class_name()``.
    :returns: a tuple of the underlying objects.
    '''
    hit_class = SubstructureSearch.SubstructureHit
    concrete = hit_class._constraint_types[constraint.class_name()](constraint)
    kind = concrete.class_name()
    if kind == 'UnaryTransformConstraint':
        return hit_class._find_objects(concrete.sub_constraint())
    if kind == 'BinaryTransformConstraint':
        left = hit_class._find_objects(concrete.sub_constraint1())
        right = hit_class._find_objects(concrete.sub_constraint2())
        return left + right
    if kind == 'ConstantValueConstraint':
        return ()
    return tuple(concrete.objects(position) for position in range(concrete.nobjects()))
def _object_name(self, _object):
    '''Resolve a constraint object to a matched atom or to its label.

    A label of the form ``'<substructure index>:<atom index>'`` (both parts
    made of digits) identifies an atom of the match; anything else is taken
    to be the name of a geometric object and is returned unchanged.

    :param _object: an object exposing ``label()``.
    :returns: the matched atom for an atom label, otherwise the label string.
    '''
    label = _object.label()
    parts = label.split(':')
    # str.isdigit() is False for '', so labels such as ':' or '1:' are
    # returned verbatim instead of crashing in int('').
    if len(parts) == 2 and all(part.isdigit() for part in parts):
        sub_inx = int(parts[0])
        at_inx = int(parts[1])
        substructs = self.match_substructures()
        # The atom index may run past the end of the named substructure;
        # carry the overflow into the following substructures.
        while at_inx >= len(substructs[sub_inx].atoms):
            at_inx -= len(substructs[sub_inx].atoms)
            sub_inx += 1
        return substructs[sub_inx].atoms[at_inx]
    return label
def measurement_objects(self, measurement):
    '''Return the objects and atoms a measurement was taken from.

    :param measurement: the string name of the measurement.
    :returns: a tuple of geometric object names or atoms.
    '''
    source_constraint = self._search.measurements[measurement]
    resolved = []
    for obj in self._find_objects(source_constraint):
        resolved.append(self._object_name(obj))
    return tuple(resolved)
def constraint_objects(self, constraint):
    '''Return the objects and atoms a constraint was defined from.

    The name is looked up among the search's constraints first and among its
    contacts second.

    :param constraint: the string name of the constraint or contact.
    :returns: a tuple of geometric object names or atoms.
    :raises IndexError: if no constraint or contact has that name.
    '''
    search = self._search
    found = search.constraints.get(constraint, search.contacts.get(constraint))
    if found is None:
        raise IndexError('The constraint %s could not be found' % constraint)
    resolved = []
    for obj in self._find_objects(found):
        resolved.append(self._object_name(obj))
    return tuple(resolved)
def _geometric_object_objects(self, _obj):
    '''Resolve each component of a geometric object through ``_object_name``.

    :param _obj: an object exposing ``nobjects()`` and ``objects(i)``.
    :returns: a tuple with one resolved entry per component, in order.
    '''
    resolved = []
    for position in range(_obj.nobjects()):
        resolved.append(self._object_name(_obj.objects(position)))
    return tuple(resolved)
def centroid_objects(self, name):
    '''Return the geometric object names and atoms defining the named centroid.

    :param name: key of the centroid in the search's ``geometric_objects``.
    :returns: the result of ``_geometric_object_objects`` for that object.
    '''
    centroid = self._search.geometric_objects[name]
    return self._geometric_object_objects(centroid)
def dummy_point_objects(self, name):
    '''Return the geometric object names and atoms defining the named dummy point.

    :param name: key of the dummy point in the search's ``geometric_objects``.
    :returns: the result of ``_geometric_object_objects`` for that object.
    '''
    dummy_point = self._search.geometric_objects[name]
    return self._geometric_object_objects(dummy_point)
def group_objects(self, name):
    '''Return the geometric object names and atoms defining the named group.

    :param name: key of the group in the search's ``geometric_objects``.
    :returns: the result of ``_geometric_object_objects`` for that object.
    '''
    group = self._search.geometric_objects[name]
    return self._geometric_object_objects(group)
def vector_objects(self, name):
    '''Return the geometric object names and atoms defining the named vector.

    :param name: key of the vector in the search's ``geometric_objects``.
    :returns: the result of ``_geometric_object_objects`` for that object.
    '''
    vector = self._search.geometric_objects[name]
    return self._geometric_object_objects(vector)
def plane_objects(self, name):
    '''Return the geometric object names and atoms defining the named plane.

    :param name: key of the plane in the search's ``geometric_objects``.
    :returns: the result of ``_geometric_object_objects`` for that object.
    '''
    plane = self._search.geometric_objects[name]
    return self._geometric_object_objects(plane)
[docs] class SubstructureHitList(list):
'''List of hits from a :class:`ccdc.search.SubstructureSearch`'''
def superimpose(self):
    '''Superimpose all matched molecules on their query atoms.

    The first hit that has matching atoms is the reference; every later hit
    is overlaid on it using the pairing of their matched atoms.  Hits before
    the reference are left out of the result.

    :returns: a list starting with a copy of the reference molecule followed
        by one overlaid molecule per later hit; empty for an empty hit list.
    :raises RuntimeError: if no hit has any matching atoms.
    '''
    overlaid = []
    if not self:
        return overlaid
    for position, hit in enumerate(self):
        if hit.match_atoms():
            reference_atoms = hit.match_atoms()
            reference_mol = hit.molecule.copy()
            break
    else:
        raise RuntimeError('No structure has matching atoms')
    overlaid.append(hit.molecule.copy())
    for other in self[position + 1:]:
        other_atoms = other.match_atoms()
        other_mol = other.molecule.copy()
        overlay = MolecularDescriptors.Overlay(
            reference_mol, other_mol, atoms=zip(reference_atoms, other_atoms)
        )
        overlaid.append(overlay.molecule)
    return overlaid
[docs] def write_c2m_file(self, file_name):
'''Write a ConQuest to Mercury interchange file.
This file allows substructure search results to be read into the data analysis package
of Mercury.
:param file_name: file to which the data will be written; ``.c2m`` is appended
when the name does not already end with it.
:raises RuntimeError: if the hit list is empty.
'''
if not self:
raise RuntimeError('No hits to write')
def split_by_id():
'''Split the hit list by identifier.'''
# A new part is started whenever the identifier differs from that of the
# previous hit, so only consecutive hits with the same identifier are grouped.
parts = []
identifier = None
for h in self:
if h.identifier != identifier:
identifier = h.identifier
parts.append([])
parts[-1].append(h)
return parts
def make_atom(h, i, a, done, output):
'''Make a tag representing an individual atom.'''
# i is the atom's position in h.match_atoms(); done is the set of file-order
# values already written for this hit.
motif_match = h._motif_match
crystal = h.crystal
csv = ChemistryLib.CrystalStructureView.instantiate(crystal._crystal)
mss = MotifSearchLib.MotifSearchStructure(csv)
# find the right substructure_match
z = i
sub = 0
while True:
subm = motif_match.substructure_match(sub)
if z >= len(subm.atom_match()):
z -= len(subm.atom_match())
sub += 1
else:
break
# Now subm is the right match, z is the right index in the match
fo = a._atom.annotations().obtain_FileOrdering().file_order()
# NOTE(review): the results of translation(), base_asymmetric_unit_atom() and
# atom_atom_symmetry_relation() below are never used - confirm whether the
# calls are needed for side effects or are left-overs.
subm.translation()
at = mss.atom(motif_match, sub, z)
base = csv.base_asymmetric_unit_atom(at)
ChemistryLib.atom_atom_symmetry_relation(crystal._crystal, base, at)
# When this file order was already written, use the file order of the atom
# obtained from the MotifSearchStructure instead.
if fo in done:
fo = at.annotations().obtain_FileOrdering().file_order()
done.add(fo)
output.write('<atom id="%d" substructure_index="%d" aser_index="%d"/>\n' % (i, i, fo))
def make_atoms(h, output):
'''Make the atoms tag.'''
output.write('<atoms>\n')
s = set()
for i, a in enumerate(h.match_atoms()):
# Atoms whose file order is 0 are not written.
if a._atom.annotations().obtain_FileOrdering().file_order() != 0:
make_atom(h, i, a, s, output)
output.write('</atoms>\n')
def make_absolute_index(motif_match, sub, a):
'''Make the atom index absolute.'''
# Offset the atom index by the number of matched atoms in every substructure
# match that precedes substructure sub.
atinx = a
for i in range(sub):
atinx += len(motif_match.substructure_match(i).atom_match())
return atinx
def make_measure(h, name, value, output):
'''Make a parameter tag for a measurement.'''
motif_match = h._motif_match
cc = h._search.measurements[name]
if cc.label() == name:
objects = [cc.objects(i) for i in range(cc.nobjects())]
# Every object is converted with Object_as_AtomPoint before the object count
# is inspected.
pts = [SubstructureSearchLib.Object_as_AtomPoint(o) for o in objects]
sub_at_inxs = [(p.substructure_index(), p.atom_index()) for p in pts]
at_inxs = [make_absolute_index(motif_match, s, a) for s, a in sub_at_inxs]
atom_inxs = ','.join('%d' % a for a in at_inxs)
# The parameter type is chosen purely from the number of objects.
if cc.nobjects() == 2:
flavour = 'distance'
elif cc.nobjects() == 3:
flavour = 'angle'
elif cc.nobjects() == 4:
flavour = 'torsion'
else:
raise NotImplementedError('Need the other constraints ' + str(type(cc)) + ' ' + name)
output.write('<parameter name="%s" type="%s" value="%.3f">\n' % (name, flavour, value))
output.write('<atom_ids>%s</atom_ids>\n' % atom_inxs)
output.write('</parameter>\n')
def _get_atom_inxs(h, ct, name):
# Absolute atom indices of the first ct objects of the named constraint.
# NOTE(review): sub_atoms is computed but never read.
sub_atoms = [a for s in h.match_substructures() for a in s.atoms]
con = h._search.constraints[name]
pts = [SubstructureSearchLib.Object_as_AtomPoint(con.objects(i)) for i in range(ct)]
inxs = [make_absolute_index(h._motif_match, p.substructure_index(), p.atom_index()) for p in pts]
return inxs
def make_constraint(h, name, value, output):
'''Make a parameter tag for a constraint or contact.'''
motif_match = h._motif_match
# Only h._search.constraints is searched for the name here.
for tag, cc in h._search.constraints.items():
if tag == name:
if isinstance(cc, SubstructureSearchLib.SubstructureContact):
atom_inxs = [
make_absolute_index(motif_match, cc.substruct_a(), cc.atom_a()),
make_absolute_index(motif_match, cc.substruct_b(), cc.atom_b())
]
flavour = 'contact'
break
elif cc.class_name() == 'PointAngleConstraint':
atom_inxs = _get_atom_inxs(h, 3, name)
flavour = 'angle'
break
elif cc.class_name() == 'PointTorsionConstraint':
atom_inxs = _get_atom_inxs(h, 4, name)
flavour = 'torsion'
break
elif cc.class_name() == 'PointDistanceConstraint':
atom_inxs = _get_atom_inxs(h, 2, name)
flavour = 'distance'
break
else:
raise NotImplementedError('Need the other constraints')
else:
raise AttributeError('No constraint named %s' % name)
output.write('<parameter name="%s" type="%s" value="%.3f">\n' % (name, flavour, value))
output.write('<atom_ids>%s</atom_ids>\n' % (','.join('%d' % a for a in atom_inxs)))
output.write('</parameter>\n')
def make_params(h, output):
'''Make the parameters tag.'''
# Measurements are written first, then constraints.
output.write('<parameters>\n')
for p, v in h.measurements.items():
make_measure(h, p, v, output)
for c, v in h.constraints.items():
make_constraint(h, c, v, output)
output.write('</parameters>\n')
def make_fragment(h, inx, output):
'''Make a single fragment tag.'''
# fragment_index is written 1-based.
output.write('<fragment type="3d_hit_fragment" fragment_index="%d" query_index="1">\n' % (inx+1))
make_atoms(h, output)
make_params(h, output)
# NOTE(review): presumably resets a cached molecule on the hit to release
# memory - confirm against the hit class.
h._molecule = None
output.write('</fragment>\n')
def make_fragments(p, output):
'''Make all the fragment tags.'''
output.write('<fragments>\n')
for i, h in enumerate(p):
make_fragment(h, i, output)
output.write('</fragments>\n')
def make_match(p, output):
'''Make a match tag.'''
# p is one part from split_by_id(); the database location is taken from its
# first hit.
h = p[0]
if h._database:
db = h._database._real_database(h.identifier)
csd_loc = db.file_name
if hasattr(db, 'inf_file'):
csd_inf = db.inf_file().full_path()
else:
csd_inf = None
else:
db = h._binary_database
if db is None:
csd_loc = ""
else:
try:
csd_loc = db.file_name()
except (AttributeError, RuntimeError):
try:
csd_loc = db._db.source_database_name(
UtilitiesLib.DatabaseEntryIdentifier(h.identifier)
)
# NOTE(review): bare except also swallows KeyboardInterrupt and SystemExit;
# consider narrowing to Exception.
except:
csd_loc = ""
else:
if '_ASER' in csd_loc:
csd_loc = csd_loc.replace('_ASER', '')
csd_inf = None
output.write('<match identifier="%s">\n' % p[0].identifier)
output.write('<database>%s</database>\n' % csd_loc)
# The inffile tag is only written when an inf file path is available.
if csd_inf:
output.write('<inffile>%s</inffile>\n' % csd_inf)
make_fragments(p, output)
output.write('</match>\n')
def make_tree(parts, output):
'''Make the whole XML tree.'''
# The search label and search id are fixed values; the active hit is the
# identifier of the first hit in the list.
output.write('<search_results version="2.0">\n')
output.write('<search_label>search1</search_label>\n')
output.write('<search_id>123456</search_id>\n')
output.write('<active_hit>%s</active_hit>\n' % self[0].identifier)
output.write('<action>analyse</action>\n')
for p in parts:
make_match(p, output)
output.write('</search_results>\n')
parts = split_by_id()
if not file_name.endswith('.c2m'):
file_name += '.c2m'
with open(file_name, 'w') as writer:
make_tree(parts, writer)
def __init__(self, settings=None):
    '''Initialise an empty query.

    :param settings: the settings object to use; a fresh
        ``SubstructureSearch.Settings()`` is created when omitted.
    '''
    self._motif = MotifSearchLib.Motif('')
    self.substructures = []
    # Name -> object registries filled in by the add_* methods.
    self.measurements = {}
    self.constraints = {}
    self.contacts = {}
    self.geometric_objects = {}
    self.settings = SubstructureSearch.Settings() if settings is None else settings
    # The telemetry call is made once per class, guarded by the class-level flag.
    cls = type(self)
    if cls._telemetry == 0:
        UtilitiesLib.ccdc_motif_search_telemetry()
        cls._telemetry = 1
[docs] def add_substructure(self, substructure):
'''Add a substructure.
Disconnected substructures may be accepted if the first substructure is contiguous at the start.
Multiple substructures may be added as a result.
:param substructure: :class:`ccdc.search.QuerySubstructure`.
:returns: the index of the substructure added; for a ``ConnserSubstructure`` the
list of indices of all substructures of the motif created from it.
:raises NotImplementedError: if a ``ConnserSubstructure`` is added when the search
already holds substructures.
'''
if isinstance(substructure, ConnserSubstructure):
# The motif built from the Connser query replaces this search's motif.
self._conn_motif = _motif = MotifSearchLib.create_motif(substructure._conn)
self._motif = _motif
if len(self.substructures) > 0:
# Need to relocate the new substructure(s)
raise NotImplementedError('Need to relocate substructures')
# Currently needed, but I'm not sure if it's still relevant
inxs = list(range(_motif.nsubstructures()))
for i in range(_motif.nsubstructures()):
self.substructures.append(QuerySubstructure(_substructure=_motif.substructure(i)))
_conn = substructure._conn
# possibles collects geometric constraints whose condition is named
# 'always false'; they are matched up with contacts or turned into
# measurements further down.
possibles = []
for i in range(_conn.n_geometric_constraints()):
c = _conn.geometric_constraint(i)
if not c.label().startswith('?'):
self._motif.add_motif_parameter(MotifSearchLib.MotifGeometricConstraintParameter(c))
cond = c.condition()
if cond.name() == 'always false':
possibles.append(c)
else:
self.constraints[c.label()] = c
for inx in range(c.nobjects()):
obj = c.objects(inx)
# Converters from the generic object to its concrete class, keyed by
# class_name(); unknown class names fall back to the object itself.
object_types = dict(
ConstraintAtomGroup=SubstructureSearchLib.Object_as_AtomGroup,
ConstraintPlane=SubstructureSearchLib.Object_as_Plane,
ConstraintAtomPoint=SubstructureSearchLib.Object_as_AtomPoint,
ConstraintCentroidPoint=SubstructureSearchLib.Object_as_CentroidPoint,
ConstraintDummyPoint=SubstructureSearchLib.Object_as_DummyPoint,
ConstraintVector=SubstructureSearchLib.Object_as_Vector,
)
# Only objects whose label has no ':' are registered as geometric objects.
if ':' not in obj.label():
self.geometric_objects[obj.label()] = object_types.get(obj.class_name(), lambda x:x)(obj)
contacts = _conn.contacts()
# Pair contacts with the collected constraints in order: each paired
# constraint is cloned and given the contact's min/max as an inclusive range.
for c, p in zip(contacts, possibles):
clone = p.clone()
cond = SubstructureSearchLib.InclusiveRange(c.criterion().min(), c.criterion().max())
clone.set_condition(cond)
self.constraints[p.label()] = clone
# Collected constraints beyond the number of contacts become measurements.
for p in possibles[len(contacts):]:
self.measurements[p.label()] = p
# Each object contact of the motif is recorded as an always-true point
# distance constraint labelled CONT1, CONT2, ...
for i in range(_motif.n_objects_contacts()):
oc = _motif.object_contact(i)
a = oc.object_a()
if a.class_name() != 'ConstraintAtomPoint':
if ':' not in a.label():
self.geometric_objects[a.label()] = SubstructureSearchLib.Object_as_Point(a)
b = oc.object_b()
if b.class_name() != 'ConstraintAtomPoint':
if ':' not in b.label():
self.geometric_objects[b.label()] = SubstructureSearchLib.Object_as_Point(b)
dist_con = SubstructureSearchLib.PointDistanceConstraint(
SubstructureSearchLib.Object_as_Point(a), SubstructureSearchLib.Object_as_Point(b),
SubstructureSearchLib.AlwaysTrue(), 'CONT%d' % (i+1)
)
self._motif.add_motif_parameter(MotifSearchLib.MotifGeometricConstraintParameter(dist_con))
self.constraints['CONT%d' % (i+1)] = dist_con
#self.constraints['%s_%s_CONTACT' % (a.label(), b.label())] = oc
#self._motif.add_motif_parameter(MotifSearchLib.MotifGeometricConstraintParameter(oc))
# NOTE(review): self.geometric_object(i) is called here but no method of that
# name is visible in this part of the file (only the geometric_objects dict) -
# confirm it exists.
for i in range(_motif.nobjects()):
obj = self.geometric_object(i)
if ':' not in obj.label():
self.geometric_objects[obj.label()] = obj
return inxs
else:
# NOTE(review): sizes is assigned but never read.
sizes = [substructure._substructure.natoms()]
self.substructures.append(substructure)
# inxs built here always holds exactly one index.
inxs = [self._motif.add_substructure(substructure._substructure)]
if len(inxs) == 1:
return inxs[0]
else:
return inxs
def _point_must_have_site(self, sub_id, atom_id):
    '''Mark a query atom as having 3D coordinates unless it already has a 3D site constraint.

    :param sub_id: index into ``self.substructures``.
    :param atom_id: index into that substructure's ``atoms``.
    '''
    query_atom = self.substructures[sub_id].atoms[atom_id]
    already_constrained = query_atom._substructure_atom.has_constraint_of_type(
        SubstructureSearchLib.AtomHas3DSiteConstraint()
    )
    if already_constrained:
        return
    query_atom.has_3d_coordinates = True
def _args_to_points(self, required, args, require_3d=True):
    '''Convert point specifications into constraint point objects.

    Each specification is one of:

    - two consecutive ints, ``substructure_index, atom_index`` (old style);
    - a tuple or list ``(substructure_index, atom_index)`` (new style);
    - the name of an entry in ``self.geometric_objects``; an atom group is
      replaced by its ``centroid_from_group()``.

    :param required: the number of points that must result, or 0 for any number.
    :param args: the sequence of specifications.
    :param require_3d: whether atoms referred to by index are passed to
        ``_point_must_have_site``.
    :returns: the list of point objects, in order.
    :raises TypeError: for a specification of any other type.
    '''
    points = []

    def take_atom(sub_inx, at_inx):
        # An atom given by index: build the atom point, then optionally
        # insist that the query atom has a 3D site.
        points.append(SubstructureSearchLib.ConstraintAtomPoint(sub_inx, at_inx))
        if require_3d:
            self._point_must_have_site(sub_inx, at_inx)

    cursor = 0
    while cursor < len(args):
        spec = args[cursor]
        if isinstance(spec, str):
            named = self.geometric_objects[spec]
            if isinstance(named, SubstructureSearchLib.ConstraintAtomGroup):
                named = named.centroid_from_group()
            points.append(named)
            cursor += 1
        elif isinstance(spec, (tuple, list)):
            take_atom(spec[0], spec[1])
            cursor += 1
        elif isinstance(spec, int):
            # Old style consumes two positional values per atom.
            take_atom(spec, args[cursor + 1])
            cursor += 2
        else:
            raise TypeError('Invalid type for a point')
    assert required == 0 or required == len(points)
    return points
##### GeometricObjects #####
def add_centroid(self, name, *args):
    '''Add a centroid to the substructure search.

    :param name: the name by which the centroid will be accessed.
    :param `*args`: the points or geometric objects from which to define the centroid.
        Each arg may be either a pair (substructure_index, atom_index) or the name of
        a geometric object.  At least two should be given (not checked here).

    >>> query = SubstructureSearch()
    >>> _ = query.add_substructure(SMARTSSubstructure('C(=O)O'))
    >>> _ = query.add_substructure(SMARTSSubstructure('N(-H)H'))
    >>> query.add_centroid('CENT1', (0, 0), (0, 1), (0, 2))
    >>> query.add_centroid('CENT2', (1, 0), (1, 1), (1, 2))
    >>> query.add_centroid('CENT3', 'CENT1', 'CENT2')
    '''
    defining_points = self._args_to_points(0, args)
    new_centroid = SubstructureSearchLib.ConstraintCentroidPoint(defining_points, name)
    self.geometric_objects[name] = new_centroid
    self._motif.add_object(new_centroid)
def add_dummy_point(self, name, distance, *args):
    '''Create a dummy point along a vector.

    :param name: the name by which this point will be accessed.
    :param distance: the distance along the vector subtended by the two points.
    :param `*args`: two points, each specified as (substructure_index, atom_index)
        or as the name of another geometric object.

    >>> query = SubstructureSearch()
    >>> _ = query.add_substructure(SMARTSSubstructure('C(=O)O'))
    >>> _ = query.add_substructure(SMARTSSubstructure('N(-H)H'))
    >>> query.add_centroid('CENT1', (0, 0), (0, 1), (0, 2))
    >>> query.add_dummy_point('DUM1', 2.0, 'CENT1', (1, 1))
    '''
    end_points = self._args_to_points(2, args)
    new_point = SubstructureSearchLib.ConstraintDummyPoint(
        end_points[0], end_points[1], distance, name
    )
    self.geometric_objects[name] = new_point
    self._motif.add_object(new_point)
def add_group(self, name, *args):
    '''Create a group of matched atoms.

    :param name: the name by which this group will be accessed.
    :param `*args`: pairs, (substructure_index, atom_index), defining the atoms of the group.

    >>> query = SubstructureSearch()
    >>> _ = query.add_substructure(SMARTSSubstructure('C(=O)O'))
    >>> _ = query.add_substructure(SMARTSSubstructure('N(-H)H'))
    >>> query.add_group('GP1', (0, 0), (0, 1), (0, 2))
    '''
    members = self._args_to_points(0, args)
    new_group = SubstructureSearchLib.ConstraintAtomGroup(members, name)
    self.geometric_objects[name] = new_group
    self._motif.add_object(new_group)
def add_vector(self, name, *args):
    '''Add a vector between two points.

    :param name: the name by which the vector will be accessed.
    :param `*args`: two point specifications, each as (substructure_index, atom_index)
        or as the name of another geometric object.

    >>> query = SubstructureSearch()
    >>> _ = query.add_substructure(SMARTSSubstructure('C(=O)O'))
    >>> _ = query.add_substructure(SMARTSSubstructure('N(-H)H'))
    >>> query.add_centroid('CENT1', (0, 0), (0, 1), (0, 2))
    >>> query.add_vector('VEC1', 'CENT1', (1, 2))
    '''
    end_points = self._args_to_points(2, args)
    new_vector = SubstructureSearchLib.ConstraintVector(end_points[0], end_points[1], name)
    # The motif is updated before the vector is registered by name.
    self._motif.add_object(new_vector)
    self.geometric_objects[name] = new_vector
def add_plane(self, name, *args):
    '''Add a plane through a set of points.

    :param name: the name by which the plane will be accessed.
    :param `*args`: point specifications in the form (substructure_index, atom_index)
        or the name of another geometric object; at least two should be given
        (not checked here).

    >>> query = SubstructureSearch()
    >>> _ = query.add_substructure(SMARTSSubstructure('C(=O)O'))
    >>> _ = query.add_substructure(SMARTSSubstructure('N(-H)H'))
    >>> query.add_plane('PLANE1', (0, 0), (0, 1), (0, 2))
    >>> query.add_plane('PLANE2', (1, 0), (1, 1), (1, 2))
    '''
    defining_points = self._args_to_points(0, args)
    new_plane = SubstructureSearchLib.ConstraintPlane(defining_points, name)
    # The motif is updated before the plane is registered by name.
    self._motif.add_object(new_plane)
    self.geometric_objects[name] = new_plane
##### Measurements #####
def add_distance_measurement(self, name, *args):
    '''Add a distance measurement between two points.

    :param name: the name by which the measurement will be accessed.
    :param `*args`: two point specifications, each a pair
        (substructure_index, atom_index) or the name of a geometric object.

    >>> query = SubstructureSearch()
    >>> _ = query.add_substructure(SMARTSSubstructure('C(=O)O'))
    >>> _ = query.add_substructure(SMARTSSubstructure('N(-H)H'))
    >>> query.add_centroid('CENT1', (0, 0), (0, 1), (0, 2))
    >>> query.add_centroid('CENT2', (1, 0), (1, 1), (1, 2))
    >>> query.add_distance_measurement('DIST1', (0, 0), 'CENT2')
    '''
    pts = self._args_to_points(2, args)
    # An always-true condition makes this a pure measurement.
    measured = SubstructureSearchLib.PointDistanceConstraint(
        pts[0], pts[1], SubstructureSearchLib.AlwaysTrue(), name
    )
    self.measurements[name] = measured
    parameter = MotifSearchLib.MotifGeometricConstraintParameter(measured)
    self._motif.add_motif_parameter(parameter)
def add_angle_measurement(self, name, *args):
    '''Add an angle measurement over three points.

    :param name: the name by which the measurement will be accessed.
    :param `*args`: three point specifications, each a pair
        (substructure_index, atom_index) or the name of a geometric object.

    >>> query = SubstructureSearch()
    >>> _ = query.add_substructure(SMARTSSubstructure('C(=O)O'))
    >>> _ = query.add_substructure(SMARTSSubstructure('N(-H)H'))
    >>> query.add_centroid('CENT1', (0, 0), (0, 1), (0, 2))
    >>> query.add_centroid('CENT2', (1, 0), (1, 1), (1, 2))
    >>> query.add_angle_measurement('ANG1', (0, 0), (1, 1), (1, 0))
    '''
    pts = self._args_to_points(3, args)
    # An always-true condition makes this a pure measurement.
    measured = SubstructureSearchLib.PointAngleConstraint(
        pts[0], pts[1], pts[2], SubstructureSearchLib.AlwaysTrue(), name
    )
    self.measurements[name] = measured
    parameter = MotifSearchLib.MotifGeometricConstraintParameter(measured)
    self._motif.add_motif_parameter(parameter)
def add_torsion_angle_measurement(self, name, *args):
    '''Add a torsion angle measurement over four points.

    :param name: the name by which the measurement will be accessed.
    :param `*args`: four point specifications, each a pair
        (substructure_index, atom_index) or the name of a geometric object.

    >>> query = SubstructureSearch()
    >>> _ = query.add_substructure(SMARTSSubstructure('C(=O)O'))
    >>> _ = query.add_substructure(SMARTSSubstructure('N(-H)H'))
    >>> query.add_centroid('CENT1', (0, 0), (0, 1), (0, 2))
    >>> query.add_centroid('CENT2', (1, 0), (1, 1), (1, 2))
    >>> query.add_torsion_angle_measurement('ANG1', (0, 0), (0, 1), (1, 1), (1, 0))
    '''
    pts = self._args_to_points(4, args)
    # An always-true condition makes this a pure measurement.
    measured = SubstructureSearchLib.PointTorsionConstraint(
        pts[0], pts[1], pts[2], pts[3], SubstructureSearchLib.AlwaysTrue(), name
    )
    self.measurements[name] = measured
    parameter = MotifSearchLib.MotifGeometricConstraintParameter(measured)
    self._motif.add_motif_parameter(parameter)
def add_vector_angle_measurement(self, name, *args):
    '''Add a measurement of the angle between two vectors.

    :param name: the name by which the measurement will be accessed.
    :param `*args`: two geometric object specifications, e.g. the names of two
        previously added vectors as in the example.

    >>> query = SubstructureSearch()
    >>> _ = query.add_substructure(SMARTSSubstructure('C(=O)O'))
    >>> _ = query.add_substructure(SMARTSSubstructure('N(-H)H'))
    >>> query.add_vector('VEC1', (0, 1), (1, 2))
    >>> query.add_vector('VEC2', (0, 2), (1, 1))
    >>> query.add_vector_angle_measurement('ANG1', 'VEC1', 'VEC2')
    '''
    vectors = self._args_to_points(2, args)
    # An always-true condition makes this a pure measurement.
    measured = SubstructureSearchLib.VectorAngleConstraint(
        vectors[0], vectors[1], SubstructureSearchLib.AlwaysTrue(), name
    )
    self.measurements[name] = measured
    parameter = MotifSearchLib.MotifGeometricConstraintParameter(measured)
    self._motif.add_motif_parameter(parameter)
def add_plane_angle_measurement(self, name, *args):
    '''Add a measurement of the angle between two planes.

    :param name: the name by which the measurement will be accessed.
    :param `*args`: two geometric object specifications, e.g. the names of two
        previously added planes as in the example.

    >>> query = SubstructureSearch()
    >>> _ = query.add_substructure(SMARTSSubstructure('C(=O)O'))
    >>> _ = query.add_substructure(SMARTSSubstructure('N(-H)H'))
    >>> query.add_plane('PLANE1', (0, 0), (0, 1), (0, 2))
    >>> query.add_plane('PLANE2', (1, 0), (1, 1), (1, 2))
    >>> query.add_plane_angle_measurement('PA1', 'PLANE1', 'PLANE2')
    '''
    planes = self._args_to_points(2, args)
    # An always-true condition makes this a pure measurement.
    measured = SubstructureSearchLib.PlaneAngleConstraint(
        planes[0], planes[1], SubstructureSearchLib.AlwaysTrue(), name
    )
    self.measurements[name] = measured
    parameter = MotifSearchLib.MotifGeometricConstraintParameter(measured)
    self._motif.add_motif_parameter(parameter)
def add_point_plane_distance_measurement(self, name, *args):
    '''Add a measurement of the distance from a point to a plane.

    The constraint is created in ``ABSOLUTE`` mode.

    :param name: the name by which the measurement will be accessed.
    :param `*args`: two specifications, a point then a plane as in the example.

    >>> query = SubstructureSearch()
    >>> _ = query.add_substructure(SMARTSSubstructure('C(=O)O'))
    >>> _ = query.add_substructure(SMARTSSubstructure('N(-H)H'))
    >>> query.add_centroid('CENT1', (0, 0), (0, 1), (0, 2))
    >>> query.add_plane('PLANE2', (1, 0), (1, 1), (1, 2))
    >>> query.add_point_plane_distance_measurement('PP1', 'CENT1', 'PLANE2')
    '''
    objects = self._args_to_points(2, args)
    # An always-true condition makes this a pure measurement.
    measured = SubstructureSearchLib.PointPlaneDistanceConstraint(
        objects[0],
        objects[1],
        SubstructureSearchLib.AlwaysTrue(),
        name,
        SubstructureSearchLib.PointPlaneDistanceConstraint.ABSOLUTE,
    )
    self.measurements[name] = measured
    parameter = MotifSearchLib.MotifGeometricConstraintParameter(measured)
    self._motif.add_motif_parameter(parameter)
def add_vector_plane_angle_measurement(self, name, *args):
    '''Add a measurement of the angle between a vector and a plane.

    :param name: the name by which the measurement will be accessed.
    :param `*args`: two specifications, a vector then a plane as in the example.

    >>> query = SubstructureSearch()
    >>> _ = query.add_substructure(SMARTSSubstructure('C(=O)O'))
    >>> _ = query.add_substructure(SMARTSSubstructure('N(-H)H'))
    >>> query.add_vector('VEC1', (0, 1), (1, 2))
    >>> query.add_plane('PLANE2', (1, 0), (1, 1), (1, 2))
    >>> query.add_vector_plane_angle_measurement('ANG1', 'VEC1', 'PLANE2')
    '''
    objects = self._args_to_points(2, args)
    # An always-true condition makes this a pure measurement.
    measured = SubstructureSearchLib.VectorPlaneAngleConstraint(
        objects[0], objects[1], SubstructureSearchLib.AlwaysTrue(), name
    )
    self.measurements[name] = measured
    parameter = MotifSearchLib.MotifGeometricConstraintParameter(measured)
    self._motif.add_motif_parameter(parameter)
def add_atom_property_measurement(self, name, *args, **kw):
    '''Add an atom property measurement.

    The atom is not required to have 3D coordinates for this measurement.

    :param name: the name by which this measurement will be accessed.
    :param `*args`: a pair, (substructure_index, atom_index) specifying the atom to measure.
    :param which: required keyword; one of TotalCoordinationNumber, AtomicNumber,
        VdwRadius, CovalentRadius (resolved with ``prefix_lookup``).

    >>> query = SubstructureSearch()
    >>> substructure = QuerySubstructure()
    >>> _ = substructure.add_atom(['C', 'N'])
    >>> _ = query.add_substructure(substructure)
    >>> query.add_atom_property_measurement('ATOM1', (0, 0), which='AtomicNumber')
    '''
    property_enum = SubstructureSearchLib.Atom3DPropertyConstraint
    property_codes = utilities.bidirectional_dict(
        TotalCoordinationNumber=property_enum.TotalCoordinationNumber,
        AtomicNumber=property_enum.AtomicNumber,
        VdwRadius=property_enum.VdwRadius,
        CovalentRadius=property_enum.CovalentRadius,
    )
    atom_points = self._args_to_points(1, args, require_3d=False)
    which = property_codes.prefix_lookup(kw['which'])
    measured = SubstructureSearchLib.Atom3DPropertyConstraint(
        atom_points[0], which, SubstructureSearchLib.AlwaysTrue(), name
    )
    self.measurements[name] = measured
    parameter = MotifSearchLib.MotifGeometricConstraintParameter(measured)
    self._motif.add_motif_parameter(parameter)
def add_constant_value_measurement(self, name, value):
    '''Add a constant value as a named measurement.

    :param name: the name by which this constant will be accessed.
    :param value: a float.

    >>> query = SubstructureSearch()
    >>> substructure = QuerySubstructure()
    >>> _ = substructure.add_atom(['C', 'N'])
    >>> _ = query.add_substructure(substructure)
    >>> query.add_constant_value_measurement('PI', 3.14159)
    '''
    constant = SubstructureSearchLib.ConstantValueConstraint(value)
    # The label is set after construction rather than passed to the constructor.
    constant.set_label(name)
    self.measurements[name] = constant
    parameter = MotifSearchLib.MotifGeometricConstraintParameter(constant)
    self._motif.add_motif_parameter(parameter)
def _add_constraint(self, name, r):
    '''Turn the named measurement into a constraint.

    The measurement is removed from ``self.measurements``, given the
    condition decoded from ``r``, stored in ``self.constraints`` and added to
    the motif as an object constraint.

    :param name: the name of an existing measurement.
    :param r: the condition specification passed to ``_decode_condition``.
    :raises KeyError: if there is no measurement called ``name``.
    '''
    promoted = self.measurements.pop(name)
    promoted.set_condition(_decode_condition(r))
    self.constraints[name] = promoted
    self._motif.add_object_constraint(promoted)
##### Constraints #####
def add_distance_constraint(self, name, *args, **kw):
    '''Add a distance constraint.

    The positional arguments are two point specifications, then the
    condition, optionally followed by the contact type as a string and then
    ``vdw_corrected`` as a bool.

    When the condition starts with a string operator, or either point is
    given by name, a point distance constraint is stored in
    ``self.constraints``.  Otherwise an inter-atomic contact over
    ``min(range)`` to ``max(range)`` is stored in ``self.contacts``.

    :param name: the name of this constraint.
    :param `*args`: specifications of points either as pairs (substructure_index, atom_index) or
        as names of geometric objects, followed by the range.
    :param range: a condition, either as a pair of floats or a pair (operator, value) where operator may be
        - '==', '>', '<', '>=', '<=', '!=' or a pair ('in', list(values)).
    :param type: keyword; a string starting with 'intra' for intramolecular, 'any' for
        either, anything else (default 'inter') for intermolecular.
    :param vdw_corrected: keyword (default False); whether the distance range should be
        relative to the Van der Waals radii of the atoms involved.
    :raises TypeError: if the condition is not a list or tuple.

    >>> query = SubstructureSearch()
    >>> _ = query.add_substructure(SMARTSSubstructure('C(=O)O'))
    >>> _ = query.add_substructure(SMARTSSubstructure('N(-H)H'))
    >>> query.add_distance_constraint('DIST1', (0, 1), (1, 1), (-5, 0), vdw_corrected=True, type='any')
    >>> query.add_distance_constraint('DIST2', (0, 2), (1, 2), ('<=', 3.0), vdw_corrected=True, type='any')
    '''
    kind = kw.get('type', 'inter')
    vdw_corrected = kw.get('vdw_corrected', False)

    # Peel optional trailing positionals off the end: first a bool
    # (vdw_corrected), then a string (contact type).
    tail = 1
    if isinstance(args[-tail], bool):
        vdw_corrected = args[-tail]
        tail += 1
    if isinstance(args[-tail], str):
        kind = args[-tail]
        tail += 1

    lowered = kind.lower()
    if lowered.startswith('intra'):
        which = ChemistryLib.ContactCriterion.INTRAMOLECULAR
    elif lowered.startswith('any'):
        which = ChemistryLib.ContactCriterion.ANY
    else:
        which = ChemistryLib.ContactCriterion.INTERMOLECULAR

    r = args[-tail]
    point_specs = args[:-tail]
    points = self._args_to_points(2, point_specs)
    if not isinstance(r, (list, tuple)):
        raise TypeError('Invalid value for condition {}'.format(r))

    if isinstance(r[0], str) or any(isinstance(spec, str) for spec in point_specs):
        # Operator-style condition, or a named geometric object is involved:
        # use a general point distance constraint.
        point_constraint = SubstructureSearchLib.PointDistanceConstraint(
            points[0], points[1], _decode_condition(r), name
        )
        self._motif.add_object_constraint(point_constraint)
        self.constraints[name] = point_constraint
        self._motif.add_motif_parameter(
            MotifSearchLib.MotifGeometricConstraintParameter(point_constraint)
        )
        return

    # Plain numeric range between two atoms: use an inter-atomic contact.
    crit = SubstructureSearchLib.InterAtomicDistanceCriterion(min(r), max(r), which, vdw_corrected)
    crit.set_min_path_length(3)
    crit.set_max_path_length(999)

    # Recover the two (substructure, atom) index pairs, accepting both the
    # pair form and the old flat-int form.
    cursor = 0
    while cursor < len(args):
        spec = args[cursor]
        at_start = cursor == 0
        if isinstance(spec, (list, tuple)):
            if at_start:
                sub_inx1 = spec[0]
                at_inx1 = spec[1]
                cursor += 1
                continue
            sub_inx2 = spec[0]
            at_inx2 = spec[1]
            break
        if isinstance(spec, int):
            if at_start:
                sub_inx1 = spec
                at_inx1 = args[cursor + 1]
                cursor += 2
                continue
            sub_inx2 = spec
            at_inx2 = args[cursor + 1]
            break

    contact = SubstructureSearchLib.SubstructureContact(sub_inx1, at_inx1, sub_inx2, at_inx2, crit)
    self._motif.add_contact(contact)
    self.contacts[name] = contact
    self._motif.add_motif_parameter(
        MotifSearchLib.MotifDistanceParameter(name, sub_inx1, at_inx1, sub_inx2, at_inx2)
    )
def add_angle_constraint(self, name, *args):
    '''Add an angle constraint.

    :param name: by which the constraint will be accessed.
    :param `*args`: three instances either of a pair (substructure_index, atom_index) or of names
        of geometric objects, followed by the range.
    :param range: as for :meth:`ccdc.search.SubstructureSearch.add_distance_constraint`

    >>> query = SubstructureSearch()
    >>> _ = query.add_substructure(SMARTSSubstructure('C(=O)O'))
    >>> _ = query.add_substructure(SMARTSSubstructure('N(-H)H'))
    >>> query.add_centroid('CENT1', (0, 0), (0, 1), (0, 2))
    >>> query.add_centroid('CENT2', (1, 0), (1, 1), (1, 2))
    >>> query.add_angle_constraint('ANG1', (0, 0), (1, 1), (1, 0), ('>=', 120))
    '''
    # Everything but the last argument defines the measurement; the last
    # argument is the condition applied to it.
    self.add_angle_measurement(name, *args[:-1])
    condition = args[-1]
    self._add_constraint(name, condition)
def add_torsion_angle_constraint(self, name, *args):
    '''Add a torsion angle constraint.

    :param name: the name by which this constraint is accessed.
    :param `*args`: four point specifications followed by the range, in the forms
        described for :meth:`ccdc.search.SubstructureSearch.add_distance_constraint`

    >>> query = SubstructureSearch()
    >>> _ = query.add_substructure(SMARTSSubstructure('C(=O)O'))
    >>> _ = query.add_substructure(SMARTSSubstructure('N(-H)H'))
    >>> query.add_centroid('CENT1', (0, 0), (0, 1), (0, 2))
    >>> query.add_centroid('CENT2', (1, 0), (1, 1), (1, 2))
    >>> query.add_torsion_angle_constraint('ANG1', (0, 0), (0, 1), (1, 1), (1, 0), (120, 180))
    '''
    # Everything but the last argument defines the measurement; the last
    # argument is the condition applied to it.
    self.add_torsion_angle_measurement(name, *args[:-1])
    condition = args[-1]
    self._add_constraint(name, condition)
def add_vector_angle_constraint(self, name, *args):
    '''Add a vector angle constraint.

    :param name: the name by which this constraint is accessed.
    :param `*args`: two vector specifications followed by the range.

    >>> query = SubstructureSearch()
    >>> _ = query.add_substructure(SMARTSSubstructure('C(=O)O'))
    >>> _ = query.add_substructure(SMARTSSubstructure('N(-H)H'))
    >>> query.add_vector('VEC1', (0, 1), (1, 2))
    >>> query.add_vector('VEC2', (0, 2), (1, 1))
    >>> query.add_vector_angle_constraint('ANG1', 'VEC1', 'VEC2', (0, 60))
    '''
    # Everything but the last argument defines the measurement; the last
    # argument is the condition applied to it.
    self.add_vector_angle_measurement(name, *args[:-1])
    condition = args[-1]
    self._add_constraint(name, condition)
def add_plane_angle_constraint(self, name, *args):
    '''Add a plane angle constraint.

    :param name: the name by which this constraint is accessed.
    :param `*args`: two plane specifications followed by the range.

    >>> query = SubstructureSearch()
    >>> _ = query.add_substructure(SMARTSSubstructure('C(=O)O'))
    >>> _ = query.add_substructure(SMARTSSubstructure('N(-H)H'))
    >>> query.add_plane('PLANE1', (0, 0), (0, 1), (0, 2))
    >>> query.add_plane('PLANE2', (1, 0), (1, 1), (1, 2))
    >>> query.add_plane_angle_constraint('PA1', 'PLANE1', 'PLANE2', (-10, 10))
    '''
    # Everything but the last argument defines the measurement; the last
    # argument is the condition applied to it.
    self.add_plane_angle_measurement(name, *args[:-1])
    condition = args[-1]
    self._add_constraint(name, condition)
def add_point_plane_distance_constraint(self, name, *args):
    '''Add a point plane distance constraint.

    :param name: the name by which this constraint is accessed.
    :param `*args`: a point specification and a plane specification followed by the range.

    >>> query = SubstructureSearch()
    >>> _ = query.add_substructure(SMARTSSubstructure('C(=O)O'))
    >>> _ = query.add_substructure(SMARTSSubstructure('N(-H)H'))
    >>> query.add_centroid('CENT1', (0, 0), (0, 1), (0, 2))
    >>> query.add_plane('PLANE2', (1, 0), (1, 1), (1, 2))
    >>> query.add_point_plane_distance_constraint('PP1', 'CENT1', 'PLANE2', ('<', 5))
    '''
    # Everything but the last argument defines the measurement; the last
    # argument is the condition applied to it.
    self.add_point_plane_distance_measurement(name, *args[:-1])
    condition = args[-1]
    self._add_constraint(name, condition)
def add_vector_plane_angle_constraint(self, name, *args):
    '''Add a vector plane angle constraint.

    :param name: the name by which this constraint is accessed.
    :param `*args`: a vector specification and a plane specification followed by the range.

    >>> query = SubstructureSearch()
    >>> _ = query.add_substructure(SMARTSSubstructure('C(=O)O'))
    >>> _ = query.add_substructure(SMARTSSubstructure('N(-H)H'))
    >>> query.add_vector('VEC1', (0, 1), (1, 2))
    >>> query.add_plane('PLANE2', (1, 0), (1, 1), (1, 2))
    >>> query.add_vector_plane_angle_constraint('ANG1', 'VEC1', 'PLANE2', ('>', 90))
    '''
    # Everything but the last argument defines the measurement; the last
    # argument is the condition applied to it.
    self.add_vector_plane_angle_measurement(name, *args[:-1])
    condition = args[-1]
    self._add_constraint(name, condition)
def add_atom_property_constraint(self, name, *args, **kw):
    '''Add an atom property constraint.

    :param name: the name by which this constraint is accessed.
    :param `*args`: the atom specification followed by the condition.
    :param `**kw`: passed on to ``add_atom_property_measurement`` (e.g. ``which``).

    >>> query = SubstructureSearch()
    >>> _ = query.add_substructure(SMARTSSubstructure('[*H1]'))
    >>> query.add_atom_property_constraint('ATOM1', (0, 0), ('in', [7, 8]), which='AtomicNumber')
    '''
    # Everything but the last positional argument defines the measurement;
    # the last one is the condition applied to it.
    self.add_atom_property_measurement(name, *args[:-1], **kw)
    condition = args[-1]
    self._add_constraint(name, condition)
@staticmethod
def from_xml(xml):
    '''Create a substructure search from XML. Deprecated.

    :param xml: XML string
    :returns: a new :class:`ccdc.search.SubstructureSearch` that has read ``xml``.
    '''
    search = SubstructureSearch()
    search.read_xml(xml)
    return search
[docs] @staticmethod
def from_xml_file(file_name):
'''Create a substructure search from an XML file. Deprecated.
:param file_name: path to XML file
:raises: IOError when the file does not exist
'''
if not os.path.exists(file_name):
raise IOError('The file %s does not exist' % file_name)
with open(file_name) as f:
return SubstructureSearch.from_xml(f.read())
def read_xml(self, xml):
    '''Read search query from XML.  Deprecated.

    Every motif found in the XML is merged into this search: its
    substructures, object constraints, motif constraints, contacts and
    parameters are added to ``self._motif`` and recorded in
    ``self.constraints``, ``self.contacts`` and ``self.measurements``.

    :param xml: XML string
    '''
    warnings.warn('''This method is deprecated and will be removed in a later version.''', DeprecationWarning)
    rdr = CSDSQLDatabaseLib.XMLMotifReader()
    stream = UtilitiesLib.istringstream(xml)
    rdr.load(stream)
    for i in range(rdr.nmolecules()):
        self._xml_motif = motif = rdr.motif(i)
        # merge this motif with ours
        for j in range(motif.nsubstructures()):
            self.add_substructure(QuerySubstructure(motif.substructure(j)))

        for j in range(motif.n_object_constraints()):
            obj = motif.object_constraint(j)
            if obj.class_name() == 'PointTorsionConstraint':
                # Rebuild the torsion constraint from its four points with a
                # placeholder condition, then re-apply the original condition.
                c = obj.condition()
                p0 = SubstructureSearchLib.Object_as_Point(obj.objects(0))
                p1 = SubstructureSearchLib.Object_as_Point(obj.objects(1))
                p2 = SubstructureSearchLib.Object_as_Point(obj.objects(2))
                p3 = SubstructureSearchLib.Object_as_Point(obj.objects(3))
                obj = SubstructureSearchLib.PointTorsionConstraint(
                    p0, p1, p2, p3, SubstructureSearchLib.AlwaysTrue(), obj.label()
                )
                obj.set_condition(c)
            self.constraints[obj.label()] = obj
            self._motif.add_object_constraint(obj)
            self._motif.add_motif_parameter(MotifSearchLib.MotifGeometricConstraintParameter(obj))

        for j in range(motif.nconstraints()):
            con = motif.constraint(j)
            # Try each known down-cast; a successful cast tells us the kind of
            # constraint and therefore the tag used to name it.
            for f, tag in [
                (MotifSearchLib.motif_constraint_as_angle_constraint, 'ANGLE'),
                (MotifSearchLib.motif_constraint_as_combined_constraint, 'COMBINED'),
                (MotifSearchLib.motif_constraint_as_contact_order_constraint, 'CONTACT_ORDER'),
                (MotifSearchLib.motif_constraint_as_discrete_chain_constraint, 'DISCRETE_CHAIN'),
                (MotifSearchLib.motif_constraint_as_hydrogen_bond_angle_present_constraint, 'HBOND_ANGLE_PRESENT'),
                (MotifSearchLib.motif_constraint_as_hydrogen_bond_constraint, 'HBOND'),
                (MotifSearchLib.motif_constraint_as_nunique_contacts_constraint, 'NUNIQUE_CONTACTS'),
                (MotifSearchLib.motif_constraint_as_not_present_constraint, 'NOT_PRESENT'),
                (MotifSearchLib.motif_constraint_as_shortest_path_constraint, 'SHORTEST_PATH'),
                (MotifSearchLib.motif_constraint_as_torsion_constraint, 'TORSION'),
                (MotifSearchLib.motif_constraint_as_translation_constraint, 'TRANSLATION'),
                (MotifSearchLib.motif_constraint_as_unique_atoms_constraint, 'UNIQUE_ATOMS'),
            ]:
                c = f(con)
                if c:
                    # NOTE(review): nesting reconstructed from an unindented
                    # listing - confirm the two motif additions belong under
                    # the successful cast rather than once per constraint.
                    self.constraints['%s_%d' % (tag, len(self.constraints)+1)] = c
                    self._motif.add_motif_constraint(con)
                    self._motif.add_motif_parameter(MotifSearchLib.MotifGeometricConstraintParameter(con))

        for j in range(motif.ncontacts()):
            con = motif.motif_contact(j)
            self.contacts['CONTACT_%d' % j] = con
            self._motif.add_contact(con)
            s1 = con.substruct_a()
            a1 = con.atom_a()
            s2 = con.substruct_b()
            a2 = con.atom_b()
            self._motif.add_motif_parameter(MotifSearchLib.MotifDistanceParameter('CONTACT_%d' % j, s1, a1, s2, a2))

        for j in range(motif.nparameters()):
            # Index with the parameter counter j; the previous code used the
            # outer motif index i and so kept re-reading one parameter.
            par = motif.parameter(j)
            self._motif.add_parameter(par)
            for f, tag in [
                (MotifSearchLib.motif_parameter_as_distance_parameter, 'DISTANCE'),
                (MotifSearchLib.motif_parameter_as_angle_parameter, 'ANGLE'),
                (MotifSearchLib.motif_parameter_as_torsion_parameter, 'TORSION')
            ]:
                p = f(par)
                if p:
                    self.measurements[p.name()] = p
                    self._motif.add_motif_parameter(p)
def read_xml_file(self, file_name):
    '''Read search parameters from an XML file.  Deprecated.

    The file's contents are handed to :meth:`read_xml`.

    :param file_name: path to XML file
    :raises: IOError if the file cannot be read
    '''
    if os.path.exists(file_name):
        with open(file_name) as xml_file:
            self.read_xml(xml_file.read())
        return
    raise IOError('The file %s does not exist' % file_name)
def _add_enantiomer_consistency(self):
    '''Apply the enantiomer search setting to the motif.

    Passes this search's motif and the ``_match_enantiomers`` setting to
    ``MotifTorsionInversionConsistencyConstraint.add_to_motif``.
    '''
    match_enantiomers = self.settings._match_enantiomers
    consistency = MotifSearchLib.MotifTorsionInversionConsistencyConstraint
    consistency.add_to_motif(self._motif, match_enantiomers)
def _search_reader(self, database):
    '''Private: run the substructure search over a database reader.

    A motif searcher is created lazily and cached on the reader as
    ``_motif_searcher``.

    :param database: the reader to search
    :returns: a :class:`SubstructureSearch.SubstructureHitList`
    :raises: TypeError if no substructure has been added
    :raises: NotImplementedError if no motif searcher can be obtained for the reader
    '''
    if self._motif.nsubstructures() == 0:
        raise TypeError('No substructures to search')
    self._add_enantiomer_consistency()

    if not hasattr(database, '_motif_searcher'):
        try:
            database._motif_searcher = database._db.searcher_factory().motif_searcher()
        except (RuntimeError, NameError, AttributeError):
            # Best effort: the check below reports readers with no searcher.
            pass

    if not hasattr(database, '_motif_searcher'):
        # there used to be fallback code here, but now we expect to always support motif search on any database
        raise NotImplementedError("Substructure search is not implemented on this database type")

    writer = CSDSQLDatabaseLib.CrystalStructureDatabaseSearchVectorResultsWriter()
    # The return value is not used; the matches are read back from the writer.
    search_result = database._motif_searcher.search(self._motif, self.settings._settings, writer)
    found = SubstructureSearch.SubstructureHitList(
        SubstructureSearch.SubstructureHit._from_match(match, self, _binary_database=database._db)
        for match in writer.matches()
    )

    if database.__class__.__name__ == 'MoleculeReader':
        def atoms_available(candidate):
            '''Whether the hit's matched atoms can be retrieved without a RuntimeError.'''
            try:
                candidate.match_atoms()
            except RuntimeError:
                return False
            return True

        # For molecule readers, drop hits whose atoms cannot be resolved.
        found = SubstructureSearch.SubstructureHitList(
            candidate for candidate in found if atoms_available(candidate)
        )
    return found
def _search_entry(self, entry, _database=None):
    '''Private: run the substructure search against a single entry.

    If the entry has no chemical diagram views, they are generated from its
    molecule and stored on the entry first.  The search then uses a
    substructure searcher obtained from the CSD; when that searcher is not
    valid the entry's crystal is searched with :meth:`_search_crystal`.

    :param entry: the entry to search
    :param _database: private: passed on to :meth:`_search_crystal` in the fallback case
    :returns: a list of hits; empty if the entry fails the settings constraints
    :raises: TypeError if no substructure has been added
    '''
    if self._motif.nsubstructures() == 0:
        raise TypeError('No substructures to search')
    if not CSDSQLDatabaseLib.test_entry_settings_constraints(self.settings._settings, entry._entry):
        return []
    self._add_enantiomer_consistency()

    if entry._entry.chemical_diagram_views() is None:
        generator = ChemistryLib.ChemicalDiagramGenerator()
        diagram = generator.create_chemical_diagram(entry.molecule._molecule)
        views = ChemistryLib.ChemicalDiagramViews2D(diagram)
        entry._entry.set_chemical_diagram_views(views)

    results_writer = CSDSQLDatabaseLib.CrystalStructureDatabaseSearchVectorResultsWriter()
    csd = EntryReader('csd')
    if hasattr(csd, '_component_dbs'):
        # Materialise before indexing: on Python 3 a dict's values() is a
        # view and cannot be subscripted.
        db = list(csd._component_dbs.values())[-1]
    else:
        db = csd._db

    _substructure_searcher = db.searcher_factory().substructure_searcher()
    _substructure_searcher = CSDSQLDatabaseLib.CSDSQLSubstructureSearcher(_substructure_searcher)
    if CSDSQLDatabaseLib.CSDSQLSubstructureSearcher_valid(_substructure_searcher):
        _substructure_searcher.search_entry(
            entry._entry, self._motif, self.settings._settings, results_writer
        )
        return SubstructureSearch.SubstructureHitList(
            SubstructureSearch.SubstructureHit._from_match(x, self, _entry=entry)
            for x in results_writer.matches()
        )
    else:
        return self._search_crystal(entry.crystal, _database=_database)
def _search_crystal(self, crystal, _database=None):
    '''Private: run the substructure search against a single crystal.

    :param crystal: the crystal to search
    :param _database: private: database handed to each hit
    :returns: a :class:`SubstructureSearch.SubstructureHitList`, or an empty
        list if the crystal's molecule fails the settings constraints
    :raises: TypeError if no substructure has been added
    :raises: RuntimeError if the underlying motif search fails
    '''
    if self._motif.nsubstructures() == 0:
        raise TypeError('No substructures to search')

    try:
        passes = CSDSQLDatabaseLib.test_molecule_settings_constraints(
            self.settings._settings, crystal.molecule._molecule
        )
        if not passes:
            return []
    except TypeError:
        # The constraints could not be tested; reject only when the settings
        # insist on 3D coordinates.
        if self.settings.has_3d_coordinates:
            return []

    self._add_enantiomer_consistency()
    structure_view = ChemistryLib.CrystalStructureView.instantiate(crystal._crystal)
    motif_search = MotifSearchLib.MotifSearch()
    motif_search.set_limit(self.settings.max_hits_per_structure)
    try:
        matches = motif_search.search(structure_view, self._motif)
    except RuntimeError as err:
        if 'Too many steps' not in str(err):
            raise RuntimeError('Crystal search failed with: %s' % err)
        raise RuntimeError(
            'The crystal search failed: probably due to an over-complex substructure or target molecule.\n'
            'Try restricting the number of hits with max_hits_per_structure or reducing the complexity '
            'of the substructure.'
        )

    hit_crystal = crystal if _database is None else None
    found = SubstructureSearch.SubstructureHitList(
        SubstructureSearch.SubstructureHit(
            crystal.identifier, match, motif_search.search_structure(), self,
            _crystal=hit_crystal, _database=_database)
        for match in matches
    )
    # Every hit ends up pointing at the searched crystal, whatever was passed
    # to its constructor above.
    for found_hit in found:
        found_hit._crystal = crystal
    return found
def _search_molecule(self, molecule, _database=None):
    '''Private: run the substructure search against a single molecule.

    The molecule is wrapped in a crystal with a default-constructed cell and
    searched with :meth:`_search_crystal`.  Each hit is then pointed back at
    the molecule and its crystal reference is cleared.

    :param molecule: the molecule to search
    :param _database: private: passed on to :meth:`_search_crystal`
    :returns: the hits, or an empty list if the molecule fails the settings constraints
    '''
    if not CSDSQLDatabaseLib.test_molecule_settings_constraints(self.settings._settings, molecule._molecule):
        return []
    saved_cell = getattr(molecule, '_cell', None)
    self._add_enantiomer_consistency()
    molecule._cell = ChemistryLib.Cell()
    try:
        c = Entry.from_molecule(molecule).crystal
        c._crystal.set_cell(ChemistryLib.Cell(), ChemistryLib.CrystalStructure.KEEP_ORTHOGONAL_COORDINATES)
        ret = self._search_crystal(c, _database=_database)
    finally:
        # Restore the caller's cell even if the search raised, so a failed
        # search does not leave the molecule modified.
        if saved_cell is not None:
            molecule._cell = saved_cell
    for h in ret:
        h._molecule = molecule
        h._crystal = None
    return ret
###########################################################################
# Reduced cell search
###########################################################################
class ReducedCellSearch(Search):
    '''Provide reduced cell searches.

    A query (see :class:`ReducedCellSearch.Query` and its subclasses) is
    compared against the reduced cells of a database's structures, within
    the length and angle tolerances held in the search settings.
    '''

    @utilities.nested_class('ReducedCellSearch')
    class Settings(Search.Settings):
        '''Settings appropriate to a reduced cell search.'''

        def __init__(self, _settings=None):
            '''Initialise settings.

            :param _settings: private: an existing internal settings object to
                wrap; when ``None`` a new one is created and
                ``max_hits_per_structure`` is set to 1
            '''
            if _settings is None:
                self._settings = CSDSQLDatabaseLib.CrystalStructureDatabaseReducedCellSearchSettings()
                Search.Settings.__init__(self, self._settings)
                self.max_hits_per_structure = 1
            else:
                self._settings = _settings

        def reset(self):
            '''Reset to default values.'''
            self._settings = CSDSQLDatabaseLib.CrystalStructureDatabaseReducedCellSearchSettings()
            Search.Settings.__init__(self, self._settings)
            self.max_hits_per_structure = 1

        @property
        def percent_length_tolerance(self):
            '''The cell length tolerance as a percentage of the longest cell dimension.'''
            return self._settings.percent_length_tolerance()

        @percent_length_tolerance.setter
        def percent_length_tolerance(self, val):
            '''Set the percent length tolerance.'''
            self._settings.set_percent_length_tolerance(val)

        @property
        def absolute_angle_tolerance(self):
            '''The absolute angle tolerance.'''
            return self._settings.absolute_angle_tolerance()

        @absolute_angle_tolerance.setter
        def absolute_angle_tolerance(self, val):
            '''Set the absolute angle tolerance.'''
            self._settings.set_absolute_angle_tolerance(val)

        @property
        def is_normalised(self):
            '''Whether the input cell is normalised.'''
            return self._settings.is_normalised()

        @is_normalised.setter
        def is_normalised(self, val):
            '''Set the is_normalised property.'''
            self._settings.set_is_normalised(val)

    @utilities.nested_class('ReducedCellSearch')
    class Query(object):
        '''Base query.'''

        def __init__(self, lengths=None, angles=None, lattice_centring=None):
            '''Initialise with cell lengths, cell angles and the lattice centring.

            :param lengths: the three cell lengths
            :param angles: the three cell angles, in degrees
            :param lattice_centring: the centring, either as a string or as a
                value understood by ``Spacegroup.centring_text()``
            '''
            self.lengths = lengths
            self.angles = angles
            self.lattice_centring = lattice_centring

        def _get_query(self, settings=None):
            '''Private: return an internal query object.

            :param settings: a :class:`ReducedCellSearch.Settings`; defaults are used when ``None``
            '''
            if settings is None:
                settings = ReducedCellSearch.Settings()
            if isinstance(self.lattice_centring, str):
                centring = self.lattice_centring
            else:
                etm = ChemistryLib.Spacegroup.centring_text()
                centring = etm.text(self.lattice_centring)
            # Only the first letter of the centring text selects the space group.
            k = centring[0].upper()
            if k == 'R':
                sp = ChemistryLib.Spacegroup('R3', ChemistryLib.Spacegroup.UNKNOWN_SYSTEM)
            else:
                sp = ChemistryLib.Spacegroup(k + '1')
            cell = ChemistryLib.Cell(
                self.lengths[0], self.lengths[1], self.lengths[2],
                MathsLib.Angle(self.angles[0], MathsLib.Angle.DEGREES),
                MathsLib.Angle(self.angles[1], MathsLib.Angle.DEGREES),
                MathsLib.Angle(self.angles[2], MathsLib.Angle.DEGREES),
                sp
            )
            return CSDSQLDatabaseLib.CrystalStructureDatabaseReducedCellSearch(
                cell, settings._settings
            )

    @utilities.nested_class('ReducedCellSearch')
    class CrystalQuery(Query):
        '''Reduced cell query from a crystal.'''

        def __init__(self, crystal):
            '''Initialise from the crystal's cell lengths, cell angles and lattice centring.'''
            super(ReducedCellSearch.CrystalQuery, self).__init__(
                crystal.cell_lengths,
                crystal.cell_angles,
                crystal.lattice_centring
            )

    @utilities.nested_class('ReducedCellSearch')
    class XMLQuery(Query):
        '''Reduced cell query from an XML representation.'''

        def __init__(self, xml):
            '''Initialise from xml.

            :param xml: XML string
            '''
            parser = CSDSQLDatabaseLib.ReducedCellSearchXMLParser()
            self._query = parser.parse(xml)

        def _get_query(self, settings=None):
            '''Private: return the underlying internal query.

            The ``settings`` argument is accepted for interface compatibility
            and is not used.
            '''
            return self._query

    @utilities.nested_class('ReducedCellSearch')
    class XMLFileQuery(XMLQuery):
        '''Reduced cell query from a file name.'''

        def __init__(self, file_name):
            '''Initialise from a file name.

            :param file_name: path to an XML file
            '''
            with open(file_name) as f:
                super(ReducedCellSearch.XMLFileQuery, self).__init__(f.read())

    def __init__(self, query=None, settings=None):
        '''Initialise with optional query and settings.

        :param query: a :class:`ReducedCellSearch.Query`, or ``None``
        :param settings: a :class:`ReducedCellSearch.Settings`; defaults are created when ``None``
        '''
        self.query = query
        if settings is None:
            settings = self.Settings()
        self.settings = settings

    @staticmethod
    def from_xml(xml):
        '''Construct a reduced cell search from an XML representation.

        :param xml: XML string
        :returns: a :class:`ReducedCellSearch` whose tolerances are taken from the XML
        '''
        # NOTE(review): the parser is given a stream here, whereas XMLQuery
        # and read_xml hand it the raw string - confirm both forms are accepted.
        stream = UtilitiesLib.istringstream(xml)
        parser = CSDSQLDatabaseLib.ReducedCellSearchXMLParser()
        q = parser.parse(stream)
        rcs = ReducedCellSearch(query=ReducedCellSearch.XMLQuery(xml))
        rcs.settings.percent_length_tolerance = q.settings().percent_length_tolerance()
        rcs.settings.absolute_angle_tolerance = q.settings().absolute_angle_tolerance()
        return rcs

    @staticmethod
    def from_xml_file(file_name):
        '''Construct a reduced cell search from an XML file.

        :param file_name: path to XML file
        :raises: IOError when the file does not exist
        '''
        if not os.path.exists(file_name):
            raise IOError('The file %s does not exist' % file_name)
        with open(file_name) as f:
            return ReducedCellSearch.from_xml(f.read())

    def read_xml(self, xml):
        '''Read XML into this ReducedCellSearch.

        Sets the query and copies the length and angle tolerances from the XML
        into this search's settings.

        :param xml: XML string
        '''
        self.set_query(ReducedCellSearch.XMLQuery(xml))
        parser = CSDSQLDatabaseLib.ReducedCellSearchXMLParser()
        q = parser.parse(xml)
        self.settings.percent_length_tolerance = q.settings().percent_length_tolerance()
        self.settings.absolute_angle_tolerance = q.settings().absolute_angle_tolerance()

    def read_xml_file(self, file_name):
        '''Read an XML file into this ReducedCellSearch.

        :param file_name: path to XML file
        :raises: IOError if the file cannot be read
        '''
        if not os.path.exists(file_name):
            raise IOError('The file %s does not exist' % file_name)
        with open(file_name) as f:
            self.read_xml(f.read())

    def set_query(self, query):
        '''Set the query.

        :param query: a :class:`ReducedCellSearch.Query`
        '''
        self.query = query
        self._search = self.query._get_query(self.settings)

    def compare_cells(self, r0, r1):
        '''Compare two reduced cells.

        Lengths must agree within ``percent_length_tolerance`` percent of the
        longest edge of ``r0``; angles must agree within
        ``absolute_angle_tolerance`` degrees.

        :param r0: the first reduced cell, an instance of :class:`ccdc.crystal.Crystal.ReducedCell`
        :param r1: the second reduced cell similarly
        :returns: boolean
        '''
        if isinstance(r0, Crystal.ReducedCell):
            r0 = r0._reduced_cell
        if isinstance(r1, Crystal.ReducedCell):
            r1 = r1._reduced_cell
        len_tol = (self.settings.percent_length_tolerance/100.) * max(r0.a(), r0.b(), r0.c())
        ang_tol = self.settings.absolute_angle_tolerance

        def _compare_values(v0, v1, tol):
            '''Private: test value difference lies within tolerance.'''
            return abs(v0 - v1) <= tol

        return (
            _compare_values(r0.a(), r1.a(), len_tol) and
            _compare_values(r0.b(), r1.b(), len_tol) and
            _compare_values(r0.c(), r1.c(), len_tol) and
            _compare_values(r0.alpha().degrees(), r1.alpha().degrees(), ang_tol) and
            _compare_values(r0.beta().degrees(), r1.beta().degrees(), ang_tol) and
            _compare_values(r0.gamma().degrees(), r1.gamma().degrees(), ang_tol)
        )

    def _search_reader(self, database):
        '''Search a database.

        Uses the database's reduced cell searcher when one can be obtained
        (it is cached on the reader as ``_reduced_cell_searcher``); otherwise
        every crystal of the database is tested in turn.

        :param database: the reader to search
        :returns: a list of :class:`Search.SearchHit`
        :raises: TypeError if the search has no query
        '''
        if not self.query:
            raise TypeError('The search has no query.')
        if not hasattr(database, '_reduced_cell_searcher'):
            try:
                database._reduced_cell_searcher = database._db.searcher_factory().reduced_cell_searcher()
            except (RuntimeError, NameError, AttributeError):
                # Best effort: fall through to the crystal-by-crystal search.
                pass
        if hasattr(database, '_reduced_cell_searcher'):
            if self.settings._has_filter_set():
                # The filters are applied here in Python, so lift the hit
                # limit for the underlying search and enforce it ourselves.
                max_hits = self.settings.max_hit_structures
                if max_hits != maxint32:
                    self.settings.max_hit_structures = maxint32
                try:
                    hits = database._reduced_cell_searcher.search(self.query._get_query(self.settings))
                    ret = list()
                    for h in hits:
                        r = Search.SearchHit(h, _database=database)
                        if self.settings.test(r.entry):
                            ret.append(r)
                            if max_hits and len(ret) >= max_hits:
                                break
                finally:
                    # Always put the caller's limit back, even on failure.
                    self.settings.max_hit_structures = max_hits
            else:
                hits = database._reduced_cell_searcher.search(self.query._get_query(self.settings))
                ret = [Search.SearchHit(h, _database=database) for h in hits]
        else:
            # Have to do it one-by-one
            ret = []
            max_hits = self.settings.max_hit_structures
            for c in database.crystals():
                # _search_crystal applies the settings filters itself.
                ret.extend(self._search_crystal(c))
                # Stop as soon as the limit is reached (was '>', which let
                # one hit too many through).
                if max_hits and len(ret) >= max_hits:
                    break
            for r in ret:
                r._crystal = None
                r._database = database
        return ret

    def _search_molecule(self, mol):
        '''Molecules don't have cells, so always fails.'''
        return []

    def _search_crystal(self, crystal):
        '''Test the query against a single crystal.

        :param crystal: the crystal to test
        :returns: a one-element list holding the hit if the cells compare
            equal and the crystal passes any filters set, otherwise an empty list
        '''
        red = ChemistryLib.ReducedCell(crystal._crystal.cell())
        que = self.query._get_query(self.settings)
        que_red = que.query_cell()
        if not self.compare_cells(que_red, red):
            return []
        ret = [Search.SearchHit(crystal.identifier)]
        ret[0]._crystal = crystal
        if self.settings._has_filter_set() and not self.settings.test(crystal):
            return []
        return ret
###########################################################################
# Combined search
###########################################################################
[docs]class CombinedSearch(Search):
'''Boolean combinations of other searches.
TextNumericSearch, SubstructureSearch, SimilaritySearch and ReducedCellSearch can be combined using and, or and not
to provide a combined search.
>>> csd = io.EntryReader('csd')
>>> tns = TextNumericSearch()
>>> tns.add_compound_name('Aspirin')
>>> sub_search = SubstructureSearch()
>>> _ = sub_search.add_substructure(SMARTSSubstructure('C(=O)OH'))
>>> rcs = ReducedCellSearch(ReducedCellSearch.CrystalQuery(csd.crystal('ACSALA')))
>>> combi_search = CombinedSearch(tns & (-rcs | -sub_search))
>>> hits = combi_search.search()
>>> print(len(hits))
89
'''
class Settings(Search.Settings):
    '''Settings appropriate to a combined search.'''

    def __init__(self):
        '''Initialise with the base class defaults.'''
        # Name the class explicitly: super(self.__class__, self) recurses
        # without end as soon as this class is subclassed.
        super(CombinedSearch.Settings, self).__init__()
class CombinedHit(Search.SearchHit):
    '''A hit from a combined search.

    Collects the measurements, constraints, geometric objects and similarity
    values of the contributing searches, and keeps the substructure hits it
    was built from in ``_subhits``.
    '''

    def __init__(self, identifier, _database=None, _entry=None, _crystal=None, _molecule=None):
        '''Initialise an empty combined hit for ``identifier``.'''
        # Name the class explicitly: super(self.__class__, self) recurses
        # without end as soon as this class is subclassed.
        super(CombinedSearch.CombinedHit, self).__init__(
            identifier, _database=_database, _entry=_entry, _crystal=_crystal, _molecule=_molecule
        )
        self.measurements = dict()
        self.constraints = dict()
        self.geometric_objects = dict()
        self.similarities = dict()
        self._subhits = list()

    @staticmethod
    def _from_similarity_hit(identifier, comparators, similarities, _database=None):
        '''Make a CombinedHit from a SimilarityHit.

        :param comparators: keys under which the similarity values are stored
        :param similarities: the values, paired with ``comparators`` in order
        '''
        hit = CombinedSearch.CombinedHit(identifier, _database=_database)
        hit.similarities.update({
            comp : float(sim)
            for comp, sim in zip(comparators, similarities)
        })
        return hit

    @staticmethod
    def _from_search_hit(search_hit):
        '''Make a CombinedHit from a TextNumericSearch or a ReducedCellSearch.'''
        return CombinedSearch.CombinedHit(search_hit.identifier, search_hit._database)

    @staticmethod
    def _from_substructure_hit(sub_hit, _database):
        '''Make a CombinedHit from a SubstructureSearchHit.

        The substructure hit is re-pointed at ``_database`` and its entry,
        crystal and molecule references are cleared before it is stored.
        '''
        hit = CombinedSearch.CombinedHit(sub_hit.identifier, _database)
        hit.measurements.update(sub_hit.measurements)
        hit.constraints.update(sub_hit.constraints)
        hit.geometric_objects.update(sub_hit.geometric_objects)
        sub_hit._database = _database
        sub_hit._entry = sub_hit._crystal = sub_hit._molecule = None
        hit._subhits.append(sub_hit)
        return hit

    def _merge(self, hit):
        '''Merge another hit into here.'''
        self.measurements.update(hit.measurements)
        self.constraints.update(hit.constraints)
        self.geometric_objects.update(hit.geometric_objects)
        self.similarities.update(hit.similarities)
        self._subhits.extend(hit._subhits)

    def copy(self):
        '''Return a new CombinedHit with copies of this hit's dictionaries and sub-hit list.'''
        hit = CombinedSearch.CombinedHit(self.identifier, _database=self._database)
        hit.measurements.update(self.measurements)
        hit.constraints.update(self.constraints)
        hit.geometric_objects.update(self.geometric_objects)
        hit.similarities.update(self.similarities)
        hit._subhits = self._subhits[:]
        return hit

    def _last_subhit_with(self, attribute, name):
        '''Private: the last sub-hit whose ``attribute`` mapping contains ``name``, or None.'''
        for sub_hit in reversed(self._subhits):
            if name in getattr(sub_hit, attribute):
                return sub_hit
        return None

    @staticmethod
    def _atoms_from(accessor, name):
        '''Private: fetch the atoms for ``name`` from a sub-hit accessor.

        The accessor is called when it is callable (as the equivalent methods
        on this class are) and subscripted otherwise; the previous code
        always subscripted, which fails on a method.
        '''
        if callable(accessor):
            return accessor(name)
        return accessor[name]

    def measurement_atoms(self, name):
        '''The atoms of the named measurement, from the last sub-hit defining it.

        :returns: the atoms, or ``None`` if no sub-hit has the measurement
        '''
        sub_hit = self._last_subhit_with('measurements', name)
        if sub_hit is not None:
            return self._atoms_from(sub_hit.measurement_atoms, name)

    def constraint_atoms(self, name):
        '''The atoms of the named constraint, from the last sub-hit defining it.

        :returns: the atoms, or ``None`` if no sub-hit has the constraint
        '''
        sub_hit = self._last_subhit_with('constraints', name)
        if sub_hit is not None:
            return self._atoms_from(sub_hit.constraint_atoms, name)

    def _geometric_object_atoms(self, name):
        '''Private: the named entry of ``geometric_objects`` from the last sub-hit defining it.'''
        sub_hit = self._last_subhit_with('geometric_objects', name)
        if sub_hit is not None:
            return sub_hit.geometric_objects[name]

    def centroid_atoms(self, name):
        '''The named geometric object, looked up as for :meth:`_geometric_object_atoms`.'''
        return self._geometric_object_atoms(name)

    def dummy_point_atoms(self, name):
        '''The named geometric object, looked up as for :meth:`_geometric_object_atoms`.'''
        return self._geometric_object_atoms(name)

    def group_atoms(self, name):
        '''The named geometric object, looked up as for :meth:`_geometric_object_atoms`.'''
        return self._geometric_object_atoms(name)

    def vector_atoms(self, name):
        '''The named geometric object, looked up as for :meth:`_geometric_object_atoms`.'''
        return self._geometric_object_atoms(name)

    def plane_atoms(self, name):
        '''The named geometric object, looked up as for :meth:`_geometric_object_atoms`.'''
        return self._geometric_object_atoms(name)

    def match_components(self):
        '''The matched components of all sub-hits, concatenated.'''
        return [m for h in self._subhits for m in h.match_components()]

    def match_atoms(self, indices=False):
        '''The matched atoms of all sub-hits, concatenated.

        :param indices: passed through to each sub-hit's ``match_atoms``
        '''
        return [a for h in self._subhits for a in h.match_atoms(indices=indices)]

    def match_substructures(self):
        '''The result of ``match_components()`` of all sub-hits, concatenated.'''
        # NOTE(review): this delegates to match_components(), exactly like
        # match_components() above - confirm whether the sub-hits'
        # match_substructures() was intended.
        return [m for h in self._subhits for m in h.match_components()]

    def match_symmetry_operators(self):
        '''The matched symmetry operators of all sub-hits, concatenated.'''
        return [m for h in self._subhits for m in h.match_symmetry_operators()]
def __init__(self, expression, settings=None):
    '''Initialise from a search expression.

    :param expression: a search, or a combination of searches, accepted by :meth:`_make_node`
    :param settings: a :class:`CombinedSearch.Settings`; defaults are created when ``None``
    '''
    self.settings = CombinedSearch.Settings() if settings is None else settings
    root = self._make_node(expression)
    self._node = root
    # The search shares the lookup tables that were attached to the root node.
    self._searcher_dict = root._searcher_dict
    self._limit_dict = root._limit_dict
@staticmethod
def _make_node(other):
    '''Private: create a combined search node.

    The returned node carries two extra attributes: ``_searcher_dict``,
    mapping ``str(search)`` to the search object, and ``_limit_dict``.
    Substructure and similarity nodes are wrapped in a ``MatchMutatorNode``
    that tags each match with ``str(search)``.

    :param other: a TextNumericSearch, SubstructureSearch, ReducedCellSearch,
        SimilaritySearch or an existing node
    :returns: the node for ``other``; an existing node is returned itself,
        with its two dictionaries replaced by copies
    :raises: TypeError if ``other`` is none of the supported types
    '''
    if isinstance(other, TextNumericSearch):
        _node = CSDSQLDatabaseLib.TextNumericSearchNode(other._search)
    elif isinstance(other, SubstructureSearch):
        _node = CSDSQLDatabaseLib.MotifNode(
            CSDSQLDatabaseLib.pair_motif_settings(other._motif, other.settings._settings)
        )
        adder = CSDSQLDatabaseLib.MatchStringDataItemAdder('substructure_search', str(other))
        _node = CSDSQLDatabaseLib.MatchMutatorNode(adder, _node)
    elif isinstance(other, ReducedCellSearch):
        _node = CSDSQLDatabaseLib.ReducedCellNode(other.query._get_query(other.settings))
    elif isinstance(other, SimilaritySearch):
        _node = CSDSQLDatabaseLib.SimilaritySearchNode(
            CSDSQLDatabaseLib.pair_substructure_simsettings(other._substructure, other.settings._settings)
        )
        adder = CSDSQLDatabaseLib.MatchStringDataItemAdder('similarity_search', str(other))
        _node = CSDSQLDatabaseLib.MatchMutatorNode(adder, _node)
    elif isinstance(other, CSDSQLDatabaseLib.Node):
        _node = other
        _node._searcher_dict = other._searcher_dict.copy()
        _node._limit_dict = other._limit_dict.copy()
        return _node
    else:
        # Format the message; passing the type as a second argument left the
        # '%s' placeholder unfilled.
        raise TypeError('Not appropriate for a combined search %s' % (type(other),))
    # A node freshly built from a single search knows only that search.
    _node._searcher_dict = collections.OrderedDict([(str(other), other)])
    _node._limit_dict = {}
    return _node
def __and__(self, other):
    '''Conjoin this with another search.

    This search is modified in place and returned.

    :param other: anything accepted by :meth:`_make_node`
    '''
    rhs = self._make_node(other)
    if self._node is not None:
        self._node = CSDSQLDatabaseLib.AndNode(self._node, rhs)
    else:
        self._node = rhs
    self._searcher_dict.update(rhs._searcher_dict)
    self._limit_dict.update(rhs._limit_dict)
    return self
def __iand__(self, other):
    '''In-place conjunction.

    :param other: anything accepted by :meth:`_make_node`
    :returns: this search, so that ``search &= other`` keeps ``search`` bound to it
    '''
    _node = self._make_node(other)
    if self._node is None:
        self._node = _node
    else:
        self._node = CSDSQLDatabaseLib.AndNode(self._node, _node)
    self._searcher_dict.update(_node._searcher_dict)
    self._limit_dict.update(_node._limit_dict)
    # An in-place operator must return its result; without this the
    # augmented assignment rebinds the target to None.
    return self
def __or__(self, other):
    '''Disjoin this with another search.

    This search is modified in place and returned.

    :param other: anything accepted by :meth:`_make_node`
    '''
    rhs = self._make_node(other)
    if self._node is not None:
        self._node = CSDSQLDatabaseLib.OrNode(self._node, rhs)
    else:
        self._node = rhs
    self._searcher_dict.update(rhs._searcher_dict)
    self._limit_dict.update(rhs._limit_dict)
    return self
def __ior__(self, other):
    '''In-place disjunction.

    :param other: anything accepted by :meth:`_make_node`
    :returns: this search, so that ``search |= other`` keeps ``search`` bound to it
    '''
    _node = self._make_node(other)
    if self._node is None:
        self._node = _node
    else:
        self._node = CSDSQLDatabaseLib.OrNode(self._node, _node)
    self._searcher_dict.update(_node._searcher_dict)
    self._limit_dict.update(_node._limit_dict)
    # An in-place operator must return its result; without this the
    # augmented assignment rebinds the target to None.
    return self
def __neg__(self):
    '''Negate this search.

    This search is modified in place and returned.

    :raises: TypeError if there is nothing to negate
    '''
    if self._node is None:
        raise TypeError('No searches to negate')
    _node = CSDSQLDatabaseLib.NotNode(self._node)
    # Use the search's own tables: a node built by __and__/__or__ never had
    # these attributes assigned, and on a fresh search they are the very
    # same objects as the root node's.
    _node._searcher_dict = self._searcher_dict
    _node._limit_dict = self._limit_dict
    self._node = _node
    return self
@staticmethod
def _monkey_patch(extra=None):
    '''Private: ensure relevant classes have combination methods.

    Installs ``__neg__``, ``__and__`` and ``__or__`` on the search classes
    and node classes so that searches can be combined with ``-``, ``&`` and
    ``|``.  Each operator returns a new node carrying ``_searcher_dict`` and
    ``_limit_dict``.

    :param extra: optional iterable of further classes to patch
    '''
    def merged(first, second):
        '''A copy of ``first`` updated with ``second``; neither input is modified.'''
        combined = first.copy()
        combined.update(second)
        return combined

    def negate(s):
        _node = CombinedSearch._make_node(s)
        ret = CSDSQLDatabaseLib.NotNode(_node)
        ret._searcher_dict = _node._searcher_dict
        ret._limit_dict = _node._limit_dict
        return ret

    def conjoin(s, t):
        _s = CombinedSearch._make_node(s)
        _t = CombinedSearch._make_node(t)
        ret = CSDSQLDatabaseLib.AndNode(_s, _t)
        ret._searcher_dict = merged(_s._searcher_dict, _t._searcher_dict)
        # Copy, as for the searcher table: updating the left operand's own
        # limit table would leak the right operand's limits into it.
        ret._limit_dict = merged(_s._limit_dict, _t._limit_dict)
        return ret

    def disjoin(s, t):
        # Each disjunct is tagged so that the branch which produced a match
        # can be identified later.
        _s = CombinedSearch._make_node(s)
        first_mutator = CSDSQLDatabaseLib.MatchStringDataItemAdder('disjunct', 'first')
        _sm = CSDSQLDatabaseLib.MatchMutatorNode(first_mutator, _s)
        _sm._searcher_dict = _s._searcher_dict
        _sm._limit_dict = _s._limit_dict
        _t = CombinedSearch._make_node(t)
        second_mutator = CSDSQLDatabaseLib.MatchStringDataItemAdder('disjunct', 'second')
        _tm = CSDSQLDatabaseLib.MatchMutatorNode(second_mutator, _t)
        _tm._searcher_dict = _t._searcher_dict
        _tm._limit_dict = _t._limit_dict
        ret = CSDSQLDatabaseLib.OrNode(_sm, _tm)
        ret._searcher_dict = merged(_s._searcher_dict, _t._searcher_dict)
        # Copy for the same reason as in conjoin().
        ret._limit_dict = merged(_s._limit_dict, _t._limit_dict)
        return ret

    if extra is None:
        extra = []
    for cl in (
        TextNumericSearch, SimilaritySearch, SubstructureSearch, ReducedCellSearch,
        CSDSQLDatabaseLib.AndNode, CSDSQLDatabaseLib.OrNode, CSDSQLDatabaseLib.NotNode,
        CSDSQLDatabaseLib.EntryLimitNode, CSDSQLDatabaseLib.MatchMutatorNode,
    ) + tuple(extra):
        cl.__neg__ = negate
        cl.__and__ = conjoin
        cl.__or__ = disjoin
@staticmethod
def _which_node(node):
    '''Private: return ``node`` converted to its concrete node class.

    Each ``Node_as_*`` conversion is tried in turn and the first result
    that is not ``None`` is returned.

    :raises: NotImplementedError if no conversion succeeds
    '''
    conversions = (
        CSDSQLDatabaseLib.Node_as_NotNode,
        CSDSQLDatabaseLib.Node_as_AndNode,
        CSDSQLDatabaseLib.Node_as_OrNode,
        CSDSQLDatabaseLib.Node_as_MatchMutatorNode,
        CSDSQLDatabaseLib.Node_as_TextNumericSearchNode,
        CSDSQLDatabaseLib.Node_as_MotifNode,
        CSDSQLDatabaseLib.Node_as_ReducedCellNode,
        CSDSQLDatabaseLib.Node_as_SimilaritySearchNode,
        CSDSQLDatabaseLib.Node_as_EntryLimitNode,
    )
    for convert in conversions:
        concrete = convert(node)
        if concrete is not None:
            return concrete
    raise NotImplementedError('Unknown Node type %s' % type(node))
#@staticmethod
def _show_node(self, n, indent=0):
    '''For debugging: return an indented text rendering of a node tree.

    :param n: the node to render
    :param indent: number of leading spaces for this level; children are indented by two more
    :raises: NotImplementedError for a node class that is not handled
    '''
    node = CombinedSearch._which_node(n)
    pad = ' '*indent
    deeper = indent+2

    if isinstance(node, CSDSQLDatabaseLib.NotNode):
        return '%sNot(\n%s\n%s)' % (pad, self._show_node(node.child(), deeper), pad)
    if isinstance(node, CSDSQLDatabaseLib.OrNode):
        lhs = self._show_node(node.left(), deeper)
        rhs = self._show_node(node.right(), deeper)
        return '%sOr(\n%s,\n%s\n%s)' % (pad, lhs, rhs, pad)
    if isinstance(node, CSDSQLDatabaseLib.AndNode):
        lhs = self._show_node(node.left(), deeper)
        rhs = self._show_node(node.right(), deeper)
        return '%sAnd(\n%s,\n%s\n%s)' % (pad, lhs, rhs, pad)
    if isinstance(node, CSDSQLDatabaseLib.MatchMutatorNode):
        adder = CSDSQLDatabaseLib.Mutator_as_MatchStringDataItemAdder(node.mutator())
        # Only similarity searches are labelled with their query molecule.
        ident = ''
        if adder.key() == 'similarity_search':
            ident = self._searcher_dict[adder.value()].molecule.identifier
        return '%sMutate(%s=%s(%s)\n%s\n%s)' % (
            pad, adder.key(), adder.value(), ident, self._show_node(node.child(), deeper), pad
        )
    if isinstance(node, CSDSQLDatabaseLib.EntryLimitNode):
        return '%sLimit(\n%s\n%s)' % (pad, self._show_node(node.child(), deeper), pad)
    if isinstance(node, CSDSQLDatabaseLib.TextNumericSearchNode):
        return '%sText()' % (pad)
    if isinstance(node, CSDSQLDatabaseLib.SimilaritySearchNode):
        return '%sSimilarity()' % (pad)
    if isinstance(node, CSDSQLDatabaseLib.MotifNode):
        return '%sMotif()' % (pad)
    if isinstance(node, CSDSQLDatabaseLib.ReducedCellNode):
        return '%sReduced()' % (pad)
    raise NotImplementedError('WTF? %s' % type(node))
def _make_hits(self, match, node, _database=None, pars=None):
    '''The hits from an individual match.

    Walks the node tree recursively and builds the
    :class:`CombinedSearch.CombinedHit` objects for ``match``.

    :param match: the match reported by the combination searcher
    :param node: the (sub)tree of the search definition to interpret
    :param _database: private: the database the hits refer to
    :param pars: the match's parameters; read from ``match`` when ``None``.
        The ``'disjunct'`` entry is consumed (popped) as Or nodes are visited.
    :returns: a list of :class:`CombinedSearch.CombinedHit`
    '''
    identifier = match.identifier().str()
    if pars is None:
        pars = match.data().parameters()
    n = self._which_node(node)

    if isinstance(n, CSDSQLDatabaseLib.NotNode):
        return [CombinedSearch.CombinedHit(identifier, _database=_database)]

    if isinstance(n, CSDSQLDatabaseLib.OrNode):
        # Need to work out which disjunct, probably from a MatchMutator
        # Get the disjunct parameter
        if 'disjunct' not in pars:
            raise RuntimeError('No disjunct in pars')
        which = pars['disjunct'].pop()
        chosen = n.left() if which == 'first' else n.right()
        # Step past the mutator node that tagged the disjunct.
        tagged = self._which_node(chosen)
        return self._make_hits(match, tagged.child(), _database=_database, pars=pars)

    if isinstance(n, CSDSQLDatabaseLib.AndNode):
        # Cartesian product.
        left_hits = self._make_hits(match, n.left(), _database=_database, pars=pars)
        right_hits = self._make_hits(match, n.right(), _database=_database, pars=pars)
        product = []
        for left_hit in left_hits:
            for right_hit in right_hits:
                combined = left_hit.copy()
                combined._merge(right_hit)
                product.append(combined)
        return product

    if isinstance(n, CSDSQLDatabaseLib.TextNumericSearchNode):
        return [CombinedSearch.CombinedHit(match.identifier().str(), _database=_database)]

    if isinstance(n, CSDSQLDatabaseLib.ReducedCellNode):
        return [CombinedSearch.CombinedHit(match.identifier().str(), _database=_database)]

    if isinstance(n, CSDSQLDatabaseLib.SimilaritySearchNode):
        # This won't happen - guarded by the MatchMutatorNode
        raise RuntimeError('SimilaritySearchNode: this cannot happen')

    if isinstance(n, CSDSQLDatabaseLib.MatchMutatorNode):
        mmm = CSDSQLDatabaseLib.Mutator_as_MatchStringDataItemAdder(n.mutator())
        if 'substructure_search' in pars:
            if mmm.key() != 'substructure_search':
                return self._make_hits(match, n.child(), _database=_database, pars=pars)
            # Re-run the substructure search on this crystal to get the
            # individual hits.
            searcher = self._searcher_dict[mmm.value()]
            cry = _database.crystal(match.identifier().str())
            return [
                CombinedSearch.CombinedHit._from_substructure_hit(h, _database)
                for h in searcher._search_crystal(cry)
            ]
        if 'similarity_search' in pars:
            if mmm.key() != 'similarity_search':
                return self._make_hits(match, n.child(), _database=_database, pars=pars)
            assert len(pars['similarity']) == len(pars['similarity_search'])
            searchers = [self._searcher_dict[x] for x in pars['similarity_search']]
            ids = [s.molecule.identifier for s in searchers]
            vals = [float(x) for x in pars['similarity']]
            try:
                return [CombinedSearch.CombinedHit._from_similarity_hit(match.identifier().str(), ids, vals, _database=_database)]
            except:
                # Dump the offending subtree for debugging, then re-raise.
                print(self._show_node(n), pars)
                raise
        if 'entry_limit' in pars:
            return self._make_hits(match, n.child(), _database=_database, pars=pars)
        raise RuntimeError('Unexpected item in the bagging area\n%s' % ('\n'.join('%s: %s' % (k, v) for k, v in pars.items())))

    if isinstance(n, CSDSQLDatabaseLib.MotifNode):
        raise RuntimeError('MotifNode: this cannot happen')

    if isinstance(n, CSDSQLDatabaseLib.FormulaSearchNode):
        # Not implemented in the API yet
        raise NotImplementedError('No FormulaSearch in the API')

    if isinstance(n, CSDSQLDatabaseLib.EntryLimitNode):
        return self._make_hits(match, n.child(), _database=_database, pars=pars)

    raise NotImplementedError('No implementation for %s' % type(n))
[docs] @staticmethod
def max_hit_structures(other, count):
    '''Limit the number of hits found by a combination search.

    :param other: a combination of searches.
    :param count: maximum number of hits to find.
    :returns: a ``MatchMutatorNode`` wrapping an ``EntryLimitNode`` built from ``other``,
        carrying ``_searcher_dict`` and ``_limit_dict`` attributes.
    '''
    inner = CombinedSearch._make_node(other)
    limited = CSDSQLDatabaseLib.EntryLimitNode(inner, count)
    limited._searcher_dict = inner._searcher_dict

    # The string form of the limit node is used both as the value attached to
    # each match under 'entry_limit' and as the key of the limit counter.
    limit_key = str(limited)
    tagged = CSDSQLDatabaseLib.MatchMutatorNode(
        CSDSQLDatabaseLib.MatchStringDataItemAdder('entry_limit', limit_key),
        limited
    )
    tagged._searcher_dict = {str(other): other}
    tagged._limit_dict = {limit_key: count}
    return tagged
def _search_reader(self, database=None):
    '''Run this combined search over a database reader and return the hits.

    :param database: the reader to search; when ``None`` an ``io.EntryReader('csd')`` is used.
    :returns: the list of hits collected from the matches reported by the searcher.
    '''
    reader = io.EntryReader('csd') if database is None else database

    as_subset = FileFormatsLib.CrystalStructureDatabaseAsCrystalStructureDatabaseSubset(reader._db)
    if as_subset and isinstance(reader._underlying_file_name, list):
        # The reader is a subset spread over several files: assign every
        # identifier to the first file that contains it, then rebuild a pool
        # of per-file subsets to search over.
        members = [io.EntryReader(name) for name in reader._underlying_file_name]
        owned = collections.defaultdict(list)
        all_ids = sorted(reader.identifier(pos) for pos in range(len(reader)))
        for ident in all_ids:
            owner = next(
                (idx for idx, member in enumerate(members)
                 if member._db.identifier_exists(UtilitiesLib.DatabaseEntryIdentifier(ident))),
                None
            )
            if owner is not None:
                owned[owner].append(ident)

        pool = FileFormatsLib.CrystalStructureDatabasePool()
        # Subsets are added in order of their identifier lists.
        for idx, idents in sorted(owned.items(), key=lambda item: item[1]):
            subset = FileFormatsLib.CrystalStructureDatabaseSubset(idents, members[idx]._db)
            as_database = FileFormatsLib.CrystalStructureDatabaseSubsetAsCrystalStructureDatabase(subset)
            pool.append(as_database)
        engine = pool.searcher_factory().combination_searcher()
    else:
        engine = reader._db.searcher_factory().combination_searcher()
    engine.set_search_definition_node(self._node)

    def _match_atoms_available(hit):
        # A hit whose match_atoms() raises RuntimeError is dropped.
        # NOTE(review): presumably these are hits on suppressed atoms - confirm.
        try:
            hit.match_atoms()
        except RuntimeError:
            return False
        return True

    class ResultsWriter(object):
        '''Callable given to the searcher; converts each reported match into hits.'''

        def __init__(self, searcher, _database=None):
            self.searcher = searcher
            self._database = _database
            self.hits = []
            self._matches = []
            self.current_id = None
            # NOTE(review): this is the searcher's own dict, not a copy, so the
            # decrements made in __call__ remain on the searcher after the
            # search has finished - confirm that is intended.
            self._limit_dict = searcher._limit_dict

        def __call__(self, **kw):
            match = kw['match']
            ident = match.identifier().str()
            # Consecutive matches for the same identifier are handled only once.
            if ident == self.current_id:
                return

            pars = match.data().parameters()
            if 'entry_limit' in pars:
                for key in pars['entry_limit']:
                    remaining = self._limit_dict[key] - 1
                    self._limit_dict[key] = remaining
                    if remaining < 0:
                        # Limit exhausted: ignore this match.
                        return

            self.current_id = ident
            found = self.searcher._make_hits(match, self.searcher._node, _database=self._database)
            if self._database.__class__.__name__ == 'MoleculeReader':
                found = [hit for hit in found if _match_atoms_available(hit)]
            self.hits.extend(found)

    collector = ResultsWriter(self, _database=reader)
    results_writer = CSDSQLDatabaseLib.PythonResultsWriter(collector)
    engine.search(results_writer)
    return collector.hits
def _search_entry(self, entry):
raise NotImplementedError('Combined searches not implemented for an entry')
def _search_crystal(self, crystal):
raise NotImplementedError('Combined searches not implemented for a crystal')
def _search_molecule(self, molecule):
raise NotImplementedError('Combined searches not implemented for a molecule')
# Invoke CombinedSearch's _monkey_patch hook via the class itself.
# NOTE(review): _monkey_patch is defined outside this section, so what it
# patches cannot be stated from here - see its definition.
CombinedSearch._monkey_patch()