#
# This code is Copyright (C) 2015 The Cambridge Crystallographic Data Centre
# (CCDC) of 12 Union Road, Cambridge CB2 1EZ, UK and a proprietary work of CCDC.
# This code may not be used, reproduced, translated, modified, disassembled or
# copied, except in accordance with a valid licence agreement with CCDC and may
# not be disclosed or redistributed in any form, either in whole or in part, to
# any third party. All copies of this code made in accordance with a valid
# licence agreement as referred to above must contain this copyright notice.
#
# No representations, warranties, or liabilities are expressed or implied in the
# supply of this code by CCDC, its servants or agents, except where such
# exclusion or limitation is prohibited, void or unenforceable under governing
# law.
#
"""
pharmacophore.py - access to pharmacophore searches.
This allows pharmacophore models to be read and written, and using these to run searches.
Access to the hits is also provided.
"""
#########################################################################################################
import os
import sys
import collections
import time
import multiprocessing
import warnings
from ccdc import io, search, molecule, utilities, protein
from ccdc.descriptors import GeometricDescriptors
from ccdc.utilities import Colour
from ccdc.utilities import _import_ccdc_modules
_import_ccdc_modules([
'MotifPharmacophoreLib',
'SubstructureSearchLib',
'ChemistryLib',
'MathsLib',
'UtilitiesLib'
])
MotifPharmacophoreLib.licence_check()
#########################################################################################################
[docs]
class Pharmacophore(object):
"""Namespace for the Pharmacophore functionality."""
crossminer_version = '1.4.0'
feature_definitions = dict()
@staticmethod
def _feature_definition_directory():
"""Location of the default feature definitions."""
loc = io._CSDDatabaseLocator.get_crossminer_feature_definition_directory()
if loc is None or not os.path.exists(loc):
import ccdc
loc = ccdc._find_crossminer_feature_directory()
if loc is None or not os.path.exists(loc):
raise RuntimeError('Unable to find the pharmacophore feature definition directory')
return loc
[docs]
@staticmethod
def read_feature_definitions(directory=None):
"""Read all the feature definitions found in any subdirectory of the provided argument.
By default this will read the feature definitions supplied by the CSD.
"""
if directory is None:
directory = Pharmacophore._feature_definition_directory()
for dirpath, dirnames, filenames in os.walk(directory):
for fname in filenames:
if fname.endswith('.cpf'):
fd = Pharmacophore.FeatureDefinition.from_file(os.path.join(dirpath, fname))
Pharmacophore.feature_definitions[fd.identifier] = fd
[docs]
@staticmethod
def default_feature_database():
"""The CSD provided CrossMiner feature database."""
return Pharmacophore.FeatureDatabase.from_file(
io._CSDDatabaseLocator.get_crossminer_database_location()
)
[docs]
@staticmethod
def default_feature_database_location():
"""The path to the default CrossMiner database."""
return io._CSDDatabaseLocator.get_crossminer_database_location()
_component_label_dict = utilities.bidirectional_dict((
(0, 'protein'),
(1, 'small_molecule'),
(2, 'any')
))
_apply_rule_dic = utilities.bidirectional_dict((
(MotifPharmacophoreLib.MotifPharmacophoreSMARTSFeature.ALWAYS, 'Always'),
(MotifPharmacophoreLib.MotifPharmacophoreSMARTSFeature.H_PRESENT, 'With_h'),
(MotifPharmacophoreLib.MotifPharmacophoreSMARTSFeature.NO_H_PRESENT, 'No_h')
))
# Not currently used
_planarity_dic = utilities.bidirectional_dict((
(MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorCentroid.ANY, 'Any'),
(MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorCentroid.PLANAR, 'Planar'),
(MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorCentroid.NON_PLANAR, 'Non_planar'),
))
@staticmethod
def _default_n_threads():
n_threads = max(int(multiprocessing.cpu_count() / 2), 1)
return n_threads
@staticmethod
def _database_feature_def_from_fd(fd):
_fd = MotifPharmacophoreLib.MotifPharmacophoreFeatureDatabaseFeatureDefinition()
_fd.set_feature_deducer(fd._feature_def.feature_deducer())
_fd.set_colour(MotifPharmacophoreLib.MotifPharmacophoreFeatureColour(*fd.colour))
label = Pharmacophore._component_label_dict.inverse_lookup(fd.component_label)
if label == 2:
labels = [0, 1]
else:
labels = [label]
_fd.set_component_labels(labels)
return _fd
[docs]
class PointGenerator(object):
"""A mechanism for generating spheres from a SMARTS match."""
def __init__(self, _pg=None):
self._pg = _pg
@property
def parameters(self):
"""The optional parameters of the point generator."""
return {
p.replace(' ', '_') : v
for p, v in zip(self._pg.optional_parameter_names(), self._pg.optional_parameters())
}
[docs]
def feature_points(self, atoms):
"""The points generated by this generator.
:raises: RuntimeError if the sequence of points is inappropriate for the generator.
"""
_motif_pts = self._pg.motif_features([a._atom for a in atoms])
return tuple(p for mf in _motif_pts for p in mf.points())
[docs]
class DummyPointGenerator(PointGenerator):
"""Generates no points."""
def __init__(self, identifier):
self._pg = MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorDummy()
self.identifier = identifier
[docs]
class PointPointGenerator(PointGenerator):
"""Generates a single point at the position of the relevant atom."""
def __init__(self, identifier):
self._pg = MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorPoint()
self.identifier = identifier
[docs]
class LinearPointGenerator(PointGenerator):
"""Generates a point displaced from the relevant atom."""
def __init__(self, identifier, distance=2.8, neighbour_is_base_feature=False):
self._pg = MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorLinear(
distance, neighbour_is_base_feature)
self.identifier = identifier
[docs]
class NormalPointGenerator(PointGenerator):
"""Generates a point from the normal to the plane of atoms."""
def __init__(self, identifier, distance=2.8):
self._pg = MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorNormal(
distance, False, 0.0
)
self.identifier = identifier
[docs]
class PlanarNormalPointGenerator(PointGenerator):
"""Generates a point from the normal to the plane of a planar ring."""
def __init__(self, identifier, distance=2.8, planar_ring_tolerance=0.0):
self._pg = MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorNormal(
distance, True, planar_ring_tolerance
)
self.identifier = identifier
[docs]
class AnyCentroidPointGenerator(PointGenerator):
"""Generates a point at the centroid of atoms."""
def __init__(self, identifier):
self._pg = MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorCentroid(
MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorCentroid.ANY, 0.0
)
self.identifier = identifier
[docs]
class PlanarCentroidPointGenerator(PointGenerator):
"""Generates a point at the centre of a planar ring."""
def __init__(self, identifier, atom_planarity_threshold=0.0):
self._pg = MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorCentroid(
MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorCentroid.PLANAR, atom_planarity_threshold
)
self.identifier = identifier
[docs]
class NonPlanarCentroidPointGenerator(PointGenerator):
"""Generates a point at the centroid of atoms which are not planar."""
def __init__(self, identifier, atom_planarity_threshold=0.0):
self._pg = MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorCentroid(
MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorCentroid.NON_PLANAR, atom_planarity_threshold
)
self.identifier = identifier
[docs]
class TrigonalPointGenerator(PointGenerator):
"""Generates two points with a trigonal geometry."""
def __init__(self, identifier, distance=2.8):
self._pg = MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorTrigonal(distance)
self.identifier = identifier
[docs]
class TetrahedralPointGenerator(PointGenerator):
"""Generates two points with a tetrahedral geometry."""
def __init__(self, identifier, distance=2.8):
self._pg = MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorTetrahedral(distance)
self.identifier = identifier
[docs]
class SMARTSDefinition(object):
"""A SMARTS definition with associated point generation schemes."""
def __init__(self, smarts, point_generators, _sd=None, **kw):
# _incomplete if made by hand. Need to resolve the motif_feature_types() when in a feature_def
#
if _sd is None:
self._sd = MotifPharmacophoreLib.MotifPharmacophoreSMARTSFeature()
self._sd.set_SMARTSdefinition(smarts)
self._point_generators = point_generators
self._incomplete = True
app_rule = kw.get('apply_rule')
if app_rule is None:
app = MotifPharmacophoreLib.MotifPharmacophoreSMARTSFeature.ALWAYS
else:
app = Pharmacophore._apply_rule_dic.inverse_lookup(app_rule)
self._sd.set_apply_rule(app)
self._sd.set_permutational_equivalence(kw.get('permutational_equivalence', True))
self._sd.set_allow_overlapping_substructures(kw.get('allow_overlaps', False))
else:
self._sd = _sd
self._incomplete = False
self._point_generators = point_generators
@staticmethod
def _from_SMARTS_feature_def(_sd, _pgs):
return Pharmacophore.SMARTSDefinition(
_sd.SMARTSdefinition(),
tuple(
(_pgs.feature_types()[i].name(), v)
for i, v in _sd.motif_feature_types()
),
_sd,
apply_rule=Pharmacophore._apply_rule_dic[_sd.apply_rule()],
permutional_equivalence=_sd.permutational_equivalence(),
allow_overlaps=_sd.allow_fully_overlapping_substructures()
)
@property
def SMARTS(self):
"""The SMARTS string."""
return self._sd.SMARTSdefinition()
@property
def apply_rule(self):
"""How to apply the SMARTS definition."""
return Pharmacophore._apply_rule_dic[self._sd.apply_rule()]
@property
def permutational_equivalence(self):
"""Whether permutionally equivalent structures are deemed to be a single match."""
return self._sd.permutational_equivalence()
@property
def allow_overlaps(self):
"""Whether fully overlapping structures are permitted to be a hit."""
return self._sd.allow_fully_overlapping_substructures()
@property
def point_generators(self):
"""The generators for feature points."""
return self._point_generators
[docs]
def search(self, crystal):
"""Search a crystal for instances of the SMARTS pattern."""
raise NotImplementedError('Pharmacophore.SMARTSDefinition.search')
[docs]
class FeatureDefinition(object):
"""A feature definition in a feature database."""
_point_generators = utilities.bidirectional_dict(
Dummy= MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorDummy,
Point= MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorPoint,
Linear= MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorLinear,
Normal= MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorNormal,
Centroid= MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorCentroid,
Trigonal= MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorTrigonal,
Tetrahedral=MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorTetrahedral,
)
def __init__(self, _feature_def=None):
self._feature_def = _feature_def
def __repr__(self):
return 'FeatureDefinition(%s)' % self.identifier
[docs]
@staticmethod
def from_file(file_name):
"""Reads a structural feature definition from a file."""
component = os.path.basename(os.path.dirname(file_name))
try:
tag = Pharmacophore._component_label_dict.inverse_lookup(component)
except Exception as exc:
tag = 2
_creator = MotifPharmacophoreLib.MotifPharmacophoreFeatureDatabaseCreator()
_fd = _creator.read_feature_definition(file_name)
_feature_definition = MotifPharmacophoreLib.MotifPharmacophoreFeatureDefinition()
_feature_definition.set_feature_deducer(_fd.feature_deducer())
_feature_definition.set_component_label(tag)
_feature_definition.set_colour(_fd.colour())
return Pharmacophore.FeatureDefinition(_feature_def=_feature_definition)
[docs]
@staticmethod
def from_SMARTS_definitions(identifier, smarts, point_generators):
"""Create a feature from SMARTS definitions.
:param identifier: string
:param smarts: iterable of :class:`ccdc.pharmacophore.Pharmacophore.SMARTS_definition` instances.
:param point_generators: iterable of :class:`ccdc.pharmacophore.Pharmaophore.PointGenerator` instances.
"""
pgs = MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGenerationScheme()
name_dic = dict()
for i, pg in enumerate(point_generators):
name_dic[pg.identifier] = i
pgs.add_feature_type(pg.identifier, pg._pg)
_sdefs = []
for sd in smarts:
_sdefs.append(sd._sd)
pg_spec = []
for name, ats in sd.point_generators:
inx = name_dic[name]
pg_spec.append((inx, ats))
sd._sd.set_motif_feature_types(pg_spec)
ded = MotifPharmacophoreLib.MotifPharmacophoreFeatureDeducerSubstructure(_sdefs, pgs)
ded.set_name(identifier)
fd = Pharmacophore.FeatureDefinition()
fd._feature_def = MotifPharmacophoreLib.MotifPharmacophoreFeatureDefinition()
fd._feature_def.set_feature_deducer(ded)
fd._feature_def.set_description(identifier)
return fd
@property
def SMARTS_definitions(self):
deducer = self._feature_def.feature_deducer()
if deducer.class_name() == 'MotifPharmacophoreFeatureDeducerSubstructure':
_s = MotifPharmacophoreLib.deducer_as_substructure(deducer)
return tuple(
Pharmacophore.SMARTSDefinition._from_SMARTS_feature_def(_fd, _s.point_generation_scheme())
for _fd in _s.SMARTS_motif_feature_types()
)
@property
def identifier(self):
"""The identifier of the feature."""
return self._feature_def.name()
@property
def component_label(self):
"""Which component ('protein', 'small_molecule' or 'any') the feature def applies to."""
if hasattr(self._feature_def, 'component_label'):
return Pharmacophore._component_label_dict[self._feature_def.component_label()]
else:
t = tuple(Pharmacophore._component_label_dict[x] for x in self._feature_def.component_labels())
if len(t) == 1:
return t[0]
else:
return 'Any'
@component_label.setter
def component_label(self, value):
self._feature_def.set_component_label(
Pharmacophore._component_label_dict.inverse_lookup(value)
)
@property
def colour(self):
"""The colour associated with this feature."""
col = self._feature_def.colour()
return Colour(col.r_, col.g_, col.b_, col.a_)
@colour.setter
def colour(self, val):
col = MotifPharmacophoreLib.MotifPharmacophoreFeatureColour(*val)
self._feature_def.set_colour(col)
[docs]
def write(self, file_name):
"""Writes the feature definition."""
if self._feature_def.feature_deducer().class_name() == 'MotifPharmacophoreFeatureDeducerSubstructure':
ded = MotifPharmacophoreLib.deducer_as_substructure(self._feature_def.feature_deducer())
writer = MotifPharmacophoreLib.SMARTSMotifFeatureTypeFileWriter(ded)
ostr = MotifPharmacophoreLib.ofstream(file_name)
writer.write_content('', ostr)
ostr.close()
else:
raise RuntimeError('Not a SMARTS feature definition')
@property
def point_generator_names(self):
"""The names of the point generator schemes associated with this feature definition."""
ded = MotifPharmacophoreLib.deducer_as_substructure(self._feature_def.feature_deducer())
pgs = ded.point_generation_scheme()
fts = pgs.feature_types()
return tuple(f.name() for f in fts)
def detect_features(self, crystal):
_csv = ChemistryLib.CrystalStructureView.instantiate(crystal._crystal)
_ssc = MotifPharmacophoreLib.MotifPharmacophoreSearchStructureCreator()
_ssc.register_components_from_feature_definitions((self._feature_def,))
_mss = _ssc.search_structure(_csv)
_ded = self._feature_def.feature_deducer()
_feats = _ded.generate_motif_features(_mss, self._feature_def.component_label())
features = []
for i in range(_feats.end_index()):
code, feat_pts = _feats.at(i)
for fp in feat_pts:
pts = fp.points()
spheres = tuple(GeometricDescriptors.Sphere((p[0], p[1], p[2]), 1.0) for p in pts)
# Skip duplicates
dup = False
if len(spheres) == 2:
for f in features:
if len(f.spheres) == 2:
if (
f.spheres[0] == spheres[1] and
f.spheres[1] == spheres[0]
):
dup = True
break
if not dup:
features.append(
Pharmacophore.Feature(self._clone(), *spheres)
)
return tuple(features)
[docs]
def features_from_atoms(self, point_generator, *atoms):
"""Create features from this feature definition and an atom of the appropriate type.
:param point_generator: a string giving one of the point_generator_names associated with this definition.
:param atoms: an iterable of :class:`ccdc.molecule.Atom` instances.
:returns: a :class:`ccdc.pharmacophore.Pharmacophore.Feature` instance.
:raises RuntimeError: if the atom is not suitable for the feature definition.
"""
ded = MotifPharmacophoreLib.deducer_as_substructure(self._feature_def.feature_deducer())
pgs = ded.point_generation_scheme()
for ft in pgs.feature_types():
if ft.name().upper() == point_generator.upper():
mfg = ft.motif_feature_generator()
mpts = mfg.motif_features([a._atom for a in atoms])
pts = [mpt.points() for mpt in mpts]
spheres = [[GeometricDescriptors.Sphere(tuple(p[i] for i in range(3)), 1.0) for p in l] for l in pts]
feats = [Pharmacophore.Feature(self._clone(), *ss) for ss in spheres]
return tuple(feats)
else:
raise RuntimeError('Unknown point generator %s' % point_generator)
[docs]
def copy(self):
"""Copies the Feature so it can be edited freely."""
return self._clone()
def _clone(self):
_fd = MotifPharmacophoreLib.MotifPharmacophoreFeatureDefinition()
_fd.set_feature_deducer(self._feature_def.feature_deducer())
_fd.set_component_label(self._feature_def.component_label())
_fd.set_colour(self._feature_def.colour())
_fd.set_available_component_labels(self._feature_def.available_component_labels())
_fd.set_description(self._feature_def.description())
_fd.set_id(self._feature_def.id()) # Maybe increment?
return Pharmacophore.FeatureDefinition(_fd)
[docs]
class Feature(object):
"""A feature in a Query."""
def __init__(self, feature_definition, *spheres):
if feature_definition is None:
# Required for ExcludedVolume and AnnotatoinFilter
self._feature_def = MotifPharmacophoreLib.MotifPharmacophoreFeatureDefinition()
else:
self._feature_def = feature_definition._clone()._feature_def
for s in spheres:
self.add_sphere(s)
def __repr__(self):
return 'Feature(%s)' % self.identifier
@property
def colour(self):
"""The colour of the spheres."""
col = self._feature_def.colour()
return Colour(col.r_, col.g_, col.b_, col.a_)
@colour.setter
def colour(self, value):
col = MotifPharmacophoreLib.MotifPharmacophoreFeatureColour(*value)
self._feature_def.set_colour(col)
@property
def component_label(self):
"""The component label"""
return Pharmacophore._component_label_dict[self._feature_def.component_label()]
@component_label.setter
def component_label(self, value):
val = Pharmacophore._component_label_dict.inverse_lookup(value)
self._feature_def.set_component_label(val)
if hasattr(self, '_query'):
self._query._pharmacophore.set_component_label_of_feature(self._feature_def.id(), int(val))
@property
def description(self):
"""Description of the feature definition."""
return self._feature_def.description()
@description.setter
def description(self, value):
self._feature_def.set_description(value)
@property
def identifier(self):
"""The identifier of the feature definition."""
return self._feature_def.name()
@property
def spheres(self):
"""The spheres defined by this feature definition."""
return tuple(GeometricDescriptors.Sphere(s.centre(), s.radius()) for s in self._feature_def.spheres())
@spheres.setter
def spheres(self, values):
if len(self.spheres) != len(values):
raise RuntimeError('Mismatch of number of spheres')
_feature_def = MotifPharmacophoreLib.MotifPharmacophoreFeatureDefinition()
_feature_def.set_feature_deducer(self._feature_def.feature_deducer())
_feature_def.set_filter(self._feature_def.filter())
_feature_def.set_component_label(self._feature_def.component_label())
for s in values:
_feature_def.add_sphere(
MathsLib.Sphere(tuple(s.centre[i] for i in range(3)), s.radius)
)
_feature_def.set_colour(self._feature_def.colour())
_feature_def.set_available_component_labels(self._feature_def.available_component_labels())
_feature_def.set_id(self._feature_def.id())
_feature_def.set_description(self._feature_def.description())
self._feature_def.set_spheres(
tuple(MathsLib.Sphere(tuple(v.centre[i] for i in range(3)), v.radius) for v in values)
)
[docs]
def add_sphere(self, sphere):
"""Add a sphere to the feature definition.
:param sphere: :class:`ccdc.descriptors.GeometricDescriptors.Sphere` instance
"""
_sphere = MathsLib.Sphere([sphere.centre[i] for i in range(3)], sphere.radius)
self._feature_def.add_sphere(_sphere)
[docs]
class ExcludedVolume(Feature):
"""
An excluded volume feature. This expresses a region to exclude if atoms are in this region.
:param sphere: the sphere to exclude
:param smarts: an optional SMARTS pattern that means you can exclude hits that have a specific substructure
within the sphere
"""
def __init__(self, sphere, smarts="[*]"):
Pharmacophore.Feature.__init__(self, None)
deducer = MotifPharmacophoreLib.MotifPharmacophoreFeatureDeducerExcludedVolume()
self._feature_def.set_feature_deducer(deducer)
self._feature_def.set_component_label(2)
self._filterer = MotifPharmacophoreLib.MotifPharmacophoreFilterSubstructure()
self._filterer.set_name(deducer.name())
self._filterer.set_key(smarts)
self._feature_def.set_filter(self._filterer)
self.add_sphere(sphere)
@property
def filter_smarts(self):
"""The SMARTS pattern that controls when an atom excludes in a given volume by substructure matching."""
return self._filterer.key()
@filter_smarts.setter
def filter_smarts(self, smarts):
self._filterer.set_key(smarts)
[docs]
class AnnotationFilter(Feature):
def __init__(self, key, value):
Pharmacophore.Feature.__init__(self, None)
deducer = MotifPharmacophoreLib.MotifPharmacophoreFeatureDeducerAnnotation()
deducer.set_key(key)
deducer.set_value(value)
self._feature_def.set_feature_deducer(deducer)
@property
def key(self):
"""The annotation filter key."""
return MotifPharmacophoreLib.deducer_as_annotation(self._feature_def.feature_deducer()).key()
@property
def value(self):
"""The annotation filter value."""
return MotifPharmacophoreLib.deducer_as_annotation(self._feature_def.feature_deducer()).value()
[docs]
class Query(object):
"""A pharmacophore query."""
def __init__(self, features=None, _motif_pharmacophore=None):
if _motif_pharmacophore:
self._pharmacophore = _motif_pharmacophore
else:
self._pharmacophore = MotifPharmacophoreLib.MotifPharmacophore()
self._features = []
if features is not None:
for f in features:
self.add_feature(f)
[docs]
@staticmethod
def from_file(file_name):
"""Read a pharmacophore query from a file."""
reader = MotifPharmacophoreLib.MotifPharmacophoreReader(file_name)
q = Pharmacophore.Query(_motif_pharmacophore=reader.pharmacophore())
q._features_from_pharmacophore()
for f in q.features:
f._query = q
return q
[docs]
def write(self, pharmacophore_path):
"""Write a pharmacophore query to a file."""
writer = MotifPharmacophoreLib.MotifPharmacophoreWriter()
writer.write_pharmacophore(self._pharmacophore, pharmacophore_path)
@property
def packing(self):
"""Whether to use crystal packing for query structure."""
return self._pharmacophore.use_packing()
@packing.setter
def packing(self, val):
self._pharmacophore.set_use_packing(val)
@property
def intra_only(self):
"""Whether all features should be intramolecular."""
return self._pharmacophore.intra_only()
@intra_only.setter
def intra_only(self, value):
if value == self.intra_only:
return
self._pharmacophore.set_intra_only(value)
if value:
ty = ChemistryLib.ContactCriterion.INTRAMOLECULAR
else:
ty = ChemistryLib.ContactCriterion.ANY
for dc in self._pharmacophore.distance_constraints():
f1 = self.features[dc.id_feature_a()]
f2 = self.features[dc.id_feature_b()]
if f1.component_label == 'small_molecule' and f2.component_label == 'small_molecule':
crit = dc.distance_criterion()
crit.set_type(ty)
dc.set_distance_criterion(crit)
@property
def bounding_sphere(self):
"""A minimum volume sphere containing all feature points."""
s = self._pharmacophore.bounding_sphere()
return GeometricDescriptors.Sphere(tuple(s.centre()[i] for i in range(3)), s.radius())
@property
def maximum_feature_distance(self):
"""The maximum distance between features."""
return self._pharmacophore.maximum_feature_distance()
@maximum_feature_distance.setter
def maximum_feature_distance(self, value):
self._pharmacophore.set_maximum_feature_distance(value)
[docs]
def add_feature(self, feature):
"""Add a feature to the pharmacophore."""
self._pharmacophore.add_feature_definition(feature._feature_def)
self._features.append(feature)
feature._query = self
@property
def features(self):
"""The features of the pharmacophore."""
return self._features
def _features_from_pharmacophore(self):
def _make_spheres(_fd):
return tuple(GeometricDescriptors.Sphere((s.centre().x(), s.centre().y(), s.centre().z()), s.radius()) for s in _fd.spheres())
self._features.extend(tuple(
Pharmacophore.Feature(Pharmacophore.FeatureDefinition(_fd), *_make_spheres(_fd))
for _fd in self._pharmacophore.all_feature_definitions()
))
DistanceConstraint = collections.namedtuple('DistanceConstraint',
['feature_point1', 'feature_point2', 'distance_range', 'which'])
def _repr_distance_constraint(dc):
"""Representation of the distance constraint."""
return "DistanceConstraint((%d, %d), (%d, %d) (%.3f, %.3f) which='%s')" % (
dc.feature_point1[0], dc.feature_point1[1],
dc.feature_point2[0], dc.feature_point2[1],
dc.distance_range[0], dc.distance_range[1],
dc.which
)
DistanceConstraint.__repr__ = _repr_distance_constraint
@property
def distance_constraints(self):
cons = []
for dc in self._pharmacophore.distance_constraints():
fp1 = dc.id_feature_a(), dc.id_feature_point_a()
fp2 = dc.id_feature_b(), dc.id_feature_point_b()
crit = dc.distance_criterion()
rng = crit.min(), crit.max()
w = crit.type()
if w == ChemistryLib.ContactCriterion.ANY:
which = 'Any'
elif w == ChemistryLib.ContactCriterion.INTERMOLECULAR:
which = 'Intermolecular'
else:
which = 'Intramolecular'
cons.append(Pharmacophore.Query.DistanceConstraint(fp1, fp2, rng, which=which))
return tuple(cons)
[docs]
def add_distance_constraint(self, feature1, feature2, distance_range, which):
"""Add a distance constraint between features.
:param feature1: a feature in the model, the tuple contains an id and a point.
:param feature2: a feature in the model, the tuple contains an id and a point.
:param distance_range: a pair of numbers specifying the desired range.
:param which: 'intramolecular', 'intermolecular' or 'any'.
"""
self.intra_only = False
if which.lower().startswith('intra'):
which = ChemistryLib.ContactCriterion.INTRAMOLECULAR
elif which.lower().startswith('inter'):
which = ChemistryLib.ContactCriterion.INTERMOLECULAR
else:
which = ChemistryLib.ContactCriterion.ANY
crit = SubstructureSearchLib.InterAtomicDistanceCriterion(min(distance_range), max(distance_range), which, False)
feat_id1, point_id1 = feature1
feat_id2, point_id2 = feature2
for dc in self._pharmacophore.distance_constraints():
if (
(dc.id_feature_a() == feat_id1 and dc.id_feature_point_a() == point_id1 and
dc.id_feature_b() == feat_id2 and dc.id_feature_point_b() == point_id2) or
(dc.id_feature_a() == feat_id2 and dc.id_feature_point_a() == point_id2 and
dc.id_feature_b() == feat_id1 and dc.id_feature_point_b() == point_id1)
):
dc.set_distance_criterion(crit)
break
else:
assert 0 <= feat_id1 < len(self.features)
assert 0 <= point_id1 < len(self.features[feat_id1].spheres)
fd1 = self.features[feat_id1]._feature_def
s = self.features[feat_id1].spheres[point_id1]
fs1 = MathsLib.Sphere(tuple(s.centre[i] for i in range(3)), s.radius)
assert 0 <= feat_id2 < len(self.features)
assert 0 <= point_id2 < len(self.features[feat_id2].spheres)
fd2 = self.features[feat_id2]._feature_def
s = self.features[feat_id2].spheres[point_id2]
fs2 = MathsLib.Sphere(tuple(s.centre[i] for i in range(3)), s.radius)
constraint = MotifPharmacophoreLib.MotifPharmacophoreConstraintDistance(
feat_id1, point_id1, feat_id2, point_id2, fs1, fs2, fd1, fd2
)
constraint.set_distance_criterion(crit)
self._pharmacophore.add_constraint(constraint)
[docs]
class SearchHit:
"""Pharmacophore search hit."""
def __init__(self, model, _motif_pharmacophore_hit=None):
if _motif_pharmacophore_hit:
self._pharmacophore_hit = _motif_pharmacophore_hit
else:
self._pharmacophore_hit = MotifPharmacophoreLib.MotifPharmacophoreHit()
self.model = model
@property
def identifier(self):
"""The hit identifier"""
return self._pharmacophore_hit.identifier()
@property
def overlay_rmsd(self):
"""The root-mean-square deviation of the pharmacophore overlay"""
return self._pharmacophore_hit.overlay_rmsd()
@property
def overlay_transformation(self):
"""The transformation matrix of the pharmacophore overlay"""
return molecule.Molecule.Transformation(_transformation=self._pharmacophore_hit.overlay_transformation())
@property
def cluster_number(self):
"""The pharmacophore hit cluster number"""
return self._pharmacophore_hit.cluster_number()
@property
def molecule(self):
"""The pharmacophore overlay molecule"""
return molecule.Molecule(self._pharmacophore_hit.identifier(), _molecule=self._pharmacophore_hit.molecule())
@property
def protein(self):
"""The hit's protein, if any."""
return protein.Protein(self.identifier, self._pharmacophore_hit.protein().create_editable_molecule())
@property
def small_molecule(self):
"""The hit's small molecule."""
return molecule.Molecule(self.identifier, self._pharmacophore_hit.small_molecule())
@property
def annotations(self):
"""The annotations of the hit."""
return {
k.strip() : v
for k, v in self._pharmacophore_hit.external_annotations().items()
}
@property
def points(self):
"""The matched points of the hit."""
_m = self._pharmacophore_hit.pharmacophore_match()
pts = []
pdic = dict()
for i in range(_m.nfeature_matches()):
_fm = _m.feature_match(i)
pdic[_fm.pharmacophore_feature_id_] = _fm.points_
for i, p in sorted(pdic.items()):
pts.extend(p)
return tuple(pts)
[docs]
def constraint_values(self):
"""The values of the distance constraints of the query."""
pts = self.points
counts = [len(f.spheres) for f in self.model.features]
values = []
for dc in self.model.distance_constraints:
f_inx, s_inx = dc.feature_point1
inx1 = sum(counts[:f_inx]) + s_inx
f_inx, s_inx = dc.feature_point2
inx2 = sum(counts[:f_inx]) + s_inx
dist = GeometricDescriptors.point_distance(pts[inx1], pts[inx2])
values.append(dist)
return values
[docs]
def feature_points(self, feature):
"""The points in the hit associated with the given feature."""
pts = self.points
counts = [len(f.spheres) for f in self.model.features]
inx = feature._feature_def.id()
n = len(feature.spheres)
start = sum(counts[:inx])
return tuple(pts[start:start+n])
[docs]
class Search(object):
"""A pharmacophore search"""
[docs]
class Settings(object):
"""Settings for a pharmacophore search."""
def __init__(self, max_hits_per_structure=5, max_hit_structures=100000):
"""Initialise settings.
:param max_hits_per_structure: keep only top n matches per database entry
:param max_hit_structures: restrict maximum number of matches per database entry
"""
self._search_manager = MotifPharmacophoreLib.PharmacophoreSearchManager()
self.max_hit_structures = max_hit_structures
self.max_hits_per_structure = max_hits_per_structure
self.n_threads = Pharmacophore._default_n_threads()
@property
def n_threads(self):
"""The number of threads to use in the search."""
return self._search_manager.n_search_threads()
@n_threads.setter
def n_threads(self, value):
self._search_manager.set_n_search_threads(value)
@property
def max_hit_structures(self):
"""The number of hits to retain."""
return self._search_manager.hitGenerationKeepTopNHits()
@max_hit_structures.setter
def max_hit_structures(self, value):
self._search_manager.setHitGenerationKeepTopNHits(value)
@property
def max_hits_per_structure(self):
"""The number of hits per structure to be returned."""
return self._search_manager.searchOptionMaxHitsPerStructure()
@max_hits_per_structure.setter
def max_hits_per_structure(self, value):
self._search_manager.setMaxHitsPerStructure(value)
@property
def max_hit_rmsd(self):
"""The maximum RMSD of hit structures to be returned."""
return self._search_manager.hitGenerationMaxHitRMSD()
@max_hit_rmsd.setter
def max_hit_rmsd(self, value):
self._search_manager.setHitGenerationMaxHitRMSD(value)
@property
def skip_proteins(self):
"""Whether or not to skip protein structures."""
return self._search_manager.hitGenerationSkipProteins()
@skip_proteins.setter
def skip_proteins(self, value):
self._search_manager.setHitGenerationSkipProteins(value)
@property
def complete_small_molecules(self):
"""Whether or not to return the complete small molecule hit."""
return self._search_manager.hitGenerationCompleteSmallMolecules()
@complete_small_molecules.setter
def complete_small_molecules(self, value):
self._search_manager.setHitGenerationCompleteSmallMolecules(value)
@property
def complete_proteins(self):
"""Whether or not to return the complete protein of a hit."""
return self._search_manager.hitGenerationCompleteProteins()
@complete_proteins.setter
def complete_proteins(self, value):
self._search_manager.setHitGenerationCompleteProteins(value)
@property
def three_cubed_packing(self):
"""Whether or not to use 3-cubed packing."""
return self._search_manager.searchOptionUseThreeCubedPacking()
@three_cubed_packing.setter
def three_cubed_packing(self, value):
self._search_manager.setSearchOptionUseThreeCubedPacking(value)
def _clone(self):
settings = Pharmacophore.Search.Settings()
settings.n_threads = self.n_threads
settings.max_hit_structures = self.max_hit_structures
settings.max_hits_per_structure = self.max_hits_per_structure
settings.max_hit_rmsd = self.max_hit_rmsd
settings.skip_proteins = self.skip_proteins
settings.complete_small_molecules = self.complete_small_molecules
settings.complete_proteins = self.complete_proteins
settings.three_cubed_packing = self.three_cubed_packing
return settings
def __init__(self, settings=None):
if settings is None:
settings = Pharmacophore.Search.Settings()
self.settings = settings
self._feature_database = None
self._feature_database_model = MotifPharmacophoreLib.PharmacophoreFeatureDatabaseModel()
self._pm_factory = MotifPharmacophoreLib.PharmacophoreManagerFactory()
self._search_manager = self.settings._clone()._search_manager
self._hits = None
def __del__(self):
self._feature_database_model = None
self._results_manager = None
self._search_manager = None
self._pm_factory = None
@property
def feature_database(self):
"""The CSD-CrossMiner feature database."""
return self._feature_database
@feature_database.setter
def feature_database(self, val):
self._feature_database = val
self._feature_database_model.set_feature_database(val._feature_db)
self._search_manager.set_feature_database_filtered_view(self._feature_database_model.feature_database_filtered_view())
[docs]
def search(self, model, database=None, verbose=False, stop_after=sys.maxsize):
"""Perform a pharmacophore search."""
if database is None:
if self.feature_database is None:
self.feature_database = Pharmacophore.default_feature_database()
database = self.feature_database
try:
self._search_manager = self.settings._clone()._search_manager
self._results_manager = MotifPharmacophoreLib.PharmacophoreResultsModel()
self._search_manager.add_results_writer(self._results_manager)
self._feature_database_model.set_feature_database(database._feature_db)
self._search_manager.set_feature_database_filtered_view(self._feature_database_model.feature_database_filtered_view())
self._search_manager.set_pharmacophore(model._pharmacophore)
self._search_manager.non_qt_update_feature_mapping()
self._search_manager.update()
self._results_manager.set_maximum_number_of_hits_to_save(self.settings.max_hit_structures)
self._hits = None
# This is ugly. Currently we need to reload the feature database to correctly initialise other
# underlying CrossMiner search dependencies.
feature_database_file = database.file_name
fdb_filtered_view = self._search_manager.feature_database_filtered_view()
if verbose:
print('filtered feature database size {}'.format(fdb_filtered_view.size()))
selector = MotifPharmacophoreLib.SelectCSDDatabase()
selector = MotifPharmacophoreLib.select_csd_as_select_structure(selector)
self._feature_database_model.process_feature_database(feature_database_file,
UtilitiesLib.ProgressMonitor(), selector)
db_pool = self._feature_database_model.structure_databases()
self._search_manager.set_structure_databases(db_pool)
self._results_manager.set_accept_hits(True)
self._search_manager.start_search()
# Wait for search to finish.
i = 0
while self._search_manager.is_running():
i += 1
time.sleep(0.1)
if verbose:
print('{} n_searched = {} n_hits = {}'.format(
i, self._search_manager.n_searched(), self._results_manager.n_hits()
))
if self._results_manager.n_hits() >= stop_after:
break
self._search_manager.stop_search()
self._results_manager.set_accept_hits(False)
self._hits = self._results_manager.hits()
finally:
self._search_manager.remove_results_writer(self._results_manager)
self._results_manager = None
if verbose:
for hit in self._hits:
print('Hit id: {} rmsd: {} '.format(hit.identifier(), hit.overlay_rmsd()))
pharmacophore_hits = list(Pharmacophore.SearchHit(model, _motif_pharmacophore_hit=hit) for hit in self._hits)
return pharmacophore_hits
[docs]
class FeatureDatabase(object):
"""A database of pharmacophore features."""
DatabaseInfo = collections.namedtuple('DatabaseInfo', ['file_name', 'n_entries', 'colour'])
def show_dbi(dbi):
return "DatabaseInfo(file_name='%s', n_entries=%d, colour=%s)" % (dbi.file_name, dbi.n_entries, str(dbi.colour))
DatabaseInfo.__repr__ = show_dbi
def __init__(self, file_name, _feature_db=None):
if _feature_db is None:
_feature_db = MotifPharmacophoreLib.MotifPharmacophoreFeatureDatabase()
self.file_name = file_name
self._feature_db = _feature_db
fd_names = [f.identifier for f in self.feature_definitions]
if 'excluded_volume' not in fd_names:
self.add_feature_definition(Pharmacophore.ExcludedVolume(GeometricDescriptors.Sphere((0, 0, 0), 0)))
def __len__(self):
return self._feature_db.size()
[docs]
@staticmethod
def from_file(feature_database_path):
"""Read a pharmacophore feature database from the specified path.
:param feature_database_path: pharmacophore feature database file path.
:returns: :class:`ccdc.Pharmacophore.FeatureDatabase` instance.
"""
progress_monitor = UtilitiesLib.ProgressMonitor()
feature_database = MotifPharmacophoreLib.MotifPharmacophoreFeatureDatabase()
feature_database.read(feature_database_path, progress_monitor)
return Pharmacophore.FeatureDatabase(feature_database_path, feature_database)
[docs]
def write(self, file_name):
"""Write a pharmacophore feature database to the specified path.
:param file_name: pharmacophore feature database file path.
"""
progress_monitor = UtilitiesLib.ProgressMonitor()
self._feature_db.write(file_name, progress_monitor)
@property
def schema_version(self):
"""The database schema version."""
return self._feature_db.version()
@property
def databases(self):
"""The structure databases used in the construction of this feature database.
Each database is presented as a tuple of file_name, nunmber of entries, and colour."""
colours = tuple(
self._feature_db.colour(i) for i in range(self._feature_db.number_of_source_databases())
)
return tuple(
Pharmacophore.FeatureDatabase.DatabaseInfo(
self._feature_db.file_name(i),
self._feature_db.nentries_from_source_database(i),
Colour(c.r_, c.g_, c.b_, c.a_)
) for i, c in enumerate(colours)
)
@property
def identifiers(self):
"""All identifiers from the database."""
return self._feature_db.identifiers()
def entries(self):
for i in range(self._feature_db.size()):
yield Pharmacophore.FeatureDatabase.Entry(_entry=self._feature_db.entry(i))
def __iter__(self):
"""Iterator."""
return self.entries() # pylint: disable=E1101
def __getitem__(self, index):
if index < 0 or index >= len(self):
raise IndexError('FeatureDatabase: index out of range')
return Pharmacophore.FeatureDatabase.Entry(_entry=self._feature_db.entry(index))
[docs]
def entry(self, identifier):
"""The :class:`ccdc.pharmacophore.Pharmacophore.FeatureDatabase.Entry` corresponding to the identifier.
:param identifier: identifier of the entry.
:returns: a :class:`ccdc.pharmacophore.Pharmacophore.FeatureDatabase.Entry` instance.
:raises: RuntimeError if the identifier is not present.
"""
if not self._feature_db.identifier_exists(UtilitiesLib.DatabaseEntryIdentifier(identifier)):
raise RuntimeError('FeatureDatabase: %s not present' % identifier)
return Pharmacophore.FeatureDatabase.Entry(
_entry=self._feature_db.entry(identifier)
)
@property
def feature_definitions(self):
"""The feature definitions present in the database."""
return tuple(
Pharmacophore.FeatureDefinition(_feature_def=f)
for f in self._feature_db.feature_definitions()
) + tuple(
Pharmacophore.FeatureDefinition(_feature_def=f)
for f in self._feature_db.non_indexed_feature_definitions()
) + tuple(
Pharmacophore.FeatureDefinition(_feature_def=f)
for f in self._feature_db.non_matchable_feature_definitions()
)
[docs]
def add_feature_definition(self, feature_definition):
"""Add a feature definition to the database.
Note: this feature definition will not be indexed in the database, so will run rather more slowly if
required in a search. To add the corresponding index, the database will need to be recreated with the
feature present.
"""
if isinstance(feature_definition, Pharmacophore.FeatureDefinition):
_fd = feature_definition._feature_def
elif isinstance(feature_definition, Pharmacophore.Feature):
_fd = feature_definition._feature_def
else:
raise NotImplementedError('Cannot add one of these to a database %s' % str(feature_definition))
db_fd = MotifPharmacophoreLib.MotifPharmacophoreFeatureDatabaseFeatureDefinition()
db_fd.set_feature_deducer(_fd.feature_deducer())
label = _fd.component_label()
if label == 2:
labels = [0, 1]
else:
labels = [label]
db_fd.set_component_labels(labels)
if _fd.is_matchable():
self._feature_db.add_non_indexed_feature_definition(db_fd)
else:
self._feature_db.add_non_matchable_feature_definition(db_fd)
@property
def annotation_keys(self):
"""The annotations available in this database."""
return self._feature_db.available_annotations()
[docs]
def annotate(self, pattern, **kw):
"""Add or replace an annotation in the database.
:param pattern: a glob-style pattern which will be used to match against identifiers.
All entries in the database with a matching identifier will be annotated with the key, value pairs of the `kw` argument.
"""
for k, w in kw.items():
self._feature_db.set_external_annotation(pattern, k, w)
[docs]
def clear_annotations(self):
"""Remove all annotations from the database."""
self._feature_db.clear_external_annotations()
[docs]
class Entry(object):
"""An entry in a FeatureDatabase."""
class _write_through_dict(dict):
def __init__(self, _entry, *args, **kw):
self._entry = _entry
dict.__init__(self, *args, **kw)
def __setitem__(self, key, value):
dict.__setitem__(self, key, value)
self._entry.set_external_annotation(key, str(value))
def update(self, dic):
for k, v in dic.items():
self[k] = v
def __init__(self, _entry=None):
self._entry = _entry
@property
def identifier(self):
"""The identifier of the entry."""
return self._entry.name()
@identifier.setter
def identifier(self, value):
self._entry.set_name(value)
@property
def annotations(self):
"""The dictionary of annotations for the entry."""
return Pharmacophore.FeatureDatabase.Entry._write_through_dict(self._entry, self._entry.external_annotations())
[docs]
def clear_annotations(self):
"""Clear all annotations from the entry."""
self._entry.clear_external_annotations()
@property
def source_database_index(self):
return self._entry.source_db_id()
[docs]
class Creator(object):
"""The creator for feature databases."""
[docs]
class Settings(object):
"""Settings for feature database creation."""
def __init__(self, feature_definition_directory=None, n_threads=None, skip_duplicate_identifiers=False):
if feature_definition_directory is None:
feature_definition_directory = Pharmacophore._feature_definition_directory()
self.feature_definition_directory = feature_definition_directory
if n_threads is None or n_threads < 1:
n_threads = Pharmacophore._default_n_threads()
self.n_threads = n_threads
self.skip_duplicate_identifiers = skip_duplicate_identifiers
[docs]
class StructureDatabase(object):
"""A database of structures from which a feature database may be created."""
def __init__(self, db_info, use_crystal_symmetry=True, sqlmol2_database_path=None,
structure_filters=None, structure_database_path=None):
"""Initialise for creation.
If you are creating a Structure database from the CSD set DatabaseInfo.file_name to a list of CSD
database file names. If you want to include any available CSD updates include them in the list.
Alternatively, set DatabaseInfo.file_name to the string 'csd' rather than a list and all CSD updates
will be automatically included.
:param db_info: a :class:`ccdc.pharmacophore.FeatureDatabase.DatabaseInfo` instance. Here the
n_entries attribute is the desired number of entries in the resulting database.
:param use_crystal_symmetry: whether or not to use crystal symmetry when generating features.
:param structure_database_path: where to save the generated structure database. Supported types are
.csdsqlx and .sqlmol2.
:param structure_filters: a :class:`ccdc.search.Settings` instance to filter the source database.
"""
self.db_info = db_info
self.use_crystal_symmetry = use_crystal_symmetry
if sqlmol2_database_path is not None:
warnings.warn("sqlmol2_database_path is deprecated and will be removed. Use structure_database_path instead.", DeprecationWarning)
self.structure_database_path = structure_database_path or sqlmol2_database_path
self.structure_filters = structure_filters
def _database_data(self, skip_duplicate_identifiers):
if self.structure_database_path is not None:
self._database_path = self.db_info.file_name
if not os.path.exists(self.structure_database_path):
self._create_structure_database(self.structure_database_path, skip_duplicate_identifiers)
with io.EntryReader(self.structure_database_path) as reader:
database_data = MotifPharmacophoreLib.StructureDatabaseFeatureCreatorData(
os.path.basename(self.structure_database_path),
MotifPharmacophoreLib.MotifPharmacophoreFeatureColour(*self.db_info.colour),
not self.use_crystal_symmetry,
reader._db
)
return [database_data]
else:
csd = io.EntryReader(self.db_info.file_name)
if self.structure_filters is None:
self.structure_filters = self.default_csd_filters()
if isinstance(csd.file_name, (list, tuple)):
paths = csd.file_name
else:
paths = [csd.file_name]
database_data = []
if 0 < self.db_info.n_entries < sys.maxsize:
to_get = self.db_info.n_entries
else:
to_get = sys.maxsize
for self._database_path in paths:
with io.EntryReader(self._database_path) as reader:
if self.structure_filters or 0 < self.db_info.n_entries < sys.maxsize:
identifiers = self._apply_csd_filters(to_get, self.structure_filters)
data = MotifPharmacophoreLib.StructureDatabaseFeatureCreatorData(
os.path.basename(self._database_path),
MotifPharmacophoreLib.MotifPharmacophoreFeatureColour(*self.db_info.colour),
not self.use_crystal_symmetry,
reader._db,
identifiers
)
database_data.append(data)
to_get -= len(identifiers)
if to_get <= 0:
break
return database_data
def _create_structure_database(self, structure_database_path, skip_duplicate_identifiers):
"""Convert input structure database to a structure database.
:param structure_database_path: The path to the output structure database file.
"""
progress_monitor = UtilitiesLib.ProgressMonitor()
self._append_to_structure_database(progress_monitor, self._database_path, structure_database_path, skip_duplicate_identifiers)
def _append_to_structure_database(self, progress_monitor, database_path, structure_database_path, skip_duplicate_identifiers):
input_files = []
if os.path.isdir(database_path):
for path, dirs, files in os.walk(database_path):
input_files.extend([os.path.join(path,f) for f in files
if f.endswith(".mol2") or f.endswith(".sdf")])
else:
input_files.append(self._database_path)
if os.path.exists(structure_database_path):
os.unlink(structure_database_path)
suffix = structure_database_path[-8:]
if structure_database_path.endswith('.sqlmol2') or structure_database_path.endswith('.csdsqlx'):
structure_database_path = structure_database_path[:-8]
try:
if skip_duplicate_identifiers:
duplicates_policy = MotifPharmacophoreLib.SkipDuplicateEntryPolicy()
else:
duplicates_policy = MotifPharmacophoreLib.StopDuplicateEntryPolicy()
if suffix == '.csdsqlx':
MotifPharmacophoreLib.convert_bindingsites_to_csdsqlx(
input_files, structure_database_path, progress_monitor, duplicates_policy
)
else:
MotifPharmacophoreLib.convert_bindingsites_to_sqlmol2(
input_files, structure_database_path, progress_monitor, duplicates_policy
)
except RuntimeError as e:
for ext in ['.csdsqlx', '.sqlmol2']:
db_path = structure_database_path + ext
if os.path.isfile(db_path):
os.remove(db_path)
if len(e.args) > 0 and "is already in the database" in e.args[0]:
raise RuntimeError(("Error creating structure database {}, "
"entry identifiers should be unique".format(structure_database_path), e.args[0]))
else:
raise
[docs]
@staticmethod
def default_csd_filters():
"""Suggested filters for CSD structures to include in pharmacophore feature database."""
filter_settings = search.Search.Settings()
filter_settings.has_3d_coordinates = True
filter_settings.max_r_factor = 10.
filter_settings.no_disorder = True
filter_settings.not_polymeric = True
filter_settings.only_organic = False
filter_settings.must_not_have_elements = [
'He', 'Be', 'Ne', 'Al', 'Si', 'Ar', 'Sc', 'Ti', 'V', 'Cr',
'Ga', 'Ge', 'As', 'Se', 'Kr', 'Rb', 'Y', 'Zr', 'Nb', 'Mo',
'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd', 'In', 'Sn', 'Sb', 'Te',
'Xe', 'Cs', 'Ba', 'La', 'Ce', 'Pr', 'Nd', 'Pm', 'Sm', 'Eu',
'Gd', 'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb', 'Lu', 'Hf', 'Ta',
'W', 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg', 'Tl', 'Pb', 'Bi',
'Po', 'Rn', 'Fr', 'Ra', 'Ac', 'Th', 'Pa', 'U'
]
return filter_settings
def _apply_csd_filters(self, max_hit_structures, filter_settings):
"""Filter input CSD database.
:param maximum_hit_structures: The maximum number of structures to retain, or ``None``
for no limit. Default ``None``.
:param filter_settings: Structure quality filters to be applied. Object of type
:class:`SubstructureSearch.Settings`, boolean ``True`` for default filters, or ``None``
for no filtering. Default ``None``.
"""
with io.EntryReader(self._database_path) as reader:
if not filter_settings:
filter_settings = search.Search.Settings()
elif filter_settings is True: # default filters, for backwards compatibility.
filter_settings = self.default_csd_filters()
identifiers = []
for e in reader:
if filter_settings.test(e):
identifiers.append(e.identifier)
if len(identifiers) >= max_hit_structures:
break
return identifiers
def __init__(self, settings=None):
if settings is None:
settings = Pharmacophore.FeatureDatabase.Creator.Settings()
self.settings = settings
self._creator = MotifPharmacophoreLib.MotifPharmacophoreFeatureDatabaseCreator()
self._creator.set_number_of_threads(self.settings.n_threads)
[docs]
def create(self, structure_databases, feature_defs=None):
"""Create the feature database.
:param structure_databases: an iterable of :class:`ccdc.pharmacophore.Pharmacophore.FeatureDatabase.Creator.StructureDatabase` instances.
:param feature_defs: an iterable of :class:`ccdc.pharmacophore.Pharmacophore.FeatureDefinition`
instances or None. If None, the default set of feature definitions in read in by
:meth:`ccdc.pharmacophore.Pharmacophore.read_feature_definitions` will be used.
"""
if feature_defs is None:
if not Pharmacophore.feature_definitions:
Pharmacophore.read_feature_definitions()
feature_defs = list(Pharmacophore.feature_definitions.values())
def sort_key(fd):
d = dict(any=0, small_molecule=1, protein=2)
adict = dict(
acceptor=0,
acceptor_projected=1,
donor_ch_projected=2,
donor_projected=3,
heavy_atom=4,
hydrophobe=5,
ring=6,
ring_non_planar=7,
ring_planar_projected=8,
purine=9,
pyrimidine=10,
adenine=11,
cytosine=12,
guanine=13,
thymine=14,
uracil=15,
deoxyribose=16,
ribose=17
)
sdict = dict(
exit_vector=0,
halogen=1,
bromine=2,
chlorine=3,
fluorine=4,
metal=5,
water=6
)
pdict = dict(
ALA=0,
ARG=1,
ASN=2,
ASP=3,
CYS=4,
GLN=5,
GLU=6,
GLY=7,
HIS=8,
ILE=9,
LEU=10,
LYS=11,
MET=12,
PHE=13,
PRO=14,
SER=15,
THR=16,
TRP=17,
TYR=18,
VAL=19,
)
k = fd.component_label
n = fd.identifier
if k == 'any':
v = (d[k], adict.get(n, 99), n)
if k == 'small_molecule':
v = (d[k], sdict.get(n, 99), n)
if k == 'protein':
v = (d[k], pdict.get(n, 99), n)
return v
feature_defs.sort(key=sort_key)
_fds = [Pharmacophore._database_feature_def_from_fd(fd) for fd in feature_defs]
databases = []
for sdb in structure_databases:
databases.extend(sdb._database_data(self.settings.skip_duplicate_identifiers))
progress_monitor = UtilitiesLib.ProgressMonitor()
feature_db = self._creator.create_feature_database(databases, _fds, progress_monitor)
return Pharmacophore.FeatureDatabase('', _feature_db=feature_db)