#
# This code is Copyright (C) 2015 The Cambridge Crystallographic Data Centre
# (CCDC) of 12 Union Road, Cambridge CB2 1EZ, UK and a proprietary work of CCDC.
# This code may not be used, reproduced, translated, modified, disassembled or
# copied, except in accordance with a valid licence agreement with CCDC and may
# not be disclosed or redistributed in any form, either in whole or in part, to
# any third party. All copies of this code made in accordance with a valid
# licence agreement as referred to above must contain this copyright notice.
#
# No representations, warranties, or liabilities are expressed or implied in the
# supply of this code by CCDC, its servants or agents, except where such
# exclusion or limitation is prohibited, void or unenforceable under governing
# law.
#
"""
pharmacophore.py - access to pharmacophore searches.
This allows pharmacophore models to be read and written, and using these to run searches.
Access to the hits is also provided.
"""
#########################################################################################################
import os
import sys
import collections
import time
import multiprocessing
import warnings
from ccdc import io, search, molecule, utilities, protein
from ccdc.descriptors import GeometricDescriptors
from ccdc.utilities import Colour
with utilities._private_importer() as pi:
pi.import_ccdc_module('MotifPharmacophoreLib')
pi.import_ccdc_module('SubstructureSearchLib')
pi.import_ccdc_module('ChemistryLib')
pi.import_ccdc_module('MathsLib')
pi.import_ccdc_module('UtilitiesLib')
MotifPharmacophoreLib.licence_check()
#########################################################################################################
[docs]class Pharmacophore(object):
"""Namespace for the Pharmacophore functionality."""
crossminer_version = '1.4.0'
feature_definitions = dict()
@staticmethod
def _feature_definition_directory():
"""Location of the default feature definitions."""
loc = io._CSDDatabaseLocator.get_crossminer_feature_definition_directory()
if loc is None or not os.path.exists(loc):
import ccdc
loc = ccdc._find_crossminer_feature_directory()
if loc is None or not os.path.exists(loc):
raise RuntimeError('Unable to find the pharmacophore feature definition directory')
return loc
[docs] @staticmethod
def read_feature_definitions(directory=None):
"""Read all the feature definitions found in any subdirectory of the provided argument.
By default this will read the feature definitions supplied by the CSD.
"""
if directory is None:
directory = Pharmacophore._feature_definition_directory()
for dirpath, dirnames, filenames in os.walk(directory):
for fname in filenames:
if fname.endswith('.cpf'):
fd = Pharmacophore.FeatureDefinition.from_file(os.path.join(dirpath, fname))
Pharmacophore.feature_definitions[fd.identifier] = fd
[docs] @staticmethod
def default_feature_database():
"""The CSD provided CrossMiner feature database."""
return Pharmacophore.FeatureDatabase.from_file(
io._CSDDatabaseLocator.get_crossminer_database_location()
)
[docs] @staticmethod
def default_feature_database_location():
"""The path to the default CrossMiner database."""
return io._CSDDatabaseLocator.get_crossminer_database_location()
_component_label_dict = utilities.bidirectional_dict((
(0, 'protein'),
(1, 'small_molecule'),
(2, 'any')
))
_apply_rule_dic = utilities.bidirectional_dict((
(MotifPharmacophoreLib.MotifPharmacophoreSMARTSFeature.ALWAYS, 'Always'),
(MotifPharmacophoreLib.MotifPharmacophoreSMARTSFeature.H_PRESENT, 'With_h'),
(MotifPharmacophoreLib.MotifPharmacophoreSMARTSFeature.NO_H_PRESENT, 'No_h')
))
# Not currently used
_planarity_dic = utilities.bidirectional_dict((
(MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorCentroid.ANY, 'Any'),
(MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorCentroid.PLANAR, 'Planar'),
(MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorCentroid.NON_PLANAR, 'Non_planar'),
))
@staticmethod
def _default_n_threads():
n_threads = max(int(multiprocessing.cpu_count() / 2), 1)
return n_threads
@staticmethod
def _database_feature_def_from_fd(fd):
_fd = MotifPharmacophoreLib.MotifPharmacophoreFeatureDatabaseFeatureDefinition()
_fd.set_feature_deducer(fd._feature_def.feature_deducer())
_fd.set_colour(MotifPharmacophoreLib.MotifPharmacophoreFeatureColour(*fd.colour))
label = Pharmacophore._component_label_dict.inverse_lookup(fd.component_label)
if label == 2:
labels = [0, 1]
else:
labels = [label]
_fd.set_component_labels(labels)
return _fd
[docs] class PointGenerator(object):
"""A mechanism for generating spheres from a SMARTS match."""
def __init__(self, _pg=None):
self._pg = _pg
@property
def parameters(self):
"""The optional parameters of the point generator."""
return {
p.replace(' ', '_') : v
for p, v in zip(self._pg.optional_parameter_names(), self._pg.optional_parameters())
}
[docs] def feature_points(self, atoms):
"""The points generated by this generator.
:raises: RuntimeError if the sequence of points is inappropriate for the generator.
"""
_motif_pts = self._pg.motif_features([a._atom for a in atoms])
return tuple(p for mf in _motif_pts for p in mf.points())
[docs] class DummyPointGenerator(PointGenerator):
"""Generates no points."""
def __init__(self, identifier):
self._pg = MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorDummy()
self.identifier = identifier
[docs] class PointPointGenerator(PointGenerator):
"""Generates a single point at the position of the relevant atom."""
def __init__(self, identifier):
self._pg = MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorPoint()
self.identifier = identifier
[docs] class LinearPointGenerator(PointGenerator):
"""Generates a point displaced from the relevant atom."""
def __init__(self, identifier, distance=2.8, neighbour_is_base_feature=False):
self._pg = MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorLinear(
distance, neighbour_is_base_feature)
self.identifier = identifier
[docs] class NormalPointGenerator(PointGenerator):
"""Generates a point from the normal to the plane of atoms."""
def __init__(self, identifier, distance=2.8):
self._pg = MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorNormal(
distance, False, 0.0
)
self.identifier = identifier
[docs] class PlanarNormalPointGenerator(PointGenerator):
"""Generates a point from the normal to the plane of a planar ring."""
def __init__(self, identifier, distance=2.8, planar_ring_tolerance=0.0):
self._pg = MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorNormal(
distance, True, planar_ring_tolerance
)
self.identifier = identifier
[docs] class AnyCentroidPointGenerator(PointGenerator):
"""Generates a point at the centroid of atoms."""
def __init__(self, identifier):
self._pg = MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorCentroid(
MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorCentroid.ANY, 0.0
)
self.identifier = identifier
[docs] class PlanarCentroidPointGenerator(PointGenerator):
"""Generates a point at the centre of a planar ring."""
def __init__(self, identifier, atom_planarity_threshold=0.0):
self._pg = MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorCentroid(
MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorCentroid.PLANAR, atom_planarity_threshold
)
self.identifier = identifier
[docs] class NonPlanarCentroidPointGenerator(PointGenerator):
"""Generates a point at the centroid of atoms which are not planar."""
def __init__(self, identifier, atom_planarity_threshold=0.0):
self._pg = MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorCentroid(
MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorCentroid.NON_PLANAR, atom_planarity_threshold
)
self.identifier = identifier
[docs] class TrigonalPointGenerator(PointGenerator):
"""Generates two points with a trigonal geometry."""
def __init__(self, identifier, distance=2.8):
self._pg = MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorTrigonal(distance)
self.identifier = identifier
[docs] class TetrahedralPointGenerator(PointGenerator):
"""Generates two points with a tetrahedral geometry."""
def __init__(self, identifier, distance=2.8):
self._pg = MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorTetrahedral(distance)
self.identifier = identifier
[docs] class SMARTSDefinition(object):
"""A SMARTS definition with associated point generation schemes."""
def __init__(self, smarts, point_generators, _sd=None, **kw):
# _incomplete if made by hand. Need to resolve the motif_feature_types() when in a feature_def
#
if _sd is None:
self._sd = MotifPharmacophoreLib.MotifPharmacophoreSMARTSFeature()
self._sd.set_SMARTSdefinition(smarts)
self._point_generators = point_generators
self._incomplete = True
app_rule = kw.get('apply_rule')
if app_rule is None:
app = MotifPharmacophoreLib.MotifPharmacophoreSMARTSFeature.ALWAYS
else:
app = Pharmacophore._apply_rule_dic.inverse_lookup(app_rule)
self._sd.set_apply_rule(app)
self._sd.set_permutational_equivalence(kw.get('permutational_equivalence', True))
self._sd.set_allow_overlapping_substructures(kw.get('allow_overlaps', False))
else:
self._sd = _sd
self._incomplete = False
self._point_generators = point_generators
@staticmethod
def _from_SMARTS_feature_def(_sd, _pgs):
return Pharmacophore.SMARTSDefinition(
_sd.SMARTSdefinition(),
tuple(
(_pgs.feature_types()[i].name(), v)
for i, v in _sd.motif_feature_types()
),
_sd,
apply_rule=Pharmacophore._apply_rule_dic[_sd.apply_rule()],
permutional_equivalence=_sd.permutational_equivalence(),
allow_overlaps=_sd.allow_fully_overlapping_substructures()
)
@property
def SMARTS(self):
"""The SMARTS string."""
return self._sd.SMARTSdefinition()
@property
def apply_rule(self):
"""How to apply the SMARTS definition."""
return Pharmacophore._apply_rule_dic[self._sd.apply_rule()]
@property
def permutational_equivalence(self):
"""Whether permutionally equivalent structures are deemed to be a single match."""
return self._sd.permutational_equivalence()
@property
def allow_overlaps(self):
"""Whether fully overlapping structures are permitted to be a hit."""
return self._sd.allow_fully_overlapping_substructures()
@property
def point_generators(self):
"""The generators for feature points."""
return self._point_generators
[docs] def search(self, crystal):
"""Search a crystal for instances of the SMARTS pattern."""
raise NotImplementedError('Pharmacophore.SMARTSDefinition.search')
[docs] class FeatureDefinition(object):
"""A feature definition in a feature database."""
_point_generators = utilities.bidirectional_dict(
Dummy= MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorDummy,
Point= MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorPoint,
Linear= MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorLinear,
Normal= MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorNormal,
Centroid= MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorCentroid,
Trigonal= MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorTrigonal,
Tetrahedral=MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorTetrahedral,
)
def __init__(self, _feature_def=None):
self._feature_def = _feature_def
def __repr__(self):
return 'FeatureDefinition(%s)' % self.identifier
[docs] @staticmethod
def from_file(file_name):
"""Reads a structural feature definition from a file."""
component = os.path.basename(os.path.dirname(file_name))
try:
tag = Pharmacophore._component_label_dict.inverse_lookup(component)
except Exception as exc:
tag = 2
_creator = MotifPharmacophoreLib.MotifPharmacophoreFeatureDatabaseCreator()
_fd = _creator.read_feature_definition(file_name)
_feature_definition = MotifPharmacophoreLib.MotifPharmacophoreFeatureDefinition()
_feature_definition.set_feature_deducer(_fd.feature_deducer())
_feature_definition.set_component_label(tag)
_feature_definition.set_colour(_fd.colour())
return Pharmacophore.FeatureDefinition(_feature_def=_feature_definition)
[docs] @staticmethod
def from_SMARTS_definitions(identifier, smarts, point_generators):
"""Create a feature from SMARTS definitions.
:param identifier: string
:param smarts: iterable of :class:`ccdc.pharmacophore.Pharmacophore.SMARTS_definition` instances.
:param point_generators: iterable of :class:`ccdc.pharmacophore.Pharmaophore.PointGenerator` instances.
"""
pgs = MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGenerationScheme()
name_dic = dict()
for i, pg in enumerate(point_generators):
name_dic[pg.identifier] = i
pgs.add_feature_type(pg.identifier, pg._pg)
_sdefs = []
for sd in smarts:
_sdefs.append(sd._sd)
pg_spec = []
for name, ats in sd.point_generators:
inx = name_dic[name]
pg_spec.append((inx, ats))
sd._sd.set_motif_feature_types(pg_spec)
ded = MotifPharmacophoreLib.MotifPharmacophoreFeatureDeducerSubstructure(_sdefs, pgs)
ded.set_name(identifier)
fd = Pharmacophore.FeatureDefinition()
fd._feature_def = MotifPharmacophoreLib.MotifPharmacophoreFeatureDefinition()
fd._feature_def.set_feature_deducer(ded)
fd._feature_def.set_description(identifier)
return fd
@property
def SMARTS_definitions(self):
deducer = self._feature_def.feature_deducer()
if deducer.class_name() == 'MotifPharmacophoreFeatureDeducerSubstructure':
_s = MotifPharmacophoreLib.deducer_as_substructure(deducer)
return tuple(
Pharmacophore.SMARTSDefinition._from_SMARTS_feature_def(_fd, _s.point_generation_scheme())
for _fd in _s.SMARTS_motif_feature_types()
)
@property
def identifier(self):
"""The identifier of the feature."""
return self._feature_def.name()
@property
def component_label(self):
"""Which component ('protein', 'small_molecule' or 'any') the feature def applies to."""
if hasattr(self._feature_def, 'component_label'):
return Pharmacophore._component_label_dict[self._feature_def.component_label()]
else:
t = tuple(Pharmacophore._component_label_dict[x] for x in self._feature_def.component_labels())
if len(t) == 1:
return t[0]
else:
return 'Any'
@component_label.setter
def component_label(self, value):
self._feature_def.set_component_label(
Pharmacophore._component_label_dict.inverse_lookup(value)
)
@property
def colour(self):
"""The colour associated with this feature."""
col = self._feature_def.colour()
return Colour(col.r_, col.g_, col.b_, col.a_)
@colour.setter
def colour(self, val):
col = MotifPharmacophoreLib.MotifPharmacophoreFeatureColour(*val)
self._feature_def.set_colour(col)
[docs] def write(self, file_name):
"""Writes the feature definition."""
if self._feature_def.feature_deducer().class_name() == 'MotifPharmacophoreFeatureDeducerSubstructure':
ded = MotifPharmacophoreLib.deducer_as_substructure(self._feature_def.feature_deducer())
writer = MotifPharmacophoreLib.SMARTSMotifFeatureTypeFileWriter(ded)
ostr = MotifPharmacophoreLib.ofstream(file_name)
writer.write_content('', ostr)
ostr.close()
else:
raise RuntimeError('Not a SMARTS feature definition')
@property
def point_generator_names(self):
"""The names of the point generator schemes associated with this feature definition."""
ded = MotifPharmacophoreLib.deducer_as_substructure(self._feature_def.feature_deducer())
pgs = ded.point_generation_scheme()
fts = pgs.feature_types()
return tuple(f.name() for f in fts)
def detect_features(self, crystal):
_csv = ChemistryLib.CrystalStructureView.instantiate(crystal._crystal)
_ssc = MotifPharmacophoreLib.MotifPharmacophoreSearchStructureCreator()
_ssc.register_components_from_feature_definitions((self._feature_def,))
_mss = _ssc.search_structure(_csv)
_ded = self._feature_def.feature_deducer()
_feats = _ded.generate_motif_features(_mss, self._feature_def.component_label())
features = []
for i in range(_feats.end_index()):
code, feat_pts = _feats.at(i)
for fp in feat_pts:
pts = fp.points()
spheres = tuple(GeometricDescriptors.Sphere((p[0], p[1], p[2]), 1.0) for p in pts)
# Skip duplicates
dup = False
if len(spheres) == 2:
for f in features:
if len(f.spheres) == 2:
if (
f.spheres[0] == spheres[1] and
f.spheres[1] == spheres[0]
):
dup = True
break
if not dup:
features.append(
Pharmacophore.Feature(self._clone(), *spheres)
)
return tuple(features)
[docs] def features_from_atoms(self, point_generator, *atoms):
"""Create features from this feature definition and an atom of the appropriate type.
:param point_generator: a string giving one of the point_generator_names associated with this definition.
:param atoms: an iterable of :class:`ccdc.molecule.Atom` instances.
:returns: a :class:`ccdc.pharmacophore.Pharmacophore.Feature` instance.
:raises RuntimeError: if the atom is not suitable for the feature definition.
"""
ded = MotifPharmacophoreLib.deducer_as_substructure(self._feature_def.feature_deducer())
pgs = ded.point_generation_scheme()
for ft in pgs.feature_types():
if ft.name().upper() == point_generator.upper():
mfg = ft.motif_feature_generator()
mpts = mfg.motif_features([a._atom for a in atoms])
pts = [mpt.points() for mpt in mpts]
spheres = [[GeometricDescriptors.Sphere(tuple(p[i] for i in range(3)), 1.0) for p in l] for l in pts]
feats = [Pharmacophore.Feature(self._clone(), *ss) for ss in spheres]
return tuple(feats)
else:
raise RuntimeError('Unknown point generator %s' % point_generator)
[docs] def copy(self):
"""Copies the Feature so it can be edited freely."""
return self._clone()
def _clone(self):
_fd = MotifPharmacophoreLib.MotifPharmacophoreFeatureDefinition()
_fd.set_feature_deducer(self._feature_def.feature_deducer())
_fd.set_component_label(self._feature_def.component_label())
_fd.set_colour(self._feature_def.colour())
_fd.set_available_component_labels(self._feature_def.available_component_labels())
_fd.set_description(self._feature_def.description())
_fd.set_id(self._feature_def.id()) # Maybe increment?
return Pharmacophore.FeatureDefinition(_fd)
[docs] class Feature(object):
"""A feature in a Query."""
def __init__(self, feature_definition, *spheres):
if feature_definition is None:
# Required for ExcludedVolume and AnnotatoinFilter
self._feature_def = MotifPharmacophoreLib.MotifPharmacophoreFeatureDefinition()
else:
self._feature_def = feature_definition._clone()._feature_def
for s in spheres:
self.add_sphere(s)
def __repr__(self):
return 'Feature(%s)' % self.identifier
@property
def colour(self):
"""The colour of the spheres."""
col = self._feature_def.colour()
return Colour(col.r_, col.g_, col.b_, col.a_)
@colour.setter
def colour(self, value):
col = MotifPharmacophoreLib.MotifPharmacophoreFeatureColour(*value)
self._feature_def.set_colour(col)
@property
def component_label(self):
"""The component label"""
return Pharmacophore._component_label_dict[self._feature_def.component_label()]
@component_label.setter
def component_label(self, value):
val = Pharmacophore._component_label_dict.inverse_lookup(value)
self._feature_def.set_component_label(val)
if hasattr(self, '_query'):
self._query._pharmacophore.set_component_label_of_feature(self._feature_def.id(), int(val))
@property
def description(self):
"""Description of the feature definition."""
return self._feature_def.description()
@description.setter
def description(self, value):
self._feature_def.set_description(value)
@property
def identifier(self):
"""The identifier of the feature definition."""
return self._feature_def.name()
@property
def spheres(self):
"""The spheres defined by this feature definition."""
return tuple(GeometricDescriptors.Sphere(s.centre(), s.radius()) for s in self._feature_def.spheres())
@spheres.setter
def spheres(self, values):
if len(self.spheres) != len(values):
raise RuntimeError('Mismatch of number of spheres')
_feature_def = MotifPharmacophoreLib.MotifPharmacophoreFeatureDefinition()
_feature_def.set_feature_deducer(self._feature_def.feature_deducer())
_feature_def.set_filter(self._feature_def.filter())
_feature_def.set_component_label(self._feature_def.component_label())
for s in values:
_feature_def.add_sphere(
MathsLib.Sphere(tuple(s.centre[i] for i in range(3)), s.radius)
)
_feature_def.set_colour(self._feature_def.colour())
_feature_def.set_available_component_labels(self._feature_def.available_component_labels())
_feature_def.set_id(self._feature_def.id())
_feature_def.set_description(self._feature_def.description())
self._feature_def.set_spheres(
tuple(MathsLib.Sphere(tuple(v.centre[i] for i in range(3)), v.radius) for v in values)
)
[docs] def add_sphere(self, sphere):
"""Add a sphere to the feature definition.
:param sphere: :class:`ccdc.descriptors.GeometricDescriptors.Sphere` instance
"""
_sphere = MathsLib.Sphere([sphere.centre[i] for i in range(3)], sphere.radius)
self._feature_def.add_sphere(_sphere)
[docs] class ExcludedVolume(Feature):
"""
An excluded volume feature. This expresses a region to exclude if atoms are in this region.
:param sphere: the sphere to exclude
:param smarts: an optional SMARTS pattern that means you can exclude hits that have a specific substructure
within the sphere
"""
def __init__(self, sphere, smarts="[*]"):
Pharmacophore.Feature.__init__(self, None)
deducer = MotifPharmacophoreLib.MotifPharmacophoreFeatureDeducerExcludedVolume()
self._feature_def.set_feature_deducer(deducer)
self._feature_def.set_component_label(2)
self._filterer = MotifPharmacophoreLib.MotifPharmacophoreFilterSubstructure()
self._filterer.set_name(deducer.name())
self._filterer.set_key(smarts)
self._feature_def.set_filter(self._filterer)
self.add_sphere(sphere)
@property
def filter_smarts(self):
"""The SMARTS pattern that controls when an atom excludes in a given volume by substructure matching."""
return self._filterer.key()
@filter_smarts.setter
def filter_smarts(self, smarts):
self._filterer.set_key(smarts)
[docs] class AnnotationFilter(Feature):
def __init__(self, key, value):
Pharmacophore.Feature.__init__(self, None)
deducer = MotifPharmacophoreLib.MotifPharmacophoreFeatureDeducerAnnotation()
deducer.set_key(key)
deducer.set_value(value)
self._feature_def.set_feature_deducer(deducer)
@property
def key(self):
"""The annotation filter key."""
return MotifPharmacophoreLib.deducer_as_annotation(self._feature_def.feature_deducer()).key()
@property
def value(self):
"""The annotation filter value."""
return MotifPharmacophoreLib.deducer_as_annotation(self._feature_def.feature_deducer()).value()
[docs] class Query(object):
"""A pharmacophore query."""
def __init__(self, features=None, _motif_pharmacophore=None):
if _motif_pharmacophore:
self._pharmacophore = _motif_pharmacophore
else:
self._pharmacophore = MotifPharmacophoreLib.MotifPharmacophore()
self._features = []
if features is not None:
for f in features:
self.add_feature(f)
[docs] @staticmethod
def from_file(file_name):
"""Read a pharmacophore query from a file."""
reader = MotifPharmacophoreLib.MotifPharmacophoreReader(file_name)
q = Pharmacophore.Query(_motif_pharmacophore=reader.pharmacophore())
q._features_from_pharmacophore()
for f in q.features:
f._query = q
return q
[docs] def write(self, pharmacophore_path):
"""Write a pharmacophore query to a file."""
writer = MotifPharmacophoreLib.MotifPharmacophoreWriter()
writer.write_pharmacophore(self._pharmacophore, pharmacophore_path)
@property
def packing(self):
"""Whether to use crystal packing for query structure."""
return self._pharmacophore.use_packing()
@packing.setter
def packing(self, val):
self._pharmacophore.set_use_packing(val)
@property
def intra_only(self):
"""Whether all features should be intramolecular."""
return self._pharmacophore.intra_only()
@intra_only.setter
def intra_only(self, value):
if value == self.intra_only:
return
self._pharmacophore.set_intra_only(value)
if value:
ty = ChemistryLib.ContactCriterion.INTRAMOLECULAR
else:
ty = ChemistryLib.ContactCriterion.ANY
for dc in self._pharmacophore.distance_constraints():
f1 = self.features[dc.id_feature_a()]
f2 = self.features[dc.id_feature_b()]
if f1.component_label == 'small_molecule' and f2.component_label == 'small_molecule':
crit = dc.distance_criterion()
crit.set_type(ty)
dc.set_distance_criterion(crit)
@property
def bounding_sphere(self):
"""A minimum volume sphere containing all feature points."""
s = self._pharmacophore.bounding_sphere()
return GeometricDescriptors.Sphere(tuple(s.centre()[i] for i in range(3)), s.radius())
@property
def maximum_feature_distance(self):
"""The maximum distance between features."""
return self._pharmacophore.maximum_feature_distance()
@maximum_feature_distance.setter
def maximum_feature_distance(self, value):
self._pharmacophore.set_maximum_feature_distance(value)
[docs] def add_feature(self, feature):
"""Add a feature to the pharmacophore."""
self._pharmacophore.add_feature_definition(feature._feature_def)
self._features.append(feature)
feature._query = self
@property
def features(self):
"""The features of the pharmacophore."""
return self._features
def _features_from_pharmacophore(self):
def _make_spheres(_fd):
return tuple(GeometricDescriptors.Sphere((s.centre().x(), s.centre().y(), s.centre().z()), s.radius()) for s in _fd.spheres())
self._features.extend(tuple(
Pharmacophore.Feature(Pharmacophore.FeatureDefinition(_fd), *_make_spheres(_fd))
for _fd in self._pharmacophore.all_feature_definitions()
))
DistanceConstraint = collections.namedtuple('DistanceConstraint',
['feature_point1', 'feature_point2', 'distance_range', 'which'])
def _repr_distance_constraint(dc):
"""Representation of the distance constraint."""
return "DistanceConstraint((%d, %d), (%d, %d) (%.3f, %.3f) which='%s')" % (
dc.feature_point1[0], dc.feature_point1[1],
dc.feature_point2[0], dc.feature_point2[1],
dc.distance_range[0], dc.distance_range[1],
dc.which
)
DistanceConstraint.__repr__ = _repr_distance_constraint
@property
def distance_constraints(self):
cons = []
for dc in self._pharmacophore.distance_constraints():
fp1 = dc.id_feature_a(), dc.id_feature_point_a()
fp2 = dc.id_feature_b(), dc.id_feature_point_b()
crit = dc.distance_criterion()
rng = crit.min(), crit.max()
w = crit.type()
if w == ChemistryLib.ContactCriterion.ANY:
which = 'Any'
elif w == ChemistryLib.ContactCriterion.INTERMOLECULAR:
which = 'Intermolecular'
else:
which = 'Intramolecular'
cons.append(Pharmacophore.Query.DistanceConstraint(fp1, fp2, rng, which=which))
return tuple(cons)
[docs] def add_distance_constraint(self, feature1, feature2, distance_range, which):
"""Add a distance constraint between features.
:param feature1: a feature in the model, the tuple contains an id and a point.
:param feature2: a feature in the model, the tuple contains an id and a point.
:param distance_range: a pair of numbers specifying the desired range.
:param which: 'intramolecular', 'intermolecular' or 'any'.
"""
self.intra_only = False
if which.lower().startswith('intra'):
which = ChemistryLib.ContactCriterion.INTRAMOLECULAR
elif which.lower().startswith('inter'):
which = ChemistryLib.ContactCriterion.INTERMOLECULAR
else:
which = ChemistryLib.ContactCriterion.ANY
crit = SubstructureSearchLib.InterAtomicDistanceCriterion(min(distance_range), max(distance_range), which, False)
feat_id1, point_id1 = feature1
feat_id2, point_id2 = feature2
for dc in self._pharmacophore.distance_constraints():
if (
(dc.id_feature_a() == feat_id1 and dc.id_feature_point_a() == point_id1 and
dc.id_feature_b() == feat_id2 and dc.id_feature_point_b() == point_id2) or
(dc.id_feature_a() == feat_id2 and dc.id_feature_point_a() == point_id2 and
dc.id_feature_b() == feat_id1 and dc.id_feature_point_b() == point_id1)
):
dc.set_distance_criterion(crit)
break
else:
assert 0 <= feat_id1 < len(self.features)
assert 0 <= point_id1 < len(self.features[feat_id1].spheres)
fd1 = self.features[feat_id1]._feature_def
s = self.features[feat_id1].spheres[point_id1]
fs1 = MathsLib.Sphere(tuple(s.centre[i] for i in range(3)), s.radius)
assert 0 <= feat_id2 < len(self.features)
assert 0 <= point_id2 < len(self.features[feat_id2].spheres)
fd2 = self.features[feat_id2]._feature_def
s = self.features[feat_id2].spheres[point_id2]
fs2 = MathsLib.Sphere(tuple(s.centre[i] for i in range(3)), s.radius)
constraint = MotifPharmacophoreLib.MotifPharmacophoreConstraintDistance(
feat_id1, point_id1, feat_id2, point_id2, fs1, fs2, fd1, fd2
)
constraint.set_distance_criterion(crit)
self._pharmacophore.add_constraint(constraint)
[docs] class SearchHit:
"""Pharmacophore search hit."""
def __init__(self, model, _motif_pharmacophore_hit=None):
if _motif_pharmacophore_hit:
self._pharmacophore_hit = _motif_pharmacophore_hit
else:
self._pharmacophore_hit = MotifPharmacophoreLib.MotifPharmacophoreHit()
self.model = model
@property
def identifier(self):
"""The hit identifier"""
return self._pharmacophore_hit.identifier()
@property
def overlay_rmsd(self):
"""The root-mean-square deviation of the pharmacophore overlay"""
return self._pharmacophore_hit.overlay_rmsd()
@property
def overlay_transformation(self):
"""The transformation matrix of the pharmacophore overlay"""
return molecule.Molecule.Transformation(_transformation=self._pharmacophore_hit.overlay_transformation())
@property
def cluster_number(self):
"""The pharmacophore hit cluster number"""
return self._pharmacophore_hit.cluster_number()
@property
def molecule(self):
"""The pharmacophore overlay molecule"""
return molecule.Molecule(self._pharmacophore_hit.identifier(), _molecule=self._pharmacophore_hit.molecule())
@property
def protein(self):
"""The hit's protein, if any."""
return protein.Protein(self.identifier, self._pharmacophore_hit.protein().create_editable_molecule())
@property
def small_molecule(self):
"""The hit's small molecule."""
return molecule.Molecule(self.identifier, self._pharmacophore_hit.small_molecule())
@property
def annotations(self):
"""The annotations of the hit."""
return {
k.strip() : v
for k, v in self._pharmacophore_hit.external_annotations().items()
}
@property
def points(self):
"""The matched points of the hit."""
_m = self._pharmacophore_hit.pharmacophore_match()
pts = []
pdic = dict()
for i in range(_m.nfeature_matches()):
_fm = _m.feature_match(i)
pdic[_fm.pharmacophore_feature_id_] = _fm.points_
for i, p in sorted(pdic.items()):
pts.extend(p)
return tuple(pts)
[docs] def constraint_values(self):
"""The values of the distance constraints of the query."""
pts = self.points
counts = [len(f.spheres) for f in self.model.features]
values = []
for dc in self.model.distance_constraints:
f_inx, s_inx = dc.feature_point1
inx1 = sum(counts[:f_inx]) + s_inx
f_inx, s_inx = dc.feature_point2
inx2 = sum(counts[:f_inx]) + s_inx
dist = GeometricDescriptors.point_distance(pts[inx1], pts[inx2])
values.append(dist)
return values
[docs] def feature_points(self, feature):
"""The points in the hit associated with the given feature."""
pts = self.points
counts = [len(f.spheres) for f in self.model.features]
inx = feature._feature_def.id()
n = len(feature.spheres)
start = sum(counts[:inx])
return tuple(pts[start:start+n])
[docs] class Search(object):
"""A pharmacophore search"""
[docs] class Settings(object):
"""Settings for a pharmacophore search."""
def __init__(self, max_hits_per_structure=5, max_hit_structures=100000):
"""Initialise settings.
:param max_hits_per_structure: keep only top n matches per database entry
:param max_hit_structures: restrict maximum number of matches per database entry
"""
self._search_manager = MotifPharmacophoreLib.PharmacophoreSearchManager()
self.max_hit_structures = max_hit_structures
self.max_hits_per_structure = max_hits_per_structure
self.n_threads = Pharmacophore._default_n_threads()
@property
def n_threads(self):
"""The number of threads to use in the search."""
return self._search_manager.n_search_threads()
@n_threads.setter
def n_threads(self, value):
self._search_manager.set_n_search_threads(value)
@property
def max_hit_structures(self):
"""The number of hits to retain."""
return self._search_manager.hitGenerationKeepTopNHits()
@max_hit_structures.setter
def max_hit_structures(self, value):
self._search_manager.setHitGenerationKeepTopNHits(value)
@property
def max_hits_per_structure(self):
"""The number of hits per structure to be returned."""
return self._search_manager.searchOptionMaxHitsPerStructure()
@max_hits_per_structure.setter
def max_hits_per_structure(self, value):
self._search_manager.setMaxHitsPerStructure(value)
@property
def max_hit_rmsd(self):
"""The maximum RMSD of hit structures to be returned."""
return self._search_manager.hitGenerationMaxHitRMSD()
@max_hit_rmsd.setter
def max_hit_rmsd(self, value):
self._search_manager.setHitGenerationMaxHitRMSD(value)
@property
def skip_proteins(self):
"""Whether or not to skip protein structures."""
return self._search_manager.hitGenerationSkipProteins()
@skip_proteins.setter
def skip_proteins(self, value):
self._search_manager.setHitGenerationSkipProteins(value)
@property
def complete_small_molecules(self):
"""Whether or not to return the complete small molecule hit."""
return self._search_manager.hitGenerationCompleteSmallMolecules()
@complete_small_molecules.setter
def complete_small_molecules(self, value):
self._search_manager.setHitGenerationCompleteSmallMolecules(value)
@property
def complete_proteins(self):
"""Whether or not to return the complete protein of a hit."""
return self._search_manager.hitGenerationCompleteProteins()
@complete_proteins.setter
def complete_proteins(self, value):
self._search_manager.setHitGenerationCompleteProteins(value)
@property
def three_cubed_packing(self):
"""Whether or not to use 3-cubed packing."""
return self._search_manager.searchOptionUseThreeCubedPacking()
@three_cubed_packing.setter
def three_cubed_packing(self, value):
self._search_manager.setSearchOptionUseThreeCubedPacking(value)
def _clone(self):
settings = Pharmacophore.Search.Settings()
settings.n_threads = self.n_threads
settings.max_hit_structures = self.max_hit_structures
settings.max_hits_per_structure = self.max_hits_per_structure
settings.max_hit_rmsd = self.max_hit_rmsd
settings.skip_proteins = self.skip_proteins
settings.complete_small_molecules = self.complete_small_molecules
settings.complete_proteins = self.complete_proteins
settings.three_cubed_packing = self.three_cubed_packing
return settings
def __init__(self, settings=None):
if settings is None:
settings = Pharmacophore.Search.Settings()
self.settings = settings
self._feature_database = None
self._feature_database_model = MotifPharmacophoreLib.PharmacophoreFeatureDatabaseModel()
self._pm_factory = MotifPharmacophoreLib.PharmacophoreManagerFactory()
self._search_manager = self.settings._clone()._search_manager
self._hits = None
def __del__(self):
self._feature_database_model = None
self._results_manager = None
self._search_manager = None
self._pm_factory = None
@property
def feature_database(self):
"""The CSD-CrossMiner feature database."""
return self._feature_database
@feature_database.setter
def feature_database(self, val):
self._feature_database = val
fdb_filtered_view = MotifPharmacophoreLib.MotifPharmacophoreFeatureDatabaseFilteredView(val._feature_db, False)
self._feature_database_model.set_feature_database_filtered_view(fdb_filtered_view)
self._search_manager.set_feature_database_filtered_view(fdb_filtered_view)
[docs] def search(self, model, database=None, verbose=False, stop_after=sys.maxsize):
"""Perform a pharmacophore search."""
if database is None:
if self.feature_database is None:
self.feature_database = Pharmacophore.default_feature_database()
database = self.feature_database
try:
self._search_manager = self.settings._clone()._search_manager
self._results_manager = MotifPharmacophoreLib.PharmacophoreResultsModel()
self._search_manager.add_results_writer(self._results_manager)
fdb_filtered_view = MotifPharmacophoreLib.MotifPharmacophoreFeatureDatabaseFilteredView(database._feature_db, False)
self._feature_database_model.set_feature_database_filtered_view(fdb_filtered_view)
self._search_manager.set_feature_database_filtered_view(fdb_filtered_view)
self._search_manager.set_pharmacophore(model._pharmacophore)
self._search_manager.non_qt_update_feature_mapping()
self._search_manager.update()
self._results_manager.set_maximum_number_of_hits_to_save(self.settings.max_hit_structures)
self._hits = None
# This is ugly. Currently we need to reload the feature database to correctly initialise other
# underlying CrossMiner search dependencies.
feature_database_file = database.file_name
fdb_filtered_view = self._search_manager.feature_database_filtered_view()
if verbose:
print('filtered feature database size {}'.format(fdb_filtered_view.size()))
selector = MotifPharmacophoreLib.SelectCSDDatabase()
selector = MotifPharmacophoreLib.select_csd_as_select_structure(selector)
self._feature_database_model.process_feature_database(feature_database_file,
UtilitiesLib.ProgressMonitor(), selector)
db_pool = self._feature_database_model.structure_databases()
self._search_manager.set_structure_databases(db_pool)
self._results_manager.set_accept_hits(True)
self._search_manager.start_search()
# Wait for search to finish.
i = 0
while self._search_manager.is_running():
i += 1
time.sleep(0.1)
if verbose:
print('{} n_searched = {} n_hits = {}'.format(
i, self._search_manager.n_searched(), self._results_manager.n_hits()
))
if self._results_manager.n_hits() >= stop_after:
break
self._search_manager.stop_search()
self._results_manager.set_accept_hits(False)
self._hits = self._results_manager.hits()
finally:
self._search_manager.remove_results_writer(self._results_manager)
self._results_manager = None
if verbose:
for hit in self._hits:
print('Hit id: {} rmsd: {} '.format(hit.identifier(), hit.overlay_rmsd()))
pharmacophore_hits = list(Pharmacophore.SearchHit(model, _motif_pharmacophore_hit=hit) for hit in self._hits)
return pharmacophore_hits
[docs] class FeatureDatabase(object):
"""A database of pharmacophore features."""
DatabaseInfo = collections.namedtuple('DatabaseInfo', ['file_name', 'n_entries', 'colour'])
def show_dbi(dbi):
return "DatabaseInfo(file_name='%s', n_entries=%d, colour=%s)" % (dbi.file_name, dbi.n_entries, str(dbi.colour))
DatabaseInfo.__repr__ = show_dbi
def __init__(self, file_name, _feature_db=None):
if _feature_db is None:
_feature_db = MotifPharmacophoreLib.MotifPharmacophoreFeatureDatabase()
self.file_name = file_name
self._feature_db = _feature_db
fd_names = [f.identifier for f in self.feature_definitions]
if 'excluded_volume' not in fd_names:
self.add_feature_definition(Pharmacophore.ExcludedVolume(GeometricDescriptors.Sphere((0, 0, 0), 0)))
def __len__(self):
return self._feature_db.size()
[docs] @staticmethod
def from_file(feature_database_path):
"""Read a pharmacophore feature database from the specified path.
:param feature_database_path: pharmacophore feature database file path.
:returns: :class:`ccdc.Pharmacophore.FeatureDatabase` instance.
"""
progress_monitor = UtilitiesLib.ProgressMonitor()
feature_database = MotifPharmacophoreLib.MotifPharmacophoreFeatureDatabase()
feature_database.read(feature_database_path, progress_monitor)
return Pharmacophore.FeatureDatabase(feature_database_path, feature_database)
[docs] def write(self, file_name):
"""Write a pharmacophore feature database to the specified path.
:param file_name: pharmacophore feature database file path.
"""
progress_monitor = UtilitiesLib.ProgressMonitor()
self._feature_db.write(file_name, progress_monitor)
@property
def schema_version(self):
"""The database schema version."""
return self._feature_db.version()
@property
def databases(self):
"""The structure databases used in the construction of this feature database.
Each database is presented as a tuple of file_name, nunmber of entries, and colour."""
colours = tuple(
self._feature_db.colour(i) for i in range(self._feature_db.number_of_source_databases())
)
return tuple(
Pharmacophore.FeatureDatabase.DatabaseInfo(
self._feature_db.file_name(i),
self._feature_db.nentries_from_source_database(i),
Colour(c.r_, c.g_, c.b_, c.a_)
) for i, c in enumerate(colours)
)
@property
def identifiers(self):
"""All identifiers from the database."""
return self._feature_db.identifiers()
def entries(self):
for i in range(self._feature_db.size()):
yield Pharmacophore.FeatureDatabase.Entry(_entry=self._feature_db.entry(i))
def __iter__(self):
"""Iterator."""
return self.entries() # pylint: disable=E1101
def __getitem__(self, index):
if index < 0 or index >= len(self):
raise IndexError('FeatureDatabase: index out of range')
return Pharmacophore.FeatureDatabase.Entry(_entry=self._feature_db.entry(index))
[docs] def entry(self, identifier):
"""The :class:`ccdc.pharmacophore.Pharmacophore.FeatureDatabase.Entry` corresponding to the identifier.
:param identifier: identifier of the entry.
:returns: a :class:`ccdc.pharmacophore.Pharmacophore.FeatureDatabase.Entry` instance.
:raises: RuntimeError if the identifier is not present.
"""
if not self._feature_db.identifier_exists(UtilitiesLib.DatabaseEntryIdentifier(identifier)):
raise RuntimeError('FeatureDatabase: %s not present' % identifier)
return Pharmacophore.FeatureDatabase.Entry(
_entry=self._feature_db.entry(identifier)
)
@property
def feature_definitions(self):
"""The feature definitions present in the database."""
return tuple(
Pharmacophore.FeatureDefinition(_feature_def=f)
for f in self._feature_db.feature_definitions()
) + tuple(
Pharmacophore.FeatureDefinition(_feature_def=f)
for f in self._feature_db.non_indexed_feature_definitions()
) + tuple(
Pharmacophore.FeatureDefinition(_feature_def=f)
for f in self._feature_db.non_matchable_feature_definitions()
)
[docs] def add_feature_definition(self, feature_definition):
"""Add a feature definition to the database.
Note: this feature definition will not be indexed in the database, so will run rather more slowly if
required in a search. To add the corresponding index, the database will need to be recreated with the
feature present.
"""
if isinstance(feature_definition, Pharmacophore.FeatureDefinition):
_fd = feature_definition._feature_def
elif isinstance(feature_definition, Pharmacophore.Feature):
_fd = feature_definition._feature_def
else:
raise NotImplementedError('Cannot add one of these to a database %s' % str(feature_definition))
db_fd = MotifPharmacophoreLib.MotifPharmacophoreFeatureDatabaseFeatureDefinition()
db_fd.set_feature_deducer(_fd.feature_deducer())
label = _fd.component_label()
if label == 2:
labels = [0, 1]
else:
labels = [label]
db_fd.set_component_labels(labels)
if _fd.is_matchable():
self._feature_db.add_non_indexed_feature_definition(db_fd)
else:
self._feature_db.add_non_matchable_feature_definition(db_fd)
@property
def annotation_keys(self):
"""The annotations available in this database."""
return self._feature_db.available_annotations()
[docs] def annotate(self, pattern, **kw):
"""Add or replace an annotation in the database.
:param pattern: a glob-style pattern which will be used to match against identifiers.
All entries in the database with a matching identifier will be annotated with the key, value pairs of the `kw` argument.
"""
for k, w in kw.items():
self._feature_db.set_external_annotation(pattern, k, w)
[docs] def clear_annotations(self):
"""Remove all annotations from the database."""
self._feature_db.clear_external_annotations()
[docs] class Entry(object):
"""An entry in a FeatureDatabase."""
class _write_through_dict(dict):
def __init__(self, _entry, *args, **kw):
self._entry = _entry
dict.__init__(self, *args, **kw)
def __setitem__(self, key, value):
dict.__setitem__(self, key, value)
self._entry.set_external_annotation(key, str(value))
def update(self, dic):
for k, v in dic.items():
self[k] = v
def __init__(self, _entry=None):
self._entry = _entry
@property
def identifier(self):
"""The identifier of the entry."""
return self._entry.name()
@identifier.setter
def identifier(self, value):
self._entry.set_name(value)
@property
def annotations(self):
"""The dictionary of annotations for the entry."""
return Pharmacophore.FeatureDatabase.Entry._write_through_dict(self._entry, self._entry.external_annotations())
[docs] def clear_annotations(self):
"""Clear all annotations from the entry."""
self._entry.clear_external_annotations()
@property
def source_database_index(self):
return self._entry.source_db_id()
[docs] class Creator(object):
"""The creator for feature databases."""
[docs] class Settings(object):
"""Settings for feature database creation."""
def __init__(self, feature_definition_directory=None, n_threads=None, skip_duplicate_identifiers=False):
if feature_definition_directory is None:
feature_definition_directory = Pharmacophore._feature_definition_directory()
self.feature_definition_directory = feature_definition_directory
if n_threads is None or n_threads < 1:
n_threads = Pharmacophore._default_n_threads()
self.n_threads = n_threads
self.skip_duplicate_identifiers = skip_duplicate_identifiers
[docs] class StructureDatabase(object):
"""A database of structures from which a feature database may be created."""
def __init__(self, db_info, use_crystal_symmetry=True, sqlmol2_database_path=None,
structure_filters=None, structure_database_path=None):
"""Initialise for creation.
If you are creating a Structure database from the CSD set DatabaseInfo.file_name to a list of CSD
database file names. If you want to include any available CSD updates include them in the list.
Alternatively, set DatabaseInfo.file_name to the string 'csd' rather than a list and all CSD updates
will be automatically included.
:param db_info: a :class:`ccdc.pharmacophore.FeatureDatabase.DatabaseInfo` instance. Here the
n_entries attribute is the desired number of entries in the resulting database.
:param use_crystal_symmetry: whether or not to use crystal symmetry when generating features.
:param structure_database_path: where to save the generated structure database. Supported types are
.csdsqlx and .sqlmol2.
:param structure_filters: a :class:`ccdc.search.Settings` instance to filter the source database.
"""
self.db_info = db_info
self.use_crystal_symmetry = use_crystal_symmetry
if sqlmol2_database_path is not None:
warnings.warn("sqlmol2_database_path is deprecated and will be removed. Use structure_database_path instead.", DeprecationWarning)
self.structure_database_path = structure_database_path or sqlmol2_database_path
self.structure_filters = structure_filters
def _database_data(self, skip_duplicate_identifiers):
if self.structure_database_path is not None:
self._database_path = self.db_info.file_name
if not os.path.exists(self.structure_database_path):
self._create_structure_database(self.structure_database_path, skip_duplicate_identifiers)
with io.EntryReader(self.structure_database_path) as reader:
database_data = MotifPharmacophoreLib.StructureDatabaseFeatureCreatorData(
os.path.basename(self.structure_database_path),
MotifPharmacophoreLib.MotifPharmacophoreFeatureColour(*self.db_info.colour),
not self.use_crystal_symmetry,
reader._db
)
return [database_data]
else:
csd = io.EntryReader(self.db_info.file_name)
if self.structure_filters is None:
self.structure_filters = self.default_csd_filters()
if isinstance(csd.file_name, (list, tuple)):
paths = csd.file_name
else:
paths = [csd.file_name]
database_data = []
if 0 < self.db_info.n_entries < sys.maxsize:
to_get = self.db_info.n_entries
else:
to_get = sys.maxsize
for self._database_path in paths:
with io.EntryReader(self._database_path) as reader:
if self.structure_filters or 0 < self.db_info.n_entries < sys.maxsize:
identifiers = self._apply_csd_filters(to_get, self.structure_filters)
data = MotifPharmacophoreLib.StructureDatabaseFeatureCreatorData(
os.path.basename(self._database_path),
MotifPharmacophoreLib.MotifPharmacophoreFeatureColour(*self.db_info.colour),
not self.use_crystal_symmetry,
reader._db,
identifiers
)
database_data.append(data)
to_get -= len(identifiers)
if to_get <= 0:
break
return database_data
def _create_structure_database(self, structure_database_path, skip_duplicate_identifiers):
"""Convert input structure database to a structure database.
:param structure_database_path: The path to the output structure database file.
"""
progress_monitor = UtilitiesLib.ProgressMonitor()
self._append_to_structure_database(progress_monitor, self._database_path, structure_database_path, skip_duplicate_identifiers)
def _append_to_structure_database(self, progress_monitor, database_path, structure_database_path, skip_duplicate_identifiers):
input_files = []
if os.path.isdir(database_path):
for path, dirs, files in os.walk(database_path):
input_files.extend([os.path.join(path,f) for f in files
if f.endswith(".mol2") or f.endswith(".sdf")])
else:
input_files.append(self._database_path)
if os.path.exists(structure_database_path):
os.unlink(structure_database_path)
suffix = structure_database_path[-8:]
if structure_database_path.endswith('.sqlmol2') or structure_database_path.endswith('.csdsqlx'):
structure_database_path = structure_database_path[:-8]
try:
if skip_duplicate_identifiers:
duplicates_policy = MotifPharmacophoreLib.SkipDuplicateEntryPolicy()
else:
duplicates_policy = MotifPharmacophoreLib.StopDuplicateEntryPolicy()
if suffix == '.csdsqlx':
MotifPharmacophoreLib.convert_bindingsites_to_csdsqlx(
input_files, structure_database_path, progress_monitor, duplicates_policy
)
else:
MotifPharmacophoreLib.convert_bindingsites_to_sqlmol2(
input_files, structure_database_path, progress_monitor, duplicates_policy
)
except RuntimeError as e:
for ext in ['.csdsqlx', '.sqlmol2']:
db_path = structure_database_path + ext
if os.path.isfile(db_path):
os.remove(db_path)
if len(e.args) > 0 and "is already in the database" in e.args[0]:
raise RuntimeError(("Error creating structure database {}, "
"entry identifiers should be unique".format(structure_database_path), e.args[0]))
else:
raise
[docs] @staticmethod
def default_csd_filters():
"""Suggested filters for CSD structures to include in pharmacophore feature database."""
filter_settings = search.Search.Settings()
filter_settings.has_3d_coordinates = True
filter_settings.max_r_factor = 10.
filter_settings.no_disorder = True
filter_settings.not_polymeric = True
filter_settings.only_organic = False
filter_settings.must_not_have_elements = [
'He', 'Be', 'Ne', 'Al', 'Si', 'Ar', 'Sc', 'Ti', 'V', 'Cr',
'Ga', 'Ge', 'As', 'Se', 'Kr', 'Rb', 'Y', 'Zr', 'Nb', 'Mo',
'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd', 'In', 'Sn', 'Sb', 'Te',
'Xe', 'Cs', 'Ba', 'La', 'Ce', 'Pr', 'Nd', 'Pm', 'Sm', 'Eu',
'Gd', 'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb', 'Lu', 'Hf', 'Ta',
'W', 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg', 'Tl', 'Pb', 'Bi',
'Po', 'Rn', 'Fr', 'Ra', 'Ac', 'Th', 'Pa', 'U'
]
return filter_settings
def _apply_csd_filters(self, max_hit_structures, filter_settings):
"""Filter input CSD database.
:param maximum_hit_structures: The maximum number of structures to retain, or ``None``
for no limit. Default ``None``.
:param filter_settings: Structure quality filters to be applied. Object of type
:class:`SubstructureSearch.Settings`, boolean ``True`` for default filters, or ``None``
for no filtering. Default ``None``.
"""
with io.EntryReader(self._database_path) as reader:
if not filter_settings:
filter_settings = search.Search.Settings()
elif filter_settings is True: # default filters, for backwards compatibility.
filter_settings = self.default_csd_filters()
identifiers = []
for e in reader:
if filter_settings.test(e):
identifiers.append(e.identifier)
if len(identifiers) >= max_hit_structures:
break
return identifiers
def __init__(self, settings=None):
if settings is None:
settings = Pharmacophore.FeatureDatabase.Creator.Settings()
self.settings = settings
self._creator = MotifPharmacophoreLib.MotifPharmacophoreFeatureDatabaseCreator()
self._creator.set_number_of_threads(self.settings.n_threads)
[docs] def create(self, structure_databases, feature_defs=None):
"""Create the feature database.
:param structure_databases: an iterable of :class:`ccdc.pharmacophore.Pharmacophore.FeatureDatabase.Creator.StructureDatabase` instances.
:param feature_defs: an iterable of :class:`ccdc.pharmacophore.Pharmacophore.FeatureDefinition`
instances or None. If None, the default set of feature definitions in read in by
:meth:`ccdc.pharmacophore.Pharmacophore.read_feature_definitions` will be used.
"""
if feature_defs is None:
if not Pharmacophore.feature_definitions:
Pharmacophore.read_feature_definitions()
feature_defs = list(Pharmacophore.feature_definitions.values())
def sort_key(fd):
d = dict(any=0, small_molecule=1, protein=2)
adict = dict(
acceptor=0,
acceptor_projected=1,
donor_ch_projected=2,
donor_projected=3,
heavy_atom=4,
hydrophobe=5,
ring=6,
ring_non_planar=7,
ring_planar_projected=8,
purine=9,
pyrimidine=10,
adenine=11,
cytosine=12,
guanine=13,
thymine=14,
uracil=15,
deoxyribose=16,
ribose=17
)
sdict = dict(
exit_vector=0,
halogen=1,
bromine=2,
chlorine=3,
fluorine=4,
metal=5,
water=6
)
pdict = dict(
ALA=0,
ARG=1,
ASN=2,
ASP=3,
CYS=4,
GLN=5,
GLU=6,
GLY=7,
HIS=8,
ILE=9,
LEU=10,
LYS=11,
MET=12,
PHE=13,
PRO=14,
SER=15,
THR=16,
TRP=17,
TYR=18,
VAL=19,
)
k = fd.component_label
n = fd.identifier
if k == 'any':
v = (d[k], adict.get(n, 99), n)
if k == 'small_molecule':
v = (d[k], sdict.get(n, 99), n)
if k == 'protein':
v = (d[k], pdict.get(n, 99), n)
return v
feature_defs.sort(key=sort_key)
_fds = [Pharmacophore._database_feature_def_from_fd(fd) for fd in feature_defs]
databases = []
for sdb in structure_databases:
databases.extend(sdb._database_data(self.settings.skip_duplicate_identifiers))
progress_monitor = UtilitiesLib.ProgressMonitor()
feature_db = self._creator.create_feature_database(databases, _fds, progress_monitor)
return Pharmacophore.FeatureDatabase('', _feature_db=feature_db)