Source code for ccdc.pharmacophore

#
# This code is Copyright (C) 2015 The Cambridge Crystallographic Data Centre
# (CCDC) of 12 Union Road, Cambridge CB2 1EZ, UK and a proprietary work of CCDC.
# This code may not be used, reproduced, translated, modified, disassembled or
# copied, except in accordance with a valid licence agreement with CCDC and may
# not be disclosed or redistributed in any form, either in whole or in part, to
# any third party. All copies of this code made in accordance with a valid
# licence agreement as referred to above must contain this copyright notice.
#
# No representations, warranties, or liabilities are expressed or implied in the
# supply of this code by CCDC, its servants or agents, except where such
# exclusion or limitation is prohibited, void or unenforceable under governing
# law.
#
"""
pharmacophore.py - access to pharmacophore searches.

This allows pharmacophore models to be read and written, and using these to run searches.
Access to the hits is also provided.
"""

#########################################################################################################

import os
import sys
import collections
import time
import multiprocessing
import warnings

from ccdc import io, search, molecule, utilities, protein
from ccdc.descriptors import GeometricDescriptors
from ccdc.utilities import Colour

from ccdc.utilities import _import_ccdc_modules
_import_ccdc_modules([
    'MotifPharmacophoreLib',
    'SubstructureSearchLib',
    'ChemistryLib',
    'MathsLib',
    'UtilitiesLib'
    ])

MotifPharmacophoreLib.licence_check()

#########################################################################################################

[docs] class Pharmacophore(object): """Namespace for the Pharmacophore functionality.""" crossminer_version = '1.4.0' feature_definitions = dict() @staticmethod def _feature_definition_directory(): """Location of the default feature definitions.""" loc = io._CSDDatabaseLocator.get_crossminer_feature_definition_directory() if loc is None or not os.path.exists(loc): import ccdc loc = ccdc._find_crossminer_feature_directory() if loc is None or not os.path.exists(loc): raise RuntimeError('Unable to find the pharmacophore feature definition directory') return loc
[docs] @staticmethod def read_feature_definitions(directory=None): """Read all the feature definitions found in any subdirectory of the provided argument. By default this will read the feature definitions supplied by the CSD. """ if directory is None: directory = Pharmacophore._feature_definition_directory() for dirpath, dirnames, filenames in os.walk(directory): for fname in filenames: if fname.endswith('.cpf'): fd = Pharmacophore.FeatureDefinition.from_file(os.path.join(dirpath, fname)) Pharmacophore.feature_definitions[fd.identifier] = fd
[docs] @staticmethod def default_feature_database(): """The CSD provided CrossMiner feature database.""" return Pharmacophore.FeatureDatabase.from_file( io._CSDDatabaseLocator.get_crossminer_database_location() )
[docs] @staticmethod def default_feature_database_location(): """The path to the default CrossMiner database.""" return io._CSDDatabaseLocator.get_crossminer_database_location()
_component_label_dict = utilities.bidirectional_dict(( (0, 'protein'), (1, 'small_molecule'), (2, 'any') )) _apply_rule_dic = utilities.bidirectional_dict(( (MotifPharmacophoreLib.MotifPharmacophoreSMARTSFeature.ALWAYS, 'Always'), (MotifPharmacophoreLib.MotifPharmacophoreSMARTSFeature.H_PRESENT, 'With_h'), (MotifPharmacophoreLib.MotifPharmacophoreSMARTSFeature.NO_H_PRESENT, 'No_h') )) # Not currently used _planarity_dic = utilities.bidirectional_dict(( (MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorCentroid.ANY, 'Any'), (MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorCentroid.PLANAR, 'Planar'), (MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorCentroid.NON_PLANAR, 'Non_planar'), )) @staticmethod def _default_n_threads(): n_threads = max(int(multiprocessing.cpu_count() / 2), 1) return n_threads @staticmethod def _database_feature_def_from_fd(fd): _fd = MotifPharmacophoreLib.MotifPharmacophoreFeatureDatabaseFeatureDefinition() _fd.set_feature_deducer(fd._feature_def.feature_deducer()) _fd.set_colour(MotifPharmacophoreLib.MotifPharmacophoreFeatureColour(*fd.colour)) label = Pharmacophore._component_label_dict.inverse_lookup(fd.component_label) if label == 2: labels = [0, 1] else: labels = [label] _fd.set_component_labels(labels) return _fd
[docs] class PointGenerator(object): """A mechanism for generating spheres from a SMARTS match.""" def __init__(self, _pg=None): self._pg = _pg @property def parameters(self): """The optional parameters of the point generator.""" return { p.replace(' ', '_') : v for p, v in zip(self._pg.optional_parameter_names(), self._pg.optional_parameters()) }
[docs] def feature_points(self, atoms): """The points generated by this generator. :raises: RuntimeError if the sequence of points is inappropriate for the generator. """ _motif_pts = self._pg.motif_features([a._atom for a in atoms]) return tuple(p for mf in _motif_pts for p in mf.points())
[docs] class DummyPointGenerator(PointGenerator): """Generates no points.""" def __init__(self, identifier): self._pg = MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorDummy() self.identifier = identifier
[docs] class PointPointGenerator(PointGenerator): """Generates a single point at the position of the relevant atom.""" def __init__(self, identifier): self._pg = MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorPoint() self.identifier = identifier
[docs] class LinearPointGenerator(PointGenerator): """Generates a point displaced from the relevant atom.""" def __init__(self, identifier, distance=2.8, neighbour_is_base_feature=False): self._pg = MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorLinear( distance, neighbour_is_base_feature) self.identifier = identifier
[docs] class NormalPointGenerator(PointGenerator): """Generates a point from the normal to the plane of atoms.""" def __init__(self, identifier, distance=2.8): self._pg = MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorNormal( distance, False, 0.0 ) self.identifier = identifier
[docs] class PlanarNormalPointGenerator(PointGenerator): """Generates a point from the normal to the plane of a planar ring.""" def __init__(self, identifier, distance=2.8, planar_ring_tolerance=0.0): self._pg = MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorNormal( distance, True, planar_ring_tolerance ) self.identifier = identifier
[docs] class AnyCentroidPointGenerator(PointGenerator): """Generates a point at the centroid of atoms.""" def __init__(self, identifier): self._pg = MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorCentroid( MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorCentroid.ANY, 0.0 ) self.identifier = identifier
[docs] class PlanarCentroidPointGenerator(PointGenerator): """Generates a point at the centre of a planar ring.""" def __init__(self, identifier, atom_planarity_threshold=0.0): self._pg = MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorCentroid( MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorCentroid.PLANAR, atom_planarity_threshold ) self.identifier = identifier
[docs] class NonPlanarCentroidPointGenerator(PointGenerator): """Generates a point at the centroid of atoms which are not planar.""" def __init__(self, identifier, atom_planarity_threshold=0.0): self._pg = MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorCentroid( MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorCentroid.NON_PLANAR, atom_planarity_threshold ) self.identifier = identifier
[docs] class TrigonalPointGenerator(PointGenerator): """Generates two points with a trigonal geometry.""" def __init__(self, identifier, distance=2.8): self._pg = MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorTrigonal(distance) self.identifier = identifier
[docs] class TetrahedralPointGenerator(PointGenerator): """Generates two points with a tetrahedral geometry.""" def __init__(self, identifier, distance=2.8): self._pg = MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorTetrahedral(distance) self.identifier = identifier
[docs] class SMARTSDefinition(object): """A SMARTS definition with associated point generation schemes.""" def __init__(self, smarts, point_generators, _sd=None, **kw): # _incomplete if made by hand. Need to resolve the motif_feature_types() when in a feature_def # if _sd is None: self._sd = MotifPharmacophoreLib.MotifPharmacophoreSMARTSFeature() self._sd.set_SMARTSdefinition(smarts) self._point_generators = point_generators self._incomplete = True app_rule = kw.get('apply_rule') if app_rule is None: app = MotifPharmacophoreLib.MotifPharmacophoreSMARTSFeature.ALWAYS else: app = Pharmacophore._apply_rule_dic.inverse_lookup(app_rule) self._sd.set_apply_rule(app) self._sd.set_permutational_equivalence(kw.get('permutational_equivalence', True)) self._sd.set_allow_overlapping_substructures(kw.get('allow_overlaps', False)) else: self._sd = _sd self._incomplete = False self._point_generators = point_generators @staticmethod def _from_SMARTS_feature_def(_sd, _pgs): return Pharmacophore.SMARTSDefinition( _sd.SMARTSdefinition(), tuple( (_pgs.feature_types()[i].name(), v) for i, v in _sd.motif_feature_types() ), _sd, apply_rule=Pharmacophore._apply_rule_dic[_sd.apply_rule()], permutional_equivalence=_sd.permutational_equivalence(), allow_overlaps=_sd.allow_fully_overlapping_substructures() ) @property def SMARTS(self): """The SMARTS string.""" return self._sd.SMARTSdefinition() @property def apply_rule(self): """How to apply the SMARTS definition.""" return Pharmacophore._apply_rule_dic[self._sd.apply_rule()] @property def permutational_equivalence(self): """Whether permutionally equivalent structures are deemed to be a single match.""" return self._sd.permutational_equivalence() @property def allow_overlaps(self): """Whether fully overlapping structures are permitted to be a hit.""" return self._sd.allow_fully_overlapping_substructures() @property def point_generators(self): """The generators for feature points.""" return self._point_generators
[docs] def search(self, crystal): """Search a crystal for instances of the SMARTS pattern.""" raise NotImplementedError('Pharmacophore.SMARTSDefinition.search')
[docs] class FeatureDefinition(object): """A feature definition in a feature database.""" _point_generators = utilities.bidirectional_dict( Dummy= MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorDummy, Point= MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorPoint, Linear= MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorLinear, Normal= MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorNormal, Centroid= MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorCentroid, Trigonal= MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorTrigonal, Tetrahedral=MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorTetrahedral, ) def __init__(self, _feature_def=None): self._feature_def = _feature_def def __repr__(self): return 'FeatureDefinition(%s)' % self.identifier
[docs] @staticmethod def from_file(file_name): """Reads a structural feature definition from a file.""" component = os.path.basename(os.path.dirname(file_name)) try: tag = Pharmacophore._component_label_dict.inverse_lookup(component) except Exception as exc: tag = 2 _creator = MotifPharmacophoreLib.MotifPharmacophoreFeatureDatabaseCreator() _fd = _creator.read_feature_definition(file_name) _feature_definition = MotifPharmacophoreLib.MotifPharmacophoreFeatureDefinition() _feature_definition.set_feature_deducer(_fd.feature_deducer()) _feature_definition.set_component_label(tag) _feature_definition.set_colour(_fd.colour()) return Pharmacophore.FeatureDefinition(_feature_def=_feature_definition)
[docs] @staticmethod def from_SMARTS_definitions(identifier, smarts, point_generators): """Create a feature from SMARTS definitions. :param identifier: string :param smarts: iterable of :class:`ccdc.pharmacophore.Pharmacophore.SMARTS_definition` instances. :param point_generators: iterable of :class:`ccdc.pharmacophore.Pharmaophore.PointGenerator` instances. """ pgs = MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGenerationScheme() name_dic = dict() for i, pg in enumerate(point_generators): name_dic[pg.identifier] = i pgs.add_feature_type(pg.identifier, pg._pg) _sdefs = [] for sd in smarts: _sdefs.append(sd._sd) pg_spec = [] for name, ats in sd.point_generators: inx = name_dic[name] pg_spec.append((inx, ats)) sd._sd.set_motif_feature_types(pg_spec) ded = MotifPharmacophoreLib.MotifPharmacophoreFeatureDeducerSubstructure(_sdefs, pgs) ded.set_name(identifier) fd = Pharmacophore.FeatureDefinition() fd._feature_def = MotifPharmacophoreLib.MotifPharmacophoreFeatureDefinition() fd._feature_def.set_feature_deducer(ded) fd._feature_def.set_description(identifier) return fd
@property def SMARTS_definitions(self): deducer = self._feature_def.feature_deducer() if deducer.class_name() == 'MotifPharmacophoreFeatureDeducerSubstructure': _s = MotifPharmacophoreLib.deducer_as_substructure(deducer) return tuple( Pharmacophore.SMARTSDefinition._from_SMARTS_feature_def(_fd, _s.point_generation_scheme()) for _fd in _s.SMARTS_motif_feature_types() ) @property def identifier(self): """The identifier of the feature.""" return self._feature_def.name() @property def component_label(self): """Which component ('protein', 'small_molecule' or 'any') the feature def applies to.""" if hasattr(self._feature_def, 'component_label'): return Pharmacophore._component_label_dict[self._feature_def.component_label()] else: t = tuple(Pharmacophore._component_label_dict[x] for x in self._feature_def.component_labels()) if len(t) == 1: return t[0] else: return 'Any' @component_label.setter def component_label(self, value): self._feature_def.set_component_label( Pharmacophore._component_label_dict.inverse_lookup(value) ) @property def colour(self): """The colour associated with this feature.""" col = self._feature_def.colour() return Colour(col.r_, col.g_, col.b_, col.a_) @colour.setter def colour(self, val): col = MotifPharmacophoreLib.MotifPharmacophoreFeatureColour(*val) self._feature_def.set_colour(col)
[docs] def write(self, file_name): """Writes the feature definition.""" if self._feature_def.feature_deducer().class_name() == 'MotifPharmacophoreFeatureDeducerSubstructure': ded = MotifPharmacophoreLib.deducer_as_substructure(self._feature_def.feature_deducer()) writer = MotifPharmacophoreLib.SMARTSMotifFeatureTypeFileWriter(ded) ostr = MotifPharmacophoreLib.ofstream(file_name) writer.write_content('', ostr) ostr.close() else: raise RuntimeError('Not a SMARTS feature definition')
@property def point_generator_names(self): """The names of the point generator schemes associated with this feature definition.""" ded = MotifPharmacophoreLib.deducer_as_substructure(self._feature_def.feature_deducer()) pgs = ded.point_generation_scheme() fts = pgs.feature_types() return tuple(f.name() for f in fts) def detect_features(self, crystal): _csv = ChemistryLib.CrystalStructureView.instantiate(crystal._crystal) _ssc = MotifPharmacophoreLib.MotifPharmacophoreSearchStructureCreator() _ssc.register_components_from_feature_definitions((self._feature_def,)) _mss = _ssc.search_structure(_csv) _ded = self._feature_def.feature_deducer() _feats = _ded.generate_motif_features(_mss, self._feature_def.component_label()) features = [] for i in range(_feats.end_index()): code, feat_pts = _feats.at(i) for fp in feat_pts: pts = fp.points() spheres = tuple(GeometricDescriptors.Sphere((p[0], p[1], p[2]), 1.0) for p in pts) # Skip duplicates dup = False if len(spheres) == 2: for f in features: if len(f.spheres) == 2: if ( f.spheres[0] == spheres[1] and f.spheres[1] == spheres[0] ): dup = True break if not dup: features.append( Pharmacophore.Feature(self._clone(), *spheres) ) return tuple(features)
[docs] def features_from_atoms(self, point_generator, *atoms): """Create features from this feature definition and an atom of the appropriate type. :param point_generator: a string giving one of the point_generator_names associated with this definition. :param atoms: an iterable of :class:`ccdc.molecule.Atom` instances. :returns: a :class:`ccdc.pharmacophore.Pharmacophore.Feature` instance. :raises RuntimeError: if the atom is not suitable for the feature definition. """ ded = MotifPharmacophoreLib.deducer_as_substructure(self._feature_def.feature_deducer()) pgs = ded.point_generation_scheme() for ft in pgs.feature_types(): if ft.name().upper() == point_generator.upper(): mfg = ft.motif_feature_generator() mpts = mfg.motif_features([a._atom for a in atoms]) pts = [mpt.points() for mpt in mpts] spheres = [[GeometricDescriptors.Sphere(tuple(p[i] for i in range(3)), 1.0) for p in l] for l in pts] feats = [Pharmacophore.Feature(self._clone(), *ss) for ss in spheres] return tuple(feats) else: raise RuntimeError('Unknown point generator %s' % point_generator)
[docs] def copy(self): """Copies the Feature so it can be edited freely.""" return self._clone()
def _clone(self): _fd = MotifPharmacophoreLib.MotifPharmacophoreFeatureDefinition() _fd.set_feature_deducer(self._feature_def.feature_deducer()) _fd.set_component_label(self._feature_def.component_label()) _fd.set_colour(self._feature_def.colour()) _fd.set_available_component_labels(self._feature_def.available_component_labels()) _fd.set_description(self._feature_def.description()) _fd.set_id(self._feature_def.id()) # Maybe increment? return Pharmacophore.FeatureDefinition(_fd)
[docs] class Feature(object): """A feature in a Query.""" def __init__(self, feature_definition, *spheres): if feature_definition is None: # Required for ExcludedVolume and AnnotatoinFilter self._feature_def = MotifPharmacophoreLib.MotifPharmacophoreFeatureDefinition() else: self._feature_def = feature_definition._clone()._feature_def for s in spheres: self.add_sphere(s) def __repr__(self): return 'Feature(%s)' % self.identifier @property def colour(self): """The colour of the spheres.""" col = self._feature_def.colour() return Colour(col.r_, col.g_, col.b_, col.a_) @colour.setter def colour(self, value): col = MotifPharmacophoreLib.MotifPharmacophoreFeatureColour(*value) self._feature_def.set_colour(col) @property def component_label(self): """The component label""" return Pharmacophore._component_label_dict[self._feature_def.component_label()] @component_label.setter def component_label(self, value): val = Pharmacophore._component_label_dict.inverse_lookup(value) self._feature_def.set_component_label(val) if hasattr(self, '_query'): self._query._pharmacophore.set_component_label_of_feature(self._feature_def.id(), int(val)) @property def description(self): """Description of the feature definition.""" return self._feature_def.description() @description.setter def description(self, value): self._feature_def.set_description(value) @property def identifier(self): """The identifier of the feature definition.""" return self._feature_def.name() @property def spheres(self): """The spheres defined by this feature definition.""" return tuple(GeometricDescriptors.Sphere(s.centre(), s.radius()) for s in self._feature_def.spheres()) @spheres.setter def spheres(self, values): if len(self.spheres) != len(values): raise RuntimeError('Mismatch of number of spheres') _feature_def = MotifPharmacophoreLib.MotifPharmacophoreFeatureDefinition() _feature_def.set_feature_deducer(self._feature_def.feature_deducer()) _feature_def.set_filter(self._feature_def.filter()) _feature_def.set_component_label(self._feature_def.component_label()) for s in values: _feature_def.add_sphere( MathsLib.Sphere(tuple(s.centre[i] for i in range(3)), s.radius) ) _feature_def.set_colour(self._feature_def.colour()) _feature_def.set_available_component_labels(self._feature_def.available_component_labels()) _feature_def.set_id(self._feature_def.id()) _feature_def.set_description(self._feature_def.description()) self._feature_def.set_spheres( tuple(MathsLib.Sphere(tuple(v.centre[i] for i in range(3)), v.radius) for v in values) )
[docs] def add_sphere(self, sphere): """Add a sphere to the feature definition. :param sphere: :class:`ccdc.descriptors.GeometricDescriptors.Sphere` instance """ _sphere = MathsLib.Sphere([sphere.centre[i] for i in range(3)], sphere.radius) self._feature_def.add_sphere(_sphere)
[docs] class ExcludedVolume(Feature): """ An excluded volume feature. This expresses a region to exclude if atoms are in this region. :param sphere: the sphere to exclude :param smarts: an optional SMARTS pattern that means you can exclude hits that have a specific substructure within the sphere """ def __init__(self, sphere, smarts="[*]"): Pharmacophore.Feature.__init__(self, None) deducer = MotifPharmacophoreLib.MotifPharmacophoreFeatureDeducerExcludedVolume() self._feature_def.set_feature_deducer(deducer) self._feature_def.set_component_label(2) self._filterer = MotifPharmacophoreLib.MotifPharmacophoreFilterSubstructure() self._filterer.set_name(deducer.name()) self._filterer.set_key(smarts) self._feature_def.set_filter(self._filterer) self.add_sphere(sphere) @property def filter_smarts(self): """The SMARTS pattern that controls when an atom excludes in a given volume by substructure matching.""" return self._filterer.key() @filter_smarts.setter def filter_smarts(self, smarts): self._filterer.set_key(smarts)
[docs] class AnnotationFilter(Feature): def __init__(self, key, value): Pharmacophore.Feature.__init__(self, None) deducer = MotifPharmacophoreLib.MotifPharmacophoreFeatureDeducerAnnotation() deducer.set_key(key) deducer.set_value(value) self._feature_def.set_feature_deducer(deducer) @property def key(self): """The annotation filter key.""" return MotifPharmacophoreLib.deducer_as_annotation(self._feature_def.feature_deducer()).key() @property def value(self): """The annotation filter value.""" return MotifPharmacophoreLib.deducer_as_annotation(self._feature_def.feature_deducer()).value()
[docs] class Query(object): """A pharmacophore query.""" def __init__(self, features=None, _motif_pharmacophore=None): if _motif_pharmacophore: self._pharmacophore = _motif_pharmacophore else: self._pharmacophore = MotifPharmacophoreLib.MotifPharmacophore() self._features = [] if features is not None: for f in features: self.add_feature(f)
[docs] @staticmethod def from_file(file_name): """Read a pharmacophore query from a file.""" reader = MotifPharmacophoreLib.MotifPharmacophoreReader(file_name) q = Pharmacophore.Query(_motif_pharmacophore=reader.pharmacophore()) q._features_from_pharmacophore() for f in q.features: f._query = q return q
[docs] def write(self, pharmacophore_path): """Write a pharmacophore query to a file.""" writer = MotifPharmacophoreLib.MotifPharmacophoreWriter() writer.write_pharmacophore(self._pharmacophore, pharmacophore_path)
@property def packing(self): """Whether to use crystal packing for query structure.""" return self._pharmacophore.use_packing() @packing.setter def packing(self, val): self._pharmacophore.set_use_packing(val) @property def intra_only(self): """Whether all features should be intramolecular.""" return self._pharmacophore.intra_only() @intra_only.setter def intra_only(self, value): if value == self.intra_only: return self._pharmacophore.set_intra_only(value) if value: ty = ChemistryLib.ContactCriterion.INTRAMOLECULAR else: ty = ChemistryLib.ContactCriterion.ANY for dc in self._pharmacophore.distance_constraints(): f1 = self.features[dc.id_feature_a()] f2 = self.features[dc.id_feature_b()] if f1.component_label == 'small_molecule' and f2.component_label == 'small_molecule': crit = dc.distance_criterion() crit.set_type(ty) dc.set_distance_criterion(crit) @property def bounding_sphere(self): """A minimum volume sphere containing all feature points.""" s = self._pharmacophore.bounding_sphere() return GeometricDescriptors.Sphere(tuple(s.centre()[i] for i in range(3)), s.radius()) @property def maximum_feature_distance(self): """The maximum distance between features.""" return self._pharmacophore.maximum_feature_distance() @maximum_feature_distance.setter def maximum_feature_distance(self, value): self._pharmacophore.set_maximum_feature_distance(value)
[docs] def add_feature(self, feature): """Add a feature to the pharmacophore.""" self._pharmacophore.add_feature_definition(feature._feature_def) self._features.append(feature) feature._query = self
@property def features(self): """The features of the pharmacophore.""" return self._features def _features_from_pharmacophore(self): def _make_spheres(_fd): return tuple(GeometricDescriptors.Sphere((s.centre().x(), s.centre().y(), s.centre().z()), s.radius()) for s in _fd.spheres()) self._features.extend(tuple( Pharmacophore.Feature(Pharmacophore.FeatureDefinition(_fd), *_make_spheres(_fd)) for _fd in self._pharmacophore.all_feature_definitions() )) DistanceConstraint = collections.namedtuple('DistanceConstraint', ['feature_point1', 'feature_point2', 'distance_range', 'which']) def _repr_distance_constraint(dc): """Representation of the distance constraint.""" return "DistanceConstraint((%d, %d), (%d, %d) (%.3f, %.3f) which='%s')" % ( dc.feature_point1[0], dc.feature_point1[1], dc.feature_point2[0], dc.feature_point2[1], dc.distance_range[0], dc.distance_range[1], dc.which ) DistanceConstraint.__repr__ = _repr_distance_constraint @property def distance_constraints(self): cons = [] for dc in self._pharmacophore.distance_constraints(): fp1 = dc.id_feature_a(), dc.id_feature_point_a() fp2 = dc.id_feature_b(), dc.id_feature_point_b() crit = dc.distance_criterion() rng = crit.min(), crit.max() w = crit.type() if w == ChemistryLib.ContactCriterion.ANY: which = 'Any' elif w == ChemistryLib.ContactCriterion.INTERMOLECULAR: which = 'Intermolecular' else: which = 'Intramolecular' cons.append(Pharmacophore.Query.DistanceConstraint(fp1, fp2, rng, which=which)) return tuple(cons)
[docs] def add_distance_constraint(self, feature1, feature2, distance_range, which): """Add a distance constraint between features. :param feature1: a feature in the model, the tuple contains an id and a point. :param feature2: a feature in the model, the tuple contains an id and a point. :param distance_range: a pair of numbers specifying the desired range. :param which: 'intramolecular', 'intermolecular' or 'any'. """ self.intra_only = False if which.lower().startswith('intra'): which = ChemistryLib.ContactCriterion.INTRAMOLECULAR elif which.lower().startswith('inter'): which = ChemistryLib.ContactCriterion.INTERMOLECULAR else: which = ChemistryLib.ContactCriterion.ANY crit = SubstructureSearchLib.InterAtomicDistanceCriterion(min(distance_range), max(distance_range), which, False) feat_id1, point_id1 = feature1 feat_id2, point_id2 = feature2 for dc in self._pharmacophore.distance_constraints(): if ( (dc.id_feature_a() == feat_id1 and dc.id_feature_point_a() == point_id1 and dc.id_feature_b() == feat_id2 and dc.id_feature_point_b() == point_id2) or (dc.id_feature_a() == feat_id2 and dc.id_feature_point_a() == point_id2 and dc.id_feature_b() == feat_id1 and dc.id_feature_point_b() == point_id1) ): dc.set_distance_criterion(crit) break else: assert 0 <= feat_id1 < len(self.features) assert 0 <= point_id1 < len(self.features[feat_id1].spheres) fd1 = self.features[feat_id1]._feature_def s = self.features[feat_id1].spheres[point_id1] fs1 = MathsLib.Sphere(tuple(s.centre[i] for i in range(3)), s.radius) assert 0 <= feat_id2 < len(self.features) assert 0 <= point_id2 < len(self.features[feat_id2].spheres) fd2 = self.features[feat_id2]._feature_def s = self.features[feat_id2].spheres[point_id2] fs2 = MathsLib.Sphere(tuple(s.centre[i] for i in range(3)), s.radius) constraint = MotifPharmacophoreLib.MotifPharmacophoreConstraintDistance( feat_id1, point_id1, feat_id2, point_id2, fs1, fs2, fd1, fd2 ) constraint.set_distance_criterion(crit) self._pharmacophore.add_constraint(constraint)
[docs] class SearchHit: """Pharmacophore search hit.""" def __init__(self, model, _motif_pharmacophore_hit=None): if _motif_pharmacophore_hit: self._pharmacophore_hit = _motif_pharmacophore_hit else: self._pharmacophore_hit = MotifPharmacophoreLib.MotifPharmacophoreHit() self.model = model @property def identifier(self): """The hit identifier""" return self._pharmacophore_hit.identifier() @property def overlay_rmsd(self): """The root-mean-square deviation of the pharmacophore overlay""" return self._pharmacophore_hit.overlay_rmsd() @property def overlay_transformation(self): """The transformation matrix of the pharmacophore overlay""" return molecule.Molecule.Transformation(_transformation=self._pharmacophore_hit.overlay_transformation()) @property def cluster_number(self): """The pharmacophore hit cluster number""" return self._pharmacophore_hit.cluster_number() @property def molecule(self): """The pharmacophore overlay molecule""" return molecule.Molecule(self._pharmacophore_hit.identifier(), _molecule=self._pharmacophore_hit.molecule()) @property def protein(self): """The hit's protein, if any.""" return protein.Protein(self.identifier, self._pharmacophore_hit.protein().create_editable_molecule()) @property def small_molecule(self): """The hit's small molecule.""" return molecule.Molecule(self.identifier, self._pharmacophore_hit.small_molecule()) @property def annotations(self): """The annotations of the hit.""" return { k.strip() : v for k, v in self._pharmacophore_hit.external_annotations().items() } @property def points(self): """The matched points of the hit.""" _m = self._pharmacophore_hit.pharmacophore_match() pts = [] pdic = dict() for i in range(_m.nfeature_matches()): _fm = _m.feature_match(i) pdic[_fm.pharmacophore_feature_id_] = _fm.points_ for i, p in sorted(pdic.items()): pts.extend(p) return tuple(pts)
[docs] def constraint_values(self): """The values of the distance constraints of the query.""" pts = self.points counts = [len(f.spheres) for f in self.model.features] values = [] for dc in self.model.distance_constraints: f_inx, s_inx = dc.feature_point1 inx1 = sum(counts[:f_inx]) + s_inx f_inx, s_inx = dc.feature_point2 inx2 = sum(counts[:f_inx]) + s_inx dist = GeometricDescriptors.point_distance(pts[inx1], pts[inx2]) values.append(dist) return values
[docs] def feature_points(self, feature): """The points in the hit associated with the given feature.""" pts = self.points counts = [len(f.spheres) for f in self.model.features] inx = feature._feature_def.id() n = len(feature.spheres) start = sum(counts[:inx]) return tuple(pts[start:start+n])
[docs] class Search(object): """A pharmacophore search"""
[docs] class Settings(object): """Settings for a pharmacophore search.""" def __init__(self, max_hits_per_structure=5, max_hit_structures=100000): """Initialise settings. :param max_hits_per_structure: keep only top n matches per database entry :param max_hit_structures: restrict maximum number of matches per database entry """ self._search_manager = MotifPharmacophoreLib.PharmacophoreSearchManager() self.max_hit_structures = max_hit_structures self.max_hits_per_structure = max_hits_per_structure self.n_threads = Pharmacophore._default_n_threads() @property def n_threads(self): """The number of threads to use in the search.""" return self._search_manager.n_search_threads() @n_threads.setter def n_threads(self, value): self._search_manager.set_n_search_threads(value) @property def max_hit_structures(self): """The number of hits to retain.""" return self._search_manager.hitGenerationKeepTopNHits() @max_hit_structures.setter def max_hit_structures(self, value): self._search_manager.setHitGenerationKeepTopNHits(value) @property def max_hits_per_structure(self): """The number of hits per structure to be returned.""" return self._search_manager.searchOptionMaxHitsPerStructure() @max_hits_per_structure.setter def max_hits_per_structure(self, value): self._search_manager.setMaxHitsPerStructure(value) @property def max_hit_rmsd(self): """The maximum RMSD of hit structures to be returned.""" return self._search_manager.hitGenerationMaxHitRMSD() @max_hit_rmsd.setter def max_hit_rmsd(self, value): self._search_manager.setHitGenerationMaxHitRMSD(value) @property def skip_proteins(self): """Whether or not to skip protein structures.""" return self._search_manager.hitGenerationSkipProteins() @skip_proteins.setter def skip_proteins(self, value): self._search_manager.setHitGenerationSkipProteins(value) @property def complete_small_molecules(self): """Whether or not to return the complete small molecule hit.""" return self._search_manager.hitGenerationCompleteSmallMolecules() @complete_small_molecules.setter def complete_small_molecules(self, value): self._search_manager.setHitGenerationCompleteSmallMolecules(value) @property def complete_proteins(self): """Whether or not to return the complete protein of a hit.""" return self._search_manager.hitGenerationCompleteProteins() @complete_proteins.setter def complete_proteins(self, value): self._search_manager.setHitGenerationCompleteProteins(value) @property def three_cubed_packing(self): """Whether or not to use 3-cubed packing.""" return self._search_manager.searchOptionUseThreeCubedPacking() @three_cubed_packing.setter def three_cubed_packing(self, value): self._search_manager.setSearchOptionUseThreeCubedPacking(value) def _clone(self): settings = Pharmacophore.Search.Settings() settings.n_threads = self.n_threads settings.max_hit_structures = self.max_hit_structures settings.max_hits_per_structure = self.max_hits_per_structure settings.max_hit_rmsd = self.max_hit_rmsd settings.skip_proteins = self.skip_proteins settings.complete_small_molecules = self.complete_small_molecules settings.complete_proteins = self.complete_proteins settings.three_cubed_packing = self.three_cubed_packing return settings
def __init__(self, settings=None): if settings is None: settings = Pharmacophore.Search.Settings() self.settings = settings self._feature_database = None self._feature_database_model = MotifPharmacophoreLib.PharmacophoreFeatureDatabaseModel() self._pm_factory = MotifPharmacophoreLib.PharmacophoreManagerFactory() self._search_manager = self.settings._clone()._search_manager self._hits = None def __del__(self): self._feature_database_model = None self._results_manager = None self._search_manager = None self._pm_factory = None @property def feature_database(self): """The CSD-CrossMiner feature database.""" return self._feature_database @feature_database.setter def feature_database(self, val): self._feature_database = val self._feature_database_model.set_feature_database(val._feature_db) self._search_manager.set_feature_database_filtered_view(self._feature_database_model.feature_database_filtered_view())
[docs] def search(self, model, database=None, verbose=False, stop_after=sys.maxsize): """Perform a pharmacophore search.""" if database is None: if self.feature_database is None: self.feature_database = Pharmacophore.default_feature_database() database = self.feature_database try: self._search_manager = self.settings._clone()._search_manager self._results_manager = MotifPharmacophoreLib.PharmacophoreResultsModel() self._search_manager.add_results_writer(self._results_manager) self._feature_database_model.set_feature_database(database._feature_db) self._search_manager.set_feature_database_filtered_view(self._feature_database_model.feature_database_filtered_view()) self._search_manager.set_pharmacophore(model._pharmacophore) self._search_manager.non_qt_update_feature_mapping() self._search_manager.update() self._results_manager.set_maximum_number_of_hits_to_save(self.settings.max_hit_structures) self._hits = None # This is ugly. Currently we need to reload the feature database to correctly initialise other # underlying CrossMiner search dependencies. feature_database_file = database.file_name fdb_filtered_view = self._search_manager.feature_database_filtered_view() if verbose: print('filtered feature database size {}'.format(fdb_filtered_view.size())) selector = MotifPharmacophoreLib.SelectCSDDatabase() selector = MotifPharmacophoreLib.select_csd_as_select_structure(selector) self._feature_database_model.process_feature_database(feature_database_file, UtilitiesLib.ProgressMonitor(), selector) db_pool = self._feature_database_model.structure_databases() self._search_manager.set_structure_databases(db_pool) self._results_manager.set_accept_hits(True) self._search_manager.start_search() # Wait for search to finish. i = 0 while self._search_manager.is_running(): i += 1 time.sleep(0.1) if verbose: print('{} n_searched = {} n_hits = {}'.format( i, self._search_manager.n_searched(), self._results_manager.n_hits() )) if self._results_manager.n_hits() >= stop_after: break self._search_manager.stop_search() self._results_manager.set_accept_hits(False) self._hits = self._results_manager.hits() finally: self._search_manager.remove_results_writer(self._results_manager) self._results_manager = None if verbose: for hit in self._hits: print('Hit id: {} rmsd: {} '.format(hit.identifier(), hit.overlay_rmsd())) pharmacophore_hits = list(Pharmacophore.SearchHit(model, _motif_pharmacophore_hit=hit) for hit in self._hits) return pharmacophore_hits
[docs] class FeatureDatabase(object): """A database of pharmacophore features.""" DatabaseInfo = collections.namedtuple('DatabaseInfo', ['file_name', 'n_entries', 'colour']) def show_dbi(dbi): return "DatabaseInfo(file_name='%s', n_entries=%d, colour=%s)" % (dbi.file_name, dbi.n_entries, str(dbi.colour)) DatabaseInfo.__repr__ = show_dbi def __init__(self, file_name, _feature_db=None): if _feature_db is None: _feature_db = MotifPharmacophoreLib.MotifPharmacophoreFeatureDatabase() self.file_name = file_name self._feature_db = _feature_db fd_names = [f.identifier for f in self.feature_definitions] if 'excluded_volume' not in fd_names: self.add_feature_definition(Pharmacophore.ExcludedVolume(GeometricDescriptors.Sphere((0, 0, 0), 0))) def __len__(self): return self._feature_db.size()
[docs] @staticmethod def from_file(feature_database_path): """Read a pharmacophore feature database from the specified path. :param feature_database_path: pharmacophore feature database file path. :returns: :class:`ccdc.Pharmacophore.FeatureDatabase` instance. """ progress_monitor = UtilitiesLib.ProgressMonitor() feature_database = MotifPharmacophoreLib.MotifPharmacophoreFeatureDatabase() feature_database.read(feature_database_path, progress_monitor) return Pharmacophore.FeatureDatabase(feature_database_path, feature_database)
[docs] def write(self, file_name): """Write a pharmacophore feature database to the specified path. :param file_name: pharmacophore feature database file path. """ progress_monitor = UtilitiesLib.ProgressMonitor() self._feature_db.write(file_name, progress_monitor)
@property def schema_version(self): """The database schema version.""" return self._feature_db.version() @property def databases(self): """The structure databases used in the construction of this feature database. Each database is presented as a tuple of file_name, nunmber of entries, and colour.""" colours = tuple( self._feature_db.colour(i) for i in range(self._feature_db.number_of_source_databases()) ) return tuple( Pharmacophore.FeatureDatabase.DatabaseInfo( self._feature_db.file_name(i), self._feature_db.nentries_from_source_database(i), Colour(c.r_, c.g_, c.b_, c.a_) ) for i, c in enumerate(colours) ) @property def identifiers(self): """All identifiers from the database.""" return self._feature_db.identifiers() def entries(self): for i in range(self._feature_db.size()): yield Pharmacophore.FeatureDatabase.Entry(_entry=self._feature_db.entry(i)) def __iter__(self): """Iterator.""" return self.entries() # pylint: disable=E1101 def __getitem__(self, index): if index < 0 or index >= len(self): raise IndexError('FeatureDatabase: index out of range') return Pharmacophore.FeatureDatabase.Entry(_entry=self._feature_db.entry(index))
[docs] def entry(self, identifier): """The :class:`ccdc.pharmacophore.Pharmacophore.FeatureDatabase.Entry` corresponding to the identifier. :param identifier: identifier of the entry. :returns: a :class:`ccdc.pharmacophore.Pharmacophore.FeatureDatabase.Entry` instance. :raises: RuntimeError if the identifier is not present. """ if not self._feature_db.identifier_exists(UtilitiesLib.DatabaseEntryIdentifier(identifier)): raise RuntimeError('FeatureDatabase: %s not present' % identifier) return Pharmacophore.FeatureDatabase.Entry( _entry=self._feature_db.entry(identifier) )
@property def feature_definitions(self): """The feature definitions present in the database.""" return tuple( Pharmacophore.FeatureDefinition(_feature_def=f) for f in self._feature_db.feature_definitions() ) + tuple( Pharmacophore.FeatureDefinition(_feature_def=f) for f in self._feature_db.non_indexed_feature_definitions() ) + tuple( Pharmacophore.FeatureDefinition(_feature_def=f) for f in self._feature_db.non_matchable_feature_definitions() )
[docs] def add_feature_definition(self, feature_definition): """Add a feature definition to the database. Note: this feature definition will not be indexed in the database, so will run rather more slowly if required in a search. To add the corresponding index, the database will need to be recreated with the feature present. """ if isinstance(feature_definition, Pharmacophore.FeatureDefinition): _fd = feature_definition._feature_def elif isinstance(feature_definition, Pharmacophore.Feature): _fd = feature_definition._feature_def else: raise NotImplementedError('Cannot add one of these to a database %s' % str(feature_definition)) db_fd = MotifPharmacophoreLib.MotifPharmacophoreFeatureDatabaseFeatureDefinition() db_fd.set_feature_deducer(_fd.feature_deducer()) label = _fd.component_label() if label == 2: labels = [0, 1] else: labels = [label] db_fd.set_component_labels(labels) if _fd.is_matchable(): self._feature_db.add_non_indexed_feature_definition(db_fd) else: self._feature_db.add_non_matchable_feature_definition(db_fd)
@property def annotation_keys(self): """The annotations available in this database.""" return self._feature_db.available_annotations()
[docs] def annotate(self, pattern, **kw): """Add or replace an annotation in the database. :param pattern: a glob-style pattern which will be used to match against identifiers. All entries in the database with a matching identifier will be annotated with the key, value pairs of the `kw` argument. """ for k, w in kw.items(): self._feature_db.set_external_annotation(pattern, k, w)
[docs] def clear_annotations(self): """Remove all annotations from the database.""" self._feature_db.clear_external_annotations()
[docs] class Entry(object): """An entry in a FeatureDatabase.""" class _write_through_dict(dict): def __init__(self, _entry, *args, **kw): self._entry = _entry dict.__init__(self, *args, **kw) def __setitem__(self, key, value): dict.__setitem__(self, key, value) self._entry.set_external_annotation(key, str(value)) def update(self, dic): for k, v in dic.items(): self[k] = v def __init__(self, _entry=None): self._entry = _entry @property def identifier(self): """The identifier of the entry.""" return self._entry.name() @identifier.setter def identifier(self, value): self._entry.set_name(value) @property def annotations(self): """The dictionary of annotations for the entry.""" return Pharmacophore.FeatureDatabase.Entry._write_through_dict(self._entry, self._entry.external_annotations())
[docs] def clear_annotations(self): """Clear all annotations from the entry.""" self._entry.clear_external_annotations()
@property def source_database_index(self): return self._entry.source_db_id()
[docs] class Creator(object): """The creator for feature databases."""
[docs] class Settings(object): """Settings for feature database creation.""" def __init__(self, feature_definition_directory=None, n_threads=None, skip_duplicate_identifiers=False): if feature_definition_directory is None: feature_definition_directory = Pharmacophore._feature_definition_directory() self.feature_definition_directory = feature_definition_directory if n_threads is None or n_threads < 1: n_threads = Pharmacophore._default_n_threads() self.n_threads = n_threads self.skip_duplicate_identifiers = skip_duplicate_identifiers
[docs] class StructureDatabase(object): """A database of structures from which a feature database may be created.""" def __init__(self, db_info, use_crystal_symmetry=True, sqlmol2_database_path=None, structure_filters=None, structure_database_path=None): """Initialise for creation. If you are creating a Structure database from the CSD set DatabaseInfo.file_name to a list of CSD database file names. If you want to include any available CSD updates include them in the list. Alternatively, set DatabaseInfo.file_name to the string 'csd' rather than a list and all CSD updates will be automatically included. :param db_info: a :class:`ccdc.pharmacophore.FeatureDatabase.DatabaseInfo` instance. Here the n_entries attribute is the desired number of entries in the resulting database. :param use_crystal_symmetry: whether or not to use crystal symmetry when generating features. :param structure_database_path: where to save the generated structure database. Supported types are .csdsqlx and .sqlmol2. :param structure_filters: a :class:`ccdc.search.Settings` instance to filter the source database. """ self.db_info = db_info self.use_crystal_symmetry = use_crystal_symmetry if sqlmol2_database_path is not None: warnings.warn("sqlmol2_database_path is deprecated and will be removed. Use structure_database_path instead.", DeprecationWarning) self.structure_database_path = structure_database_path or sqlmol2_database_path self.structure_filters = structure_filters def _database_data(self, skip_duplicate_identifiers): if self.structure_database_path is not None: self._database_path = self.db_info.file_name if not os.path.exists(self.structure_database_path): self._create_structure_database(self.structure_database_path, skip_duplicate_identifiers) with io.EntryReader(self.structure_database_path) as reader: database_data = MotifPharmacophoreLib.StructureDatabaseFeatureCreatorData( os.path.basename(self.structure_database_path), MotifPharmacophoreLib.MotifPharmacophoreFeatureColour(*self.db_info.colour), not self.use_crystal_symmetry, reader._db ) return [database_data] else: csd = io.EntryReader(self.db_info.file_name) if self.structure_filters is None: self.structure_filters = self.default_csd_filters() if isinstance(csd.file_name, (list, tuple)): paths = csd.file_name else: paths = [csd.file_name] database_data = [] if 0 < self.db_info.n_entries < sys.maxsize: to_get = self.db_info.n_entries else: to_get = sys.maxsize for self._database_path in paths: with io.EntryReader(self._database_path) as reader: if self.structure_filters or 0 < self.db_info.n_entries < sys.maxsize: identifiers = self._apply_csd_filters(to_get, self.structure_filters) data = MotifPharmacophoreLib.StructureDatabaseFeatureCreatorData( os.path.basename(self._database_path), MotifPharmacophoreLib.MotifPharmacophoreFeatureColour(*self.db_info.colour), not self.use_crystal_symmetry, reader._db, identifiers ) database_data.append(data) to_get -= len(identifiers) if to_get <= 0: break return database_data def _create_structure_database(self, structure_database_path, skip_duplicate_identifiers): """Convert input structure database to a structure database. :param structure_database_path: The path to the output structure database file. """ progress_monitor = UtilitiesLib.ProgressMonitor() self._append_to_structure_database(progress_monitor, self._database_path, structure_database_path, skip_duplicate_identifiers) def _append_to_structure_database(self, progress_monitor, database_path, structure_database_path, skip_duplicate_identifiers): input_files = [] if os.path.isdir(database_path): for path, dirs, files in os.walk(database_path): input_files.extend([os.path.join(path,f) for f in files if f.endswith(".mol2") or f.endswith(".sdf")]) else: input_files.append(self._database_path) if os.path.exists(structure_database_path): os.unlink(structure_database_path) suffix = structure_database_path[-8:] if structure_database_path.endswith('.sqlmol2') or structure_database_path.endswith('.csdsqlx'): structure_database_path = structure_database_path[:-8] try: if skip_duplicate_identifiers: duplicates_policy = MotifPharmacophoreLib.SkipDuplicateEntryPolicy() else: duplicates_policy = MotifPharmacophoreLib.StopDuplicateEntryPolicy() if suffix == '.csdsqlx': MotifPharmacophoreLib.convert_bindingsites_to_csdsqlx( input_files, structure_database_path, progress_monitor, duplicates_policy ) else: MotifPharmacophoreLib.convert_bindingsites_to_sqlmol2( input_files, structure_database_path, progress_monitor, duplicates_policy ) except RuntimeError as e: for ext in ['.csdsqlx', '.sqlmol2']: db_path = structure_database_path + ext if os.path.isfile(db_path): os.remove(db_path) if len(e.args) > 0 and "is already in the database" in e.args[0]: raise RuntimeError(("Error creating structure database {}, " "entry identifiers should be unique".format(structure_database_path), e.args[0])) else: raise
[docs] @staticmethod def default_csd_filters(): """Suggested filters for CSD structures to include in pharmacophore feature database.""" filter_settings = search.Search.Settings() filter_settings.has_3d_coordinates = True filter_settings.max_r_factor = 10. filter_settings.no_disorder = True filter_settings.not_polymeric = True filter_settings.only_organic = False filter_settings.must_not_have_elements = [ 'He', 'Be', 'Ne', 'Al', 'Si', 'Ar', 'Sc', 'Ti', 'V', 'Cr', 'Ga', 'Ge', 'As', 'Se', 'Kr', 'Rb', 'Y', 'Zr', 'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd', 'In', 'Sn', 'Sb', 'Te', 'Xe', 'Cs', 'Ba', 'La', 'Ce', 'Pr', 'Nd', 'Pm', 'Sm', 'Eu', 'Gd', 'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb', 'Lu', 'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg', 'Tl', 'Pb', 'Bi', 'Po', 'Rn', 'Fr', 'Ra', 'Ac', 'Th', 'Pa', 'U' ] return filter_settings
def _apply_csd_filters(self, max_hit_structures, filter_settings): """Filter input CSD database. :param maximum_hit_structures: The maximum number of structures to retain, or ``None`` for no limit. Default ``None``. :param filter_settings: Structure quality filters to be applied. Object of type :class:`SubstructureSearch.Settings`, boolean ``True`` for default filters, or ``None`` for no filtering. Default ``None``. """ with io.EntryReader(self._database_path) as reader: if not filter_settings: filter_settings = search.Search.Settings() elif filter_settings is True: # default filters, for backwards compatibility. filter_settings = self.default_csd_filters() identifiers = [] for e in reader: if filter_settings.test(e): identifiers.append(e.identifier) if len(identifiers) >= max_hit_structures: break return identifiers
def __init__(self, settings=None): if settings is None: settings = Pharmacophore.FeatureDatabase.Creator.Settings() self.settings = settings self._creator = MotifPharmacophoreLib.MotifPharmacophoreFeatureDatabaseCreator() self._creator.set_number_of_threads(self.settings.n_threads)
[docs] def create(self, structure_databases, feature_defs=None): """Create the feature database. :param structure_databases: an iterable of :class:`ccdc.pharmacophore.Pharmacophore.FeatureDatabase.Creator.StructureDatabase` instances. :param feature_defs: an iterable of :class:`ccdc.pharmacophore.Pharmacophore.FeatureDefinition` instances or None. If None, the default set of feature definitions in read in by :meth:`ccdc.pharmacophore.Pharmacophore.read_feature_definitions` will be used. """ if feature_defs is None: if not Pharmacophore.feature_definitions: Pharmacophore.read_feature_definitions() feature_defs = list(Pharmacophore.feature_definitions.values()) def sort_key(fd): d = dict(any=0, small_molecule=1, protein=2) adict = dict( acceptor=0, acceptor_projected=1, donor_ch_projected=2, donor_projected=3, heavy_atom=4, hydrophobe=5, ring=6, ring_non_planar=7, ring_planar_projected=8, purine=9, pyrimidine=10, adenine=11, cytosine=12, guanine=13, thymine=14, uracil=15, deoxyribose=16, ribose=17 ) sdict = dict( exit_vector=0, halogen=1, bromine=2, chlorine=3, fluorine=4, metal=5, water=6 ) pdict = dict( ALA=0, ARG=1, ASN=2, ASP=3, CYS=4, GLN=5, GLU=6, GLY=7, HIS=8, ILE=9, LEU=10, LYS=11, MET=12, PHE=13, PRO=14, SER=15, THR=16, TRP=17, TYR=18, VAL=19, ) k = fd.component_label n = fd.identifier if k == 'any': v = (d[k], adict.get(n, 99), n) if k == 'small_molecule': v = (d[k], sdict.get(n, 99), n) if k == 'protein': v = (d[k], pdict.get(n, 99), n) return v feature_defs.sort(key=sort_key) _fds = [Pharmacophore._database_feature_def_from_fd(fd) for fd in feature_defs] databases = [] for sdb in structure_databases: databases.extend(sdb._database_data(self.settings.skip_duplicate_identifiers)) progress_monitor = UtilitiesLib.ProgressMonitor() feature_db = self._creator.create_feature_database(databases, _fds, progress_monitor) return Pharmacophore.FeatureDatabase('', _feature_db=feature_db)