Source code for ccdc.pharmacophore

#
# This code is Copyright (C) 2015 The Cambridge Crystallographic Data Centre
# (CCDC) of 12 Union Road, Cambridge CB2 1EZ, UK and a proprietary work of CCDC.
# This code may not be used, reproduced, translated, modified, disassembled or
# copied, except in accordance with a valid licence agreement with CCDC and may
# not be disclosed or redistributed in any form, either in whole or in part, to
# any third party. All copies of this code made in accordance with a valid
# licence agreement as referred to above must contain this copyright notice.
#
# No representations, warranties, or liabilities are expressed or implied in the
# supply of this code by CCDC, its servants or agents, except where such
# exclusion or limitation is prohibited, void or unenforceable under governing
# law.
#
"""
pharmacophore.py - access to pharmacophore searches.

This allows pharmacophore models to be read and written, and using these to run searches.
Access to the hits is also provided.
"""

#########################################################################################################

import os
import sys
import collections
import time
import multiprocessing
import warnings

from ccdc import io, search, molecule, utilities, protein
from ccdc.descriptors import GeometricDescriptors
from ccdc.utilities import Colour

with utilities._private_importer() as pi:
    pi.import_ccdc_module('MotifPharmacophoreLib')
    pi.import_ccdc_module('SubstructureSearchLib')
    pi.import_ccdc_module('ChemistryLib')
    pi.import_ccdc_module('MathsLib')
    pi.import_ccdc_module('UtilitiesLib')

MotifPharmacophoreLib.licence_check()

#########################################################################################################

[docs]class Pharmacophore(object): """Namespace for the Pharmacophore functionality.""" crossminer_version = '1.4.0' feature_definitions = dict() @staticmethod def _feature_definition_directory(): """Location of the default feature definitions.""" loc = io._CSDDatabaseLocator.get_crossminer_feature_definition_directory() if loc is None or not os.path.exists(loc): import ccdc loc = ccdc._find_crossminer_feature_directory() if loc is None or not os.path.exists(loc): raise RuntimeError('Unable to find the pharmacophore feature definition directory') return loc
[docs] @staticmethod def read_feature_definitions(directory=None): """Read all the feature definitions found in any subdirectory of the provided argument. By default this will read the feature definitions supplied by the CSD. """ if directory is None: directory = Pharmacophore._feature_definition_directory() for dirpath, dirnames, filenames in os.walk(directory): for fname in filenames: if fname.endswith('.cpf'): fd = Pharmacophore.FeatureDefinition.from_file(os.path.join(dirpath, fname)) Pharmacophore.feature_definitions[fd.identifier] = fd
[docs] @staticmethod def default_feature_database(): """The CSD provided CrossMiner feature database.""" return Pharmacophore.FeatureDatabase.from_file( io._CSDDatabaseLocator.get_crossminer_database_location() )
[docs] @staticmethod def default_feature_database_location(): """The path to the default CrossMiner database.""" return io._CSDDatabaseLocator.get_crossminer_database_location()
_component_label_dict = utilities.bidirectional_dict(( (0, 'protein'), (1, 'small_molecule'), (2, 'any') )) _apply_rule_dic = utilities.bidirectional_dict(( (MotifPharmacophoreLib.MotifPharmacophoreSMARTSFeature.ALWAYS, 'Always'), (MotifPharmacophoreLib.MotifPharmacophoreSMARTSFeature.H_PRESENT, 'With_h'), (MotifPharmacophoreLib.MotifPharmacophoreSMARTSFeature.NO_H_PRESENT, 'No_h') )) # Not currently used _planarity_dic = utilities.bidirectional_dict(( (MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorCentroid.ANY, 'Any'), (MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorCentroid.PLANAR, 'Planar'), (MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorCentroid.NON_PLANAR, 'Non_planar'), )) @staticmethod def _default_n_threads(): n_threads = max(int(multiprocessing.cpu_count() / 2), 1) return n_threads @staticmethod def _database_feature_def_from_fd(fd): _fd = MotifPharmacophoreLib.MotifPharmacophoreFeatureDatabaseFeatureDefinition() _fd.set_feature_deducer(fd._feature_def.feature_deducer()) _fd.set_colour(MotifPharmacophoreLib.MotifPharmacophoreFeatureColour(*fd.colour)) label = Pharmacophore._component_label_dict.inverse_lookup(fd.component_label) if label == 2: labels = [0, 1] else: labels = [label] _fd.set_component_labels(labels) return _fd
[docs] class PointGenerator(object): """A mechanism for generating spheres from a SMARTS match.""" def __init__(self, _pg=None): self._pg = _pg @property def parameters(self): """The optional parameters of the point generator.""" return { p.replace(' ', '_') : v for p, v in zip(self._pg.optional_parameter_names(), self._pg.optional_parameters()) }
[docs] def feature_points(self, atoms): """The points generated by this generator. :raises: RuntimeError if the sequence of points is inappropriate for the generator. """ _motif_pts = self._pg.motif_features([a._atom for a in atoms]) return tuple(p for mf in _motif_pts for p in mf.points())
[docs] class DummyPointGenerator(PointGenerator): """Generates no points.""" def __init__(self, identifier): self._pg = MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorDummy() self.identifier = identifier
[docs] class PointPointGenerator(PointGenerator): """Generates a single point at the position of the relevant atom.""" def __init__(self, identifier): self._pg = MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorPoint() self.identifier = identifier
[docs] class LinearPointGenerator(PointGenerator): """Generates a point displaced from the relevant atom.""" def __init__(self, identifier, distance=2.8, neighbour_is_base_feature=False): self._pg = MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorLinear( distance, neighbour_is_base_feature) self.identifier = identifier
[docs] class NormalPointGenerator(PointGenerator): """Generates a point from the normal to the plane of atoms.""" def __init__(self, identifier, distance=2.8): self._pg = MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorNormal( distance, False, 0.0 ) self.identifier = identifier
[docs] class PlanarNormalPointGenerator(PointGenerator): """Generates a point from the normal to the plane of a planar ring.""" def __init__(self, identifier, distance=2.8, planar_ring_tolerance=0.0): self._pg = MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorNormal( distance, True, planar_ring_tolerance ) self.identifier = identifier
[docs] class AnyCentroidPointGenerator(PointGenerator): """Generates a point at the centroid of atoms.""" def __init__(self, identifier): self._pg = MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorCentroid( MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorCentroid.ANY, 0.0 ) self.identifier = identifier
[docs] class PlanarCentroidPointGenerator(PointGenerator): """Generates a point at the centre of a planar ring.""" def __init__(self, identifier, atom_planarity_threshold=0.0): self._pg = MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorCentroid( MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorCentroid.PLANAR, atom_planarity_threshold ) self.identifier = identifier
[docs] class NonPlanarCentroidPointGenerator(PointGenerator): """Generates a point at the centroid of atoms which are not planar.""" def __init__(self, identifier, atom_planarity_threshold=0.0): self._pg = MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorCentroid( MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorCentroid.NON_PLANAR, atom_planarity_threshold ) self.identifier = identifier
[docs] class TrigonalPointGenerator(PointGenerator): """Generates two points with a trigonal geometry.""" def __init__(self, identifier, distance=2.8): self._pg = MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorTrigonal(distance) self.identifier = identifier
[docs] class TetrahedralPointGenerator(PointGenerator): """Generates two points with a tetrahedral geometry.""" def __init__(self, identifier, distance=2.8): self._pg = MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorTetrahedral(distance) self.identifier = identifier
[docs] class SMARTSDefinition(object): """A SMARTS definition with associated point generation schemes.""" def __init__(self, smarts, point_generators, _sd=None, **kw): # _incomplete if made by hand. Need to resolve the motif_feature_types() when in a feature_def # if _sd is None: self._sd = MotifPharmacophoreLib.MotifPharmacophoreSMARTSFeature() self._sd.set_SMARTSdefinition(smarts) self._point_generators = point_generators self._incomplete = True app_rule = kw.get('apply_rule') if app_rule is None: app = MotifPharmacophoreLib.MotifPharmacophoreSMARTSFeature.ALWAYS else: app = Pharmacophore._apply_rule_dic.inverse_lookup(app_rule) self._sd.set_apply_rule(app) self._sd.set_permutational_equivalence(kw.get('permutational_equivalence', True)) self._sd.set_allow_overlapping_substructures(kw.get('allow_overlaps', False)) else: self._sd = _sd self._incomplete = False self._point_generators = point_generators @staticmethod def _from_SMARTS_feature_def(_sd, _pgs): return Pharmacophore.SMARTSDefinition( _sd.SMARTSdefinition(), tuple( (_pgs.feature_types()[i].name(), v) for i, v in _sd.motif_feature_types() ), _sd, apply_rule=Pharmacophore._apply_rule_dic[_sd.apply_rule()], permutional_equivalence=_sd.permutational_equivalence(), allow_overlaps=_sd.allow_fully_overlapping_substructures() ) @property def SMARTS(self): """The SMARTS string.""" return self._sd.SMARTSdefinition() @property def apply_rule(self): """How to apply the SMARTS definition.""" return Pharmacophore._apply_rule_dic[self._sd.apply_rule()] @property def permutational_equivalence(self): """Whether permutionally equivalent structures are deemed to be a single match.""" return self._sd.permutational_equivalence() @property def allow_overlaps(self): """Whether fully overlapping structures are permitted to be a hit.""" return self._sd.allow_fully_overlapping_substructures() @property def point_generators(self): """The generators for feature points.""" return self._point_generators
[docs] def search(self, crystal): """Search a crystal for instances of the SMARTS pattern.""" raise NotImplementedError('Pharmacophore.SMARTSDefinition.search')
[docs] class FeatureDefinition(object): """A feature definition in a feature database.""" _point_generators = utilities.bidirectional_dict( Dummy= MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorDummy, Point= MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorPoint, Linear= MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorLinear, Normal= MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorNormal, Centroid= MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorCentroid, Trigonal= MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorTrigonal, Tetrahedral=MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGeneratorTetrahedral, ) def __init__(self, _feature_def=None): self._feature_def = _feature_def def __repr__(self): return 'FeatureDefinition(%s)' % self.identifier
[docs] @staticmethod def from_file(file_name): """Reads a structural feature definition from a file.""" component = os.path.basename(os.path.dirname(file_name)) try: tag = Pharmacophore._component_label_dict.inverse_lookup(component) except Exception as exc: tag = 2 _creator = MotifPharmacophoreLib.MotifPharmacophoreFeatureDatabaseCreator() _fd = _creator.read_feature_definition(file_name) _feature_definition = MotifPharmacophoreLib.MotifPharmacophoreFeatureDefinition() _feature_definition.set_feature_deducer(_fd.feature_deducer()) _feature_definition.set_component_label(tag) _feature_definition.set_colour(_fd.colour()) return Pharmacophore.FeatureDefinition(_feature_def=_feature_definition)
[docs] @staticmethod def from_SMARTS_definitions(identifier, smarts, point_generators): """Create a feature from SMARTS definitions. :param identifier: string :param smarts: iterable of :class:`ccdc.pharmacophore.Pharmacophore.SMARTS_definition` instances. :param point_generators: iterable of :class:`ccdc.pharmacophore.Pharmaophore.PointGenerator` instances. """ pgs = MotifPharmacophoreLib.MotifPharmacophoreFeaturePointGenerationScheme() name_dic = dict() for i, pg in enumerate(point_generators): name_dic[pg.identifier] = i pgs.add_feature_type(pg.identifier, pg._pg) _sdefs = [] for sd in smarts: _sdefs.append(sd._sd) pg_spec = [] for name, ats in sd.point_generators: inx = name_dic[name] pg_spec.append((inx, ats)) sd._sd.set_motif_feature_types(pg_spec) ded = MotifPharmacophoreLib.MotifPharmacophoreFeatureDeducerSubstructure(_sdefs, pgs) ded.set_name(identifier) fd = Pharmacophore.FeatureDefinition() fd._feature_def = MotifPharmacophoreLib.MotifPharmacophoreFeatureDefinition() fd._feature_def.set_feature_deducer(ded) fd._feature_def.set_description(identifier) return fd
@property def SMARTS_definitions(self): deducer = self._feature_def.feature_deducer() if deducer.class_name() == 'MotifPharmacophoreFeatureDeducerSubstructure': _s = MotifPharmacophoreLib.deducer_as_substructure(deducer) return tuple( Pharmacophore.SMARTSDefinition._from_SMARTS_feature_def(_fd, _s.point_generation_scheme()) for _fd in _s.SMARTS_motif_feature_types() ) @property def identifier(self): """The identifier of the feature.""" return self._feature_def.name() @property def component_label(self): """Which component ('protein', 'small_molecule' or 'any') the feature def applies to.""" if hasattr(self._feature_def, 'component_label'): return Pharmacophore._component_label_dict[self._feature_def.component_label()] else: t = tuple(Pharmacophore._component_label_dict[x] for x in self._feature_def.component_labels()) if len(t) == 1: return t[0] else: return 'Any' @component_label.setter def component_label(self, value): self._feature_def.set_component_label( Pharmacophore._component_label_dict.inverse_lookup(value) ) @property def colour(self): """The colour associated with this feature.""" col = self._feature_def.colour() return Colour(col.r_, col.g_, col.b_, col.a_) @colour.setter def colour(self, val): col = MotifPharmacophoreLib.MotifPharmacophoreFeatureColour(*val) self._feature_def.set_colour(col)
[docs] def write(self, file_name): """Writes the feature definition.""" if self._feature_def.feature_deducer().class_name() == 'MotifPharmacophoreFeatureDeducerSubstructure': ded = MotifPharmacophoreLib.deducer_as_substructure(self._feature_def.feature_deducer()) writer = MotifPharmacophoreLib.SMARTSMotifFeatureTypeFileWriter(ded) ostr = MotifPharmacophoreLib.ofstream(file_name) writer.write_content('', ostr) ostr.close() else: raise RuntimeError('Not a SMARTS feature definition')
@property def point_generator_names(self): """The names of the point generator schemes associated with this feature definition.""" ded = MotifPharmacophoreLib.deducer_as_substructure(self._feature_def.feature_deducer()) pgs = ded.point_generation_scheme() fts = pgs.feature_types() return tuple(f.name() for f in fts) def detect_features(self, crystal): _csv = ChemistryLib.CrystalStructureView_instantiate(crystal._crystal) _ssc = MotifPharmacophoreLib.MotifPharmacophoreSearchStructureCreator() _ssc.register_components_from_feature_definitions((self._feature_def,)) _mss = _ssc.search_structure(_csv) _ded = self._feature_def.feature_deducer() _feats = _ded.generate_motif_features(_mss, self._feature_def.component_label()) features = [] for i in range(_feats.end_index()): code, feat_pts = _feats.at(i) for fp in feat_pts: pts = fp.points() spheres = tuple(GeometricDescriptors.Sphere((p[0], p[1], p[2]), 1.0) for p in pts) # Skip duplicates dup = False if len(spheres) == 2: for f in features: if len(f.spheres) == 2: if ( f.spheres[0] == spheres[1] and f.spheres[1] == spheres[0] ): dup = True break if not dup: features.append( Pharmacophore.Feature(self._clone(), *spheres) ) return tuple(features)
[docs] def features_from_atoms(self, point_generator, *atoms): """Create features from this feature definition and an atom of the appropriate type. :param point_generator: a string giving one of the point_generator_names associated with this definition. :param atoms: an iterable of :class:`ccdc.molecule.Atom` instances. :returns: a :class:`ccdc.pharmacophore.Pharmacophore.Feature` instance. :raises RuntimeError: if the atom is not suitable for the feature definition. """ ded = MotifPharmacophoreLib.deducer_as_substructure(self._feature_def.feature_deducer()) pgs = ded.point_generation_scheme() for ft in pgs.feature_types(): if ft.name().upper() == point_generator.upper(): mfg = ft.motif_feature_generator() mpts = mfg.motif_features([a._atom for a in atoms]) pts = [mpt.points() for mpt in mpts] spheres = [[GeometricDescriptors.Sphere(tuple(p[i] for i in range(3)), 1.0) for p in l] for l in pts] feats = [Pharmacophore.Feature(self._clone(), *ss) for ss in spheres] return tuple(feats) else: raise RuntimeError('Unknown point generator %s' % point_generator)
[docs] def copy(self): """Copies the Feature so it can be edited freely.""" return self._clone()
def _clone(self): _fd = MotifPharmacophoreLib.MotifPharmacophoreFeatureDefinition() _fd.set_feature_deducer(self._feature_def.feature_deducer()) _fd.set_component_label(self._feature_def.component_label()) _fd.set_colour(self._feature_def.colour()) _fd.set_available_component_labels(self._feature_def.available_component_labels()) _fd.set_description(self._feature_def.description()) _fd.set_id(self._feature_def.id()) # Maybe increment? return Pharmacophore.FeatureDefinition(_fd)
[docs] class Feature(object): """A feature in a Query.""" def __init__(self, feature_definition, *spheres): if feature_definition is None: # Required for ExcludedVolume and AnnotatoinFilter self._feature_def = MotifPharmacophoreLib.MotifPharmacophoreFeatureDefinition() else: self._feature_def = feature_definition._clone()._feature_def for s in spheres: self.add_sphere(s) def __repr__(self): return 'Feature(%s)' % self.identifier @property def colour(self): """The colour of the spheres.""" col = self._feature_def.colour() return Colour(col.r_, col.g_, col.b_, col.a_) @colour.setter def colour(self, value): col = MotifPharmacophoreLib.MotifPharmacophoreFeatureColour(*value) self._feature_def.set_colour(col) @property def component_label(self): """The component label""" return Pharmacophore._component_label_dict[self._feature_def.component_label()] @component_label.setter def component_label(self, value): val = Pharmacophore._component_label_dict.inverse_lookup(value) self._feature_def.set_component_label(val) if hasattr(self, '_query'): self._query._pharmacophore.set_component_label_of_feature(self._feature_def.id(), int(val)) @property def description(self): """Description of the feature definition.""" return self._feature_def.description() @description.setter def description(self, value): self._feature_def.set_description(value) @property def identifier(self): """The identifier of the feature definition.""" return self._feature_def.name() @property def spheres(self): """The spheres defined by this feature definition.""" return tuple(GeometricDescriptors.Sphere(s.centre(), s.radius()) for s in self._feature_def.spheres()) @spheres.setter def spheres(self, values): if len(self.spheres) != len(values): raise RuntimeError('Mismatch of number of spheres') _feature_def = MotifPharmacophoreLib.MotifPharmacophoreFeatureDefinition() _feature_def.set_feature_deducer(self._feature_def.feature_deducer()) _feature_def.set_filter(self._feature_def.filter()) _feature_def.set_component_label(self._feature_def.component_label()) for s in values: _feature_def.add_sphere( MathsLib.Sphere(tuple(s.centre[i] for i in range(3)), s.radius) ) _feature_def.set_colour(self._feature_def.colour()) _feature_def.set_available_component_labels(self._feature_def.available_component_labels()) _feature_def.set_id(self._feature_def.id()) _feature_def.set_description(self._feature_def.description()) self._feature_def.set_spheres( tuple(MathsLib.Sphere(tuple(v.centre[i] for i in range(3)), v.radius) for v in values) )
[docs] def add_sphere(self, sphere): """Add a sphere to the feature definition. :param sphere: :class:`ccdc.descriptors.GeometricDescriptors.Sphere` instance """ _sphere = MathsLib.Sphere([sphere.centre[i] for i in range(3)], sphere.radius) self._feature_def.add_sphere(_sphere)
[docs] class ExcludedVolume(Feature): """ An excluded volume feature. This expresses a region to exclude if atoms are in this region. :param sphere: the sphere to exclude :param smarts: an optional SMARTS pattern that means you can exclude hits that have a specific substructure within the sphere """ def __init__(self, sphere, smarts="[*]"): Pharmacophore.Feature.__init__(self, None) deducer = MotifPharmacophoreLib.MotifPharmacophoreFeatureDeducerExcludedVolume() self._feature_def.set_feature_deducer(deducer) self._feature_def.set_component_label(2) self._filterer = MotifPharmacophoreLib.MotifPharmacophoreFilterSubstructure() self._filterer.set_name(deducer.name()) self._filterer.set_key(smarts) self._feature_def.set_filter(self._filterer) self.add_sphere(sphere) @property def filter_smarts(self): """The SMARTS pattern that controls when an atom excludes in a given volume by substructure matching.""" return self._filterer.key() @filter_smarts.setter def filter_smarts(self, smarts): self._filterer.set_key(smarts)
[docs] class AnnotationFilter(Feature): def __init__(self, key, value): Pharmacophore.Feature.__init__(self, None) deducer = MotifPharmacophoreLib.MotifPharmacophoreFeatureDeducerAnnotation() deducer.set_key(key) deducer.set_value(value) self._feature_def.set_feature_deducer(deducer) @property def key(self): """The annotation filter key.""" return MotifPharmacophoreLib.deducer_as_annotation(self._feature_def.feature_deducer()).key() @property def value(self): """The annotation filter value.""" return MotifPharmacophoreLib.deducer_as_annotation(self._feature_def.feature_deducer()).value()
[docs] class Query(object): """A pharmacophore query.""" def __init__(self, features=None, _motif_pharmacophore=None): if _motif_pharmacophore: self._pharmacophore = _motif_pharmacophore else: self._pharmacophore = MotifPharmacophoreLib.MotifPharmacophore() self._features = [] if features is not None: for f in features: self.add_feature(f)
[docs] @staticmethod def from_file(file_name): """Read a pharmacophore query from a file.""" reader = MotifPharmacophoreLib.MotifPharmacophoreReader(file_name) q = Pharmacophore.Query(_motif_pharmacophore=reader.pharmacophore()) q._features_from_pharmacophore() for f in q.features: f._query = q return q
[docs] def write(self, pharmacophore_path): """Write a pharmacophore query to a file.""" writer = MotifPharmacophoreLib.MotifPharmacophoreWriter() writer.write_pharmacophore(self._pharmacophore, pharmacophore_path)
@property def packing(self): """Whether to use crystal packing for query structure.""" return self._pharmacophore.use_packing() @packing.setter def packing(self, val): self._pharmacophore.set_use_packing(val) @property def intra_only(self): """Whether all features should be intramolecular.""" return self._pharmacophore.intra_only() @intra_only.setter def intra_only(self, value): if value == self.intra_only: return self._pharmacophore.set_intra_only(value) if value: ty = ChemistryLib.ContactCriterion.INTRAMOLECULAR else: ty = ChemistryLib.ContactCriterion.ANY for dc in self._pharmacophore.distance_constraints(): f1 = self.features[dc.id_feature_a()] f2 = self.features[dc.id_feature_b()] if f1.component_label == 'small_molecule' and f2.component_label == 'small_molecule': crit = dc.distance_criterion() crit.set_type(ty) dc.set_distance_criterion(crit) @property def bounding_sphere(self): """A minimum volume sphere containing all feature points.""" s = self._pharmacophore.bounding_sphere() return GeometricDescriptors.Sphere(tuple(s.centre()[i] for i in range(3)), s.radius()) @property def maximum_feature_distance(self): """The maximum distance between features.""" return self._pharmacophore.maximum_feature_distance() @maximum_feature_distance.setter def maximum_feature_distance(self, value): self._pharmacophore.set_maximum_feature_distance(value)
[docs] def add_feature(self, feature): """Add a feature to the pharmacophore.""" self._pharmacophore.add_feature_definition(feature._feature_def) self._features.append(feature) feature._query = self
@property def features(self): """The features of the pharmacophore.""" return self._features def _features_from_pharmacophore(self): def _make_spheres(_fd): return tuple(GeometricDescriptors.Sphere((s.centre().x(), s.centre().y(), s.centre().z()), s.radius()) for s in _fd.spheres()) self._features.extend(tuple( Pharmacophore.Feature(Pharmacophore.FeatureDefinition(_fd), *_make_spheres(_fd)) for _fd in self._pharmacophore.all_feature_definitions() )) DistanceConstraint = collections.namedtuple('DistanceConstraint', ['feature_point1', 'feature_point2', 'distance_range', 'which']) def _repr_distance_constraint(dc): """Representation of the distance constraint.""" return "DistanceConstraint((%d, %d), (%d, %d) (%.3f, %.3f) which='%s')" % ( dc.feature_point1[0], dc.feature_point1[1], dc.feature_point2[0], dc.feature_point2[1], dc.distance_range[0], dc.distance_range[1], dc.which ) DistanceConstraint.__repr__ = _repr_distance_constraint @property def distance_constraints(self): cons = [] for dc in self._pharmacophore.distance_constraints(): fp1 = dc.id_feature_a(), dc.id_feature_point_a() fp2 = dc.id_feature_b(), dc.id_feature_point_b() crit = dc.distance_criterion() rng = crit.min(), crit.max() w = crit.type() if w == ChemistryLib.ContactCriterion.ANY: which = 'Any' elif w == ChemistryLib.ContactCriterion.INTERMOLECULAR: which = 'Intermolecular' else: which = 'Intramolecular' cons.append(Pharmacophore.Query.DistanceConstraint(fp1, fp2, rng, which=which)) return tuple(cons)
[docs] def add_distance_constraint(self, feature1, feature2, distance_range, which): """Add a distance constraint between features. :param feature1: a feature in the model, the tuple contains an id and a point. :param feature2: a feature in the model, the tuple contains an id and a point. :param distance_range: a pair of numbers specifying the desired range. :param which: 'intramolecular', 'intermolecular' or 'any'. """ self.intra_only = False if which.lower().startswith('intra'): which = ChemistryLib.ContactCriterion.INTRAMOLECULAR elif which.lower().startswith('inter'): which = ChemistryLib.ContactCriterion.INTERMOLECULAR else: which = ChemistryLib.ContactCriterion.ANY crit = SubstructureSearchLib.InterAtomicDistanceCriterion(min(distance_range), max(distance_range), which, False) feat_id1, point_id1 = feature1 feat_id2, point_id2 = feature2 for dc in self._pharmacophore.distance_constraints(): if ( (dc.id_feature_a() == feat_id1 and dc.id_feature_point_a() == point_id1 and dc.id_feature_b() == feat_id2 and dc.id_feature_point_b() == point_id2) or (dc.id_feature_a() == feat_id2 and dc.id_feature_point_a() == point_id2 and dc.id_feature_b() == feat_id1 and dc.id_feature_point_b() == point_id1) ): dc.set_distance_criterion(crit) break else: assert 0 <= feat_id1 < len(self.features) assert 0 <= point_id1 < len(self.features[feat_id1].spheres) fd1 = self.features[feat_id1]._feature_def s = self.features[feat_id1].spheres[point_id1] fs1 = MathsLib.Sphere(tuple(s.centre[i] for i in range(3)), s.radius) assert 0 <= feat_id2 < len(self.features) assert 0 <= point_id2 < len(self.features[feat_id2].spheres) fd2 = self.features[feat_id2]._feature_def s = self.features[feat_id2].spheres[point_id2] fs2 = MathsLib.Sphere(tuple(s.centre[i] for i in range(3)), s.radius) constraint = MotifPharmacophoreLib.MotifPharmacophoreConstraintDistance( feat_id1, point_id1, feat_id2, point_id2, fs1, fs2, fd1, fd2 ) constraint.set_distance_criterion(crit) self._pharmacophore.add_constraint(constraint)
[docs] class SearchHit: """Pharmacophore search hit.""" def __init__(self, model, _motif_pharmacophore_hit=None): if _motif_pharmacophore_hit: self._pharmacophore_hit = _motif_pharmacophore_hit else: self._pharmacophore_hit = MotifPharmacophoreLib.MotifPharmacophoreHit() self.model = model @property def identifier(self): """The hit identifier""" return self._pharmacophore_hit.identifier() @property def overlay_rmsd(self): """The root-mean-square deviation of the pharmacophore overlay""" return self._pharmacophore_hit.overlay_rmsd() @property def overlay_transformation(self): """The transformation matrix of the pharmacophore overlay""" return molecule.Molecule.Transformation(_transformation=self._pharmacophore_hit.overlay_transformation()) @property def cluster_number(self): """The pharmacophore hit cluster number""" return self._pharmacophore_hit.cluster_number() @property def molecule(self): """The pharmacophore overlay molecule""" return molecule.Molecule(self._pharmacophore_hit.identifier(), _molecule=self._pharmacophore_hit.molecule()) @property def protein(self): """The hit's protein, if any.""" return protein.Protein(self.identifier, self._pharmacophore_hit.protein().create_editable_molecule()) @property def small_molecule(self): """The hit's small molecule.""" return molecule.Molecule(self.identifier, self._pharmacophore_hit.small_molecule()) @property def annotations(self): """The annotations of the hit.""" return { k.strip() : v for k, v in self._pharmacophore_hit.external_annotations().items() } @property def points(self): """The matched points of the hit.""" _m = self._pharmacophore_hit.pharmacophore_match() pts = [] pdic = dict() for i in range(_m.nfeature_matches()): _fm = _m.feature_match(i) pdic[_fm.pharmacophore_feature_id_] = _fm.points_ for i, p in sorted(pdic.items()): pts.extend(p) return tuple(pts)
[docs] def constraint_values(self): """The values of the distance constraints of the query.""" pts = self.points counts = [len(f.spheres) for f in self.model.features] values = [] for dc in self.model.distance_constraints: f_inx, s_inx = dc.feature_point1 inx1 = sum(counts[:f_inx]) + s_inx f_inx, s_inx = dc.feature_point2 inx2 = sum(counts[:f_inx]) + s_inx dist = GeometricDescriptors.point_distance(pts[inx1], pts[inx2]) values.append(dist) return values
[docs] def feature_points(self, feature): """The points in the hit associated with the given feature.""" pts = self.points counts = [len(f.spheres) for f in self.model.features] inx = feature._feature_def.id() n = len(feature.spheres) start = sum(counts[:inx]) return tuple(pts[start:start+n])
[docs] class Search(object): """A pharmacophore search"""
[docs] class Settings(object): """Settings for a pharmacophore search.""" def __init__(self, max_hits_per_structure=5, max_hit_structures=100000): """Initialise settings. :param max_hits_per_structure: keep only top n matches per database entry :param max_hit_structures: restrict maximum number of matches per database entry """ self._search_manager = MotifPharmacophoreLib.PharmacophoreSearchManager() self.max_hit_structures = max_hit_structures self.max_hits_per_structure = max_hits_per_structure self.n_threads = Pharmacophore._default_n_threads() @property def n_threads(self): """The number of threads to use in the search.""" return self._search_manager.n_search_threads() @n_threads.setter def n_threads(self, value): self._search_manager.set_n_search_threads(value) @property def max_hit_structures(self): """The number of hits to retain.""" return self._search_manager.hitGenerationKeepTopNHits() @max_hit_structures.setter def max_hit_structures(self, value): self._search_manager.setHitGenerationKeepTopNHits(value) @property def max_hits_per_structure(self): """The number of hits per structure to be returned.""" return self._search_manager.searchOptionMaxHitsPerStructure() @max_hits_per_structure.setter def max_hits_per_structure(self, value): self._search_manager.setMaxHitsPerStructure(value) @property def max_hit_rmsd(self): """The maximum RMSD of hit structures to be returned.""" return self._search_manager.hitGenerationMaxHitRMSD() @max_hit_rmsd.setter def max_hit_rmsd(self, value): self._search_manager.setHitGenerationMaxHitRMSD(value) @property def skip_proteins(self): """Whether or not to skip protein structures.""" return self._search_manager.hitGenerationSkipProteins() @skip_proteins.setter def skip_proteins(self, value): self._search_manager.setHitGenerationSkipProteins(value) @property def complete_small_molecules(self): """Whether or not to return the complete small molecule hit.""" return self._search_manager.hitGenerationCompleteSmallMolecules() @complete_small_molecules.setter def complete_small_molecules(self, value): self._search_manager.setHitGenerationCompleteSmallMolecules(value) @property def complete_proteins(self): """Whether or not to return the complete protein of a hit.""" return self._search_manager.hitGenerationCompleteProteins() @complete_proteins.setter def complete_proteins(self, value): self._search_manager.setHitGenerationCompleteProteins(value) @property def three_cubed_packing(self): """Whether or not to use 3-cubed packing.""" return self._search_manager.searchOptionUseThreeCubedPacking() @three_cubed_packing.setter def three_cubed_packing(self, value): self._search_manager.setSearchOptionUseThreeCubedPacking(value) def _clone(self): settings = Pharmacophore.Search.Settings() settings.n_threads = self.n_threads settings.max_hit_structures = self.max_hit_structures settings.max_hits_per_structure = self.max_hits_per_structure settings.max_hit_rmsd = self.max_hit_rmsd settings.skip_proteins = self.skip_proteins settings.complete_small_molecules = self.complete_small_molecules settings.complete_proteins = self.complete_proteins settings.three_cubed_packing = self.three_cubed_packing return settings
def __init__(self, settings=None): if settings is None: settings = Pharmacophore.Search.Settings() self.settings = settings self._feature_database = None self._feature_database_model = MotifPharmacophoreLib.PharmacophoreFeatureDatabaseModel() self._pm_factory = MotifPharmacophoreLib.PharmacophoreManagerFactory() self._search_manager = self.settings._clone()._search_manager self._hits = None def __del__(self): self._feature_database_model = None self._results_manager = None self._search_manager = None self._pm_factory = None @property def feature_database(self): """The CSD-CrossMiner feature database.""" return self._feature_database @feature_database.setter def feature_database(self, val): self._feature_database = val fdb_filtered_view = MotifPharmacophoreLib.MotifPharmacophoreFeatureDatabaseFilteredView(val._feature_db, False) self._feature_database_model.set_feature_database_filtered_view(fdb_filtered_view) self._search_manager.set_feature_database_filtered_view(fdb_filtered_view)
[docs] def search(self, model, database=None, verbose=False, stop_after=sys.maxsize): """Perform a pharmacophore search.""" if database is None: if self.feature_database is None: self.feature_database = Pharmacophore.default_feature_database() database = self.feature_database try: self._search_manager = self.settings._clone()._search_manager self._results_manager = MotifPharmacophoreLib.PharmacophoreResultsModel() self._search_manager.add_results_writer(self._results_manager) fdb_filtered_view = MotifPharmacophoreLib.MotifPharmacophoreFeatureDatabaseFilteredView(database._feature_db, False) self._feature_database_model.set_feature_database_filtered_view(fdb_filtered_view) self._search_manager.set_feature_database_filtered_view(fdb_filtered_view) self._search_manager.set_pharmacophore(model._pharmacophore) self._search_manager.non_qt_update_feature_mapping() self._search_manager.update() self._results_manager.set_maximum_number_of_hits_to_save(self.settings.max_hit_structures) self._hits = None # This is ugly. Currently we need to reload the feature database to correctly initialise other # underlying CrossMiner search dependencies. feature_database_file = database.file_name fdb_filtered_view = self._search_manager.feature_database_filtered_view() if verbose: print('filtered feature database size {}'.format(fdb_filtered_view.size())) selector = MotifPharmacophoreLib.SelectCSDDatabase() selector = MotifPharmacophoreLib.select_csd_as_select_structure(selector) self._feature_database_model.process_feature_database(feature_database_file, UtilitiesLib.ProgressMonitor(), selector) db_pool = self._feature_database_model.structure_databases() self._search_manager.set_structure_databases(db_pool) self._results_manager.set_accept_hits(True) self._search_manager.start_search() # Wait for search to finish. i = 0 while self._search_manager.is_running(): i += 1 time.sleep(0.1) if verbose: print('{} n_searched = {} n_hits = {}'.format( i, self._search_manager.n_searched(), self._results_manager.n_hits() )) if self._results_manager.n_hits() >= stop_after: break self._search_manager.stop_search() self._results_manager.set_accept_hits(False) self._hits = self._results_manager.hits() finally: self._search_manager.remove_results_writer(self._results_manager) self._results_manager = None if verbose: for hit in self._hits: print('Hit id: {} rmsd: {} '.format(hit.identifier(), hit.overlay_rmsd())) pharmacophore_hits = list(Pharmacophore.SearchHit(model, _motif_pharmacophore_hit=hit) for hit in self._hits) return pharmacophore_hits
[docs] class FeatureDatabase(object): """A database of pharmacophore features.""" DatabaseInfo = collections.namedtuple('DatabaseInfo', ['file_name', 'n_entries', 'colour']) def show_dbi(dbi): return "DatabaseInfo(file_name='%s', n_entries=%d, colour=%s)" % (dbi.file_name, dbi.n_entries, str(dbi.colour)) DatabaseInfo.__repr__ = show_dbi def __init__(self, file_name, _feature_db=None): if _feature_db is None: _feature_db = MotifPharmacophoreLib.MotifPharmacophoreFeatureDatabase() self.file_name = file_name self._feature_db = _feature_db fd_names = [f.identifier for f in self.feature_definitions] if 'excluded_volume' not in fd_names: self.add_feature_definition(Pharmacophore.ExcludedVolume(GeometricDescriptors.Sphere((0, 0, 0), 0))) def __len__(self): return self._feature_db.size()
[docs] @staticmethod def from_file(feature_database_path): """Read a pharmacophore feature database from the specified path. :param feature_database_path: pharmacophore feature database file path. :returns: :class:`ccdc.Pharmacophore.FeatureDatabase` instance. """ progress_monitor = UtilitiesLib.ProgressMonitor() feature_database = MotifPharmacophoreLib.MotifPharmacophoreFeatureDatabase() feature_database.read(feature_database_path, progress_monitor) return Pharmacophore.FeatureDatabase(feature_database_path, feature_database)
[docs] def write(self, file_name): """Write a pharmacophore feature database to the specified path. :param file_name: pharmacophore feature database file path. """ progress_monitor = UtilitiesLib.ProgressMonitor() self._feature_db.write(file_name, progress_monitor)
@property def schema_version(self): """The database schema version.""" return self._feature_db.version() @property def databases(self): """The structure databases used in the construction of this feature database. Each database is presented as a tuple of file_name, nunmber of entries, and colour.""" colours = tuple( self._feature_db.colour(i) for i in range(self._feature_db.number_of_source_databases()) ) return tuple( Pharmacophore.FeatureDatabase.DatabaseInfo( self._feature_db.file_name(i), self._feature_db.nentries_from_source_database(i), Colour(c.r_, c.g_, c.b_, c.a_) ) for i, c in enumerate(colours) ) @property def identifiers(self): """All identifiers from the database.""" return self._feature_db.identifiers() def entries(self): for i in range(self._feature_db.size()): yield Pharmacophore.FeatureDatabase.Entry(_entry=self._feature_db.entry(i)) def __iter__(self): """Iterator.""" return self.entries() # pylint: disable=E1101 def __getitem__(self, index): if index < 0 or index >= len(self): raise IndexError('FeatureDatabase: index out of range') return Pharmacophore.FeatureDatabase.Entry(_entry=self._feature_db.entry(index))
[docs] def entry(self, identifier): """The :class:`ccdc.pharmacophore.Pharmacophore.FeatureDatabase.Entry` corresponding to the identifier. :param identifier: identifier of the entry. :returns: a :class:`ccdc.pharmacophore.Pharmacophore.FeatureDatabase.Entry` instance. :raises: RuntimeError if the identifier is not present. """ if not self._feature_db.identifier_exists(UtilitiesLib.DatabaseEntryIdentifier(identifier)): raise RuntimeError('FeatureDatabase: %s not present' % identifier) return Pharmacophore.FeatureDatabase.Entry( _entry=self._feature_db.entry(identifier) )
@property def feature_definitions(self): """The feature definitions present in the database.""" return tuple( Pharmacophore.FeatureDefinition(_feature_def=f) for f in self._feature_db.feature_definitions() ) + tuple( Pharmacophore.FeatureDefinition(_feature_def=f) for f in self._feature_db.non_indexed_feature_definitions() ) + tuple( Pharmacophore.FeatureDefinition(_feature_def=f) for f in self._feature_db.non_matchable_feature_definitions() )
[docs] def add_feature_definition(self, feature_definition): """Add a feature definition to the database. Note: this feature definition will not be indexed in the database, so will run rather more slowly if required in a search. To add the corresponding index, the database will need to be recreated with the feature present. """ if isinstance(feature_definition, Pharmacophore.FeatureDefinition): _fd = feature_definition._feature_def elif isinstance(feature_definition, Pharmacophore.Feature): _fd = feature_definition._feature_def else: raise NotImplementedError('Cannot add one of these to a database %s' % str(feature_definition)) db_fd = MotifPharmacophoreLib.MotifPharmacophoreFeatureDatabaseFeatureDefinition() db_fd.set_feature_deducer(_fd.feature_deducer()) label = _fd.component_label() if label == 2: labels = [0, 1] else: labels = [label] db_fd.set_component_labels(labels) if _fd.is_matchable(): self._feature_db.add_non_indexed_feature_definition(db_fd) else: self._feature_db.add_non_matchable_feature_definition(db_fd)
@property def annotation_keys(self): """The annotations available in this database.""" return self._feature_db.available_annotations()
[docs] def annotate(self, pattern, **kw): """Add or replace an annotation in the database. :param pattern: a glob-style pattern which will be used to match against identifiers. All entries in the database with a matching identifier will be annotated with the key, value pairs of the `kw` argument. """ for k, w in kw.items(): self._feature_db.set_external_annotation(pattern, k, w)
[docs] def clear_annotations(self): """Remove all annotations from the database.""" self._feature_db.clear_external_annotations()
[docs] class Entry(object): """An entry in a FeatureDatabase.""" class _write_through_dict(dict): def __init__(self, _entry, *args, **kw): self._entry = _entry dict.__init__(self, *args, **kw) def __setitem__(self, key, value): dict.__setitem__(self, key, value) self._entry.set_external_annotation(key, str(value)) def update(self, dic): for k, v in dic.items(): self[k] = v def __init__(self, _entry=None): self._entry = _entry @property def identifier(self): """The identifier of the entry.""" return self._entry.name() @identifier.setter def identifier(self, value): self._entry.set_name(value) @property def annotations(self): """The dictionary of annotations for the entry.""" return Pharmacophore.FeatureDatabase.Entry._write_through_dict(self._entry, self._entry.external_annotations())
[docs] def clear_annotations(self): """Clear all annotations from the entry.""" self._entry.clear_external_annotations()
@property def source_database_index(self): return self._entry.source_db_id()
[docs] class Creator(object): """The creator for feature databases."""
[docs] class Settings(object): """Settings for feature database creation.""" def __init__(self, feature_definition_directory=None, n_threads=None, skip_duplicate_identifiers=False): if feature_definition_directory is None: feature_definition_directory = Pharmacophore._feature_definition_directory() self.feature_definition_directory = feature_definition_directory if n_threads is None or n_threads < 1: n_threads = Pharmacophore._default_n_threads() self.n_threads = n_threads self.skip_duplicate_identifiers = skip_duplicate_identifiers
[docs] class StructureDatabase(object): """A database of structures from which a feature database may be created.""" def __init__(self, db_info, use_crystal_symmetry=True, sqlmol2_database_path=None, structure_filters=None, structure_database_path=None): """Initialise for creation. If you are creating a Structure database from the CSD set DatabaseInfo.file_name to a list of CSD database file names. If you want to include any available CSD updates include them in the list. Alternatively, set DatabaseInfo.file_name to the string 'csd' rather than a list and all CSD updates will be automatically included. :param db_info: a :class:`ccdc.pharmacophore.FeatureDatabase.DatabaseInfo` instance. Here the n_entries attribute is the desired number of entries in the resulting database. :param use_crystal_symmetry: whether or not to use crystal symmetry when generating features. :param structure_database_path: where to save the generated structure database. Supported types are .csdsqlx and .sqlmol2. :param structure_filters: a :class:`ccdc.search.Settings` instance to filter the source database. """ self.db_info = db_info self.use_crystal_symmetry = use_crystal_symmetry if sqlmol2_database_path is not None: warnings.warn("sqlmol2_database_path is deprecated and will be removed. Use structure_database_path instead.", DeprecationWarning) self.structure_database_path = structure_database_path or sqlmol2_database_path self.structure_filters = structure_filters def _database_data(self, skip_duplicate_identifiers): if self.structure_database_path is not None: self._database_path = self.db_info.file_name if not os.path.exists(self.structure_database_path): self._create_structure_database(self.structure_database_path, skip_duplicate_identifiers) with io.EntryReader(self.structure_database_path) as reader: database_data = MotifPharmacophoreLib.StructureDatabaseFeatureCreatorData( os.path.basename(self.structure_database_path), MotifPharmacophoreLib.MotifPharmacophoreFeatureColour(*self.db_info.colour), not self.use_crystal_symmetry, reader._db ) return [database_data] else: csd = io.EntryReader(self.db_info.file_name) if self.structure_filters is None: self.structure_filters = self.default_csd_filters() if isinstance(csd.file_name, (list, tuple)): paths = csd.file_name else: paths = [csd.file_name] database_data = [] if 0 < self.db_info.n_entries < sys.maxsize: to_get = self.db_info.n_entries else: to_get = sys.maxsize for self._database_path in paths: with io.EntryReader(self._database_path) as reader: if self.structure_filters or 0 < self.db_info.n_entries < sys.maxsize: identifiers = self._apply_csd_filters(to_get, self.structure_filters) data = MotifPharmacophoreLib.StructureDatabaseFeatureCreatorData( os.path.basename(self._database_path), MotifPharmacophoreLib.MotifPharmacophoreFeatureColour(*self.db_info.colour), not self.use_crystal_symmetry, reader._db, identifiers ) database_data.append(data) to_get -= len(identifiers) if to_get <= 0: break return database_data def _create_structure_database(self, structure_database_path, skip_duplicate_identifiers): """Convert input structure database to a structure database. :param structure_database_path: The path to the output structure database file. """ progress_monitor = UtilitiesLib.ProgressMonitor() self._append_to_structure_database(progress_monitor, self._database_path, structure_database_path, skip_duplicate_identifiers) def _append_to_structure_database(self, progress_monitor, database_path, structure_database_path, skip_duplicate_identifiers): input_files = [] if os.path.isdir(database_path): for path, dirs, files in os.walk(database_path): input_files.extend([os.path.join(path,f) for f in files if f.endswith(".mol2") or f.endswith(".sdf")]) else: input_files.append(self._database_path) if os.path.exists(structure_database_path): os.unlink(structure_database_path) suffix = structure_database_path[-8:] if structure_database_path.endswith('.sqlmol2') or structure_database_path.endswith('.csdsqlx'): structure_database_path = structure_database_path[:-8] try: if skip_duplicate_identifiers: duplicates_policy = MotifPharmacophoreLib.SkipDuplicateEntryPolicy() else: duplicates_policy = MotifPharmacophoreLib.StopDuplicateEntryPolicy() if suffix == '.csdsqlx': MotifPharmacophoreLib.convert_bindingsites_to_csdsqlx( input_files, structure_database_path, progress_monitor, duplicates_policy ) else: MotifPharmacophoreLib.convert_bindingsites_to_sqlmol2( input_files, structure_database_path, progress_monitor, duplicates_policy ) except RuntimeError as e: for ext in ['.csdsqlx', '.sqlmol2']: db_path = structure_database_path + ext if os.path.isfile(db_path): os.remove(db_path) if len(e.args) > 0 and "is already in the database" in e.args[0]: raise RuntimeError(("Error creating structure database {}, " "entry identifiers should be unique".format(structure_database_path), e.args[0])) else: raise
[docs] @staticmethod def default_csd_filters(): """Suggested filters for CSD structures to include in pharmacophore feature database.""" filter_settings = search.Search.Settings() filter_settings.has_3d_coordinates = True filter_settings.max_r_factor = 10. filter_settings.no_disorder = True filter_settings.not_polymeric = True filter_settings.only_organic = False filter_settings.must_not_have_elements = [ 'He', 'Be', 'Ne', 'Al', 'Si', 'Ar', 'Sc', 'Ti', 'V', 'Cr', 'Ga', 'Ge', 'As', 'Se', 'Kr', 'Rb', 'Y', 'Zr', 'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd', 'In', 'Sn', 'Sb', 'Te', 'Xe', 'Cs', 'Ba', 'La', 'Ce', 'Pr', 'Nd', 'Pm', 'Sm', 'Eu', 'Gd', 'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb', 'Lu', 'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg', 'Tl', 'Pb', 'Bi', 'Po', 'Rn', 'Fr', 'Ra', 'Ac', 'Th', 'Pa', 'U' ] return filter_settings
def _apply_csd_filters(self, max_hit_structures, filter_settings): """Filter input CSD database. :param maximum_hit_structures: The maximum number of structures to retain, or ``None`` for no limit. Default ``None``. :param filter_settings: Structure quality filters to be applied. Object of type :class:`SubstructureSearch.Settings`, boolean ``True`` for default filters, or ``None`` for no filtering. Default ``None``. """ with io.EntryReader(self._database_path) as reader: if not filter_settings: filter_settings = search.Search.Settings() elif filter_settings is True: # default filters, for backwards compatibility. filter_settings = self.default_csd_filters() identifiers = [] for e in reader: if filter_settings.test(e): identifiers.append(e.identifier) if len(identifiers) >= max_hit_structures: break return identifiers
def __init__(self, settings=None): if settings is None: settings = Pharmacophore.FeatureDatabase.Creator.Settings() self.settings = settings self._creator = MotifPharmacophoreLib.MotifPharmacophoreFeatureDatabaseCreator() self._creator.set_number_of_threads(self.settings.n_threads)
[docs] def create(self, structure_databases, feature_defs=None): """Create the feature database. :param structure_databases: an iterable of :class:`ccdc.pharmacophore.Pharmacophore.FeatureDatabase.Creator.StructureDatabase` instances. :param feature_defs: an iterable of :class:`ccdc.pharmacophore.Pharmacophore.FeatureDefinition` instances or None. If None, the default set of feature definitions in read in by :meth:`ccdc.pharmacophore.Pharmacophore.read_feature_definitions` will be used. """ if feature_defs is None: if not Pharmacophore.feature_definitions: Pharmacophore.read_feature_definitions() feature_defs = list(Pharmacophore.feature_definitions.values()) def sort_key(fd): d = dict(any=0, small_molecule=1, protein=2) adict = dict( acceptor=0, acceptor_projected=1, donor_ch_projected=2, donor_projected=3, heavy_atom=4, hydrophobe=5, ring=6, ring_non_planar=7, ring_planar_projected=8, purine=9, pyrimidine=10, adenine=11, cytosine=12, guanine=13, thymine=14, uracil=15, deoxyribose=16, ribose=17 ) sdict = dict( exit_vector=0, halogen=1, bromine=2, chlorine=3, fluorine=4, metal=5, water=6 ) pdict = dict( ALA=0, ARG=1, ASN=2, ASP=3, CYS=4, GLN=5, GLU=6, GLY=7, HIS=8, ILE=9, LEU=10, LYS=11, MET=12, PHE=13, PRO=14, SER=15, THR=16, TRP=17, TYR=18, VAL=19, ) k = fd.component_label n = fd.identifier if k == 'any': v = (d[k], adict.get(n, 99), n) if k == 'small_molecule': v = (d[k], sdict.get(n, 99), n) if k == 'protein': v = (d[k], pdict.get(n, 99), n) return v feature_defs.sort(key=sort_key) _fds = [Pharmacophore._database_feature_def_from_fd(fd) for fd in feature_defs] databases = [] for sdb in structure_databases: databases.extend(sdb._database_data(self.settings.skip_duplicate_identifiers)) progress_monitor = UtilitiesLib.ProgressMonitor() feature_db = self._creator.create_feature_database(databases, _fds, progress_monitor) return Pharmacophore.FeatureDatabase('', _feature_db=feature_db)