# Source code for ccdc.screening

#
# This code is Copyright (C) 2015 The Cambridge Crystallographic Data Centre
# (CCDC) of 12 Union Road, Cambridge CB2 1EZ, UK and a proprietary work of CCDC.
# This code may not be used, reproduced, translated, modified, disassembled or
# copied, except in accordance with a valid licence agreement with CCDC and may
# not be disclosed or redistributed in any form, either in whole or in part, to
# any third party. All copies of this code made in accordance with a valid
# licence agreement as referred to above must contain this copyright notice.
#
# No representations, warranties, or liabilities are expressed or implied in the
# supply of this code by CCDC, its servants or agents, except where such
# exclusion or limitation is prohibited, void or unenforceable under governing
# law.
#

'''
    The :mod:`ccdc.screening` module can be used to screen a library of compounds against a pharmacophore query
    obtained from one or multiple overlaid ligands. The algorithm generalises the 3D pharmacophore definition
    using atom property fields that are created around the query based on user-defined atom types and potentials.
'''
#######################################################################################

from __future__ import division, absolute_import, print_function

import os
import shutil

from ccdc.conformer import _DataRecordExtractor
from ccdc.molecule import Molecule
from ccdc.io import _CSDDatabaseLocator

from ccdc.utilities import _private_importer
with _private_importer():
    from LigandScreeningLib import FieldScreeningWorkFlow, FieldScreeningWorkFlowParameters, licence_check

# Module-level call: LigandScreeningLib's licence_check() runs as soon as this
# module is imported (presumably it fails when no valid licence is found -
# TODO confirm against LigandScreeningLib).
licence_check()

#######################################################################################

class Screener(object):
    '''Performs field-based ligand screening.

    A screener is built from an overlay of molecules and a
    :class:`ccdc.screening.Screener.Settings` instance.  Conformers are then
    scored against it with :meth:`screen`.
    '''

    class Settings(object):
        '''Screener settings.

        Thin wrapper around a ``FieldScreeningWorkFlowParameters`` object; each
        property reads and writes the corresponding field of that object.

        Defaults set on construction: excluded volume envelope distance 3.0,
        excluded volume penalty 10.0, output directory ``./screen_data``,
        ``save_files`` and ``store_atom_types`` enabled, fitting points
        threshold -1.0, fitting points cluster radius 1.5 and biased conformer
        selection enabled.
        '''

        def __init__(self):
            '''Create the settings with their default values.

            :raises: RuntimeError if the parameter file location cannot be found.
            '''
            self._settings = FieldScreeningWorkFlowParameters()
            self._settings.excluded_volume_envelope_distance_ = 3.0
            self._settings.excluded_volume_penalty_ = 10.0
            parameter_file_location = _CSDDatabaseLocator.get_optimisation_parameter_file_location()
            if parameter_file_location is None:
                raise RuntimeError("Cannot create Screener as parameter files are missing")
            self._settings.parameter_directory_ = parameter_file_location
            # Location of output data, if required.
            self._settings.output_directory_ = os.path.join('.', 'screen_data')
            self._settings.save_files_ = True
            self._settings.store_atom_types_ = True
            self._settings.fitting_points_threshold_ = -1.0
            self._settings.fitting_points_cluster_radius_ = 1.5
            self._settings.bias_conformer_selection_ = True

        # NOTE: the property name is spelt 'envelop' in the public API; it is
        # kept as is so that existing callers continue to work.
        @property
        def excluded_volume_envelop(self):
            '''Size of the excluded volume envelope.'''
            return self._settings.excluded_volume_envelope_distance_

        @excluded_volume_envelop.setter
        def excluded_volume_envelop(self, val):
            self._settings.excluded_volume_envelope_distance_ = val

        @property
        def excluded_volume_penalty(self):
            '''Penalty to be applied for atoms in the excluded volume.'''
            return self._settings.excluded_volume_penalty_

        @excluded_volume_penalty.setter
        def excluded_volume_penalty(self, val):
            self._settings.excluded_volume_penalty_ = val

        @property
        def parameter_directory(self):
            '''Location of parameter files.'''
            return self._settings.parameter_directory_

        @parameter_directory.setter
        def parameter_directory(self, val):
            self._settings.parameter_directory_ = val

        @property
        def output_directory(self):
            '''Location where data files may be stored.'''
            return self._settings.output_directory_

        @output_directory.setter
        def output_directory(self, val):
            self._settings.output_directory_ = val

        @property
        def save_files(self):
            '''Whether or not to save files.'''
            return self._settings.save_files_

        @save_files.setter
        def save_files(self, val):
            self._settings.save_files_ = val

        @property
        def store_atom_types(self):
            '''Whether or not to store atom types.'''
            return self._settings.store_atom_types_

        @store_atom_types.setter
        def store_atom_types(self, val):
            self._settings.store_atom_types_ = val

        @property
        def bias_conformer_selection(self):
            '''Whether or not to bias the conformer selection to low-energy conformers.

            This assumes that lower-energy conformations come earlier in the list,
            such as for the CSD conformer generator.
            '''
            return self._settings.bias_conformer_selection_

        @bias_conformer_selection.setter
        def bias_conformer_selection(self, val):
            self._settings.bias_conformer_selection_ = val

        @property
        def fitting_points_threshold(self):
            '''Grid points with a score lower than this threshold will create a fitting point.'''
            return self._settings.fitting_points_threshold_

        @fitting_points_threshold.setter
        def fitting_points_threshold(self, val):
            self._settings.fitting_points_threshold_ = val

        @property
        def fitting_points_cluster_radius(self):
            '''If the distance between two fitting points is less than the cluster radius,
            the fitting point with the higher score will be eliminated.'''
            return self._settings.fitting_points_cluster_radius_

        @fitting_points_cluster_radius.setter
        def fitting_points_cluster_radius(self, val):
            self._settings.fitting_points_cluster_radius_ = val

    class ScreenHitList(list):
        '''List of screening results.'''

        class ScreenHit(object):
            '''An individual screening hit.

            Besides ``identifier``, ``molecule`` and ``n_excluded_volume``, one
            attribute is set per atom type name, holding the value stored in
            the data record under ``screener.pass.<atom type name>``.
            '''

            def __init__(self, molecules, atom_types, _dr):
                '''Initialise the hit.

                :param molecules: the conformers that were screened; the identifier is taken from the first one.
                :param atom_types: iterable of atom type names.
                :param _dr: the data record produced by the screening workflow for these conformers.
                '''
                self._de = _DataRecordExtractor(_dr)
                for name in atom_types:
                    setattr(self, name, self._de.get('screener.pass.%s' % name, 'double'))
                self.identifier = molecules[0].identifier
                self.molecule = Molecule(
                    self.identifier,
                    _molecule=self._de.get('screener.pass.fitted_molecule', 'HMolecule')
                )
                # The record also holds 'screener.pass.conformer_id'.  It is
                # deliberately not used to look up the original conformer here.
                # NOTE(review): the value is presumably not a safe index into
                # ``molecules`` - confirm before exposing it.
                self.n_excluded_volume = self._de.get('screener.pass.n_excluded_volume', 'int')

            @property
            def score(self):
                '''The screening score. A lower score is better.'''
                return self._de.get('screener.pass.score', 'double')

        def __init__(self, confs, atom_types, _drs):
            '''Initialise.

            :param confs: a list of lists of :class:`ccdc.molecule.Molecule`, as passed to :meth:`Screener.screen`.
            :param atom_types: iterable of atom type names, forwarded to each hit.
            :param _drs: the data records returned by the screening workflow.
            '''
            # This has to be done in two steps: first the records are sorted by
            # molecule id, then the hit list is built by pairing the i-th
            # sorted record with confs[i].
            # Sorting on the id alone means records are never compared with
            # each other, even if two ids were equal.
            # NOTE(review): the positional pairing assumes the sorted records
            # line up one-to-one with ``confs`` - confirm against the workflow.
            keyed_records = sorted(
                (
                    (_DataRecordExtractor(dr).get('reader.pass.molecule_id', 'int'), dr)
                    for dr in _drs
                ),
                key=lambda pair: pair[0]
            )
            hit_list = [
                Screener.ScreenHitList.ScreenHit(confs[i], atom_types, dr)
                for i, (_, dr) in enumerate(keyed_records)
            ]
            list.__init__(self, hit_list)

        @property
        def best_hit(self):
            '''The hit with the lowest score.

            If several hits share the lowest score, the first of them in list
            order is returned.

            :raises: IndexError if the hit list is empty.
            '''
            if not self:
                raise IndexError('best_hit requested from an empty ScreenHitList')
            # Compare on the score only, so hits are never compared directly.
            return min(self, key=lambda hit: hit.score)

    def __init__(self, overlay, settings=None, nthreads=1):
        '''Initialise the screener.

        :param overlay: a list of :class:`ccdc.molecule.Molecule`
        :param settings: a :class:`ccdc.screening.Screener.Settings` instance or ``None``.
        :param nthreads: int value for the number of threads to use when screening.
        '''
        self._init_common(settings)
        self._init_from_overlay(overlay, nthreads)

    def _init_common(self, settings):
        '''Private: store the settings, clear old output and read the atom types.

        If ``settings`` is ``None`` a default :class:`Screener.Settings` is used.
        When ``save_files`` is enabled and the output directory already exists,
        it is removed with everything in it.
        '''
        if settings is None:
            settings = Screener.Settings()
        self.settings = settings
        if self.settings.save_files and os.path.exists(self.settings.output_directory):
            shutil.rmtree(self.settings.output_directory)
        self._parse_atom_types()

    def _init_from_overlay(self, overlay, nthreads):
        '''Private: create the underlying workflow from the overlay molecules.'''
        self.overlay = overlay
        self._screener = FieldScreeningWorkFlow(
            [m._molecule for m in self.overlay],
            self.settings._settings,
            nthreads
        )

    def _parse_atom_types(self):
        '''Private: extract atom types from similarity definitions.

        Reads ``similarity_atom_types_new.txt`` from the parameter directory
        and stores in ``self.atom_types`` the second whitespace-separated token
        of every line starting with ``ATOM_TYPE``.  The result is a list
        without duplicates, in order of first appearance in the file.
        '''
        definitions_file = os.path.join(
            self.settings.parameter_directory, 'similarity_atom_types_new.txt'
        )
        names = []
        seen = set()
        with open(definitions_file) as f:
            for line in f:
                if not line.startswith('ATOM_TYPE'):
                    continue
                name = line.split()[1]
                if name not in seen:
                    seen.add(name)
                    names.append(name)
        self.atom_types = names

    def screen(self, molecules):
        '''Screen conformers against the overlay.

        :param molecules: a list of lists of :class:`ccdc.molecule.Molecule`
        :return: :class:`ccdc.screening.Screener.ScreenHitList`
        '''
        mols = [[m._molecule for m in confs] for confs in molecules]
        drs = self._screener.screen(mols)
        return Screener.ScreenHitList(molecules, self.atom_types, drs)
#######################################################################################