Source code for ccdc.docking

# coding=utf8
# This code is Copyright (C) 2015 The Cambridge Crystallographic Data Centre
# (CCDC) of 12 Union Road, Cambridge CB2 1EZ, UK and a proprietary work of CCDC.
# This code may not be used, reproduced, translated, modified, disassembled or
# copied, except in accordance with a valid licence agreement with CCDC and may
# not be disclosed or redistributed in any form, either in whole or in part, to
# any third party. All copies of this code made in accordance with a valid
# licence agreement as referred to above must contain this copyright notice.
# No representations, warranties, or liabilities are expressed or implied in the
# supply of this code by CCDC, its servants or agents, except where such
# exclusion or limitation is prohibited, void or unenforceable under governing
# law.
The :mod:`ccdc.docking` module provides an API to molecular docking functionality.

.. note:: The :mod:`ccdc.docking` module is available only to CSD-Discovery and CSD-Enterprise users.

The class :class:`ccdc.docking.Docker.LigandPreparation` provides functionality
for preparing ligands for docking. This class encapsulates the
typical preparation activities, such as protonation and bond typing.
#    >>> import os
#    >>> if 'GOLD_DIR' in os.environ and os.environ['GOLD_DIR']:
#    ...     from ccdc.docking import Docker
#    ...     from ccdc.io import MoleculeReader
#    ...     import os
#    ...     docker = Docker()
#    ...     settings = docker.settings
#    ...     protein_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'testsuite', 'testdata', '1fax_protein.mol2')
#    ...     aspirin = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'testsuite', 'testdata', 'aspirin.mol2')
#    ...     settings.add_protein_file(protein_file)
#    ...     settings.add_ligand_file(aspirin)
#    ...     settings.autoscale = 10.
#    ...     import tempfile
#    ...     tempd = tempfile.mkdtemp()
#    ...     settings.output_directory = tempd
#    ...     settings.output_file = 'aspirin_dock.mol2'
#    ...     settings.fitness_function = 'plp'
#    ...     ligand = MoleculeReader(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'testsuite', 'testdata', '1fax_ligand.mol2'))[0]
#    ...     settings.binding_site = settings.BindingSiteFromPoint(
#    ...         settings.proteins[0], ligand.centre_of_geometry(), 10.0
#    ...     )
#    ...     results = docker.dock()
#    ...     return_code = results.return_code
#    ...     ligand_reader = results.ligands
#    ...     output_file = os.path.join(settings.output_directory, settings.output_file)
#    >>> #docked_molecules = [m for m in MoleculeReader(os.path.join(tempd, output_file))]

from __future__ import division, absolute_import, print_function
import six
import sys
import os
import shutil
import glob
import subprocess
import socket
import re
import collections

from ccdc import io
from ccdc.entry import Entry
from ccdc.molecule import Coordinates, Molecule
from ccdc.protein import Protein
from ccdc.utilities import nested_class

from ccdc.utilities import _private_importer
with _private_importer():
    import DockingLib
    import ChemicalAnalysisLib
    import ChemistryLib
    import AnnotationsLib



[docs]class Docker(object): '''Docker.'''
@nested_class('Docker')
class LigandPreparation(object):
    '''Prepare ligands for docking.

    The individual preparation steps are switched on and off through a
    :class:`Docker.LigandPreparation.Settings` instance.
    '''

    @nested_class('Docker.LigandPreparation')
    class Settings(object):
        '''Configuration options for the preparation of ligands.'''
        # Whether or not to remove unknown atoms
        remove_unknown_atoms = True
        # Whether or not to assign bond types
        assign_bond_types = True
        # Whether or not to standardise bonds to CSD conventions
        standardise_bond_types = False
        # Whether hydrogens need to be added
        add_hydrogens = True
        # Whether protonation rules need to be applied
        protonate = True
        # Location of a file containing protonation rules
        protonation_rules_file = None

    def __init__(self, settings=None):
        '''Initialise the preparer.

        :param settings: a :class:`Docker.LigandPreparation.Settings` instance, or None to use the defaults.

        When no protonation rules file is configured, ``protonation_rules.txt``
        in the optimisation parameter file location is used.  If the chosen
        file does not exist no protonation rules are loaded.
        '''
        self.settings = Docker.LigandPreparation.Settings() if settings is None else settings
        rules_path = self.settings.protonation_rules_file
        if rules_path is None:
            rules_path = os.path.join(
                io._CSDDatabaseLocator.get_optimisation_parameter_file_location(),
                'protonation_rules.txt'
            )
        if not os.path.exists(rules_path):
            self._protonation_rules = None
        else:
            self._protonation_rules = ChemicalAnalysisLib.ProtonationRules(rules_path)

    def prepare(self, entry):
        '''Prepare an entry for docking.

        :param entry: :class:`ccdc.entry.Entry` instance
        :returns: :class:`ccdc.entry.Entry` instance with specified rules applied.
        :raises: RuntimeError if the entry's molecule has more than one component.
        '''
        options = self.settings
        rules = self._protonation_rules
        mol = entry.molecule
        if len(mol.components) > 1:
            raise RuntimeError('Docking of multi-component molecules is not supported')
        if options.remove_unknown_atoms:
            mol.remove_unknown_atoms()
        if options.assign_bond_types:
            mol.assign_bond_types()
        if options.standardise_bond_types:
            mol.standardise_aromatic_bonds()
            mol.standardise_delocalised_bonds()
        # Protonation needs both the option and a successfully loaded, valid rule set.
        if options.protonate and rules is not None and rules.valid():
            rules.apply_rules(mol._molecule)
        if options.add_hydrogens:
            # Existing hydrogens are stripped first so they are regenerated consistently.
            mol.remove_hydrogens()
            mol.add_hydrogens()
        return Entry.from_molecule(mol, **entry.attributes)
[docs] @nested_class('Docker') class Settings(object): '''Settings for docker.''' _fitness_functions = ['goldscore', 'chemscore', 'asp', 'plp']
@nested_class('Docker.Settings')
class LigandFileInfo(object):
    '''Information about a ligand file.

    Holds the file name together with the number of docking attempts per
    ligand and the start and finish ligand indices.
    '''

    def __init__(self, file_name, ndocks=1, start=0, finish=0):
        '''Initialise the ligand file information.

        :param file_name: str, the ligand file
        :param ndocks: int, the number of docking attempts for each ligand
        :param start: int, index of ligand at which to start
        :param finish: int, index of ligand at which to finish
        '''
        self.file_name = file_name
        self.ndocks = ndocks
        self.start = start
        self.finish = finish

    def __str__(self):
        return '{file_name} {ndocks} docks, starting at {start} finishing at {finish}'.format(
            **self.__dict__
        )

    def __repr__(self):
        return "LigandFileInfo('{file_name}', {ndocks}, {start}, {finish})".format(**self.__dict__)

    def __eq__(self, other):
        '''Two instances are equal when all four fields are equal.'''
        # Defer to the other operand instead of raising AttributeError on foreign types.
        if not isinstance(other, Docker.Settings.LigandFileInfo):
            return NotImplemented
        return (
            self.file_name == other.file_name and
            self.ndocks == other.ndocks and
            self.start == other.start and
            self.finish == other.finish
        )

    def __ne__(self, other):
        '''Inverse of :meth:`__eq__`; Python 2 does not derive this automatically.'''
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result
def __init__(self, _settings=None):
    '''Initialise settings.

    :param _settings: an existing ``DockingLib.GoldConfFile`` to wrap, or None to create a fresh one.

    With no ``_settings`` a new configuration is created with autoscale 100,
    the 'goldscore' fitness function, no rescore function and the conf file
    name './api_gold.conf'.  When ``_settings`` is supplied the conf file name
    is not set here; :meth:`from_file` assigns it after construction.
    '''
    if _settings is None:
        self._settings = DockingLib.GoldConfFile()
        self.clear_protein_files()
        self.autoscale = 100.
        self._conf_file_name = './api_gold.conf'
        # The private fields must exist before the property setters below run.
        self._fitness_function = ''
        self._rescore_function = ''
        self.fitness_function = 'goldscore'
        self.rescore_function = ''
        self._constraints = []
        self._binding_site = None
    else:
        self._constraints = []
        self._binding_site = None
        self._settings = _settings
    self._settings.set_preserve_mol2_comments(True)
    self._gold_exe = None
    self._socket = None
    self._save_binding_site_atoms = False

def __del__(self):
    '''Close the socket, if one was opened.'''
    # __del__ also runs when __init__ raised part-way through, in which case
    # the attribute may never have been assigned.
    sock = getattr(self, '_socket', None)
    if sock is not None:
        sock.close()
        del self._socket
@staticmethod
def from_file(file_name):
    '''Read docking settings from a gold.conf file.

    :param file_name: Location of the gold.conf file.
    :returns: a :class:`ccdc.docking.Docker.Settings` instance.

    File names in the configuration are made absolute with respect to the
    directory of the conf file.  The fitness and rescore functions are taken
    from the configuration according to its run type.
    '''
    settings = Docker.Settings(
        _settings=DockingLib.GoldConfFileReader().read(file_name)
    )
    settings._conf_file_name = os.path.abspath(file_name)
    settings.make_absolute_file_names(settings._conf_file_name)
    _ = settings.constraints  # Ensure they are read
    settings.binding_site = Docker.Settings.BindingSite._from_settings(settings)

    conf = settings._settings
    run_type = conf.run_type()
    if run_type == conf.RESCORE_RUN:
        # A pure rescore run has no docking fitness function.
        settings._rescore_function = conf.gold_fitness_function_path()
        settings._fitness_function = ''
    elif run_type == conf.CONSENSUS_SCORE:
        settings._fitness_function = conf.docking_fitness_function_path()
        settings._rescore_function = conf.rescore_fitness_function_path()
    else:
        settings._fitness_function = conf.gold_fitness_function_path()
        settings._rescore_function = conf.rescore_fitness_function_path()

    # Parameter files that cannot be found at the recorded path are reduced
    # to their base name.
    score_pars = settings.score_parameter_file
    if score_pars and score_pars != 'DEFAULT' and not os.path.exists(score_pars):
        settings.score_parameter_file = os.path.basename(score_pars)
    tor_file = settings.torsion_distribution_file
    if tor_file and tor_file != 'DEFAULT' and not os.path.exists(tor_file):
        # Previously this re-assigned tor_file unchanged, which was a no-op.
        settings.torsion_distribution_file = os.path.basename(tor_file)
    return settings
def make_absolute_file_names(self, file_name, relative=False):
    '''Convert any relative file names to absolute file names.

    :param file_name: str, the location of the settings file.
    :param relative: bool, whether to make file names relative to the settings file.

    The directory of the settings file is created if it does not exist.
    With ``relative`` set, protein, cavity, reference ligand and fitting
    point files are copied next to the settings file (when the source exists
    and is a different path) and the settings refer to the copy.  Ligand
    files and the seed file are never copied and always refer to the path
    resolved against the settings directory.
    '''
    dirpath = os.path.dirname(os.path.abspath(file_name))
    if not os.path.exists(dirpath):
        os.makedirs(dirpath)

    def _resolve(name):
        # Return (path resolved against dirpath, path of the same base name directly inside dirpath).
        return (
            os.path.abspath(os.path.join(dirpath, name)),
            os.path.abspath(os.path.join(dirpath, os.path.basename(name))),
        )

    def _copy_if_needed(source, target):
        # Copy only when there is something to copy and it is not already in place.
        if os.path.exists(source) and source != target:
            shutil.copyfile(source, target)

    # Ligands: don't copy, always refer to the resolved path.
    ligand_files = self.ligand_files
    self.clear_ligand_files()
    for lf in ligand_files:
        lf.file_name = _resolve(lf.file_name)[0]
        self.add_ligand_file(lf)

    # Proteins
    protein_files = self.protein_files
    self.clear_protein_files()
    for pf in protein_files:
        abs_path, local_path = _resolve(pf.file_name)
        if relative:
            # Same guard as the cavity and reference ligand files below: the
            # earlier 'target missing' test raised when the source was absent.
            _copy_if_needed(abs_path, local_path)
            pf.file_name = local_path
        else:
            pf.file_name = abs_path
        self.add_protein_file(pf)

    # Cavity definition file
    cavity_file = self._settings.cavity_file()
    if cavity_file:
        abs_path, local_path = _resolve(cavity_file)
        if relative:
            _copy_if_needed(abs_path, local_path)
            self._settings.set_cavity_file(local_path)
        else:
            self._settings.set_cavity_file(abs_path)

    # Reference ligand
    reference_file = self._settings.ligand_reference_file()
    if reference_file:
        abs_path, local_path = _resolve(reference_file)
        if relative:
            _copy_if_needed(abs_path, local_path)
            self._settings.set_ligand_reference_file(local_path)
        else:
            self._settings.set_ligand_reference_file(abs_path)

    # Parameter files, ...
    seed_file_setting = self._settings.seed_file()
    if seed_file_setting:
        # Setting to the local copy doesn't work for a dock + rescore, for
        # some reason, so the resolved path is used in both modes.
        self._settings.set_seed_file(_resolve(seed_file_setting)[0])

    # output directory and file
    if self.output_directory:
        self._settings.set_directory(os.path.join(dirpath, self.output_directory))
    if self.output_file:
        self._settings.set_concatenated_output(
            os.path.join(self.output_directory, os.path.basename(self.output_file))
        )

    # fitting points
    if self._settings.read_fitting_points():
        fitting_points = self._settings.fitting_points_file()
        abs_path = os.path.abspath(os.path.join(dirpath, fitting_points))
        local_path = os.path.join(dirpath, os.path.basename(fitting_points))
        if relative:
            self._settings.set_fitting_points_file(local_path)
            _copy_if_needed(abs_path, local_path)
        else:
            self._settings.set_fitting_points_file(abs_path)
@property
def conf_file(self):
    '''The GOLD conf file represented by this settings instance.'''
    return self._conf_file_name

# Ligands
@property
def ligand_files(self):
    '''The ligand datafile settings.

    :returns: tuple of :class:`ccdc.docking.Docker.Settings.LigandFileInfo` instances.
    '''
    infos = []
    for datafile in self._settings.ligand_datafiles():
        infos.append(
            Docker.Settings.LigandFileInfo(
                datafile.ligand_filename_,
                datafile.n_ga_runs_,
                datafile.start_ligand_,
                datafile.finish_ligand_
            )
        )
    return tuple(infos)
def clear_ligand_files(self):
    '''Remove all ligand datafiles from settings.'''
    no_datafiles = ()
    self._settings.set_ligand_datafiles(no_datafiles)
def add_ligand_file(self, file_name, ndocks=1, start=0, finish=0):
    '''Add a file of ligands to the docking settings.

    :param file_name: a mol2 or sdf file of ligand molecules, or a :class:`ccdc.docking.Docker.Settings.LigandFileInfo` instance.
    :param ndocks: int, the number of docking attempts for each ligand
    :param start: int, index of ligand at which to start
    :param finish: int, index of ligand at which to finish

    When a ``LigandFileInfo`` is passed, its own values are used and the
    ``ndocks``, ``start`` and ``finish`` arguments are ignored.
    '''
    if isinstance(file_name, Docker.Settings.LigandFileInfo):
        info = file_name
        values = (info.file_name, info.ndocks, info.start, info.finish)
    else:
        values = (file_name, ndocks, start, finish)
    datafile = DockingLib.LigandDataFile()
    (
        datafile.ligand_filename_,
        datafile.n_ga_runs_,
        datafile.start_ligand_,
        datafile.finish_ligand_,
    ) = values
    self._settings.add_ligand_datafile(datafile)
@property
def ligands(self):
    '''The ligands specified for docking.

    :returns: a ``MoleculeReader`` over the file names of all ligand files.
    '''
    file_names = []
    for info in self.ligand_files:
        file_names.append(info.file_name)
    return io.MoleculeReader(file_names)

# Proteins
@nested_class('Docker.Settings')
class ProteinFileInfo(object):
    '''Data associated with a protein for docking.'''

    def __init__(self, file_name=None, _protein_data=None, settings=None):
        '''Initialise a ProteinFileInfo instance.

        :param file_name: str, used only when no ``_protein_data`` is supplied.
        :param _protein_data: an existing ``DockingLib.GoldConfProteinData`` to wrap.
        :param settings: accepted for the callers that pass it; not used here.
        '''
        if _protein_data is None:
            _protein_data = DockingLib.GoldConfProteinData()
            _protein_data.set_protein_datafile(file_name)
        self._protein_data = _protein_data
        self._constraints = tuple()

    @property
    def file_name(self):
        '''The file name of the protein.'''
        return self._protein_data.protein_datafile()

    @file_name.setter
    def file_name(self, file_name):
        self._protein_data.set_protein_datafile(file_name)

    def __str__(self):
        return "ProteinFileInfo('%s')" % self.file_name

    __repr__ = __str__

    def __eq__(self, other):
        '''Two instances are equal when they refer to the same file name.'''
        # Defer to the other operand instead of raising AttributeError on foreign types.
        if not isinstance(other, Docker.Settings.ProteinFileInfo):
            return NotImplemented
        return self.file_name == other.file_name

    def __ne__(self, other):
        '''Inverse of :meth:`__eq__`; Python 2 does not derive this automatically.'''
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def add_constraint(self, constraint):
        '''Add a constraint to the protein.

        :param constraint: a constraint object whose ``_constraint`` is registered with the protein data.
        '''
        self._protein_data.add_constraint(constraint._constraint)
        self._constraints = self._constraints + (constraint,)

    def clear_constraints(self):
        '''Remove all constraints.'''
        self._protein_data.clear_constraints()
        self._constraints = tuple()

    @property
    def constraints(self):
        '''The constraints associated with this protein.'''
        return self._constraints
@property
def protein_files(self):
    '''The protein file targets.

    :returns: tuple of :class:`ccdc.docking.Docker.Settings.ProteinFileInfo` instances.

    The tuple is built on first access and cached until the protein files
    are cleared or added to.
    '''
    if not hasattr(self, '_protein_info'):
        infos = tuple(
            Docker.Settings.ProteinFileInfo(_protein_data=data, settings=self)
            for data in self._settings.protein_data()
        )
        # The cache is published before the constraints are built, so a
        # re-entrant access while constraints are being built returns it.
        self._protein_info = infos
        for info in infos:
            raw = info._protein_data
            info._constraints = tuple(
                Docker.Settings.Constraint._make_constraint(
                    self, raw.constraint(i), info.file_name
                )
                for i in range(raw.nconstraints())
            )
    return self._protein_info

@property
def proteins(self):
    '''The proteins.

    :returns: tuple of proteins read from the protein files, cached after the first access.
    '''
    if not hasattr(self, '_proteins'):
        # File names are resolved against the directory of the conf file.
        self._proteins = tuple(
            Protein.from_file(
                os.path.join(os.path.dirname(self.conf_file), info.file_name)
            )
            for info in self.protein_files
        )
    return self._proteins
def clear_protein_files(self):
    '''Clear the set of targets.

    Any cached proteins and protein file information are discarded.
    '''
    self._settings.set_protein_data(())
    for cached in ('_proteins', '_protein_info'):
        if hasattr(self, cached):
            delattr(self, cached)
def add_protein_file(self, file_name):
    '''Add a target file to be docked against.

    :param file_name: a file name, or a :class:`ccdc.docking.Docker.Settings.ProteinFileInfo` instance.

    Any cached proteins and protein file information are discarded.
    '''
    if not isinstance(file_name, Docker.Settings.ProteinFileInfo):
        protein_data = DockingLib.GoldConfProteinData()
        protein_data.set_protein_datafile(file_name)
    else:
        protein_data = file_name._protein_data
    self._settings.add_protein_data(protein_data)
    for cached in ('_proteins', '_protein_info'):
        if hasattr(self, cached):
            delattr(self, cached)
@property
def reference_ligand_file(self):
    '''Any reference ligand file name set.'''
    return self._settings.ligand_reference_file()

@reference_ligand_file.setter
def reference_ligand_file(self, file_name):
    self._settings.set_ligand_reference_file(file_name)

# Output
@property
def output_directory(self):
    '''Directory to which output will be sent.'''
    # The getter previously returned nothing, so the value stored by the
    # setter could never be read back.
    return self._settings.directory()

@output_directory.setter
def output_directory(self, dir_name):
    '''Set the output directory.'''
    self._settings.set_directory(dir_name)

@property
def output_file(self):
    '''Output file.

    If this is an empty string then each docking will be in a separate file.
    '''
    return self._settings.concatenated_output()

@output_file.setter
def output_file(self, file_name):
    '''Set the output file.

    The output format is derived from the extension of the file name.
    '''
    self._settings.set_concatenated_output(file_name)
    self.output_format = os.path.splitext(file_name)[1][1:]

@property
def output_format(self):
    '''Desired format for output file.

    :returns: 'mol2', 'sdf' or None if no recognised format is set.
    '''
    x = self._settings.output_file_format()
    if x == DockingLib.GoldConfFile.MOL2:
        return 'mol2'
    elif x == DockingLib.GoldConfFile.MACCS:
        return 'sdf'
    else:
        return None

@output_format.setter
def output_format(self, value):
    # Anything other than 'mol2' or 'sdf' (case-insensitive) unsets the format.
    if value.lower() == 'mol2':
        self._settings.set_output_file_format(DockingLib.GoldConfFile.MOL2)
    elif value.lower() == 'sdf':
        self._settings.set_output_file_format(DockingLib.GoldConfFile.MACCS)
    else:
        self._settings.set_output_file_format(DockingLib.GoldConfFile.FILEFORMAT_NOTSET)

# fitting points
@property
def fitting_points_file(self):
    '''A file to read or write the fitting points.

    If no file name is set, 'fit_pts.mol2' is stored in the settings and returned.
    '''
    if not self._settings.fitting_points_file():
        self._settings.set_fitting_points_file('fit_pts.mol2')
    return self._settings.fitting_points_file()

@fitting_points_file.setter
def fitting_points_file(self, file_name):
    # An empty name restores the default file and switches reading off;
    # any other name switches reading on.
    if not file_name:
        self._settings.set_fitting_points_file('fit_pts.mol2')
        self._settings.set_read_fitting_points(False)
    else:
        self._settings.set_fitting_points_file(file_name)
        self._settings.set_read_fitting_points(True)

# Binding site
@property
def binding_site(self):
    '''The binding site definition, or None if none has been set.'''
    return self._binding_site

@binding_site.setter
def binding_site(self, value):
    self._binding_site = value

@property
def save_binding_site_atoms(self):
    '''Whether or not to write the binding site atom file.'''
    return self._save_binding_site_atoms

@save_binding_site_atoms.setter
def save_binding_site_atoms(self, value):
    self._save_binding_site_atoms = bool(value)

def write(self, file_name):
    '''Write the settings to a GOLD conf file.

    :param file_name: str, the location of the conf file to write.
    :raises: RuntimeError if neither a fitness nor a rescore function is set.

    File names are first localised with :meth:`make_absolute_file_names`,
    then the binding site and constraints are written into the settings.
    If :attr:`save_binding_site_atoms` is set, the 1-based indices of the
    binding site atoms are written, ten per line, to 'cavity.atoms' beside
    the conf file.
    '''
    if not self.fitness_function and not self.rescore_function:
        raise RuntimeError('No fitness or rescore function set')
    self._conf_file_name = file_name
    self.make_absolute_file_names(file_name, relative=True)
    constraints = self.constraints
    if self.binding_site is not None:
        self.binding_site._to_settings(self)
    for c in constraints:
        c._write_mol_files(self)
        c._constraint.from_string(c._to_string())
    for p in self.protein_files:
        for c in p.constraints:
            c._write_mol_files(self)
            c._constraint.from_string(c._to_string())
    if self.save_binding_site_atoms:
        atoms_file = os.path.join(os.path.dirname(self.conf_file), 'cavity.atoms')
        with open(atoms_file, 'w') as atoms_writer:
            for i, a in enumerate(self.binding_site.atoms):
                if i and i % 10 == 0:
                    atoms_writer.write('\n')
                atoms_writer.write('%s%d' % ('' if i % 10 == 0 else ' ', a.index+1))
            atoms_writer.write('\n\n')
    conf_writer = DockingLib.GoldConfFileWriter(self._settings)
    conf_writer.write(file_name)
@nested_class('Docker.Settings')
class BindingSite(Protein.BindingSite):
    '''Base class for the docking binding site definitions.'''

    def __init__(self):
        '''Initialise a binding site definition.'''
        self.detect_cavity = False

    def _to_settings(self, settings):
        '''Record cavity detection and reset the other cavity fields to fixed defaults.'''
        conf = settings._settings
        conf.set_detect_cavity(self.detect_cavity)
        conf.set_cavity_origin((0, 0, 0))
        conf.set_cavity_radius(10)
        conf.set_floodfill_atom_no(0)
        conf.set_cavity_file('')
        conf.set_cavity_contact_distance(10)

    @staticmethod
    def _from_settings(settings):
        '''Build the binding site matching the cavity definition mode of ``settings``.'''
        modes = DockingLib.GoldConfFile
        site_classes = Docker.Settings
        dispatch = {
            modes.CAVITY_FROM_POINT: site_classes.BindingSiteFromPoint,
            modes.CAVITY_FROM_ATOM: site_classes.BindingSiteFromAtom,
            modes.CAVITY_FROM_RESIDUE: site_classes.BindingSiteFromResidue,
            modes.CAVITY_FROM_LIST_OF_ATOMS: site_classes.BindingSiteFromListOfAtoms,
            modes.CAVITY_FROM_LIST_OF_RESIDUES: site_classes.BindingSiteFromListOfResidues,
            modes.CAVITY_FROM_LIGAND: site_classes.BindingSiteFromLigand,
        }
        site_class = dispatch[settings._settings.cavity_definition_mode()]
        return site_class._from_settings(settings)
class BindingSiteFromPoint(Protein.BindingSiteFromPoint, BindingSite):
    '''A cavity defined from a point.'''

    def __init__(self, protein, origin=(0,0,0), distance=12.):
        '''Initialise the binding site from a protein, an origin and a distance.'''
        Protein.BindingSiteFromPoint.__init__(self, protein, origin, distance)
        Docker.Settings.BindingSite.__init__(self)

    def _to_settings(self, settings):
        '''Store the origin, the radius and the definition mode in ``settings``.'''
        super(self.__class__, self)._to_settings(settings)
        conf = settings._settings
        conf.set_cavity_origin(self.origin)
        conf.set_cavity_radius(self.distance)
        conf.set_cavity_definition_mode(DockingLib.GoldConfFile.CAVITY_FROM_POINT)

    @staticmethod
    def _from_settings(settings):
        '''Rebuild the binding site from ``settings``; no protein is attached.'''
        conf = settings._settings
        origin = conf.cavity_origin()
        site = Docker.Settings.BindingSiteFromPoint(
            None,
            Coordinates(origin.x(), origin.y(), origin.z()),
            conf.cavity_radius()
        )
        site.detect_cavity = conf.detect_cavity()
        return site
class BindingSiteFromAtom(Protein.BindingSiteFromAtom, BindingSite):
    '''A cavity defined from a protein atom.'''

    def __init__(self, protein, atom, distance):
        '''Initialise the binding site from a protein, one of its atoms and a distance.'''
        Protein.BindingSiteFromAtom.__init__(self, protein, atom, distance)
        Docker.Settings.BindingSite.__init__(self)

    def _to_settings(self, settings):
        '''Store the atom, the radius and the definition mode in ``settings``.'''
        super(self.__class__, self)._to_settings(settings)
        conf = settings._settings
        # The atom number is stored 1-based.
        conf.set_floodfill_atom_no(self.atom.index+1)
        conf.set_cavity_radius(self.distance)
        conf.set_cavity_origin(self.atom.coordinates)
        conf.set_cavity_definition_mode(DockingLib.GoldConfFile.CAVITY_FROM_ATOM)

    @staticmethod
    def _from_settings(settings):
        '''Rebuild the binding site from ``settings`` using the first protein.'''
        conf = settings._settings
        protein = settings.proteins[0]
        # Convert the stored 1-based atom number back to an index.
        atom = protein.atoms[conf.floodfill_atom_no()-1]
        site = Docker.Settings.BindingSiteFromAtom(
            settings.proteins[0], atom, conf.cavity_radius()
        )
        site.detect_cavity = conf.detect_cavity()
        return site
class BindingSiteFromResidue(Protein.BindingSiteFromResidue, BindingSite):
    '''A cavity defined from a protein residue.'''

    def __init__(self, protein, residue, distance):
        '''Initialise the binding site from a protein, one of its residues and a distance.'''
        Protein.BindingSiteFromResidue.__init__(self, protein, residue, distance)
        Docker.Settings.BindingSite.__init__(self)

    def _to_settings(self, settings):
        '''Store the residue's first atom, the radius and the definition mode in ``settings``.'''
        super(self.__class__, self)._to_settings(settings)
        conf = settings._settings
        # NOTE(review): the index is stored as-is here, whereas
        # BindingSiteFromAtom stores index+1 -- confirm which GOLD expects.
        conf.set_floodfill_atom_no(self.residue.atoms[0].index)
        conf.set_cavity_radius(self.distance)
        conf.set_cavity_definition_mode(DockingLib.GoldConfFile.CAVITY_FROM_RESIDUE)

    @staticmethod
    def _from_settings(settings):
        '''Rebuild the binding site from ``settings`` using the first protein.

        :raises: RuntimeError if the stored atom is in none of the protein's residues.
        '''
        conf = settings._settings
        protein = settings.proteins[0]
        atom = protein.atoms[conf.floodfill_atom_no()]
        residue = None
        for candidate in protein.residues:
            if atom in candidate.atoms:
                residue = candidate
                break
        if residue is None:
            raise RuntimeError('The atom %s does not appear to be in a residue.' % str(atom))
        site = Docker.Settings.BindingSiteFromResidue(
            protein, residue, conf.cavity_radius()
        )
        site.detect_cavity = conf.detect_cavity()
        return site
class BindingSiteFromListOfAtoms(Protein.BindingSiteFromListOfAtoms, BindingSite):
    '''A cavity defined from a list of protein atoms.'''

    def __init__(self, protein, atoms):
        '''Initialise the binding site from a protein and a sequence of its atoms.'''
        Protein.BindingSiteFromListOfAtoms.__init__(self, protein, atoms)
        Docker.Settings.BindingSite.__init__(self)

    def _to_settings(self, settings):
        '''Write the atom indices to a cavity file beside the conf file and record it in ``settings``.

        The indices are written ten per line, each followed by a space.
        '''
        super(self.__class__, self)._to_settings(settings)
        conf_dir = os.path.dirname(settings.conf_file)
        if settings._settings.cavity_file():
            file_name = os.path.join(conf_dir, os.path.basename(settings._settings.cavity_file()))
            settings._settings.set_cavity_file(file_name)
        else:
            file_name = os.path.join(conf_dir, 'cavity.atoms')
        with open(file_name, 'w') as writer:
            for i, a in enumerate(self.atoms):
                if i and i % 10 == 0:
                    writer.write('\n')
                writer.write('%d ' % a.index)
        settings._settings.set_cavity_file(file_name)
        settings._settings.set_cavity_definition_mode(DockingLib.GoldConfFile.CAVITY_FROM_LIST_OF_ATOMS)

    @staticmethod
    def _from_settings(settings):
        '''Rebuild the binding site from the cavity file named in ``settings``.

        :raises: RuntimeError if the cavity file does not exist.
        '''
        file_name = os.path.join(os.path.dirname(settings.conf_file), settings._settings.cavity_file())
        if not os.path.exists(file_name):
            raise RuntimeError('The cavity file %s does not exist' % file_name)
        with open(file_name) as f:
            # The file holds whitespace-separated integers, as written by _to_settings.
            indices = [int(i) for i in f.read().split()]
        prot = settings.proteins[0]
        prot_atoms = prot.atoms
        bs = Docker.Settings.BindingSiteFromListOfAtoms(
            prot, tuple(prot_atoms[i] for i in indices)
        )
        bs.detect_cavity = settings._settings.detect_cavity()
        return bs
class BindingSiteFromListOfResidues(Protein.BindingSiteFromListOfResidues, BindingSite):
    '''Cavity defined from a list of residues.'''

    def __init__(self, protein, residues):
        '''Initialise the binding site from a protein and a sequence of its residues.'''
        Protein.BindingSiteFromListOfResidues.__init__(self, protein, residues)
        Docker.Settings.BindingSite.__init__(self)

    def _to_settings(self, settings):
        '''Write the residue identifiers to 'cavity.residues' beside the conf file.

        After a header line the identifiers are written eight per line.
        '''
        super(self.__class__, self)._to_settings(settings)
        conf = settings._settings
        residues_file = os.path.join(os.path.dirname(settings.conf_file), 'cavity.residues')
        conf.set_cavity_file(residues_file)
        with open(residues_file, 'w') as out:
            out.write('> <Gold.Protein.ActiveResidues>\n')
            for position, residue in enumerate(self.residues):
                if position and position % 8 == 0:
                    out.write('\n')
                out.write('%s ' % residue.identifier)
            out.write('\n\n')
        conf.set_cavity_definition_mode(DockingLib.GoldConfFile.CAVITY_FROM_LIST_OF_RESIDUES)

    @staticmethod
    def _from_settings(settings):
        '''Rebuild the binding site from the cavity file named in ``settings``.

        Names are matched against full residue identifiers first, then
        against identifiers with everything up to the first ':' removed.

        :raises: RuntimeError if the cavity file does not exist, or a name is ambiguous or unknown.
        '''
        residues_file = os.path.join(os.path.dirname(settings.conf_file), settings._settings.cavity_file())
        if not os.path.exists(residues_file):
            raise RuntimeError('The cavity file %s does not exist' % residues_file)
        protein = settings.proteins[0]
        with open(residues_file) as handle:
            # The first line is the header.
            body_lines = handle.readlines()[1:]
        residue_names = set(' '.join(body_lines).split())

        by_identifier = {}
        for residue in protein.residues:
            by_identifier[residue.identifier] = residue
        by_chainless_name = collections.defaultdict(list)
        for residue in protein.residues:
            identifier = residue.identifier
            by_chainless_name[identifier[identifier.index(':')+1:]].append(residue)

        residues = []
        for name in residue_names:
            if name in by_identifier:
                residues.append(by_identifier[name])
                continue
            if name not in by_chainless_name:
                raise RuntimeError('Residue name %s not found in protein' % name)
            matches = by_chainless_name[name]
            if len(matches) != 1:
                raise RuntimeError('Ambiguous residue name %s' % name)
            residues.append(matches[0])
        if len(residues) != len(residue_names):
            raise RuntimeError('The number of residues and the number of residue names do not match')
        site = Docker.Settings.BindingSiteFromListOfResidues(protein, residues)
        site.detect_cavity = settings._settings.detect_cavity()
        return site
class BindingSiteFromLigand(Protein.BindingSiteFromMolecule, BindingSite):
    '''A cavity defined from a ligand and a contact distance.'''

    def __init__(self, protein, ligand, distance=6.0, whole_residues=True):
        '''Initialise the binding site.

        :param protein: the protein containing the cavity.
        :param ligand: the molecule defining the cavity.
        :param distance: the contact distance.
        :param whole_residues: passed on to ``Protein.BindingSiteFromMolecule``.
        '''
        Protein.BindingSiteFromMolecule.__init__(self, protein, ligand, distance, whole_residues=whole_residues)
        Docker.Settings.BindingSite.__init__(self)

    def _to_settings(self, settings):
        '''Write the ligand to a mol2 file beside the conf file and record the definition in ``settings``.'''
        super(self.__class__, self)._to_settings(settings)
        conf_dir = os.path.dirname(settings.conf_file)
        if settings._settings.cavity_file() and settings._settings.cavity_file().endswith('.mol2'):
            file_name = os.path.join(conf_dir, os.path.basename(settings._settings.cavity_file()))
            settings._settings.set_cavity_file(file_name)
        else:
            # ':' is replaced because the identifier becomes part of a file name.
            file_name = os.path.join(conf_dir, 'cavity_%s.mol2' % self.molecule.identifier.replace(':', '_'))
        with io.MoleculeWriter(file_name) as writer:
            writer.write(self.molecule)
        settings._settings.set_cavity_file(file_name)
        settings._settings.set_cavity_contact_distance(self.distance)
        settings._settings.set_cavity_radius(self.distance)
        settings._settings.set_use_whole_residues(self.whole_residues)
        settings._settings.set_cavity_definition_mode(DockingLib.GoldConfFile.CAVITY_FROM_LIGAND)

    @staticmethod
    def _from_settings(settings):
        '''Rebuild the binding site from the ligand file named in ``settings``.

        The first molecule of the cavity file is used as the ligand.
        '''
        # Resolve against the directory of the conf file, not the conf file
        # path itself, which would yield '<conf file>/<cavity file>'.
        file_name = os.path.join(os.path.dirname(settings.conf_file), settings._settings.cavity_file())
        with io.MoleculeReader(file_name) as reader:
            ligand = reader[0]
        bs = Docker.Settings.BindingSiteFromLigand(
            settings.proteins[0], ligand, settings._settings.cavity_contact_distance(), settings._settings.use_whole_residues()
        )
        bs.detect_cavity = settings._settings.detect_cavity()
        return bs
# Constraints
@nested_class('Docker.Settings')
class Constraint(object):
    '''A docking constraint.

    Wraps an underlying constraint object held in ``_constraint``.
    '''

    def __init__(self, _constraint=None):
        self._constraint = _constraint

    @staticmethod
    def _make_constraint(settings, _constraint, protein_file=None):
        '''Build the wrapper class matching the type of ``_constraint``.'''
        wrappers = {
            'distance': Docker.Settings.DistanceConstraint,
            'h_bond': Docker.Settings.HBondConstraint,
            'protein_h_bond': Docker.Settings.ProteinHBondConstraint,
            'substructure': Docker.Settings.SubstructureConstraint,
            'similarity': Docker.Settings.TemplateSimilarityConstraint,
            'scaffold': Docker.Settings.ScaffoldMatchConstraint,
            'sphere': Docker.Settings.RegionConstraint,
        }
        wrapper = wrappers[_constraint.type()]
        return wrapper._from_string(settings, _constraint=_constraint, protein_file=protein_file)

    def _write_mol_files(self, settings):
        '''Write any molecule files the constraint needs; nothing by default.'''
        pass

    @staticmethod
    def _is_protein_atom(atom):
        '''Whether the atom's molecule has any atom annotated as non-heterogen protein substructure.'''
        molecule = atom._molecule
        for position in range(molecule.natoms()):
            annotations = molecule.atom(position).annotations()
            if not annotations:
                continue
            if not AnnotationsLib.has_ProteinSubstructureData(annotations):
                continue
            if not annotations.find_ProteinSubstructureData().heterogen():
                return True
        return False
class DistanceConstraint(Constraint):
    '''A distance constraint.'''

    def __init__(self, atom1, atom2, limits=(1.5, 3.5), weight=5.0, topological_equivalent=False, _constraint=None):
        '''Initialise a distance constraint.

        :param atom1, atom2: :class:`ccdc.molecule.Atom` instances.  These may be either protein or ligand atoms.
        :param limits: range of values with no penalty.
        :param weight: spring constant
        :param topological_equivalent: whether or not to accept a topologically equivalent atom
        '''
        self.atom1 = atom1
        self.atom2 = atom2
        # Stored in ascending order whatever order was supplied.
        self.limits = (min(limits), max(limits))
        self.weight = weight
        self.topological_equivalent = topological_equivalent
        if _constraint is None:
            _constraint = DockingLib.GoldDistanceConstraint()
            _constraint.from_string(self._to_string())
        super(self.__class__, self).__init__(_constraint)
        # Remember the first protein molecule involved, if there is one.
        for candidate in (atom1, atom2):
            if self._is_protein_atom(candidate):
                self._add_to_protein = candidate._molecule
                break

    def _to_string(self):
        '''The constraint as text: owner and 1-based index of each atom, upper and lower limit, weight, on/off.'''
        def owner(atom):
            return 'protein' if self._is_protein_atom(atom) else 'ligand'

        fields = (
            owner(self.atom1), self.atom1.index + 1,
            owner(self.atom2), self.atom2.index + 1,
            max(self.limits), min(self.limits),
            self.weight,
            'on' if self.topological_equivalent else 'off'
        )
        return '%s %d %s %d %.4f %.4f %.4f %s' % fields

    @staticmethod
    def _from_string(settings, _constraint, protein_file=None):
        '''Rebuild the constraint from the text of ``_constraint``, using the first protein and first ligand.'''
        def atom_for(owner, serial):
            molecule = settings.proteins[0] if owner == 'protein' else settings.ligands[0]
            return molecule.atoms[int(serial) - 1]

        parts = _constraint.to_string().split()
        atom1 = atom_for(parts[0], parts[1])
        atom2 = atom_for(parts[2], parts[3])
        limits = (float(parts[4]), float(parts[5]))
        weight = float(parts[6])
        topological_equivalent = parts[7] == 'on'
        return Docker.Settings.DistanceConstraint(
            atom1, atom2, limits, weight, topological_equivalent, _constraint
        )
class HBondConstraint(Constraint):
    '''An HBond constraint.

    :param atom1, atom2: :class:`ccdc.molecule.Atom` instances.  One of the atoms should be a donatable hydrogen,
                         the other a HBond acceptor atom.  One atom should be on the protein, the other on a ligand.
    '''

    def __init__(self, atom1, atom2, _constraint=None):
        first_in_protein = self._is_protein_atom(atom1)
        second_in_protein = self._is_protein_atom(atom2)
        # Exactly one of the two atoms must belong to the protein.
        if first_in_protein == second_in_protein:
            raise RuntimeError('HBond must be between a ligand atom and a protein atom')
        # The atoms are stored ligand first, protein second.
        if first_in_protein:
            self._add_to_protein = atom1._molecule
            self.atom1, self.atom2 = atom2, atom1
        else:
            self._add_to_protein = atom2._molecule
            self.atom1, self.atom2 = atom1, atom2
        if self.atom1 is not None and self.atom2 is not None:
            donor_then_acceptor = (
                self.atom1.atomic_number == 1 and
                self.atom1.neighbours[0].is_donor and
                self.atom2.is_acceptor
            )
            acceptor_then_donor = donor_then_acceptor or (
                self.atom1.is_acceptor and
                self.atom2.atomic_number == 1 and
                self.atom2.neighbours[0].is_donor
            )
            if not acceptor_then_donor:
                raise RuntimeError('The specified atoms do not form an HBond')
        if _constraint is None:
            _constraint = DockingLib.GoldHBondConstraint()
            _constraint.from_string(self._to_string())
        super(self.__class__, self).__init__(_constraint)

    def _to_string(self):
        '''The constraint as text: owner and 1-based index of each atom.'''
        def owner(atom):
            return 'protein' if self._is_protein_atom(atom) else 'ligand'

        return '%s %d %s %d' % (
            owner(self.atom1), self.atom1.index + 1,
            owner(self.atom2), self.atom2.index + 1
        )

    @staticmethod
    def _from_string(settings, _constraint, protein_file=None):
        '''Rebuild the constraint from the text of ``_constraint``, using the first protein and first ligand.'''
        def atom_for(owner, serial):
            molecule = settings.proteins[0] if owner == 'protein' else settings.ligands[0]
            return molecule.atoms[int(serial) - 1]

        parts = _constraint.to_string().split()
        atom1 = atom_for(parts[0], parts[1])
        atom2 = atom_for(parts[2], parts[3])
        return Docker.Settings.HBondConstraint(atom1, atom2, _constraint)
class ProteinHBondConstraint(Constraint):
    '''A Protein HBond constraint.'''

    def __init__(self, atoms, weight=10.0, min_hbond_score=0.005, _constraint=None):
        '''Initialise a ProteinHBond constraint.

        :param atoms: a list :class:`ccdc.molecule.Atom` instances from the protein.  The atoms should be donatable hydrogens or acceptor atoms.
        :param weight: the penalty to be applied for no atom of the list forming an HBond.
        :param min_hbond_score: the minimal score of an HBond to be considered a valid HBond.
        :param _constraint: private; an existing low-level constraint to wrap instead of building a new one.
        :raises: RuntimeError if no atoms are given, if an atom is not a protein atom, or if an atom cannot form an HBond.
        '''
        self.atoms = list(atoms)
        if not self.atoms:
            # Previously an empty list surfaced as a bare IndexError further down.
            raise RuntimeError('At least one protein atom is required')
        for a in self.atoms:
            if not self._is_protein_atom(a):
                raise RuntimeError('One of the atoms is not in the protein')
            # A hydrogen with no neighbours cannot be a donatable hydrogen.
            donatable = a.atomic_number == 1 and a.neighbours and a.neighbours[0].is_donor
            if not donatable and not a.is_acceptor:
                raise RuntimeError('One of the atoms does not form an HBond')
        self._add_to_protein = self.atoms[0]._molecule
        self.weight = weight
        self.min_hbond_score = min_hbond_score
        if _constraint is None:
            _constraint = DockingLib.GoldProteinHBondConstraint()
            _constraint.from_string(self._to_string())
        # Name the class explicitly: super(self.__class__, self) recurses
        # without end as soon as this class is subclassed.
        super(Docker.Settings.ProteinHBondConstraint, self).__init__(_constraint)

    def _to_string(self):
        '''Private: the textual form of the constraint, with 1-based atom indices.'''
        s = '%.4f %.4f %s' % (
            self.weight, self.min_hbond_score, ' '.join(str(a.index + 1) for a in self.atoms)
        )
        return s

    @staticmethod
    def _from_string(settings, _constraint, protein_file=None):
        '''Private: rebuild the Python-level constraint from a low-level one.

        The protein is read afresh from ``protein_file``, resolved relative to the
        directory of the configuration file.
        '''
        parts = _constraint.to_string().split()
        weight = float(parts[0])
        min_hbond_score = float(parts[1])
        # Can't get from settings.proteins since they read the constraints
        file_name = os.path.join(os.path.dirname(settings.conf_file), protein_file)
        prot = Protein.from_file(file_name)
        pats = prot.atoms
        ats = [pats[int(i) - 1] for i in parts[2:]]
        return Docker.Settings.ProteinHBondConstraint(ats, weight, min_hbond_score)
class SubstructureConstraint(Constraint):
    '''A Substructure constraint.'''

    def __init__(self, protein_atom, substructure, substructure_atom, limits, weight=5.0, use_ring_centre=True, substructure_file_name=None, _constraint=None):
        '''Initialise a substructure constraint.

        :param protein_atom: an atom of the protein.
        :param substructure: the substructure; it must expose ``atoms`` and ``identifier``.
        :param substructure_atom: an atom of ``substructure``.
        :param limits: a pair of distance limits.
        :param weight: the weight written into the constraint.
        :param use_ring_centre: whether the ``ring_center`` keyword is written into the constraint.
        :param substructure_file_name: where the substructure may be written.  If not provided, a name is derived from the identifier of the substructure.
        :param _constraint: private; an existing low-level constraint to wrap instead of building a new one.
        :raises: RuntimeError if ``protein_atom`` is not a protein atom or ``substructure_atom`` is not in ``substructure``.
        '''
        if not self._is_protein_atom(protein_atom):
            raise RuntimeError('Atom %s is not a protein atom' % protein_atom)
        if substructure_atom not in substructure.atoms:
            raise RuntimeError('Atom %s is not in the substructure' % substructure_atom)
        self._add_to_protein = protein_atom._molecule
        self.protein_atom = protein_atom
        self.substructure = substructure
        self.substructure_atom = substructure_atom
        self.limits = limits
        self.weight = weight
        self.use_ring_centre = use_ring_centre
        if substructure_file_name is None:
            substructure_file_name = 'substructure_' + substructure.identifier.replace(':', '_') + '.mol2'
        self.substructure_file_name = substructure_file_name
        if _constraint is None:
            _constraint = DockingLib.GoldSubstructureConstraint()
            _constraint.from_string(self._to_string())
        # Kept on the low-level object so _from_string can recover it when no file exists.
        _constraint.substructure = substructure
        # Name the class explicitly: super(self.__class__, self) recurses
        # without end as soon as this class is subclassed.
        super(Docker.Settings.SubstructureConstraint, self).__init__(_constraint)

    def _to_string(self):
        '''Private: the textual form of the constraint.

        Atom indices are written 1-based and the upper limit precedes the lower one.
        '''
        return 'protein %d %s %d %.4f %.4f %.4f %s' % (
            self.protein_atom.index + 1,
            self.substructure_file_name,
            self.substructure_atom.index + 1,
            max(self.limits), min(self.limits),
            self.weight,
            'ring_center' if self.use_ring_centre else ''
        )

    @staticmethod
    def _from_string(settings, _constraint, protein_file=None):
        '''Private: rebuild the Python-level constraint from a low-level one.

        Fields are located from the end of the string because the file name may contain spaces.
        '''
        parts = _constraint.to_string().split()
        protein_atom = settings.proteins[0].atoms[int(parts[1]) - 1]
        if parts[-1] == 'ring_center':
            use_ring_centre = True
            last = -2
        else:
            use_ring_centre = False
            last = -1
        weight = float(parts[last])
        limits = (float(parts[last - 1]), float(parts[last - 2]))
        sub_index = int(parts[last - 3])
        substructure_file_name = ' '.join(parts[2:last - 3])
        substructure_file_name = os.path.join(os.path.dirname(settings.conf_file), substructure_file_name)
        if os.path.exists(substructure_file_name):
            substructure = io.MoleculeReader(substructure_file_name)[0]
        else:
            substructure = _constraint.substructure
        substructure_atom = substructure.atoms[sub_index - 1]
        return Docker.Settings.SubstructureConstraint(
            protein_atom, substructure, substructure_atom, limits, weight, use_ring_centre, substructure_file_name, _constraint
        )

    def _write_mol_files(self, settings):
        '''Private: write the substructure next to the configuration file.

        ``substructure_file_name`` is replaced by the absolute path of the written file.
        '''
        self.substructure_file_name = os.path.abspath(
            os.path.join(os.path.dirname(settings.conf_file), os.path.basename(self.substructure_file_name))
        )
        with io.MoleculeWriter(self.substructure_file_name) as writer:
            writer.write(self.substructure)
class TemplateSimilarityConstraint(Constraint):
    '''A template similarity constraint.'''

    def __init__(self, type, template, weight=5.0, template_file_name=None, _constraint=None):
        '''Initialise a template similarity constraint.

        :param type: must be 'donor', 'acceptor' or 'all'
        :param template: a :class:`ccdc.molecule.Molecule` instance
        :param template_file_name: where the template may be written.  If not provided, the identifier of the template will be used.
        :param weight: the maximum weight to be given in the event of an exact match with the template.
        :param _constraint: private; an existing low-level constraint to wrap instead of building a new one.
        :raises: RuntimeError if ``type`` is not one of the permitted values.
        '''
        self.type = type.lower()
        if self.type not in ['donor', 'acceptor', 'all']:
            raise RuntimeError('Invalid type %s for TemplateSimilarityConstraint' % self.type)
        self.template = template
        if template_file_name is None:
            template_file_name = template.identifier + '.mol2'
        self.template_file_name = template_file_name
        self.weight = weight
        if _constraint is None:
            _constraint = DockingLib.GoldTemplateSimilarityConstraint()
            _constraint.from_string(self._to_string())
        # Kept on the low-level object so _from_string can recover it when no file exists.
        _constraint.template = template
        # Name the class explicitly: super(self.__class__, self) recurses
        # without end as soon as this class is subclassed.
        super(Docker.Settings.TemplateSimilarityConstraint, self).__init__(_constraint)

    def _to_string(self):
        '''Private: the textual form of the constraint.'''
        s = '%s %s %.4f' % (
            self.type, self.template_file_name, self.weight
        )
        return s

    @staticmethod
    def _from_string(settings, _constraint, protein_file=None):
        '''Private: rebuild the Python-level constraint from a low-level one.

        The file name is everything between the first and last fields, so it may contain spaces.
        '''
        parts = _constraint.to_string().split()
        kind = parts[0]
        weight = float(parts[-1])
        template_file_name = ' '.join(parts[1:-1])
        template_file_name = os.path.join(os.path.dirname(settings.conf_file), template_file_name)
        if os.path.exists(template_file_name):
            template = io.MoleculeReader(template_file_name)[0]
        else:
            template = _constraint.template
        return Docker.Settings.TemplateSimilarityConstraint(
            kind, template, weight, template_file_name, _constraint
        )

    def _write_mol_files(self, settings):
        '''Private: write the template next to the configuration file.

        ``template_file_name`` is replaced by the absolute path of the written file.
        '''
        self.template_file_name = os.path.abspath(
            os.path.join(os.path.dirname(settings.conf_file), os.path.basename(self.template_file_name))
        )
        with io.MoleculeWriter(self.template_file_name) as writer:
            writer.write(self.template)
class ScaffoldMatchConstraint(Constraint):
    '''A scaffold match constraint.'''

    def __init__(self, molecule, weight=5.0, atoms=None, _constraint=None):
        '''Construct a ScaffoldMatchConstraint.

        :param molecule: a :class:`ccdc.molecule.Molecule` instance
        :param weight: a spring constant
        :param atoms: a list of :class:`ccdc.molecule.Atom` instances
        :param _constraint: private; an existing low-level constraint to wrap instead of building a new one.
        '''
        self.molecule = molecule
        self.weight = weight
        self.atoms = atoms
        if _constraint is not None:
            self.file_name = _constraint.file_name
        else:
            self.file_name = 'scaffold_%s.mol2' % self.molecule.identifier.replace(':', '_')
        if _constraint is None:
            _constraint = DockingLib.GoldScaffoldMatchConstraint()
            _constraint.from_string(self._to_string())
        _constraint._molecule = molecule
        # Name the class explicitly: super(self.__class__, self) recurses
        # without end as soon as this class is subclassed.
        super(Docker.Settings.ScaffoldMatchConstraint, self).__init__(_constraint)

    @staticmethod
    def _from_string(settings, _constraint, protein_file=None):
        '''Private: rebuild the Python-level constraint from a low-level one.

        The string has the form ``<file name> <weight> [list <index> ...]``.
        If the scaffold file does not exist the molecule and atoms are ``None``.
        '''
        tokens = _constraint.to_string().split()
        indexes = None
        # Look for 'list' as a whole token, searching from the end, so that a
        # file name which merely contains the letters 'list' is left intact.
        for position in range(len(tokens) - 1, -1, -1):
            if tokens[position] == 'list':
                indexes = tokens[position + 1:]
                tokens = tokens[:position]
                break
        weight = float(tokens[-1])
        file_name = ' '.join(tokens[:-1])
        file_name = os.path.join(os.path.dirname(settings.conf_file), file_name)
        _constraint.file_name = file_name
        if os.path.exists(file_name):
            with io.MoleculeReader(file_name) as reader:
                molecule = reader[0]
            if indexes:
                atoms = [molecule.atoms[int(i) - 1] for i in indexes]
            else:
                atoms = None
        else:
            molecule = None
            atoms = None
        return Docker.Settings.ScaffoldMatchConstraint(molecule, weight, atoms, _constraint)

    def _to_string(self):
        '''Private: the textual form of the constraint.

        Atom indices are written 1-based, matching what _from_string reads back.
        '''
        if self.atoms is not None:
            ats = ' list %s' % ' '.join('%d' % (a.index + 1) for a in self.atoms)
        else:
            ats = ''
        s = '%s %.4f%s' % (self.file_name, self.weight, ats)
        return s

    def _write_mol_files(self, settings):
        '''Private: write the scaffold next to the configuration file.

        ``file_name`` is replaced by the absolute path of the written file.
        '''
        self.file_name = os.path.abspath(
            os.path.join(os.path.dirname(settings.conf_file), os.path.basename(self.file_name))
        )
        with io.MoleculeWriter(self.file_name) as writer:
            writer.write(self.molecule)
class RegionConstraint(Constraint):
    '''A region constraint.'''

    def __init__(self, origin, radius, type, weight=5.0, atoms=None, _constraint=None):
        '''Initialise a region constraint.

        :param origin: a sequence of three coordinates.
        :param radius: a positive radius.
        :param type: a string starting with 'arom' selects aromatic ring atoms, one starting with 'hydro' selects hydrophobic atoms; anything else means an explicit atom list.
        :param weight: the weight written into the constraint.
        :param atoms: the atoms of an explicit constraint; must be ``None`` for the other two types.
        :param _constraint: private; an existing low-level constraint to wrap instead of building a new one.
        :raises: RuntimeError if the radius is not positive or ``atoms`` is inconsistent with ``type``.
        '''
        self.origin = origin
        if radius <= 0.0:
            raise RuntimeError('Invalid radius for RegionConstraint')
        self.radius = radius
        type = type.lower()
        if type.startswith('arom'):
            self.type = 'aromatic'
            self._type = 'arom_ring_atoms'
            if atoms is not None:
                raise RuntimeError('Atoms should not be provided if the type of a RegionConstraint is to be aromatic')
        elif type.startswith('hydro'):
            self.type = 'hydrophobic'
            self._type = 'hydrophobic_atoms'
            if atoms is not None:
                raise RuntimeError('Atoms should not be provided if the type of a RegionConstraint is to be hydrophobic')
        else:
            self.type = 'explicit'
            self._type = 'list'
            if atoms is None:
                raise RuntimeError('Atoms must be provided if the type of a RegionConstraint is to be explicit')
        self.atoms = atoms
        self.weight = weight
        if _constraint is None:
            _constraint = DockingLib.GoldRegionConstraint()
            _constraint.from_string(self._to_string())
        # Name the class explicitly: super(self.__class__, self) recurses
        # without end as soon as this class is subclassed.
        super(Docker.Settings.RegionConstraint, self).__init__(_constraint)

    def _to_string(self):
        '''Private: the textual form of the constraint, with 1-based atom indices.'''
        s = '%.4f %.4f %.4f %.4f %.4f %s ' % (
            self.origin[0], self.origin[1], self.origin[2],
            self.radius, self.weight, self._type
        )
        if self.atoms is not None:
            s += ' '.join(str(a.index + 1) for a in self.atoms)
        return s

    @staticmethod
    def _from_string(settings, _constraint, protein_file=None):
        '''Private: rebuild the Python-level constraint from a low-level one.

        Explicit atom indices are resolved against the first ligand of the settings.
        '''
        parts = _constraint.to_string().split()
        x = float(parts[0])
        y = float(parts[1])
        z = float(parts[2])
        r = float(parts[3])
        weight = float(parts[4])
        if parts[5] == 'list':
            kind = 'list'
            atom_ids = [int(i) for i in parts[6:]]
            ligand = settings.ligands[0]
            atoms = [ligand.atoms[i - 1] for i in atom_ids]
        else:
            kind = parts[5]
            atoms = None
        return Docker.Settings.RegionConstraint(
            (x, y, z), r, kind, weight, atoms, _constraint
        )
@property
def constraints(self):
    '''The list of constraints set.

    While the cached list is empty it is rebuilt from the constraints held by
    the underlying settings, followed by those of each protein file.
    '''
    if self._constraints:
        return self._constraints
    rebuilt = []
    for position in range(self._settings.nconstraints()):
        raw = self._settings.constraint(position)
        rebuilt.append(Docker.Settings.Constraint._make_constraint(self, raw))
    self._constraints = rebuilt
    for protein_info in self.protein_files:
        self._constraints.extend(protein_info.constraints)
    return self._constraints
def clear_constraints(self):
    '''Clears the set of constraints.

    The cached list, the constraints of the underlying settings and those of
    every protein file are all emptied.
    '''
    self._constraints = []
    self._settings.clear_constraints()
    for protein_info in self.protein_files:
        protein_data = protein_info._protein_data
        protein_data.clear_constraints()
def add_constraint(self, constraint):
    '''Add a constraint to the docking.

    :param constraint: :class:`ccdc.docking.Docker.Settings.Constraint` instance.
    :raises: RuntimeError if the constraint belongs to a protein that is not one of the proteins of these settings.
    '''
    if not hasattr(constraint, '_add_to_protein'):
        self._settings.add_constraint(constraint._constraint)
    else:
        # The constraint is stored with the protein file whose protein it refers to.
        not_found = object()
        owner = next(
            (info for protein, info in zip(self.proteins, self.protein_files)
             if protein._molecule == constraint._add_to_protein),
            not_found
        )
        if owner is not_found:
            raise RuntimeError('Cannot find appropriate protein.')
        owner.add_constraint(constraint)
    self._constraints.append(constraint)
@property
def force_constraints(self):
    '''Whether the constraints are to be forced.'''
    return self._settings.force_constraints()

@force_constraints.setter
def force_constraints(self, tf):
    self._settings.set_force_constraints(tf)

# Fitness function
@property
def fitness_function(self):
    '''Which fitness function to use.

    Options are 'goldscore', 'chemscore', 'asp', 'plp'.  GoldScore is selected by default.

    Setting a false value leaves only the rescore function, turning the run into a rescoring run.
    Setting a value while a rescore function is set selects consensus scoring.

    :raises: TypeError if the value is not a recognised fitness function.
    '''
    return self._fitness_function

@fitness_function.setter
def fitness_function(self, value):
    if not value:
        self._fitness_function = ''
        self._settings.set_gold_fitness_function_path(self._rescore_function)
        self._settings.set_run_type(self._settings.RESCORE_RUN)
        self._settings.set_use_relative_ligand_energy(False)
        return
    value = value.lower()
    if value not in self._fitness_functions:
        raise TypeError('%s is not a recognised fitness function' % value)
    self._fitness_function = value
    self._settings.set_gold_fitness_function_path(value)
    if self._rescore_function:
        self._settings.set_gold_fitness_function_path('consensus_score')
        self._settings.set_docking_fitness_function_path(value)
        self._settings.set_rescore_fitness_function_path(self._rescore_function)
        self._settings.set_run_type(self._settings.CONSENSUS_SCORE)
    else:
        self._settings.set_run_type(self._settings.STANDARD_RUN)

@property
def rescore_function(self):
    '''The fitness function used for rescoring.

    Should not be the same as the fitness function.

    :raises: TypeError if the value is not a recognised fitness function, or is the current fitness or docking function.
    '''
    return self._rescore_function

@rescore_function.setter
def rescore_function(self, value):
    '''Set the rescore function.'''
    if not value:
        self._rescore_function = ''
        self._settings.set_rescore_fitness_function_path('')
        self._settings.set_run_type(DockingLib.GoldConfFile.STANDARD_RUN)
        return
    value = value.lower()
    if value not in self._fitness_functions:
        raise TypeError('%s is not a recognised fitness function' % value)
    elif value == self.fitness_function:
        raise TypeError('%s is the current fitness function' % value)
    elif value == self._settings.docking_fitness_function_path():
        raise TypeError('%s is the current docking function' % value)
    else:
        self._rescore_function = value
        if self.fitness_function == '':
            # No docking function: the run only rescores.
            self._settings.set_gold_fitness_function_path(value)
            self._settings.set_run_type(self._settings.RESCORE_RUN)
            self._settings.set_use_relative_ligand_energy(False)
        else:
            self._settings.set_rescore_fitness_function_path(value)
            self._settings.set_gold_fitness_function_path('consensus_score')
            self._settings.set_docking_fitness_function_path(self._fitness_function)
            self._settings.set_run_type(self._settings.CONSENSUS_SCORE)

@staticmethod
def _locate_gold_file(value):
    '''Private: the candidate path for a parameter file name.

    Absolute paths are returned unchanged.  A relative path is looked for under
    ``$GOLD_DIR/gold`` and then ``$GOLD_DIR``; failing that under the GOLD
    distribution relative to ``$MAINDIR``; with neither variable set it is
    returned unchanged.  The caller checks that the result exists.
    '''
    if os.path.isabs(value):
        return value
    if 'GOLD_DIR' in os.environ:
        file_name = os.path.join(os.environ['GOLD_DIR'], 'gold', value)
        if not os.path.exists(file_name):
            file_name = os.path.join(os.environ['GOLD_DIR'], value)
        return file_name
    if 'MAINDIR' in os.environ:
        return os.path.join(os.environ['MAINDIR'], '..', 'goldsuite', 'gold_dist', 'gold', value)
    return value

@property
def score_parameter_file(self):
    '''The location of an alternative score parameter file.

    If set to a relative path the file will be found in the standard GOLD distribution.
    If set to None, the DEFAULT file will be used.

    :raises: RuntimeError if the file cannot be found.
    '''
    return self._settings.score_parameter_file()

@score_parameter_file.setter
def score_parameter_file(self, value):
    if not value or value == 'DEFAULT':
        self._settings.set_score_parameter_file('DEFAULT')
        return
    file_name = self._locate_gold_file(value)
    if os.path.exists(file_name):
        self._settings.set_score_parameter_file(file_name)
    else:
        raise RuntimeError('score_parameter_files: Cannot find path for %s' % value)

@property
def torsion_distribution_file(self):
    '''The location of a torsion distribution file.

    If set to a relative path, the file will be found in the standard GOLD distribution.
    If set to None, the DEFAULT file will be used.

    :raises: RuntimeError if the file cannot be found.
    '''
    return self._settings.torsion_distribution_file()

@torsion_distribution_file.setter
def torsion_distribution_file(self, value):
    if not value or value == 'DEFAULT':
        self._settings.set_torsion_distribution_file('DEFAULT')
        return
    file_name = self._locate_gold_file(value)
    if os.path.exists(file_name):
        self._settings.set_torsion_distribution_file(file_name)
    else:
        raise RuntimeError('torsion_distribution_file: Cannot find path for %s' % value)

# GA parameters
@property
def autoscale(self):
    '''The autoscale percentage, which controls how much searching is performed.

    The docker will determine how much docking is reasonable to perform on a ligand
    based on the number of rotatable bonds and the number of hydrogen donors and acceptors.
    This percentage will scale the amount of docking done to perform faster or more thorough docking.

    The value is converted with ``int()`` before it is stored.
    '''
    return self._settings.autoscale()

@autoscale.setter
def autoscale(self, percent):
    '''Set the autoscale factor.'''
    self._settings.set_autoscale(int(percent))

# Termination options
@property
def early_termination(self):
    '''Whether early termination is permitted.

    If early termination is permitted this will be (True, number_of_solutions, rmsd_threshold),
    if not this will be (False, None, None)

    May be set to a single truth value, or to a sequence of up to three values.
    Missing or unusable values default to 3 solutions and an RMSD of 1.5.
    '''
    x = self._settings.use_early_termination()
    if x:
        return (
            x,
            self._settings.early_termination_n_top_solutions(),
            self._settings.early_termination_rmsd()
        )
    else:
        return (x, None, None)

@early_termination.setter
def early_termination(self, value):
    try:
        tf = bool(value[0])
    except (ValueError, TypeError, IndexError):
        # Not a sequence: treat the value as a plain on/off switch.
        if value:
            self._settings.set_use_early_termination(True)
            self._settings.set_early_termination_n_top_solutions(3)
            self._settings.set_early_termination_rmsd(1.5)
        else:
            self._settings.set_use_early_termination(False)
    else:
        self._settings.set_use_early_termination(tf)
        if tf:
            try:
                n = int(value[1])
            except (ValueError, TypeError, IndexError):
                self._settings.set_early_termination_n_top_solutions(3)
                self._settings.set_early_termination_rmsd(1.5)
            else:
                self._settings.set_early_termination_n_top_solutions(n)
                try:
                    f = float(value[2])
                except (ValueError, TypeError, IndexError):
                    self._settings.set_early_termination_rmsd(1.5)
                else:
                    self._settings.set_early_termination_rmsd(f)

# Options honoured whether or not MIN_OUT is requested.
_WRITE_OPTIONS_ALWAYS = ('NO_GOLD_LOG_FILE', 'NO_BESTRANKING_LST_FILE')

# Options that are ignored by the setter when MIN_OUT is requested.
_WRITE_OPTIONS_DETAILED = (
    'NO_LOG_FILES', 'NO_LINK_FILES', 'NO_RNK_FILES',
    'NO_GOLD_SOLN_LIGAND_MOL2_FILES', 'NO_GOLD_LIGAND_MOL2_FILE',
    'NO_GOLD_PROTEIN_MOL2_FILE', 'NO_LGFNAME_FILE', 'NO_PLP_MOL2_FILES',
    'NO_PID_FILE', 'NO_SEED_LOG_FILE', 'NO_GOLD_ERR_FILE',
    'NO_FIT_PTS_FILES', 'NO_ASP_MOL2_FILES',
)

# Order in which the setter clears every flag before applying a new selection.
_WRITE_OPTIONS_RESET_ORDER = (
    'NO_LOG_FILES', 'NO_LINK_FILES', 'NO_RNK_FILES', 'NO_BESTRANKING_LST_FILE',
    'NO_GOLD_SOLN_LIGAND_MOL2_FILES', 'NO_GOLD_LIGAND_MOL2_FILE',
    'NO_GOLD_PROTEIN_MOL2_FILE', 'NO_LGFNAME_FILE', 'NO_PLP_MOL2_FILES',
    'NO_PID_FILE', 'NO_SEED_LOG_FILE', 'NO_GOLD_ERR_FILE',
    'NO_FIT_PTS_FILES', 'NO_ASP_MOL2_FILES', 'NO_GOLD_LOG_FILE', 'MIN_OUT',
)

@property
def write_options(self):
    '''Determines which write options are set.

    The options are:

    * MIN_OUT: Use this to write only the gold.log and bestranking.lst files. This is the recommended option for high-throughput virtual screening
    * NO_LOG_FILES: Use this to disable the writing of all ligand log files and the gold_protein.log file.
    * NO_LINK_FILES: Use this to disable the writing of ranked pose shortcut files to solution files. By default, one file is written per solution file.
    * NO_RNK_FILES: Use this to disable the writing of the ranked fitness lists (.rnk extension) for each molecule. By default, one file is written per ligand.
    * NO_BESTRANKING_LST_FILE: Use this to disable the writing of the bestranking.lst file which includes a list of the highest scoring pose for each ligand.
    * NO_GOLD_SOLN_LIGAND_MOL2_FILES: Use this to disable the writing of all solution files. As there would be nothing to point to, this option also disables the writing of the ranked pose shortcut files.
    * NO_GOLD_LIGAND_MOL2_FILE: Use this to disable the writing of all ligand files. By default, one file is written per ligand.
    * NO_GOLD_PROTEIN_MOL2_FILE: Use this to disable the writing of the protein file. By default, one file is written per target protein.
    * NO_LGFNAME_FILE: Use this to disable the writing of the .lgfname file.
    * NO_PLP_MOL2_FILES: If using the ChemPLP scoring function, use this to disable the writing of plp_ligand.mol2 and plp_protein.mol2.
    * NO_PID_FILE: Use this to disable the writing of the gold.pid file.
    * NO_SEED_LOG_FILE: Use this to disable the writing of the gold.seed_log file.
    * NO_GOLD_ERR_FILE: Use this to disable the writing of the gold.err file.
    * NO_FIT_PTS_FILES: Use this to disable the writing of all files related to fitting points including, but not limited to, fit_pts.mol2 and fit_pts_merged.mol2.
    * NO_ASP_MOL2_FILES: If using the ASP scoring function, use this to disable the writing of asp_ligand.mol2 and asp_protein.mol2.
    * NO_GOLD_LOG_FILE: Use this to disable the writing of gold.log.

    May be set to a string naming the options, or to an iterable of such strings;
    names are matched case-insensitively and MINIMUM_OUTPUT is accepted for MIN_OUT.
    Setting replaces the previous selection.  When MIN_OUT is requested, only
    NO_GOLD_LOG_FILE and NO_BESTRANKING_LST_FILE are applied alongside it.

    Returns a list of enabled write options.

    :raises: TypeError if set to something that is neither a string nor iterable.
    '''
    reported = self._WRITE_OPTIONS_ALWAYS + ('MIN_OUT',) + self._WRITE_OPTIONS_DETAILED
    # Each option is read through the settings method named after it in lower case.
    return [option for option in reported if getattr(self._settings, option.lower())()]

@write_options.setter
def write_options(self, value):
    if isinstance(value, six.string_types):
        value = value.upper()
    else:
        try:
            value = ' '.join(v.upper() for v in value if isinstance(v, six.string_types))
        except Exception:
            # Not a bare except: KeyboardInterrupt and SystemExit must propagate.
            raise TypeError('write_options() requires an argument of type \'str\'.')

    def _apply(option, flag):
        getattr(self._settings, 'set_' + option.lower())(flag)

    # refresh write options to default
    for option in self._WRITE_OPTIONS_RESET_ORDER:
        _apply(option, False)

    for option in self._WRITE_OPTIONS_ALWAYS:
        if option in value:
            _apply(option, True)
    if 'MIN_OUT' in value or 'MINIMUM_OUTPUT' in value:
        _apply('MIN_OUT', True)
    else:
        for option in self._WRITE_OPTIONS_DETAILED:
            if option in value:
                _apply(option, True)

@property
def diverse_solutions(self):
    '''Diverse solutions settings.

    If diverse solutions is enabled this will be (True, cluster size, rmsd), otherwise (False, None, None)

    May be set to a single truth value, or to a sequence of up to three values.
    Missing or unusable values default to a cluster size of 1 and an RMSD of 1.5.
    '''
    tf = self._settings.use_diverse_solutions()
    if tf:
        return (
            True,
            self._settings.diverse_solutions_cluster_size(),
            self._settings.diverse_solutions_rmsd()
        )
    else:
        return (False, None, None)

@diverse_solutions.setter
def diverse_solutions(self, value):
    try:
        tf = bool(value[0])
    except (ValueError, TypeError, IndexError):
        # Not a sequence: treat the value as a plain on/off switch.
        if value:
            self._settings.set_use_diverse_solutions(True)
            self._settings.set_diverse_solutions_cluster_size(1)
            self._settings.set_diverse_solutions_rmsd(1.5)
        else:
            self._settings.set_use_diverse_solutions(False)
    else:
        self._settings.set_use_diverse_solutions(tf)
        if tf:
            try:
                n = int(value[1])
            except (ValueError, TypeError, IndexError):
                self._settings.set_diverse_solutions_cluster_size(1)
                self._settings.set_diverse_solutions_rmsd(1.5)
            else:
                self._settings.set_diverse_solutions_cluster_size(n)
                try:
                    f = float(value[2])
                except (ValueError, TypeError, IndexError):
                    self._settings.set_diverse_solutions_rmsd(1.5)
                else:
                    self._settings.set_diverse_solutions_rmsd(f)

@property
def seed_file(self):
    '''The seed file for the pseudo random number generator'''
    return self._settings.seed_file()

@seed_file.setter
def seed_file(self, value):
    self._settings.set_seed_file(value)
def set_hostname(self, hostname='localhost', ndocks=1):
    '''Set the hostname on which docking jobs will be run.

    A socket is bound on ``hostname`` and kept on the settings; its port is used
    for both reading ligands from, and writing ligands to, a socket.  The output
    file is reset to the empty string.

    :param hostname: the host name passed to the socket and to the settings.
    :param ndocks: passed to the settings together with the host and port for ligand input.
    '''
    bound = self._pick_unused_port(hostname)
    self._socket = bound
    self._port = bound.getsockname()[1]
    self._settings.set_ligands_from_socket(hostname, self._port, ndocks)
    self._settings.set_ligands_to_socket(hostname, self._port)
    self.output_file = ''
@staticmethod
def _pick_unused_port(hostname='localhost'):
    '''Private: bind a socket to an unused port.

    :param hostname: the host name to bind on.
    :returns: the bound socket; the caller reads the port from ``getsockname()``.

    The socket is returned still bound and open.
    '''
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        s.bind((hostname, 0))
    except Exception:
        # Do not leak the descriptor when the host cannot be bound.
        s.close()
        raise
    return s

def _start_gold(self, file_name=None, mode='foreground'):
    '''Private: start the gold server.

    :param file_name: where to write the configuration file; ``./api_gold.conf`` if not given.
    :param mode: a string starting with 'fore' waits for GOLD to finish, one starting with 'back' returns immediately, one starting with 'inter' sets up socket docking.
    :returns: a :class:`ccdc.docking.Docker.Results` or :class:`ccdc.docking.Docker.InteractiveResults` instance.
    :raises: RuntimeError if GOLD cannot be located, or if interactive docking on a remote host finds no gold.pid file.

    The executable is taken from ``$GOLD_EXE`` if set, otherwise located under ``$GOLD_DIR``.
    '''
    if self._gold_exe is None:
        if 'GOLD_EXE' in os.environ:
            self._gold_exe = os.path.abspath(os.environ['GOLD_EXE'])
        else:
            if 'GOLD_DIR' in os.environ:
                self._gold_dir = os.environ['GOLD_DIR']
            else:
                raise RuntimeError('''GOLD not installed or configured. Some features of the CSD Python API will not be available.''')
            if not os.path.exists(self._gold_dir):
                raise RuntimeError('Unable to find a GOLD executable in %s' % self._gold_dir)
            if sys.platform == 'win32':
                self._gold_exe = os.path.join(self._gold_dir, 'gold', 'd_win32', 'bin', 'gold_win32.exe')
            else:
                self._gold_exe = os.path.join(self._gold_dir, 'bin', 'gold_auto')
                self._par_gold_exe = os.path.join(self._gold_dir, 'bin', 'parallel_gold_auto')
    if not os.path.exists(self._gold_exe):
        raise RuntimeError('GOLD executable not found at %s' % self._gold_exe)

    if file_name is None:
        file_name = os.path.abspath('./api_gold.conf')
    else:
        file_name = os.path.abspath(file_name)
    if not os.path.exists(os.path.dirname(file_name)):
        os.makedirs(os.path.dirname(file_name))

    interactive = mode.lower().startswith('inter')
    if interactive:
        # Preserve old tags
        self._settings.set_replace_tags(False)
        # Disable ranking stuff
        self._settings.set_bestranking_list_filename('')
        self._settings.set_delete_rank_files(True)
        # Sockets - check that sockets are enabled, or create them
        self.clear_ligand_files()
        pars = self._settings.socket_parameters()
        hostname, port = pars[0], pars[1]
        if not hostname or port == 0:
            self.set_hostname()
        elif hostname != 'localhost':
            # A remote GOLD is assumed to be running already; attach to it
            # through the process id it recorded in the output directory.
            print('RUNNING ON', hostname)
            fname = os.path.join(self.output_directory, 'gold.pid')
            if not os.path.exists(fname):
                raise RuntimeError('GOLD does not appear to be running on %s:%d' % (hostname, port))
            with open(fname) as f:
                pid = int(f.read())
            return Docker.InteractiveResults(self, pid=pid)

    self.write(file_name)
    print('Starting GOLD with conf file %s' % file_name)
    p = subprocess.Popen(
        [self._gold_exe, file_name]
    )
    if mode.lower().startswith('back'):
        return Docker.Results(self, pid=p.pid)
    elif mode.lower().startswith('fore'):
        return Docker.Results(self, return_code=p.wait())
    else:
        # Socket docking
        return Docker.InteractiveResults(self, pid=p.pid)
[docs] @nested_class('Docker') class Results(object): '''Docking results. '''
class DockedLigand(Entry):
    '''Subclass of :class:`ccdc.entry.Entry` to provide nicer access to the scoring terms of the docking.
    '''

    def __init__(self, entry, settings):
        '''Wrap an existing entry.

        :param entry: the entry whose low-level entry and attributes are shared with this instance.
        :param settings: stored as ``self.settings``.
        '''
        self._entry = entry._entry
        self.attributes = entry.attributes
        self.settings = settings

    @staticmethod
    def _is_float(t):
        '''Private: whether ``float(t)`` succeeds.

        Values of an unsuitable type, such as ``None``, are reported as not being floats.
        '''
        try:
            float(t)
        except (TypeError, ValueError):
            return False
        return True
def fitness(self, fitness_function=None):
    '''The recorded fitness of a docking.

    :param fitness_function: one of the fitness functions of the :class:`ccdc.docking.Docker.Settings` or ``None``.
    :returns: the fitness as a float.
    :raises: RuntimeError if there is no fitness term, or if the choice of term is ambiguous or matches nothing.

    If the docking has exactly one fitness attribute, *i.e.*, no rescoring has been performed,
    then there is no need to specify the fitness_function.
    '''
    possibles = [
        (k, float(v)) for k, v in self.attributes.items()
        if 'fitness' in k.lower() and self._is_float(v)
    ]
    if len(possibles) == 0:
        raise RuntimeError('No fitness term in the entry')

    def _term_names():
        # The second dotted component names the scoring function; a key
        # without a dot is reported whole instead of raising IndexError.
        names = []
        for k, _ in possibles:
            pieces = k.split('.')
            names.append((pieces[1] if len(pieces) > 1 else pieces[0]).lower())
        return names

    if fitness_function is None:
        if len(possibles) == 1:
            return possibles[0][1]
        raise RuntimeError('Fitness terms for %s in entry' % ', '.join(_term_names()))

    wanted = fitness_function.lower()
    matched = [(k, v) for k, v in possibles if wanted in k.lower()]
    if len(matched) == 0:
        raise RuntimeError('No matching fitness term.  Available are %s' % (', ').join(_term_names()))
    elif len(matched) == 1:
        return matched[0][1]
    else:
        raise RuntimeError('Multiple matching fitness terms, %s' % ', '.join(k for k, v in matched))
def scoring_term(self, *filters):
    '''Individual or dicts of scoring terms from the entry.

    :param `*filters`: strings, each of which must occur (ignoring case) in the name of a term for it to be selected.
    :returns: a float if the specification is exact or a dictionary of key:float if ambiguous.
    :raises: RuntimeError if no numeric attribute matches all the filters.

    Only attributes whose value converts to a float are considered.
    '''
    wanted = [text.lower() for text in filters]
    selected = []
    for name, raw in self.attributes.items():
        if not self._is_float(raw):
            continue
        number = float(raw)
        lowered = name.lower()
        if all(text in lowered for text in wanted):
            selected.append((name, number))
    if not selected:
        raise RuntimeError('No scoring term matched')
    if len(selected) == 1:
        return selected[0][1]
    return dict(selected)
class HBond(Molecule.HBond):
    '''A hydrogen bond in the docked ligand.'''

    def __init__(self, text, ligand, results):
        '''Build the hydrogen bond from one line of a docking solution's hydrogen bond listing.

        :param text: whitespace separated fields; fields 0 and 3 identify the first atom, fields 4 and 5 the second, field 6 is the strength.
        :param ligand: the docked ligand, whose molecule supplies ligand atoms.
        :param results: the docking results, whose proteins supply protein atoms.
        '''
        self.results = results

        def _get_atom(mol_ref, at_ref):
            # A reference starting with 'P' carries a 1-based protein number;
            # anything else refers to the ligand.  Atom numbers are 1-based.
            if mol_ref.startswith('P'):
                inx = int(mol_ref[1:]) - 1
                return self.results.proteins[inx].atoms[int(at_ref) - 1]
            else:
                return ligand.molecule.atoms[int(at_ref) - 1]

        parts = text.split()
        at0 = _get_atom(parts[0], parts[3])
        at1 = _get_atom(parts[4], parts[5])
        strength = float(parts[6])
        _contact = ChemistryLib.MolecularContact(
            at0._molecule, at0._atom, at1._molecule, at1._atom,
            strength, ChemistryLib.MolecularContact.HBOND_CONTACT
        )
        # Name the class explicitly: super(self.__class__, self) recurses
        # without end as soon as this class is subclassed.
        super(Docker.Results.DockedLigand.HBond, self).__init__(_contact)
def hbonds(self, which=None):
    '''The hydrogen bonds recorded for the docked ligand.

    :param which: the scoring function whose hydrogen bonds are wanted; if ``None`` the fitness function of the docking settings is used.
    :returns: a tuple of :class:`ccdc.docking.Docker.Results.DockedLigand.HBond`, or ``None`` for 'asp' or if the entry has no hydrogen bond attribute.
    :raises: RuntimeError if the scoring function is not recognised.

    'plp' and 'chemscore' both read the Chemscore hydrogen bond attribute.
    '''
    if which is None:
        s = self.settings.settings.fitness_function
    else:
        s = which
    name = s.lower()
    if name == 'asp':
        return None
    tags = {
        'plp': 'Gold.Chemscore.Hbonds',
        'chemscore': 'Gold.Chemscore.Hbonds',
        'goldscore': 'Gold.Goldscore.Hbonds',
    }
    if name not in tags:
        # Previously this fell through to an UnboundLocalError.
        raise RuntimeError('No hydrogen bond information is available for scoring function %r' % s)
    text = self.attributes.get(tags[name])
    if text is None:
        return None
    # The first line of the attribute is skipped, as are empty lines.
    lines = text.split('\n')
    return tuple(
        Docker.Results.DockedLigand.HBond(l, self, self.settings) for l in lines[1:] if l
    )
class DockedLigandReader(io.EntryReader):
    '''Subclass of :class:`ccdc.io.EntryReader` to provide :class:`ccdc.docking.Docker.Results.DockedLigand` instances.'''

    def __new__(kl, file_name, settings):
        '''Create the reader through the reader factory and attach ``settings`` to it.'''
        reader = io._ReaderFactory.__new__(kl, file_name)
        reader.settings = settings
        return reader

    def _make_entry(self, _entry):
        '''Private: wrap the entry made by the parent class as a DockedLigand.'''
        plain_entry = super(self.__class__, self)._make_entry(_entry)
        return Docker.Results.DockedLigand(plain_entry, self.settings)

    def __iter__(self):
        '''Iterator.'''
        return self.entries()  # pylint: disable=E1101

    def __getitem__(self, i):
        '''The entry at position ``i``, as a DockedLigand.'''
        raw = self._enumerator.entry(i)  # pylint: disable=E1101
        return self._make_entry(raw)
def __init__(self, settings, return_code=None, pid=None):
    '''Record the settings, return code and process id of a docking.

    :param settings: the settings of the docking; ``output_directory`` is read from them.
    :param return_code: the return code of the docking, if known.
    :param pid: the process id of the docking, if known.
    '''
    self.settings = settings
    # Keep the two values apart: a chained assignment here would overwrite
    # the return code with the pid and leave self.pid unset.
    self.return_code = return_code
    self.pid = pid

def _read_file(self, file_name):
    '''Read it if it exists.

    :param file_name: name of a file, relative to the output directory.
    :returns: the content of the file, or ``None`` if there is no such file.
    '''
    fname = os.path.join(self.settings.output_directory, file_name)
    if not os.path.exists(fname):
        return None
    with open(fname) as f:
        return f.read()

@property
def protein_log(self):
    '''The content of the protein log file.'''
    return self._read_file('gold_protein.log')

@property
def error_log(self):
    '''The content of the docking error log file.'''
    return self._read_file('gold.err')

@property
def docking_log(self):
    '''The content of the docking log file.'''
    return self._read_file('gold.log')
def ligand_log(self, index):
    '''The content of a ligand log.

    The ligand logs are the files matching ``gold_*_m*.log`` in the output
    directory, ordered by modification time, oldest first.

    :param index: position of the required log in that ordering.
    :returns: the result of reading the log, or ``None`` if ``index`` is beyond the last log.
    '''
    pattern = os.path.join(self.settings.output_directory, 'gold_*_m*.log')
    logs = sorted(glob.glob(pattern), key=os.path.getmtime)
    if index < len(logs):
        # _read_file joins its argument onto the output directory itself, so
        # pass the bare file name; passing the globbed path would duplicate
        # a relative output directory.
        return self._read_file(os.path.basename(logs[index]))
    return None
@property
def ligands(self):
    '''The ligands of the docking.

    The value of this property is a
    :class:`ccdc.docking.Docker.Results.DockedLigandReader` over the solution
    files of the docking.  Each entry has an attributes property, a dictionary
    of the docking information pertaining to the docking.
    '''
    solution_files = DockingLib.QtGoldDockingSolutionFiles(self.settings._conf_file_name)
    file_names = [solution.filename_ for solution in solution_files.solution_filenames()]
    return Docker.Results.DockedLigandReader(file_names, self)

@property
def proteins(self):
    '''The protein(s) of the docking.

    The proteins are loaded on first access and kept for later accesses.

    :returns: a tuple of :class:`ccdc.protein.Protein`.  This tuple will have more than one entry if ensemble docking was used.
    '''
    if hasattr(self, '_proteins'):
        return self._proteins
    solution_files = DockingLib.QtGoldDockingSolutionFiles(self.settings._conf_file_name)
    loaded = [
        Protein.from_file(protein_file.filename_)
        for protein_file in solution_files.protein_filenames()
    ]
    self._proteins = tuple(loaded)
    return self._proteins
def make_complex(self, ligand):
    '''Make the complex with the ligand, adjusting rotatables as required.

    The protein is selected by the ligand's ``Gold.Ensemble.ID`` attribute
    (1-based, defaulting to the first protein).  A ligand added by an earlier
    call on the same protein is removed before the new one is added.

    :param ligand: the docked ligand to place in the protein.
    :return: a :class:`ccdc.protein.Protein` with the ligand added.
    '''
    ensemble_id = int(ligand.attributes.get('Gold.Ensemble.ID', 1))
    prot = self.proteins[ensemble_id - 1]

    # prot._complex holds the index of the ligand added by the previous call.
    if not hasattr(prot, '_complex'):
        prot._complex = None
    if prot._complex is not None:
        previous = prot._protein_structure.ligand(prot._complex)
        prot._protein_structure.remove_ligand(previous, False)

    # The new ligand will occupy the next free ligand slot.
    prot._complex = prot._protein_structure.nligands()
    prot.add_ligand(ligand.molecule)

    # One manager per protein, created on first use.
    if not hasattr(prot, '_manager'):
        prot._manager = DockingLib.GoldProteinManager(prot.identifier, prot._molecule)
    prot._manager.set_rotated_atoms(ligand._entry)
    return prot
@nested_class('Docker')
class InteractiveResults(Results):
    '''A session connecting to a GOLD process.

    If the :class:`ccdc.docking.Docker.InteractiveResults` instance has an attribute, 'ligand_preparation',
    this should be either None, in which case no ligand preparation is performed, or an instance of
    :class:`ccdc.docking.Docker.LigandPreparation` whose prepare method will be called for each
    interactive docking attempted.  A default constructed :class:`ccdc.docking.Docker.LigandPreparation`
    will be used if the attribute is not present.
    '''
    # Picks the two single-quoted fields out of a line of bestranking.lst.
    _line_match = re.compile(
        r".*'(?P<file_name>[^']*)'.*'(?P<identifier>[^']*)'.*$"
    )
    # Picks the identifier part out of a solution file name.
    _file_name_match = re.compile(
        r".*gold_soln_(?P<identifier>.*)_m[0-9]+_[0-9]+\.mol2"
    )

    def __init__(self, settings, pid=None):
        '''Open the server socket and wait for the GOLD process to connect.

        :param settings: the docking settings; a ``_socket`` attribute, if present, is reused.
        :param pid: the process id of the GOLD process, if known.
        :raises: RuntimeError if nothing connects within five minutes.
        '''
        # Name the class explicitly: super(self.__class__, self) recurses
        # without end as soon as this class is subclassed.
        super(Docker.InteractiveResults, self).__init__(settings, pid=pid)
        # Set up the socket
        pars = settings._settings.socket_parameters()
        hostname, port = pars[0], pars[1]
        if not hasattr(settings, '_socket'):
            self._socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            self._socket.bind(('', port))
        else:
            self._socket = settings._socket
        self._socket.listen(5)  # for now, just a single GOLD job.
        self._socket.settimeout(5*60.)
        try:
            self._client_socket, address = self._socket.accept()
        except socket.timeout:
            raise RuntimeError('Socket timed out on accept()')
        self._client_socket.settimeout(None)
        print('CONNECTED TO', hostname, port, address)
        self._docked_ligand_count = 0
        self._wait_for_gold()

    def __del__(self):
        '''Close the sockets and remove the files left behind by the session.'''
        # Tolerate partially constructed instances and repeated calls: an
        # attribute may be missing, or already reset to None.
        for attr in ('_socket', '_client_socket'):
            sock = getattr(self, attr, None)
            if sock is not None:
                sock.close()
                setattr(self, attr, None)
        settings = getattr(self, 'settings', None)
        if settings is None:
            return
        out_dir = settings.output_directory
        # NOTE(review): the pid file name was an empty string in the reviewed
        # text; 'gold.pid' is the process id file GOLD is documented to write
        # to its output directory -- confirm.
        stale = [os.path.join(out_dir, 'gold.pid')]
        stale.extend(glob.glob(os.path.join(out_dir, 'gold_SOCKET_m*.log')))
        for fname in stale:
            if not os.path.exists(fname):
                continue
            try:
                os.unlink(fname)
            except OSError:
                # File in use on windows
                pass

    def dock(self, entry):
        '''Send an entry to be docked.

        The entry is first passed through ``self.ligand_preparation`` unless
        that attribute is ``None``.

        :param entry: the entry to dock.
        :returns: a tuple of :class:`ccdc.docking.Docker.Results.DockedLigand` instances.  These are the docked poses.
        '''
        if not hasattr(self, 'ligand_preparation'):
            self.ligand_preparation = Docker.LigandPreparation()
        if self.ligand_preparation is not None:
            entry = self.ligand_preparation.prepare(entry)
        structure = entry.to_string(format='mol2') + '\nGOLDMINE MOL2 TERMINATOR\n'
        structure = structure.replace('\n', '\r\n')
        header = '%d %d %s.mol2\n' % (len(structure), self._docked_ligand_count, entry.identifier)
        self._docked_ligand_count += 1
        self._send(header)
        # One reply line is consumed after each message ('SEND LIGAND...' and
        # 'GOT LIGAND...' were the replies the original code looked for); the
        # content of the reply is not acted upon.
        self._recv_line()
        self._send(structure, add_cr=False)
        self._recv_line()
        return self._get_ligands()

    def _wait_for_gold(self):
        '''Block until GOLD asks for a ligand header.'''
        while True:
            if self._recv_line().startswith('SEND LIGAND HEADER'):
                return

    def _recv_line(self):
        '''Read from the client socket up to and including the next newline.

        :raises: IOError if the socket yields no data.
        '''
        received = []
        while True:
            c = self._client_socket.recv(1)
            c = c.decode('ISO-8859-1')
            if not c:
                print('SOCKET CLOSED?')
                raise IOError('Socket failed (probably client died).')
            received.append(c)
            if c == '\n':
                return ''.join(received)

    def _send(self, msg, add_cr=True):
        '''Send the whole of a message to the client socket.

        :param msg: the text to send.
        :param add_cr: whether to convert newlines to carriage return, newline pairs first.
        :raises: RuntimeError if the socket accepts no more data.
        '''
        if add_cr:
            msg = msg.replace('\n', '\r\n')
        msg = six.b(msg)
        to_send = len(msg)
        total_sent = 0
        while total_sent < to_send:
            sent = self._client_socket.send(msg[total_sent:])
            if sent == 0:
                pars = self.settings._settings.socket_parameters()
                hostname, port = pars[0], pars[1]
                raise RuntimeError('Socket connection %s:%d broken' % (hostname, port))
            total_sent += sent

    def _get_ligands(self):
        '''Collect the docked poses sent back by GOLD and tidy the output files.

        :returns: a tuple of :class:`ccdc.docking.Docker.Results.DockedLigand`.
        '''
        ligs = []
        chunks = []
        in_ligand = False
        while True:
            line = self._recv_line()
            if line.startswith('SEND LIGAND HEADER'):
                break
            if line.startswith('DOCKED LIGAND'):
                in_ligand = True
            if line.startswith('END DOCKED LIGAND'):
                in_ligand = False
                structure = ''.join(chunks).replace('\r', '')
                lig = Entry.from_string(structure, format='mol2')
                # Patch up identifier
                parts = lig.identifier.split('|')
                lig.identifier = '%s|%s|%s' % (parts[0], parts[0], '|'.join(parts[2:]))
                # NOTE(review): Results.ligands hands the Results object
                # itself to the docked ligands it creates, and
                # DockedLigand.hbonds reads self.settings.settings; here the
                # settings object is passed instead -- confirm this is
                # intended.
                ligs.append(Docker.Results.DockedLigand(lig, self.settings))
                chunks = []
            elif in_ligand:
                chunks.append(line)
            else:
                print('UNRECOGNISED LINE:', line)

        out_dir = self.settings.output_directory

        # Remove ranked_ files
        for ranked in glob.glob(os.path.join(out_dir, 'ranked_*')):
            os.unlink(ranked)

        # Patch bestranking.lst
        best_ranking_file = os.path.join(out_dir, 'bestranking.lst')
        if os.path.exists(best_ranking_file):
            with open(best_ranking_file) as f:
                lines = f.readlines()
            # An empty file has no last line to patch.
            linem = Docker.InteractiveResults._line_match.match(lines[-1]) if lines else None
            if linem is not None:
                fname = linem.group('file_name')
                identifier = linem.group('identifier')
                match = Docker.InteractiveResults._file_name_match.match(fname)
                if match is not None:
                    new_name = fname.replace(match.group('identifier'), identifier)
                    lines[-1] = lines[-1].replace(fname, new_name)
                    with open(best_ranking_file, 'w') as writer:
                        writer.write(''.join(lines))

        # The renames need the identifier of the first pose; without any
        # pose there is nothing to rename to.
        if ligs:
            base = ligs[0].identifier.split('|')[0]
            count = self._docked_ligand_count
            # rename mol2 files
            last_mol2_file = glob.glob(os.path.join(out_dir, 'gold_*_m%d.mol2' % count))
            if last_mol2_file:
                os.rename(
                    last_mol2_file[0],
                    os.path.join(out_dir, 'gold_%s_m%d.mol2' % (base, count))
                )
            # rename log files
            last_log_file = glob.glob(os.path.join(out_dir, 'gold_SOCKET_m%d.log' % count))
            if last_log_file:
                os.rename(
                    last_log_file[0],
                    os.path.join(out_dir, 'gold_%s_m%d.log' % (base, count))
                )
        return tuple(ligs)
def __init__(self, settings=None):
    '''Initialise the docker.

    :param settings: the settings to dock with; a default constructed ``Docker.Settings`` is used if ``None``.
    '''
    self.settings = Docker.Settings() if settings is None else settings
def dock(self, file_name=None, mode='foreground'):
    '''Dock from the current settings.

    The work is delegated to the ``_start_gold`` method of the settings,
    whose result is returned unchanged.

    :param file_name: file name for the settings.  If ``None``, current settings are written to a temporary directory.
    :param mode: one of 'foreground', 'background' or 'interactive'.
    :raises: RuntimeError if no GOLD executable is found.
    '''
    outcome = self.settings._start_gold(file_name=file_name, mode=mode)
    return outcome
@property
def results(self):
    '''The docking results.

    If the docking is still in progress, the results may be partial.  While
    the pid file is present in the output directory the results carry the
    content of that file as ``pid`` and no return code; otherwise they carry
    a return code of 0 and no pid.
    '''
    # NOTE(review): the pid file name was an empty string in the reviewed
    # text, which made open() target the directory itself; 'gold.pid' is the
    # process id file GOLD is documented to write -- confirm.
    pidfile = os.path.join(self.settings.output_directory, 'gold.pid')
    if os.path.exists(pidfile):
        with open(pidfile) as f:
            # The file was previously opened but never read.
            pid = f.read()
        return_code = None
    else:
        pid = None
        return_code = 0
    return Docker.Results(self.settings, return_code=return_code, pid=pid)
def dock_status(self):
    '''Check the status of a docking job via the pid file.

    :returns: 1 if the pid file exists in the output directory, 0 otherwise.
    '''
    # NOTE(review): the pid file name was an empty string in the reviewed
    # text, so the test was of the output directory itself; 'gold.pid' is the
    # process id file GOLD is documented to write -- confirm.
    pidfile = os.path.join(self.settings.output_directory, 'gold.pid')
    return 1 if os.path.exists(pidfile) else 0
def copy_settings(self, newdocker):
    '''Copy this docker's settings to another docker instance.

    Protein files, ligand files and the binding site are always copied.  The
    remaining simple attributes are copied on a best-effort basis: one that
    cannot be read or assigned is skipped.

    :param newdocker: the docker whose settings receive the copies.
    '''
    source = self.settings
    target = newdocker.settings

    for protein_file in source.protein_files:
        target.add_protein_file(protein_file)

    for info in source.ligand_files:
        target.add_ligand_file(info.file_name, info.ndocks, info.start, info.finish)

    target.binding_site = source.binding_site

    for name in ('output_directory', 'output_file', 'output_format',
                 'fitness_function', 'rescore_function', 'autoscale',
                 'early_termination', 'diverse_solutions'):
        try:
            setattr(target, name, getattr(source, name))
        except AttributeError:
            continue
def _count_mol_file(self, mol_filename):
    '''Private: count number of molecules in a file

    :param mol_filename: the file to count.
    :returns: the number of molecules, or -1 if the file cannot be opened.
    '''
    try:
        mol_reader = io.MoleculeReader(mol_filename)
    except IOError:
        return -1
    return len(mol_reader)

def _split_ligand_files(self, maximum_size, ligand_file_lengths=None):
    '''Private: split the ligand files based on a maximum size

    Each split is a list of ``Docker.Settings.LigandFileInfo`` built from
    (file name, ndocks, start, finish), with explicit start and finish
    molecules.

    :param maximum_size: the maximum size of a split; must be at least 1.
    :param ligand_file_lengths: optional dictionary of file name:number of ligands.  For a file not listed, the length is taken from its start and finish if finish is set, otherwise the file is read to determine its size.  The dictionary is not modified.
    :returns: a list of splits.  The last split holds whatever was left over and may be empty.
    :raises: ValueError if ``maximum_size`` is less than 1.
    '''
    if maximum_size < 1:
        # A non-positive size can never be filled and the split loop below
        # would not advance.
        raise ValueError('maximum_size must be at least 1')

    # Work on a private copy: a shared default dictionary would carry file
    # lengths over from one call to the next.
    lengths = dict(ligand_file_lengths) if ligand_file_lengths else {}

    # Let's start by counting ligands in files as needed
    for ligand_file in self.settings.ligand_files:
        if ligand_file.file_name in lengths:
            continue
        # we have to read this file to get the length unless the ligand count is set
        if ligand_file.finish != 0:
            lengths[ligand_file.file_name] = ligand_file.finish - ligand_file.start
        else:
            lengths[ligand_file.file_name] = len(io.MoleculeReader(ligand_file.file_name))

    # This is done as the next file will start at this position
    maximum_size = maximum_size - 1

    # Now generate the splits
    ligand_file_splits = []
    curcount = 0
    t = []
    for ligand_file in self.settings.ligand_files:
        nligands = lengths[ligand_file.file_name]
        # Each file starts from its own start molecule (0 meaning 1); the
        # positions reached in a previous file must not leak into this one.
        startmol = ligand_file.start
        if startmol == 0:
            startmol = 1

        if curcount + nligands <= maximum_size:
            # We can add the whole ligand file entry to this split
            if ligand_file.finish == 0:
                endmol = ligand_file.start + nligands
            else:
                endmol = ligand_file.finish
            t.append(Docker.Settings.LigandFileInfo(
                ligand_file.file_name, ligand_file.ndocks, startmol, endmol
            ))
            curcount += nligands
            if curcount == maximum_size:
                ligand_file_splits.append(t[:])
                t = []
                curcount = 0
            continue

        # The file has to be spread over several splits.
        finish = ligand_file.finish
        if finish == 0:
            finish = nligands
        endmol = None
        while endmol != finish:
            # Take as much of the file as the current split still has room for.
            endmol = min(startmol + (maximum_size - curcount), finish)
            curcount += endmol - startmol
            t.append(Docker.Settings.LigandFileInfo(
                ligand_file.file_name, ligand_file.ndocks, startmol, endmol
            ))
            startmol = endmol + 1
            if curcount == maximum_size:
                ligand_file_splits.append(t[:])
                t = []
                curcount = 0

    ligand_file_splits.append(t[:])
    return ligand_file_splits