Docking examples

Note

The docking features are available only to CSD-Discovery and CSD-Enterprise users.

Docking and Rescoring

This example shows how a docking or rescoring may be performed from a protein and a set of ligands. It is not intended to be a production-quality protein-ligand docking, but to exemplify the technology.

#!/usr/bin/env python
#
# This script can be used for any purpose without limitation subject to the
# conditions at http://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx
#
# This permission notice and the following statement of attribution must be
# included in all copies or substantial portions of this script.
#
# 2015-06-17: created by the Cambridge Crystallographic Data Centre
#
######################################################################
'''
docking_and_rescoring.py - read protein, perform minimal clean-up, define
         docking or rescoring and run the docker.

Note that any realistic docking would require a great deal of thought
to prepare the protein properly.  This example shows the minimum
that needs to be done to define a docking.

The script exemplifies docking and rescoring.
'''
###########################################################################

from __future__ import division, absolute_import, print_function

import argparse
import tempfile
import os

from ccdc.io import EntryWriter, MoleculeReader
from ccdc.protein import Protein
from ccdc.docking import Docker


class Runner(argparse.ArgumentParser):
    '''Parses arguments, runs the docking.'''
    def __init__(self):
        '''Defines the arguments.'''
        super(self.__class__, self).__init__(description=__doc__)
        self.add_argument(
            'protein_file_name',
            help='The protein file name'
        )
        self.add_argument(
            '-l', '--ligand-file-name',
            help='Ligand file. If not supplied the substrate ligand will be docked. If rescoring only, this file should contain the docked ligands.'
        )
        self.add_argument(
            '-rl', '--reference-ligand-file-name',
            help='Reference ligand file.',
            required=True
        )
        self.add_argument(
            '-s', '--speed',
            default='very fast',
            choices=('very fast', 'fast', 'medium', 'slow', 'very slow'),
            help='How thorough the docking attempts should be. The values correspond to 10%%, 25%%, 50%%, 75%% and 100%% autoscaling of docking parameters.'
        )
        self.add_argument(
            '-n', '--ndocks', default=10, type=int,
            help='Number of docking attempts to run.'
        )
        self.add_argument(
            '-d', '--directory',
            help='Output directory for the docking. Default is a temporary directory.'
        )
        self.add_argument(
            '-f', '--fitness-function', choices=('plp', 'asp', 'chemscore', 'goldscore'),
            help='Fitness function to use (default None).'
        )
        self.add_argument(
            '-r', '--rescore-function', choices=('plp', 'asp', 'chemscore', 'goldscore'),
            help='Rescore function to use (default None)'
        )

        self.args = self.parse_args()

        self.docker = None
        self.settings = None
        self.ref_ligand = None
        self.protein = None

    def run(self):
        '''Run the docking.'''
        self.create_docker()
        protein_ligands = self.prepare_protein()
        self.prepare_ligands(protein_ligands)
        self.prepare_binding_site()
        self.dock()

    def is_rescore(self):
        return self.args.rescore_function and not self.args.fitness_function

    def create_docker(self):
        '''Create a docker.'''
        self.docker = Docker()
        self.settings = self.docker.settings
        if self.args.directory:
            if not os.path.isdir(self.args.directory):
                os.makedirs(self.args.directory)
        else:
            self.args.directory = tempfile.mkdtemp()

        self.settings.output_directory = self.args.directory
        if self.is_rescore():
            self.settings.output_file = 'rescored_ligands.mol2'
        else:
            self.settings.output_file = 'docked_ligands.mol2'

        self.settings.output_format = 'mol2'
        self.settings.autoscale = {
            'very fast': 10,
            'fast': 25,
            'medium': 50,
            'slow': 75,
            'very slow': 100
        }[self.args.speed]

        self.settings.fitness_function = self.args.fitness_function
        if self.args.rescore_function:
            self.settings.rescore_function = self.args.rescore_function

        if self.args.reference_ligand_file_name:
            self.settings.reference_ligand_file = self.args.reference_ligand_file_name

        self.ref_ligand = MoleculeReader(self.args.reference_ligand_file_name)[0]
        self.settings.reference_ligand_file = self.args.reference_ligand_file_name

    def prepare_binding_site(self):
        '''Define the binding site from the reference ligand.'''
        ligand_cog = self.ref_ligand.centre_of_geometry()
        self.settings.binding_site = self.settings.BindingSiteFromPoint(
            None, ligand_cog, 12.0
        )

    def prepare_protein(self):
        '''Prepare the protein and remove any ligands.'''
        self.protein = Protein.from_file(self.args.protein_file_name)
        self.protein.remove_all_waters()
        self.protein.remove_unknown_atoms()
        self.protein.add_hydrogens()

        ligands = self.protein.ligands

        for l in ligands:
            self.protein.remove_ligand(l.identifier)

        protein_file_name = os.path.join(
            self.settings.output_directory, 'clean_%s.mol2' % self.protein.identifier
        )

        with EntryWriter(protein_file_name) as writer:
            writer.write(self.protein)
        self.settings.add_protein_file(protein_file_name)

        return ligands

    def prepare_ligands(self, protein_ligands):
        '''Write an appropriate ligand file to the docking directory, if required.'''
        if not self.args.ligand_file_name:
            if protein_ligands:
                self.args.ligand_file_name = os.path.join(
                    self.settings.output_directory,
                    'protein_ligands.mol2'
                )
                with EntryWriter(self.args.ligand_file_name) as writer:
                    for ligand in protein_ligands:
                        writer.write(ligand)
            else:
                raise RuntimeError('No ligand file given and no ligands in the protein.')

        self.settings.add_ligand_file(self.args.ligand_file_name, self.args.ndocks)

    def dock(self):
        '''Do the docking.'''
        self.docker.dock()
        print('Docking completed. Results are in %s' % self.settings.output_directory)

###########################################################################

if __name__ == '__main__':
    runner = Runner()
    runner.run()

Interactive docking

This example shows how the results of a docking may be used to inform further dockings in an iterative fashion.

#!/usr/bin/env python
#
# This script can be used for any purpose without limitation subject to the
# conditions at http://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx
#
# This permission notice and the following statement of attribution must be
# included in all copies or substantial portions of this script.
#
# 2015-06-17: created by the Cambridge Crystallographic Data Centre
#

'''
    similarity_docking.py - search the CSD for similar structures to a known
    ligand, dock them, saving structures which score better than the known ligand.

    This exemplifies using an interactive docker.
'''
######################################################################

from __future__ import division, absolute_import, print_function

import sys
import os
import argparse

from ccdc.io import EntryReader, EntryWriter
from ccdc.search import SimilaritySearch
from ccdc.docking import Docker
from ccdc.entry import Entry

########################################################################################

class Runner(argparse.ArgumentParser):
    '''Defines arguments and runs the job.'''
    def __init__(self):
        '''Defines the arguments.'''
        super(self.__class__, self).__init__(description=__doc__)
        self.add_argument(
            'conf_file_name',
            help='The configuration file to use.'
        )
        self.add_argument(
            '-l', '--ligand-file-name', default=None,
            help='Ligand to use in the docking'
        )
        # Docking parameters
        self.add_argument(
            '-f', '--fitness-function', default=None,
            help='The fitness function to use.'
        )
        self.add_argument(
            '-n', '--ndocks', default=10, type=int,
            help='The number of docking attempts to make.'
        )
        self.add_argument(
            '-o', '--output-directory',
            help='Directory in which results will be stored.'
        )
        self.add_argument(
            '-s', '--speed', default=None, choices=('very fast', 'fast', 'medium', 'slow', 'very slow'),
            help='How thorough the docking attempts should be.  The values correspond to 10%%, 25%%, 50%%, 75%% and 100%% auto-scaling of docking parameters.'
        )

    def get_arguments(self):
        '''Extract and process the arguments.'''
        self.args = self.parse_args()
        self.settings = Docker.Settings.from_file(self.args.conf_file_name)
        if self.args.output_directory:
            self.settings.output_directory = self.args.output_directory
        if self.args.fitness_function is not None:
            self.settings.fitness_function = self.args.fitness_function
        if self.args.speed is not None:
            self.settings.autoscale = {
                'very fast': 10,
                'fast': 25,
                'medium': 50,
                'slow': 75,
                'very slow': 100
            }[self.args.speed]
        self.ligand_info = self.settings.ligand_files
        if self.args.ligand_file_name is None:
            self.args.ligand_file_name = self.ligand_info[0].file_name
        self.settings.clear_ligand_files()
        self.settings.set_hostname(ndocks=self.args.ndocks)
        self.docker = Docker(settings=self.settings)
        if not os.path.isdir(self.settings.output_directory):
            os.makedirs(self.settings.output_directory)
        self.winner_writer = EntryWriter(
            os.path.join(self.settings.output_directory, 'winners.mol2')
        )
        self.better_writer = EntryWriter(
            os.path.join(self.settings.output_directory, 'better.mol2')
        )

    def run(self):
        '''Runs the job.'''
        self.get_arguments()
        self.session = self.docker.dock(
            os.path.join(self.settings.output_directory, 'settings.conf'),
            'interactive'
        )
        self.run_substrate()
        self.done = set()
        dock_index = 1
        while True:
            hits = self.search()
            if not hits:
                break
            for h in hits:
                self.done.add(h.identifier)
                mol = h.molecule
                if mol.all_atoms_have_sites:
                    mol.assign_bond_types()
                    mol.add_hydrogens()
                    entry = Entry.from_molecule(mol)
                    poses = self.session.dock(entry)
                    dock_index += 1
                    if poses:
                        score, pose = self.best(poses)
                        for inx, p in enumerate(poses):
                            with EntryWriter(
                                os.path.join(
                                    self.settings.output_directory,
                                    'gold_soln_CSD_API_m%d_%d.mol2' % (dock_index, inx+1)
                                )
                            ) as w:
                                w.write(pose)
                        if score > self.best_score:
                            print('%-8s: %.3f Winner!' % (h.identifier, score))
                            self.best_score, self.best_pose = score, pose
                            self.winner_writer.write(self.best_pose)
                        else:
                            print('%-8s: %.3f' % (h.identifier, score))
                            if score > self.substrate_score:
                                self.better_writer.write(pose)
                    else:
                        print('%-8s: Failed to dock' % h.identifier)
                else:
                    print('%-8s: No 3d structure' % h.identifier)

    def search(self):
        '''Similarity search the csd.'''
        threshold = 1.0
        hits = []
        mol = self.best_pose.molecule
        mol.standardise_aromatic_bonds()
        while not hits and round(threshold, 6) > 0.5:
            threshold -= 0.1
            simsearcher = SimilaritySearch(mol, threshold)
            simsearcher.settings.only_organic = True
            simsearcher.settings.has_3d_coordinates = True
            hits = [h for h in simsearcher.search() if h.identifier not in self.done]

            if hits:
                def match_components(h):
                    if len(h.molecule.components) > 1:
                        for m in h.molecule.components:
                            m.identifier = h.identifier
                            hh = simsearcher.search(m)
                            if hh:
                                return hh[0]
                        else:
                            return None
                            #raise RuntimeError('Similarity search failed to match any component')
                    else:
                        return h

                l = [x for x in [match_components(h) for h in hits] if x is not None]
                print('%d new hits with threshold %.2f against %s' % (len(l), threshold, mol.identifier))
                return l
            else:
                print('No hits with threshold %.2f against %s' % (threshold, mol.identifier))

    def run_substrate(self):
        '''Runs the substrate, for the comparison.'''
        self.substrate = EntryReader(self.args.ligand_file_name)[0]
        poses = self.session.dock(self.substrate)

        if not poses:
            print('FAILED TO DOCK SUBSTRATE')
            sys.exit()
        self.best_score, self.best_pose = self.best(poses)
        self.substrate_score = self.best_score
        for inx, p in enumerate(poses):
            with EntryWriter(
                os.path.join(
                    self.settings.output_directory,
                    'gold_soln_CSD_API_m1_%d.mol2' % (inx+1)
                )) as w:
                w.write(self.best_pose)
        print('%-9s: %.3f' % ('Substrate', self.best_score))

    def best(self, poses):
        '''Extracts best score and best structure from the docked poses.'''
        fitness_tag = dict(
            plp='Gold.PLP.Fitness',
            chemscore='Gold.Chemscore.Fitness',
            goldscore='Gold.Goldscore.Fitness',
            asp='Gold.ASP.Fitness'
        )[self.settings.fitness_function]
        scores = [
            float(e.attributes[fitness_tag]) for e in poses
        ]
        best_score = max(scores)
        best_pose = poses[scores.index(best_score)]
        return best_score, best_pose

########################################################################################

if __name__ == '__main__':
    runner = Runner()
    runner.run()

Testing fitness functions

This example shows how the performance of docking and rescoring functions can be systematically evaulated, how the pool method can be employed to parallize docking runs and the pandas module can be used to help with statistical analysis

#!/usr/bin/env python
#
# This script can be used for any purpose without limitation subject to the
# conditions at http://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx
#
# This permission notice and the following statement of attribution must be
# included in all copies or substantial portions of this script.
#
# 2017-02-10: created by the Cambridge Crystallographic Data Centre
#
#

'''
This simple example script uses docking to evaluate different scoring functions

Some of the analysis code depends on the numpy package and the pandas packagebeing installed.
Presentation of the results depends on the package matplotlib

.. seealso:: <http://www.numpy.org/> for more information on numpy.
.. seealso:: <http://pandas.pydata.org/> for more information on pandas.
.. seealso:: <https://matplotlib.org/> for more information on matplotlib.
'''
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import os
from ccdc import io
from ccdc.docking import Docker
from ccdc.io import MoleculeReader
import argparse
import multiprocessing
from multiprocessing import Pool
import re
import numpy as np
import pandas as pd
from ccdc.protein import Protein


def run_gold(params):
    (protein,ligand,reference,ndocks,fitness_function, rescore_function, no_run) = params
    docker = Docker()
    settings = docker.settings
    protein_file = os.path.abspath(protein)
    protein = Protein.from_file(protein_file)

    ligand_file = os.path.abspath(ligand)
    ligand = MoleculeReader(ligand_file)[0]

    native_ligand_file = os.path.abspath(reference)
    native_ligand = MoleculeReader(native_ligand_file)[0]
    settings.add_protein_file(protein_file)
    settings.add_ligand_file(ligand_file, ndocks)
    settings.reference_ligand_file = reference
    settings.binding_site = settings.BindingSiteFromLigand(protein, native_ligand, 6.0)


    settings.diverse_solutions = (True, 3, 1.0)
    settings.early_termination = False
    settings.diverse_solutions = (True, 3, 1.0)

    settings.fitness_function = fitness_function
    rescore_label = ''
    if rescore_function != 'None':
        settings.rescore_function = rescore_function
        rescore_label = "score_" + str(rescore_function)
    else:
        rescore_label = "score_" + str(fitness_function)

    out_path = os.path.abspath(os.path.join("fitness_"+str(fitness_function), rescore_label, 'run_' + str(no_run)))
    settings.output_directory = out_path
    if not os.path.exists(out_path):
        os.makedirs(out_path)
    settings.output_file = "poses_" +  str(fitness_function) + "_" +  rescore_label + "_run_" + str(no_run) + ".mol2"

    docker.dock(file_name = str(fitness_function) + "_" +  rescore_label + "_run_" + str(no_run) + ".conf")



def rescore_gold(params):
    (reference,fitness_function, rescore_function, no_run) = params
    in_path = os.path.abspath(os.path.join("fitness_"+str(fitness_function), "score_" + str(fitness_function), 'run_' + str(no_run)))
    # protein settings for docking
    docker = Docker()
    settings = docker.settings
    protein_file = os.path.join(in_path, 'gold_protein.mol2')
    protein = Protein.from_file(protein_file)
    settings.add_protein_file(os.path.join(protein_file))

    ligand_file = "poses_" +  str(fitness_function) + "_score_" +  str(fitness_function) + "_run_" + str(no_run) + ".mol2"
    settings.add_ligand_file(os.path.join(in_path,ligand_file), 1)

    # define the binding site using the reference ligand
    native_ligand_file = os.path.abspath(reference)
    native_ligand = MoleculeReader(native_ligand_file)[0]
    settings.binding_site = settings.BindingSiteFromLigand(protein, native_ligand, 6.)


    settings.reference_ligand_file = native_ligand_file


    settings.fitness_function = None
    settings.rescore_function = rescore_function
    out_path = os.path.join("fitness_"+str(fitness_function), "score_" + str(rescore_function), 'run_' + str(no_run))
    settings.output_directory = out_path
    if not os.path.exists(out_path):
        os.makedirs(out_path)
    settings.output_file = "poses_" +  str(fitness_function) +  "_score_" +   str(rescore_function) + "_run_" + str(no_run) + ".mol2"
    docker.dock(file_name = str(fitness_function) +  "_score_" +   str(rescore_function) + "_run_" + str(no_run) + ".conf")

def get_path(fitness_function, scoring_function, no_run):
    docking_directory = os.path.join("fitness_"+str(fitness_function), "score_"+str(scoring_function), "run_" + str(no_run))
    docking_file =  "poses_"+str(fitness_function)+ "_score_"+str(scoring_function)+ "_run_" + str(no_run) + ".mol2"
    docking_path = os.path.join(docking_directory, docking_file)
    return docking_path


def get_scores(ndocks,nruns):
    SF = ('goldscore', 'plp', 'chemscore','asp')
    re_RMSD = re.compile(r"\w*\.Reference\.RMSD")
    re_FITNESS = re.compile(r".+\..+\.Fitness")
    df = {}
    pose_metrics = {}
    for run in range(1,int(ndocks)+1):
        for dockfunc in SF:
            dockmols = {}
            dock_entries = {}
            poselist = {}
            for sf in SF:
                this_path = get_path(dockfunc, sf, run)
                dockmols[sf] = MoleculeReader(this_path)
                dock_entries[sf] = io.EntryReader(this_path)
            for i in range (0,int(nruns)):
                table = {}
                for sf in SF:
                    table['Docking_function'] = sf
                    d = dock_entries[sf][i]
                    m = dockmols[sf][i]
                    for k,v in d.attributes.items():
                       test = re_RMSD.match(k)
                       if test:
                           table[k] = v
                       test_fitness = re_FITNESS.match(k)
                       if test_fitness:
                           table[k] = v
                series = pd.Series(table)
                poselist[i] = series
            df = pd.DataFrame(poselist)
            pose_metrics[dockfunc,run] = df.T



    # Next, let's work out, if any correct poses were found (sampling success) and if so, if the scoring or rescoring functions were able to detect them as the top solution
    # We create a dictionary for each docking/rescoring/run combination, add them to a list and convert them to a dataframe
    RMSD_threshold = 2.0
    data = []
    for dockfunc in ['asp', 'chemscore', 'goldscore', 'plp']:
        for run in range(1,int(ndocks)+1):
            df = pose_metrics[dockfunc,run]
            for scorefunc in ['Gold.ASP.Fitness', 'Gold.Chemscore.Fitness', 'Gold.Goldscore.Fitness', 'Gold.PLP.Fitness', 'Gold.Reference.RMSD']:
                dockings = {}
                dockings["dockfunc"] = dockfunc
                dockings["run"] = run
                dockings["scorefunc"] = scorefunc
                dockings["worked"] = 0
                flag=False
                if scorefunc == 'Gold.Reference.RMSD':
                    flag=True
                if (float(df.sort_values(by=scorefunc,ascending=flag).iloc[0]['Gold.Reference.RMSD']) < float(RMSD_threshold)):
                    dockings["worked"] = 1
                data.append(dockings)
    success_list = pd.DataFrame(data)
# Now we can average the success obtained by a combination of docking and scoring function using a pivot table
    mean_success = pd.pivot_table(success_list, index= ['dockfunc', 'scorefunc'], values=['worked'], aggfunc=np.mean)
    mean_success.to_csv("mean_sucess.csv")


# Last thing to do is to illustrate our data in a visual diagram
    plot_data =  mean_success.unstack()
    dockfunc = plot_data.index.get_level_values(0)
    scorefunc = [str(a).split('.')[1].lower() for a in plot_data.columns.get_level_values(1)]
    scorefunc = [s.replace('reference','rmsd') for s in scorefunc]
    plt.yticks(np.arange(0.5, len(dockfunc), 1),dockfunc)
    plt.xticks(np.arange(0.5, len(scorefunc),1),scorefunc)
    plt.pcolor(plot_data, cmap=matplotlib.cm.Blues)
    plt.title('GOLD docking performance')
    plt.xlabel('scoring function')
    plt.ylabel('docking function')
    plt.tight_layout()
    plt.savefig('statistics.png')
    plt.show()


if  __name__ == "__main__":
    parser = argparse.ArgumentParser(description='example usage:  python fitness_function_evaluation.py -p 2w5y_protein_prepared.mol2 -l SAH.mol2 -r SAH_native.mol2 -t 2 -n 5')
    parser.add_argument('-p','--protein',default=None,dest='protein',help='The prepared protein .mol2 file')
    parser.add_argument('-l','--ligand', default=None, dest='ligand',help='The prepared ligand .mol2 file')
    parser.add_argument('-r','--reference',default=None,dest='reference',help='The reference ligand to define the active site')
    parser.add_argument('-n','--ndocks',default=10, dest='ndocks',help='The number of dockings to attempt per ligand')
    parser.add_argument('-t','--ntests',default=5,dest='ntests',help='The number of parallel test runs to attempt')
    args = parser.parse_args()
    SF = ('goldscore', 'plp', 'chemscore','asp')
    params = [(args.protein, args.ligand, args.reference, int(args.ndocks), scoring_function, 'None', run) for scoring_function in SF for run in range(1,int(args.ntests)+1)]
    p = Pool(multiprocessing.cpu_count())
    p.map(run_gold, params)
    rescore_params = [(args.reference, scoring_function, rescoring_function, run) for scoring_function in SF\
                                                                  for rescoring_function in SF\
                                                                  for run in range(1,int(args.ntests)+1) if scoring_function != rescoring_function]
    p.map(rescore_gold,rescore_params)
    get_scores(args.ntests,args.ndocks)

Ensemble docking

This example shows again how docking and rescoring can be evaluated. It also shows how to systematically set a protein h-bond constraints looking atom distance search lookup.

#!/usr/bin/env python
#
# This script can be used for any purpose without limitation subject to the
# conditions at http://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx
#
# This permission notice and the following statement of attribution must be
# included in all copies or substantial portions of this script.
#
# 2017-03-03: created by the Cambridge Crystallographic Data Centre
#
"""
This script exemplifies docking, constraints and the use of MolecularDescriptors.AtomDistanceSearch.

This script runs GOLD in batch mode, example usage:

python ensemble_docking_constraints.py --proteins 1TVO_protein.mol2 2OJG_protein.mol2 4ZZN_protein.mol2
    3I5Z_protein.mol2 4FV9_protein.mol2 --out_dir results --cavity cavity_ligands.mol2
    --ligand FRZ_ideal.sdf  --PHBconstraint 1TVO A MET108 H
"""

import argparse
from multiprocessing import Pool
import os
import glob

from ccdc.descriptors import MolecularDescriptors
from ccdc.docking import Docker
from ccdc.io import MoleculeReader
from ccdc.molecule import Molecule
from ccdc.protein import Protein


def define_docker(proteins, cavity, ligands, constraint):
    merge_cavity = Molecule()
    with MoleculeReader(cavity) as cavity_reader:
        for c in cavity_reader:
            merge_cavity.add_molecule(c)
    settings = Docker.Settings()
    merge_protein = Molecule()
    # creating a merged protein from multiple proteins and adding proteins individually to the ensemble
    for protein in proteins:
        settings.add_protein_file(os.path.abspath(protein))
        protein = Protein.from_file(os.path.abspath(protein))
        merge_protein.add_molecule(protein)
    # defining the ligand
    settings.add_ligand_file(os.path.abspath(ligands))
    # defining the binding site
    settings.binding_site = settings.BindingSiteFromLigand(merge_protein, merge_cavity, 6.0)
    # sorting out protein HBond constraints
    for protein in settings.proteins:
        # we find the protein specified in the constraint
        if str(protein.identifier) == str(constraint[0]):
            chain = str(constraint[1].upper())
            residue = str(constraint[2].upper())
            label = str(constraint[3].upper())
            atom = [a for a in protein["%s:%s" %(chain,residue)].atoms if a.label==label]
            c = settings.ProteinHBondConstraint(atom)
            settings.add_constraint(c)
            for otherprot in [x for x in settings.proteins if x != protein]:
                searcher = MolecularDescriptors.AtomDistanceSearch(otherprot)
                near_atoms = searcher.atoms_within_range(atom[0].coordinates, 1.5)
                for n in near_atoms:
                    if n.label == atom[0].label:
                        matched_atom_index = n.index
                        ec = settings.ProteinHBondConstraint([otherprot.atoms[matched_atom_index]])
                        settings.add_constraint(ec)
    new_docker = Docker()
    new_docker.settings = settings
    return new_docker


def run_gold(params):
    (proteins, cavity, ligands, PHBconstraint, outdir, fitness_function, rescore_function, no_run) = params
    docker = define_docker(proteins, cavity, ligands, PHBconstraint)
    settings = docker.settings
    settings.clear_ligand_files()
    settings.add_ligand_file(os.path.abspath(ligands), 10)
    settings.fitness_function = fitness_function
    if rescore_function != 'None':
        settings.rescore_function = rescore_function
        rescore_label = "score_" + str(rescore_function)
    else:
        rescore_label = "score_" + str(fitness_function)
    out_path = os.path.join(outdir, "fitness_" + str(fitness_function), rescore_label, 'run_' + str(no_run))
    settings.output_directory = out_path
    if not os.path.exists(out_path):
        os.makedirs(out_path)
    new_docker = Docker()
    new_docker.settings = settings
    new_docker.dock(file_name = str(fitness_function) + "_" + rescore_label + "_run_" + str(no_run) + ".conf")


def rescore_gold(params):
    (proteins, cavity, ligands, constraint, odir, fitness_function, rescore_function, no_run) = params
    docker = define_docker(proteins, cavity, ligands, constraint)
    settings = docker.settings
    settings.clear_ligand_files()
    in_path = os.path.join(odir, "fitness_" + str(fitness_function), "score_" + str(fitness_function), 'run_' + str(no_run))
    out_path = os.path.join(odir, "fitness_" + str(fitness_function), "score_" + str(rescore_function), 'run_' + str(no_run))
    settings.output_directory = out_path
    if not os.path.exists(out_path):
        os.makedirs(out_path)
    poses = glob.glob(os.path.join(in_path, 'gold_soln*'))
    for pose in poses:
       settings.add_ligand_file(os.path.join(pose), 1)
    settings.fitness_function = None
    settings.rescore_function = rescore_function
    settings.output_directory = out_path
    new_docker = Docker()
    new_docker.settings = settings
    new_docker.dock(file_name = str(fitness_function) + "_score_" + str(rescore_function) + "_run_" + str(no_run) + ".conf" )


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('--proteins', dest='proteins', nargs='+',
                        help='protein(s) used for docking')
    parser.add_argument('--PHBconstraint', dest='PHBconstraint', nargs=4,
                        help='Protein HBond constraint, protein ID followed by chain, residue and atom label')
    parser.add_argument('--cavity', dest='cavity',
                        help='one or more ligand files for cavity definition')
    parser.add_argument('--ligand', dest='ligand',
                        help='one or more ligands for docking')
    parser.add_argument('--out_dir', dest='odir', default='output',
                        help='output directory for docking results')
    args = parser.parse_args()
    SF = ('goldscore', 'plp', 'chemscore', 'asp')
    NO = (1, 2)
    params = [(args.proteins, args.cavity, args.ligand, args.PHBconstraint, args.odir, scoring_function, 'None', run)
              for scoring_function in SF for run in NO]
    p = Pool(1)
    p.map(run_gold, params)
    rescore_params = [(args.proteins, args.cavity, args.ligand, args.PHBconstraint,
                       args.odir, scoring_function, rescoring_function, run)
                      for scoring_function in SF for rescoring_function in SF
                      for run in NO if scoring_function != rescoring_function]
    p.map(rescore_gold, rescore_params)

Searching docked structures

This example shows how to write a csdsql format database from docking results, to enable faster substructure searching, and how to associate the GOLD docking tags with the substructure search hits found.

#!/usr/bin/env python
#
# This script can be used for any purpose without limitation subject to the
# conditions at http://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx
#
# This permission notice and the following statement of attribution must be
# included in all copies or substantial portions of this script.
#
# 2015-06-17: created by the Cambridge Crystallographic Data Centre
#
##############################################################################
'''
    searching_docking_results.py    -   how to perform substructure searching on a multi-mol2 file.

    This example shows how to create a .csdsql database from a multi-mol2 file
    to get fast substructure searching, then to associate the GOLD docking scores with the hits.
    The .csdsql database will be written to the output directory of the docking, if it doesn't already exist, or to
    a directory optionally specified on the command line.
    One or more SMARTS patterns may be provided to search, and the resulting mean fitness scores calculated and printed.
'''
##############################################################################

from __future__ import division, absolute_import, print_function

import os
import argparse
import collections

from ccdc import io, docking, search

##############################################################################

class Runner(argparse.ArgumentParser):
    def __init__(self):
        super(self.__class__, self).__init__(description=__doc__)
        self.add_argument(
            'conf_file',
            help='Docking configuration file'
        )
        self.add_argument(
            'SMARTS', nargs='*',
            help='SMARTS patterns to search for in the database'
        )
        self.add_argument(
            '-o', '--output-directory', default=None,
            help='Alternative directory for the csdsql file'
        )
        self.args = self.parse_args()

    def run(self):
        settings = docking.Docker.Settings.from_file(self.args.conf_file)
        docker = docking.Docker(settings=settings)
        results = docker.results
        if self.args.output_directory is None:
            output_dir = settings.output_directory
        else:
            output_dir = self.args.output_directory
            if not os.path.exists(output_dir):
                os.makedirs(output_dir)

        output_base, _ = os.path.splitext(os.path.basename(settings.output_file))
        csdsql_file = os.path.join(output_dir, output_base + '.csdsql')
        if not os.path.exists(csdsql_file):
            print('Writing:', csdsql_file, 'from', settings.output_file)
            # Ensure that identifiers are unique
            identifier_suffix = collections.defaultdict(int)
            with io.EntryWriter(csdsql_file) as writer:
                for i, ligand in enumerate(results.ligands):
                    identifier_suffix[ligand.identifier] += 1
                    ligand.identifier = '%s_%d' % (ligand.identifier, identifier_suffix[ligand.identifier])
                    writer.write(ligand)

        db = io.EntryReader(csdsql_file)
        ligands = results.ligands
        longest = max(len(s) for s in self.args.SMARTS)
        # Need an index by new identifier
        index = {
            e.identifier : i for i, e in enumerate(db)
        }
        for smarts in self.args.SMARTS:
            searcher = search.SubstructureSearch()
            searcher.add_substructure(search.SMARTSSubstructure(smarts))
            hits = searcher.search(database=db, max_hits_per_structure=1)
            fitness = 0.0
            for h in hits:
                ligand = ligands[index[h.identifier]]
                fitness += ligand.fitness()
            if hits:
                fitness /= len(hits)
            print('%*s: %4d hits %.2f average fitness' % (longest, smarts, len(hits), fitness))

##############################################################################

if __name__ == '__main__':
    runner = Runner()
    runner.run()

Table Of Contents

Previous topic

Protein Examples

Next topic

Pharmacophore Examples