Screening examples

Note

The virtual screening features are available only to CSD-Discovery and CSD-Enterprise users.

Field-based virtual screening

This example screens a library of molecules against a specified query file.

#!/usr/bin/env python
#
# This script can be used for any purpose without limitation subject to the
# conditions at https://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx
#
# This permission notice and the following statement of attribution must be
# included in all copies or substantial portions of this script.
#
# 2016-08-15: created by the Cambridge Crystallographic Data Centre
#
######################################################################
"""
    virtual_screening.py - simple interface to the field-based ligand screener.
"""

import os
import argparse
import time

from ccdc.io import MoleculeReader, MoleculeWriter
from ccdc.conformer import ConformerGenerator, ConformerSettings
from ccdc.screening import Screener


def setup_screener(output_dir):
    """Return the settings object for the ligand screener.

    :param output_dir: Directory for screener output files
    :return: Screener settings
    """
    settings = Screener.Settings()
    settings.output_directory = os.path.join(os.getcwd(), output_dir)
    return settings


def standardise(mol):
    """Return standardised molecule.

    :param mol: molecule to standardise
    """
    mol.standardise_aromatic_bonds()
    mol.standardise_delocalised_bonds()
    return mol


def generate_confs(mols, nconformers, nthreads):
    """Return the conformer hit lists.

    :param mol: molecule for conformer generation
    :param nconformers: Number of conformers of the specified molecule to generate
    :param nthreads: Number of threads on which to run the conformer generation
    """
    settings = ConformerSettings()
    settings.max_conformers = nconformers
    gen = ConformerGenerator(settings, nthreads=nthreads)
    standardised_mols = [standardise(mol) for mol in mols]
    conf_hit_lists = gen.generate(standardised_mols)

    # ensure that the returned conformers have a list for every molecule
    conf_dict = {conf_hit_list.identifier:[c.molecule for c in conf_hit_list] for conf_hit_list in conf_hit_lists}
    confs = []
    for mol in mols:
        if mol.identifier in conf_dict:
            confs.append(conf_dict[mol.identifier])
        else:
            confs.append([standardise(mol)])
    return confs


def screen_molecules(screener, mols_to_screen, nconformers, nthreads, output_name):
    """Run the ligand screener and write out the screened conformations.
    Return sorted list of ranked scores.

    :param screener:
    :param mols_to_screen: Screening set
    :param nconformers: Number of conformers to screen for each molecule in screening set
    :param nthreads: Number of threads on which to run the conformer generation
    :param output_name: File name for the result molecules
    :return: sorted list of ranked scores
    """
    screen_set = [m for m in MoleculeReader(mols_to_screen)]  # Read the molecules to screen
    scores = []
    molwriter = MoleculeWriter(output_name)

    # If nconformers=0 the input conformation is used, otherwise the CSD-driven conformer
    # generator is called and a number of conformations equal to nconformers is generated.
    if nconformers == 0:
        confs = [[standardise(mol)] for mol in screen_set]
        print('Screening input conformation of %s' % mol.identifier)
    else:
        try:
            confs = generate_confs(screen_set, nconformers, nthreads)
            for conf, mol in zip(confs, screen_set):
                print('Screening %d conformers of %s' % (len(conf), mol.identifier))
        except Exception as ex:
            print(str(ex))
            confs = [[standardise(mol)] for mol in screen_set]
            for conf, mol in zip(confs, screen_set):
                print('Screening input conformation of %s' % mol.identifier)
        start = time.time()
    res = screener.screen(confs)  # Screening step
    scores.extend([(r.score, r.identifier) for r in res])
    end = time.time()
    print('Screening completed in %.2gs' % (end-start))
    # Write results
    for r in res:
        molwriter.write(r.molecule)
    molwriter.close()

    return sorted(scores)


def write_scores(results, output_file):
    """Write results to an output file.

    :param results: Scores and identifiers from a screen
    :param output_file: Filename for output
    """
    with open(output_file, 'w') as f:
        for r in results:
            score, identifier = r
            f.write('%.3f, %s\n' % (score, identifier))


def parse_command_line_args():
    """Return the command line arguments.

    Parse the command line arguments and return them."""

    parser = argparse.ArgumentParser(description=__doc__)

    parser.add_argument('-q', '--query', type=str, default='',
                        help='Query file')
    parser.add_argument('-s', '--screen-set', type=str, default='',
                        help='Library of molecules to screen')
    parser.add_argument('-n', '--nconfs', type=int, default=25,
                        help='Maximum number of conformers [25]')
    parser.add_argument('-t', '--threads', type=int, default=1,
                        help='Number of threads [1]')
    parser.add_argument('-o', '--output-directory', type=str, default='screen_data',
                        help='Output directory')

    args = parser.parse_args()

    return args


def main():

    # Parse arguments
    args = parse_command_line_args()
    overlay = args.query
    mols_to_screen = args.screen_set

    nt = args.threads
    nc = args.nconfs
    output_dir = args.output_directory
    query = [m for m in MoleculeReader(overlay)]  # Read the query mol or overlay of mols
    settings = setup_screener(output_dir)
    screener = Screener(query, settings=settings, nthreads=nt)  # Generate fields around the input query

    print('Generated fields for query: %s' % overlay)

    output_name = os.path.join(settings.output_directory, "screened_molecules.mol2")
    scores = screen_molecules(screener, mols_to_screen, nc, nt, output_name)  # Screen set of molecules

    output_name_scores = os.path.join(settings.output_directory, "screening_scores.csv")
    write_scores(scores, output_name_scores)


if __name__ == '__main__':
    main()