Screening examples¶
Note
The virtual screening features are available only to CSD-Discovery and CSD-Enterprise users.
Field-based virtual screening¶
This example screens a library of molecules against a specified query file.
#!/usr/bin/env python
#
# This script can be used for any purpose without limitation subject to the
# conditions at https://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx
#
# This permission notice and the following statement of attribution must be
# included in all copies or substantial portions of this script.
#
# 2016-08-15: created by the Cambridge Crystallographic Data Centre
#
######################################################################
"""
virtual_screening.py - simple interface to the field-based ligand screener.
"""
import os
import argparse
import time
from ccdc.io import MoleculeReader, MoleculeWriter
from ccdc.conformer import ConformerGenerator, ConformerSettings
from ccdc.screening import Screener
def setup_screener(output_dir):
"""Return the settings object for the ligand screener.
:param output_dir: Directory for screener output files
:return: Screener settings
"""
settings = Screener.Settings()
settings.output_directory = os.path.join(os.getcwd(), output_dir)
return settings
def standardise(mol):
"""Return standardised molecule.
:param mol: molecule to standardise
"""
mol.standardise_aromatic_bonds()
mol.standardise_delocalised_bonds()
return mol
def generate_confs(mols, nconformers, nthreads):
"""Return the conformer hit lists.
:param mol: molecule for conformer generation
:param nconformers: Number of conformers of the specified molecule to generate
:param nthreads: Number of threads on which to run the conformer generation
"""
settings = ConformerSettings()
settings.max_conformers = nconformers
gen = ConformerGenerator(settings, nthreads=nthreads)
standardised_mols = [standardise(mol) for mol in mols]
conf_hit_lists = gen.generate(standardised_mols)
# ensure that the returned conformers have a list for every molecule
conf_dict = {conf_hit_list.identifier:[c.molecule for c in conf_hit_list] for conf_hit_list in conf_hit_lists}
confs = []
for mol in mols:
if mol.identifier in conf_dict:
confs.append(conf_dict[mol.identifier])
else:
confs.append([standardise(mol)])
return confs
def screen_molecules(screener, mols_to_screen, nconformers, nthreads, output_name):
"""Run the ligand screener and write out the screened conformations.
Return sorted list of ranked scores.
:param screener:
:param mols_to_screen: Screening set
:param nconformers: Number of conformers to screen for each molecule in screening set
:param nthreads: Number of threads on which to run the conformer generation
:param output_name: File name for the result molecules
:return: sorted list of ranked scores
"""
screen_set = [m for m in MoleculeReader(mols_to_screen)] # Read the molecules to screen
scores = []
molwriter = MoleculeWriter(output_name)
# If nconformers=0 the input conformation is used, otherwise the CSD-driven conformer
# generator is called and a number of conformations equal to nconformers is generated.
if nconformers == 0:
confs = [[standardise(mol)] for mol in screen_set]
print('Screening input conformation of %s' % mol.identifier)
else:
try:
confs = generate_confs(screen_set, nconformers, nthreads)
for conf, mol in zip(confs, screen_set):
print('Screening %d conformers of %s' % (len(conf), mol.identifier))
except Exception as ex:
print(str(ex))
confs = [[standardise(mol)] for mol in screen_set]
for conf, mol in zip(confs, screen_set):
print('Screening input conformation of %s' % mol.identifier)
start = time.time()
res = screener.screen(confs) # Screening step
scores.extend([(r.score, r.identifier) for r in res])
end = time.time()
print('Screening completed in %.2gs' % (end-start))
# Write results
for r in res:
molwriter.write(r.molecule)
molwriter.close()
return sorted(scores)
def write_scores(results, output_file):
"""Write results to an output file.
:param results: Scores and identifiers from a screen
:param output_file: Filename for output
"""
with open(output_file, 'w') as f:
for r in results:
score, identifier = r
f.write('%.3f, %s\n' % (score, identifier))
def parse_command_line_args():
"""Return the command line arguments.
Parse the command line arguments and return them."""
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument('-q', '--query', type=str, default='',
help='Query file')
parser.add_argument('-s', '--screen-set', type=str, default='',
help='Library of molecules to screen')
parser.add_argument('-n', '--nconfs', type=int, default=25,
help='Maximum number of conformers [25]')
parser.add_argument('-t', '--threads', type=int, default=1,
help='Number of threads [1]')
parser.add_argument('-o', '--output-directory', type=str, default='screen_data',
help='Output directory')
args = parser.parse_args()
return args
def main():
# Parse arguments
args = parse_command_line_args()
overlay = args.query
mols_to_screen = args.screen_set
nt = args.threads
nc = args.nconfs
output_dir = args.output_directory
query = [m for m in MoleculeReader(overlay)] # Read the query mol or overlay of mols
settings = setup_screener(output_dir)
screener = Screener(query, settings=settings, nthreads=nt) # Generate fields around the input query
print('Generated fields for query: %s' % overlay)
output_name = os.path.join(settings.output_directory, "screened_molecules.mol2")
scores = screen_molecules(screener, mols_to_screen, nc, nt, output_name) # Screen set of molecules
output_name_scores = os.path.join(settings.output_directory, "screening_scores.csv")
write_scores(scores, output_name_scores)
if __name__ == '__main__':
main()