Source code for ccdc.csp.csd_landscape_generator

# This code is Copyright (C) 2022 The Cambridge Crystallographic Data Centre
# (CCDC) of 12 Union Road, Cambridge CB2 1EZ, UK and a proprietary work of CCDC.
# This code may not be used, reproduced, translated, modified, disassembled or
# copied, except in accordance with a valid licence agreement with CCDC and may
# not be disclosed or redistributed in any form, either in whole or in part, to
# any third party. All copies of this code made in accordance with a valid
# licence agreement as referred to above must contain this copyright notice.
#
# No representations, warranties, or liabilities are expressed or implied in the
# supply of this code by CCDC, its servants or agents, except where such
# exclusion or limitation is prohibited, void or unenforceable under governing
# law.
#
'''
csd_landscape_generator.py -   an API for crystal structure prediction landscape generation.

Simplest operation is:

.. code:: python

    >>> from ccdc import io
    >>> from ccdc.csp.csd_landscape_generator import CSDLandscapeGenerator
    >>> csd = io.EntryReader('csd')
    >>> mol = csd.molecule('HXACAN')
    >>> landscape_generator = CSDLandscapeGenerator()

..
    # This is not needed in real use, for doctest purposes only
    >>> import os
    >>> landscape_generator.settings.shape_database_location = os.path.join(
    ...     os.path.dirname(os.path.abspath(__file__)), '..', '..', 'tests',
    ...     'csp', 'testdata', 'csd_landscape_generator', 'HXACAN_shapes.sqlite')

.. code:: python

    >>> landscape_generator.settings.nstructures=3
    >>> landscape = list(landscape_generator.generate(mol))
    >>> print(sorted([round(prediction.relative_score) for prediction in landscape]))
    [0, 27, 32]
'''
###########################################################################

import os
import tempfile

from ccdc import utilities
from ccdc import io
from ccdc import entry
from ccdc import crystal

from ccdc.utilities import _private_importer

with _private_importer() as pi:
    pi.import_ccdc_module('CrystalStructurePredictionLib')
    pi.import_ccdc_module('ChemistryLib')
    pi.import_ccdc_module('FileFormatsLib')
    pi.import_ccdc_module('UtilitiesLib')
    pi.import_ccdc_module('CspLib')

###########################################################################

class CSDLandscapeGenerator(object):
    '''Generation of a landscape of structural analogues using experimentally
    observed crystal structures as templates for predictions.

    Typical use is to construct a generator, adjust :attr:`settings` and then
    iterate over :meth:`generate`.
    '''

    class Settings(object):
        '''Settings pertaining to landscape generation.'''

        def __init__(self, _settings=None):
            '''Wrap a templater settings object.

            :param _settings: an existing underlying templater settings object, or None
                to create one and load the 'templater.args' file found in the
                'templater' directory next to this module.
            '''
            templater_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'templater')
            if _settings is None:
                _settings = CrystalStructurePredictionLib.CrystalStructureTemplaterSettings()
                args_file = os.path.join(templater_dir, 'templater.args')
                _settings.load_arg_file(args_file)
            self._settings = _settings
            self._settings.optimiser_factory_ = \
                CrystalStructurePredictionLib.CrystalStructureTemplaterOptimiserFactory(self._settings)
            # A fresh temporary directory is used until the caller sets working_directory.
            self._settings.working_directory_ = tempfile.mkdtemp()
            self._format = "cif"
            self._num_conformers = 0
            self._min_conformer_rmsd = 0.2

        @property
        def working_directory(self):
            '''The working and output directory for the landscape.'''
            return self._settings.working_directory_

        @working_directory.setter
        def working_directory(self, path):
            # Create the directory on demand so later writes into it can succeed.
            if not os.path.exists(path):
                os.makedirs(path)
            self._settings.working_directory_ = path

        @property
        def database_file(self):
            '''The database file containing the template structures.

            If this is not set, the current release of the CSD will be used.'''
            return self._settings.database_file_

        @database_file.setter
        def database_file(self, value):
            self._settings.database_file_ = value

        @property
        def shape_database_location(self):
            '''The database file containing molecule shape information.

            If this is not set, the current release of the CSD will be used.'''
            return self._settings.shape_database_location_

        @shape_database_location.setter
        def shape_database_location(self, value):
            self._settings.shape_database_location_ = str(value)

        @property
        def nstructures(self):
            '''The number of structures to generate, default is 100.

            There may be fewer structures generated if sufficient template structures cannot be found.'''
            return self._settings.list_size_

        @nstructures.setter
        def nstructures(self, value):
            self._settings.list_size_ = int(value)

        @property
        def sohncke_only(self):
            '''Returns true if structures should be generated only in Sohncke space groups.

            These are the space groups that are possible for single enantiomers of chiral molecules.'''
            return self._settings.sohncke_only_

        @sohncke_only.setter
        def sohncke_only(self, value):
            self._settings.sohncke_only_ = bool(value)

        @property
        def nthreads(self):
            '''Number of threads to allocate for the template predictor.'''
            return self._settings.num_threads_

        @nthreads.setter
        def nthreads(self, value):
            self._settings.num_threads_ = int(value)

        @property
        def format(self):
            '''The output crystal file format, one of 'cif' or 'mol2'.

            :raises: ValueError when set to any other value.
            '''
            return self._format

        @format.setter
        def format(self, value):
            formats = ('cif', 'mol2')
            if value not in formats:
                raise ValueError("format must be one of " + ", ".join(formats))
            self._format = value

        @property
        def _additional_score_terms(self):
            '''Report additional score terms in the prediction results.'''
            return self._settings.keep_score_terms_

        @_additional_score_terms.setter
        def _additional_score_terms(self, value):
            self._settings.keep_score_terms_ = bool(value)

        @property
        def maximum_number_of_conformers(self):
            '''The maximum number of conformers to generate from the input molecule.

            0 means that the input molecule will be used.'''
            return self._num_conformers

        @maximum_number_of_conformers.setter
        def maximum_number_of_conformers(self, value):
            self._num_conformers = int(value)

        @property
        def minimum_conformer_rmsd(self):
            '''The minimum RMSD acceptable between generated conformers.

            A higher value will increase conformer diversity, but may reduce the number or quality of
            the conformers generated.'''
            return self._min_conformer_rmsd

        @minimum_conformer_rmsd.setter
        def minimum_conformer_rmsd(self, value):
            self._min_conformer_rmsd = float(value)

        @property
        def external_minimiser(self):
            '''A path to an external minimiser program to use for crystal structure optimisation.

            If this is not set, the internal minimiser described at
            https://doi.org/10.1107/S2052520616006533 will be used.

            An external minimiser should be the path to a program that takes 2 command line arguments:
            an input CIF file to optimise and an output CIF file in which to store the result.

            The minimised value will be read from the CIF field
            '_ccdc_csp_classification_energy_lattice_absolute' in the output CIF file.
            '''
            return self._settings.external_optimiser_

        @external_minimiser.setter
        def external_minimiser(self, value):
            self._settings.external_optimiser_ = str(value)

    class GeneratedEntry(entry.Entry):
        '''A generated structural analogue entry.

        This is a :class:`ccdc.entry.Entry` with associated scores.'''

        def __init__(self, _prediction):
            '''Wrap an underlying prediction object and expose it as an entry.'''
            self._prediction = _prediction
            self._entry = CrystalStructurePredictionLib.CrystalStructureTemplaterPredictor.prediction_as_entry(
                _prediction
            )
            self.attributes = dict()

        @property
        def score(self):
            '''The prediction's absolute score.'''
            return self._prediction.absolute_score()

        @property
        def relative_score(self):
            '''The structural analogue’s score relative to the lowest scoring prediction in the prediction run.

            This value will be zero until the landscape generation run is finished.
            '''
            return self._prediction.relative_score()

    def __init__(self, settings=None):
        '''Initialise the generator.

        :param settings: a :class:`CSDLandscapeGenerator.Settings` instance, or None to use default settings.
        '''
        if settings is None:
            settings = CSDLandscapeGenerator.Settings()
        self.settings = settings

    def _start_prediction(self, molecule):
        '''Start prediction of crystal forms for this molecule.'''
        molecule.assign_bond_types('unknown')
        # Write the input molecule into the working directory as <identifier>.mol2.
        path = '%s.mol2' % os.path.join(self.settings.working_directory, molecule.identifier)
        with io.MoleculeWriter(path) as writer:
            writer.write(molecule)

        # Drop any components registered by a previous run before adding this molecule.
        self.settings._settings.clear_components()
        process_meta_data = CspLib.CspMetaData()
        if self.settings.maximum_number_of_conformers > 0:
            self.settings._settings.add_generated_conformers(
                molecule._molecule,
                self.settings.maximum_number_of_conformers,
                self.settings.minimum_conformer_rmsd,
                molecule.identifier,
                process_meta_data)
        else:
            self.settings._settings.add_unscored_conformers(molecule._molecule, process_meta_data)

        self._predictor = CrystalStructurePredictionLib.CrystalStructureTemplaterPredictor(
            self.settings._settings
        )
        self._predictor.set_process_meta_data(process_meta_data)

        landscape_name = f"Predicted_{molecule.identifier}"
        csv_name = os.path.join(self.settings.working_directory, landscape_name+".csv")
        self.settings._settings.result_prefix_ = landscape_name + "_on_"

        self._landscape = CrystalStructurePredictionLib.CrystalStructureLandscape()
        self._landscape.set_molecule_name(molecule.identifier)
        self._landscape.open(csv_name, self.settings.format, UtilitiesLib.OpenMode(
            UtilitiesLib.OpenMode.CREATE | UtilitiesLib.OpenMode.WRITE
        ))
        self._predictor.start(self._landscape)

    def generate(self, molecule):
        '''Generate a structural analogue landscape for the input molecule.

        This will yield :class:`ccdc.csp.csd_landscape_generator.GeneratedEntry`s as they become ready.

        The :class:`ccdc.csp.csd_landscape_generator.GeneratedEntry` will be given valid relative_score values
        when landscape generation is completed.

        If the caller stops iterating early and the generator is closed, the predictor is still
        stopped but the remaining predictions are discarded rather than yielded.

        :param molecule: the molecule for which to generate the landscape.
        '''
        self._start_prediction(molecule)
        consumer_closed = False
        try:
            while True:
                p = self._next_prediction()
                if not p:
                    break
                yield p
        except GeneratorExit:
            # The consumer closed the generator; yielding again would make Python raise
            # "RuntimeError: generator ignored GeneratorExit".
            consumer_closed = True
            raise
        finally:
            # Always stop the predictor, whichever way the loop above was left.
            remaining = self._stop_predicting()
            if not consumer_closed:
                yield from remaining

    def _add_prediction(self, prediction):
        '''return a GeneratedEntry object'''
        return CSDLandscapeGenerator.GeneratedEntry(prediction)

    def _next_prediction(self):
        '''The next available prediction.

        This will block until a prediction is available, which may take some time.

        :returns: a :class:`CSDLandscapeGenerator.GeneratedEntry`, or None if the predictor returned no prediction.
        '''
        prediction = self._predictor.release_gil_and_take_prediction()
        if prediction is not None:
            return self._add_prediction(prediction)
        else:
            return None

    def _stop_predicting(self):
        '''Stop the prediction threads and return remaining structures.

        This will not take place immediately, but after the predictions are completed in each worker thread.

        :returns: a tuple of :class:`CSDLandscapeGenerator.GeneratedEntry` for the remaining predictions.
        '''
        self._predictor.stop_predicting()
        # note, using nthreads * 2 to capture completed and ongoing predictions
        results = [self._predictor.release_gil_and_take_prediction()
                   for i in range(self.settings.nthreads * 2)]
        extra_predictions = tuple(
            self._add_prediction(p) for p in results if p is not None
        )
        return extra_predictions
###########################################################################