# This code is Copyright (C) 2022 The Cambridge Crystallographic Data Centre
# (CCDC) of 12 Union Road, Cambridge CB2 1EZ, UK and a proprietary work of CCDC.
# This code may not be used, reproduced, translated, modified, disassembled or
# copied, except in accordance with a valid licence agreement with CCDC and may
# not be disclosed or redistributed in any form, either in whole or in part, to
# any third party. All copies of this code made in accordance with a valid
# licence agreement as referred to above must contain this copyright notice.
#
# No representations, warranties, or liabilities are expressed or implied in the
# supply of this code by CCDC, its servants or agents, except where such
# exclusion or limitation is prohibited, void or unenforceable under governing
# law.
#
'''
csd_landscape_generator.py - an API for crystal structure prediction landscape generation.
Simplest operation is:
.. code:: python
>>> from ccdc import io
>>> from ccdc.csp.csd_landscape_generator import CSDLandscapeGenerator
>>> csd = io.EntryReader('csd')
>>> mol = csd.molecule('HXACAN')
>>> landscape_generator = CSDLandscapeGenerator()
..
# This is not needed in real use, for doctest purposes only
>>> import os
>>> landscape_generator.settings.shape_database_location = os.path.join(
... os.path.dirname(os.path.abspath(__file__)), '..', '..', 'tests',
... 'csp', 'testdata', 'csd_landscape_generator', 'HXACAN_shapes.sqlite')
.. code:: python
>>> landscape_generator.settings.nstructures=3
>>> landscape = list(landscape_generator.generate(mol))
>>> print(sorted([round(prediction.relative_score) for prediction in landscape]))
[0, 27, 32]
'''
###########################################################################
import os
import tempfile
from ccdc import utilities
from ccdc import io
from ccdc import entry
from ccdc import crystal
from ccdc.utilities import _private_importer
with _private_importer() as pi:
pi.import_ccdc_module('CrystalStructurePredictionLib')
pi.import_ccdc_module('ChemistryLib')
pi.import_ccdc_module('FileFormatsLib')
pi.import_ccdc_module('UtilitiesLib')
pi.import_ccdc_module('CspLib')
###########################################################################
class CSDLandscapeGenerator(object):
    '''Generation of a landscape of structural analogues using experimentally observed crystal structures as templates
    for predictions.

    Configure a run through :attr:`settings` (a :class:`CSDLandscapeGenerator.Settings` instance) and iterate over
    :meth:`generate` to receive :class:`CSDLandscapeGenerator.GeneratedEntry` objects.
    '''

    class Settings(object):
        '''Settings pertaining to landscape generation.'''

        def __init__(self, _settings=None):
            '''Wrap an existing templater settings object, or create a default one.

            :param _settings: an existing ``CrystalStructureTemplaterSettings`` object to wrap. When ``None``,
                a new one is created and initialised from the ``templater.args`` file found in the ``templater``
                directory next to this module.

            A new temporary directory is always created with :func:`tempfile.mkdtemp` and installed as the
            working directory. Nothing in this class removes that directory afterwards.
            '''
            if _settings is None:
                _settings = CrystalStructurePredictionLib.CrystalStructureTemplaterSettings()
                templater_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'templater')
                _settings.load_arg_file(os.path.join(templater_dir, 'templater.args'))
            self._settings = _settings
            self._settings.optimiser_factory_ = \
                CrystalStructurePredictionLib.CrystalStructureTemplaterOptimiserFactory(self._settings)
            self._settings.working_directory_ = tempfile.mkdtemp()
            # These three options are held on the Python side rather than on the wrapped settings object.
            self._format = "cif"
            self._num_conformers = 0
            self._min_conformer_rmsd = 0.2

        @property
        def working_directory(self):
            '''The working and output directory for the landscape.

            Assigning a path creates the directory (and any missing parents) if it does not exist yet.
            '''
            return self._settings.working_directory_

        @working_directory.setter
        def working_directory(self, path):
            # exist_ok avoids the check-then-create race of testing for existence first.
            os.makedirs(path, exist_ok=True)
            self._settings.working_directory_ = path

        @property
        def database_file(self):
            '''The database file containing the template structures.

            If this is not set, the current release of the CSD will be used.'''
            return self._settings.database_file_

        @database_file.setter
        def database_file(self, value):
            self._settings.database_file_ = value

        @property
        def shape_database_location(self):
            '''The database file containing molecule shape information.

            If this is not set, the current release of the CSD will be used.'''
            return self._settings.shape_database_location_

        @shape_database_location.setter
        def shape_database_location(self, value):
            self._settings.shape_database_location_ = str(value)

        @property
        def nstructures(self):
            '''The number of structures to generate, default is 100.

            There may be fewer structures generated if sufficient template structures cannot be found.'''
            return self._settings.list_size_

        @nstructures.setter
        def nstructures(self, value):
            self._settings.list_size_ = int(value)

        @property
        def sohncke_only(self):
            '''Returns true if structures should be generated only in Sohncke space groups.

            These are the space groups that are possible for single enantiomers of chiral molecules.'''
            return self._settings.sohncke_only_

        @sohncke_only.setter
        def sohncke_only(self, value):
            self._settings.sohncke_only_ = bool(value)

        @property
        def nthreads(self):
            '''Number of threads to allocate for the template predictor.'''
            return self._settings.num_threads_

        @nthreads.setter
        def nthreads(self, value):
            self._settings.num_threads_ = int(value)

        @property
        def format(self):
            '''The output crystal file format, one of 'cif' or 'mol2'.

            :raises ValueError: on assignment of any other value.
            '''
            return self._format

        @format.setter
        def format(self, value):
            formats = ('cif', 'mol2')
            if value not in formats:
                raise ValueError("format must be one of " + ", ".join(formats))
            self._format = value

        @property
        def _additional_score_terms(self):
            '''Report additional score terms in the prediction results.'''
            return self._settings.keep_score_terms_

        @_additional_score_terms.setter
        def _additional_score_terms(self, value):
            self._settings.keep_score_terms_ = bool(value)

        @property
        def maximum_number_of_conformers(self):
            '''The maximum number of conformers to generate from the input molecule.

            0 means that the input molecule will be used.'''
            return self._num_conformers

        @maximum_number_of_conformers.setter
        def maximum_number_of_conformers(self, value):
            self._num_conformers = int(value)

        @property
        def minimum_conformer_rmsd(self):
            '''The minimum RMSD acceptable between generated conformers.

            A higher value will increase conformer diversity, but may reduce the number or quality of the
            conformers generated.'''
            return self._min_conformer_rmsd

        @minimum_conformer_rmsd.setter
        def minimum_conformer_rmsd(self, value):
            self._min_conformer_rmsd = float(value)

        @property
        def external_minimiser(self):
            '''A path to an external minimiser program to use for crystal structure optimisation.

            If this is not set, the internal minimiser described at https://doi.org/10.1107/S2052520616006533
            will be used.

            An external minimiser should be the path to a program that takes 2 command line arguments: an input
            CIF file to optimise and an output CIF file in which to store the result.

            The minimised value will be read from the CIF field '_ccdc_csp_classification_energy_lattice_absolute'
            in the output CIF file.
            '''
            return self._settings.external_optimiser_

        @external_minimiser.setter
        def external_minimiser(self, value):
            self._settings.external_optimiser_ = str(value)

        @property
        def iteration_limit(self) -> int:
            '''The maximum number of iterations for the optimiser.'''
            return self._settings.optimisation_iteration_limit_

        @iteration_limit.setter
        def iteration_limit(self, value: int):
            self._settings.optimisation_iteration_limit_ = int(value)

    class GeneratedEntry(entry.Entry):
        '''A generated structural analogue entry.

        This is a :class:`ccdc.entry.Entry` with associated scores.'''

        def __init__(self, _prediction):
            '''Wrap a prediction object produced by the templater predictor.

            :param _prediction: the underlying prediction; it is converted to an entry with
                ``CrystalStructureTemplaterPredictor.prediction_as_entry`` and kept for score/status queries.
            '''
            # NOTE(review): the base-class initialiser is deliberately not invoked here; ``_entry`` and
            # ``attributes`` are set directly instead - confirm this matches what ccdc.entry.Entry expects.
            self._prediction = _prediction
            self._entry = CrystalStructurePredictionLib.CrystalStructureTemplaterPredictor.prediction_as_entry(
                _prediction
            )
            self.attributes = dict()

        @property
        def score(self):
            '''The prediction's absolute score.'''
            return self._prediction.absolute_score()

        @property
        def relative_score(self):
            '''The structural analogue's score relative to the lowest scoring prediction in the prediction run.

            This value will be zero until the landscape generation run is finished.
            '''
            return self._prediction.relative_score()

        @property
        def status(self) -> str:
            '''The status of the prediction's optimisation.'''
            return self._prediction.status()

    def __init__(self, settings=None):
        '''Create a generator.

        :param settings: a :class:`CSDLandscapeGenerator.Settings` instance; a default one is created when ``None``.
        '''
        if settings is None:
            settings = CSDLandscapeGenerator.Settings()
        self.settings = settings

    def _start_prediction(self, molecule):
        '''Start prediction of crystal forms for this molecule.

        Writes the molecule as ``<identifier>.mol2`` into the working directory, registers its conformer(s)
        with the settings, creates the predictor and the landscape (backed by ``Predicted_<identifier>.csv``
        in the working directory) and starts the prediction.

        :param molecule: the input molecule; its ``identifier`` is used to name all output files.
        '''
        # NOTE(review): this is called on the caller's molecule object, so it presumably changes that
        # molecule in place - confirm whether callers rely on, or are surprised by, this.
        molecule.assign_bond_types('unknown')

        working_dir = self.settings.working_directory
        mol2_path = f"{os.path.join(working_dir, molecule.identifier)}.mol2"
        with io.MoleculeWriter(mol2_path) as writer:
            writer.write(molecule)

        # Reset any components left over from an earlier run with the same settings object.
        native_settings = self.settings._settings
        native_settings.clear_components()
        process_meta_data = CspLib.CspMetaData()
        max_conformers = self.settings.maximum_number_of_conformers
        if max_conformers > 0:
            native_settings.add_generated_conformers(
                molecule._molecule,
                max_conformers,
                self.settings.minimum_conformer_rmsd,
                molecule.identifier,
                process_meta_data,
            )
        else:
            # No conformer generation requested: use the input molecule as supplied.
            native_settings.add_unscored_conformers(molecule._molecule, process_meta_data)

        self._predictor = CrystalStructurePredictionLib.CrystalStructureTemplaterPredictor(native_settings)
        self._predictor.set_process_meta_data(process_meta_data)

        landscape_name = f"Predicted_{molecule.identifier}"
        csv_name = os.path.join(working_dir, landscape_name + ".csv")
        native_settings.result_prefix_ = landscape_name + "_on_"

        self._landscape = CrystalStructurePredictionLib.CrystalStructureLandscape()
        self._landscape.set_molecule_name(molecule.identifier)
        open_mode = UtilitiesLib.OpenMode(UtilitiesLib.OpenMode.CREATE | UtilitiesLib.OpenMode.WRITE)
        self._landscape.open(csv_name, self.settings.format, open_mode)

        self._progress_monitor = UtilitiesLib.ProgressMonitor()
        self._predictor.start_predicting(self._landscape, self._progress_monitor)

    def generate(self, molecule):
        '''Generate a structural analogue landscape for the input molecule.

        This will yield :class:`ccdc.csp.csd_landscape_generator.CSDLandscapeGenerator.GeneratedEntry` objects
        as they become ready. The entries will be given valid relative_score values when landscape generation
        is completed.

        The progress monitor is cancelled and the predictor stopped when iteration ends for any reason,
        including the consumer abandoning the generator early or an exception being raised. Predictions
        still pending at that point are yielded only when the main loop finished normally.

        :param molecule: the molecule for which to generate structures.
        '''
        self._start_prediction(molecule)
        try:
            while True:
                p = self._next_prediction()
                if not p:
                    break
                yield p
        finally:
            # Only clean up here; never yield. A yield inside ``finally`` would raise
            # "generator ignored GeneratorExit" if the consumer closes the generator early.
            self._progress_monitor.cancel()
            remaining = self._stop_predicting()
        # Reached only after normal completion of the loop above.
        yield from remaining

    def _add_prediction(self, prediction):
        '''return a GeneratedEntry object'''
        return CSDLandscapeGenerator.GeneratedEntry(prediction)

    def _next_prediction(self):
        '''The next available prediction.

        This will block until a prediction is available, which may take some time.

        :returns: a :class:`CSDLandscapeGenerator.GeneratedEntry`, or ``None`` when the predictor hands back
            no prediction.
        '''
        prediction = self._predictor.release_gil_and_take_prediction()
        if prediction is None:
            return None
        return self._add_prediction(prediction)

    def _stop_predicting(self):
        '''Stop the prediction threads and return remaining structures.

        This will not take place immediately, but after the predictions are completed in each worker thread.

        :returns: a tuple of :class:`CSDLandscapeGenerator.GeneratedEntry` for the predictions collected
            after the stop request.
        '''
        self._predictor.stop_predicting()
        # note, using nthreads * 2 to capture completed and ongoing predictions
        results = [
            self._predictor.release_gil_and_take_prediction()
            for _ in range(self.settings.nthreads * 2)
        ]
        return tuple(self._add_prediction(p) for p in results if p is not None)
###########################################################################