Source code for ccdc.csp.prediction

#
# This code is Copyright (C) 2020 The Cambridge Crystallographic Data Centre
# (CCDC) of 12 Union Road, Cambridge CB2 1EZ, UK and a proprietary work of CCDC.
# This code may not be used, reproduced, translated, modified, disassembled or
# copied, except in accordance with a valid licence agreement with CCDC and may
# not be disclosed or redistributed in any form, either in whole or in part, to
# any third party. All copies of this code made in accordance with a valid
# licence agreement as referred to above must contain this copyright notice.
#
# No representations, warranties, or liabilities are expressed or implied in the
# supply of this code by CCDC, its servants or agents, except where such
# exclusion or limitation is prohibited, void or unenforceable under governing
# law.
#
'''
The main class of the :mod:`ccdc.csp.prediction` module is :class:`ccdc.csp.prediction.Prediction`.

A :class:`ccdc.csp.prediction.Prediction` represents a predictied crystal structure, associated
metadata and computed properties. It will normally be provided by a :class:`ccdc.csp.database.CspDatabase`
but can also be loaded from a CIF file with CSP annotations.

.. code-block:: python

    >>> from ccdc.csp.prediction import Prediction
    >>> import os
    >>> cif_file = os.path.join(os.path.dirname(os.path.abspath(__file__)),
    ...                         '..', '..', 'tests', 'csp', 'testdata',
    ...                         'Axitinib_00001.CIF')
    >>> p=Prediction.from_cif(cif_file)
    >>> len(p.molecule.atoms)
    46
    >>> p.classification_energy_relative
    0.0
    >>> p.optimisation_energy_model
    'Force Field'
    >>> p.BFDH_form
    'Needle'

'''

import os

from ccdc.entry import Entry
from ccdc.descriptors import CrystalDescriptors
from ccdc.crystal import Crystal

from ccdc.utilities import _private_importer
with _private_importer() as pi:
    pi.import_ccdc_module('CspLib')
    pi.import_ccdc_module('PackingSimilarityLib')
    pi.import_ccdc_module('ChemicalAnalysisLib')


[docs]class Prediction: '''A class representing a crystal structure prediction.''' def __init__(self, _prediction): '''Initialise a Prediction with an internal representation''' self._prediction = _prediction _completer = CspLib.CspEntryCompleter()
[docs] @staticmethod def from_cif(cif_filename: str, landscape_name=None, compute_properties=True): '''Get a Prediction from a CIF file. :param cif_filename: The filename of a CIF file. :param landscape_name: An optional name of the CSP landscape for this entry. This will come from the directory name if None. :param compute_properties: When true (default), CSP relevant properties are computed otherwise only the raw CIF data is loaded. ''' _prediction = CspLib.CspCifConversion.from_cif(cif_filename) if compute_properties: try: Prediction._completer.complete(_prediction.structure_) except: # when importing multiple landscapes, we need to recover from incompatible molecules Prediction._completer = CspLib.CspEntryCompleter() Prediction._completer.complete(_prediction.structure_) _prediction.meta_data_.landscape_id_ = landscape_name if landscape_name else os.path.basename(os.path.dirname(cif_filename)) entry = Prediction(_prediction) if compute_properties: entry._compute_properties() return entry
@property def identifier(self): '''The string identifier of the entry, e.g. axitinib_00001, in this prediction.''' return self._prediction.identifier().str() @property def landscape(self) -> str: '''The landscape name containing this prediction.''' return self._prediction.landscape_identifier() @property def entry(self): '''The :class:`ccdc.entry.Entry` contained in this prediction.''' return Entry(_entry=self._prediction.structure_) @property def crystal(self): '''The :class:`ccdc.crystal.Crystal` contained in this prediction.''' return self.entry.crystal @property def molecule(self): '''The :class:`ccdc.molecule.Molecule` contained in this prediction.''' return self.entry.molecule @landscape.setter def landscape(self, name: str): '''Sets the landscape name.''' self._prediction.meta_data_.landscape_id_ = name @property def free_energy_id(self) -> str: '''The identifier of a 0 K structure that matches this structure.''' return self._prediction.meta_data_.free_energy_id_ @free_energy_id.setter def free_energy_id(self, id: str): '''Sets the identifier of a 0 K structure that matches this structure.''' self._prediction.meta_data_.free_energy_id_ = id @property def simulation_temperature(self) -> float: '''The simulation temperature, in K, of this prediction.''' return self._prediction.meta_data_.simulation_temperature_ @property def classification_energy_relative(self) -> float: '''The (absolute or lattice) relative energy, in kJ/mol, of this prediction.''' return self._prediction.meta_data_.classification_energy_relative_ @property def optimisation_energy_model(self) -> str: '''The optimisation energy model of this prediction.''' return self._prediction.meta_data_.optimisation_energy_model_ @property def matched_refcode(self) -> str: '''The refcode of the experimental form matching the predicted crystal structure of this prediction.''' return self._prediction.meta_data_.matched_refcode_ @property def refcode_match(self) -> float: '''The crystal packing similarity, range 0 - 1, between the predicted crystal structure and :attr:`ccdc.csp.prediction.Prediction.matched_refcode` in this prediction.''' return self._prediction.meta_data_.refcode_match_ @property def refcode_match_rmsd(self) -> float: '''The RMSD value of the crystal packing similarity, between the predicted crystal structure and :attr:`ccdc.csp.prediction.Prediction.matched_refcode` if this prediction.''' return self._prediction.meta_data_.refcode_match_rmsd_ @property def optimisation_energy_model_definition(self) -> str: '''The high-level defintion of the optimisation energy model of this prediction.''' return self._prediction.meta_data_.optimisation_energy_model_definition_ @property def free_energy_relative(self) -> float: '''The relative free energy, in kJ/mol, of this prediction.''' return self._prediction.meta_data_.free_energy_relative_ @property def free_energy_method(self) -> str: '''The high-level defintion of the free energy method of this prediction.''' return self._prediction.meta_data_.free_energy_method_ @property def calculated_density(self) -> float: '''The CSP calculated density of this prediction.''' return self._prediction.meta_data_.calculated_density_ @property def molecular_shape_average(self) -> float: '''The average of the molecular shape descriptor, for :attr:`ccdc.entry.molecule.Molecule.heaviest_component` molecules in the asymmetric unit, of this prediction.''' return self._prediction.analysis_.molecular_shape_average_ @property def molecular_shape_difference(self) -> float: '''The difference between the maximum and minimum value of the molecular shape descriptor, for :attr:`ccdc.entry.molecule.Molecule.heaviest_component` molecules in the asymmetric unit, of this prediction.''' return self._prediction.analysis_.molecular_shape_difference_ @property def BFDH_form(self) -> str: '''The BFDH predicted morphology for the crystal structure in this prediction.''' return self._prediction.analysis_.BFDH_form_ @property def void_percent(self) -> float: '''The calculated void volume, as % of the unit cell volume, of the predicted crystal structure of this prediction.''' return self._prediction.analysis_.void_percent_ @property def packing_coefficient(self) -> float: '''The packing coefficient of this prediction.''' return self._prediction.analysis_.packing_coefficient_ @property def void_volume(self) -> float: '''The calculated absolute void volume in the unit cell, in cubic Angstroms, of this prediction.''' return self._prediction.analysis_.void_volume_ class HydrogenBond: def __init__(self, _hbond): self.donor_ = _hbond.donor_ self.acceptor_ = _hbond.acceptor_ self.distance_ = _hbond.distance_ self.angle_ = _hbond.angle_ self.molecule_relationship_ = _hbond.molecule_relationship_ @property def donor(self) -> str: '''The donor of this hydrogen bond.''' return self.donor_ @property def acceptor(self) -> str: '''The acceptor of this hydrogen bond.''' return self.acceptor_ @property def distance(self) -> float: '''The distance of this hydrogen bond.''' return self.distance_ @property def angle(self) -> float: '''The angle of this hydrogen bond.''' return self.angle_ @property def molecule_relationship(self) -> str: '''The molecule relationship of this hydrogen bond.''' return self.molecule_relationship_ @property def hydrogen_bonds(self): '''List of the intra- and inter-molecular hydrogen bond interactions of this prediction.''' return [Prediction.HydrogenBond(CspLib.get_hbond(self._prediction.analysis_, i)) for i in range(CspLib.hbond_count(self._prediction.analysis_))] def _compute_properties(self): reduced_crystal = Crystal(self.crystal._crystal.clone(), self.entry.identifier) ChemicalAnalysisLib.ReducedCellTransformer.transform_to_reduced_cell(reduced_crystal._crystal) mol_shape = _molecular_shape(self.entry) self._prediction.analysis_.molecular_shape_average_ = mol_shape['MolecularShapeAverage'] self._prediction.analysis_.molecular_shape_difference_ = mol_shape['MolecularShapeDifference'] self._prediction.analysis_.BFDH_form_ = _BFDH_form(self.entry) void_percent = round(reduced_crystal.void_volume(probe_radius=1.2, grid_spacing=0.3, mode='contact'), 3) self._prediction.analysis_.void_percent_ = void_percent self._prediction.analysis_.packing_coefficient_ = round(reduced_crystal.packing_coefficient, 3) self._prediction.analysis_.void_volume_ = round(void_percent * self.crystal.cell_volume / 100, 3) for hb in _hbonds(self.entry, 'intermolecular') + _hbonds(self.entry, 'intramolecular'): cpp_hb = CspLib.CspHbondAnalysis() cpp_hb.donor_ = hb['Donor'] cpp_hb.acceptor_ = hb['Acceptor'] cpp_hb.distance_ = hb['Distance'] cpp_hb.angle_ = hb['Angle'] cpp_hb.molecule_relationship_ = hb['MoleculeRelationship'] CspLib.add_hbond_to_analysis(self._prediction.analysis_, cpp_hb)
# Graph sets to be enabled # self._prediction.analysis_.GraphSetsInter = _graph_sets(self.entry, 'intermolecular') # self._prediction.analysis_.GraphSetsIntra = _graph_sets(self.entry, 'intramolecular') def _BFDH_form(entry): try: bfdh_morphology = CrystalDescriptors.Morphology(entry.crystal) bb = bfdh_morphology.oriented_bounding_box SM_ratio = bb.minor_length / bb.median_length ML_ratio = bb.median_length / bb.major_length if SM_ratio < 0.5: if ML_ratio < 0.5: return 'Lath' else: return 'Plate' elif ML_ratio < 0.5: return 'Needle' else: return 'Block' except: return 'NA' def _molecular_shape(entry): molecule = entry.molecule def molecule_shape(molecule): def get_length(property): calculator = PackingSimilarityLib.MoleculeShapeProperty(property) calculator.update(molecule._molecule) return calculator.value() laxis = get_length(PackingSimilarityLib.MoleculeShapeProperty.L) maxis = get_length(PackingSimilarityLib.MoleculeShapeProperty.M) saxis = get_length(PackingSimilarityLib.MoleculeShapeProperty.S) return round((laxis * saxis) / maxis, 1) mols = molecule.components largest = max(len(mol.atoms) for mol in mols) shapes = [molecule_shape(mol) for mol in mols if len(mol.atoms) == largest] average = round(sum(shapes) / max(1,len(shapes)), 1) diff = (max(shapes) - min(shapes))/average return {'MolecularShapeAverage': average, 'MolecularShapeDifference': diff} def _hbonds(entry, inter): crystal = entry.crystal hbonds = crystal.hbonds(intermolecular=inter, require_hydrogens=True, vdw_corrected=True, distance_range=(-5.0, 0.0), angle_tolerance=120.00) hbond_info = [] for hbond in hbonds: donor = hbond.atoms[0] hydrogen = hbond.atoms[1] acceptor = hbond.atoms[2] for bond in acceptor.bonds: if hydrogen in bond.atoms: donor, acceptor = acceptor, donor break hbond_info.append({'Donor': donor.label, 'Acceptor': acceptor.label, 'Distance': round(hbond.da_distance, 3), 'Angle': round(hbond.angle, 2), 'MoleculeRelationship': inter }) hbond_info.sort(key=lambda x: (x['Acceptor'], x['Donor'])) return hbond_info def _graph_sets(entry, inter): crystal = entry.crystal gsa = CrystalDescriptors.GraphSetSearch() gsa.settings.require_hydrogens = True gsa.settings.distance_range = (-5.0, 0.0) gsa.settings.angle_tolerance = 120.00 gsa.settings.vdw_corrected = True gsa.settings.intermolecular = inter graph_sets = gsa.search(crystal) sets = [] for element in graph_sets: gs_part = str(element).split(' ')[0] hbond_label = [] if element.descriptor != 'ring': for hbond in element.hbonds: hbond_label.append(str(hbond.atoms[0].label + "..." + hbond.atoms[2].label)) else: # For ring motifs only show unique hydrogen bonds for hbond in element.hbonds: label = str(hbond.atoms[0].label + "..." + hbond.atoms[2].label) if label not in hbond_label: hbond_label.append(label) sets.append(str(gs_part) + ' [' + ', '.join(hbond_label) + ']') return sets