Source code for ccdc.solid_form

#
# This code is Copyright (C) 2023 The Cambridge Crystallographic Data Centre
# (CCDC) of 12 Union Road, Cambridge CB2 1EZ, UK and a proprietary work of CCDC.
# This code may not be used, reproduced, translated, modified, disassembled or
# copied, except in accordance with a valid licence agreement with CCDC and may
# not be disclosed or redistributed in any form, either in whole or in part, to
# any third party. All copies of this code made in accordance with a valid
# licence agreement as referred to above must contain this copyright notice.
#
# No representations, warranties, or liabilities are expressed or implied in the
# supply of this code by CCDC, its servants or agents, except where such
# exclusion or limitation is prohibited, void or unenforceable under governing
# law.
#
'''
The :mod:`ccdc.solid_form` module contains classes for solid form analysis.

The main classes in the :mod:`ccdc.solid_form` module are:

- :class:`ccdc.solid_form.SolvateAnalyser`.
- :class:`ccdc.solid_form.AromaticsAnalyser`.

'''

from dataclasses import dataclass
import os
import platform
import sys
import warnings

import joblib
import numpy as np

from ccdc import io, molecule, search
from ccdc.utilities import nested_class, Resources, _import_ccdc_modules

# On ARM macOS, libtensorflow_framework has a static-initialisation deadlock in
# abseil/protobuf that hangs on the very first import of tensorflow.  Defer the
# import to call-time via _load_model() on that platform only.
# On all other platforms restore the original module-level import so that TF is
# already in sys.modules before pytest-xdist workers start, which is required for
# tests to run correctly in forked/subprocess workers.
if sys.platform == 'darwin' and platform.machine() == 'arm64':
    _keras_load_model = None
else:
    try:
        from tensorflow.keras.models import load_model as _keras_load_model
    except ModuleNotFoundError:
        print('Tensorflow is required to run this feature.')
        print('Please consult the post-install documentation at '
              'https://downloads.ccdc.cam.ac.uk/documentation/API/installation_notes.html#post-installation')
        raise


def _load_model(model_file):
    keras_load_model = _keras_load_model
    if keras_load_model is None:
        try:
            from tensorflow.keras.models import load_model as keras_load_model
        except ModuleNotFoundError:
            print('Tensorflow is required to run this feature.')
            print('Please consult the post-install documentation at '
                  'https://downloads.ccdc.cam.ac.uk/documentation/API/installation_notes.html#post-installation')
            raise
    return keras_load_model(model_file, compile=False)

_import_ccdc_modules([
    'ChemistryLib',
    'PackingSimilarityLib'
    ])

warnings.simplefilter('always', DeprecationWarning)


[docs] class SolvateAnalyser: '''A class for solvate analysis. :param crystal: The crystal to analyse :type crystal: class:`ccdc.crystal.Crystal` instance :param probe_radius: the probe radius for surface calculation, defaults to 1.2 :type probe_radius: float, optional :param grid_spacing: the grid spacing for surface calculation, defaults to 0.3 :type grid_spacing: float, optional :param method: The method to calculate solvent space, either contact_surface or solvent_accessible_surface :type method: string ''' _telemetry = 0
[docs] @dataclass class Solvent: '''A class to represent a solvent with the following attributes: :ivar name: The name of the solvent :ivar molecule: The :class:`ccdc.molecule.Molecule` object of the solvent ''' name: str molecule: molecule.Molecule
def __init__(self, crystal, probe_radius=1.2, grid_spacing=0.3, method='contact_surface'): self._crystal = crystal self._probe_radius = probe_radius self._grid_spacing = grid_spacing self._method = None self._solvents = {} self._ccdc_solvents_dir = Resources().get_ccdc_solvents_dir() solvent_files = self._ccdc_solvents_dir.glob('*.mol2') for solvent_file in solvent_files: mol = io.MoleculeReader(str(solvent_file))[0] self._solvents[mol.smiles] = mol.identifier if type(self)._telemetry == 0: UtilitiesLib.ccdc_solvate_analyser_telemetry() type(self)._telemetry = 1
[docs] def add_solvents(self, extra_solvents): '''Add extra solvents to check :param extra_solvents: a dictionary of solvent SMILES to solvent ID ''' for solvent_smiles, solvent_id in extra_solvents.items(): if solvent_smiles not in self._solvents: self._solvents.update({solvent_smiles: solvent_id})
[docs] def find_solvents(self): '''Find solvents in the crystal :returns: A list of :class:`ccdc.solid_form.SolvateAnalyser.Solvent` objects ''' found_solvents = [] mol_components = self._crystal.molecule.components for mol_comp in mol_components: if mol_comp.smiles in self._solvents: found_solvents.append( SolvateAnalyser.Solvent( self._solvents[mol_comp.smiles], mol_comp ) ) return found_solvents
[docs] class AromaticsAnalyser: '''A class for aromatics analysis. ''' @nested_class('AromaticsAnalyser') class Settings(object): def __init__(self): self._include_intramolecular_pairs = False @property def include_intramolecular_pairs(self): return self._include_intramolecular_pairs @include_intramolecular_pairs.setter def include_intramolecular_pairs(self, include): self._include_intramolecular_pairs = include _telemetry = 0 # TODO specify these by reference to the variables defined in the C++ code _descriptor_labels = [ 'distance', 'theta3', ] def __init__(self, settings=None, x_scaler_file=None, model_file=None): if settings is None: self.settings = AromaticsAnalyser.Settings() self._model_dir = Resources().get_aromatic_ring_model_dir() if x_scaler_file is None: x_scaler_file = os.path.join(self._model_dir, 'x_training_scaler.save') self.x_scaler = joblib.load(x_scaler_file) if model_file is None: model_file = os.path.join(self._model_dir, 'deep_NN_100_32.h5') self._model_file = model_file self._model = None if type(self)._telemetry == 0: UtilitiesLib.ccdc_aromatics_analyser_telemetry() type(self)._telemetry = 1 @property def model(self): if self._model is None: self._model = _load_model(self._model_file) return self._model def _predict_for_ring_pair(self, x): result = self.model.predict(np.array(x).reshape(1, -1), verbose=0)[0, 0] # Convert to score return PackingSimilarityLib.get_score(result.item()) def run_neural_network_calculation(self, x_data): x_numpy = np.array([np.array([xi for xi in line]) for line in x_data]) x_transformed = self.x_scaler.transform(x_numpy) return [round(self._predict_for_ring_pair(x), 1) for x in x_transformed] @staticmethod def _search_for_benzene_rings(crystal): benzene_smarts = '[C,c]1~[C,c]~[C,c]~[C,c]~[C,c]~[C,c]~1' substructure_search = search.SubstructureSearch() substructure_search.add_substructure(search.SMARTSSubstructure(benzene_smarts)) substructure_search.add_substructure(search.SMARTSSubstructure(benzene_smarts)) substructure_search.add_centroid('CENT1', *((0, x) for x in range(6))) substructure_search.add_centroid('CENT2', *((1, x) for x in range(6))) substructure_search.add_distance_constraint('DIST1', 'CENT1', 'CENT2', (1.5, 8), type='inter') return substructure_search.search(database=crystal) @staticmethod def _make_ring_pair(ring1, ring2): ring_pair = PackingSimilarityLib.make_ring_pair(ring1, ring2) for descriptor in AromaticsAnalyser._descriptor_labels: ring_pair.calculate_and_add_descriptor_variable(descriptor) return ring_pair def _include_ring_pair(self, ring_pair): if not self.settings.include_intramolecular_pairs and not ring_pair.intermolecular(): return False return True def make_ring_pairs(self, crystal): crystal_view = ChemistryLib.CrystalStructureView.instantiate(crystal._crystal) base_molecules = [crystal_view.molecule(i) for i in range(crystal_view.nmolecules())] PackingSimilarityLib.expand_aromatics_analyser_shell(crystal_view) ring_pairs = PackingSimilarityLib.make_ring_data(base_molecules, crystal_view).ring_pairs() return [pair for pair in ring_pairs if self._include_ring_pair(pair)] def generate_neural_network_input_data(self, crystal): ring_pairs = self.make_ring_pairs(crystal) return PackingSimilarityLib.tabulate_ring_pair_data_for_neural_network_input(ring_pairs)