Source code for ccdc.solid_form

#
# This code is Copyright (C) 2023 The Cambridge Crystallographic Data Centre
# (CCDC) of 12 Union Road, Cambridge CB2 1EZ, UK and a proprietary work of CCDC.
# This code may not be used, reproduced, translated, modified, disassembled or
# copied, except in accordance with a valid licence agreement with CCDC and may
# not be disclosed or redistributed in any form, either in whole or in part, to
# any third party. All copies of this code made in accordance with a valid
# licence agreement as referred to above must contain this copyright notice.
#
# No representations, warranties, or liabilities are expressed or implied in the
# supply of this code by CCDC, its servants or agents, except where such
# exclusion or limitation is prohibited, void or unenforceable under governing
# law.
#
'''
The :mod:`ccdc.solid_form` module contains classes for solid form analysis.

The main classes in the :mod:`ccdc.solid_form` module are:

- :class:`ccdc.solid_form.SolvateAnalyser`.
- :class:`ccdc.solid_form.AromaticsAnalyser`.

'''

from dataclasses import dataclass
import os
import warnings

from tensorflow.keras.models import load_model
import joblib
import numpy as np

from ccdc import io, molecule, search
from ccdc.utilities import nested_class, Resources, _private_importer

with _private_importer() as pi:
    pi.import_ccdc_module('ChemistryLib')
    pi.import_ccdc_module('PackingSimilarityLib')

warnings.simplefilter('always', DeprecationWarning)


[docs]class SolvateAnalyser: '''A class for solvate analysis. :param crystal: The crystal to analyse :type crystal: class:`ccdc.crystal.Crystal` instance :param probe_radius: the probe radius for surface calculation, defaults to 1.2 :type probe_radius: float, optional :param grid_spacing: the grid spacing for surface calculation, defaults to 0.3 :type grid_spacing: float, optional :param method: The method to calculate solvent space, either contact_surface or solvent_accessible_surface :type method: string ''' _telemetry = 0
[docs] @dataclass class Solvent: '''A class to represent a solvent with the following attributes: :ivar name: The name of the solvent :ivar molecule: The :class:`ccdc.molecule.Molecule` object of the solvent ''' name: str molecule: molecule.Molecule
def __init__(self, crystal, probe_radius=1.2, grid_spacing=0.3, method='contact_surface'): self._crystal = crystal self._probe_radius = probe_radius self._grid_spacing = grid_spacing self._method = None self._solvents = {} self._ccdc_solvents_dir = Resources().get_ccdc_solvents_dir() solvent_files = self._ccdc_solvents_dir.glob('*.mol2') for solvent_file in solvent_files: mol = io.MoleculeReader(str(solvent_file))[0] self._solvents[mol.smiles] = mol.identifier if type(self)._telemetry == 0: UtilitiesLib.ccdc_solvate_analyser_telemetry() type(self)._telemetry = 1
[docs] def add_solvents(self, extra_solvents): '''Add extra solvents to check :param extra_solvents: a dictionary of solvent SMILES to solvent ID ''' for solvent_smiles, solvent_id in extra_solvents.items(): if solvent_smiles not in self._solvents: self._solvents.update({solvent_smiles: solvent_id})
[docs] def find_solvents(self): '''Find solvents in the crystal :returns: A list of :class:`ccdc.solid_form.SolvateAnalyser.Solvent` objects ''' found_solvents = [] mol_components = self._crystal.molecule.components for mol_comp in mol_components: if mol_comp.smiles in self._solvents: found_solvents.append( SolvateAnalyser.Solvent( self._solvents[mol_comp.smiles], mol_comp ) ) return found_solvents
[docs]class AromaticsAnalyser: '''A class for aromatics analysis. ''' @nested_class('AromaticsAnalyser') class Settings(object): def __init__(self): self._include_intramolecular_pairs = False @property def include_intramolecular_pairs(self): return self._include_intramolecular_pairs @include_intramolecular_pairs.setter def include_intramolecular_pairs(self, include): self._include_intramolecular_pairs = include _telemetry = 0 # TODO specify these by reference to the variables defined in the C++ code _descriptor_labels = [ 'distance', 'theta3', ] def __init__(self, settings=None, x_scaler_file=None, model_file=None): if settings is None: self.settings = AromaticsAnalyser.Settings() self._model_dir = Resources().get_aromatic_ring_model_dir() if x_scaler_file is None: x_scaler_file = os.path.join(self._model_dir, 'x_training_scaler.save') self.x_scaler = joblib.load(x_scaler_file) if model_file is None: model_file = os.path.join(self._model_dir, 'deep_NN_100_32.h5') self.model = load_model(model_file) if type(self)._telemetry == 0: UtilitiesLib.ccdc_aromatics_analyser_telemetry() type(self)._telemetry = 1 def _predict_for_ring_pair(self, x): result = self.model.predict(np.array(x).reshape(1, -1))[0, 0] # Convert to score return PackingSimilarityLib.get_score(result.item()) def run_neural_network_calculation(self, x_data): x_numpy = np.array([np.array([xi for xi in line]) for line in x_data]) x_transformed = self.x_scaler.transform(x_numpy) return [round(self._predict_for_ring_pair(x), 1) for x in x_transformed] @staticmethod def _search_for_benzene_rings(crystal): benzene_smarts = '[C,c]1~[C,c]~[C,c]~[C,c]~[C,c]~[C,c]~1' substructure_search = search.SubstructureSearch() substructure_search.add_substructure(search.SMARTSSubstructure(benzene_smarts)) substructure_search.add_substructure(search.SMARTSSubstructure(benzene_smarts)) substructure_search.add_centroid('CENT1', *((0, x) for x in range(6))) substructure_search.add_centroid('CENT2', *((1, x) for x in range(6))) substructure_search.add_distance_constraint('DIST1', 'CENT1', 'CENT2', (1.5, 8), type='inter') return substructure_search.search(database=crystal) @staticmethod def _make_ring_pair(ring1, ring2): ring_pair = PackingSimilarityLib.make_ring_pair(ring1, ring2) for descriptor in AromaticsAnalyser._descriptor_labels: ring_pair.calculate_and_add_descriptor_variable(descriptor) return ring_pair def _include_ring_pair(self, ring_pair): if not self.settings.include_intramolecular_pairs and not ring_pair.intermolecular(): return False return True def make_ring_pairs(self, crystal): crystal_view = ChemistryLib.CrystalStructureView.instantiate(crystal._crystal) base_molecules = [crystal_view.molecule(i) for i in range(crystal_view.nmolecules())] PackingSimilarityLib.expand_aromatics_analyser_shell(crystal_view) ring_pairs = PackingSimilarityLib.make_ring_data(base_molecules, crystal_view).ring_pairs() return [pair for pair in ring_pairs if self._include_ring_pair(pair)] def generate_neural_network_input_data(self, crystal): ring_pairs = self.make_ring_pairs(crystal) return PackingSimilarityLib.tabulate_ring_pair_data_for_neural_network_input(ring_pairs)