#
# This code is Copyright (C) 2023 The Cambridge Crystallographic Data Centre
# (CCDC) of 12 Union Road, Cambridge CB2 1EZ, UK and a proprietary work of CCDC.
# This code may not be used, reproduced, translated, modified, disassembled or
# copied, except in accordance with a valid licence agreement with CCDC and may
# not be disclosed or redistributed in any form, either in whole or in part, to
# any third party. All copies of this code made in accordance with a valid
# licence agreement as referred to above must contain this copyright notice.
#
# No representations, warranties, or liabilities are expressed or implied in the
# supply of this code by CCDC, its servants or agents, except where such
# exclusion or limitation is prohibited, void or unenforceable under governing
# law.
#
'''
The :mod:`ccdc.solid_form` module contains classes for solid form analysis.
The main classes in the :mod:`ccdc.solid_form` module are:
- :class:`ccdc.solid_form.SolvateAnalyser`.
- :class:`ccdc.solid_form.AromaticsAnalyser`.
'''
from dataclasses import dataclass
import os
import warnings
from tensorflow.keras.models import load_model
import joblib
import numpy as np
from ccdc import io, molecule, search
from ccdc.utilities import nested_class, Resources, _private_importer
with _private_importer() as pi:
pi.import_ccdc_module('ChemistryLib')
pi.import_ccdc_module('PackingSimilarityLib')
warnings.simplefilter('always', DeprecationWarning)
class SolvateAnalyser:
    '''A class for solvate analysis.

    :param crystal: The crystal to analyse
    :type crystal: :class:`ccdc.crystal.Crystal` instance
    :param probe_radius: the probe radius for surface calculation, defaults to 1.2
    :type probe_radius: float, optional
    :param grid_spacing: the grid spacing for surface calculation, defaults to 0.3
    :type grid_spacing: float, optional
    :param method: The method to calculate solvent space, either contact_surface or solvent_accessible_surface
    :type method: string
    '''

    # Class-wide flag: 0 until the first instance has reported telemetry, then 1.
    _telemetry = 0

    @dataclass
    class Solvent:
        '''A class to represent a solvent with the following attributes:

        :ivar name: The name of the solvent
        :ivar molecule: The :class:`ccdc.molecule.Molecule` object of the solvent
        '''
        name: str
        molecule: molecule.Molecule

    def __init__(self, crystal, probe_radius=1.2, grid_spacing=0.3, method='contact_surface'):
        '''Store the analysis parameters and load the known solvents.

        Every ``*.mol2`` file in the CCDC solvents directory is read and its
        first molecule is registered in a SMILES -> identifier lookup table.
        '''
        self._crystal = crystal
        self._probe_radius = probe_radius
        self._grid_spacing = grid_spacing
        # Keep the caller's choice; it was previously discarded (always None).
        self._method = method

        self._solvents = {}
        self._ccdc_solvents_dir = Resources().get_ccdc_solvents_dir()
        for solvent_file in self._ccdc_solvents_dir.glob('*.mol2'):
            # Only the first molecule in each file is used.
            mol = io.MoleculeReader(str(solvent_file))[0]
            self._solvents[mol.smiles] = mol.identifier

        # Report telemetry once per class, not once per instance.
        # NOTE(review): UtilitiesLib is not brought into scope by the import
        # block visible at the top of this file -- confirm it is imported.
        cls = type(self)
        if cls._telemetry == 0:
            UtilitiesLib.ccdc_solvate_analyser_telemetry()
            cls._telemetry = 1

    def add_solvents(self, extra_solvents):
        '''Add extra solvents to check

        SMILES strings that are already registered keep their existing ID.

        :param extra_solvents: a dictionary of solvent SMILES to solvent ID
        '''
        for solvent_smiles, solvent_id in extra_solvents.items():
            self._solvents.setdefault(solvent_smiles, solvent_id)

    def find_solvents(self):
        '''Find solvents in the crystal

        A component of the crystal's molecule counts as a solvent when its
        SMILES string is a key of the known-solvents table.

        :returns: A list of :class:`ccdc.solid_form.SolvateAnalyser.Solvent` objects
        '''
        return [
            SolvateAnalyser.Solvent(self._solvents[component.smiles], component)
            for component in self._crystal.molecule.components
            if component.smiles in self._solvents
        ]
class AromaticsAnalyser:
    '''A class for aromatics analysis.
    '''

    @nested_class('AromaticsAnalyser')
    class Settings(object):
        '''Settings controlling which ring pairs are analysed.'''

        def __init__(self):
            self._include_intramolecular_pairs = False

        @property
        def include_intramolecular_pairs(self):
            '''Whether ring pairs that are not intermolecular are kept (default False).'''
            return self._include_intramolecular_pairs

        @include_intramolecular_pairs.setter
        def include_intramolecular_pairs(self, include):
            self._include_intramolecular_pairs = include

    # Class-wide flag: 0 until the first instance has reported telemetry, then 1.
    _telemetry = 0

    # TODO specify these by reference to the variables defined in the C++ code
    _descriptor_labels = [
        'distance',
        'theta3',
    ]

    def __init__(self, settings=None, x_scaler_file=None, model_file=None):
        '''Load the input scaler and the neural network model.

        :param settings: an :class:`AromaticsAnalyser.Settings` instance, or None for default settings
        :param x_scaler_file: path of the scaler file, or None to use
            ``x_training_scaler.save`` from the aromatic ring model directory
        :param model_file: path of the model file, or None to use
            ``deep_NN_100_32.h5`` from the aromatic ring model directory
        '''
        # Always set self.settings: previously a caller-supplied settings
        # object was dropped, leaving the attribute undefined.
        self.settings = AromaticsAnalyser.Settings() if settings is None else settings

        self._model_dir = Resources().get_aromatic_ring_model_dir()

        if x_scaler_file is None:
            x_scaler_file = os.path.join(self._model_dir, 'x_training_scaler.save')
        # SECURITY: joblib.load deserialises pickled objects and can execute
        # arbitrary code -- only pass scaler files from a trusted source.
        self.x_scaler = joblib.load(x_scaler_file)

        if model_file is None:
            model_file = os.path.join(self._model_dir, 'deep_NN_100_32.h5')
        self.model = load_model(model_file, compile=False)

        # Report telemetry once per class, not once per instance.
        # NOTE(review): UtilitiesLib is not brought into scope by the import
        # block visible at the top of this file -- confirm it is imported.
        cls = type(self)
        if cls._telemetry == 0:
            UtilitiesLib.ccdc_aromatics_analyser_telemetry()
            cls._telemetry = 1

    def _predict_for_ring_pair(self, x):
        '''Run the model on one (already scaled) descriptor row and return its score.'''
        # The model is fed a batch of one row; [0, 0] picks the single output value.
        result = self.model.predict(np.array(x).reshape(1, -1), verbose=0)[0, 0]
        # Convert to score
        return PackingSimilarityLib.get_score(result.item())

    def run_neural_network_calculation(self, x_data):
        '''Score every row of descriptor data.

        :param x_data: an iterable of rows, each an iterable of descriptor values
        :returns: a list with one score per row, rounded to one decimal place;
            an empty list when ``x_data`` has no rows
        '''
        rows = [list(line) for line in x_data]
        if not rows:
            # No rows: nothing to score. Skip the scaler and model rather
            # than handing them an empty 1-D array.
            return []
        x_transformed = self.x_scaler.transform(np.array(rows))
        return [round(self._predict_for_ring_pair(x), 1) for x in x_transformed]

    @staticmethod
    def _search_for_benzene_rings(crystal):
        '''Substructure-search ``crystal`` for pairs of six-membered carbon rings.

        Two copies of the ring pattern are added, a centroid is defined over
        the six atoms of each, and the centroid-centroid distance is
        constrained to the range (1.5, 8) with ``type='inter'``.
        '''
        benzene_smarts = '[C,c]1~[C,c]~[C,c]~[C,c]~[C,c]~[C,c]~1'
        substructure_search = search.SubstructureSearch()
        substructure_search.add_substructure(search.SMARTSSubstructure(benzene_smarts))
        substructure_search.add_substructure(search.SMARTSSubstructure(benzene_smarts))
        # Each centroid is built from (substructure index, atom index) pairs.
        substructure_search.add_centroid('CENT1', *((0, x) for x in range(6)))
        substructure_search.add_centroid('CENT2', *((1, x) for x in range(6)))
        substructure_search.add_distance_constraint('DIST1', 'CENT1', 'CENT2', (1.5, 8), type='inter')
        return substructure_search.search(database=crystal)

    @staticmethod
    def _make_ring_pair(ring1, ring2):
        '''Build a ring pair and attach every descriptor in ``_descriptor_labels``.'''
        ring_pair = PackingSimilarityLib.make_ring_pair(ring1, ring2)
        for descriptor in AromaticsAnalyser._descriptor_labels:
            ring_pair.calculate_and_add_descriptor_variable(descriptor)
        return ring_pair

    def _include_ring_pair(self, ring_pair):
        '''Return True if ``ring_pair`` should be analysed under the current settings.

        A pair is kept when intramolecular pairs are enabled, or when the
        pair reports itself as intermolecular.
        '''
        return bool(self.settings.include_intramolecular_pairs or ring_pair.intermolecular())

    def make_ring_pairs(self, crystal):
        '''Return the ring pairs of ``crystal`` that pass :meth:`_include_ring_pair`.'''
        crystal_view = ChemistryLib.CrystalStructureView.instantiate(crystal._crystal)
        # The base molecules are collected before the shell-expansion call.
        # NOTE(review): presumably so that only the original molecules count
        # as base molecules -- confirm against PackingSimilarityLib.
        base_molecules = [crystal_view.molecule(i) for i in range(crystal_view.nmolecules())]
        PackingSimilarityLib.expand_aromatics_analyser_shell(crystal_view)
        ring_pairs = PackingSimilarityLib.make_ring_data(base_molecules, crystal_view).ring_pairs()
        return [pair for pair in ring_pairs if self._include_ring_pair(pair)]

    def generate_neural_network_input_data(self, crystal):
        '''Tabulate the ring pair data of ``crystal`` for neural network input.'''
        ring_pairs = self.make_ring_pairs(crystal)
        return PackingSimilarityLib.tabulate_ring_pair_data_for_neural_network_input(ring_pairs)