Source code for ccdc.interaction

#
# This code is Copyright (C) 2015 The Cambridge Crystallographic Data Centre
# (CCDC) of 12 Union Road, Cambridge CB2 1EZ, UK and a proprietary work of CCDC.
# This code may not be used, reproduced, translated, modified, disassembled or
# copied, except in accordance with a valid licence agreement with CCDC and may
# not be disclosed or redistributed in any form, either in whole or in part, to
# any third party. All copies of this code made in accordance with a valid
# licence agreement as referred to above must contain this copyright notice.
#
# No representations, warranties, or liabilities are expressed or implied in the
# supply of this code by CCDC, its servants or agents, except where such
# exclusion or limitation is prohibited, void or unenforceable under governing
# law.
#
'''
The :mod:`ccdc.interaction` module contains classes for investigating molecular
interaction statistics.
The InteractionLibrary is a knowledge-based library of intermolecular interactions.
This module gives access to the InteractionLibrary central and contact
groups. It also provides an interface for working with the data stored in
.istr files. It also provides hydrogren bond statistics search classes.

>>> from ccdc.interaction import InteractionLibrary
>>> central_lib = InteractionLibrary.CentralGroupLibrary()
>>> contact_lib = InteractionLibrary.ContactGroupLibrary()
>>> central_ketone = central_lib.group_by_name('aliphatic-aliphatic ketone')
>>> contact_alcohol = contact_lib.group_by_name('alcohol OH')
>>> interaction_data = central_ketone.interaction_data(contact_alcohol)
>>> print('Rel. density %.2f, est. std.dev. %.2f' % interaction_data.relative_density)
Rel. density 2.91, est. std.dev. 0.23

Methods to establish which central and contact groups are present in a
molecule are provided.

>>> from ccdc.io import CrystalReader
>>> csd_crystal_reader = CrystalReader('CSD')
>>> aabhtz = csd_crystal_reader.crystal('AABHTZ')
>>> aabhtz_central_group_hits = central_lib.search_molecule(aabhtz.molecule)
>>> len(aabhtz_central_group_hits)
4

The order of the central group hits is indeterminate. However, we do know that
methyl is present in AABHTZ.

>>> names = [g.name for g in aabhtz_central_group_hits]
>>> assert('methyl' in names)
>>> i = names.index('methyl')
>>> print(aabhtz_central_group_hits[i].match_atoms())
[Atom(C10), Atom(C11), Atom(H7), Atom(H8), Atom(H9)]

Hydrogen bond statistics can be investigated with :class:`HydrogenBondStatisticsSearch`.

>>> from ccdc.interaction import HydrogenBondStatisticsSearch
>>> hbs = HydrogenBondStatisticsSearch()
>>> stats = hbs.search(aabhtz)
>>> for hb in stats:
...     near_obs = [ob for ob in hb.observations if hb.distance-0.1 < ob.distance < hb.distance+0.1]
...     near_percent = round(10.0 * len(near_obs) / len(hb.observations)) * 10
...     classification = "unusual" if hb.distance_unusual else "normal"
...     print(f'{hb.donor_atom.label}-{hb.acceptor_atom.label} has {classification} distance with {near_percent}% observed hydrogen bonds near')
N6-O2 has normal distance with 80% observed hydrogen bonds near
'''
###########################################################################

import collections
import glob
import math
import os
import sys

# Work around to avoid name conflict with "io" module.
the_current_working_directory = os.path.abspath(os.getcwd())
try:
    os.chdir(os.path.dirname(sys.executable))

    import gzip
finally:
    os.chdir(the_current_working_directory)

from ccdc import io, utilities, protein
from ccdc.molecule import Coordinates, Atom, Molecule
from ccdc.crystal import Crystal
from ccdc.search import QuerySubstructure, SubstructureSearch
from ccdc.utilities import Logger, nested_class, natural_sort_key, Resources
from ccdc.descriptors import CrystalDescriptors

from ccdc.utilities import _private_importer

with _private_importer() as pi:
    pi.import_ccdc_module('IsoStarAnalysisLib')
    pi.import_ccdc_module('InteractionMapsLib')
    pi.import_ccdc_module('ChemistryLib')
    pi.import_ccdc_module('PackingSimilarityLib')
    pi.import_ccdc_module('MotifSearchLib')


###########################################################################
class InteractionLibraryBase:
    """Base class for InteractionLibrary class."""
    calling_no = 0

    def __init__(self):
        """Base class constructor that calls Telemetry, only once"""
        if InteractionLibraryBase.calling_no == 0:
            IsoStarAnalysisLib.ccdc_interaction_library_telemetry()
            InteractionLibraryBase.calling_no = 1


class InteractionMapAnalysisBase:
    """Base class for InteractionMapAnalysis class."""
    calling_no = 0

    def __init__(self):
        """Base class constructor that calls Telemetry, only once."""
        if InteractionMapAnalysisBase.calling_no == 0:
            InteractionMapsLib.ccdc_interaction_map_analysis_telemetry()
            InteractionMapAnalysisBase.calling_no = 1


[docs]class InteractionLibrary(object): '''The InteractionLibrary derived from CSD data.'''
[docs] @nested_class('InteractionLibrary') class InteractionData(InteractionLibraryBase): '''Class for working with interaction data.''' def __init__(self, fname=None, _istr_data=None): '''Read the file. :param fname: string representing path to the istr file ''' super().__init__() self.fname = fname if fname is None: self._iso_file = IsoStarAnalysisLib.IsostarMol2File() self._iso_file.set_isostar_data(_istr_data) else: self.fname = os.path.realpath(fname) # Windows installed data have .istr and .istr.gz but not linked if os.path.exists(self.fname + '.gz'): self.fname = self.fname + '.gz' if self.fname.endswith('.gz'): f = gzip.open(self.fname) content = f.read() f.close() self._iso_file = IsoStarAnalysisLib.IsostarMol2File() self._iso_file.read_string(content) else: # Looks like windows doesn't take links to gzipped files. try: self._iso_file = IsoStarAnalysisLib.IsostarMol2File(self.fname) except RuntimeError: try: f = gzip.open(self.fname) content = f.read() f.close() except IOError: raise IOError('Unable to open data file') else: self._iso_file = IsoStarAnalysisLib.IsostarMol2File() self._iso_file.read_string(content) @property def central_group_name(self): '''Name of the central group.''' return self._iso_file.isostar_data().central_group_name() @property def contact_group_name(self): '''Name of the contact group.''' return self._iso_file.isostar_data().contact_group_name() @property def average_density(self): '''Average density of the data.''' return self._iso_file.isostar_data().average_density() @property def histogram(self): '''Histogram of contact distances.''' contacts = self._iso_file.isostar_data().isostar_contact_groups() contact_stats = IsoStarAnalysisLib.IsostarContactStatistics(contacts) return list(contact_stats.frequencies()) @property def min_distance(self): '''Closest distance in the histogram.''' return self._iso_file.isostar_data().scaled_shortest_contact_distance() / 100. @property def max_distance(self): '''Furthest distance in the histogram.''' return self._iso_file.isostar_data().scaled_longest_contact_distance() / 100. @property def ncentral_atoms(self): '''Number of atoms in the central group.''' return self._iso_file.isostar_data().ncentral_group_atoms() @property def ncontact_atoms(self): '''Number of atoms in the contact group.''' return self._iso_file.isostar_data().ncontact_group_atoms() @property def nflexible_atoms(self): '''Number of flexible atoms.''' return self._iso_file.isostar_data().nflexible_atoms() @property def ncontacts(self): '''Number of observations in the istr file.''' return self._iso_file.isostar_data().nisostar_contact_group() @property def nvdw_contacts(self): '''Number of observations with a distance <= sum of VdW radii.''' if self.min_distance > 0: return 0 if self.max_distance < 0: val = self.max_distance else: val = 0 contacts = self._iso_file.isostar_data().isostar_contact_groups() contact_stats = IsoStarAnalysisLib.IsostarContactStatistics(contacts) accumulated_counts = dict(zip(contact_stats.distance_bins(), contact_stats.accumulated_counts())) if val in accumulated_counts: return accumulated_counts[val] else: return 0
[docs] def ncontacts_in_range(self, low, high): '''The number of observations in a given distance range''' mind = self._iso_file.isostar_data().scaled_shortest_contact_distance() maxd = self._iso_file.isostar_data().scaled_longest_contact_distance() if high < mind / 100. or high < low: return 0 if low > maxd / 100.: return 0 low = max(mind / 100., low) high = min(maxd / 100., high) low = min(maxd / 100., low) high = max(mind / 100., high) low = max(mind, int(low * 100)) high = min(maxd, int(high * 100)) low = abs(mind) + low high = abs(mind) + high return int(sum(self.histogram[low:high + 1]))
@property def relative_density(self): '''Relative density and estimated standard deviation tuple.''' mind = self._iso_file.isostar_data().scaled_shortest_contact_distance() maxd = self._iso_file.isostar_data().scaled_longest_contact_distance() try: av_vdw = 300. vs = math.exp(3 * math.log(av_vdw)) - math.exp(3 * math.log(av_vdw + mind)) vl = math.exp(3 * math.log(av_vdw + maxd)) - math.exp(3 * math.log(av_vdw)) if vs * (self.ncontacts - self.nvdw_contacts) > 0 and vl * self.nvdw_contacts > 0: ss = 1. / self.nvdw_contacts sl = 1. / (self.ncontacts - self.nvdw_contacts) d_rel = (vl * self.nvdw_contacts) / (vl * (self.ncontacts - self.nvdw_contacts)) std_dev = d_rel * (ss + sl) ** 0.5 return d_rel, std_dev else: return 0, 0 except RuntimeError as e: Logger().warning('relative_density failed for %s %s because %s' % (self.central_group_name, self.contact_group_name, str(e)) ) return None, None # std_dev approx is n_contact/n_tot * (1/n_contact + 1/n_tot)**0.5 # Vs = exp(3*log(av_vdw)) - exp(3*log(av_vdw + shortest)) # Vl = exp(3*log(av_vdw + longest)) - exp(3*log(av_vdw)) # Perl code for d_rel # see isostar_data/data_generation/bin/mkhtml_nqs_new.pl # # ## Calculate the relative density drel: # # # # $av_vdw = 300. # $shortest = 30 - but should be the actual distance found (> -100) # %longest = 50 # # $Vs = exp(3*log($av_vdw)) - exp(3*log($av_vdw + $shortest)); # $Vl = exp(3*log($av_vdw + $longest)) - exp(3*log($av_vdw)); # if (($Vs*$nlong > 0) && ($Vl*$nshort > 0)) { # $ss = 1/$nshort; # $sl = 1/$nlong; # $rel_freq = ($Vl*$nshort)/($Vs*$nlong); # $std_dev = sprintf("%2.1f",$rel_freq*sqrt($ss+$sl)); # $rel_freq = sprintf("%8.1f",$rel_freq); # if ($rel_freq > 0.4) { # if (($std_dev/$rel_freq) < 0.20) { # $rel_freq = "$rel_freq ($std_dev)"; # } else { $rel_freq = $space; } # } else { # if ($std_dev < 0.3) { # $rel_freq = "$rel_freq ($std_dev)"; # } else { $rel_freq = $space; } # } # } # else { $rel_freq = $space; } @property def hits(self): '''The entry hits in this file.''' d = self._iso_file.isostar_data() hits = [] for i in range(d.nisostar_contact_group()): c = d.isostar_contact_group(i) if c.filtered(): continue hits.extend([ InteractionLibrary.InteractionHit(c.hyperlink(j)) for j in range(c.contact_molecule().natoms() + d.nflexible_atoms()) ]) return hits
[docs] def write(self, file_name): '''Writes the data to file.''' istr_file = IsoStarAnalysisLib.IsostarMol2File() istr_file.set_isostar_data(self._iso_file.isostar_data()) # This calls IsostarMol2File::print() accounting for # SWIG renaming of python keywords istr_file._print(file_name)
[docs] @nested_class('InteractionLibrary') class InteractionHit(object): '''A hit from InteractionData''' def __init__(self, _link): '''Initialiser''' self._link = _link @property def identifier(self): '''The refcode of the hit''' return self._link.entry_name() @property def distance(self): '''Distance between central and contact groups relative to Sum VdW''' return self._link.distance() / 100. @property def entry(self): '''The entry for the hit''' return io.EntryReader('CSD').entry(self.identifier) # pylint: disable=E1101 @property def crystal(self): '''The crystal of the hit''' return io.EntryReader('CSD').crystal(self.identifier) # pylint: disable=E1101 @property def molecule(self): '''The molecule of the hit''' return io.EntryReader('CSD').molecule(self.identifier) # pylint: disable=E1101
[docs] def central_group_atoms(self, indices=False): '''The atoms of the central group''' mol = self.molecule ats = [a for a in mol.atoms if a.label in self._link.central_atomlabels()] if indices: ats = [a.index for a in ats] return ats
[docs] def contact_group_atoms(self, indices=False): '''The atoms of the contact group''' mol = self.molecule ats = [a for a in mol.atoms if a.label in self._link.contact_atomlabels()] if indices: ats = [a.index for a in ats] return ats
[docs] @nested_class('InteractionLibrary') class FunctionalGroupHit(object): '''A hit from a functional group search.''' def __init__(self, group, hit): '''Constructed from the name of the hit and the SubstructureHit. :param group: the group used to identify the hit :param hit: :class:`ccdc.search.SubstructureHit` ''' self.group = group self.hit = hit @property def name(self): '''The name of the group matched the hit.''' return self.group.name
[docs] def match_atoms(self, indices=False): '''Returns the matched atoms. See :func:`ccdc.search.SubstructureHit.match_atoms`.''' return self.hit.match_atoms(indices=indices)
@nested_class('InteractionLibrary') class _Group(object): '''A group definition.''' def __init__(self, _group): '''Internal use only''' self._group = _group @property def name(self): '''The name of the group.''' return self._group.name() @property def is_valid(self): '''Whether this group is valid or not.''' return os.path.exists(self._file_name) # pylint: disable=E1101 @property def substructure_query(self): '''The :class:`ccdc.search.QuerySubstructure` of this group.''' if not self.is_valid: return None if self._group.substructure() is None: self._group.parse_quest_ini_file(self._file_name) # pylint: disable=E1101 return QuerySubstructure(_substructure=self._group.substructure()) def search_molecule(self, mol): '''Return the functional group hits identified in a molecule. The :class:`ccdc.interaction.InteractionLibrary.FunctionalGroupHit` instances returned are the ones that are matched the specific :class:`ccdc.interaction.InteractionLibrary.Group` instance. :param mol: :class:`ccdc.molecule.Molecule` to search :returns: list of :class:`ccdc.interaction.InteractionLibrary.FunctionalGroupHit` instances ''' searcher = SubstructureSearch() searcher.add_substructure(self.substructure_query) return [ InteractionLibrary.FunctionalGroupHit(self, h) for h in searcher.search(mol) ]
[docs] @nested_class('InteractionLibrary') class CentralGroup(_Group): ''' central group definition.''' @property def name(self): '''The name of the group.''' if self._group.alt(): return '%s %s' % (self._group.alt(), self._group.name()) return self._group.name() @property def _file_name(self): '''The file name of the Connser query for this group.''' central_loc = io._CSDDatabaseLocator.get_interaction_library_query_files_location('central') if central_loc is None: raise IOError( 'No interaction library data found. Please download and install it if you want to use the IsoStar API' ) return os.path.join( central_loc, self._group.que_db() + self._group.que_index() + '.ini' ) def _file_name_base(self): '''The base of the istr file name.''' istr_loc = io._CSDDatabaseLocator.get_interaction_library_data_files_location() if istr_loc is None: raise IOError( 'No interaction library data found. Please download and install it if you want to use the IsoStar API' ) fname = self._group.que_db() + self._group.que_index() thisdir = os.path.join(istr_loc, fname) if self._group.has_params(): fname += '_%s' % self._group.read_index() return os.path.join(thisdir, fname)
[docs] def contact_groups(self): '''Return list of contact groups for which the interaction library has data. The :class:`ccdc.interaction.InteractionLibrary.ContactGroup` instances returned are the ones for which the specific :class:`ccdc.interaction.InteractionLibrary.CentralGroup` has data. :returns: list of :class:`ccdc.interaction.InteractionLibrary.ContactGroup` instances ''' base = self._file_name_base() l = glob.glob(base + '_[0-9][0-9].istr.gz') indices = [n[-10:-8] for n in l] indices.sort() contact_lib = InteractionLibrary.ContactGroupLibrary() grps = [contact_lib.group_by_index(i) for i in indices] return grps
[docs] def interaction_data(self, contact_group): ''' Return the file for this central group and the contact group. :param contact_group: :class:`ccdc.interaction.InteractionLibrary.ContactGroup` :returns: :class:`ccdc.interaction.InteractionLibrary.InteractionData` ''' base = self._file_name_base() fname = '%s_%s.istr' % (base, contact_group._group.que()) if os.path.exists(fname) or os.path.exists(fname + '.gz'): try: return InteractionLibrary.InteractionData(fname) except RuntimeError as e: Logger().warning('Cannot read Data for %s %s because %s' % (self.name, contact_group.name, e) )
[docs] @nested_class('InteractionLibrary') class ContactGroup(_Group): ''' contact group definition''' @property def _file_name(self): '''The file name of the Connser query for this group.''' contact_loc = io._CSDDatabaseLocator.get_interaction_library_query_files_location('contact') if contact_loc is None: raise IOError( 'No interaction library data found. Please download and install it if you want to use the IsoStar API' ) return os.path.join( contact_loc, self._group.que() + '.ini' )
[docs] def central_groups(self): '''List of central groups which have data for this contact group :returns: list of :class:`ccdc.interaction.InteractionLibrary.CentralGroup` instances ''' istr_loc = io._CSDDatabaseLocator.get_interaction_library_data_files_location() if istr_loc is None: raise IOError( 'No interaction library data found. Please download and install it if you want to use the IsoStar API' ) varpatt = 'csd*/csd*_[12]_%02d.istr' % int(self._group.que()) patt = 'csd*/csd*_%02d.istr' % int(self._group.que()) cent_lib = InteractionLibrary.CentralGroupLibrary() candidates = \ glob.glob(os.path.join(istr_loc, varpatt)) + \ glob.glob(os.path.join(istr_loc, patt)) + \ glob.glob(os.path.join(istr_loc, varpatt + '.gz')) + \ glob.glob(os.path.join(istr_loc, patt + '.gz')) def f(n): if n.endswith('.gz'): val = n[-7:-3] else: val = n[-4:] return int(val) ds = set([f(os.path.basename(os.path.dirname(c))) for c in candidates]) gps = [] for g in cent_lib.groups: inx = int(g._group.que_index()) if inx in ds: ds -= {inx} gps.append(g) return gps
[docs] def interaction_data(self, central): '''The :class:`ccdc.interaction.InteractionLibrary.InteractionData` corresponding to the given central group and this contact group :param central: :class:`ccdc.interaction.InteractionLibrary.CentralGroup` ''' istr_loc = io._CSDDatabaseLocator.get_interaction_library_data_files_location() if istr_loc is None: raise IOError( 'No interaction library data found. Please download and install it if you want to use the IsoStar API' ) fname = central._group.que_db() + central._group.que_index() thisdir = os.path.join(istr_loc, fname) if central._group.has_params(): fname += '_%s' % self._group.read_index() fname += '_%s.istr' % self._group.que() fname = os.path.join(thisdir, fname) if os.path.exists(fname) or os.path.exists(fname + '.gz'): return InteractionLibrary.InteractionData(fname)
@nested_class('InteractionLibrary') class _GroupLibrary(InteractionLibraryBase): '''Base class for interaction group collections.''' def group_by_name(self, name): '''Find a group by name. :param name: name of the group to return :returns: the group ''' for g in self.groups: # pylint: disable=E1101 if g.name.strip() == name.strip(): return g def search_molecule(self, mol): '''Return the functional group hits identified in the molecule. All of the functional groups defined in the :class:`ccdc.interaction.InteractionLibrary.GroupLibrary` instance are used to search the molecule. :param mol: :class:`ccdc.molecule.Molecule` to search :returns: list of :class:`ccdc.interaction.InteractionLibrary.FunctionalGroupHit` instances ''' res = [] for g in self.groups: # pylint: disable=E1101 res.extend(g.search_molecule(mol)) return res
[docs] @nested_class('InteractionLibrary') class CentralGroupLibrary(_GroupLibrary): '''Collection of central groups. :attr:`groups` list of :class:`ccdc.interaction.InteractionLibrary.CentralGroup` instances in the library ''' def __init__(self): '''Initialise with a known set of central groups.''' InteractionLibraryBase.__init__(self) central_loc = io._CSDDatabaseLocator.get_interaction_library_definition_location('central') if central_loc is None: raise IOError( 'No interaction library data found. Please download and install it if you want to use the IsoStar API') parser = IsoStarAnalysisLib.ParseCentralGroupFile( IsoStarAnalysisLib.ParseCentralGroupFile.ALL_SETS_EXCLUDING_SUPERSTAR) groups = parser.parse_central_groups(central_loc) # Uncomment this to see the duplicates # all_names = [g.name() for g in groups] # all_groups = [g for g in groups] # for n in all_names: # if all_names.count(n) > 1: # print 'DUPLICATE:', n, # for g in all_groups: # if g.name() == n: # print g.que_index(), g.que_db(), # print l = [InteractionLibrary.CentralGroup(_group=g) for g in groups] self.groups = [] for g in l: if g.is_valid: self.groups.append(g) else: pass
# print 'Invalid central group:', g.name
[docs] @nested_class('InteractionLibrary') class ContactGroupLibrary(_GroupLibrary): '''Collection of contact groups. :attr:`groups` list of :class:`ccdc.interaction.InteractionLibrary.ContactGroup` instances in the library ''' def __init__(self): '''Initialise with a known set of contact groups''' InteractionLibraryBase.__init__(self) contact_loc = io._CSDDatabaseLocator.get_interaction_library_definition_location('contact') if contact_loc is None: raise IOError( 'No interaction library data found. Please download and install it if you want to use the IsoStar API') parser = IsoStarAnalysisLib.ParseContactGroupFile() groups = parser.parse_contact_groups(contact_loc) # Uncomment this to see the duplicates # all_names = [g.name() for g in groups] # all_groups = [g for g in groups] # for n in all_names: # if all_names.count(n) > 1: # print 'DUPLICATE:', n, # for g in all_groups: # if g.name() == n: # print g.target_atoms(), # print # for g in all_groups: # print g.que(), g.name() interaction_library_list = [InteractionLibrary.ContactGroup(_group=g) for g in groups] self.groups = [] for g in interaction_library_list: if g.is_valid: self.groups.append(g) else: pass # print 'Invalid contact group:', g.name
[docs] def group_by_index(self, index): '''Find a contact group by index. :param index: integer index of the group to return :returns: the :class:`ccdc.interaction.InteractionLibrary.ContactGroup` ''' for g in self.groups: if g._group.que() == index: return g
###########################################################################
[docs]class InteractionMapAnalysis(InteractionMapAnalysisBase): '''Searches crystals for potential interactions.'''
[docs] class CommonSettings(object): '''Settings common to protein and small molecule analyses. :ivar working_directory: default value './' :ivar probe_names: default value ['Uncharged NH Nitrogen', 'Carbonyl Oxygen', 'Aromatic CH Carbon'] ''' #: Defined probes using CSD data csd_defined_probes = [ 'Uncharged NH Nitrogen', 'Charged NH Nitrogen', 'RNH3 Nitrogen', 'Alcohol Oxygen', 'Carbonyl Oxygen', 'Water Oxygen', 'Oxygen Atom', 'Methyl Carbon', 'Aromatic CH Carbon', 'C-F Fluorine', 'C-Cl Chlorine', 'C-Br Bromine', 'C-I Iodine', ] try: csd_defined_probes += list(InteractionMapsLib.additional_probe_names()) except RuntimeError as exc: if 'SuperStar' in str(exc): pass else: raise def __init__(self, _settings=None): InteractionMapAnalysisBase.__init__(self) if _settings is None: _settings = InteractionMapsLib.SuperstarSettings() self._settings = _settings self._working_directory = os.path.abspath(os.getcwd()) self.probe_names = [ 'Uncharged NH Nitrogen', 'Carbonyl Oxygen', 'Aromatic CH Carbon', ] self.detect_hotspots = True
[docs] def write(self, file_name): '''Write the settings to an INS file.''' if os.path.exists(file_name): os.unlink(file_name) writer = InteractionMapsLib.SuperstarINSFileWriter() writer.write_settings(file_name, self._settings)
def _validate_probes(self): if self._settings.source() in [InteractionMapsLib.CSD, InteractionMapsLib.CSD_SMALL]: bad = [p for p in self.probe_names if p not in InteractionMapAnalysis.CommonSettings.csd_defined_probes] else: bad = [p for p in self.probe_names if p not in InteractionMapAnalysis.ProteinSettings.pdb_defined_probes] if bad: raise RuntimeError('Some probe names are invalid: %s' % ('; '.join(bad))) @property def working_directory(self): '''The parent directory of all result directories.''' return self._working_directory @working_directory.setter def working_directory(self, value): if not os.path.exists(value): os.makedirs(value) self._working_directory = os.path.abspath(value) @property def num_threads(self): '''Number of threads to use, default -1 (all but one).''' return self._settings.num_threads() @num_threads.setter def num_threads(self, value): self._settings.set_num_threads(value) @property def detect_hotspots(self): '''Whether or not to detect hotspots in the grids.''' return self._settings.peak_fitting() @detect_hotspots.setter def detect_hotspots(self, tf): self._settings.set_peak_fitting(bool(tf)) @property def hotspots_ncycles(self): '''The number of cycles of hotspot fitting to perform.''' return self._settings.pharmacophore_settings().number_cycles() @hotspots_ncycles.setter def hotspots_ncycles(self, value): self._settings.pharmacophore_settings().set_number_cycles(value) @property def hotspots_min_value(self): '''The minimum value at which a hotspot will be detected.''' return self._settings.pharmacophore_settings().minimum_propensity() @hotspots_min_value.setter def hotspots_min_value(self, value): self._settings.pharmacophore_settings().set_minimum_propensity(value) @property def hotspots_refine(self): '''Whether or not to refine the hotspots calculation.''' return self._settings.pharmacophore_settings().peak_fitting() @hotspots_refine.setter def hotspots_refine(self, tf): self._settings.pharmacophore_settings().set_peak_fitting(tf) @property def grid_spacing(self): '''The grid spacing to be used for the analysis.''' return self._settings.grid_info().spacing() @grid_spacing.setter def grid_spacing(self, value): self._settings.grid_info().set_spacing(value) @property def custom_grid_bounding_box(self): '''The custom grid if it has been set, or None.''' if self._settings.grid_info().custom_grid(): xr = self._settings.grid_info().x_range() yr = self._settings.grid_info().y_range() zr = self._settings.grid_info().z_range() return Coordinates(xr[0], yr[0], zr[0]), Coordinates(xr[1], yr[1], zr[1]) @custom_grid_bounding_box.setter def custom_grid_bounding_box(self, bbox): if bbox is None: self._settings.grid_info().set_custom_grid(False) else: org, far = bbox self._settings.grid_info().set_custom_grid(True) self._settings.grid_info().set_x_range(org[0], far[0]) self._settings.grid_info().set_y_range(org[1], far[1]) self._settings.grid_info().set_z_range(org[2], far[2])
[docs] def set_custom_grid_from_grid(self, grid): '''A convenience to set spacing and bounding box.''' self.grid_spacing = grid.spacing self.custom_grid_bounding_box = grid.bounding_box
[docs] def summary(self): '''Prints a table of settings.''' lines = [ 'working_directory: %s' % self.working_directory, 'detect_hotspots: %s' % self.detect_hotspots, ] if self.detect_hotspots: lines.extend([ 'hotspots_ncycles: %d' % self.hotspots_ncycles, 'hotspots_min_value: %.2f' % self.hotspots_min_value, 'hotspots_refine: %.2f' % self.hotspots_refine, ]) lines.append('grid_spacing: %.2f' % self.grid_spacing) if self.custom_grid_bounding_box is not None: (ox, oy, oz), (fx, fy, fz) = self.custom_grid_bounding_box lines.append( 'custom_grid_bounding_box: (%.2f, %.2f, %.2f) (%.2f, %.2f, %.2f)' % (ox, oy, oz, fx, fy, fz) ) lines.append('probe_names: %s' % '; '.join(self.probe_names)) return '\n'.join(lines)
@staticmethod def _get_superstar_paths(use_fims_data_setting): isostar_dir = io._CSDDatabaseLocator.get_interaction_library_data_files_location() if not isostar_dir: isostar_dir = '' return InteractionMapsLib.SuperstarPaths( os.getenv('SUPERSTAR_ROOT', str(Resources().get_superstar_data_dir())), os.getenv('SUPERSTAR_ISODIR', isostar_dir), use_fims_data_setting)
[docs] class SmallMoleculeSettings(CommonSettings): '''Settings pertaining to small molecule analysis. :ivar working_directory: default value './' :ivar probe_names: default value ['Uncharged NH Nitrogen', 'Carbonyl Oxygen', 'Aromatic CH Carbon'] ''' def __init__(self, _settings=None): '''Those settings pertaining to small molecule analysis. Most important is the list of probe_names. ''' InteractionMapAnalysisBase.__init__(self) if _settings is None: _superstar_paths = InteractionMapAnalysis._get_superstar_paths( InteractionMapsLib.SuperstarPaths.USE_FIMS_DATA) _settings = InteractionMapsLib.SuperstarSettings.default_superstar_settings(_superstar_paths) _settings.set_source(InteractionMapsLib.CSD_SMALL) _settings.set_propensity_correction_method(InteractionMapsLib.HYDCORRECTION_OFF) _settings.set_ignore_insignificant_shell_values(False) super(InteractionMapAnalysis.SmallMoleculeSettings, self).__init__(_settings) self._settings = _settings self._use_fims_data = InteractionMapsLib.SuperstarPaths.USE_FIMS_DATA self._settings.set_source(InteractionMapsLib.CSD_SMALL) self._settings.set_propensity_correction_method(InteractionMapsLib.HYDCORRECTION_OFF) self._settings.set_ignore_insignificant_shell_values(False)
[docs] @staticmethod def from_file(file_name): '''Reads SmallMoleculeSettings from an INS file.''' settings = InteractionMapAnalysis.SmallMoleculeSettings() settings._settings.read_settings(file_name) return settings
[docs] def summary(self): '''A summary of current settings.''' return InteractionMapAnalysis.CommonSettings.summary(self)
[docs] class ProteinSettings(CommonSettings): '''Settings pertaining to protein analysis. :ivar working_directory: default value './' :ivar probe_names: default value ['Uncharged NH Nitrogen', 'Carbonyl Oxygen', 'Aromatic CH Carbon'] ''' pdb_defined_probes = [ 'Alcohol Oxygen', 'Water Oxygen', 'Carbonyl Oxygen', 'Amino Nitrogen', 'Aliphatic CH Carbon', 'Aromatic CH Carbon', ] _source_dict = utilities.bidirectional_dict( CSD=InteractionMapsLib.CSD, PDB=InteractionMapsLib.PDB, CSD_SMALL=InteractionMapsLib.CSD_SMALL, PDB_NOH=InteractionMapsLib.PDB_NOH ) _buried_dict = utilities.bidirectional_dict(( ('shallow', 2), ('shallow/normal', 3), ('normal', 4), ('normal/buried', 5), ('buried', 6) )) def __init__(self, _settings=None): InteractionMapAnalysisBase.__init__(self) if _settings is None: _superstar_paths = InteractionMapAnalysis._get_superstar_paths( InteractionMapsLib.SuperstarPaths.DONT_USE_FIMS_DATA) _settings = InteractionMapsLib.SuperstarSettings.default_superstar_settings(_superstar_paths) super(self.__class__, self).__init__(_settings) self._settings.cavity_detection_settings().set_grow_from( InteractionMapsLib.SuperstarCavityDetectionSettings.SELECTED_RESIDUES) self._residues = [] self.source = 'CSD' self.detect_cavities = True self._settings = _settings self._use_fims_data = InteractionMapsLib.SuperstarPaths.DONT_USE_FIMS_DATA
[docs] def write(self, file_name): '''Write the settings.''' if hasattr(self, '_protein'): protein_file = os.path.join(os.path.abspath(os.path.join(os.path.dirname(file_name))), self._protein.identifier + '.mol2') with io.EntryWriter(protein_file) as writer: writer.write(self._protein) self._settings.set_molecule_file(protein_file) InteractionMapAnalysis.CommonSettings.write(self, file_name)
[docs] @staticmethod def from_file(file_name): '''Reads ProteinSettings from an INS file.''' settings = InteractionMapAnalysis.ProteinSettings() dirname = os.path.dirname(file_name) if not dirname: dirname = os.path.abspath(os.getcwd()) settings.working_directory = dirname settings._settings.read_settings(file_name) if settings._settings.molecule_file() and settings._settings.molecule_file() != 'None' and os.path.exists( settings._settings.molecule_file()): settings._protein = protein.Protein.from_file(settings._settings.molecule_file()) p = settings._settings.cavity_detection_settings().the_point() if any(p[i] != 0 for i in range(3)): settings._settings.cavity_detection_settings().set_grow_from( InteractionMapsLib.SuperstarCavityDetectionSettings.POINT) if settings._settings.cavity_detection_settings().grow_from() == InteractionMapsLib.SuperstarCavityDetectionSettings.SELECTED_RESIDUES: substructure_ids = list(settings._settings.substructure_identifiers()) if len(substructure_ids) == 0: settings._residues = [] return settings if not hasattr(settings, '_protein'): raise RuntimeError( 'INS file specifies residues %s but the protein file does not exist' % substructure_ids) residues = settings._protein.residues settings._residues = [residues[int(s) - 1] for s in substructure_ids] settings._settings.clear_substructures() for r in sorted(settings._residues, key=lambda x: natural_sort_key(x.identifier[5:])): chain, name = r.identifier.split(':') s = InteractionMapsLib.SuperstarSubstructure(str(residues.index(r) + 1), chain, name) settings._settings.add_substructure(s) return settings
#### Cavities @property def detect_cavities(self): '''Whether or not to detect the cavities of the protein.''' return self._settings.cavity_detection() @detect_cavities.setter def detect_cavities(self, tf): self._settings.set_cavity_detection(tf) @property def cavity_origin(self): '''The origin of the cavity if set, otherwise ``None``.''' if self._settings.cavity_detection_settings().grow_from() == InteractionMapsLib.SuperstarCavityDetectionSettings.POINT: p = self._settings.cavity_detection_settings().the_point() return Coordinates(p.x(), p.y(), p.z()) @cavity_origin.setter def cavity_origin(self, point): self._settings.cavity_detection_settings().set_point(point) self._settings.cavity_detection_settings().set_grow_from( InteractionMapsLib.SuperstarCavityDetectionSettings.POINT) @property def cavity_radius(self): '''The radius of the cavity when searching from an origin.''' if self._settings.cavity_detection_settings().grow_from() == InteractionMapsLib.SuperstarCavityDetectionSettings.POINT: return self._settings.cavity_detection_settings().radius() @cavity_radius.setter def cavity_radius(self, value): self._settings.cavity_detection_settings().set_radius(value) self._settings.cavity_detection_settings().set_grow_from( InteractionMapsLib.SuperstarCavityDetectionSettings.POINT) @property def min_cavity_volume(self): '''The minimum volume required for a cavity.''' return self._settings.cavity_detection_settings().minimum_volume() @min_cavity_volume.setter def min_cavity_volume(self, value): self._settings.cavity_detection_settings().set_minimum_volume(value) @property def min_cavity_buriedness(self): '''Threshold for the detection of buried cavities. One of 'shallow', 'shallow/normal', 'normal', 'normal/buried', 'buried'. When assigning one these values may be specified, or a value between 1 and 7, the higher the value the more buried a cavity must be in order to be detected. ''' return self._buried_dict.inverse_lookup(self._settings.cavity_detection_settings().min_psp()) @min_cavity_buriedness.setter def min_cavity_buriedness(self, value): if isinstance(value, str): value = self._buried_dict.prefix_lookup(value) self._settings.cavity_detection_settings().set_min_psp(value) @property def detect_cavity_from_residues(self): '''List of residues from which cavities will be detected.''' if self._settings.cavity_detection_settings().grow_from() == InteractionMapsLib.SuperstarCavityDetectionSettings.POINT: return None return self._residues @detect_cavity_from_residues.setter def detect_cavity_from_residues(self, binding_site): '''Set the list of residues from which the cavity will be detected.''' self._settings.cavity_detection_settings().set_grow_from( InteractionMapsLib.SuperstarCavityDetectionSettings.SELECTED_RESIDUES) self._protein = binding_site.protein self._residues = binding_site.residues all_residues = self._protein.residues self._settings.clear_substructures() for r in sorted(self._residues, key=lambda x: natural_sort_key(x.identifier[5:])): chain, name = r.identifier.split(':') s = InteractionMapsLib.SuperstarSubstructure(str(all_residues.index(r) + 1), chain, name) self._settings.add_substructure(s) self.detect_cavities = False self.calculte_contour_surface = False #### Surfaces @property def calculate_contour_surface(self): '''Whether or not to calculate a countour surface.''' return self._settings.calc_contour_surface() @calculate_contour_surface.setter def calculate_contour_surface(self, tf): self._settings.set_calc_contour_surface(tf) if tf: self.calculate_connolly_surface = False @property def calculate_connolly_surface(self): '''Whether or not to calculate a Connolly surface.''' return self._settings.calc_connolly_surface() @calculate_connolly_surface.setter def calculate_connolly_surface(self, tf): self._settings.set_calc_connolly_surface(tf) if tf: self.calculate_contour_surface = False @property def project_on_surface(self): pass #### Source @property def source(self): '''The source for scatterplot data. One of 'CSD', 'CSD_SMALL', 'PDB'. ''' return self._source_dict.inverse_lookup(self._settings.source()) @source.setter def source(self, value): self._settings.set_source(self._source_dict.prefix_lookup(value)) #### Rotate bonds @property def rotate_torsions(self): '''Whether or not to rotate R-[O,N,S]-H bonds.''' return self._settings.use_torsions() @rotate_torsions.setter def rotate_torsions(self, tf): self._settings.set_use_torsions(tf) #### Hydrophobic and shell correction @property def logp_hydrophobic_attenuation_a(self): '''LogP attenuation a factor.''' return self._settings.propensity_logp_delta_a() @logp_hydrophobic_attenuation_a.setter def logp_hydrophobic_attenuation_a(self, value): self._settings.set_propensity_logp_delta_a(value) @property def logp_hydrophobic_attenuation_b(self): '''LogP attenuation factor b.''' return self._settings.propensity_logp_delta_b() @logp_hydrophobic_attenuation_b.setter def logp_hydrophobic_attenuation_b(self, value): self._settings.set_propensity_logp_delta_b(value) @property def polar_shell_correction(self): '''The polar shell correction parameter.''' return self._settings.dshell_value_polar() @polar_shell_correction.setter def polar_shell_correction(self, value): self._settings.set_dshell_value_polar(value) @property def apolar_shell_correction(self): '''The apolar shell correction parameter.''' return self._settings.dshell_value_apolar() @apolar_shell_correction.setter def apolar_shell_correction(self, value): self._settings.set_dshell_value_apolar(value)
[docs] def summary(self): '''A summary of current settings.''' lines = InteractionMapAnalysis.CommonSettings.summary(self).split('\n') lines.append('detect_cavities: %s' % self.detect_cavities) if self.detect_cavities: lines.extend([ 'min_cavity_volume: %.2f' % self.min_cavity_volume, 'min_cavity_buriedness: %s' % self.min_cavity_buriedness, ]) if self.cavity_origin: lines.extend([ 'cavity_origin: %.2f %.2f %.2f' % self.cavity_origin, 'cavity_radius: %.2f' % self.cavity_radius, ]) else: if self.detect_cavity_from_residues: residues = [r.label for r in self.detect_cavity_from_residues] split_res = [residues[i:i + 10] for i in range(0, len(residues), 10)] lines.append('detect_cavity_from_residues: %s' % ' '.join(split_res[0])) lines.extend('%*s %s' % (len('detect_cavity_from_residues: '), split_res[i]) for i in range(1, len(split_res))) else: lines.append('detect_cavity_from_residues: ALL') lines.extend([ 'calculate_contour_surface: %s' % self.calculate_contour_surface, 'calculate_connolly_surface: %s' % self.calculate_connolly_surface, ]) lines.append('source: %s' % self.source) lines.append('rotate_torsions: %s' % self.rotate_torsions) lines.extend([ 'logp_hydrophobic_attenuation_a: %.2f' % self.logp_hydrophobic_attenuation_a, 'logp_hydrophobic_attenuation_b: %.2f' % self.logp_hydrophobic_attenuation_b, 'polar_shell_correction: %.2f' % self.polar_shell_correction, 'apolar_shell_correction: %.2f' % self.apolar_shell_correction, ]) return '\n'.join(lines)
'''Searches surfaces for potential interactions.'''
[docs] class SurfaceSettings(SmallMoleculeSettings): '''Settings pertaining to surface analysis. :ivar working_directory: default value './' :ivar probe_names: default value ['Uncharged NH Nitrogen', 'Carbonyl Oxygen', 'Aromatic CH Carbon'] ''' def __init__(self, _settings=None): '''Those settings pertaining to surface analysis. ''' super(InteractionMapAnalysis.SurfaceSettings, self).__init__(_settings) self._settings.detect_cavities = False self._grid_spacing_modified = False
[docs] @staticmethod def from_file(file_name): '''Reads SurfaceSettings from an INS file.''' settings = InteractionMapAnalysis.SurfaceSettings() settings._settings.read_settings(file_name) return settings
@property def grid_spacing(self): '''The grid spacing to be used for the analysis.''' return self._settings.grid_info().spacing() @grid_spacing.setter def grid_spacing(self, value): old_spacing = self.grid_spacing self._settings.set_smearing_sigma(self._settings.smearing_sigma() * value / old_spacing) self._settings.grid_info().set_spacing(value) self._grid_spacing_modified = True
[docs] class Results(utilities.GridEnsemble): '''The results of a InteractionMapAnalysis analysis. Previous results may be processed by instantiating this class with a directory. The grids generated by the InteractionMapAnalysis are presented as dictionary-like attributes of this class, keyed by the probe name. ''' Hotspot = collections.namedtuple('Hotspot', ['coordinates', 'value']) def hotspot_repr(h): return 'Hotspot(%s, %.2f)' % h Hotspot.__repr__ = hotspot_repr def __init__(self, output_directory, settings=None): InteractionMapAnalysisBase.__init__(self) utilities.GridEnsemble.__init__(self, output_directory) self.settings = settings
[docs] def log_file(self, probe_name): '''Read and return the log file for a probe.''' log = os.path.join(self.directory, probe_name.replace(' ', '_') + '.log') if not os.path.exists(log): raise RuntimeError('No log file for %s' % probe_name) with open(log) as reader: return reader.read()
[docs] def error_file(self, probe_name): '''Read and return the error file for a probe.''' log = os.path.join(self.directory, probe_name.replace(' ', '_') + '.err') if not os.path.exists(log): raise RuntimeError('No error file for %s' % probe_name) with open(log) as reader: return reader.read()
[docs] def hotspots(self, probe_name): '''Return the hotspots for the given probe.''' hotspots_file_name = os.path.join(self.directory, '%s.peaks.mol2' % probe_name.replace(' ', '_')) if not os.path.exists(hotspots_file_name): return tuple() with io.MoleculeReader(hotspots_file_name) as reader: mol = reader[0] return tuple(InteractionMapAnalysis.Results.Hotspot(a.coordinates, a.partial_charge) for a in mol.atoms)
def __init__(self, settings=None): '''Initialise with an optional settings instance.''' super().__init__() if settings is None: settings = InteractionMapAnalysis.SmallMoleculeSettings() self.settings = settings
[docs] def analyse_small_molecule(self, molecule): '''Analyses the given crystal. :param molecule: a :class:`ccdc.molecule.Molecule` instance. :returns: a :class:`ccdc.interaction.InteractionMapAnalysis.Results` instance. If molecules are provided, the analysis they should be a collection of molecules derived from the crystal. All saved files will be in a subdirectory, :attr:`ccdc.crystal.Crystal.identifier`, of :attr:`ccdc.interaction.InteractionMapAnalysis.Settings.working_directory`. ''' _settings = self.settings._settings try: self.settings._settings = _settings.clone() self.settings.detect_cavities = False output_dir = os.path.join(self.settings.working_directory, molecule.identifier.replace(':', '_')) if not os.path.exists(output_dir): os.makedirs(output_dir) mol_file = os.path.abspath(os.path.join(output_dir, '%s_fim.mol2' % molecule.identifier)) with io.CrystalWriter(mol_file) as writer: writer.write(molecule) self._run_superstar(molecule.identifier, output_dir, mol_file) finally: self.settings._settings = _settings return InteractionMapAnalysis.Results(output_dir, self.settings)
@staticmethod def _prepare_protein(protein): protein.add_hydrogens() protein.remove_all_waters() for l in protein.ligands: protein.remove_ligand(l.identifier)
[docs] def analyse_protein(self, protein, prepared=False): '''Analyses the given protein. :param protein: a :class:`ccdc.protein.Protein` instance. :param prepared: if True then no preparation of the protein will be done; if False, ligands and waters will be removed, and hydrogens added. :returns: a :class:`ccdc.interaction.InteractionMapAnalysis.Results` instance. ''' output_dir = os.path.join(self.settings.working_directory, protein.identifier.replace(':', '_')) if not os.path.exists(output_dir): os.makedirs(output_dir) protein = protein.copy() if not prepared: self._prepare_protein(protein) mol_file = os.path.join(output_dir, '%s.mol2' % protein.identifier) with io.CrystalWriter(mol_file) as writer: writer.write(protein) settings = self.settings self._run_superstar(protein.identifier, output_dir, mol_file, settings=settings) return InteractionMapAnalysis.Results(output_dir, self.settings)
[docs] def analyse_ligand(self, protein, ligand, prepared=False): '''Analyses a ligand in the protein. :param protein: a :class:`ccdc.protein.Protein` instance. :param ligand: a :class:`ccdc.molecule.Molecule` instance, extracted from the protein. :param prepared: if True then no preparation of the protein will be done. :returns: a :class:`ccdc.interaction.InteractionMapAnalysis.Results` instance. ''' output_dir = os.path.join(self.settings.working_directory, protein.identifier.replace(':', '_')) if not os.path.exists(output_dir): os.makedirs(output_dir) protein = protein.copy() if not prepared: self._prepare_protein(protein) protein.add_ligand(ligand) mol_file = os.path.join(output_dir, '%s.mol2' % ligand.identifier.replace(':', '_')) with io.CrystalWriter(mol_file) as writer: writer.write(ligand) settings = self.settings.__class__(_settings=self.settings._settings.clone()) settings.probe_names = self.settings.probe_names if ':' in ligand.identifier: chain, name = ligand.identifier.split(':') else: chain, name = '', ligand.identifier settings._settings.clear_substructures() settings._settings.add_substructure(InteractionMapsLib.SuperstarSubstructure(name, chain, name)) self._run_superstar(protein.identifier, output_dir, mol_file, settings=settings) return InteractionMapAnalysis.Results(output_dir, settings)
[docs] def cavities(self, protein, prepared=False): '''Detects the cavities of the protein. :param protein: a :class:`ccdc.protein.Protein` instance. :param prepared: if True, no protein preparation will be performed; if False ligands and waters will be removed and hydrogens added. :returns: :class:`ccdc.interaction.InteractionMapAnalysis.Results` class. ''' output_dir = os.path.join(self.settings.working_directory, protein.identifier.replace(':', '_')) if not os.path.exists(output_dir): os.makedirs(output_dir) protein = protein.copy() if not prepared: self._prepare_protein(protein) mol_file = os.path.abspath(os.path.join(output_dir, '%s_cavity.mol2' % protein.identifier)) with io.CrystalWriter(mol_file) as writer: writer.write(protein) settings = InteractionMapAnalysis.ProteinSettings(_settings=self.settings._settings.clone()) settings._settings.set_run_type(InteractionMapsLib.CAVITY) settings._settings.set_save_accessible_volume_map(True) settings._settings.set_save_excluded_volume_map(True) settings._settings.set_save_ligsite_map(True) settings._settings.set_jobname('%s_cavity' % protein.identifier) settings._settings.set_save_cavity(True) self._run_superstar(protein.identifier, output_dir, mol_file, settings=settings) return InteractionMapAnalysis.Results(output_dir, self.settings)
def _run_superstar(self, identifier, output_dir, mol_file, settings=None, clip_region=None): here = os.getcwd() try: if settings is None: settings = self.settings settings._settings.set_molecule_file(mol_file) probe_names = [] if hasattr(settings, 'probe_names'): if not settings.probe_names: raise RuntimeError('No probes set') settings._validate_probes() probe_names = settings.probe_names _superstar_paths = InteractionMapAnalysis._get_superstar_paths(settings._use_fims_data) run_args = [ identifier, settings._settings, probe_names, output_dir, _superstar_paths, InteractionMapsLib.SuperstarINSFileWriter.INCLUDE_ALL_SETTINGS, InteractionMapsLib.SuperstarRunner.OVERWRITE_ALL_FILES ] if clip_region is not None: run_args.append(clip_region) InteractionMapsLib.SuperstarSmallMoleculeAnalysis.run(*run_args) finally: os.chdir(here)
[docs] def analyse_surface(self, surface): '''Analyses the given surface. :param surface: a :class:`ccdc.particle.Surface` instance. :returns: a :class:`ccdc.interaction.InteractionMapAnalysis.Results` instance. All saved files will be in a subdirectory, :attr:`ccdc.particle.Surface.identifier`, of :attr:`ccdc.interaction.InteractionMapAnalysis.Settings.working_directory`. ''' surface_csv = surface._cssv.extended_crystal_view() surface_molecule = Molecule() for i in range(surface_csv.nmolecules()): mol = ChemistryLib.Molecule3d_instantiate(surface_csv.molecule(i)) surface_molecule._molecule.add(mol) hkl_offset_str = '' if surface.miller_indices: for idx in surface.miller_indices.hkl: hkl_offset_str += str(idx) hkl_offset_str += f"_{surface.offset:.2f}" fim_id = surface.identifier + '_' + hkl_offset_str + '_fim' crystal = Crystal(surface.crystal._crystal.clone(), fim_id) crystal.molecule = surface_molecule output_dir = os.path.join(self.settings.working_directory, surface.identifier.replace(':', '_')) if not os.path.exists(output_dir): os.makedirs(output_dir) mol2_file = os.path.abspath(os.path.join(output_dir, '%s.mol2' % fim_id)) with io.CrystalWriter(mol2_file) as writer: writer.write(crystal) clip_region = surface._interaction_volume if not self.settings._grid_spacing_modified: self.settings.grid_spacing = surface.grid_spacing * 2 self._run_superstar(fim_id, output_dir, mol2_file, clip_region=clip_region) return InteractionMapAnalysis.Results(output_dir, self.settings)
###########################################################################
[docs]class HydrogenBondStatisticsSearch: '''Generate statistics about hydrogen bond interactions within crystals.''' _telemetry = 0
[docs] class Settings: '''Settings for a hydrogen bond statistics search. Attributes ---------- max_distance : float this is the maximum VDW-adjusted donor-acceptor contact distance limit. min_angle : float this is the minimum accepted donor-hydrogen-acceptor angle limit (in degrees). This will be ignored if require_hydrogen is False. require_hydrogen : bool whether or not the donor is required to have a hydrogen atom with defined coordinates. usual_distance_lower_quantile : float the lower quantile limit for unusual distances. usual_distance_upper_quantile : float the upper quantile limit for unusual distances. usual_angle_lower_quantile : float the lower quantile limit for unusual angles. ''' def __init__(self): '''Initiailise default settings''' self.max_distance = PackingSimilarityLib.HyBondStats.DEFAULT_DISTANCE_TOLERANCE.fget() self.min_angle = PackingSimilarityLib.HyBondStats.DEFAULT_ANGLE_TOLERANCE.fget() self.require_hydrogen = True self.usual_distance_lower_quantile = 0.05 self.usual_distance_upper_quantile = 0.95 self.usual_angle_lower_quantile = 0.05
[docs] class HydrogenBondObservation(SubstructureSearch._MotifMatchHit): '''A hydrogen bond observation.''' def __init__(self, obs, database): '''Initialise a hydrogen bond observation.''' super(HydrogenBondStatisticsSearch.HydrogenBondObservation, self).__init__(identifier=obs.identifier.str(), match=obs.motif_match, _database=database) self._obs = obs @property def distance(self): '''The donor-acceptor distance in the hydrogen bond observation.''' return self._obs.distance @property def angle(self): '''The donor-acceptor angle in the hydrogen bond observation.''' return self._obs.angle if HydrogenBondStatisticsSearch.HydrogenBondStatistics._valid_angle( self._obs.angle) else float('nan') @property def donor_atom(self): '''Return the observed hydrogen bond donor atom.''' con = self._obs.motif_match.contact_match(0) return self._atom_at_addr(con.from_atom()) @property def acceptor_atom(self): '''Return the observed hydrogen bond acceptor atom.''' con = self._obs.motif_match.contact_match(0) return self._atom_at_addr(con.to_atom()) def _atom_at_addr(self, addr): '''Return an atom at a motif search address.''' _csv = ChemistryLib.CrystalStructureView_instantiate(self.crystal._crystal) _mss = MotifSearchLib.MotifSearchStructure(_csv) return Atom(_atom=_mss.atom(addr))
[docs] class HydrogenBondStatistics: '''Information about a hydrogen bond''' def __init__(self, stats, crystal, database, settings): self._stats = stats self._crystal = crystal self._database = database self._settings = settings @property def donor_atom(self): '''The donor atom from the query crystal.''' hatom = self._stats.reference_contact().donor_atom_analysis().atom() return Atom(_atom=hatom) @property def acceptor_atom(self): '''The acceptor atom from the query crystal.''' hatom = self._stats.reference_contact().acceptor_atom_analysis().atom() return Atom(_atom=hatom) @property def donor_functional_group_name(self): '''The donor functional group name.''' return self._stats.reference_contact().donor_atom_analysis().substructure_label() @property def acceptor_functional_group_name(self): '''The acceptor functional group name.''' return self._stats.reference_contact().acceptor_atom_analysis().substructure_label() @property def distance(self): '''The donor-acceptor distance in the query.''' return self._stats.reference_distance() @property def angle(self): '''The donor-hydrogen-acceptor angle in the query, which may be NaN.''' a = self._stats.reference_angle() if not self._valid_angle(a): return float('nan') return a @staticmethod def _valid_angle(a): return 0 <= a <= 180 @property def intermolecular(self): '''True if the hydrogen bond is intermolecular, false otherwise.''' return self._stats.reference_contact().is_intermolecular() @property def distances(self): '''The observed distance measurements.''' return self._stats.distances() @property def angles(self): '''The observed angle measurements with invalid angles filtered out.''' return [a for a in self._stats.angles() if self._valid_angle(a)] @property def observations(self): '''The observations of this hydrogen bond found in the database.''' return [HydrogenBondStatisticsSearch.HydrogenBondObservation(o, self._database) for o in self._stats.contact_geometries()] @property def distance_unusual(self): '''Whether the distance falls outside of the usual distance quantile range.''' low = self._stats.distance_quantile(self._settings.usual_distance_lower_quantile) high = self._stats.distance_quantile(self._settings.usual_distance_upper_quantile) return self.distance < low or high < self.distance @property def angle_unusual(self): '''Whether the angle falls below the usual angle quantile limit.''' a = self._stats.reference_angle() if not self._valid_angle(a): return False low = self._stats.angle_quantile(self._settings.usual_angle_lower_quantile) return a < low @property def distance_mean(self): '''The mean distance in the query.''' return self._stats.distance_mean() @property def distance_min(self): '''The minimum angle of contacts in the query.''' return self._stats.distance_min() @property def distance_max(self): '''The maximum angle of contacts in the query.''' return self._stats.distance_max() @property def distance_stdev(self): '''The standard deviation of angle of contacts in the query.''' return self._stats.distance_stdev() @property def distance_threshold(self): '''The value of the threshold distances in the query''' low = self._stats.distance_quantile(self._settings.usual_distance_lower_quantile) high = self._stats.distance_quantile(self._settings.usual_distance_upper_quantile) return low, high @property def angle_mean(self): '''The mean angle in the query.''' return self._stats.angle_mean() @property def angle_min(self): '''The minimum angle of contacts in the query.''' return self._stats.angle_min() @property def angle_max(self): '''The maximum angle of contacts in the query.''' return self._stats.angle_max() @property def angle_stdev(self): '''The standard deviation of angle of contacts in the query.''' return self._stats.angle_stdev() @property def angle_threshold(self): '''The value of the threshold angle in the query.''' return self._stats.angle_quantile(self._settings.usual_angle_lower_quantile) @property def distance_histogram_weights(self): '''Weights of bins in distance histograms''' return self._stats.distance_histogram_weights() @property def angle_histogram_weights(self): '''Weights of bins in angle histograms''' return self._stats.angle_histogram_weights() @property def distance_histogram_effective_densities(self): '''Effective Densities of bins for distance histograms''' return self._stats.distance_histogram_effective_densities() @property def angle_histogram_effective_densities(self): '''Effective Densities of bins for angle histograms''' return self._stats.angle_histogram_effective_densities() @property def distance_histogram_frequencies(self): '''Frequency counts of bins in distance histogram''' return self._stats.distance_histogram_frequencies() @property def angle_histogram_frequencies(self): '''Frequency counts of bins in angle histogram''' return self._stats.angle_histogram_frequencies() @property def distance_histogram_x_axis_properties(self): '''X-axis properties of distance histogram, :returns: a tuple containing the various x-axis properties needed to plot a distance histogram :rtype::tuple: - (start, stop, number of bins) ''' return self._stats.distance_histogram_x_axis_properties() @property def angle_histogram_x_axis_properties(self): '''X-axis properties of angle histogram, :returns: a tuple containing the various x-axis properties needed to plot an angle histogram :rtype::tuple: - (start, stop, number of bins) ''' return self._stats.angle_histogram_x_axis_properties() @property def heatplot_distances_radial_corrected(self): '''Returns the radial corrected distance data for plotting heatplots''' return self._stats.heatplot_distances_radial_corrected() @property def heatplot_angles_cone_corrected(self): '''Returns the cone corrected angle data for plotting heatplots''' return self._stats.heatplot_angles_cone_corrected()
def __init__(self, settings=None): '''Initialise the search.''' if settings is None: settings = HydrogenBondStatisticsSearch.Settings() self.settings = settings if type(self)._telemetry == 0: UtilitiesLib.ccdc_hydrogen_bond_statistics_telemetry() type(self)._telemetry = 1
[docs] def search(self, crystal, database=None): '''Return hydrogen bond statistics relevant to a crystal structure, from a database.''' db_arg = database if database is None: database = io.EntryReader('CSD') elif isinstance(database, str): database = io.EntryReader(database) elif not isinstance(database, io._DatabaseReader): raise TypeError('Cannot search this database: %s' % db_arg) if hasattr(crystal, 'crystal'): crystal = crystal.crystal hbond_stats = PackingSimilarityLib.HyBondStats() stats = hbond_stats.pair_statistics( CrystalDescriptors.HBondPropensities.Settings().functional_groups_directory, crystal._crystal, crystal.identifier, database._db, self.settings.max_distance, self.settings.min_angle, "", self.settings.require_hydrogen) return tuple(self.HydrogenBondStatistics(s, crystal, database, self.settings) for s in stats)