Crystal examples

Generating molecular assemblies

These examples take a crystal and generate a molecular representation based on various criteria.

An assembly based on distance

#!/usr/bin/env python
#
# This script can be used for any purpose without limitation subject to the
# conditions at https://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx
#
# This permission notice and the following statement of attribution must be
# included in all copies or substantial portions of this script.
#
# 2015-06-17: created by the Cambridge Crystallographic Data Centre
#

"""
Write out heaviest component and molecular shell for a set of CSD structures.

This script takes an input a GCD file and an output directory. For each CSD
entry the script then identifies the heaviest component and writes it to the
output directory. The molecules in the molecular shell around the heaviest
component are then identified and written out to a separate file in the output
directory.

For more help use the -h argument.

"""

import argparse
import os
from ccdc import io

##############################################################################

def main(gcd_file, out_dir):
    """
    Run the main program.
    """
    crystal_reader = io.CrystalReader(gcd_file, format='identifiers')
    for crystal in crystal_reader:

        # Get the atoms from the heaviest component in the crystal. These will
        # be used as the selection around which we create the molecular shell.
        heaviest_component = crystal.molecule.heaviest_component
        selection = heaviest_component.atoms

        # Write out the central molecule
        out_file_name = '%s.mol2' % crystal.molecule.identifier
        out_file_path = os.path.join(out_dir, out_file_name)
        with io.MoleculeWriter(out_file_path) as mol_writer:
            mol_writer.write(heaviest_component)

        # Generate the molecular shell around the atoms in the selection.
        mol_shell = crystal.molecular_shell(distance_type="VdW",
                                            distance_range=(0.0, 0.5),
                                            atom_selection=selection)
        # Write out the molecular shell
        out_file_name = '%s_shell.mol2' % crystal.molecule.identifier
        out_file_path = os.path.join(out_dir, out_file_name)
        with io.MoleculeWriter(out_file_path) as mol_writer:
            mol_writer.write(mol_shell)

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('gcd_file', type=str, help="Input GCD file")
    parser.add_argument('out_dir', type=str, help="Output directory")
    args = parser.parse_args()

    if not os.path.isdir(args.out_dir):
        os.mkdir(args.out_dir)
    if not os.path.isfile(args.gcd_file):
        parser.error('%s is not a file.' % args.gcd_file)

    main(args.gcd_file, args.out_dir)

An assembly based on graph sets

#!/usr/bin/env python
#
# This script can be used for any purpose without limitation subject to the
# conditions at https://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx
#
# This permission notice and the following statement of attribution must be
# included in all copies or substantial portions of this script.
#
# 2016-03-23: created by the Cambridge Crystallographic Data Centre
#
'''
    graph_set_closure.py    -   expand a crystal such that all HBonds of the crystal's graph sets
    are included.

    The graph sets of the crystal will be calculated, then the crystal expanded to include
    all of the hydrogen bonds making up the graph sets.
'''
############################################################################

import os
import argparse

from ccdc.descriptors import CrystalDescriptors
from ccdc.io import CrystalReader, MoleculeWriter
from ccdc.molecule import Molecule

############################################################################

class Runner(argparse.ArgumentParser):
    '''Does the whole job.'''
    def __init__(self):
        '''Defines arguments and parses them.'''
        super(self.__class__, self).__init__(
            description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
        )
        self.add_argument(
            'refcode_or_files', nargs='+',
            help='Refcode or crystal file name.'
        )
        self.add_argument(
            '-o', '--output-file', '--output_file',
            help='Output molecule file.  If not given the molecule will be written to stdout in mol2 format.'
        )

        self.args = self.parse_args()

    @property
    def csd(self):
        '''Lazily create a CSD reader if necessary.'''
        if not hasattr(self, '_csd'):
            self._csd = CrystalReader('csd')
        return self._csd

    @property
    def searcher(self):
        '''Lazily create a GraphSetSearch instance.'''
        if not hasattr(self, '_searcher'):
            self._searcher = CrystalDescriptors.GraphSetSearch()
        return self._searcher

    def expand(self, crystal):
        '''Expand the crystal by the symmetry operators of the GraphSet HBonds.'''
        gsets = self.searcher.search(crystal)
        symms = {
            s
            for gs in gsets
            for hb in gs.hbonds
            for s in hb.symmetry_operators
            if s
        }
        mols = [
            crystal.symmetric_molecule(s)
            for s in sorted(symms)
        ]
        mol = Molecule(crystal.identifier)
        for m in mols:
            mol.add_molecule(m)
        return mol

    def run(self):
        '''Runs over all refcodes and files.'''
        if self.args.output_file:
            self.writer = MoleculeWriter(self.args.output_file)
        else:
            self.writer = MoleculeWriter('stdout', format='mol2')
        for r_or_f in self.args.refcode_or_files:
            if os.path.exists(r_or_f):
                reader = CrystalReader(r_or_f)
                for c in reader:
                    self.writer.write(self.expand(c))
            else:
                c = self.csd.crystal(r_or_f)
                self.writer.write(self.expand(c))
        self.writer.close()

############################################################################

if __name__ == '__main__':
    Runner().run()

Other molecular expansions

This example constructs various molecular expansions of a crystal based on packing, slicing, hydrogen bond and contact networks, and polymer expansion:

#!/usr/bin/env python
#
# This script can be used for any purpose without limitation subject to the
# conditions at https://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx
#
# This permission notice and the following statement of attribution must be
# included in all copies or substantial portions of this script.
#
# 2017-07-17: created by the Cambridge Crystallographic Data Centre
#

"""
    crystal_expansion.py    -   show the various molecular expansions of a crystal.

    This script creates various multi-molecular structures from a crystal.

    The structures will be written to a multi-mol2 file.
"""

import argparse
from ccdc import io
from ccdc.descriptors import MolecularDescriptors
import os


class Runner(argparse.ArgumentParser):
    """Defines arguments, parses the command line and runs the job."""

    def __init__(self):
        """Defines the arguments."""
        super(self.__class__, self).__init__(description=__doc__,
                                             formatter_class=argparse.ArgumentDefaultsHelpFormatter)
        self.add_argument(
            'refcodes', nargs='+', help='CSD Refcodes to expand.'
        )
        self.add_argument(
            '-o', '--output-directory', '--output_directory', default='.',
            help='Optional output directory'
        )

    def run(self):
        """Runs the task."""
        args = self.parse_args()

        if not os.path.exists(args.output_directory):
            os.makedirs(args.output_directory)

        csd = io.EntryReader('csd')

        for refcode in args.refcodes:
            crystal = csd.crystal(refcode)

            molecular_shell = crystal.molecular_shell(distance_range=(0.0, 7.0))
            molecular_shell.identifier = '%s_molecular_shell' % refcode

            packing_shell = crystal.packing_shell(30)
            packing_shell.identifier = '%s_packing_shell' % refcode

            hbond_network = crystal.hbond_network(repetitions=3)
            hbond_network.identifier = '%s_hbond_network' % refcode

            contact_network = crystal.contact_network(repetitions=2)
            contact_network.identifier = '%s_contact_network' % refcode

            packing = crystal.packing(((-1, -1, -1), (0.5, 0.5, 0.5)))
            packing.identifier = '%s_packing' % refcode

            slicing = crystal.slicing(
                MolecularDescriptors.atom_plane(*[a for a in crystal.molecule.heavy_atoms])
            )
            slicing.identifier = '%s_slicing' % refcode

            polymer_expansion = crystal.polymer_expansion(repetitions=12)
            polymer_expansion.identifier = '%s_polymer' % refcode

            output_file = os.path.join(args.output_directory, '%s.mol2' % refcode)
            with io.MoleculeWriter(output_file) as writer:
                writer.write(molecular_shell)
                writer.write(packing_shell)
                writer.write(hbond_network)
                writer.write(contact_network)
                writer.write(packing)
                writer.write(slicing)
                writer.write(polymer_expansion)


if __name__ == '__main__':
    Runner().run()

Analysis of HBond dimensionality

This example uses the hbond_network expansions and a principle axes aligned molecule to determine in how many dimensions a crystal’s HBonds are expanding.

#!/usr/bin/env python
#
# This script can be used for any purpose without limitation subject to the
# conditions at https://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx
#
# This permission notice and the following statement of attribution must be
# included in all copies or substantial portions of this script.
#
# 2017-09-04: created by the Cambridge Crystallographic Data Centre
#
'''
    hbond_dimensionality.py - establish the dimensionality of a crystal's hydrogen bonded network.

    Expands the hbond network of a crystal, and sees which dimenions are growing.  For example:
        ACSALA (aspirin) shows no dimension grows, i.e the network forms a ring
        YIGPIO02 (Ritonavir) grows in one dimension
        HXACAN (Paracetamol) grows in two dimensiont
        DMANTL08 (Mannitol) grows in all three dimensions.
'''
###########################################################################

import os
import argparse

from ccdc import io
from ccdc.descriptors import MolecularDescriptors

csd = io.EntryReader('csd')

###########################################################################

class Runner(argparse.ArgumentParser):
    '''Runs the calculation.'''
    def __init__(self):
        super(self.__class__, self).__init__(description=__doc__)
        self.add_argument('crystals', nargs='+',
            help='Refcodes or crystal files to analyese'
        )
        self.args = self.parse_args()

    def run(self):
        print('Identifier\tNetwork dimensionality')
        for a in self.args.crystals:
            if not os.path.exists(a):
                c = csd.crystal(a)
                self.analyse(c)
            else:
                for c in io.CrystalReader(a):
                    self.analyse(c)

    def analyse(self, crystal):
        '''Calculates the dimensionality of the crystal.'''
        m = crystal.molecule
        siteless = not m.all_atoms_have_sites
        if siteless:
            m.add_hydrogens()
            crystal.molecule = m
        shells = [
            crystal.hbond_network(repetitions=2), crystal.hbond_network(repetitions=5)
        ]
        boxes = [MolecularDescriptors.PrincipleAxesAlignedBox(s) for s in shells]
        vecs = [
            [b.x_vector.length, b.y_vector.length, b.z_vector.length]
            for b in boxes
        ]
        ratios = [vecs[-1][i]/vecs[0][i] for i in range(3)]
        thresh = 1.1
        ndims = sum(r > thresh for r in ratios)
        #print(ratios)
        #print(ndims)
        print('%8s\t%d' % (crystal.identifier, ndims))


###########################################################################

if __name__ == '__main__':
    Runner().run()

Descriptor HTML report

Note that this script makes use of functionality from the cookbook utility module.

#!/usr/bin/env python
#
# This script can be used for any purpose without limitation subject to the
# conditions at https://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx
#
# This permission notice and the following statement of attribution must be
# included in all copies or substantial portions of this script.
#
# 2015-06-17: created by the Cambridge Crystallographic Data Centre
#

"""
Generate a HTML table of molecular and crystallographic properties for
structures in the CSD.

This includes, amongst more generic ones, point group analysis descriptors.

For more help use the -h argument.

"""

import sys
import argparse
import os.path
import collections
import codecs

this_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(1, this_dir)
import utilities
sys.path.remove(this_dir)

from ccdc import io
from ccdc.descriptors import MolecularDescriptors

##############################################################################

# The headers for the HTML table
HEADERS = (
    'REFCODE',
    'Identifier',
    'Hydrate',
    'Formula',
    'Space Gp. Symbol',
    'Z Prime',
    'Unique Chem. Units',
    'Mol. Wt.',
    'Donors',
    'Acceptors',
    'Rotatable bonds',
    'Point Gp. Symbol',
    'Point Gp. Order',
    'Point Gp. Description',
    'Atom counts',
)

##############################################################################

def get_num_unique_structures(molecule):
    "Return the number of unique structures in a molecule."
    d = collections.defaultdict(int)
    for m in molecule.components:
        smi = m.smiles
        if smi is None:
            smi = 'Unknown bond in SMILES'
        d[smi] += 1
    return len(d)

def get_atom_counts(molecule):
    "Return a list of strings of atom counts."
    atom_cts = collections.defaultdict(int)
    for atom in molecule.atoms:
        atom_cts[atom.atomic_symbol] += 1
    l = sorted([(v, k) for k, v in atom_cts.items()])
    l.reverse()
    return ['%s: %d' % (k, v) for v, k in l]

##############################################################################

def get_row_data(entry):
    "Return a tuple of molecular and crystallographic properties."

    # Extract the molecule (which may have several components) and the heaviest
    # component from the crystal structure. The heaviest component is the
    # molecule of interest.
    crystal = entry.crystal
    mol = crystal.molecule
    mol_of_interest = mol.heaviest_component

    # Find out how many unique molecules are present in the crystal structure
    num_unique_structures = get_num_unique_structures(mol)

    # Ensure all hydrogen atoms are present, for donor calculations
    mol_of_interest.add_hydrogens()

    # Counts of molecular properties
    donors = len([at for at in mol_of_interest.atoms if at.is_donor])
    accs   = len([at for at in mol_of_interest.atoms if at.is_acceptor])
    rots   = len([b  for b  in mol_of_interest.bonds if b.is_rotatable])

    # Collect a list of atoms present (which could be taken from the formula)
    atom_counts = get_atom_counts(mol_of_interest)

    #   Another datum requested
    hydrate = 'hydrate' in entry.chemical_name.lower()

    # Molecular weight (before stripping hydrogen atoms!)
    mol_wt = mol_of_interest.molecular_weight

    #   Strip hydrogens for point group calculations
    mol_of_interest.remove_hydrogens()

    #   Returns a triple (order, symbol, description)
    pt_group_info = MolecularDescriptors.point_group_analysis(mol_of_interest)

    #   Now we have all the information
    return (
        mol.identifier,
        entry.chemical_name,
        hydrate,
        entry.formula,
        crystal.spacegroup_symbol,
        crystal.z_prime,
        num_unique_structures,
        mol_wt,
        donors,
        accs,
        rots,
        pt_group_info[0],
        pt_group_info[1],
        pt_group_info[2],
        ' '.join(atom_counts)
    )

def main(gcd_file, out_file):
    "Run the main program."

    # Collect the data of interest in a list.
    rows = []
    with io.EntryReader(gcd_file, format='identifiers') as entry_reader:
        for entry in entry_reader:
            rows.append(get_row_data(entry))

    # Write out the report as a HTML file.
    with codecs.open(out_file, 'w', encoding='utf8') as writer:
        writer.write('\n'.join([
            '<HTML>',
            '<HEAD>',
            '<TITLE>Point group analysis</TITLE>',
            '</HEAD>',
            '<BODY>',
            '<H1>Point group analysis</H1>\n'
        ]))
        utilities.write_html_table(HEADERS, rows, stream=writer, border=1)
        writer.write('</BODY></HTML>\n')

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('gcd_file', type=str, help="Input GCD file")
    parser.add_argument('out_file', type=str, help="Output HTML file")
    args = parser.parse_args()

    if not os.path.isfile(args.gcd_file):
        parser.error('%s is not a file.' % args.gcd_file)

    main(args.gcd_file, args.out_file)

Analysing crystal morphologies

This example creates BFDH morphologies of CSD structures and analyses the aspect ratios of them to predict whether they will from needles, plates, laths or blocks. For example, for the polymorphs of the anti-inflammatory XOCXUI01 and XOCXUI02:

Classification of morphologies of XOCXUI01 and XOCXUI02

Classification of XOCXUI01 and XOCXUI02

#!/usr/bin/env python
#
# This script can be used for any purpose without limitation subject to the
# conditions at https://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx
#
# This permission notice and the following statement of attribution must be
# included in all copies or substantial portions of this script.
#
''' morphology_analysis.py  -   compare the habits of two crystal structures.

    Determines the aspect ratios of the BFDH morphologies of the two crystals
    and plots them on a chart, showing which class of habit they are likely to
    possess.
'''

import argparse
import os
import matplotlib
matplotlib.use('Agg')
from matplotlib import pyplot as plt

from ccdc.morphology import BFDHMorphology
from ccdc.io import EntryReader

arg_parser = argparse.ArgumentParser(description=__doc__)
arg_parser.add_argument(
    'refcodes', nargs='+', help='Refcodes of structures to analyse'
)
arg_parser.add_argument(
    '-o', '--output', default=os.path.join('.', 'morphology_aspects.png'), help='Output chart PNG filename.'
)
args = arg_parser.parse_args()

#  Set up plot
fig = plt.figure(figsize=(10., 10.))
ax = plt.axes()
ax.set_xlim(0, 1)
ax.set_ylim(0, 1)
ax.set_xlabel('M:L Ratio', fontsize=16)
ax.set_ylabel('S:M Ratio', fontsize=16)
ax.annotate('Needle', (0.23, 0.95), fontsize=16)
ax.annotate('Block', (0.73, 0.95), fontsize=16)
ax.annotate('Lath', (0.23, 0.45), fontsize=16)
ax.annotate('Plate', (0.73, 0.45), fontsize=16)
plt.plot([0.5, 0.5], [0, 1], c='black', linewidth=2)
plt.plot([0, 1], [0.5, 0.5], c='black', linewidth=2)
ptc = fig.patch
ptc.set_facecolor('white')


def make_plot(asp, colour, refcode):
    #  This function plots the ratios
    y, x = get_ratios(asp)
    plt.scatter(x, y, c=colour, s=40)
    ax.annotate('{}'.format(refcode), (x + 0.007, y - 0.007), fontsize=10)


def get_ratios(aspect):
    #  This function returns the side length ratios small:medium and medium:large
    s_m = aspect[0] / aspect[1]  # Small to medium side lengths ratio
    m_l = aspect[1] / aspect[2]  # Medium to long side lengths ratio
    return s_m, m_l


def get_form(aspect):
    #  In this function we designate a shape from the aspect ratio
    s_m, m_l = get_ratios(aspect)
    if s_m < 0.5:
        if m_l < 0.5:
            return 'Lath'
        else:
            return 'Plate'
    elif m_l < 0.5:
        return 'Needle'
    else:
        return 'Block'


if __name__ == '__main__':
    # Read structures from the CSD
    reader = EntryReader('CSD')
    if not os.path.exists(os.path.dirname(args.output)):
        os.makedirs(os.path.dirname(args.output))

    # Read in two crystal polymorphs of the drug Nabumetone
    for refcode in args.refcodes:
        crystal = reader.crystal(refcode)
        morphology = BFDHMorphology(crystal)
        bounding_box = morphology.oriented_bounding_box
        #  By calculating the bounging box of the morphologies, we can get the aspect ratios
        aspect = [
            bounding_box.minor_length, bounding_box.median_length, bounding_box.major_length
        ]
        # The aspect ratios can be used to designate a shape for the crystal
        shape = get_form(aspect)
        print('{}: {}'.format(refcode, shape))
        # This can be plotted
        make_plot(aspect, 'red', refcode)
        # Using the plotting from Morphology.py we can also overlay the morphologies in 3D
        plt.savefig(args.output)

Charting crystal morphologies

This example draws an image of the morphology and tables of plane angles and of face relative areas of morphologies of one or more crystals of the CSD. An example for HXACAN is in ../images/hxacan_morphology.html.

#!/usr/bin/env python
#
# This script can be used for any purpose without limitation subject to the
# conditions at https://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx
#
# This permission notice and the following statement of attribution must be
# included in all copies or substantial portions of this script.
#
'''morphology_plot.py   -   make an image of a morphology.
'''

import argparse
import codecs
import itertools
import io as sys_io
import os
import matplotlib
matplotlib.use('Agg')
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.mplot3d.art3d import Poly3DCollection

from ccdc.io import EntryReader
from ccdc.morphology import BFDHMorphology
from ccdc.descriptors import GeometricDescriptors

# Visualisation functions


def wireframe(face, ax):
    #  Plot wireframe of morphology in matplotlib
    for n, edge in enumerate(face.edges):
        Axes3D.plot(ax,
                    [coord[0] for coord in edge],
                    [coord[1] for coord in edge],
                    [coord[2] for coord in edge],
                    c='black',
                    linewidth=1.5)


def skin(face, colour, ax):
    #  Mesh the facets for matplotlib
    for edge in face.edges:
        vertices = [(edge[0], edge[1], face.centre_of_geometry)]
        Axes3D.add_collection3d(ax, Poly3DCollection(
            vertices, color=colour, linewidth=0, alpha=0.1))


def add_labels(morphology, face, ax):
    #  Add labels to facets
    ax.text(face.centre_of_geometry[0], face.centre_of_geometry[1], face.centre_of_geometry[2],
            '''   {}    {}'''.format(face.miller_indices.hkl,
                                     round(morphology.relative_area(face.miller_indices), 3)),
            color='black')

    ax.scatter(face.centre_of_geometry[0],
               face.centre_of_geometry[1],
               face.centre_of_geometry[2],
               s=10,
               color='black')


def generate_plot(morphology, colour):
    #  Plot the morphology in matplotlib
    # Set up Axes
    fig = plt.figure(figsize=(10., 10.))  # 3D graph instance
    ax = fig.add_subplot(111, projection='3d')  # 3D Axes
    ax.set_xlim3d(-20, 20)
    ax.set_ylim3d(-20, 20)
    ax.set_zlim3d(-20, 20)
    ax.set_xlabel('X axis')
    ax.set_ylabel('Y axis')
    ax.set_zlabel('Z axis')
    ax.grid(False)
    plt.axis('off')

    for facet in morphology.facets:
        wireframe(facet, ax)
        skin(facet, colour, ax)
        add_labels(morphology, facet, ax)
    return ax, plt


# Analysis functions


def compare_angles(morphology, format='html'):
    # Print a table of every combination of facets and their dihedral angle
    indices = {}
    for f in morphology.facets:
        indices[f.miller_indices.hkl] = GeometricDescriptors.Plane.from_points(*f.coordinates)
    # Every pair of facets to compare
    combinations = list(itertools.combinations([k for k in indices], 2))
    if format == 'html':
        return '<table><tr><th>Plane1</th><th>Plane2</th></th><th>Angle</th>%s</table>' % (
            '\n'.join('<tr><td>%s</td><td>%s</td><td align="right">%.2f</td></tr>' %
                      (c[0], c[1], 180 - (indices[c[0]].plane_angle(indices[c[1]])))
                      for c in combinations)
        )
    for combination in combinations:
        angle = 180 - (indices[combination[0]]).plane_angle(indices[combination[1]])
        print('planes {} & {}: angle = {}'.format(combination[0], combination[1], angle))


def print_areas(morphology, format='html'):
    '''Print a table of every face and its relative area.'''
    if format == 'html':
        return '<table><tr><th>Indices</th><th>Area</th></tr>%s</table>' % (
            '\n'.join('<tr><td>%s</td><td align="right">%.2f</td></tr>' %
                      (face.miller_indices.hkl, morphology.relative_area(face.miller_indices))
                      for face in morphology.facets
                      )
        )
    for face in morphology.facets:
        print('{}: relative area = {}'.format(
            face.miller_indices.hkl, round(morphology.relative_area(face.miller_indices), 3)))


def run_all(morphology):
    # Run all of the functions in this script
    ax, plt = generate_plot(morphology, (0, 0, 1, 0.1))
    compare_angles(morphology)
    print_areas(morphology)
    return ax, plt


if __name__ == '__main__':
    arg_parser = argparse.ArgumentParser(
            description=__doc__,
            formatter_class=argparse.ArgumentDefaultsHelpFormatter
            )
    arg_parser.add_argument(
        'refcodes', nargs='+', help='refcodes of CSD structures to plot'
    )
    arg_parser.add_argument(
        '-o', '--output',
        default=os.path.join(os.getcwd(), 'morphologies.html'),
        help='Full path of the HTML file for the output report.'
    )

    args = arg_parser.parse_args()
    reader = EntryReader('CSD')  # Read structures from the CSD
    if not os.path.exists(os.path.dirname(args.output)):
        os.makedirs(os.path.dirname(args.output))

    lines = [
        '<style>',
        '.box { margin:0; }',
        '.left { float: left; width: 50%; overflow: hidden; }',
        '.middle { float: left; width: 25%; overflow: hidden; }',
        '.right { float: left; width: 20%; overflow: hidden; }',
        '</style>',
        '']

    for refcode in args.refcodes:
        cryst = reader.crystal(refcode)  # Open the crystal from CSD entry SUCACB03
        morphology = BFDHMorphology(cryst)  # Create a morphology object
        ax, plt = generate_plot(morphology, (0, 0, 1, 0.1))
        svg = sys_io.BytesIO()
        plt.savefig(svg, format='svg')
        lines.append('<div class="box">')
        lines.append('<div class="left">%s</div>' % svg.getvalue().decode('utf-8'))
        lines.append('<div class="middle">%s</div>' % compare_angles(morphology, format='html'))
        lines.append('<div class="right">%s</div>' % print_areas(morphology, format='html'))
        lines.append('</div>')

        with codecs.open(args.output, 'w', encoding='utf-8') as writer:
            for line in lines:
                writer.write('%s%s' % (line, os.linesep))

Functional groups of a crystal’s HBonds

This writes a csv file containing the hydrogen bonding atoms of crystals with the functional groups of the donor and acceptor atoms.

#!/usr/bin/env python
#
# This script can be used for any purpose without limitation subject to the
# conditions at https://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx
#
# This permission notice and the following statement of attribution must be
# included in all copies or substantial portions of this script.
#
'''
    functional_groups_of_hbonds.py  -   produce a spreadsheet of functional groups of hbonds
'''
################################################################################

import sys
import csv
import argparse

from ccdc.io import CrystalReader
from ccdc.descriptors import CrystalDescriptors

################################################################################

class Runner(argparse.ArgumentParser):
    '''Defines and parses arguments, runs the job.'''
    def __init__(self):
        super(self.__class__, self).__init__(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
        self.add_argument(
            'gcd_file',
            help='GCD file of crystal structures to analyse'
        )
        self.add_argument(
            '-o', '--output-file', '--output_file', default=None,
            help='Output csv file [stdout]'
        )
        self.args = self.parse_args()

    def run(self):
        '''Runs the job.'''
        calculator = CrystalDescriptors.HBondCoordination()
        reader = CrystalReader(self.args.gcd_file)
        if self.args.output_file:
            f = open(self.args.output_file, 'w')
        else:
            f = sys.stdout
        headers = [
            'Identifier', 'Donor residue', 'Donor functional group', 'Donor atom',
            'Acceptor residue', 'Acceptor functional group', 'Acceptor atom'
        ]
        writer = csv.writer(f)
        writer.writerow(headers)
        def get_residue(crystal, label):
            for i, c in enumerate(crystal.molecule.components):
                try:
                    c.atom(label)
                    return 'RES%d' % (i+1)
                except RuntimeError:
                    pass
            raise RuntimeError('Ambiguous atom label %s' % label)


        for c in reader:
            if c.has_disorder:
                continue
            predictions = calculator.predict(c)
            for hb in c.hbonds():
                if hb.atoms[1] in hb.atoms[0].neighbours:
                    don_at, acc_at = hb.atoms[0], hb.atoms[-1]
                else:
                    don_at, acc_at = hb.atoms[-1], hb.atoms[0]
                don, acc = predictions.functional_groups_of_hbond(hb)
                if don is None:
                    don = '%s Nothing Unidentified group Nothing' % don_at.label
                if acc is None:
                    acc = '%s Nothing Unidentified group Nothing' % acc_at.label
                don_bits = don.split()
                acc_bits = acc.split()
                row = [
                    c.identifier,
                    get_residue(c, don_bits[0]),
                    ' '.join(don_bits[2:-1]),
                    don_bits[0],
                    get_residue(c, acc_bits[0]),
                    ' '.join(acc_bits[2:-1]),
                    acc_bits[0]
                ]
                writer.writerow(row)

################################################################################

if __name__ == '__main__':
    Runner().run()

A simple spreadsheet of hydrogen bond coordination likelihoods

This example iterates over a GCD file (or any other crystal file) creating a csv file of hbond coordination likelihoods.

#!/usr/bin/env python
#
# This script can be used for any purpose without limitation subject to the
# conditions at https://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx
#
# This permission notice and the following statement of attribution must be
# included in all copies or substantial portions of this script.
#
'''
    hbond_coordination_prediction_spreadsheet.py - calculate hbond coordination predictions for a set of crystals

    This makes a spreadsheet of hbond coordination predictions containing columns indicating predicted probability and whether or not that number of hbonds was observed.
'''
#############################################################################################

import argparse

from ccdc.io import CrystalReader
from ccdc.descriptors import CrystalDescriptors
from ccdc.utilities import CSVWriter

#############################################################################################

class Runner(argparse.ArgumentParser):
    '''Defines arguments, parses arguments and runs the analysis.'''
    def __init__(self):
        super(self.__class__, self).__init__(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
        self.add_argument(
            'gcd_file', type=str,
            help='GCD file of crystal structures to analyse'
        )
        self.add_argument(
            '-o', '--output-file', '--output_file', default=None,
            help='Output csv file [stdout]'
        )
        self.add_argument(
            '-m', '--model-path', '--model_path', default=None,
            help='Path to a directory of coordination models'
        )
        self.args = self.parse_args()

    def run(self):
        settings = CrystalDescriptors.HBondCoordination.Settings()
        if self.args.model_path is not None:
            settings.coordination_models_path = self.args.model_path
        calculator = CrystalDescriptors.HBondCoordination(settings=settings)
        reader = CrystalReader(self.args.gcd_file)

        outfile = self.args.output_file if self.args.output_file else None

        headers = ['Identifier', 'Molecule', 'Atom', 'Functional group', 'D/A']
        for i in range(7):
            headers.extend(['%s %d likelihood' % ('>=' if i == 6 else '=', i), 'Observed'])

        with CSVWriter(file_name=outfile, header=headers) as writer:
            for c in reader:
                if c.has_disorder:
                    continue
                predictions = calculator.predict(c)
                for r in predictions.to_csv(separator=None):
                    writer.write_row(r)

#############################################################################################

if __name__ == '__main__':
    Runner().run()

Exploring hydrogen bond coordination models

This example builds a coordination model as a tree of possible coordinations for the donors and acceptors of a crystal. This is only one of many possible models that can be constructed from the API.

#!/usr/bin/env python
#
# This script can be used for any purpose without limitation subject to the
# conditions at https://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx
#
# This permission notice and the following statement of attribution must be
# included in all copies or substantial portions of this script.
#
"""
    hbond_coordination_tree.py  -   create a tree of possible hbond coordinations with attached probabilities.

    The tree enumerates each possible coordination for each donor atom. Subtrees from this enumerates each possible coordination for each acceptor atom consistent with the donations previously defined.  For example in the subtree where the first donor donates once and the second donor donates once, the sum of the acceptor atoms' coordination must sum to two.

    For convenience, the created tree will be returned, so invoking this interactively allows one to explore the tree data structure.
"""

import argparse
import os

from ccdc.descriptors import CrystalDescriptors
from ccdc import io
from ccdc.diagram import DiagramGenerator
from ccdc.utilities import CSVWriter, HTMLReport, html_table

csd = io.EntryReader('csd')


class Node(object):
    """A node in a hydrogen bond propensity tree.

    :param label: (:obj:`str`) A label for the node
    :param is_donor: (:obj:`boolean`) Whether the node is a donor.
    :param coordination: (:obj:`float`) This node's coordination score.
    :param probability: (:obj:`float`) The hydrogen bond probability of this node.
    :param cumulative_probability: (:obj:`float`)  The cumulative hydrogen bond probability of this
        node and all the nodes on the path to it.
    :param is_observed: (:obj:`boolean`) Whether the node is on the observed coordination path.
    :param current_donations: (:obj:`list`) The nodes which are donors for this node.
    :param remaining_donors: (:obj:`list`) The nodes which are donors for this node but have not
        yet been factored into the cumulative probability.
    :param remaining_acceptors: (:obj:`list`) Remaining acceptors for this node.
    :param predictions: (:obj:`list`) The probability predictions for this node.
    """

    def __init__(self, label, is_donor, coordination, probability, cumulative_probability,
                 is_observed, current_donations, remaining_donors, remaining_acceptors, predictions):
        """Holds all the info for a decision tree node."""
        self.label = label
        self.is_donor = is_donor
        self.coordination = coordination
        self.probability = probability
        self.cumulative_probability = cumulative_probability * self.probability
        self.is_observed = is_observed
        self.remaining_donors = remaining_donors
        self.remaining_acceptors = remaining_acceptors
        self.current_donations = current_donations
        self.predictions = predictions
        self.children = []
        self._expand()

    def _expand(self):
        """Expands child nodes."""
        if self.remaining_donors:
            preds = self.predictions.predictions_for_label(self.remaining_donors[0])
            self.children = [
                Node(self.remaining_donors[0], True, p, preds[1][p], self.cumulative_probability,
                     p == preds[0], p + self.current_donations,
                     self.remaining_donors[1:], self.remaining_acceptors, self.predictions)
                for p in preds[1]
                if preds[1][p] != 0.0 and preds[1][p]*self.cumulative_probability > 0.0
            ]
        elif self.remaining_acceptors:
            preds = self.predictions.predictions_for_label(self.remaining_acceptors[0], 'acceptor')
            self.children = [
                Node(self.remaining_acceptors[0], False, p, preds[1][p],
                     self.cumulative_probability, p == preds[0], self.current_donations - p,
                     self.remaining_donors, self.remaining_acceptors[1:], self.predictions)
                for p in preds[1]
                if preds[1][p] != 0.0 and p <= self.current_donations
                and preds[1][p] != 0.0 and preds[1][p] * self.cumulative_probability > 0.0
            ]

    @property
    def is_leaf(self):
        """:obj:`boolean`: Whether or not this node is a leaf node."""
        return self.children == []

    @property
    def observed_path(self):
        """:obj:`list` of :obj:`Node`: All nodes corresponding to the crystal's observed coordination."""
        if self.is_leaf:
            if self.is_observed:
                return [self]
        else:
            for k in self.children:
                if k.is_observed:
                    return [self] + k.observed_path
        print(self.label, self.is_observed, self.coordination)
        print([(k.is_observed, k.label, k.coordination) for k in self.children])
        raise RuntimeError('No observed child')

    @property
    def leaves(self):
        """:obj:`list` of :obj:`Node`: All the leaf nodes in the tree."""
        if self.is_leaf:
            yield self
        else:
            for k in self.children:
                for l in k.leaves:
                    yield l

    def walk(self):
        """Walks over the tree yielding all nodes.

        :returns: (:obj:`Node`) Yields all the nodes in the tree.
        """
        yield self
        for k in self.children:
            for x in k.walk():
                yield x

    def _set_depth(self, d):
        """Recursively sets the depth of the tree.

        :param d: (:obj:`int`) The current depth of the tree.
        """
        self.depth = d
        for k in self.children:
            k._set_depth(d+1)

    def _prune(self):
        """Remove paths where the leaf has still got unaccounted for donations."""
        for k in self.children:
            k._prune()
        to_go = [(i, k) for i, k in enumerate(self.children) if k.is_leaf and k.current_donations]
        to_go.sort(reverse=True)
        for i, k in to_go:
            self.children.pop(i)

    def _accumulate_das(self, dons, accs):
        """Sets in the node the accumulated coordinations.

        This will be attributes, dons and accs, contaiming labels of the donor or acceptor with a
        multiplicity given by the coordination.

        :param dons: (:obj:`list`) A list of donor atoms.
        :param accs: (:obj:`list`) A list of acceptor atoms.
        """
        if self.is_donor:
            dons = dons + [self.label] * self.coordination
        else:
            accs = accs + [self.label] * self.coordination
        if self.is_leaf:
            self.donors = dons
            self.acceptors = accs
        else:
            for k in self.children:
                k._accumulate_das(dons, accs)

    def _set_cumulative_prob(self, prob):
        """There are various different ways of setting the cumulative probability.
        Here we have chosen to ignore the probability associated with coordination scores of 0.

        :param prob: (:obj:`float`) The probability to factor in to the total.
        """
        # Ignore 0-coordinated acceptors
        if not self.is_donor and self.coordination == 0:
                self.cumulative_probability = prob
        else:
            self.cumulative_probability = self.probability * prob
        for k in self.children:
            k._set_cumulative_prob(self.cumulative_probability)

    def _set_full_cumulative_prob(self, prob):
        """Sets the full cumulative probability including the 0-coordination probabilities.

        :param prob: (:obj:`float`) The probability to factor in to the total.
        """
        self.full_cumulative_probability = self.probability * prob
        for k in self.children:
            k._set_full_cumulative_prob(self.full_cumulative_probability)

    def _accumulate_coordinations(self, accumulated):
        """Accumulates the coordination in a list.

        This is to ensure leaf nodes have the complete list of coordinations contributing.

        :param accumulated: (:obj:`list`) The coordinations to add to the total.
        """
        self.accumulated_coordination = accumulated + [self.coordination]
        for k in self.children:
            k._accumulate_coordinations(self.accumulated_coordination)


class RootNode(Node):
    """The root node of the tree.

    :param donors: (:obj:`list`) The donor atoms.
    :param acceptors: (:obj:`list`) The acceptor atoms.
    :param predictions: (:obj:`list`) The probability predictions.
    :param children: (:obj:`Node`) The child nodes of the root.
    """

    def __init__(self, donors, acceptors, predictions, children):
        self.label = 'ROOT'
        self.donors = donors
        self.acceptors = acceptors
        self.is_donor = None
        self.coordination = 0
        self.probability = 1.0
        self.cumulative_probability = 1.0
        self.is_observed = True
        self.remaining_donors = donors
        self.remaining_acceptors = acceptors
        self.current_donations = 0
        self.predictions = predictions
        self.children = children

        for k in self.children:
            k._expand()

        self._prune()
        self._set_depth(0)
        self._accumulate_das([], [])
        self._set_cumulative_prob(1.0)
        self._set_full_cumulative_prob(1.0)

        for k in self.children:
            k._accumulate_coordinations([])

    @staticmethod
    def make_tree(crystal):
        """Makes the tree from the crystal.

        :param crystal: (:obj:`ccdc.crystal.Crystal`) A CSD Python API Crystal object.
        :returns: (:obj:`RootNode`) A Hydrogen Bond Propensity tree for the crystal.
        """
        coordination_calc = CrystalDescriptors.HBondCoordination()
        predicts = coordination_calc.predict(crystal)
        dons = [d.label for d in coordination_calc.donor_atoms]
        accs = [a.label for a in coordination_calc.acceptor_atoms]
        preds = predicts.predictions_for_label(dons[0])
        return RootNode(dons, accs, predicts, [
            Node(dons[0], True, p, preds[1][p], 1.0, p == preds[0], p, dons[1:], accs, predicts)
            for p in preds[1] if preds[1][p] != 0.0
        ])


class Runner(argparse.ArgumentParser):
    """Handle command-line arguments and generate the reports."""

    def __init__(self):
        """Set up the argument parser."""
        super(self.__class__, self).__init__(description=__doc__,
                                             formatter_class=argparse.ArgumentDefaultsHelpFormatter)
        self.add_argument('refcodes', nargs='+', help='CSD Refcodes or files')
        self.add_argument('-o', '--output',
                          help='The output file to use. HTML or CSV files can be written.')
        self.args = self.parse_args()


        self.output_format = None
        """:obj:`str` An identifier for the output format to write."""

        self.output_file = None
        """:obj:`str` The full file name of the output file."""

        self.output_dir = None
        """:obj:`str` The directory to hold output files."""

        self.diagram_generator = None
        """:obj:`ccdc.diagram.DiagramGenerator` A diagram generator."""

        if not self.args.output:
            self.output_format = 'stdout'
        else:
            if not (self.args.output.endswith('html') or self.args.output.endswith('csv')):
                raise Exception('Cannot recognise format for output file %s! .html or .csv are valid.'
                                % self.args.output)

            self.output_file = os.path.abspath(self.args.output)
            self.output_dir = os.path.dirname(self.output_file)

            # Make sure the output directory exists
            if not os.path.exists(self.output_dir):
                os.makedirs(self.output_dir)

            if self.output_file:
                self.diagram_generator = DiagramGenerator()
                self.diagram_generator.settings.return_type = 'SVG'

            self.output = None
            if self.args.output.endswith('html'):
                self.output_format = 'html'
            elif self.args.output.endswith('csv'):
                self.output_format = 'csv'

    def run(self):
        """Run the reporting on all specified crystals.

        :returns: (:obj:`RootNode`) A Hydrogen Bond Propensity tree for the last crystal processed.
        """
        for refcode_or_file in self.args.refcodes:
            if os.path.isfile(refcode_or_file):
                for crystal in io.CrystalReader(refcode_or_file):
                    tree = self.run_one(crystal)
            else:
                tree = self.run_one(csd.crystal(refcode_or_file))

        # Write the HTML footer for HTML reports
        if self.output_format == 'html':
            with self.get_output_writer() as report:
                report.write_footer()

        return tree

    def get_output_writer(self, identifier=''):
        """Get an output writer for the given identifer.

        This will reopen and close the files after each iteration so problems any structure
        don't result in all the data so far being lost.

        :param identifier: (:obj:`str`) The identifier of the structure to write for.
        :returns: (:obj:`file`) A context handler file-like object that can write output,
            appropriate to the output format specified.
        """
        if self.output_format == 'html':
            return HTMLReport(file_name=self.output_file,
                              report_title='Hydrogen Bond Propensity Report',
                              append=True)
        elif self.output_format == 'csv':
            out_path = os.path.dirname(self.output_file)
            basename, ext = os.path.splitext(os.path.basename(self.output_file))
            output_file_path = os.path.join(out_path, '%s_%s%s' % (basename, identifier, ext))
            return CSVWriter(file_name=output_file_path)
        elif self.output_format == 'stdout':
            return CSVWriter(stdout=True)

    def run_one(self, crystal):
        """Calculates the tree for one crystal and writes results to output.

        :param crystal: (:obj:`ccdc.crystal.Crystal`) A CSD Python API Crystal object.
        :returns: (:obj:`RootNode`) A Hydrogen Bond Propensity tree.
        """
        tree = RootNode.make_tree(crystal)
        if self.output_format == 'html':
            self.write_html(tree, crystal)
        else:
            self.write_csv(tree, crystal)

        return tree

    @staticmethod
    def csv_headers(tree):
        """Generate CSV headers for the given tree.

        :param tree: (:obj:`RootNode`) A Hydrogen Bond Propensity tree.
        :returns: (:obj:`list`) A list of CSV column headers for the given tree.
        """
        headers = ['Identifier']
        headers += ['D %s' % tree.donors[i] for i in range(len(tree.donors))]
        headers += ['A %s' % tree.acceptors[i] for i in range(len(tree.acceptors))]
        headers += ['Probability', 'Full Probability', 'Observed']
        return headers

    def write_csv(self, tree, crystal):
        """Write tree data to CSV.

        :param tree: (:obj:`RootNode`) A Hydrogen Bond Propensity tree.
        :param crystal: (:obj:`ccdc.crystal.Crystal`) A CSD Python API Crystal object.
        """
        observed_path = tree.observed_path
        leaves = [(n.cumulative_probability, n) for n in tree.leaves]
        leaves.sort(reverse=True)

        with self.get_output_writer(crystal.identifier) as writer:
            writer.write_header(self.csv_headers(tree))
            for probability, leaf in leaves:
                row = [crystal.identifier]
                row += list(leaf.accumulated_coordination)
                row += [probability, leaf.full_cumulative_probability, leaf == observed_path[-1]]
                writer.write_row(row)

    @staticmethod
    def propensity_data(leaves, observed_path):
        """Generate rows of HTML output.

        :param leaves: (:obj:`list`) A list of (:obj:`float`, :obj:`Node`) tuples of hydrogen bond
            probabilities and the leaves of the probability tree they correspond to.
        :param observed_path: (:obj:`list`) A list of :obj:`Node`s representing the observed
            coordination of the crystal.
        :returns: (:obj:`list`) A list of the propensity data for each leaf passed in.
        """
        for leaf_data in leaves:
            leaf = leaf_data[1]
            probability = leaf_data[0]
            yield [str(i) for i in leaf.accumulated_coordination] +\
                  ['%.4f' % probability, '%.4f' % leaf.full_cumulative_probability,
                   (leaf == observed_path[-1])]

    def write_html(self, tree, crystal):
        """Writes the given crystal's report to HTML.

        :param tree: (:obj:`RootNode`) A Hydrogen Bond Propensity tree.
        :param crystal: (:obj:`ccdc.crystal.Crystal`) A CSD Python API Crystal object.
        """
        # Sort the data by reverse probability
        leaves = [(n.cumulative_probability, n) for n in tree.leaves]
        leaves.sort(reverse=True)

        # Generate a diagram
        diagram = self.diagram_generator.image(crystal).replace(
            'Qt SVG Document', 'Diagram for %s' % crystal.identifier)

        with self.get_output_writer() as report:
            report.write_section_header(headline=crystal.identifier)
            report.write_figure_data(diagram, caption='Diagram for %s' % crystal.identifier,
                                     figure_id='diagram_%s' % crystal.identifier)
            table_header = ['D %s' % x for x in tree.donors] +\
                           ['A %s' % a for a in tree.acceptors] +\
                           ['Probability', 'Full Probability', 'Observed']
            report.write(html_table(data=self.propensity_data(leaves,
                                                              observed_path=tree.observed_path),
                                    header=table_header,
                                    table_id='data_%s' % crystal.identifier,
                                    caption='Propensity data for %s' % crystal.identifier))


if __name__ == '__main__':
    t = Runner().run()

Charting hydrogen bond propensities

This example creates a chart of hydrogen bond propensity groupings, plotting hydrogen bond score against hbond coordination score. Where there are structures to the right of and lower than the observed structure, there is a greater chance of the existence of polymorphic structures, for example:

YIGPIO03 hydrogen bond propensity chart

YIGPIO03 Hydrogen Bond Propensities

#!/usr/bin/env python
#
# This script can be used for any purpose without limitation subject to the
# conditions at https://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx
#
# This permission notice and the following statement of attribution must be
# included in all copies or substantial portions of this script.
#
'''
    hydrogen_bond_propensities.py - performs an hbp calculation and produces a chart of propensity vs coordination score
'''
################################################################################

import argparse
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

import ccdc.io
from ccdc.descriptors import CrystalDescriptors

################################################################################


def run(refcode):
    '''Runs the job.'''
    hbp = CrystalDescriptors.HBondPropensities()
    reader = ccdc.io.CrystalReader('csd')
    crystal = reader.crystal(refcode)
    print('Hydrogen Bond Propensity calculation for %s\n' % refcode)
    hbp.set_target(crystal)
    hbp.match_fitting_data(count=300)
    hbp.analyse_fitting_data()
    hbp.perform_regression()
    propensities = hbp.calculate_propensities()
    groups = hbp.generate_hbond_groupings()
    observed_groups = hbp.target_hbond_grouping()
    make_chart(groups, observed_groups, crystal.identifier)


def make_chart(groups, observed_groups, identifier):
    '''Makes a chart of the hbond groupings, plotting hbond score against coordination score.'''
    figure = plt.scatter([group.hbond_score for group in groups],
                         [group.coordination_score for group in groups],
                         s=50, label='Putative Structures')
    ax2 = plt.scatter(observed_groups.hbond_score,
                      observed_groups.coordination_score,
                      c='red', marker='*', s=250, label='Observed Structure')
    plt.origin = 'upper'
    plt.xlim(0, 1.0)
    plt.ylim(-1.0, 0)
    plt.legend(scatterpoints=1)
    ax = plt.gca()
    ax.xaxis.tick_top()
    ax.yaxis.tick_left()
    ax.set_xlabel('Mean H-Bond Propensity')
    ax.set_ylabel('Mean H-Bond Coordination')
    ax.xaxis.set_label_position('top')
    fig_name = '%s_standard_chart' % identifier
    plt.savefig('%s.png' % fig_name, format='png')
    plt.savefig('%s.svg' % fig_name, format='svg')


################################################################################

if __name__ == '__main__':
    parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter, description=__doc__)
    parser.add_argument('refcode', help='Refcode of structure to be analysed', type=str)

    args = parser.parse_args()

    run(args.refcode)