InChI examples

Accessing InChIs for CSD Entries

This example shows how to access InChIs for the components of a CSD entry. Where available, the InChI string and InChI key of each component of an entry are pre-calculated and stored in the CSD. For CSD entries where it is considered difficult to generate reliable InChIs, no InChIs are available.

#!/usr/bin/env python
#
# This script can be used for any purpose without limitation subject to the
# conditions at https://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx
#
# This permission notice and the following statement of attribution must be
# included in all copies or substantial portions of this script.
#
# 2022-12-12: created by the Cambridge Crystallographic Data Centre
#
'''
entry_inchi.py - extract InChI of CSD entries.

Access InChIs for components of a CSD entry. Example refcodes:
    BEGFIF - no reliable InChI is available for this entry
    AABHTZ - a single-component entry with InChI
    AAGGAG10 - a multi-component entry with InChI
'''
###########################################################################

from ccdc import io
# Module-level entry reader opened with the 'csd' identifier; main() looks up
# each refcode through it.
csd = io.EntryReader('csd')


def main():
    '''Print the stored InChI string and key for each component of the example entries.

    For every refcode a header line is printed, followed either by one
    numbered section per component InChI or by a notice that no reliable
    InChI is available for that entry.
    '''
    refcodes = ('BEGFIF', 'AABHTZ', 'AAGGAG10')
    for refcode in refcodes:
        print(f'=== Refcode: {refcode} ===')
        component_inchis = csd.entry(refcode).component_inchis
        if not component_inchis:
            # Nothing stored for this entry, so there is nothing to list.
            print('No reliable InChI available')
            continue
        # Components are numbered from 1 in the output.
        for number, component_inchi in enumerate(component_inchis, start=1):
            print(f'Component {number}:')
            print(f'  InChI string: {component_inchi.inchi}')
            print(f'  InChI key: {component_inchi.key}')


# Run the example only when this file is executed as a script.
if __name__ == '__main__':
    main()

Generating InChIs for molecules

This example shows how to generate the InChI string and InChI key of a molecule or crystal read from a file, including the InChIs of the individual components of a multi-component crystal.

#!/usr/bin/env python
#
# This script can be used for any purpose without limitation subject to the
# conditions at https://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx
#
# This permission notice and the following statement of attribution must be
# included in all copies or substantial portions of this script.
#
# 2022-12-12: created by the Cambridge Crystallographic Data Centre
#
'''
molecular_descriptors_inchi.py - generate InChI for a crystal or molecule.

'''

from pathlib import Path

from ccdc.io import MoleculeReader, CrystalReader
from ccdc.descriptors import MolecularDescriptors


def main():
    '''Generate and print InChI strings and keys for the example input files.

    Reads 'aspirin.mol2' as a molecule and 'caffeine.cif' as a crystal from
    the 'inchi' directory next to this script. For the molecule, its InChI
    string and key are printed. For the crystal, the composite InChI is
    printed first, followed by the InChI of each component of its molecule.

    Both readers are used as context managers so that the underlying files
    are closed as soon as the data has been processed.
    '''
    gen = MolecularDescriptors.InChIGenerator()
    # Both example inputs live in the same directory; build the path once.
    data_dir = Path(__file__).parent / 'inchi'

    input_file = 'aspirin.mol2'
    with MoleculeReader(str(data_dir / input_file)) as mol_reader:
        mol = mol_reader[0]
        inchi = gen.generate(mol)
        print(f'=== {input_file} ===')
        print(f'InChI string: {inchi.inchi}')
        print(f'InChI key: {inchi.key}')

    # A multi-component crystal
    input_file = 'caffeine.cif'
    with CrystalReader(str(data_dir / input_file)) as cry_reader:
        cry = cry_reader[0]
        inchi = gen.generate(cry)
        print(f'=== {input_file} ===')
        # Here we generate the composite InChI for the crystal
        print(f'(Composite) InChI string: {inchi.inchi}')
        print(f'(Composite) InChI key: {inchi.key}')
        # We can also generate the InChI for the individual components
        for component in cry.molecule.components:
            inchi = gen.generate(component)
            print(f'InChI string for component {component.identifier}: {inchi.inchi}')
            print(f'InChI key for component {component.identifier}: {inchi.key}')


# Run the example only when this file is executed as a script.
if __name__ == '__main__':
    main()