Source code for glypy.io.wurcs.carbon_descriptors

# Much code here is derived from https://github.com/glycoinfo/glycocttowurcs
# though the representation of glycans in Eurocarbdb.MolecularFramework
# may not map 1:1.
try:
    from collections.abc import Sequence
except ImportError:
    from collections import Sequence

import warnings

from six import string_types as basestring

from .basetype_conversion import (
    descriptors_to_base_type)

from glypy.structure.monosaccharide import Monosaccharide, ReducedEnd
from glypy.structure.constants import SuperClass, Anomer, Modification, Stem, Configuration, UnknownPosition
from glypy import OrderedMultiMap


# Single-character anomer symbols, keyed by ``Anomer`` member, used when
# rendering the anomer part of a backbone code string.
anomer_map = dict((
    (Anomer.beta, 'b'),
    (Anomer.alpha, 'a'),
    (Anomer.uncyclized, 'o'),
    (Anomer.x, 'x'),
))


class CarbonDescriptors(Sequence):
    '''An immutable sequence of per-carbon descriptor codes together with
    the anomer, anomeric position and ring bounds of one monosaccharide.

    ``str()`` of an instance is its WURCS2.0 ``<BackboneCode>`` rendering
    (see :meth:`to_backbone_code`).

    Attributes
    ----------
    descriptors: tuple
        One descriptor code per backbone carbon, in carbon order
    anomer: Anomer
        The anomer, looked up from the constructor argument with ``Anomer[...]``
    anomeric_position: int
        The anomeric carbon, with ``-1`` standing for an unknown (``'?'``) position
    ring_start: int
        First ring position, :data:`UnknownPosition` when given as :const:`None`
    ring_end: int
        Last ring position, :data:`UnknownPosition` when given as :const:`None`
    '''

    def __init__(self, descriptors, anomer, anomeric_position, ring_start, ring_end):
        self.descriptors = tuple(descriptors)
        self.anomer = Anomer[anomer]
        self.anomeric_position = self._translate_position(anomeric_position)
        self.ring_start = ring_start if ring_start is not None else UnknownPosition
        self.ring_end = ring_end if ring_end is not None else UnknownPosition

    def _translate_position(self, position):
        '''Convert between the textual and numeric forms of a position.

        ``'?'`` becomes ``-1``, ``-1`` becomes ``'?'``, and anything else is
        coerced with :func:`int`.
        '''
        if position == '?':
            position = -1
        elif position == -1:
            position = '?'
        else:
            position = int(position)
        return position

    def __len__(self):
        return len(self.descriptors)

    def __eq__(self, other):
        '''Compare against another descriptor object or a backbone code string.

        Strings are compared with ``str(self)``. Objects that do not expose
        the descriptor attributes yield :const:`NotImplemented` so that Python
        can fall back to its default handling instead of raising
        :class:`AttributeError`.
        '''
        if other is None:
            return False
        if isinstance(other, basestring):
            return str(self) == other
        try:
            theirs = (other.descriptors, other.anomer, other.anomeric_position,
                      other.ring_start, other.ring_end)
        except AttributeError:
            return NotImplemented
        mine = (self.descriptors, self.anomer, self.anomeric_position,
                self.ring_start, self.ring_end)
        for own_value, other_value in zip(mine, theirs):
            if own_value != other_value:
                return False
        return True

    def __ne__(self, other):
        return not self == other

    def __hash__(self):
        # Only the descriptors are hashed; objects that compare equal always
        # share them, so this stays consistent with __eq__.
        return hash(self.descriptors)

    def __getitem__(self, i):
        return self.descriptors[i]

    def __iter__(self):
        return iter(self.descriptors)

    def to_d_stereoform(self, code):
        '''Return `code` as a list with ``'3'`` and ``'4'`` exchanged when its
        last element is ``'3'``; otherwise return an unchanged copy.

        :meth:`to_base_type` treats a trailing ``'3'`` as the L configuration,
        so the exchange yields the D-form key used for the stem lookup.

        Parameters
        ----------
        code: sequence of str
            A non-empty run of carbon codes

        Returns
        -------
        list
        '''
        if code[-1] != '3':
            return list(code)
        mirrored = {'3': '4', '4': '3'}
        return [mirrored.get(site, site) for site in code]

    @staticmethod
    def _configuration_from_chunk(raw_chunk):
        '''Infer the configuration from the last stereosite of `raw_chunk`.'''
        last_site = raw_chunk[-1]
        if last_site == '3':
            return Configuration.l
        if last_site == '4':
            return Configuration.d
        return Configuration.x

    def to_base_type(self):
        '''Convert the :class:`CarbonDescriptors` into a :class:`~.Monosaccharide`,
        not including substituents.

        Returns
        -------
        :class:`~.Monosaccharide`
        '''
        superclass = SuperClass[len(self)]
        # translate stereocode into generic carbon code
        generic_code = {'1': '3', '2': '4'}
        carbon_coding = [generic_code.get(site, site) for site in map(str, self)]
        modifications = OrderedMultiMap()
        is_reduced = False

        stems = []
        configurations = []
        anomer = self.anomer
        ring_start = self.ring_start
        ring_end = self.ring_end
        if carbon_coding[0] == carbon_coding[-1] == 'h':
            # both terminal carbons are 'h': treat as an open-chain, reduced form
            anomer = Anomer.uncyclized
            ring_start = 0
            ring_end = 0
            is_reduced = True

        # if the stereosites are all defined
        if 'x' not in carbon_coding:
            # incrementally walk along the carbon sequence, skipping carbon 1
            start = 1
            while start < superclass.value:
                # consider stereosequences of up to four carbons ahead,
                # preferring longer stereosequences
                for width in range(4, 0, -1):
                    # extract the raw stereosequence
                    raw_chunk = carbon_coding[start:start + width]
                    # convert the stereosequence to D configuration and
                    # to a string for the table lookup
                    chunk = ''.join(self.to_d_stereoform(raw_chunk))
                    try:
                        stem_name = descriptors_to_base_type[chunk]
                    except KeyError:
                        continue
                    stems.append(stem_name)
                    # infer the chirality from the last stereosite
                    configurations.append(self._configuration_from_chunk(raw_chunk))
                    # resume the lookup just past the matched stereosequence
                    start += len(raw_chunk)
                    break
                else:
                    # No stereosequence matched. ``chunk``/``raw_chunk`` now hold
                    # the single carbon at ``start``; if it is a stereosite, use
                    # the stem stored under the 'x' key of the lookup table.
                    if chunk in ('3', '4'):
                        stems.append(descriptors_to_base_type['x'])
                        configurations.append(self._configuration_from_chunk(raw_chunk))
                    # always advance, otherwise a non-stereo carbon would stall the walk
                    start += 1
        else:
            # This cannot handle unspecified nonulonic acids and other modified
            # but unspecified monosaccharides with multiple chiral centers well.
            stems.append(None)
            if carbon_coding[0] not in ('u', 'h'):
                warnings.warn("Cannot infer chirality from %r" % (str(self),))
            configurations.append(None)
            # Guess if the monosaccharide is large enough to have a second chiral
            # center, because no other rule seems obvious. This could produce
            # incorrect monosaccharide compositions?
            if len(carbon_coding) > 6:
                stems.append(None)
                configurations.append(None)

        double_bonds = []
        for index, site in enumerate(self):
            position = index + 1
            # an anomeric carbon at position 2 is recorded as a keto modification
            if site in ('a', 'u', 'U') and position == 2:
                modifications[position] = Modification.keto
            if site in ('E', 'F'):
                double_bonds.append(position)
            if site in ('d', 'm'):
                modifications[position] = Modification.Deoxygenated
            if site == 'A':
                modifications[position] = Modification.Acidic
        # Record one ``en`` per double bond, on the carbon that carries it.
        # Every second marked carbon is used, presumably because both ends of
        # a double bond may be marked.
        # (The previous code indexed with the stale loop variable of the loop
        # above, which put every ``en`` on the same, unrelated position.)
        for position in double_bonds[::2]:
            modifications[position] = Modification.en

        # stems and configurations were collected from carbon 2 onwards and are
        # passed to Monosaccharide in reverse order
        stems = [Stem[x] for x in stems[::-1]]
        configurations = configurations[::-1]
        base = Monosaccharide(
            anomer, configurations, stems, superclass, ring_start, ring_end,
            modifications, reduced=ReducedEnd() if is_reduced else None)
        return base

    @classmethod
    def from_monosaccharide(cls, monosaccharide):
        '''Create a :class:`CarbonDescriptors` from a given :class:`~.Monosaccharide`.

        Parameters
        ----------
        monosaccharide: :class:`~.Monosaccharide`
            The monosaccharide to describe

        Returns
        -------
        :class:`CarbonDescriptors`

        Raises
        ------
        ValueError
            If an acidic modification sits on a non-terminal carbon, or an
            alditol modification sits anywhere but the first carbon
        '''
        terminal_position = monosaccharide.superclass.value
        code = [str(x.value) if x.value is not None else 'x'
                for x in monosaccharide.stereocode]
        code[0] = 'u'
        code[-1] = 'h'
        if monosaccharide.anomer == 'uncyclized':
            code[0] = 'h'
            code[-1] = 'h'
        anomer = monosaccharide.anomer
        ring_start = monosaccharide.ring_start
        anomeric_sites = []
        is_aldose = True
        # encode the modifications onto the carbon descriptor code
        for position, modification in monosaccharide.modifications.items():
            is_terminal = (position == 1 or position == terminal_position)
            if modification == Modification.Acidic:
                if not is_terminal:
                    raise ValueError("Cannot add a carboxylic acid group to a non-terminal carbon")
                if position == 1:
                    is_aldose = False
                code[position - 1] = 'A'
            elif modification == Modification.Deoxygenated:
                if position == 1:
                    is_aldose = False
                code[position - 1] = 'm' if is_terminal else 'd'
            elif modification == Modification.Ketone:
                is_aldose = False
                anomeric_sites.append(position)
            elif modification == Modification.en:
                code[position - 1] = 'E'
            elif modification == Modification.Alditol:
                is_aldose = False
                if position != 1:
                    raise ValueError("\"aldi\" must occur on the first carbon")
        if is_aldose:
            anomeric_sites.append(1)

        # Fall back to the ring start when no anomeric site was collected
        # (e.g. a C1 acid or alditol without a keto group); indexing the empty
        # list used to raise IndexError here.
        anomeric_position = ring_start
        if anomeric_sites:
            anomeric_position = anomeric_sites[0]
            # if the anomeric position is fully defined and the monosaccharide is cyclic
            if ring_start not in (UnknownPosition, 0):
                code[anomeric_position - 1] = 'a'
            # if the anomeric position is partially undefined, the carbon code is 'u'
            elif ring_start == UnknownPosition:
                code[anomeric_position - 1] = 'u'
        if ring_start == UnknownPosition:
            anomeric_position = "?"
        return cls(code, anomer, anomeric_position, ring_start, monosaccharide.ring_end)

    def to_backbone_code(self):
        '''Convert :class:`CarbonDescriptors` into a string representation matching
        the ``<BackboneCode>`` pattern from WURCS2.0

        Returns
        -------
        :class:`str`
        '''
        # carbon descriptors
        parts = [''.join(self)]
        # if the anomer is completely undefined, do not include it
        if not (self.anomeric_position == -1 and self.anomer == Anomer.x):
            parts.append("-%s%s" % (
                self._translate_position(self.anomeric_position),
                anomer_map[self.anomer]))
        # if the ring is neither undefined nor open, include it
        ring_defined = self.ring_start != -1 and self.ring_end != -1
        ring_closed = self.ring_start != 0 and self.ring_end != 0
        if ring_defined and ring_closed:
            parts.append("_%s-%s" % tuple(
                map(self._translate_position, (self.ring_start, self.ring_end))))
        return ''.join(parts)

    def __str__(self):
        return self.to_backbone_code()

    def __repr__(self):
        descriptors = ''.join(map(str, self))
        template = ("{self.__class__.__name__}({descriptors!r}, {self.anomer.name!r}, "
                    "{self.anomeric_position}, {self.ring_start}, {self.ring_end})")
        return template.format(self=self, descriptors=descriptors)