Source code for glypy.io.wurcs.writer

from collections import OrderedDict

from glypy.structure import Glycan, Monosaccharide
from glypy.structure.glycan_composition import GlycanComposition
from glypy.utils import tree

from .node_type import NodeTypeSpec
from .utils import base52


class WURCSWriter(object):
    """Implementation of WURCS encoding process.

    Includes the steps for creating each section of the WURCS encoding,
    and populating them from a saccharide structure, composition, or
    monosaccharide.

    Attributes
    ----------
    glycan :
        The object being encoded. A :class:`GlycanComposition` is handled
        specially; anything else is expected to provide ``iternodes()``
        and ``iterlinks()``.
    node_type_map : OrderedDict
        Maps each distinct node type to its 1-based index, in order of
        first appearance.
    node_index_to_node_type : OrderedDict
        Maps each 1-based node index to that node's type.
    index_to_glyph : dict
        Maps each 1-based node index to its ``base52`` glyph.
    id_to_index : dict
        Maps ``node.id`` to the node's 1-based index.
    """

    version = '2.0'

    def __init__(self, glycan):
        self.glycan = glycan
        self.node_type_map = OrderedDict()
        self.node_index_to_node_type = OrderedDict()
        self.index_to_glyph = dict()
        self.id_to_index = dict()
        self.extract_node_types()

    def extract_node_types(self):
        """Walk the nodes once and rebuild every lookup table on ``self``.

        The tables are built in locals and assigned at the end, so the
        instance attributes are only replaced after the whole traversal
        has succeeded.
        """
        node_types = OrderedDict()
        node_index_to_node_type = OrderedDict()
        index_to_glyph = dict()
        id_to_index = dict()
        # Node indices are 1-based; glyphs are derived from the 0-based rank.
        for i, node in enumerate(self._iter_monosaccharides(), 1):
            node_type = NodeTypeSpec.from_monosaccharide(node)
            index_to_glyph[i] = base52(i - 1)
            id_to_index[node.id] = i
            node_index_to_node_type[i] = node_type
            if node_type not in node_types:
                # First time this type is seen: give it the next index.
                node_types[node_type] = len(node_types) + 1
        self.node_type_map = node_types
        self.node_index_to_node_type = node_index_to_node_type
        self.index_to_glyph = index_to_glyph
        self.id_to_index = id_to_index

    def format_version(self):
        """Return the leading ``WURCS=<version>`` section."""
        return "WURCS=%s" % (self.version, )

    def _iter_monosaccharides(self):
        """Yield every node to encode.

        For a :class:`GlycanComposition` each key is yielded once per unit
        of its count; otherwise the nodes of ``glycan.iternodes()`` are
        yielded as-is.
        """
        if isinstance(self.glycan, GlycanComposition):
            for key, value in self.glycan.items():
                for _ in range(value):
                    yield key
        else:
            for x in self.glycan.iternodes():
                yield x

    def _iter_links(self):
        """Yield the items of ``glycan.iterlinks()``.

        Yields nothing for a :class:`GlycanComposition`.
        """
        if isinstance(self.glycan, GlycanComposition):
            return
        for x in self.glycan.iterlinks():
            yield x

    def format_count_section(self):
        """Return ``<unique node types>,<nodes>,<links>``.

        A trailing ``+`` is appended when encoding a
        :class:`GlycanComposition`.
        """
        # Count lazily rather than materialising throwaway lists.
        count_nodes = sum(1 for _ in self._iter_monosaccharides())
        count_links = sum(1 for _ in self._iter_links())
        count_section = "%s,%s,%s" % (
            len(self.node_type_map), count_nodes, count_links)
        if isinstance(self.glycan, GlycanComposition):
            count_section += '+'
        return count_section

    def format_node_types(self):
        """Return each distinct node type wrapped in ``[...]``, concatenated."""
        return ''.join('[%s]' % (str(s),) for s in self.node_type_map)

    def format_node_type_index(self):
        """Return the ``-``-joined node type index of every node, in order."""
        return '-'.join(
            str(self.node_type_map[node_type])
            for node_type in self.node_index_to_node_type.values())

    @staticmethod
    def _position_symbol(position):
        """Return ``'?'`` for a position of ``-1``, else *position* unchanged."""
        # NOTE(review): -1 is presumably the marker for an unknown
        # attachment site; confirm against the link implementation.
        return '?' if position == -1 else position

    def format_links(self):
        """Return the ``_``-joined link section.

        Each link is written as
        ``<parent glyph><parent position>-<child glyph><child position>``.
        Returns an empty string for a :class:`GlycanComposition`.
        """
        if isinstance(self.glycan, GlycanComposition):
            return ""
        links = []
        # iterlinks() yields pairs; only the second element (the link) is used.
        for _, link in self.glycan.iterlinks():
            parent_glyph = self.index_to_glyph[self.id_to_index[link.parent.id]]
            child_glyph = self.index_to_glyph[self.id_to_index[link.child.id]]
            links.append('%s%s-%s%s' % (
                parent_glyph, self._position_symbol(link.parent_position),
                child_glyph, self._position_symbol(link.child_position)))
        return '_'.join(links)

    def write(self):
        """Encode ``self.glycan`` and return the complete WURCS string.

        The lookup tables are rebuilt first, then the sections are joined
        with ``/``. The link section is only included when the glycan is
        not a :class:`GlycanComposition`.
        """
        self.extract_node_types()
        sections = [
            self.format_version(),
            self.format_count_section(),
            self.format_node_types(),
            self.format_node_type_index(),
        ]
        if not isinstance(self.glycan, GlycanComposition):
            sections.append(self.format_links())
        return '/'.join(sections)
def dumps(glycan):
    """Encode a saccharide object as a WURCS 2.0 string.

    .. note::
        The WURCS canonicalization has not been implemented yet, so the
        generated string may differ from other sources. However, the
        :mod:`~.canonicalize` module can be used to standardize structures
        prior to encoding them.

    Parameters
    ----------
    glycan : :class:`~.Glycan`, :class:`~.GlycanComposition`, or :class:`~.Monosaccharide`
        The structure to encode

    Returns
    -------
    :class:`str`
        The structure encoded as a string.

    Raises
    ------
    TypeError
        Re-raised from :func:`tree` when *glycan* is neither convertible
        by it nor a :class:`~.Monosaccharide`.
    """
    # Compositions are handed to the writer untouched.
    if isinstance(glycan, GlycanComposition):
        return WURCSWriter(glycan).write()

    try:
        structure = tree(glycan)
    except TypeError:
        if not isinstance(glycan, Monosaccharide):
            raise
        # A lone monosaccharide is written as just its residue code.
        return NodeTypeSpec.from_monosaccharide(glycan).to_res()
    return WURCSWriter(structure).write()
# Register `dumps` under the 'wurcs' serializer name on both structure types.
Glycan.register_serializer('wurcs', dumps)
Monosaccharide.register_serializer('wurcs', dumps)