'''
Represent a sugar graph with pseudo-directed edges.
'''
import warnings
import operator
import logging
import itertools
from functools import partial
from collections import deque, defaultdict
try:
from collections.abc import Callable
except ImportError:
from collections import Callable
from glypy.utils import (
identity,
chrinc,
uid)
from glypy.composition import Composition
from .base import SaccharideCollection
from .monosaccharide import (
Monosaccharide,
graph_clone,
toggle as residue_toggle,
MonosaccharideOccupancy)
from .constants import UnknownPosition, NoPosition
from .substituent import Substituent
from .crossring_fragments import crossring_fragments, CrossRingPair
from .fragment import Subtree
logger = logging.getLogger("Glycan")
_fragment_direction = {
"A": -1,
"B": -1,
"C": -1,
"X": 1,
"Y": 1,
"Z": 1
}
MAIN_BRANCH_SYM = '-'
def fragment_to_substructure(fragment, tree):
"""Extract the substructure of `tree` which is contained in `fragment`
Parameters
----------
fragment: :class:`~.GlycanFragment`
The :class:`~.GlycanFragment` to extract substructure for.
tree: :class:`Glycan`
The |Glycan| to extract substructure from.
Returns
-------
:class:`Glycan`:
The |Glycan| substructure defined by the nodes contained in `fragment` as
found in `tree`
"""
break_targets = fragment.link_ids
crossring_targets = fragment.crossring_cleavages
# All operations will be done on a copy of the tree of interest
tree = tree.clone()
crossring_targets_nodes = []
break_targets_nodes = []
# A point of reference known to be inside the fragment tree
anchor = None
for pos, link in tree.iterlinks():
# If the current link's child was cross-ring cleaved,
# then we must apply that cleavage and find an anchor
if link.child.id in crossring_targets:
ion_type = crossring_targets[link.child.id]
c1, c2 = map(int, ion_type[0].split(","))
target = tree.get(link.child.id)
crossring_targets_nodes.append((target, c1, c2))
# If this link was cleaved, break it and find an anchor
if link.id in break_targets:
break_targets_nodes.append(link)
for target, c1, c2 in crossring_targets_nodes:
a_frag, x_frag = crossring_fragments(
target, c1, c2, attach=True, copy=False)
next(residue_toggle(target))
if crossring_targets[target.id][1] == "A":
anchor = a_frag
else:
anchor = x_frag
for link in break_targets_nodes:
parent, child = link.break_link(refund=True)
if parent.id in fragment.included_nodes:
anchor = parent
elif child.id in fragment.included_nodes:
anchor = child
# Build a new tree from the anchor
substructure = Glycan(root=anchor, index_method=None).reroot()
return substructure
[docs]class Glycan(SaccharideCollection):
'''
Represents a full graph of connected |Monosaccharide| objects and their connecting bonds.
Attributes
----------
root: |Monosaccharide|
The first monosaccharide unit of the glycan, and the reducing end if present.
index: |list|
A list of the |Monosaccharide| instances in `self` in the order they are encountered
by traversal by `traversal_methods[index_method]`
link_index: |list|
A list of the |Link| connecting the |Monosaccharide| instances in `self` in the order they
are encountered by traversal by `traversal_methods[index_method]`
reducing_end: |ReducedEnd| or |None|
The reducing end on :attr:`root`.
branch_lengths: |dict|
A dictionary mapping branch symbols to their lengths
branch_parent_map: |dict|
A dictionary mapping branch symbols to their parent branch symbols
'''
verbose = False
_serializers = {}
@classmethod
def subtree_from(cls, tree, node, visited=None):
if isinstance(node, int):
node = tree[node]
visited = {
node.id for p,
node in node.parents()} if visited is None else visited
subtree = cls(root=node, index_method=None).clone(
index_method=None, visited=visited)
return subtree
[docs] def fragment_to_substructure(self, fragment):
"""Extract the substructure of `tree` which is contained in `fragment`
Parameters
----------
fragment: :class:`~.GlycanFragment`
The :class:`~.GlycanFragment` to extract substructure for.
tree: :class:`Glycan`
The |Glycan| to extract substructure from.
Returns
-------
:class:`Glycan`:
The |Glycan| substructure defined by the nodes contained in `fragment` as
found in `tree`
"""
return fragment_to_substructure(fragment, self)
traversal_methods = {}
def __init__(self, root=None, index_method='dfs', canonicalize=False):
'''
Constructs a new :class:`Glycan` from the collection of connected :class:`~.Monosaccharide`
objects rooted at `root`.
If :obj:`index_method` is not |None|, the graph is indexed by the default search method
given by :obj:`traversal_methods[index_method]`
'''
if root is None:
root = Monosaccharide()
self.root = root
self.index = []
self.link_index = []
self.branch_lengths = {}
if index_method is not None:
self.reindex(method=index_method)
if canonicalize:
self.canonicalize()
def has_index(self):
return bool(self.index)
[docs] def reindex(self, method='dfs'):
'''
Traverse the graph using the function specified by `method`. The order of
traversal defines the new :attr:`id` value for each |Monosaccharide|
and |Link|.
The order of traversal also defines the ordering of the |Monosaccharide|
in :attr:`index` and |Link| in :attr:`link_index`.
Prior to constructing a |Glycan| instance, component |Monosaccharide| instances
may be labeled, converting their id field into a tuple.
Calls :meth:`label_branches` after indexing is complete.
Returns
-------
Glycan:
self
See Also
--------
deindex
label_branches
'''
self.deindex()
traversal = self._get_traversal_method(method)
index = []
visited = set()
i = 1
for node in traversal():
addr = id(node)
if addr in visited:
continue
visited.add(addr)
index.append(node)
for node in index:
node.id = i
i += 1
# reindex substituents as well
try:
for j, subst in node.substituents():
subst.id = i
i += 1
except AttributeError:
if node.node_type is Substituent.node_type:
continue
link_index = []
for pos, link in self.iterlinks(method=method):
link_index.append(link)
i = 1
for link in link_index:
link.id = i
i += 1
self.index = index
self.link_index = link_index
self.label_branches()
return self
def _build_link_index(self, method='dfs'):
link_index = []
for pos, link in self.iterlinks(method=method):
link_index.append(link)
self.link_index = link_index
def _build_node_index(self, method='dfs'):
index = []
for node in self.iternodes(method=method):
index.append(node)
self.index = index
[docs] def deindex(self):
'''
When combining two Glycan structures, very often their component ids will
overlap, making it impossible to differentiate between a cycle and the new
graph. This function mangles all of the node and link ids so that they are
distinct from the pre-existing nodes.
Returns
-------
Glycan:
self
'''
if self.index is not None and len(self.index) > 0:
base = uid()
for node in self.index:
node.id += base
node.id *= -1
try:
for j, subst in node.substituents():
subst.id += base
subst.id *= -1
except AttributeError:
if node.node_type is Substituent.node_type:
warnings.warn("A substituent has been detected as a parent "
"node. This glycan may not fragment correctly")
for link in self.link_index:
link.id += base
link.id *= -1
return self
[docs] def reroot(self, index_method='dfs'):
"""Set :attr:`root` to the node with the lowest :attr:`id`.
Should only be used if the glycan has been indexed.
Parameters
----------
index_method : str, optional
The name of the index method to use to reindex the glycan relative to
the new root node. If :const:`None`, no reindexing is done. The default
is 'dfs'
Returns
-------
Glycan:
self
"""
self.root = sorted(iter(self), key=operator.attrgetter('id'))[0]
if index_method is not None:
self.reindex(method=index_method)
return self
def initialize_structure(self, method='dfs'):
"""Rebuild the structure index from scratch using :meth:`reindex`,
then sets the traversal order using :meth:`canonicalize`.
Use this method when building a :class:`Glycan` instance from a manually
created :class:`~.Monosaccharide` graph.
"""
self.canonicalize()
self.reindex(method=method)
return self
[docs] def __getitem__(self, ix):
'''
Fetch a :class:`~.Monosaccharide` from :attr:`index`.
Returns
-------
Monosaccharide
Raises
------
IndexError:
If the provided `ix` exceeds the length of the index,
or if :attr:`index` has not been populated.
'''
if self.index is not None:
return self.index[ix]
else:
raise IndexError(
"Tried to access the index of an unindexed Glycan.")
def __getstate__(self):
return self.__dict__
def __setstate__(self, state):
self.__dict__.update(state)
def __root__(self):
return self.root
def __tree__(self):
return self
[docs] def get(self, ix):
"""Get a :class:`~.Monosaccharide` from this structure by its :attr:`~.Monosaccharide.id` value.
If :attr:`index` is populated it will be iterated over, otherwise :meth:`__iter__` will be called.
Parameters
----------
ix : int or tuple
The id value to search for.
Returns
-------
Monosaccharide
Raises
------
IndexError:
If the id value is not found
"""
if self.index:
iterable = self.index
else:
iterable = self
for node in iterable:
if node.id == ix:
return node
raise IndexError(
"Could not find a node with the given id {}".format(ix))
[docs] def get_link(self, ix):
"""Search for a :class:`~.Link` by :attr:`~.Link.id` value.
This will use :meth:`iterlinks` to iterate over the linkages
in the structure
Parameters
----------
ix : int
The link index to search for.
Returns
-------
:class:`~.Link`
Raises
------
IndexError:
if the id value is not found
"""
for pos, link in self.iterlinks():
if link.id == ix or link.label == ix:
return link
raise IndexError(
"Could not find a link with the given id or label {}".format(ix))
@property
def reducing_end(self):
'''
An alias for :attr:`Monosaccharide.reducing_end` for :attr:`root`
'''
try:
return self.root.reducing_end
except AttributeError:
if self.root.node_type is Substituent.node_type:
return None
else:
raise
[docs] def set_reducing_end(self, value):
'''
Sets the reducing end type, and configures the :attr:`root` |Monosaccharide| appropriately.
If the reducing_end is not |None|, then the following state changes are made to :attr:`root`:
.. code-block:: python
self.root.ring_start = 0
self.root.ring_end = 0
self.root.anomer = "uncyclized"
Else, the correct state is unknown:
.. code-block:: python
self.root.ring_start = UnknownPosition
self.root.ring_end = UnknownPosition
self.root.anomer = None
.. note:: This method is called automatically when setting :attr:`reducing_end`, and does not
need to be used explicitly.
'''
if self.root.node_type is Substituent.node_type:
raise TypeError("Cannot set reducing end on this glycan composition")
self.root.reducing_end = value
if self.reducing_end is not None:
self.root.ring_start = 0
self.root.ring_end = 0
self.root.anomer = "uncyclized"
else:
self.root.ring_start = UnknownPosition
self.root.ring_end = UnknownPosition
self.root.anomer = None
@reducing_end.setter
def reducing_end(self, value):
self.set_reducing_end(value)
[docs] def depth_first_traversal(self, from_node=None, apply_fn=identity, visited=None):
'''
Make a depth-first traversal of the glycan graph. Children are explored in descending bond-order.
This is the default traversal method for all |Glycan| objects. :meth:`~.dfs` is an alias of this method.
Both names can be used to specify this strategy to :meth:`~._get_traversal_method`.
When selecting an iteration strategy, this strategy is specified as "dfs".
Parameters
----------
from_node: None or Monosaccharide
If `from_node` is |None|, then traversal starts from the root node. Otherwise it begins
from the given node.
apply_fn: function
A function applied to each node on arrival. If this function returns a non-None value,
the result is yielded from the generator, otherwise it is ignored. Defaults to :func:`.identity`
visited: set or None
A :class:`set` of node ID values to ignore. If |None|, defaults to the empty `set`
Yields
------
Return Value of `apply_fn`, by default |Monosaccharide|
See also
--------
Glycan.breadth_first_traversal
'''
node_stack = deque([self.root if from_node is None else from_node])
visited = set() if visited is None else visited
while len(node_stack) > 0:
node = node_stack.pop()
visited.add(node.id)
if apply_fn is identity:
yield node
else:
res = apply_fn(node)
if res is not None:
yield res
for link in node.links.values():
terminal = link.parent
if terminal.id not in visited:
node_stack.append(terminal)
terminal = link.child
if terminal.id not in visited:
node_stack.append(terminal)
traversal_methods['dfs'] = "depth_first_traversal"
traversal_methods['depth_first_traversal'] = "depth_first_traversal"
[docs] def breadth_first_traversal(self, from_node=None, apply_fn=identity, visited=None):
'''
Make a breadth-first traversal of the glycan graph. Children are explored in descending bond-order.
When selecting an iteration strategy, this strategy is specified as "bfs".
Parameters
----------
from_node: None or Monosaccharide
If `from_node` is |None|, then traversal starts from the root node. Otherwise it begins
from the given node.
apply_fn: function
A function applied to each node on arrival. If this function returns a non-None value,
the result is yielded from the generator, otherwise it is ignored. Defaults to :func:`.identity`
visited: set or None
A :class:`set` of node ID values to ignore. If |None|, defaults to the empty `set`
Yields
------
Return Value of `apply_fn`, by default |Monosaccharide|
See also
--------
Glycan.depth_first_traversal
'''
node_queue = deque([self.root if from_node is None else from_node])
visited = set() if visited is None else visited
while len(node_queue) > 0:
node = node_queue.popleft()
visited.add(node.id)
if apply_fn is identity:
yield node
else:
res = apply_fn(node)
if res is not None:
yield res
# node_queue.extend(terminal for link in node.links.values()
# for terminal in link if terminal.id not in visited)
for link in node.links.values():
terminal = link.parent
if terminal.id not in visited:
node_queue.append(terminal)
terminal = link.child
if terminal.id not in visited:
node_queue.append(terminal)
traversal_methods['bfs'] = "breadth_first_traversal"
traversal_methods['breadth_first_traversal'] = "breadth_first_traversal"
[docs] def indexed_traversal(self, from_node=None, apply_fn=identity, visited=None):
"""Traverse the glycan structure along :attr:`index`.
This is substantially faster than other traversal methods for complete traversals
at the cost of a) requiring a call to :meth:`reindex` to populate :attr:`index`
if it has not been called, and b) is not automatically updated if the structure is
modified.
When selecting an iteration strategy, this strategy is specified as "index".
Parameters
----------
from_node: None or Monosaccharide
If `from_node` is |None|, then traversal starts from the root node. Otherwise it begins
from the given node.
apply_fn: function
A function applied to each node on arrival. If this function returns a non-None value,
the result is yielded from the generator, otherwise it is ignored. Defaults to :func:`.identity`
visited: set or None
A :class:`set` of node ID values to ignore. If |None|, defaults to the empty `set`
Yields
------
Return Value of `apply_fn`, by default |Monosaccharide|
See Also
--------
reindex
"""
if not self.index:
self._build_node_index()
if from_node is None and apply_fn is identity:
for node in self.index:
yield node
else:
i = 0
n = len(self.index)
if from_node is not None:
while i < n:
node = self.index[i]
if node == from_node:
break
i += 1
while i < n:
node = self.index[i]
if apply_fn is identity:
yield node
else:
value = apply_fn(node)
if value is not None:
yield value
i += 1
traversal_methods['index'] = "indexed_traversal"
[docs] def _get_traversal_method(self, method):
"""An internal helper method used to resolve traversal
methods by name or alias.
Parameters
----------
method : :class:`str` or :class:`Callable`
If a :class:`str`, it is looked up in the class-level
:attr:`traversal_methods` dictionary and the name of
the appropriate method is retrieved with :func:`getattr`
and returned.
If a :class:`Callable`, the function's first parameter is
bound to `self` and returned.
Returns
-------
:class:`Callable`
"""
if method == 'dfs':
return self.depth_first_traversal
elif method == 'bfs':
return self.breadth_first_traversal
elif isinstance(method, Callable):
return partial(method, self)
try:
traversal = self.traversal_methods[method]
except KeyError:
raise KeyError("Unknown traversal method: {}".format(method))
traversal = getattr(self, traversal)
return traversal
def __iter__(self):
return self.depth_first_traversal()
[docs] def iternodes(self, from_node=None, apply_fn=identity, method='dfs', visited=None):
'''
Generic iterator over nodes dispatching to a strategy given by `method`, defaulting
to :meth:`~.depth_first_traversal`.
Parameters
----------
from_node: None or Monosaccharide
If `from_node` is |None|, then traversal starts from the root node. Otherwise it begins
from the given node.
apply_fn: function
A function applied to each node on arrival. If this function returns a non-None value,
the result is yielded from the generator, otherwise it is ignored. Defaults to :func:`.identity`
method: str or `function`
Traversal method to use. See :meth:`._get_traversal_method`
visited: set or None
A :class:`set` of node ID values to ignore. If |None|, defaults to the empty `set`
Yields
------
Return Value of `apply_fn`, by default Monosaccharide
See also
--------
depth_first_traversal
breadth_first_traversal
_get_traversal_method
'''
traversal = self._get_traversal_method(method)
return traversal(
from_node=from_node, apply_fn=apply_fn, visited=visited)
[docs] def iterlinks(self, apply_fn=identity, substituents=False, method='dfs', visited=None):
'''
Iterates over all |Link| objects in |Glycan|.
Parameters
----------
substituents: bool
If `substituents` is |True|, then include the |Link| objects in
:attr:`substituent_links` on each |Monosaccharide|
method: str or function
The traversal method controlling the order of the nodes visited
visited: None or set
The collection of id values to ignore when traversing
Yields
------
Link
'''
traversal = self._get_traversal_method(method)
links_visited = set()
def links(obj):
if substituents:
for pos, link in obj.substituent_links.items():
res = apply_fn((pos, link))
if res:
yield res
for pos, link in obj.links.items():
if link.id in links_visited:
continue
links_visited.add(link.id)
res = apply_fn((pos, link))
if res:
yield res
return itertools.chain.from_iterable(
traversal(apply_fn=links, visited=visited))
[docs] def canonicalize(self, canonicalizer=None, **kwargs):
"""Canonicalize this glycan, sorting the order in which its links from the
same monosaccharide are traversed.
This currently uses the the :title-reference:`GlycoCT` canonicalization
algorithm.
Parameters
----------
canonicalizer : subclass of :class:`~.CanonicalizerBase`, optional
The canonicalization algorithm to use
**kwargs
Forwarded to the canonicalizer
Returns
-------
:class:`~.Glycan`
This glycan, reordered in place.
"""
from glypy.algorithms.canonicalize import canonicalize
return canonicalize(self, canonicalizer=canonicalizer, **kwargs)
[docs] def leaves(self, bidirectional=False, method='dfs', visited=None):
'''
Iterates over all |Monosaccharide| objects in |Glycan|, yielding only those
that have no child nodes.
Parameters
----------
bidirectional: bool
If `bidirectional` is |True|, then only |Monosaccharide| objects
with only one entry in :attr:`links`.
method: str or function
The traversal method controlling the order of the nodes visited
visited: None or set
The collection of id values to ignore when traversing
Yields
------
:class:`~.Monosaccharide`
'''
traversal = self._get_traversal_method(method)
if bidirectional:
def is_leaf(obj):
if len(obj.links) == 1:
yield obj
else:
def is_leaf(obj):
if len(list(obj.children())) == 0:
yield obj
return itertools.chain.from_iterable(
traversal(apply_fn=is_leaf, visited=visited))
[docs] def ambiguous_links(self):
"""Locate all links which are :class:`.AmbiguousLink` objects
Returns
-------
list
list of ambiguous links
"""
ambiguous_links = []
# find all ambiguous links
for i, link in self.iterlinks():
if link.is_ambiguous():
ambiguous_links.append(link)
return ambiguous_links
[docs] def iterconfiguration(self):
'''Iterate over all valid configurations of ambiguous linkages.
During calculation, the :class:`~.AmbiguousLink` objects may be mutated, but
by the time a new configuration is yielded all changes should be reversed. If an
error occurs during configuration adjustment, it may not be possible to restore the
object to its original state.
Yields
------
:class:`tuple` of (:class:`~.AmbiguousLink`, :class:`~.Monosaccharide`,
:class:`~.Monosaccharide`, :class:`int`, :class:`int`)
The ambiguous link, the parent chosen, the child chosen, the parent linkage site chose, and the child
linkage site chosen
Examples
--------
>>> from glypy.io import glyspace
>>> structure_record = glyspace.get("G81339YK")
>>> structure = structure_record.structure_
>>> configurations = []
>>> for config_list in structure.iterconfiguration():
... instance = structure.clone()
... for link, conf in config_list:
... link = instance.get_link(link.id)
... parent = instance.get(conf[0].id)
... child = instance.get(conf[1].id)
... link.reconfigure(parent, child, conf[2], conf[3])
... configurations.append(instance)
>>> len(configurations)
4
See Also
--------
:meth:`~.AmbiguousLink.iterconfiguration`, :meth:`~.AmbiguousLink.reconfigure`
'''
ambiguous_links = self.ambiguous_links()
combos = itertools.product(*[
link.iterconfiguration() for link in ambiguous_links])
for configs in combos:
n_configs = len(configs)
# count the number of ambiguous links with unknown linkages
has_unknowns = []
for i, conf in enumerate(configs):
if conf.parent_position == UnknownPosition or conf.child_position == UnknownPosition:
has_unknowns.append(i)
if not has_unknowns:
if len(set([(c.parent, c.parent_position) for c in configs])) < n_configs:
continue
yield tuple(zip(ambiguous_links, configs))
else:
# mask the ambiguous links so the possible attachment sites can
# be calculated
for link in ambiguous_links:
link.break_link(refund=True)
parent_map = {
parent.id: MonosaccharideOccupancy.build(parent)
for parent, _, _, _ in configs
}
valid = True
# place all concrete links
for parent, child, parent_site, child_site in configs:
parent_occupancy = parent_map[parent.id]
if parent_site == UnknownPosition:
continue
if parent_site not in parent_occupancy.open_sites:
valid = False
break
# can't add more concrete links if the open sites are allocated
# for unknown localizations
elif len(parent_occupancy.open_sites) - parent_occupancy.unknown_sites <= 0:
valid = False
break
else:
# allocate the requested location so no new connections
# can be made there
ix = parent_occupancy.open_sites.index(parent_site)
parent_occupancy.open_sites.pop(ix)
parent_occupancy.occupied_sites.append(parent_site)
if valid:
# place all unknown links
for parent, child, parent_site, child_site in configs:
parent_occupancy = parent_map[parent.id]
if parent_site != UnknownPosition:
continue
# can't add more unknown links if the open sites are allocated
# for unknown localizations
if len(parent_occupancy.open_sites) - parent_occupancy.unknown_sites <= 0:
valid = False
break
else:
parent_occupancy.unknown_sites += 1
# re-attach ambiguous links so the state of the glycan returns to
# its original state
for link in ambiguous_links:
link.apply()
# if configuration is valid, yield it
if valid:
yield tuple(zip(ambiguous_links, configs))
[docs] def has_undefined_linkages(self):
"""Check if this structure has undefined or ambiguous connectivity
between its nodes.
Returns
-------
bool:
If any of its links are :class:`~.AmbiguousLink` instances, or
have unknown positions (-1).
"""
ambiguous_links = bool(self.ambiguous_links())
if ambiguous_links:
return ambiguous_links
for node in self:
if node.has_undefined_linkages():
return True
return False
[docs] def label_branches(self):
'''
Labels each branch point with an alphabetical symbol. Also computes and stores
each branch's length and stores it in :attr:`branch_lengths`. Sets :attr:`branch_lengths`
of `self` and :attr:`Link.label` for each link attached to `self`. Also populates
:attr:`branch_parent_map`.
Branch symbols are increasing alphabetical characters. The root branch is denoted '-', though
glycans having an :attr:`root` with multiple children will not have any actual branches with
that label.
:attr:`Link.label` updates use the current branch symbol, and the index of that link along
that branch.
.. note:: Labeling always uses a depth-first traversal of nodes.
'''
last_branch_label = MAIN_BRANCH_SYM
self.branch_lengths = defaultdict(int)
branch_parent_map = {}
def parent_link_symbol(node):
try:
label = node.links[node.parents()[0][0]][0].label
if label is None:
return MAIN_BRANCH_SYM
else:
return label[0]
except IndexError:
return MAIN_BRANCH_SYM
for node in self.depth_first_traversal():
links = []
for link in node.links.values():
if link.is_child(node):
continue
links.append(link)
if len(links) == 1:
label_key = parent_link_symbol(node)
self.branch_lengths[label_key] += 1
label = "{}{}".format(
label_key, self.branch_lengths[label_key])
links[0].label = label
else:
last_label_key = label_key = parent_link_symbol(node)
count = self.branch_lengths[last_label_key]
for link in links:
last_branch_label = chrinc(
last_branch_label) if last_branch_label != MAIN_BRANCH_SYM else 'a'
new_label_key = last_branch_label
branch_parent_map[new_label_key] = last_label_key
self.branch_lengths[new_label_key] = count + 1
label = "{}{}".format(
new_label_key, self.branch_lengths[new_label_key])
link.label = label
# Update parent branch lengths
longest = self.branch_lengths[MAIN_BRANCH_SYM]
for branch in sorted(list(self.branch_lengths.keys()), reverse=True):
if branch == MAIN_BRANCH_SYM:
continue
length = self.branch_lengths[branch]
longest = max(longest, length)
parent = branch_parent_map[branch]
self.branch_lengths[parent] = max(length, self.branch_lengths[parent])
self.branch_parent_map = branch_parent_map
self.branch_lengths["-"] = longest
[docs] def count_branches(self):
'''
Count the number of branches in the Glycan tree
Returns
-------
int
'''
count = 0
for node in self:
if len(node.links) > 2:
count += 2 if count == 0 else 1
return count
[docs] def order(self, deep=False):
'''
The number of nodes in the graph. :meth:`__len__` is an alias of this
Returns
-------
int
'''
count = 0
for node in self.depth_first_traversal():
count += 1
return count
def __len__(self):
return self.order()
[docs] @classmethod
def register_serializer(cls, name, method):
"""Add `method` as `name` to the set of serializers to pick from
in :meth:`serialize`
Parameters
----------
name : str
The name of the serializer
method : Callable
A callable object that when called with a :class:`Glycan` returns a :class:`str`
"""
cls._serializers[name] = method
[docs] @classmethod
def available_serializers(cls):
"""Get the list of available serialization formats
Returns
-------
:class:`list` of :class:`str`
"""
return list(cls._serializers.keys())
[docs] def serialize(self, name='glycoct'):
"""Convert the structure to text.
The serialization format is given by a :meth:`available_serializers`.
Parameters
----------
name : str, optional
The name of the serialization format (the default is 'glycoct')
Returns
-------
str
"""
return self._serializers[name](self)
def __repr__(self):
"""An alias for :meth:`serialize`.
Returns
-------
str
"""
return self.serialize()
[docs] def mass(self, average=False, charge=0, mass_data=None, method='dfs'):
'''
Calculates the total mass of the intact graph by querying each
node for its mass.
Parameters
----------
average: bool
Whether or not to use the average isotopic composition when calculating masses.
When ``average == False``, masses are calculated using monoisotopic mass.
charge: int
If charge is non-zero, m/z is calculated, where m is the theoretical mass, and z is `charge`
mass_data: dict
If mass_data is None, standard NIST mass and isotopic abundance data are used. Otherwise the
contents of mass_data are assumed to contain elemental mass and isotopic abundance information.
Returns
-------
float
See also
--------
:func:`glypy.composition.composition.calculate_mass`
'''
if charge == 0:
return sum(
node.mass(average=average, charge=0, mass_data=mass_data) for node in self.iternodes(method=method))
else:
return self.total_composition().calc_mass(average=average, charge=charge, mass_data=mass_data)
[docs] def total_composition(self, method='dfs'):
'''
Computes the sum of the composition of all |Monosaccharide| objects in ``self``
Returns
-------
:class:`~glypy.composition.Composition`
'''
return sum((node.total_composition() for node in self.iternodes(method=method)), Composition())
[docs] def clone(self, index_method='dfs', visited=None, cls=None):
'''
Create a copy of `self`, indexed using `index_method`, a *traversal method* or |None|.
Parameters
----------
index_method: :class:`str`
The indexing method to use when constructing the index of the copied
structure
visited: :class:`set`, optional
A set of nodes to omit traversing through during the copying processing
cls: :class:`type`
A subclass of :class:`Glycan`, defaulting to :attr:`__class__`
Returns
-------
:class:`~glypy.structure.glycan.Glycan`
'''
if cls is None:
cls = self.__class__
clone_root = graph_clone(self.root, visited=visited)
duplicate = cls(root=clone_root, index_method=index_method)
return duplicate
[docs] def exact_ordering_equality(self, other):
'''
Two glycans are considered equal if they are identically ordered nodes.
Parameters
----------
self, other: :class:`~glypy.structure.glycan.Glycan`
Returns
-------
bool
See also
--------
:meth:`glypy.structure.Monosaccharide.exact_ordering_equality`
:term:`Exact Matching`
'''
if other is None:
return False
elif not isinstance(other, Glycan):
return False
return self.root.exact_ordering_equality(other.root)
[docs] def topological_equality(self, other):
'''
Two glycans are considered equal if they are topologically equal.
Parameters
----------
self: :class:`Glycan`
other: :class:`Glycan`
Returns
-------
bool
See also
--------
:meth:`glypy.structure.Monosaccharide.topological_equality`
:term:`Topological Matching`
'''
if other is None:
return False
elif not isinstance(other, Glycan):
return False
return self.root.topological_equality(other.root)
[docs] def __eq__(self, other):
"""Test for exact ordering equality
Parameters
----------
other : :class:`Glycan`
Returns
-------
:class:`bool`
See Also
--------
:meth:`exact_ordering_equality`
:term:`Exact Matching`
"""
return self.exact_ordering_equality(other)
def __ne__(self, other):
return not self == other
[docs] def __hash__(self):
"""Hashes the structure from the GlycoCT text representation
See Also
--------
:meth:`serialize`
"""
return hash(self.serialize("glycoct"))
[docs] def substructures(self, max_cleavages=1, min_cleavages=1, inplace=False):
'''
Generate disjoint subtrees from this glycan by removing one or
more monosaccharide-monosaccharide bond.
Parameters
----------
max_cleavages: |int|
The maximum number of bonds to break per substructure
min_cleavages: |int|
The minimum number of bonds to break per substructure
min_size: |int|
The minimum number of monosaccharides per substructure
See also
--------
:func:`glypy.composition.composition.calculate_mass`
'''
structure = self
if not inplace:
structure = self.clone()
for frag in structure.break_links_subtrees(
max_cleavages):
yield frag
[docs] def name_fragment(self, fragment):
'''
Attempt to assign a full name to a fragment based on the branch and position relative to
the reducing end along side A/B/C/X/Y/Z, according to :title-reference:`Domon and Costello`
The formal grammar for fragment names in Backus-Naur Form:
.. code:: xml
<full-name> ::= <fragment-name>|<fragment-name-list>
<fragment-name> ::= <glycosidic-fragment-name>|<crossring-fragment-name>
<fragment-name-list> ::= <fragment-name>"-"<fragment-name-list>|<fragment-name>
<glycosidic-fragment-name> ::= <branch-identifier><fragment-type><index>
<crossring-fragment-name> ::= <ring-coordinates><fragment-type><branch-identifier><index>
<fragment-type> ::= "A" | "B" | "C" | "X" | "Y" | "Z"
<ring-coordinate> ::= <integer>,<integer>
<index> ::= <integer>
<integer> ::= <digit>|<integer><digit>
<digit> ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
<branch-identifier> ::= <letter>|<letter><digit>|""
<letter> ::= "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
"k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
"u" | "v" | "w" | "x" | "y" | "z"
'''
break_targets = fragment.link_ids
crossring_targets = fragment.crossring_cleavages
# Accumulator for name components
name_parts = []
# Collect cross-ring fragment names
for crossring_id in crossring_targets:
# Seek the link that holds the fragmented residue
for link in self.link_index:
if link.child.id == crossring_id:
ion_type = crossring_targets[crossring_id]
label = link.label
if _fragment_direction[ion_type[1]] > 0:
name = "{}{}".format(
''.join(map(str, ion_type)), label.replace(MAIN_BRANCH_SYM, ""))
name_parts.append(name)
else:
label_key = label[0]
distance = int(label[1:])
inverted_distance = self.branch_lengths[
label_key] - (distance - 1)
name = "{}{}{}".format(
''.join(map(str, ion_type)), label_key.replace(MAIN_BRANCH_SYM, ""), inverted_distance)
name_parts.append(name)
# Collect glycocidic fragment names
for break_id, ion_type in break_targets.items():
ion_type = ion_type[1]
if _fragment_direction[ion_type] > 0:
link = self.link_index[break_id - 1]
label = link.label
name = "{}{}".format(
ion_type,
label.replace(
MAIN_BRANCH_SYM,
""))
name_parts.append(name)
else:
link = self.link_index[break_id - 1]
label = link.label
label_key = label[0]
distance = int(label[1:])
inverted_distance = self.branch_lengths[
label_key] - (distance - 1)
name = "{}{}{}".format(
ion_type, label_key.replace(MAIN_BRANCH_SYM, ""), inverted_distance)
name_parts.append(name)
return '-'.join(sorted(name_parts))
[docs] def break_links_subtrees(self, n_links):
r"""Iteratively generate all subtrees from glycosidic bond cleavages, creating all
:math:`2{L \choose n}` subtrees.
Parameters
----------
n_links : int
Number of links to break simultaneously
Yields
------
Subtree
"""
if len(self.link_index) == 0:
self._build_link_index()
links = list(self.link_index)
for breaks in itertools.combinations(links, n_links):
subtrees = []
for link in breaks:
parent, child = link.break_link(refund=True)
parent_tree = Glycan(parent, index_method=None)
child_tree = Glycan(child, index_method=None)
subtrees.append(parent_tree)
subtrees.append(child_tree)
unique_subtrees = []
for subtree in subtrees:
ids = {n.id for n in subtree}
for uids, unique in unique_subtrees:
if ids == uids:
break
else:
unique_subtrees.append((ids, subtree))
for ids, subtree in unique_subtrees:
subtree = subtree.clone(
index_method=None).reroot(
index_method=None)
include_nodes = {n.id for n in subtree}
link_ids = [link.id for link in breaks
if link.parent.id in include_nodes or
link.child.id in include_nodes]
parent_break_ids = {link.id: link.parent.id for link in breaks
if link.parent.id in include_nodes}
child_break_ids = {link.id: link.child.id for link in breaks
if link.child.id in include_nodes}
yield Subtree(subtree, include_nodes, link_ids, parent_break_ids, child_break_ids)
for link in breaks:
link.apply()
[docs] def crossring_subtrees(self, n_links):
"""Generate all combinations of cross ring fragments and
glycosidic cleavages, cleaving between 1 and `n_links`
monosaccharides paired with `n_links` - 1 to 0 glycosidic cleavages.
Parameters
----------
n_links : int
Total number of breaks to create, between cross ring cleavages and
complemenatary glycosidic cleavages.
Yields
------
Subtree
"""
if len(self.link_index) == 0:
self._build_link_index()
links = list(self.link_index)
# origin_mass = self.mass()
# Localize globals
_str = str
# Break at least one ring
for i in range(1, n_links + 1):
# Generate all combinations of i rings to break
for link_combination in itertools.combinations(links, i):
# Creates a list of lists of CrossRingPairs, each inner list for a
# single Monosaccharide
crossring_combinations = [
CrossRingPair.from_link(link) for link in link_combination]
# Combinations are splatted to unwrap the outer container so that
# the inner lists are multiplexed.
for breaks in itertools.product(*crossring_combinations):
subtrees = []
for ring in breaks:
parent, child = ring.break_link()
parent_tree = Glycan(parent, index_method=None)
parent_tree._build_node_index()
child_tree = Glycan(child, index_method=None)
child_tree._build_node_index()
subtrees.append(parent_tree)
subtrees.append(child_tree)
unique_subtrees = []
for tree in subtrees:
ids = {n.id for n in tree.index}
for uids, unique in unique_subtrees:
if ids == uids:
break
else:
unique_subtrees.append((ids, tree))
# If tis iteration hasn't broken all n rings, there are some
# breaks left over to be generated in glycosidic cleavages.
# Generate all possible glycosidic cleavage subtrees of the
# generated cross ring cleavage subtrees.
if n_links - i > 0:
for ids, subtree in unique_subtrees:
subtree._build_link_index()
partitions = subtree.break_links_subtrees(n_links - i)
for part in partitions:
included_crossring = {}
for crossring in breaks:
if crossring.id in part.include_nodes:
xring_residue = part.tree.get(
crossring.id)
try:
included_crossring[xring_residue.id] = (','.join(
(_str(xring_residue.cleave_1), _str(xring_residue.cleave_2))),
xring_residue.kind)
except AttributeError:
pass
# Not a CrossRingFragment in this instance
part.crossring_cleavages = included_crossring
yield part
else:
for ids, subtree in unique_subtrees:
subtree = subtree.reroot(
index_method=None).clone(
index_method=None)
included_crossring = {}
include_nodes = {n.id for n in subtree.indexed_traversal()}
for crossring in breaks:
if crossring.id in include_nodes:
xring_residue = subtree.get(crossring.id)
try:
included_crossring[xring_residue.id] = (','.join(
(_str(xring_residue.cleave_1), _str(xring_residue.cleave_2))),
xring_residue.kind)
except AttributeError:
pass
# Not a CrossRingFragment in this instance
yield Subtree(subtree, include_nodes, {}, {}, {}, crossring_cleavages=included_crossring)
# Re-join the ring, retracting all links from the crossring objects
for ring in breaks:
ring.apply()
# Clean up any lingering links trapped in the closure created by adjacent
# crossring pairs that were made visible by `ring.apply()`
while(any(ring.is_attached() for ring in breaks)):
for ring in breaks:
ring.release()
for ring in breaks:
ring.release()
# assert round(self.mass(), 4) == round(origin_mass, 4)
[docs] def fragments(self, kind="BY", max_cleavages=1, average=False, charge=0, mass_data=None,
traversal_method='dfs'):
'''
Generate carbohydrate backbone fragments from this glycan by examining the disjoint subtrees
created by removing one or more monosaccharide-monosaccharide bond.
Parameters
----------
kind: :class:`Iterable`
Any :class:`Iterable` of characters corresponding to A/B/C/X/Y/Z
as published by :title-reference:`Domon and Costello`
max_cleavages: |int|
The maximum number of bonds to break per fragment
average: bool, optional, defaults to `False`
Whether or not to use the average isotopic composition when calculating masses.
When ``average == False``, masses are calculated using monoisotopic mass.
charge: int, optional, defaults to 0
If charge is non-zero, m/z is calculated, where m is the theoretical mass, and z is `charge`
mass_data: dict, optional, defaults to `None`
If mass_data is |None|, standard NIST mass and isotopic abundance data are used. Otherwise the
contents of `mass_data` are assumed to contain elemental mass and isotopic abundance information.
Yields
------
:class:`GlycanFragment`
See also
--------
:func:`glypy.composition.composition.calculate_mass`
:meth:`subtrees`
:meth:`crossring_subtrees`
:meth:`.Subtree.to_fragments`
'''
seen = set()
source = self.clone()
for i in range(1, max_cleavages + 1):
gen = source.break_links_subtrees(i)
if len(set("AX") & set(kind)) > 0:
gen = itertools.chain(
gen,
source.crossring_subtrees(i))
for subtree in gen:
for fragment in subtree.to_fragments(kind, average=average,
charge=charge, mass_data=mass_data,
traversal_method=traversal_method):
fragment.name = self.name_fragment(fragment)
if fragment.name in seen:
continue
else:
seen.add(fragment.name)
yield fragment
def subtrees(self, max_cleavages=1, include_crossring=False):
'''
Generate subtrees from this tree by breaking `max_cleavages` bonds or rings.
Parameters
----------
max_cleavages: int
The maximum number of bonds to break per fragment
include_crossring: bool
Whether to include cross ring cleavages
Yields
------
Subtree
'''
source = self.clone()
for i in range(1, max_cleavages + 1):
gen = source.break_links_subtrees(i)
if include_crossring:
gen = itertools.chain(
gen,
source.crossring_subtrees(i))
for subtree in gen:
yield subtree
class NamedGlycan(Glycan):
def __init__(self, name=None, *args, **kwargs):
self.name = name
super(NamedGlycan, self).__init__(*args, **kwargs)
def clone(self, index_method='dfs', visited=None, cls=None):
if cls is None:
cls = NamedGlycan
inst = super(NamedGlycan, self).clone(index_method=index_method, visited=visited, cls=cls)
inst.name = self.name
return inst
def __repr__(self):
rep = super(NamedGlycan, self).__repr__()
return "%s\n%s" % (self.name, rep)