'''
:class:`GlycanComposition`, :class:`MonosaccharideResidue`, and :class:`SubstituentResidue` are
useful for working with bag-of-residues where topology and connections are not relevant, but
the aggregate composition is known. These types work with a subset of the IUPAC three letter code
for specifying compositions.
>>> g = GlycanComposition(Hex=3, HexNAc=2)
>>> g["Hex"]
3
>>> r = MonosaccharideResidue.from_iupac_lite("Hex")
>>> r
MonosaccharideResidue(Hex)
>>> g[r]
3
>>> import glypy
>>> abs(g.mass() - glypy.motifs["N-Glycan core basic 1"].mass()) < 1e-5
True
>>> g2 = GlycanComposition(Hex=5)
>>> g["@n-acetyl"] = -2 # Remove two n-acetyl groups from the composition
>>> abs(g.mass() - g2.mass()) < 1e-5
True
'''
import warnings
try:
from collections.abc import Mapping
except ImportError:
from collections import Mapping
from glypy.utils import tree, uid
from glypy.utils.multimap import OrderedMultiMap
from glypy.composition import Composition
from glypy.structure.base import SaccharideCollection, MoleculeBase
from glypy.structure.glycan import Glycan
from glypy.structure.monosaccharide import Monosaccharide, ReducedEnd
from glypy.structure.substituent import Substituent
from glypy.structure.constants import (Anomer, Stem, Configuration, UnknownPosition)
from glypy.io import iupac
from glypy.io.iupac import (
monosaccharide_reference as _monosaccharide_reference,
resolve_special_base_type as _resolve_special_base_type,
IUPACError)
from glypy.composition.base import formula
from glypy.composition.composition_transform import (
derivatize, has_derivatization, strip_derivatization,
_derivatize_reducing_end, _strip_derivatization_reducing_end,
make_counter)
from six import string_types as basestring
# Shared registry of residue templates keyed by name. It starts out empty, is handed to the
# substituent serializer when ``to_iupac_lite`` is built, and is populated further down in
# this module once :class:`MonosaccharideResidue` has been defined.
monosaccharide_residue_reference = {}
class IUPACLiteMonosaccharideDeserializer(iupac.SimpleMonosaccharideDeserializer):
    """Parser for the `iupac_lite` residue notation.

    Instances are callable; calling one is the same as calling
    :meth:`monosaccharide_from_iupac`.
    """

    def monosaccharide_from_iupac(self, monosaccharide_str, residue_class=None):
        """Turn an `iupac_lite` string into a residue object.

        The string is first matched as a monosaccharide. When that fails with
        :class:`IUPACError` the following fallbacks are tried in order: a
        :class:`MolecularComposition` (if the string starts with its sigil), the
        ``"Pent"`` -> ``"Pen"`` spelling correction, a :class:`SubstituentResidue`,
        and finally a :class:`MolecularComposition` again.

        Parameters
        ----------
        monosaccharide_str: str
            The text to parse
        residue_class: type, optional
            The class whose ``from_monosaccharide`` builds the result for a
            monosaccharide match. Defaults to :class:`MonosaccharideResidue`.

        Returns
        -------
        MonosaccharideResidue, SubstituentResidue or MolecularComposition

        Raises
        ------
        IUPACError
            If none of the interpretations succeed
        TypeError
            If matching the pattern raised :class:`TypeError`
        """
        if residue_class is None:
            residue_class = MonosaccharideResidue
        try:
            match_dict = self.extract_pattern(monosaccharide_str)
            residue = self.build_residue(match_dict)
        except IUPACError:
            # Not a monosaccharide: walk through the alternative residue kinds.
            if monosaccharide_str.startswith(MolecularComposition.sigil):
                return MolecularComposition.from_iupac_lite(monosaccharide_str)
            if monosaccharide_str == "Pent":
                warnings.warn("Pent is not proper IUPAC. Converting to Pen")
                return self.monosaccharide_from_iupac("Pen", residue_class)
            try:
                return SubstituentResidue.from_iupac_lite(monosaccharide_str)
            except Exception:
                try:  # pragma: no cover
                    return MolecularComposition.from_iupac_lite(monosaccharide_str)
                except Exception:
                    raise IUPACError("Cannot find pattern in {}".format(monosaccharide_str))
        except TypeError:
            raise TypeError(
                "Expected string, received {} ({})".format(
                    monosaccharide_str, type(monosaccharide_str)))
        derivatization = match_dict.get("derivatization", '')
        if not (derivatization is None or derivatization == ""):
            self.apply_derivatization(residue, derivatization)
        return residue_class.from_monosaccharide(residue)

    def build_residue(self, match_dict):
        """Build the residue via the parent class and discard the linkage it also returns."""
        parsed_residue, _linkage = super(
            IUPACLiteMonosaccharideDeserializer, self).build_residue(match_dict)
        return parsed_residue

    def __call__(self, string, residue_class=None):
        """Alias of :meth:`monosaccharide_from_iupac`."""
        return self.monosaccharide_from_iupac(string, residue_class=residue_class)
class IUPACLiteMonosaccharideSerializer(iupac.SimpleMonosaccharideSerializer):
    """Serializer producing the `iupac_lite` residue notation."""

    def monosaccharide_to_iupac(self, residue):
        """Write a :class:`Monosaccharide`-like object as `iupac_lite` text.

        Only a limited subset of the IUPAC three letter code is used. The result
        carries enough information to rebuild a :class:`MonosaccharideResidue`
        with the same base type, native substituents and modifications.

        .. note::
            This function is not suitable for use on whole |Glycan| objects. Instead,
            see :meth:`GlycanComposition.from_glycan` and :meth:`GlycanComposition.serialize`

        Parameters
        ----------
        residue: Monosaccharide
            The object to be encoded

        Returns
        -------
        str

        See Also
        --------
        :func:`from_iupac_lite`
        """
        try:
            return super(IUPACLiteMonosaccharideSerializer, self).monosaccharide_to_iupac(residue)
        except (AttributeError, TypeError, ValueError):
            # A genuine monosaccharide failing to serialize is a real error and must
            # propagate; anything else is rendered through its own ``str``.
            if isinstance(residue, Monosaccharide):
                raise
            return str(residue)
# Module-level parser instance; call it as ``from_iupac_lite(string, residue_class=...)``.
from_iupac_lite = IUPACLiteMonosaccharideDeserializer()
# Module-level serializer instance, built from the IUPAC monosaccharide reference table and
# a substituent serializer that shares ``monosaccharide_residue_reference``.
to_iupac_lite = IUPACLiteMonosaccharideSerializer(
    iupac.monosaccharide_reference,
    iupac.SubstituentSerializer(monosaccharide_residue_reference))
# Make the serializer available on Monosaccharide under the name 'iupac_lite'.
Monosaccharide.register_serializer('iupac_lite', to_iupac_lite)
def drop_stem(residue, force=False):
    """Erase the stem (carbon ring stereochemical classification) of `residue`.

    Nothing is changed when :func:`~glypy.io.iupac.resolve_special_base_type`
    recognizes the residue, unless ``force`` is |True|.

    Parameters
    ----------
    residue : :class:`~.Monosaccharide`
        The monosaccharide to change
    force : bool, optional
        Whether or not to override known special case named monosaccharides

    Returns
    -------
    :class:`~.Monosaccharide`
        The same object that was passed in, mutated in place when applicable
    """
    is_generic = _resolve_special_base_type(residue) is None
    if not (is_generic or force):
        return residue
    residue.stem = (None,)
    return residue
def drop_positions(residue, force=False):
    """Replace every modification and substituent-link position with an unknown position.

    Nothing is changed when :func:`~.iupac.resolve_special_base_type`
    recognizes the residue, unless ``force`` is |True|.

    Parameters
    ----------
    residue : :class:`~.Monosaccharide`
        The monosaccharide to change
    force : bool, optional
        Whether or not to override known special case named monosaccharides

    Returns
    -------
    :class:`~.Monosaccharide`
        The same object that was passed in, mutated in place when applicable
    """
    is_generic = _resolve_special_base_type(residue) is None
    if not (is_generic or force):
        return residue
    # Re-home all modifications under the unknown position.
    relocated = OrderedMultiMap()
    for _position, modification in residue.modifications.items():
        relocated[UnknownPosition] = modification
    residue.modifications = relocated
    # Iterate over a snapshot: each link is broken and re-applied while looping.
    for _position, link in list(residue.substituent_links.items()):
        link.break_link(refund=True)
        link.parent_position = UnknownPosition
        link.apply()
    return residue
def drop_configuration(residue, force=False):
    """Erase the absolute stereochemical configuration of `residue`.

    Nothing is changed when :func:`~.iupac.resolve_special_base_type`
    recognizes the residue, unless ``force`` is |True|.

    Parameters
    ----------
    residue : :class:`~.Monosaccharide`
        The monosaccharide to change
    force : bool, optional
        Whether or not to override known special case named monosaccharides

    Returns
    -------
    :class:`~.Monosaccharide`
        The same object that was passed in, mutated in place when applicable
    """
    is_generic = _resolve_special_base_type(residue) is None
    if not (is_generic or force):
        return residue
    residue.configuration = (None,)
    return residue
# Elemental composition of one water molecule (H2O). It is subtracted from every
# MonosaccharideResidue on construction and cloned as the default composition offset
# of composition containers in this module.
water_composition = Composition({"O": 1, "H": 2})
class ResidueBase(object):
    """Mixin giving every residue-like type a common minimal interface.

    The ``drop_*`` methods defined here are deliberate no-ops that return ``self``,
    so callers may invoke them on any residue type without checking what it is.
    :class:`MonosaccharideResidue` replaces them with the module-level functions
    of the same name, which do mutate the object.
    """
    __slots__ = ()

    def drop_stem(self, force=False):
        """No-op counterpart of the module-level :func:`drop_stem`.

        Parameters
        ----------
        force : bool, optional
            Accepted for signature compatibility; ignored by this implementation

        Returns
        -------
        ResidueBase
            ``self``, unmodified
        """
        return self

    def drop_positions(self, force=False):
        """No-op counterpart of the module-level :func:`drop_positions`.

        Parameters
        ----------
        force : bool, optional
            Accepted for signature compatibility; ignored by this implementation

        Returns
        -------
        ResidueBase
            ``self``, unmodified
        """
        return self

    def drop_configuration(self, force=False):
        """No-op counterpart of the module-level :func:`drop_configuration`.

        Parameters
        ----------
        force : bool, optional
            Accepted for signature compatibility; ignored by this implementation

        Returns
        -------
        ResidueBase
            ``self``, unmodified
        """
        return self

    def to_iupac_lite(self):
        """Encode this residue using `iupac_lite` notation.

        Delegates to the module-level :func:`to_iupac_lite` serializer.

        Returns
        -------
        str
        """
        return to_iupac_lite(self)

    @classmethod
    def from_iupac_lite(cls, string):
        """Parse a string of `iupac_lite` notation to produce a residue object.

        Delegates to the module-level :func:`from_iupac_lite` parser, passing
        ``cls`` as the ``residue_class``.

        Parameters
        ----------
        string : :class:`str`
            The string to parse

        Returns
        -------
        ResidueBase
        """
        return from_iupac_lite(string, residue_class=cls)
class MonosaccharideResidue(Monosaccharide, ResidueBase):
    """A :class:`Monosaccharide`-like object detached from any glycan topology.

    It does not connect to other :class:`~.Monosaccharide` objects and carries no
    topology-related properties, specifically :attr:`anomer`.

    One water molecule is removed from the composition on construction, reflecting
    the residual nature of the object; aggregates of residues account for this.
    Backbone occupancy differs from a free monosaccharide as well.

    Instances are hashable and comparable through their `iupac_lite` text, which is
    what :meth:`__str__` and :meth:`name` return.
    """
    __slots__ = ()

    @classmethod
    def from_monosaccharide(cls, monosaccharide, configuration=False, stem=True, ring=False):
        """Build a residue from a |Monosaccharide|, keeping derivatization when possible.

        `monosaccharide` is cloned first and is not itself modified by this method.

        Parameters
        ----------
        monosaccharide : Monosaccharide
            The monosaccharide to be converted
        configuration : bool, optional
            Whether or not to preserve |Configuration|. Defaults to |False|
        stem : bool, optional
            Whether or not to preserve |Stem|. Defaults to |True|
        ring : bool, optional
            Whether or not to preserve |RingType|. Defaults to |False|

        Returns
        -------
        MonosaccharideResidue
        """
        residue = monosaccharide.clone(monosaccharide_type=cls)
        mass_before = residue.mass()
        deriv = has_derivatization(monosaccharide)
        strip_derivatization(residue)
        # Specially named base types keep their configuration and stem.
        is_generic = _resolve_special_base_type(monosaccharide) is None
        if is_generic and not configuration:
            residue.configuration = (Configuration.x,)
        if is_generic and not stem:
            residue.stem = (Stem.x,)
        if not ring:
            residue.ring_start = residue.ring_end = UnknownPosition
        if deriv:
            derivatize(residue, deriv)
        mass_changed = residue.mass() != mass_before
        if mass_changed and not deriv:
            residue.composition += water_composition
        return residue

    def __init__(self, *args, **kwargs):
        super(MonosaccharideResidue, self).__init__(*args, **kwargs)
        # A residue has lost one water relative to the free monosaccharide.
        self.composition -= water_composition
        self.anomer = Anomer.x

    def clone(self, *args, **kwargs):
        '''
        Duplicate this residue and its |Substituent| objects into an independent instance.

        :attr:`links` are not copied, as this would cause recursive duplication of the
        entire |Glycan| graph.

        Parameters
        ----------
        prop_id: :class:`bool`
            Whether to copy :attr:`id` from ``self`` to the new instance
        fast: :class:`bool`
            Whether to use the fast-path initialization process in :meth:`MonosaccharideResidue.__init__`
        monosaccharide_type: :class:`type`
            A subclass of :class:`MonosaccharideResidue` to use. Defaults to
            :class:`MonosaccharideResidue` when not given.

        Returns
        -------
        :class:`MonosaccharideResidue`
        '''
        kwargs.setdefault("monosaccharide_type", MonosaccharideResidue)
        return super(MonosaccharideResidue, self).clone(*args, **kwargs)

    def __repr__(self):  # pragma: no cover
        return "MonosaccharideResidue(%s)" % self.name()

    def __str__(self):  # pragma: no cover
        return to_iupac_lite(self)

    def __hash__(self):  # pragma: no cover
        """Hash of :meth:`MonosaccharideResidue.name`.

        Returns
        -------
        int
        """
        return hash(self.name())

    def open_attachment_sites(self, max_occupancy=0):
        '''
        Report the backbone positions still available for binding.

        The result of the parent implementation is used with its last two sites
        removed: a :class:`MonosaccharideResidue` has two fewer open attachment
        sites than the equivalent :class:`~.Monosaccharide`.

        Parameters
        ----------
        max_occupancy: int
            The number of objects that may already be bound at a site before it
            is considered unavailable for attachment.

        Returns
        -------
        :class:`list`:
            The positions open for binding
        :class:`int`:
            The number of bound but unknown locations on the backbone.
        '''
        parent_result = super(
            MonosaccharideResidue, self).open_attachment_sites(max_occupancy)
        sites, unknowns = parent_result
        return sites[:-2], unknowns

    def __eq__(self, other):
        '''
        Equality by `iupac_lite` text.

        `other` must be a :class:`MonosaccharideResidue` or a :class:`str`; anything
        else, including :const:`None`, compares unequal.
        '''
        comparable = isinstance(other, (MonosaccharideResidue, str))
        return comparable and str(self) == str(other)

    def name(self):
        """Name this object according to `iupac_lite`.

        Returns
        -------
        str

        See Also
        --------
        :meth:`to_iupac_lite`
        """
        return to_iupac_lite(self)

    def residue_name(self):
        """Name this object according to `iupac_lite`, omitting any derivatization.

        Everything from the first ``^`` onwards is dropped from :meth:`name`.

        Returns
        -------
        str

        See Also
        --------
        :meth:`to_iupac_lite`
        :meth:`name`
        """
        return self.name().partition("^")[0]

    # Use the mutating module-level implementations instead of the ResidueBase no-ops.
    drop_stem = drop_stem
    drop_positions = drop_positions
    drop_configuration = drop_configuration

    def copy_underivatized(self):
        """Create a copy of this residue without derivatization.

        Returns
        -------
        :class:`MonosaccharideResidue`
        """
        return from_iupac_lite.strip_derivatization(str(self), residue_class=self.__class__)
# Populate the shared registry with a MonosaccharideResidue built from every entry of the
# IUPAC monosaccharide reference table, keeping the same keys.
monosaccharide_residue_reference.update({
    k: MonosaccharideResidue.from_monosaccharide(v) for k, v in _monosaccharide_reference.items()
})
class FrozenMonosaccharideResidue(MonosaccharideResidue):
    '''
    A subclass of |MonosaccharideResidue| which caches the result of :func:`to_iupac_lite` and instances returned
    by :meth:`FrozenMonosaccharideResidue.clone` and :meth:`FrozenMonosaccharideResidue.from_iupac_lite`.
    Also treated as immutable after initialization through :meth:`FrozenMonosaccharideResidue.from_monosaccharide`.

    Note that directly calling :meth:`FrozenMonosaccharideResidue.from_monosaccharide` will not retrieve instances
    from the cache directly, and direct initialization using normal instance creation will neither touch the cache
    nor freeze the instance.

    This type is intended for use with :class:`FrozenGlycanComposition` to minimize the number of times
    :func:`from_iupac_lite` is called.
    '''
    __slots__ = ("_frozen", "_total_composition", "_hash", "_name", "_mass")

    # Attributes that may still be written after the instance has been frozen.
    _attribute_caching_slots = (
        '_total_composition', '_hash',
        '_mass'
    )

    # Class-wide instance cache, keyed by `iupac_lite` string.
    __cache = {}

    @classmethod
    def from_monosaccharide(cls, monosaccharide, *args, **kwargs):
        """Build a residue as the parent class does, then intern and freeze it.

        If an instance with the same string form is already cached, that cached
        instance is returned instead of the newly built one.

        Returns
        -------
        FrozenMonosaccharideResidue
        """
        inst = super(FrozenMonosaccharideResidue, cls).from_monosaccharide(monosaccharide, *args, **kwargs)
        if str(inst) not in inst.get_cache():
            inst.get_cache()[str(inst)] = inst
            inst._frozen = True
        else:
            inst = inst.get_cache()[str(inst)]
        return inst

    def __init__(self, *args, **kwargs):
        self._total_composition = None
        self._mass = None
        # _name is left undefined to use a fast-path in __str__ by not testing for
        # presence first.
        super(FrozenMonosaccharideResidue, self).__init__(*args, **kwargs)
        self._frozen = kwargs.get("_frozen", False)

    def __setattr__(self, key, value):
        """Reject writes on frozen instances, except to the caching slots.

        A rejected write also evicts this instance from the class cache before
        raising ``FrozenError``.
        NOTE(review): ``FrozenError`` is not defined in the visible part of this
        module; it is presumably defined elsewhere - confirm.
        """
        try:
            is_frozen = self._frozen
        except AttributeError:
            # ``_frozen`` has not been assigned yet (during construction/unpickling).
            is_frozen = False
        if is_frozen and key not in FrozenMonosaccharideResidue._attribute_caching_slots:
            self.get_cache().pop(str(self), None)
            raise FrozenError("Cannot change a frozen object")
        else:
            object.__setattr__(self, key, value)

    def __repr__(self):  # pragma: no cover
        return "FrozenMonosaccharideResidue(%s)" % self.name()

    def __hash__(self):  # pragma: no cover
        """Hash of the string form of `self`, computed once and stored in ``_hash``.

        Returns
        -------
        int
        """
        try:
            return self._hash
        except AttributeError:
            self._hash = hash(str(self))
            return self._hash

    def _update_hash(self):
        """Recompute, store and return the hash of the string form of `self`."""
        self._hash = hash(str(self))
        return self._hash

    def __eq__(self, other):
        '''
        Test for equality by `iupac_lite` string.

        Against another :class:`MonosaccharideResidue` the cached ``_name`` values are
        compared when both exist, falling back to ``str``. Against a :class:`str` the
        string form of `self` is compared. Any other object, including :const:`None`,
        compares unequal.
        '''
        if isinstance(other, MonosaccharideResidue):
            try:
                return self._name == other._name
            except AttributeError:
                return str(self) == str(other)
        if isinstance(other, str):
            return str(self) == other
        return False

    def _save_to_cache(self):
        """Store `self` in the class cache under its string form."""
        self.get_cache()[str(self)] = self

    def __str__(self):
        try:
            return self._name
        except AttributeError:
            name = to_iupac_lite(self)
            self._name = name
            return name

    def clone(self, *args, **kwargs):
        '''
        Copy this residue, or return ``self`` when a copy is unnecessary.

        A frozen instance asked for a :class:`FrozenMonosaccharideResidue` (the default
        ``monosaccharide_type``) returns itself. Otherwise the parent implementation
        creates a separate instance.

        Parameters
        ----------
        prop_id: :class:`bool`
            Whether to copy :attr:`id` from ``self`` to the new instance
        fast: :class:`bool`
            Whether to use the fast-path initialization process in :meth:`Monosaccharide.__init__`
        monosaccharide_type: :class:`type`
            A subclass of :class:`Monosaccharide` to use

        Returns
        -------
        :class:`Monosaccharide`
        '''
        if self._frozen and kwargs.get(
                "monosaccharide_type",
                FrozenMonosaccharideResidue) is FrozenMonosaccharideResidue:
            return self
        else:
            return super(FrozenMonosaccharideResidue, self).clone(*args, **kwargs)

    def __getstate__(self):
        state = super(FrozenMonosaccharideResidue, self).__getstate__()
        state['_name'] = str(self)
        state['_total_composition'] = self.total_composition()
        return state

    def __setstate__(self, state):
        # Restored instances start out unfrozen so that the assignments below are allowed.
        self._frozen = False
        self._total_composition = state.get('_total_composition')
        # ``__init__`` does not run on unpickling; initialise the mass cache slot so
        # that ``mass()`` cannot hit an unset slot if the instance is frozen later.
        self._mass = None
        self._name = state.get('_name')
        self._hash = hash(str(self))
        super(FrozenMonosaccharideResidue, self).__setstate__(state)

    @classmethod
    def get_cache(cls):
        """Return the class-wide instance cache (a :class:`dict` keyed by string)."""
        return cls.__cache

    @classmethod
    def from_iupac_lite(cls, string):
        """Parse `string`, consulting and updating the class cache.

        On a cache miss the string is parsed. If an equal object is already cached
        under another key, `string` is registered as an additional key for that
        object; otherwise the new result is cached under `string`.

        Parameters
        ----------
        string : :class:`str`
            The string to parse

        Returns
        -------
        The cached object on a hit, otherwise the freshly parsed result
        """
        cache = cls.get_cache()
        try:
            return cache[string]
        except KeyError:
            result = from_iupac_lite(string, residue_class=cls)
            if string not in cache:
                for k, v in cache.items():
                    if v == result:
                        # Safe despite mutating during iteration: we leave the loop at once.
                        cache[string] = v
                        break
                else:
                    cache[string] = result
            return result

    def total_composition(self):
        """Return the total composition, cached in ``_total_composition`` when frozen."""
        if self._frozen:
            if self._total_composition is None:
                self._total_composition = super(FrozenMonosaccharideResidue, self).total_composition()
                # A newly computed composition invalidates any cached mass.
                self._mass = None
            return self._total_composition
        else:
            return super(FrozenMonosaccharideResidue, self).total_composition()

    def mass(self, average=False, charge=0, mass_data=None, substituents=True):
        '''
        Calculates the total mass of ``self``.

        For a frozen instance the default call (monoisotopic, uncharged, no custom
        ``mass_data``) is cached in ``_mass``.

        Parameters
        ----------
        average: bool, optional, defaults to False
            Whether or not to use the average isotopic composition when calculating masses.
            When ``average == False``, masses are calculated using monoisotopic mass.
        charge: int, optional, defaults to 0
            If charge is non-zero, m/z is calculated, where m is the theoretical mass, and z is ``charge``
        mass_data: dict, optional
            If mass_data is None, standard NIST mass and isotopic abundance data are used. Otherwise the
            contents of mass_data are assumed to contain elemental mass and isotopic abundance information.
            Defaults to :const:`None`.
        substituents: bool, optional, defaults to True
            Accepted for signature compatibility; this implementation does not read it
            and always computes the mass from :meth:`total_composition`.

        Returns
        -------
        :class:`float`

        See also
        --------
        :func:`glypy.composition.composition.calculate_mass`
        '''
        if not average and charge == 0 and mass_data is None and self._frozen:
            if self._mass is None:
                self._mass = self.total_composition().calc_mass()
            return self._mass
        return self.total_composition().calc_mass(average=average, charge=charge, mass_data=mass_data)
class SubstituentResidue(Substituent, ResidueBase):
    r'''
    A substituent molecule that is not attached to any particular
    monosaccharide residue.

    .. note::
        :class:`SubstituentResidue`'s composition value includes the losses for forming a bond between
        a monosaccharide residue and the substituent.

    Attributes
    ----------
    name: str
        As in |Substituent|, but with :attr:`SubstituentResidue.sigil` prepended.
    composition: |Composition|
    links: |OrderedMultiMap|
    _order: |int|
    '''
    #: All substituent string identifiers are prefixed with this character
    #: for the :func:`from_iupac_lite` parser
    sigil = "@"

    def __init__(self, name, composition=None, id=None, links=None,
                 can_nh_derivatize=None, is_nh_derivatizable=None, derivatize=False,
                 attachment_composition=None):
        own_sigil = SubstituentResidue.sigil
        if name.startswith(own_sigil):
            # Strip the leading sigil before handing the name to Substituent.
            name = name[1:]
        elif name.startswith(MolecularComposition.sigil):
            raise TypeError("Invalid Sigil. SubstituentResidue instances must be given names with either"
                            " no sigil prefix or with '@'")
        super(SubstituentResidue, self).__init__(
            name=name, composition=composition, links=links, id=id,
            can_nh_derivatize=can_nh_derivatize, is_nh_derivatizable=is_nh_derivatizable,
            derivatize=derivatize, attachment_composition=attachment_composition)
        self._residue_name = own_sigil + self._name
        # Pre-apply the bond formation losses to the composition.
        self.composition -= self.attachment_composition
        self.composition -= {"H": 1}
        self._hash = None

    def __hash__(self):  # pragma: no cover
        """Hash of the sigil-prefixed name, stored in ``_hash`` after first use.

        Returns
        -------
        int
        """
        try:
            cached = self._hash
            if cached is None:
                cached = self._hash = hash(self._residue_name)
            return cached
        except AttributeError:
            return hash(self._residue_name)

    def __getstate__(self):
        state = super(SubstituentResidue, self).__getstate__()
        state['_residue_name'] = self._residue_name
        return state

    def __setstate__(self, state):
        super(SubstituentResidue, self).__setstate__(state)
        self._residue_name = state.get("_residue_name")

    def to_iupac_lite(self):
        """Return the sigil-prefixed name of this substituent."""
        return self._residue_name

    __str__ = to_iupac_lite

    def __repr__(self):  # pragma: no cover
        return "SubstituentResidue(%s)" % self._residue_name

    @classmethod
    def from_iupac_lite(cls, name):
        """Build an instance directly from `name`, with or without the leading sigil."""
        return cls(name)

    def __eq__(self, other):
        """Compare by name; a :class:`str` is compared with the sigil-prefixed name."""
        if isinstance(other, str):
            return other == self._residue_name
        if isinstance(other, SubstituentResidue):
            return self.name == other.name
        return False

    def __ne__(self, other):  # pragma: no cover
        return not self == other

    def _backsolve_original_composition(self):
        """Parent result with the hydrogen removed in :meth:`__init__` added back."""
        original = super(SubstituentResidue, self)._backsolve_original_composition()
        original += {"H": 1}
        return original

    def copy_underivatized(self):
        """Return a clone of `self` with derivatization stripped."""
        duplicate = self.clone()
        strip_derivatization(duplicate)
        return duplicate
class MolecularComposition(MoleculeBase, ResidueBase):  # pragma: no cover
    """A named, arbitrary chemical composition usable as a composition key.

    The text form is ``<sigil><name><sigil><formula>``, for example ``#name#H2O``.

    Attributes
    ----------
    name : str
        The label of the molecule; equality and hashing are based on it
    composition : |Composition|
        The elemental composition of the molecule
    """
    #: Character that opens the text form and separates name from formula
    sigil = "#"
    __slots__ = ('name', 'composition', '_hash')

    def __init__(self, name, composition):
        self.name = name
        self.composition = composition
        self._hash = None

    def mass(self, average=False, charge=0, mass_data=None):
        """Return the mass of :attr:`composition` as computed by its ``calc_mass``."""
        return self.composition.calc_mass(average=average, charge=charge, mass_data=mass_data)

    def __repr__(self):
        return "%s%s%s%s" % (
            self.sigil, self.name, self.sigil,
            formula(self.composition))

    to_iupac_lite = __repr__

    def __reduce__(self):
        return self.__class__, (self.name, self.composition)

    def open_attachment_sites(self, *args, **kwargs):
        """Always ``0``; all arguments are ignored."""
        return 0

    def clone(self):
        """Return a new instance with the same name and a copy of the composition."""
        return self.__class__(self.name, Composition(self.composition))

    def total_composition(self):
        """Return a clone of :attr:`composition`."""
        return self.composition.clone()

    @classmethod
    def from_iupac_lite(cls, string):
        """Parse the ``<sigil><name><sigil><formula>`` text form.

        Parameters
        ----------
        string : str
            The text to parse

        Returns
        -------
        MolecularComposition

        Raises
        ------
        TypeError
            If `string` does not start with :attr:`sigil`
        ValueError
            If `string` does not contain exactly two sigils
        """
        if not string.startswith(cls.sigil):
            raise TypeError("%s does not start with header %s" % (string, cls.sigil))
        # Split on the class sigil (not a literal "#") so the check above and the
        # parsing below can never disagree.
        parts = string.split(cls.sigil)
        if len(parts) != 3:
            raise ValueError(
                "%s is not of the form %sname%sformula" % (string, cls.sigil, cls.sigil))
        _, name, composition = parts
        return cls(name, Composition(composition))

    def __hash__(self):  # pragma: no cover
        """Hash of :attr:`name`, stored in ``_hash`` after first use.

        Returns
        -------
        int
        """
        try:
            if self._hash is None:
                self._hash = hash(self.name)
            return self._hash
        except AttributeError:
            return hash(self.name)

    def __eq__(self, other):
        """Equal to a name, to an object with the same ``name``, or to an object whose ``str`` is the name."""
        try:
            return self.name == other or self.name == other.name
        except AttributeError:
            return self.name == str(other)

    def __ne__(self, other):
        return not (self == other)
class _CompositionBase(dict):
def _setitem_fast(self, key, value):
dict.__setitem__(self, key, value)
def _getitem_fast(self, key):
try:
return dict.__getitem__(self, key)
except KeyError:
return 0
def __reduce__(self):
return self.__class__, (), self.__getstate__()
def __getstate__(self):
d = {
'mapping': dict(self),
'reducing_end': self._reducing_end,
'composition_offset': self._composition_offset
}
return d
def __setstate__(self, state):
self.update(state['mapping'])
self._reducing_end = state['reducing_end']
self._composition_offset = state['composition_offset']
@classmethod
def _empty(cls):
inst = cls.__new__(cls)
inst._composition_offset = water_composition.clone()
inst._reducing_end = None
inst._mass = None
return inst
def _update_from_typed_map(self, template, copy_nodes=False):
if copy_nodes:
for name, count in template.items():
self._setitem_fast(name.clone(), count)
else:
for name, count in template.items():
self._setitem_fast(name, count)
reduced = template.reducing_end
if reduced is not None:
self.reducing_end = reduced.clone()
self._mass = None
def serialize(self):
"""Convert a glycan composition into a curly brace-enclosed string specifying
pairs of `iupac_lite` and a integer count.
If the glycan is reduced, it will be appended to the closing brace following a
`$` character.
Returns
-------
str
"""
form = "{%s}" % '; '.join("{}:{}".format(str(k), v) for k, v in sorted(
self.items(), key=lambda x: (x[0].mass(), str(x[0]))) if v != 0)
reduced = self._reducing_end
if reduced is not None:
form = "%s$%s" % (form, formula(reduced.total_composition()))
return form
try:
from glypy._c.structure.glycan_composition import _CompositionBase
except ImportError:
pass
[docs]class GlycanComposition(_CompositionBase, SaccharideCollection):
"""
Describe a glycan as a collection of :class:`MonosaccharideResidue` counts without
explicit linkage information relating how each monosaccharide is connected to its neighbors.
This class subclasses |dict|, and assumes that keys will either be :class:`MonosaccharideResidue`
instances, :class:`SubstituentResidue` instances, or strings in `iupac_lite` format which will be parsed
into one of these types. While other types may be used, this is not recommended. All standard |dict| methods
are supported.
|GlycanComposition| objects may be derivatized just as |Glycan| objects are, with
:func:`glypy.composition.composition_transform.derivatize` and
:func:`glypy.composition.composition_transform.strip_derivatization`.
GlycanComposition objects also support composition arithmetic, and can be added or subtracted from each other
or multiplied by an integer.
As GlycanComposition is not a complete structure, they cannot be translated into text formats as
full |Glycan| objects are. They may instead be converted to and from a short-form text notation using
:meth:`GlycanComposition.serialize` and reconstructed from this format using :meth:`GlycanComposition.parse`.
Attributes
----------
reducing_end : ReducedEnd
Describe the reducing end of the aggregate without binding it to a specific monosaccharide.
This will contribute to composition and mass calculations.
_composition_offset: CComposition
Account for the one water molecule's worth of composition left over from applying the "residue"
transformation to each monosaccharide in the aggregate.
"""
_monosaccharide_type = MonosaccharideResidue
_key_parser = staticmethod(from_iupac_lite)
@classmethod
def from_glycan(cls, glycan):
    """
    Build a |GlycanComposition| out of a |Glycan|.

    The glycan's residues are added through :meth:`extend` and its reducing end is
    copied over. If the root is derivatized, the composition offset is increased by
    twice the derivatizing group's composition net of its attachment loss.

    Parameters
    ----------
    glycan : :class:`~.Glycan`
        The instance to be converted

    Returns
    -------
    GlycanComposition
    """
    inst = cls()
    glycan = tree(glycan)
    inst.extend(glycan)
    inst.reducing_end = glycan.reducing_end
    deriv = has_derivatization(glycan.root)
    if not deriv:
        return inst
    net_group = deriv.total_composition() - deriv.attachment_composition_loss()
    inst._composition_offset += net_group * 2
    return inst
def __init__(self, *args, **kwargs):
    """Initialize a :class:`GlycanComposition`, imitating the :class:`dict`
    initialization signature.

    All arguments are forwarded to :meth:`update`. When the first positional
    argument is itself a :class:`GlycanComposition`, its :attr:`reducing_end` and
    composition offset are copied as well; otherwise the offset is one water.

    Parameters
    ----------
    *args:
        Arbitrary positional arguments
    **kwargs:
        Arbitrary keyword arguments
    """
    # dict.__init__ is deliberately not called: it only runs the C update method
    # and its parameter parsing is expensive.
    self._reducing_end = None
    self._mass = None
    if args or kwargs:
        self.update(*args, **kwargs)
    template = args[0] if args else None
    if not isinstance(template, GlycanComposition):
        self._composition_offset = water_composition.clone()
        return
    reduced = template.reducing_end
    if reduced is not None:
        self.reducing_end = reduced.clone()
    self._composition_offset = template._composition_offset.clone()
def __setitem__(self, key, value):
    """
    Store `value` as the count of `key`.

    String keys are parsed with ``self._key_parser`` first. A monosaccharide key
    that carries a reducing end hands that reducing end over to `self`; a clone of
    the key with the reducing end removed is what gets stored.

    Parameters
    ----------
    key : str, MonosaccharideResidue, SubstituentResidue, or MolecularComposition
        The entity to store
    value : int
        The value to store; it is passed through :class:`int`
    """
    if isinstance(key, basestring):
        key = self._key_parser(key)
    is_monosaccharide = key.node_type is Monosaccharide.node_type
    if is_monosaccharide and key.reducing_end is not None:
        self.reducing_end = key.reducing_end
        # Work on a clone so the caller's object keeps its reducing end.
        key = key.clone()
        key.reducing_end = None
    count = int(value)
    _CompositionBase.__setitem__(self, key, count)
    # Any cached mass is stale after a write.
    self._mass = None
def __getitem__(self, key):
    """
    Return the count stored for `key`, or ``0`` when it is absent.

    String keys are parsed with ``self._key_parser`` first.

    Parameters
    ----------
    key : str, MonosaccharideResidue, SubstituentResidue, or MolecularComposition
        The entity to look up

    Returns
    -------
    int
    """
    if isinstance(key, basestring):
        key = self._key_parser(key)
    try:
        count = _CompositionBase.__getitem__(self, key)
    except KeyError:
        return 0
    return count
def __delitem__(self, key):
    """
    Remove `key` from the composition and drop the cached mass.

    A string `key` is first converted with :attr:`_key_parser`.
    """
    target = self._key_parser(key) if isinstance(key, basestring) else key
    _CompositionBase.__delitem__(self, target)
    self._mass = None
def mass(self, average=False, charge=0, mass_data=None):
    '''
    Calculates the total mass of ``self``.

    .. note::
        Only the neutral, monoisotopic mass computed with the default mass
        table is cached in :attr:`_mass`. Requests with ``average=True``, a
        non-zero ``charge`` or a custom ``mass_data`` are always recomputed
        and never read from or written to the cache.

    Parameters
    ----------
    average: bool, optional, defaults to False
        Whether or not to use the average isotopic composition when calculating masses.
        When ``average == False``, masses are calculated using monoisotopic mass.
    charge: int, optional, defaults to 0
        If charge is non-zero, m/z is calculated, where m is the theoretical mass, and z is ``charge``
    mass_data: dict, optional
        If mass_data is :const:`None`, standard NIST mass and isotopic abundance data are used. Otherwise the
        contents of mass_data are assumed to contain elemental mass and isotopic abundance information.
        Defaults to :const:`None`.

    Returns
    -------
    :class:`float`

    See also
    --------
    :func:`glypy.composition.composition.calculate_mass`
    '''
    # A cached value is only valid for the exact default request; a custom
    # mass table or average masses must not be mixed with it.
    cacheable = charge == 0 and not average and mass_data is None
    if cacheable and self._mass is not None:
        return self._mass
    if charge == 0:
        # Compute the offset with the same options as the residues so that
        # average / custom-table masses are consistent across all terms.
        mass = self._composition_offset.calc_mass(average=average, mass_data=mass_data)
        for residue_type, count in self.items():
            mass += residue_type.mass(average=average, charge=0, mass_data=mass_data) * count
        if self._reducing_end is not None:
            mass += self._reducing_end.mass(average=average, charge=0, mass_data=mass_data)
        if cacheable:
            self._mass = mass
    else:
        # Charged species are computed from the summed elemental composition.
        mass = self.total_composition().calc_mass(average=average, charge=charge, mass_data=mass_data)
    return mass
def update(self, *args, **kwargs):
    """
    Set counts on ``self`` from a mapping, an iterable of pairs, and/or keywords.

    Mirrors :meth:`dict.update`: the single optional positional argument may be
    a :class:`Mapping` of ``name -> count`` or an iterable of ``(name, count)``
    pairs. For backward compatibility a lone ``(name, count)`` tuple is also
    accepted. Entries whose count is ``0`` are skipped. Each entry is stored
    through ``self[name] = count``.

    Parameters
    ----------
    *args:
        At most one positional source of counts is used; if a different
        number of positional arguments is given they are ignored.
    **kwargs:
        Additional ``name=count`` entries, applied after the positional source.
    """
    pairs = ()
    if len(args) == 1:
        source = args[0]
        if isinstance(source, Mapping):
            pairs = source.items()
        elif (isinstance(source, tuple) and len(source) == 2 and
              isinstance(source[1], (int, float))):
            # Legacy form: a single (name, count) pair passed directly.
            pairs = (source,)
        else:
            # dict.update semantics: iterate the argument itself, not the
            # one-element ``args`` tuple wrapping it.
            pairs = source
    for name, count in pairs:
        if count != 0:
            self[name] = count
    for name, count in kwargs.items():
        if count != 0:
            self[name] = count
    self._mass = None
def extend(self, *args):
    """
    Increment the count of each given residue by one.

    The type of the first argument decides how all arguments are
    interpreted: :class:`MonosaccharideResidue` instances are used as-is,
    :class:`Monosaccharide` instances are converted with
    :meth:`MonosaccharideResidue.from_monosaccharide`, and a single
    :class:`Glycan` contributes each of its nodes whose ``node_type``
    matches :attr:`MonosaccharideResidue.node_type`.

    Calling with no arguments is a no-op.

    Raises
    ------
    TypeError
        If the first argument is none of the supported types.
    """
    if not args:
        # Nothing to add; avoids an IndexError on ``args[0]``.
        return
    first = args[0]
    if not isinstance(first, MonosaccharideResidue):
        if isinstance(first, Monosaccharide):
            args = map(MonosaccharideResidue.from_monosaccharide, args)
        elif isinstance(first, Glycan):
            args = map(
                MonosaccharideResidue.from_monosaccharide,
                [node for node in first
                 if node.node_type is MonosaccharideResidue.node_type])
        else:
            raise TypeError(
                "Can't convert {} to MonosaccharideResidue".format(
                    type(first)))
    for residue in args:
        self[residue] += 1
def __iadd__(self, other):
for elem, cnt in (other.items()):
self[elem] += cnt
return self
def __add__(self, other):
result = self.clone()
for elem, cnt in other.items():
result[elem] += cnt
return result
def __radd__(self, other):
return self + other
def __isub__(self, other):
for elem, cnt in other.items():
self[elem] -= cnt
return self
def __sub__(self, other):
result = self.clone()
for elem, cnt in other.items():
result[elem] -= cnt
return result
def __rsub__(self, other):
return (self - other) * (-1)
def __mul__(self, other):
if not isinstance(other, int):
raise TypeError(
'Cannot multiply Composition by non-integer',
other)
prod = {}
for k, v in self.items():
prod[k] = v * other
return self.__class__(prod)
def __rmul__(self, other):
return self * other
def __eq__(self, other):
    """
    Compare against a string or a mapping.

    A string is compared with ``str(self)``. A :class:`Mapping` is equal when
    both sides hold the same set of ``(key, count)`` pairs after discarding
    pairs whose count is falsy. Anything else is unequal.
    """
    if isinstance(other, basestring):
        return str(self) == other
    if isinstance(other, Mapping):
        own_pairs = {pair for pair in self.items() if pair[1]}
        their_pairs = {pair for pair in other.items() if pair[1]}
        return own_pairs == their_pairs
    return False
def __ne__(self, other):
return not (self == other)
def __neg__(self):
return -1 * self
def __missing__(self, key):
return 0
def __contains__(self, key):
    """Test membership of `key`, converting a string with :attr:`_key_parser` first."""
    lookup = self._key_parser(key) if isinstance(key, basestring) else key
    return _CompositionBase.__contains__(self, lookup)
def drop_stems(self):
    """
    Apply :func:`drop_stem` to every key, then :meth:`collapse` the result.

    Returns
    -------
    GlycanComposition
        ``self``
    """
    for residue in self:
        # Return value is discarded; presumably the key is altered in place.
        drop_stem(residue)
    self.collapse()
    return self
def drop_positions(self):
    """
    Apply the module-level :func:`drop_positions` to every key, then
    :meth:`collapse` the result.

    Returns
    -------
    GlycanComposition
        ``self``
    """
    for residue in self:
        # Return value is discarded; presumably the key is altered in place.
        drop_positions(residue)
    self.collapse()
    return self
def drop_configurations(self):
    """
    Apply :func:`drop_configuration` to every key, then :meth:`collapse`
    the result.

    Returns
    -------
    GlycanComposition
        ``self``
    """
    for residue in self:
        # Return value is discarded; presumably the key is altered in place.
        drop_configuration(residue)
    self.collapse()
    return self
def total_composition(self):
    '''
    Sum the elemental composition of everything held by ``self``.

    Starts from a clone of :attr:`_composition_offset`, adds each key's
    ``total_composition()`` multiplied by its count, and finally adds the
    reducing end's composition when one is set.

    Returns
    -------
    :class:`~glypy.composition.Composition`
    '''
    running_total = self._composition_offset.clone()
    for residue, amount in self.items():
        running_total += residue.total_composition() * amount
    reducing_end = self._reducing_end
    if reducing_end is not None:
        running_total += reducing_end.total_composition()
    return running_total
def collapse(self):
    '''
    Merge redundant keys.

    After a detail-removing operation such as :meth:`drop_positions`,
    :meth:`drop_configurations`, or :meth:`drop_stems`, several keys may
    describe the same type of molecule. The contents are snapshotted,
    ``self`` is cleared, and every entry is re-added so that such keys
    accumulate into a single count.

    Returns
    -------
    GlycanComposition
        ``self``
    '''
    snapshot = list(self.items())
    self.clear()
    for residue, amount in snapshot:
        self[residue] = self[residue] + amount
    return self
def query(self, query, exact=True, **kwargs):
    """Total the counts of every residue in `self` that matches `query`
    according to :func:`glypy.io.nomenclature.identity.is_a`

    Parameters
    ----------
    query : :class:`~.MonosaccharideResidue` or :class:`str`
        The residue to test each key against. A string is converted with
        :attr:`_key_parser` first.
    exact : bool, optional
        Passed to :func:`~.is_a`. Explicitly |True| by default
    **kwargs
        Passed to :func:`~.is_a`

    Returns
    -------
    int
        The summed count of all keys satisfying the `is-a` relationship

    See Also
    --------
    :func:`glypy.io.nomenclature.identity.is_a`
    """
    from glypy.io.nomenclature.identity import is_a
    if isinstance(query, basestring):
        query = self._key_parser(query)
    return sum(
        amount for residue, amount in self.items()
        if is_a(residue, query, exact=exact, **kwargs))
def reinterpret(self, references, exact=True, **kwargs):
    """Regroup the contents of `self` under the monosaccharides in
    `references`, summing the counts of all residues that satisfy an
    `is-a` relationship with each reference. Residues matching no
    reference are kept unchanged.

    .. note::
        The order of ``references`` matters: once a residue has been
        matched by a reference it is not offered to later references.
        A reference whose matched total is not positive is not stored.

    Parameters
    ----------
    references : :class:`Iterable` of :class:`~.MonosaccharideResidue`
        The monosaccharides with which to test for an `is-a` relationship
    exact : bool, optional
        Passed to :func:`~.is_a`. Explicitly |True| by default
    **kwargs
        Passed to :func:`~.is_a`

    Returns
    -------
    :class:`~.GlycanComposition`
        self after key collection and collapse
    """
    from glypy.io.nomenclature.identity import is_a
    aggregated = []
    unmatched = list(self.items())
    for reference in references:
        total = 0
        leftover = []
        for residue, amount in unmatched:
            if is_a(residue, reference, exact=exact, **kwargs):
                total += amount
            else:
                leftover.append((residue, amount))
        if total > 0:
            aggregated.append((reference, total))
        # Only residues nobody has claimed yet are offered to the next reference.
        unmatched = leftover
    self.clear()
    for residue, amount in aggregated + unmatched:
        self[residue] = amount
    return self
@property
def reducing_end(self):
    """The reducing end stored on this composition, or :const:`None`."""
    current = self._reducing_end
    return current

@reducing_end.setter
def reducing_end(self, value):
    # Clear cached values first, since they depend on the reducing end.
    self._invalidate()
    self._reducing_end = value
def set_reducing_end(self, value):
    """Method form of assigning the reducing end: invalidate caches, then store `value`."""
    self._invalidate()
    setattr(self, '_reducing_end', value)
def _invalidate(self):
self._mass = None
@property
def composition_offset(self):
    """The composition added on top of the residues when totalling ``self``."""
    current = self._composition_offset
    return current

@composition_offset.setter
def composition_offset(self, value):
    # Clear cached values first, since they depend on the offset.
    self._invalidate()
    self._composition_offset = value
def clone(self, propogate_composition_offset=True, copy_nodes=True):
    """
    Create a duplicate of this composition.

    Parameters
    ----------
    propogate_composition_offset : bool, optional
        When true (the default) the duplicate receives a clone of this
        instance's composition offset; otherwise it receives a fresh
        ``Composition('H2O')``.
    copy_nodes : bool, optional
        Forwarded to :meth:`_update_from_typed_map`.

    Returns
    -------
    GlycanComposition
    """
    duplicate = self._empty()
    duplicate._update_from_typed_map(self, copy_nodes=copy_nodes)
    if propogate_composition_offset:
        duplicate._composition_offset = self._composition_offset.clone()
    else:
        duplicate._composition_offset = Composition('H2O')
    return duplicate
# Inheriting from dict shadows MoleculeBase.copy, so it is re-routed to clone here.
def copy(self, *args, **kwargs):
    """Alias of :meth:`clone`; all arguments are forwarded unchanged."""
    duplicate = self.clone(*args, **kwargs)
    return duplicate
def __str__(self):
return self.serialize()
@classmethod
def _get_parse_tokens(cls, string):
string = str(string)
parts = string.split('$')
if len(parts) == 1:
tokens = parts[0]
reduced = None
elif len(parts) == 2:
tokens, reduced = parts
else:
raise ValueError("Could not interpret %r" % string)
tokens = tokens[1:-1].split('; ')
return tokens, reduced
def _handle_reduction_and_derivatization(self, reduced, deriv):
    """
    Apply the reducing end and derivatization recovered while parsing.

    Parameters
    ----------
    reduced : str or None
        Composition text for the reducing end; when truthy it is wrapped
        as ``ReducedEnd(Composition(reduced))`` and assigned to ``self``.
    deriv : object or None
        When truthy, a clone of it is applied through :meth:`_derivatized`
        without touching the reducing end.
    """
    if reduced:
        self.reducing_end = ReducedEnd(Composition(reduced))
    if deriv:
        id_source = make_counter(uid())
        self._derivatized(deriv.clone(), id_source, include_reducing_end=False)
@classmethod
def parse(cls, string):
    """Build a :class:`GlycanComposition` from its text form.

    Accepts the format produced by :meth:`serialize`.

    Parameters
    ----------
    string : :class:`str`
        The string to parse

    Returns
    -------
    :class:`GlycanComposition`

    Raises
    ------
    ValueError
        If a token cannot be split into a name and a count, unless the
        whole input is exactly ``"{}"``, which yields an empty instance.
    """
    tokens, reduced = cls._get_parse_tokens(string)
    result = cls._empty()
    derivatization = None
    for token in tokens:
        try:
            name, count = _parse_name_count(token)
        except ValueError:
            if string != "{}":
                raise ValueError("Malformed Token, %s" % (token,))
            # The empty composition has no tokens to read.
            return result
        key = cls._key_parser(name)
        if "^" in name:
            # Only names containing '^' are inspected for a derivatization.
            found = has_derivatization(key)
            if found:
                derivatization = found
        result._setitem_fast(key, count)
    result._handle_reduction_and_derivatization(reduced, derivatization)
    return result
def _derivatized(self, substituent, id_base, include_reducing_end=True):
    """
    Account for a derivatizing `substituent` in the composition offset.

    The entries are re-inserted with :meth:`_setitem_fast`. A multiplier
    starts at 2 and is reduced by the count of every key whose
    ``node_type`` is :attr:`Substituent.node_type`; the offset then grows
    by that multiplier times the substituent's total composition minus
    twice its attachment composition loss.

    Parameters
    ----------
    substituent : object
        Must provide ``total_composition()`` and ``attachment_composition_loss()``.
    id_base : object
        Forwarded to :func:`_derivatize_reducing_end`.
    include_reducing_end : bool, optional
        When true and a reducing end is set, it is derivatized as well.
    """
    entries = list(self.items())
    self.clear()
    multiplier = 2
    for residue, amount in entries:
        self._setitem_fast(residue, amount)
        if residue.node_type is Substituent.node_type:
            multiplier -= amount
    per_unit = (
        substituent.total_composition() -
        substituent.attachment_composition_loss() * 2)
    self._composition_offset += per_unit * multiplier
    if include_reducing_end and self._reducing_end is not None:
        _derivatize_reducing_end(self._reducing_end, substituent, id_base)
    self.collapse()
    self._invalidate()
def _strip_derivatization(self):
    """
    Reset the composition offset to ``Composition("H2O")``, strip any
    derivatization from the reducing end when one is set, then collapse
    the keys and invalidate cached values.
    """
    self._composition_offset = Composition("H2O")
    reducing_end = self._reducing_end
    if reducing_end is not None:
        _strip_derivatization_reducing_end(reducing_end)
    self.collapse()
    self._invalidate()
def _invalidate(self):
self._mass = None
# Module-level shortcuts so callers can use ``from_glycan(...)`` and
# ``parse(...)`` without naming :class:`GlycanComposition` explicitly.
from_glycan = GlycanComposition.from_glycan
parse = GlycanComposition.parse
class FrozenGlycanComposition(GlycanComposition):
    '''
    A |GlycanComposition| variant keyed by :class:`FrozenMonosaccharideResidue`
    rather than |MonosaccharideResidue|, which reduces the number of times
    :func:`from_iupac_lite` is called.

    Only use this type if residue names are pre-validated, residue types will not be
    transformed, and when creating many, many instances. :func:`from_iupac_lite` invokes
    expensive introspection algorithms which can be costly when repeatedly manipulating
    the same residue types.
    '''
    # Cached result of :meth:`serialize`; cleared by :meth:`_invalidate`.
    _str = None
    _monosaccharide_type = FrozenMonosaccharideResidue
    _key_parser = staticmethod(FrozenMonosaccharideResidue.from_iupac_lite)

    def __setitem__(self, key, value):
        """Store `value` under the frozen residue parsed from ``str(key)``."""
        frozen_key = self._key_parser(str(key))
        _CompositionBase.__setitem__(self, frozen_key, value)
        self._invalidate()

    def __getitem__(self, key):
        """Look up `key`, parsing ``str(key)`` unless it is already a frozen residue."""
        if isinstance(key, FrozenMonosaccharideResidue):
            frozen_key = key
        else:
            frozen_key = self._key_parser(str(key))
        return _CompositionBase.__getitem__(self, frozen_key)

    def __delitem__(self, key):
        """Delete the entry for the frozen residue parsed from ``str(key)``."""
        frozen_key = self._key_parser(str(key))
        _CompositionBase.__delitem__(self, frozen_key)
        self._invalidate()

    @classmethod
    def parse(cls, string):
        """Build an instance from the text format produced by :meth:`serialize`.

        Raises
        ------
        ValueError
            If a token cannot be split into a name and a count, unless the
            whole input is exactly ``"{}"``, which yields an empty instance.
        """
        tokens, reduced = cls._get_parse_tokens(string)
        result = cls._empty()
        derivatization = None
        # Bind once; the parser is looked up for every token otherwise.
        to_key = cls._key_parser
        for token in tokens:
            try:
                name, count = _parse_name_count(token)
            except ValueError:
                if string != "{}":
                    raise ValueError("Malformed Token, %s" % (token,))
                return result
            key = to_key(name)
            if "^" in name:
                found = has_derivatization(key)
                if found:
                    derivatization = found
            result._setitem_fast(key, count)
        result._handle_reduction_and_derivatization(reduced, derivatization)
        return result

    def serialize(self):
        """Return the serialized text, computing and caching it on first use."""
        cached = self._str
        if cached is None:
            cached = self._str = super(FrozenGlycanComposition, self).serialize()
        return cached

    __str__ = serialize

    def __contains__(self, key):
        """Test membership of `key`, converting a string with :attr:`_key_parser` first."""
        lookup = self._key_parser(key) if isinstance(key, basestring) else key
        return _CompositionBase.__contains__(self, lookup)

    def thaw(self):
        """Convert this :class:`FrozenGlycanComposition` into a plain
        :class:`GlycanComposition` by re-parsing it.

        Returns
        -------
        :class:`GlycanComposition`
        """
        thawed = GlycanComposition.parse(self)
        return thawed

    def extend(self, *args):
        """
        Increment the count of each given residue by one.

        The type of the first argument decides how all arguments are
        interpreted: frozen residues are used as-is, :class:`Monosaccharide`
        instances are converted, and a single :class:`Glycan` contributes
        its nodes whose ``node_type`` matches the frozen residue type's.

        Raises
        ------
        TypeError
            If the first argument is none of the supported types.
        """
        first = args[0]
        if not isinstance(first, FrozenMonosaccharideResidue):
            if isinstance(first, Monosaccharide):
                args = map(FrozenMonosaccharideResidue.from_monosaccharide, args)
            elif isinstance(first, Glycan):
                residue_nodes = [
                    node for node in first
                    if node.node_type is FrozenMonosaccharideResidue.node_type]
                args = map(
                    FrozenMonosaccharideResidue.from_monosaccharide,
                    residue_nodes)
            else:
                raise TypeError(
                    "Can't convert {} to FrozenMonosaccharideResidue".format(
                        type(first)))
        for residue in args:
            self[residue] += 1

    def _validate(self):
        '''Fill in any cache field that is still empty, namely the mass
        and the string representation.
        '''
        if self._mass is None:
            self.mass()
        if self._str is None:
            self.serialize()

    def _invalidate(self):
        '''Reset every cache field so each is recomputed when next requested.
        '''
        self._mass = None
        self._str = None
        self._total_composition = None

    def clone(self, propogate_composition_offset=True, copy_nodes=False):
        """
        Create a duplicate of this composition.

        Parameters
        ----------
        propogate_composition_offset : bool, optional
            When true (the default) the duplicate receives a clone of this
            instance's composition offset; otherwise it receives a fresh
            ``Composition('H2O')``.
        copy_nodes : bool, optional
            Forwarded to :meth:`_update_from_typed_map`; false by default here.
        """
        duplicate = self._empty()
        duplicate._update_from_typed_map(self, copy_nodes=copy_nodes)
        if propogate_composition_offset:
            duplicate._composition_offset = self._composition_offset.clone()
        else:
            duplicate._composition_offset = Composition('H2O')
        return duplicate
class FrozenError(ValueError):
    """A :class:`ValueError` subclass for errors involving frozen types."""
class HashableGlycanComposition(FrozenGlycanComposition):
    """
    A :class:`FrozenGlycanComposition` that can be hashed; its hash and its
    equality with other instances of this class are based on the cached
    string representation.
    """

    def __str__(self):
        """Return the cached string form, populating the caches first if needed."""
        self._validate()
        # Read the cache attribute directly to skip the super method chain.
        return self._str

    def __hash__(self):
        """Hash of the string representation."""
        text = self._str if self._str is not None else str(self)
        return hash(text)

    def __eq__(self, other):
        """
        Compare by string form against another :class:`HashableGlycanComposition`;
        defer to the parent comparison for any other type.
        """
        if not isinstance(other, HashableGlycanComposition):
            return super(HashableGlycanComposition, self).__eq__(other)
        own_text = self._str
        if own_text is None:
            return str(other) == str(self)
        other_text = other._str
        if other_text is None:
            other_text = str(other)
        return own_text == other_text
def _parse_name_count(string):
name, count = string.split(":")
count = int(count)
return name, count
# Use the parsing helpers from the optional ``glypy._c`` module when it can
# be imported; ``_has_c`` records whether that succeeded.
try:
    from glypy._c.utils import get_parse_tokens, parse_name_count as _parse_name_count
except ImportError:
    _has_c = False
else:
    _has_c = True
    GlycanComposition._get_parse_tokens = get_parse_tokens