Source code for glypy.structure.substituent

from glypy.composition.structure_composition import substituent_compositions

from .base import SubstituentBase
from .link import Link
from .constants import UnknownPosition

from glypy.utils import uid
from glypy.composition import Composition, calculate_mass
from glypy.utils.multimap import OrderedMultiMap


class DerivatizePathway(object):
    """Record of how a substituent group interacts with amine derivatization.

    Instances are stored as the values of the module-level ``derivatize_info``
    mapping, keyed by substituent name.

    Attributes
    ----------
    can_nh_derivatize : bool
        Stored verbatim from the constructor argument of the same name.
    is_nh_derivatizable : bool
        Stored verbatim from the constructor argument of the same name.
    """

    def __init__(self, can_nh_derivatize=False, is_nh_derivatizable=False, name=None):
        """Create a pathway record, optionally self-registering it.

        Parameters
        ----------
        can_nh_derivatize : bool, optional
            Defaults to False.
        is_nh_derivatizable : bool, optional
            Defaults to False.
        name : str, optional
            When given, the new instance is stored in ``derivatize_info``
            under exactly this key. The name is used as-is here; unlike
            :meth:`register` it is *not* passed through
            ``Substituent.internalize_name``.
        """
        self.can_nh_derivatize = can_nh_derivatize
        self.is_nh_derivatizable = is_nh_derivatizable
        if name is not None:
            derivatize_info[name] = self

    def __repr__(self):  # pragma: no cover
        return "<DerivatizePathway {}>".format(self.__dict__)

    @classmethod
    def register(cls, name, can_nh_derivatize, is_nh_derivatizable):
        """Store a new pathway in ``derivatize_info`` under the internalized `name`.

        Parameters
        ----------
        name : str
            Substituent name; normalized with ``Substituent.internalize_name``
            before being used as the key.
        can_nh_derivatize : bool
        is_nh_derivatizable : bool
        """
        # Build through ``cls`` so that subclasses calling ``register`` store
        # instances of themselves rather than of the base class.
        derivatize_info[Substituent.internalize_name(name)] = cls(can_nh_derivatize, is_nh_derivatizable)


# Maps an internalized substituent name (underscores instead of dashes) to the
# composition associated with attaching a substituent of that name.
# `register` adds entries here, `unregister` removes them, and
# `Substituent.__init__` reads from it when no explicit
# `attachment_composition` argument is supplied.
attachment_composition_info = {
    "sulfate": Composition("H"),
    "methyl": Composition("H"),
    "n_acetyl": Composition("OH"),
    "n_glycolyl": Composition("OH"),
    "n_sulfate": Composition("OH"),
    "amino": Composition("OH"),
    "imino": Composition("OH"),
    "anhydro": Composition("H"),
    "dimethylamine": Composition("OH"),
    "phosphate": Composition("H")
}
# Fallback used for any substituent name that has no entry in the table above.
default_attachment_composition = Composition("H")


# Maps an internalized substituent name to its DerivatizePathway.
# The two positional arguments of each entry are, in order,
# (can_nh_derivatize, is_nh_derivatizable).
# `Substituent.__init__` consults this table when neither flag is passed
# explicitly; `DerivatizePathway.register` and `unregister` mutate it.
derivatize_info = {
    "acetyl": DerivatizePathway(False, False),
    "amino": DerivatizePathway(True, False),
    "anhydro": DerivatizePathway(True, False),
    "bromo": DerivatizePathway(True, False),
    "chloro": DerivatizePathway(True, False),
    "diphospho_ethanolamine": DerivatizePathway(True, False),
    "ethanolamine": DerivatizePathway(True, False),
    "ethyl": DerivatizePathway(True, False),
    "fluoro": DerivatizePathway(True, False),
    "formyl": DerivatizePathway(True, False),
    "glycolyl": DerivatizePathway(True, False),
    "hydroxymethyl": DerivatizePathway(True, False),
    "iodo": DerivatizePathway(True, False),
    "lactone": DerivatizePathway(True, False),
    "methyl": DerivatizePathway(True, False),
    "phosphate": DerivatizePathway(True, False),
    "phosphocholine": DerivatizePathway(True, False),
    "phospho_ethanolamine": DerivatizePathway(True, False),
    "pyrophosphate": DerivatizePathway(True, False),
    "pyruvate": DerivatizePathway(True, False),
    "succinate": DerivatizePathway(True, False),
    "sulfate": DerivatizePathway(True, False),
    "thio": DerivatizePathway(True, False),
    "triphosphate": DerivatizePathway(True, False),

    # Stereo-qualified names.
    "(x)_lactate": DerivatizePathway(True, False),
    "(r)_carboxyethyl": DerivatizePathway(True, False),
    "(r)_lactate": DerivatizePathway(True, False),
    "(r)_pyruvate": DerivatizePathway(True, False),
    "(s)_carboxyethyl": DerivatizePathway(True, False),
    "(s)_lactate": DerivatizePathway(True, False),
    "(s)_pyruvate": DerivatizePathway(True, False),

    # N-linked groups: the only entries with is_nh_derivatizable set to True.
    "n_acetyl": DerivatizePathway(True, True),
    "n_amidino": DerivatizePathway(True, True),
    "n_formyl": DerivatizePathway(True, True),
    "n_glycolyl": DerivatizePathway(True, True),
    "n_methyl": DerivatizePathway(True, True),
    "n_succinate": DerivatizePathway(True, True),
    "n_sulfate": DerivatizePathway(True, True),
    "n_dimethyl": DerivatizePathway(True, True),

    # Alternate spelling of "phosphocholine" above; both keys are present.
    "phospho_choline": DerivatizePathway(True, False),
}


def register(name, composition, can_nh_derivatize=None, is_nh_derivatizable=None,
             attachment_composition=None):
    """Register common information about a |Substituent| group to be used
    during initialization of instances of |Substituent| which share that name.

    Parameters
    ----------
    name : str
        The name to be registered
    composition : |Composition|
        The shared base composition that will be initialized for each instance.
        A clone of it is stored, not the object passed in.
    can_nh_derivatize : None, optional
        Passed to `DerivatizePathway.register` (``None`` is passed as ``False``)
    is_nh_derivatizable : None, optional
        Passed to `DerivatizePathway.register` (``None`` is passed as ``False``)
    attachment_composition : None, optional
        The shared composition that will be lost from the parent molecule
        when forming a bond with substituents of this type. When omitted,
        ``default_attachment_composition`` is stored instead.

    Returns
    -------
    |Substituent|
        A new instance built from the freshly registered name.
    """
    key = Substituent.internalize_name(name)
    substituent_compositions[key] = composition.clone()

    if attachment_composition is None:
        attachment_composition = default_attachment_composition
    attachment_composition_info[key] = attachment_composition

    DerivatizePathway.register(key, can_nh_derivatize or False, is_nh_derivatizable or False)
    return Substituent(key)
def unregister(name):
    """Removes all information about the |Substituent| group denoted by `name`
    from the shared indices.

    Parameters
    ----------
    name : str
        The name to un-register

    Raises
    ------
    KeyError
        If `name` has no entry in ``substituent_compositions``.
    """
    name = Substituent.internalize_name(name)
    # The composition table is the index `is_registered` consults, so a miss
    # here still raises KeyError.
    substituent_compositions.pop(name)
    # The secondary tables do not necessarily hold every registered name
    # (e.g. a name may rely on the default attachment composition), so a
    # missing entry must not abort the removal half-way through.
    attachment_composition_info.pop(name, None)
    derivatize_info.pop(name, None)
def is_registered(name):
    """Report whether `name` has an entry in ``substituent_compositions``.

    Parameters
    ----------
    name : str
        Substituent name; normalized with ``Substituent.internalize_name``
        before the lookup.

    Returns
    -------
    bool
    """
    return Substituent.internalize_name(name) in substituent_compositions
class Substituent(SubstituentBase):
    '''
    Represents a non-saccharide molecule commonly found bound to saccharide units.

    Attributes
    ----------
    name: |str|
        The name of the substituent, used to uniquely identify it.
    links: |OrderedMultiMap|
        All links to all molecules connected to this one.
    composition: |Composition|
        The chemical makeup of this molecule.
    attachment_composition: |Composition|
        The default cost of attaching this substituent to a |Monosaccharide|
    id: |int|
        A unique identifier number for this molecule.
    can_nh_derivatize: |bool|
        Whether this substituent will derivatize at an amine group.
    is_nh_derivatizable: |bool|
        Whether this substituent contains a derivatizable amine group.
    _derivatize: |bool|
        Whether this substituent was added by a derivatization process.
    _degree: |int|
        The number of connections to this molecule. Mutated internally by |Link| objects,
        not for external use. See :meth:`order`.
    '''
    register = staticmethod(register)
    unregister = staticmethod(unregister)

    __slots__ = (
        "_name", "links", "composition", "id",
        "can_nh_derivatize", "is_nh_derivatizable",
        "_derivatize", "attachment_composition",
        "_degree"
    )

    def __init__(self, name, links=None, composition=None, id=None,
                 can_nh_derivatize=None, is_nh_derivatizable=None, derivatize=False,
                 attachment_composition=None):
        '''
        Parameters
        ----------
        name: str
            Substituent name; stored in internalized form (see :meth:`internalize_name`).
        links: |OrderedMultiMap|, optional
            Existing link container. A new empty one is created when omitted.
        composition: |Composition|, optional
            When omitted, looked up in ``substituent_compositions`` by name
            (raising :class:`KeyError` for an unknown name). When given for a name
            that is not yet registered, the name is registered with it.
        id: int, optional
            Identifier; a fresh :func:`uid` is drawn when omitted or falsy.
        can_nh_derivatize: bool, optional
        is_nh_derivatizable: bool, optional
            When both flags are left as ``None`` they are taken from ``derivatize_info``
            if the name is present there; otherwise each ``None`` becomes ``False``.
        derivatize: bool, optional
            Stored as :attr:`_derivatize`.
        attachment_composition: |Composition|, optional
            When omitted, taken from ``attachment_composition_info``, falling back to
            ``default_attachment_composition``.
        '''
        if links is None:
            links = OrderedMultiMap()
        self.name = name
        self.links = links

        if composition is None:
            composition = substituent_compositions[self._name]
        elif not is_registered(self._name):
            # First sighting of a custom substituent: publish it so later instances
            # created by name alone (e.g. in :meth:`clone`) can find its composition.
            self.register(
                name, composition,
                can_nh_derivatize=can_nh_derivatize,
                is_nh_derivatizable=is_nh_derivatizable,
                attachment_composition=attachment_composition)
        self.composition = composition
        self.id = id or uid()
        self._degree = self.order()

        # Only consult the shared table when the caller expressed no preference at all.
        pathway = None
        if can_nh_derivatize is None and is_nh_derivatizable is None:
            pathway = derivatize_info.get(self.name)
        if pathway is not None:
            self.can_nh_derivatize = pathway.can_nh_derivatize
            self.is_nh_derivatizable = pathway.is_nh_derivatizable
        else:
            self.can_nh_derivatize = can_nh_derivatize or False
            self.is_nh_derivatizable = is_nh_derivatizable or False

        self._derivatize = derivatize
        if attachment_composition is None:
            attachment_composition = attachment_composition_info.get(
                self.name, default_attachment_composition)
        self.attachment_composition = attachment_composition

    @staticmethod
    def internalize_name(name):
        '''Replace every ``-`` in `name` with ``_``.'''
        return name.replace('-', '_')

    @property
    def name(self):
        return self._name

    @name.setter
    def name(self, value):
        '''
        Translate the name of the substituent from the common dash-separated
        notation to a valid identifier, replacing - with _.

        Parameters
        ----------
        value: str
            The name being set
        '''
        self._name = self.internalize_name(value)

    def __repr__(self):  # pragma: no cover
        return "<Substituent {name}>".format(name=self._name)

    def __hash__(self):
        # NOTE: hashing includes ``id`` while ``__eq__`` does not, so two objects
        # that compare equal may hash differently.
        return hash((self.id, self.name))

    def __eq__(self, other):
        '''
        Two substituents are equal when their names and compositions are equal.
        Objects lacking a ``name`` or ``composition`` attribute (and ``None``)
        compare unequal instead of raising.
        '''
        if other is None:
            return False
        try:
            other_name = other.name
            other_composition = other.composition
        except AttributeError:
            return False
        return (self.name == other_name) and (self.composition == other_composition)

    def __ne__(self, other):
        return not self == other

    def open_attachment_sites(self):
        '''Always returns the fixed pair ``([1, 2], 0)``; current links are not inspected.'''
        return [1, 2], 0

    def is_occupied(self, position):
        '''
        Check to see if `position` is occupied. Unlike |Monosaccharide|, |Substituent| objects
        can only have two attachment sites at this time.

        Parameters
        ----------
        position: int

        Returns
        -------
        int: Number of links at `position`

        Raises
        ------
        IndexError: If `position` > 2 or < 1
        '''
        if position > 2 or position < 1:
            raise IndexError("Position out of range")
        return len(self.links[position])

    def add_substituent(self, substitent, position=2, max_occupancy=1,
                        child_position=1, parent_loss=None, child_loss=None):
        '''
        Adds a :class:`~glypy.structure.substituent.Substituent` and associated
        :class:`~glypy.structure.link.Link` to :attr:`links` at the site given by
        ``position``. This new substituent is included when calculating mass with
        substituents included

        Parameters
        ----------
        substitent: str or Substituent
            The substituent to add. If passed a |str|, it will be
            translated into an instance of |Substituent|
        position: int
            The location to add the |Substituent| link to :attr:`links`. Defaults to 2.
            Validated by :meth:`is_occupied`, so it must be 1 or 2.
        child_position: int
            The location to add the link to in `substitent`'s :attr:`links`.
            Defaults to 1. Substituent indices are currently not checked.
        max_occupancy: int, optional
            The maximum number of items acceptable at ``position``. Defaults to :const:`1`
        parent_loss: Composition or str
            The elemental composition removed from ``self``. Defaults to ``H1``.
        child_loss: Composition or str
            The elemental composition removed from ``substitent``. Defaults to ``H1``.

        Returns
        -------
        Substituent
            ``self``

        Raises
        ------
        ValueError: ``position`` is occupied by more than ``max_occupancy`` elements
        IndexError: ``position`` is outside the range accepted by :meth:`is_occupied`
        '''
        if self.is_occupied(position) > max_occupancy:
            raise ValueError("Site is already occupied")
        if isinstance(substitent, str):
            # Honour the documented contract: a bare name stands for a new instance.
            substitent = Substituent(substitent)
        if parent_loss is None:
            parent_loss = Composition(H=1)
        if child_loss is None:
            child_loss = Composition(H=1)
        # Constructing the Link is what records the connection; the object itself
        # is not retained here.
        Link(parent=self, child=substitent,
             parent_position=position, child_position=child_position,
             parent_loss=parent_loss, child_loss=child_loss)
        return self

    def drop_substituent(self, position, substituent=None, refund=True):
        '''
        Break the first link at `position` whose child equals `substituent`
        (or simply the first link there when `substituent` is ``None``).

        Parameters
        ----------
        position: int
        substituent: Substituent, optional
        refund: bool, optional
            Forwarded to the link's ``break_link``. Defaults to True.

        Returns
        -------
        Substituent
            ``self``

        Raises
        ------
        IndexError: No link at `position` matches
        '''
        link_obj = None
        for substituent_link in self.links[position]:
            if substituent is None or substituent_link.child == substituent:
                link_obj = substituent_link
                break
        if link_obj is None:
            raise IndexError(
                "No matching substituent found at {position}".format(position=position))
        link_obj.break_link(refund=refund)
        return self

    def mass(self, average=False, charge=0, mass_data=None):
        '''
        Calculates the total mass of `self` and all nodes returned by :meth:`children`.

        Parameters
        ----------
        average: bool, optional, defaults to False
            Whether or not to use the average isotopic composition when calculating masses.
            When ``average == False``, masses are calculated using monoisotopic mass.
        charge: int, optional, defaults to 0
            If charge is non-zero, m/z is calculated, where m is the theoretical mass,
            and z is `charge`
        mass_data: dict, optional, defaults to `None`
            If mass_data is None, standard NIST mass and isotopic abundance data are used.
            Otherwise the contents of mass_data are assumed to contain elemental mass and
            isotopic abundance information.

        Returns
        -------
        :class:`float`

        See also
        --------
        :func:`glypy.composition.composition.calculate_mass`
        '''
        if charge == 0:
            mass = calculate_mass(
                self.composition, average=average, charge=0, mass_data=mass_data)
            for link_pos, child in self.children():
                mass += child.mass(average=average, charge=0, mass_data=mass_data)
        else:
            # For a charged species the whole composition is summed first and the
            # m/z computed once.
            mass = self.total_composition().calc_mass(
                average=average, charge=charge, mass_data=mass_data)
        return mass

    def __getstate__(self):
        # __slots__ classes have no __dict__, so the pickled state is spelled out.
        state = {
            "_name": self._name,
            "links": self.links,
            "composition": self.composition,
            "id": self.id,
            "can_nh_derivatize": self.can_nh_derivatize,
            "is_nh_derivatizable": self.is_nh_derivatizable,
            "_derivatize": self._derivatize,
            "attachment_composition": self.attachment_composition,
            "_degree": self._degree
        }
        return state

    def __setstate__(self, state):
        self._name = state['_name']
        self.links = state['links']
        self.composition = state['composition']
        self.id = state['id']
        self.can_nh_derivatize = state['can_nh_derivatize']
        self.is_nh_derivatizable = state['is_nh_derivatizable']
        self._derivatize = state['_derivatize']
        self.attachment_composition = state['attachment_composition']
        # States lacking "_degree" fall back to the current number of links.
        self._degree = state.get("_degree", len(self.links))

    def clone(self, prop_id=True):
        '''
        Duplicates this |Substituent| object, recursively copying all children as well.

        Parameters
        ----------
        prop_id: bool
            Whether or not to propagate :attr:`id` to the clone.

        Returns
        -------
        Substituent

        See Also
        --------
        :meth:`.structure.Monosaccharide.clone`
        '''
        substituent = self.__class__(
            self.name,
            can_nh_derivatize=self.can_nh_derivatize,
            is_nh_derivatizable=self.is_nh_derivatizable,
            id=self.id if prop_id else None,
            derivatize=self._derivatize,
            attachment_composition=self.attachment_composition)
        for pos, link in self.links.items():
            # Only links leading away from this node are followed; the link on
            # which this node is the child is left to whoever clones the parent.
            if link.is_child(self):
                continue
            sub = link.to(self)
            dup = sub.clone(prop_id=prop_id)
            link.clone(substituent, dup)
        return substituent

    def degree(self):
        '''Number of entries currently held in :attr:`links`.'''
        return len(self.links)

    def total_composition(self):
        '''
        Computes the sum of the composition of `self` and each of its linked
        :class:`~.substituent.Substituent`s

        Returns
        -------
        :class:`~glypy.composition.Composition`
            Always a new object, so callers may modify the result without
            affecting :attr:`composition`.
        '''
        # Start from a copy: without children the running total would otherwise
        # be ``self.composition`` itself.
        comp = self.composition.clone()
        for pos, sub in self.children():
            comp = comp + sub.total_composition()
        return comp

    def children(self, links=False, bridging=False):
        '''
        Returns a list of ``(position, node)`` pairs for every link on which
        ``self`` is not the child.

        Parameters
        ----------
        links: bool, optional
            When True, the second element of each pair is the |Link| itself
            rather than its ``child`` node.
        bridging: bool, optional
            When True, only links for which ``link.is_bridge_link()`` is true
            are included.

        Returns
        -------
        list
        '''
        result = []
        for pos, link in self.links.items():
            if link.is_child(self):
                continue
            if bridging and not link.is_bridge_link():
                continue
            result.append((pos, link if links else link.child))
        return result

    def parents(self, links=False):
        '''
        Returns a list of ``(position, node)`` pairs for every link on which
        ``self`` is not the parent.

        Parameters
        ----------
        links: bool, optional
            When True, the second element of each pair is the |Link| itself
            rather than its ``parent`` node.

        Returns
        -------
        list
        '''
        result = []
        for pos, link in self.links.items():
            if link.is_parent(self):
                continue
            result.append((pos, link if links else link.parent))
        return result

    def is_bridge(self):
        '''True when any outgoing link reports itself as a bridge.'''
        for pos, link in self.children(links=True):
            # NOTE(review): `children` filters with `link.is_bridge_link()` while
            # this calls `link.is_bridge()` -- confirm against Link that both exist
            # and that this is the intended predicate.
            if link.is_bridge():
                return True
        return False

    def attachment_composition_loss(self):
        '''Return a copy of :attr:`attachment_composition`.'''
        return self.attachment_composition.clone()

    def _backsolve_original_composition(self):
        '''
        Rebuild a composition by adding back what each link removed from this node:
        ``child_loss`` for links on which ``self`` is the child, ``parent_loss``
        otherwise. If none of the links on which ``self`` is the child is a
        substituent link, :attr:`attachment_composition` is added as well.

        Returns
        -------
        :class:`~glypy.composition.Composition`
        '''
        comp = self.composition.clone()
        has_substituent_link = 0
        for pos, link in self.links.items():
            if link.is_child(self):
                if link.is_substituent_link():
                    has_substituent_link += 1
                comp += link.child_loss
            else:
                comp += link.parent_loss
        if has_substituent_link == 0:
            comp += self.attachment_composition
        return comp

    def has_undefined_linkages(self):
        '''True when any link has ``UnknownPosition`` at either end.'''
        return any(
            link.parent_position == UnknownPosition or link.child_position == UnknownPosition
            for link in self.links.values())