'''
Glyconnect
----------
A simple dialect of the Glyconnect/GlycoMod glycan composition notation.
'''
import re
from typing import Dict, Union, Optional, List, Tuple
from urllib.parse import quote
from dataclasses import dataclass, field
from typing import List, Optional, Type, Generic, TypeVar
from glypy.structure.glycan_composition import (
FrozenGlycanComposition,
FrozenMonosaccharideResidue,
SubstituentResidue)
from glypy.structure.glycan import Glycan
try:
import requests
except ImportError:
requests = None
#: The set of defined symbols and their mappings.
#: Several symbols are aliases of one another: ``S``/``Su``/``Sulpho`` all map
#: to the sulfate substituent and ``P``/``Ph``/``Phospho`` all map to the
#: phosphate substituent.
defined_symbols: Dict[str, Union[SubstituentResidue, FrozenMonosaccharideResidue]] = {
    "Hex": FrozenMonosaccharideResidue.from_iupac_lite("Hex"),
    "HexNAc": FrozenMonosaccharideResidue.from_iupac_lite('HexNAc'),
    "dHex": FrozenMonosaccharideResidue.from_iupac_lite('dHex'),
    "NeuAc": FrozenMonosaccharideResidue.from_iupac_lite("NeuAc"),
    "NeuGc": FrozenMonosaccharideResidue.from_iupac_lite("NeuGc"),
    # Sulfate aliases
    "S": SubstituentResidue("sulfate"),
    "Su": SubstituentResidue("sulfate"),
    "Sulpho": SubstituentResidue("sulfate"),
    # Phosphate aliases
    "P": SubstituentResidue("phosphate"),
    "Ph": SubstituentResidue("phosphate"),
    "Phospho": SubstituentResidue("phosphate"),
    "Xyl": FrozenMonosaccharideResidue.from_iupac_lite("Xyl"),
    "HexA": FrozenMonosaccharideResidue.from_iupac_lite("HexA"),
    # The dialect spells this symbol "Pent" while the IUPAC-lite name passed
    # to the parser is "Pen".
    "Pent": FrozenMonosaccharideResidue.from_iupac_lite("Pen"),
    "Kdn": FrozenMonosaccharideResidue.from_iupac_lite("Kdn"),
}
def _invert_mapping(table: Dict[str, Union[SubstituentResidue, FrozenMonosaccharideResidue]]) -> Dict[Union[SubstituentResidue, FrozenMonosaccharideResidue], str]:
    '''Build the reverse lookup of a symbol table.

    When several symbols map to the same residue, the shortest symbol is
    kept. If two symbols have the same length, the one that appears later
    in `table` wins.

    Parameters
    ----------
    table: dict
        Mapping from symbol to residue.

    Returns
    -------
    dict
        Mapping from residue to the chosen symbol.
    '''
    shortest = {}
    for symbol, residue in table.items():
        # Replace the stored symbol unless it is strictly shorter than this one.
        if residue not in shortest or len(symbol) <= len(shortest[residue]):
            shortest[residue] = symbol
    return shortest
# Reverse lookup (residue -> symbol) consulted by `dumps`. Aliased residues
# collapse to the single symbol selected by `_invert_mapping`.
monosaccharide_to_symbol = _invert_mapping(defined_symbols)
def _generate_pattern(symbols: List[str]) -> re.Pattern:
    '''Compile a tokenizer for undelimited ``<symbol><count>`` runs.

    Parameters
    ----------
    symbols: iterable of str
        The literal symbol names to recognise.

    Returns
    -------
    re.Pattern
        A pattern with two groups: the symbol and its decimal count.
    '''
    # Longest symbols first so that e.g. "HexNAc" is tried before its
    # prefix "Hex" in the alternation.
    ordered = sorted(symbols, key=len, reverse=True)
    # Symbols are literal text, so escape them before joining.
    alternation = "|".join(map(re.escape, ordered))
    # The count must be greedy: a lazy quantifier at the end of the pattern
    # would stop after one digit and turn "Hex10" into a count of 1.
    return re.compile(rf"({alternation})(\d+)")
# Delimited form: ``<symbol>:<count>`` pairs, where the symbol is any run of
# characters other than ":" and whitespace.
tokenizer = re.compile(r"([^:\s]+):(\d+)")
# Undelimited form: one of the defined symbols immediately followed by digits.
undelimited_tokenizer = _generate_pattern(defined_symbols)
def loads(string: str) -> FrozenGlycanComposition:
    '''Parse a GlyConnect glycan composition into a :class:`~.FrozenGlycanComposition`

    The colon-delimited form (``symbol:count`` pairs) is tried first. If it
    yields no tokens, the undelimited form (``symbolcount`` runs built from
    the defined symbols) is tried instead. Text matched by neither tokenizer
    is ignored, so an input with no recognisable tokens gives an empty
    composition.

    Parameters
    ----------
    string: str
        The string to parse

    Returns
    -------
    :class:`~.FrozenGlycanComposition`

    Raises
    ------
    :class:`KeyError`: Raised if a key isn't defined by the GlyConnect dialect
    '''
    tokens = tokenizer.findall(string)
    if not tokens:
        tokens = undelimited_tokenizer.findall(string)
    gc = FrozenGlycanComposition()
    for symbol, count in tokens:
        # Unknown symbols surface as KeyError from this lookup.
        residue = defined_symbols[symbol]
        gc[residue] += int(count)
    return gc
def dumps(composition) -> str:
    '''Encode :class:`~.GlycanComposition` or :class:`~.Glycan` into the GlyConnect
    glycan composition text format.

    The output is a space-separated list of ``symbol:count`` pairs in the
    iteration order of the composition.

    Parameters
    ----------
    composition: :class:`~.GlycanComposition` or :class:`~.Glycan`
        The structure to format

    Returns
    -------
    :class:`str`

    Raises
    ------
    :class:`KeyError`: Raised if a key isn't defined by the GlyConnect Compozitor dialect
    '''
    if isinstance(composition, Glycan):
        # Reduce a full structure to its composition before formatting.
        composition = FrozenGlycanComposition.from_glycan(composition)
    return ' '.join(
        "%s:%d" % (monosaccharide_to_symbol[residue], count)
        for residue, count in composition.items()
    )
# Base URL that every GlyConnect API endpoint in this module is built from.
API_SERVER = "https://glyconnect.expasy.org/api"
def from_glytoucan_id(glytoucan_id):
    '''Search GlyConnect structures by GlyTouCan accession.

    Parameters
    ----------
    glytoucan_id:
        The accession, sent as the ``glytoucan_id`` form field.

    Returns
    -------
    The decoded JSON body of the response, returned as-is.

    Raises
    ------
    Whatever ``raise_for_status`` raises when the server answers with an
    HTTP error status.
    '''
    endpoint = f"{API_SERVER}/structures/search/glytoucan"
    form = {"glytoucan_id": glytoucan_id}
    reply = requests.post(endpoint, data=form)
    reply.raise_for_status()
    return reply.json()
@dataclass
class RecordBase:
    '''Common base for the API record dataclasses in this module.'''

    @classmethod
    def from_dict(cls, data):
        '''Build an instance by passing the entries of `data` as keyword arguments.

        Every key of `data` must be a field of `cls`; otherwise the
        constructor raises :class:`TypeError`.
        '''
        instance = cls(**data)
        return instance
@dataclass
class TaxonomyRecord(RecordBase):
    '''A taxonomy entry, built from the ``taxonomy``/``taxons`` parts of API payloads.

    Only `id` and `taxonomy_id` are required; the remaining fields default
    to ``None``.
    '''
    id: int
    taxonomy_id: str
    common_name: Optional[str] = None
    species: Optional[str] = None
@dataclass
class UniprotProteinAccessionRecord(RecordBase):
    '''One entry of a protein payload's ``uniprots`` list.

    Only `uniprot_acc` is required; the remaining fields default to ``None``.
    NOTE(review): the other fields look like cross-references to external
    resources, judging by their names -- confirm against the API.
    '''
    uniprot_acc: str
    uniprot_id: Optional[str] = None
    glygen: Optional[str] = None
    nextprot: Optional[str] = None
    genecards: Optional[str] = None
    glycodomain: Optional[str] = None
@dataclass
class ProteinRecord(RecordBase):
    '''A protein entry with its nested taxonomy and UniProt accessions.'''
    id: int
    name: str
    # Left as the raw falsy value (e.g. ``None``) when the payload carries
    # no taxonomy; see `from_dict`.
    taxonomy: TaxonomyRecord
    uniprots: List[UniprotProteinAccessionRecord]

    @classmethod
    def from_dict(cls, data: dict):
        '''Build a record from a payload dict, converting the nested parts.

        ``id`` and ``name`` are required keys. ``taxonomy`` is converted
        only when present and truthy; ``uniprots`` defaults to an empty list.
        '''
        taxonomy = data.get("taxonomy")
        if taxonomy:
            taxonomy = TaxonomyRecord.from_dict(taxonomy)
        accessions = [
            UniprotProteinAccessionRecord.from_dict(entry)
            for entry in data.get("uniprots", [])
        ]
        return cls(id=data['id'], name=data['name'],
                   taxonomy=taxonomy, uniprots=accessions)
@dataclass
class SourceRecord(RecordBase):
    '''One entry of a source payload's ``source`` list.

    `type`, `name` and `id` are required; the remaining fields default to
    ``None``.
    '''
    type: str
    name: str
    id: int
    ref: Optional[str] = None
    ontology: Optional[str] = None
    brenda_id: Optional[str] = None
@dataclass
class Source(RecordBase):
    '''A group of source records together with the taxa listed alongside them.'''
    source: List[SourceRecord]
    taxons: List[TaxonomyRecord]

    @classmethod
    def from_dict(cls, data: dict):
        '''Build a record from a payload dict.

        Both the ``source`` and ``taxons`` keys are optional and default to
        empty lists.
        '''
        source_records = list(map(SourceRecord.from_dict, data.get("source", [])))
        taxon_records = list(map(TaxonomyRecord.from_dict, data.get("taxons", [])))
        return cls(source=source_records, taxons=taxon_records)
@dataclass
class CellLine(RecordBase):
    '''A cell line entry; this is the record type of the ``cell_lines-all`` collection.

    All four fields are required.
    '''
    cellosaurus_id: str
    id: int
    is_problematic: bool
    name: str
@dataclass
class Disease(RecordBase):
    '''A disease entry; this is the record type of the ``diseases-all`` collection.

    Derives from :class:`RecordBase` like every other record class, so that
    it satisfies the ``RecordBase`` bound on the collection property's type
    variable.
    '''
    id: int
    name: str
    do_id: Optional[str] = None
    taxons: List[TaxonomyRecord] = field(default_factory=list)

    @classmethod
    def from_dict(cls, data: dict):
        '''Build a record from a payload dict.

        ``id`` and ``name`` are required keys. ``do_id`` defaults to
        ``None`` and ``taxons`` to an empty list.
        '''
        taxons = [TaxonomyRecord.from_dict(x) for x in data.get("taxons", [])]
        return cls(data['id'], data['name'], data.get('do_id'), taxons)
@dataclass
class CompositionRecord(RecordBase):
    '''The ``composition`` part of a glycosylation result.

    The same composition is carried in several text encodings
    (``format_*``). Only `glytoucan_id` is optional.
    '''
    format_byonic: str
    format_condensed: str
    format_glyconnect: str
    format_numeric: str
    id: int
    mass: float
    mass_monoisotopic: float
    reviewed: bool
    glytoucan_id: Optional[str] = None

    def parse(self) -> FrozenGlycanComposition:
        '''Parse `format_glyconnect` with :func:`loads` and return the result.'''
        return loads(self.format_glyconnect)
@dataclass
class StructureRecord(RecordBase):
    '''The ``structure`` part of a glycosylation result.

    Only `glytoucan_id` is optional.
    '''
    glycan_core: str
    glycan_type: str
    has_image: bool
    id: int
    is_undefined: bool
    reviewed: bool
    glytoucan_id: Optional[str] = None
@dataclass
class CompozitorGlycan(RecordBase):
    '''One glycosylation result: a composition and structure, with optional
    taxonomy and protein context.
    '''
    composition: CompositionRecord
    structure: StructureRecord
    taxonomy: Optional[TaxonomyRecord]
    protein: Optional[ProteinRecord]

    @classmethod
    def from_dict(cls, data: dict):
        '''Build a result from a payload dict, converting the nested parts.

        ``composition`` and ``structure`` are required keys. ``taxonomy``
        and ``protein`` are converted only when present and non-empty;
        otherwise the field is ``None``, matching the Optional annotations.
        '''
        comp = CompositionRecord.from_dict(data['composition'])
        struct = StructureRecord.from_dict(data['structure'])
        taxonomy_data = data.get('taxonomy')
        protein_data = data.get('protein')
        taxonomy = TaxonomyRecord.from_dict(taxonomy_data) if taxonomy_data else None
        protein = ProteinRecord.from_dict(protein_data) if protein_data else None
        # Pass by keyword: the field order is (taxonomy, protein), and
        # positional passing previously swapped the two.
        return cls(composition=comp, structure=struct,
                   taxonomy=taxonomy, protein=protein)
# Record class produced by an APICollectionProperty; must derive from RecordBase.
T = TypeVar("T", bound=RecordBase)
@dataclass
class APICollectionProperty(Generic[T]):
    '''Descriptor exposing a remote JSON collection as a lazily fetched list.

    On first access through an instance, the collection at `url` is
    downloaded, each element is converted with ``record_type.from_dict``,
    and the list is stored in the owner's ``_cache`` dict under `url`.
    Later accesses return the cached list. ``del owner.attr`` drops the
    cached list so the next access fetches it again.
    '''
    url: str
    record_type: Type[T]

    def __get__(self, obj, objtype=None) -> List[T]:
        '''Return the cached collection, fetching it on first access.

        Access through the class (`obj` is ``None``) returns the descriptor
        itself.
        '''
        if obj is None:
            return self
        result = obj._cache.get(self.url)
        # Compare against None so that an empty cached list still counts
        # as a hit.
        if result is not None:
            return result
        resp = requests.get(self.url)
        resp.raise_for_status()
        data = resp.json()
        result = [self.record_type.from_dict(d) for d in data]
        obj._cache[self.url] = result
        return result

    def __delete__(self, obj):
        '''Forget the cached collection; a no-op if nothing was cached yet.'''
        obj._cache.pop(self.url, None)
@dataclass
class Compozitor:
    '''Client for the GlyConnect API.

    The collection attributes are fetched on first access and cached per
    instance in `_cache`, keyed by endpoint URL.
    '''
    _cache: dict = field(default_factory=dict, repr=False)

    proteins = APICollectionProperty(f"{API_SERVER}/proteins-all", ProteinRecord)
    sources = APICollectionProperty(f"{API_SERVER}/sources-all", Source)
    cell_lines = APICollectionProperty(f"{API_SERVER}/cell_lines-all", CellLine)
    diseases = APICollectionProperty(f"{API_SERVER}/diseases-all", Disease)

    def query(self, taxonomy: Optional[str]=None, cell_line: Optional[str]=None,
              protein: Optional[str]=None, disease: Optional[str]=None):
        '''Query the ``glycosylations`` endpoint with the given filters.

        Only filters with a truthy value are sent as query parameters.

        Returns
        -------
        list of :class:`CompozitorGlycan`
            One entry per element of the response's ``results``; empty when
            ``results`` is empty or otherwise falsy.

        Raises
        ------
        :class:`ValueError`: Raised if the JSON response is a list instead
            of an object
        '''
        filters = (
            ('taxonomy', taxonomy),
            ('cell_line', cell_line),
            ('protein', protein),
            ('disease', disease),
        )
        params = {name: value for name, value in filters if value}
        resp = requests.get(f"{API_SERVER}/glycosylations", params)
        resp.raise_for_status()
        payload = resp.json()
        if isinstance(payload, list):
            raise ValueError("Malformed query or invalid response")
        hits = payload['results']
        if not hits:
            return []
        return [CompozitorGlycan.from_dict(hit) for hit in hits]
# Module-level default client, with its `query` method re-exported as a
# plain module function for convenience.
client = Compozitor()
query = client.query