# Source code for delphin.codecs.edspenman

# -*- coding: utf-8 -*-

"""
EDS-PENMAN serialization and deserialization.
"""

from pathlib import Path

import penman

from delphin.lnk import Lnk
from delphin.sembase import (role_priority, property_priority)
from delphin.eds import (EDS, Node)


# Codec metadata: this module (de)serializes the EDS representation.
# NOTE(review): presumably read by PyDelphin's codec-discovery machinery
# to learn which representation the codec handles -- confirm.
CODEC_INFO = {'representation': 'eds'}


def load(source):
    """
    Deserialize an EDS-PENMAN file (handle or filename) to EDS objects.

    Args:
        source: filename or file object; anything without a `read`
            attribute is treated as a path and has `~` expanded
    Returns:
        a list of EDS objects, one per PENMAN graph read
    """
    # Open file objects are passed through untouched; paths are
    # normalized to a plain string before being handed to penman.
    is_handle = hasattr(source, 'read')
    target = source if is_handle else str(Path(source).expanduser())
    return [from_triples(graph.triples()) for graph in penman.load(target)]
def loads(s):
    """
    Deserialize an EDS-PENMAN string to EDS objects.

    Args:
        s (str): an EDS-PENMAN string
    Returns:
        a list of EDS objects, one per PENMAN graph in *s*
    """
    return [from_triples(graph.triples()) for graph in penman.loads(s)]
def dump(es, destination, properties=True, lnk=True,
         indent=False, encoding='utf-8'):
    """
    Serialize EDS objects to an EDS-PENMAN file.

    Args:
        es: iterator of :class:`~delphin.eds.EDS` objects to
            serialize
        destination: filename or file object
        properties: if `True`, encode variable properties
        lnk: if `False`, suppress surface alignments and strings
        indent: if `True`, adaptively indent; if `False` or `None`,
            don't indent; if a non-negative integer N, indent N spaces
            per level
        encoding (str): if *destination* is a filename, write to the
            file with the given encoding; otherwise it is ignored
    """
    text = dumps(es, properties=properties, lnk=lnk, indent=indent)

    # Anything that is not already writable is treated as a path.
    if not hasattr(destination, 'write'):
        path = Path(destination).expanduser()
        with path.open('w', encoding=encoding) as fh:
            print(text, file=fh)
        return

    print(text, file=destination)
def dumps(es, properties=True, lnk=True, indent=False):
    """
    Serialize EDS objects to an EDS-PENMAN string.

    Args:
        es: iterator of :class:`~delphin.eds.EDS` objects to
            serialize
        properties: if `True`, encode variable properties
        lnk: if `False`, suppress surface alignments and strings
        indent: if `True`, adaptively indent; if `False` or `None`,
            don't indent; if a non-negative integer N, indent N spaces
            per level
    Returns:
        an EDS-PENMAN serialization of the EDS objects
    """
    # One codec instance is shared for building every graph.
    build_graph = penman.PENMANCodec().triples_to_graph
    graphs = [
        build_graph(to_triples(item, properties=properties, lnk=lnk))
        for item in es
    ]
    return penman.dumps(graphs, indent=indent)
def decode(s):
    """
    Deserialize a single EDS object from an EDS-PENMAN string.

    Args:
        s (str): an EDS-PENMAN string encoding one graph
    Returns:
        an EDS object
    """
    graph = penman.decode(s)
    return from_triples(graph.triples())
def encode(e, properties=True, lnk=True, indent=False):
    """
    Serialize the EDS object *e* to an EDS-PENMAN string.

    Args:
        e: an EDS object
        properties (bool): if `False`, suppress variable properties
        lnk: if `False`, suppress surface alignments and strings
        indent (bool, int): if `True` or an integer value, add
            newlines and indentation
    Returns:
        an EDS-PENMAN serialization of the EDS object
    """
    node_triples = to_triples(e, properties=properties, lnk=lnk)
    codec = penman.PENMANCodec()
    return penman.encode(codec.triples_to_graph(node_triples), indent=indent)
def to_triples(e, properties=True, lnk=True):
    """
    Encode the EDS as triples suitable for PENMAN serialization.

    For each node the triples are emitted in a fixed order: the
    predicate (role ``instance``), then the optional ``lnk``, ``carg``
    and ``type`` attributes, then the node properties (lowercased
    names), and finally the outgoing edges.

    Args:
        e: an EDS object; anything else is first converted with
            `EDS.from_xmrs`
        properties: if `True`, encode variable properties
        lnk: if `False`, suppress surface alignments and strings
    Returns:
        a list of `(source, role, target)` tuples
    """
    # Attempt a conversion when handed something other than an EDS.
    eds = e if isinstance(e, EDS) else EDS.from_xmrs(e)

    # The sort is stable and the key is False only for the top node, so
    # the top moves to the front and all other nodes keep their order.
    ordered = sorted(eds.nodes, key=lambda node: node.id != eds.top)

    out = []
    for node in ordered:
        nid = node.id
        out.append((nid, 'instance', node.predicate))
        if lnk and node.lnk:
            out.append((nid, 'lnk', '"{}"'.format(str(node.lnk))))
        if node.carg:
            out.append((nid, 'carg', '"{}"'.format(node.carg)))
        if node.type is not None:
            out.append((nid, 'type', node.type))
        if properties:
            # Property names are lowercased so they can be told apart
            # from edge roles.
            out.extend(
                (nid, name.lower(), node.properties[name])
                for name in sorted(node.properties, key=property_priority)
            )
        out.extend(
            (nid, role, node.edges[role])
            for role in sorted(node.edges, key=role_priority)
        )
    return out
def from_triples(triples):
    """
    Decode triples, as from :func:`to_triples`, into an EDS object.

    Roles are interpreted as follows:

    * ``instance`` (or the legacy spelling ``predicate``): the node's
      predicate
    * ``lnk``: the surface alignment, with surrounding double quotes
      removed
    * ``carg``: the constant argument, with one pair of surrounding
      double quotes removed if present
    * ``type``: the node type
    * any other all-lowercase role: a node property (the name is
      stored uppercased)
    * anything else: an edge to the target node

    Args:
        triples: iterable of `(source, role, target)` triples
    Returns:
        an EDS object whose top is the source of the first triple, or
        `None` if *triples* is empty
    """
    nids, nd = [], {}
    for src, rel, tgt in triples:
        if src not in nd:
            # Remember first-seen order; the first source becomes the top.
            nids.append(src)
            nd[src] = {'pred': None, 'type': None, 'edges': {},
                       'props': {}, 'lnk': None, 'carg': None}
        info = nd[src]
        # to_triples() writes the predicate under the 'instance' role.
        # Matching only 'predicate' let 'instance' fall through to the
        # lowercase-role branch below, turning the predicate into a
        # bogus INSTANCE property and leaving the node without one.
        # 'predicate' is still accepted for backward compatibility.
        if rel in ('instance', 'predicate'):
            info['pred'] = tgt
        elif rel == 'lnk':
            info['lnk'] = Lnk(tgt.strip('"'))
        elif rel == 'carg':
            # startswith/endswith rather than indexing so that an empty
            # string does not raise IndexError.
            if tgt.startswith('"') and tgt.endswith('"'):
                tgt = tgt[1:-1]
            info['carg'] = tgt
        elif rel == 'type':
            info['type'] = tgt
        elif rel.islower():
            # to_triples() lowercases property names; restore the case.
            info['props'][rel.upper()] = tgt
        else:
            info['edges'][rel] = tgt

    nodes = [Node(nid,
                  nd[nid]['pred'],
                  type=nd[nid]['type'],
                  edges=nd[nid]['edges'],
                  properties=nd[nid]['props'],
                  carg=nd[nid]['carg'],
                  lnk=nd[nid]['lnk'])
             for nid in nids]
    top = nids[0] if nids else None
    return EDS(top=top, nodes=nodes)