# Source code for delphin.codecs.mrsprolog

"""Serialization functions for the MRS-Prolog format."""

from pathlib import Path

from delphin.mrs import CONSTANT_ROLE
from delphin.sembase import role_priority

    'representation': 'mrs',

def dump(ms, destination, properties=True, lnk=True,
         indent=False, encoding='utf-8'):
    """
    Serialize MRS objects to the Prolog representation and write to a file.

    The serialization itself is delegated to :func:`dumps`; this function
    only decides where the resulting text is written.

    Args:
        ms: an iterator of MRS objects to serialize
        destination: filename or file object where data will be written;
            anything with a `write` attribute is treated as an open
            file object and written to directly
        properties: passed through to :func:`dumps`
        lnk: passed through to :func:`dumps`
        indent (bool, int): if `True` or an integer value, add
            newlines and indentation
        encoding (str): if *destination* is a filename, write to the
            file with the given encoding; otherwise it is ignored
    """
    text = dumps(ms, properties=properties, lnk=lnk, indent=indent)
    if hasattr(destination, 'write'):
        # already-open stream: the caller owns it, so it is not closed here
        print(text, file=destination)
    else:
        # path-like destination: expand a leading '~' and (over)write the file
        destination = Path(destination).expanduser()
        with destination.open('w', encoding=encoding) as fh:
            print(text, file=fh)
def dumps(ms, properties=True, lnk=True, indent=False):
    """
    Produce the Prolog serialization of a collection of MRS objects.

    All of the work is done by the module-private `_encode` helper; the
    arguments are forwarded to it unchanged.

    Args:
        ms: an iterator of MRS objects to serialize
        properties: forwarded to the encoder
        lnk: forwarded to the encoder
        indent (bool, int): if `True` or an integer value, add
            newlines and indentation
    Returns:
        the Prolog string representation of a corpus of MRSs
    """
    serialized = _encode(
        ms,
        properties=properties,
        lnk=lnk,
        indent=indent,
    )
    return serialized
def encode(m, properties=True, lnk=True, indent=False):
    """
    Produce the Prolog serialization of a single MRS object.

    This is a thin public wrapper around the module-private
    `_encode_mrs` helper.

    Args:
        m: an MRS object
        properties (bool): forwarded to the encoder
        lnk: forwarded to the encoder
        indent (bool, int): if `True` or an integer value, add
            newlines and indentation
    Returns:
        a Prolog-serialization of the MRS object
    """
    serialized = _encode_mrs(
        m,
        properties=properties,
        lnk=lnk,
        indent=indent,
    )
    return serialized
def _encode(ms, properties, lnk, indent):
    """
    Serialize every MRS in *ms* and join the results into one string.

    Args:
        ms: an iterable of MRS objects
        properties: forwarded unchanged to `_encode_mrs`
        lnk: forwarded unchanged to `_encode_mrs`
        indent (bool, int): `None` or `False` joins the items with a
            single space; any other value joins them with a newline
    Returns:
        the joined serialization (the empty string if *ms* is empty)
    """
    # identity checks (not truthiness) so that indent=0 still selects
    # the newline-delimited layout
    if indent is None or indent is False:
        delim = ' '
    else:
        delim = '\n'
    return delim.join(_encode_mrs(m, properties, lnk, indent) for m in ms)


def _encode_mrs(m, properties, lnk, indent):
    """
    Serialize one MRS object as a Prolog `psoa(...)` term.

    The term lists the top variable, then the index (only when it is not
    `None`), the relations, the `hcons` list and, when *m.icons* is
    non-empty, a trailing `icons` list.

    Args:
        m: an object with `top`, `index`, `rels`, `hcons` and `icons`
            attributes
        properties: accepted for interface symmetry; not read here
        lnk: accepted for interface symmetry; not read here
        indent (bool, int): `None`/`False` for a compact single-line
            term; `True` for 2-space indentation; an integer for that
            many spaces
    Returns:
        the Prolog string for *m*
    """
    pl = 'psoa({topvars},{_}[{rels}],{_}hcons([{hcons}]){icons})'
    plvc = '{reln}({left},{right})'

    # Pre-compute the separators for each nesting level:
    #   _     before the rels / hcons / icons lists
    #   __    between consecutive rel(...) terms
    #   ___   before the attrval list inside a rel(...)
    #   ____  between consecutive attrval(...) terms
    if indent is None or indent is False:
        _, __, ___, ____ = '', ',', '', ','
    else:
        if indent is True:
            indent = 2
        _ = '\n' + (' ' * indent)
        # the extra padding lines items up under the opening bracket text
        __ = ',' + _ + (' ' * len('['))
        ___ = _ + (' ' * len('[rel('))
        ____ = __ + (' ' * len('rel(['))

    topvars = [str(m.top)]
    if m.index is not None:
        topvars.append(str(m.index))

    rels = [_encode_rel(rel, ___, ____) for rel in m.rels]

    hcons = ','.join(
        plvc.format(reln=hc.relation, left=hc.hi, right=hc.lo)
        for hc in m.hcons
    )

    icons = ''
    if m.icons:
        icons = ',{_}icons([{ics}])'.format(
            _=_,
            ics=','.join(
                plvc.format(reln=ic.relation, left=ic.left, right=ic.right)
                for ic in m.icons
            )
        )

    return pl.format(
        topvars=','.join(topvars),
        rels=__.join(rels),
        hcons=hcons,
        icons=icons,
        _=_,
    )


def _encode_rel(ep, ___, ____):
    """
    Serialize one elementary predication as a Prolog `rel(...)` term.

    Args:
        ep: an object with `predicate`, `label` and `args` (a mapping
            from role name to value)
        ___: text inserted before the opening bracket of the
            attribute-value list
        ____: separator placed between `attrval(...)` terms
    Returns:
        the Prolog string for *ep*
    """
    plav = "attrval('{}',{})"
    args = []
    # roles are emitted in the order defined by role_priority
    for role in sorted(ep.args, key=role_priority):
        val = ep.args[role]
        if role == CONSTANT_ROLE:
            # constant values are written as quoted Prolog atoms
            # NOTE(review): quote characters inside the value are not escaped
            val = "'{}'".format(val)
        args.append(plav.format(role, val))
    return "rel('{pred}',{lbl},{___}[{attrvals}])".format(
        pred=ep.predicate,
        lbl=ep.label,
        ___=___,
        attrvals=____.join(args))