"""
MRS-JSON serialization and deserialization.
"""
import json
from pathlib import Path
from delphin import variable
from delphin.lnk import Lnk
from delphin.mrs import EP, MRS, HCons, ICons
# Codec metadata: this module serializes the "mrs" representation.
CODEC_INFO = dict(representation="mrs")

# Delimiters of a JSON array: HEADER + JOINER.join(items) + FOOTER yields
# "[item,item,...]".  They are not referenced elsewhere in this section;
# presumably callers use them to stitch individually encoded items into a
# single JSON list -- confirm against the codec consumers.
HEADER, JOINER, FOOTER = "[", ",", "]"
[docs]
def load(source, encoding="utf-8"):
    """
    Deserialize a MRS-JSON file (handle or filename) to MRS objects

    The JSON document is expected to be an array; every element of the
    array is decoded with :func:`from_dict`.

    Args:
        source: filename or file object
        encoding (str): if *source* is a filename, read the file with
            the given encoding; otherwise it is ignored
    Returns:
        a list of MRS objects
    """
    if hasattr(source, "read"):
        # An already-open handle: the caller decided how it was opened,
        # so *encoding* does not apply.
        data = json.load(source)
    else:
        source = Path(source).expanduser()
        # Open with an explicit encoding (UTF-8 by default, matching
        # dump()) rather than the locale default, so that files with
        # non-ASCII text load identically on every platform.
        with source.open(encoding=encoding) as fh:
            data = json.load(fh)
    return [from_dict(d) for d in data]
[docs]
def loads(s):
    """
    Deserialize a MRS-JSON string to MRS objects

    The string is parsed as JSON and each element of the parsed value is
    decoded with :func:`from_dict`, in order.

    Args:
        s (str): a MRS-JSON string
    Returns:
        a list of MRS objects
    """
    decoded = []
    for item in json.loads(s):
        decoded.append(from_dict(item))
    return decoded
[docs]
def dump(ms, destination, properties=True, lnk=True, indent=False, encoding="utf-8"):
    """
    Serialize MRS objects to a MRS-JSON file.

    Each object is converted with :func:`to_dict` and the resulting
    list is written as a single JSON array.

    Args:
        ms: iterator of :class:`~delphin.mrs.MRS` objects to
            serialize
        destination: filename or file object
        properties: if `True`, encode variable properties
        lnk: if `False`, suppress surface alignments and strings
        indent: if `True`, indent 2 spaces per level; if `False` or
            `None`, don't indent; if a non-negative integer N, indent
            N spaces per level
        encoding (str): if *destination* is a filename, write to the
            file with the given encoding; otherwise it is ignored
    """
    # Identity tests (not equality) keep the integers 0 and 1 distinct
    # from False and True, so indent=0 and indent=1 pass through as-is.
    if indent is False:
        indent = None
    elif indent is True:
        indent = 2
    data = [to_dict(m, properties=properties, lnk=lnk) for m in ms]
    if hasattr(destination, "write"):
        json.dump(data, destination, indent=indent)
    else:
        destination = Path(destination).expanduser()
        with destination.open("w", encoding=encoding) as fh:
            # Pass *indent* here too so that writing to a filename
            # produces the same text as writing to a file object.
            json.dump(data, fh, indent=indent)
[docs]
def dumps(ms, properties=True, lnk=True, indent=False):
    """
    Serialize MRS objects to a MRS-JSON string.

    Each object is converted with :func:`to_dict`; the list of results
    is rendered as one JSON array.

    Args:
        ms: iterator of :class:`~delphin.mrs.MRS` objects to
            serialize
        properties: if `True`, encode variable properties
        lnk: if `False`, suppress surface alignments and strings
        indent: if `True`, indent 2 spaces per level; if `False` or
            `None`, don't indent; if a non-negative integer N, indent
            N spaces per level
    Returns:
        a MRS-JSON-serialization of the MRS objects
    """
    # Map the boolean shorthands onto what json.dumps understands.
    # Identity checks are deliberate: 0 and 1 are valid indent widths
    # and must not be mistaken for False and True.
    if indent is True:
        indent = 2
    elif indent is False:
        indent = None
    payload = []
    for m in ms:
        payload.append(to_dict(m, properties=properties, lnk=lnk))
    return json.dumps(payload, indent=indent)
[docs]
def decode(s):
    """
    Deserialize a MRS object from a MRS-JSON string.

    Args:
        s (str): a MRS-JSON string; the parsed JSON value is handed
            directly to :func:`from_dict`
    Returns:
        the object produced by :func:`from_dict`
    """
    parsed = json.loads(s)
    return from_dict(parsed)
[docs]
def encode(m, properties=True, lnk=True, indent=False):
    """
    Serialize a MRS object to a MRS-JSON string.

    Args:
        m: a MRS object
        properties (bool): if `False`, suppress variable properties
        lnk: if `False`, suppress surface alignments and strings
        indent (bool, int): if `True`, indent 2 spaces per level; if
            `False` or `None`, don't indent; an integer value is
            passed through as the indentation width
    Returns:
        a MRS-JSON-serialization of the MRS object
    """
    # Identity checks keep integer widths 0 and 1 from being treated
    # as the boolean shorthands.
    if indent is True:
        indent = 2
    elif indent is False:
        indent = None
    payload = to_dict(m, properties=properties, lnk=lnk)
    return json.dumps(payload, indent=indent)
[docs]
def to_dict(mrs, properties=True, lnk=True):
    """
    Encode the MRS as a dictionary suitable for JSON serialization.

    The result has the keys ``top``, ``index``, ``relations``,
    ``constraints`` (handle constraints followed by individual
    constraints), and ``variables``, in that order.

    Args:
        mrs: the MRS object to encode
        properties: if true, include each variable's non-empty
            property mapping under ``"properties"``
        lnk: if true, include each relation's ``"lnk"`` span and
            ``"surface"`` string when they are set
    Returns:
        a dictionary of plain JSON-compatible values
    """
    top = mrs.top
    index = mrs.index

    relations = []
    for ep in mrs.rels:
        rel = {"label": ep.label, "predicate": ep.predicate, "arguments": ep.args}
        if lnk:
            # Surface alignment and surface string are both controlled
            # by the *lnk* switch.
            if ep.lnk:
                rel["lnk"] = {"from": ep.cfrom, "to": ep.cto}
            if ep.surface:
                rel["surface"] = ep.surface
        if ep.base:
            rel["base"] = ep.base
        relations.append(rel)

    # Handle constraints come first, individual constraints after; the
    # two kinds are told apart later by their keys (high/low vs.
    # left/right).
    constraints = [
        {"relation": hc.relation, "high": hc.hi, "low": hc.lo} for hc in mrs.hcons
    ]
    constraints.extend(
        {"relation": ic.relation, "left": ic.left, "right": ic.right}
        for ic in mrs.icons
    )

    variables = {}
    for var in mrs.variables:
        entry = {"type": variable.type(var)}
        if properties and mrs.variables.get(var):
            # Copy so the result does not alias the MRS's own mapping.
            entry["properties"] = dict(mrs.variables[var])
        variables[var] = entry

    # skipping mrs.lnk, mrs.surface, or mrs.identifier
    return {
        "top": top,
        "index": index,
        "relations": relations,
        "constraints": constraints,
        "variables": variables,
    }
[docs]
def from_dict(d):
    """
    Decode a dictionary, as from :func:`to_dict`, into an MRS object.

    Only ``"top"`` is required; ``"index"``, ``"relations"``,
    ``"constraints"``, ``"variables"``, ``"lnk"``, ``"surface"``, and
    ``"identifier"`` are read when present.

    Args:
        d: a dictionary in the layout produced by :func:`to_dict`
    Returns:
        an MRS object
    Raises:
        KeyError: if ``"top"`` (or a required key of a relation or
            constraint) is missing
    """

    def _span(spec):
        # A missing "lnk" entry means no surface alignment.
        if spec is None:
            return None
        return Lnk.charspan(spec["from"], spec["to"])

    def _relation(rel):
        return EP(
            rel["predicate"],
            rel["label"],
            args=rel.get("arguments", {}),
            lnk=_span(rel.get("lnk")),
            surface=rel.get("surface"),
            base=rel.get("base"),
        )

    # Constraints share one list; the kind is recognized by its keys.
    # The two checks are independent, so an entry carrying both "high"
    # and "left" is placed in both groups.
    handle_constraints = []
    individual_constraints = []
    for constraint in d.get("constraints", []):
        if "high" in constraint:
            handle_constraints.append(constraint)
        if "left" in constraint:
            individual_constraints.append(constraint)

    variables = {}
    for var, data in d.get("variables", {}).items():
        variables[var] = data.get("properties", {})

    return MRS(
        d["top"],
        d.get("index"),
        [_relation(rel) for rel in d.get("relations", [])],
        [HCons(c["high"], c["relation"], c["low"]) for c in handle_constraints],
        icons=[
            ICons(c["left"], c["relation"], c["right"])
            for c in individual_constraints
        ],
        variables=variables,
        lnk=_span(d.get("lnk")),
        surface=d.get("surface"),
        identifier=d.get("identifier"),
    )