"""
DMRS-JSON serialization and deserialization.
"""
import json
from pathlib import Path
from delphin.dmrs import (
CVARSORT,
DMRS,
Link,
Node,
)
from delphin.lnk import Lnk
# Codec metadata: names the semantic representation this module handles.
CODEC_INFO = {"representation": "dmrs"}

# The opening bracket, item separator, and closing bracket of a JSON array.
# NOTE(review): none of the functions visible in this module reference
# these; presumably they let external tooling emit a valid JSON array one
# serialized item at a time -- confirm against the callers.
HEADER = "["
JOINER = ","
FOOTER = "]"
[docs]
def load(source, encoding="utf-8"):
    """
    Deserialize a DMRS-JSON file (handle or filename) to DMRS objects

    Args:
        source: filename or file object
        encoding (str): if *source* is a filename, read the file with
            the given encoding; otherwise it is ignored
    Returns:
        a list of DMRS objects
    """
    if hasattr(source, "read"):
        # An already-open handle: the caller controls its encoding.
        data = json.load(source)
    else:
        # JSON text is UTF-8 by specification and dump() writes UTF-8 by
        # default, so don't fall back on the platform's locale encoding
        # when opening the file ourselves.
        path = Path(source).expanduser()
        with path.open(encoding=encoding) as fh:
            data = json.load(fh)
    return [from_dict(d) for d in data]
[docs]
def loads(s):
    """
    Decode a DMRS-JSON string into DMRS objects.

    The string is parsed as JSON and every item of the parsed value is
    converted with :func:`from_dict`.

    Args:
        s (str): a DMRS-JSON string
    Returns:
        a list of DMRS objects
    """
    return [from_dict(item) for item in json.loads(s)]
[docs]
def dump(ds, destination, properties=True, lnk=True, indent=False, encoding="utf-8"):
    """
    Serialize DMRS objects to a DMRS-JSON file.

    Args:
        destination: filename or file object
        ds: iterator of :class:`~delphin.dmrs.DMRS` objects to
            serialize
        properties: if `True`, encode variable properties
        lnk: if `False`, suppress surface alignments and strings
        indent: if `True`, adaptively indent; if `False` or `None`,
            don't indent; if a non-negative integer N, indent N spaces
            per level
        encoding (str): if *destination* is a filename, write to the
            file with the given encoding; otherwise it is ignored
    """
    # Translate the boolean shorthands into what the json module expects.
    if indent is False:
        indent = None
    elif indent is True:
        indent = 2
    data = [to_dict(d, properties=properties, lnk=lnk) for d in ds]
    if hasattr(destination, "write"):
        json.dump(data, destination, indent=indent)
    else:
        path = Path(destination).expanduser()
        with path.open("w", encoding=encoding) as fh:
            # Pass *indent* here too so that files named by path are
            # formatted the same way as output to a file object.
            json.dump(data, fh, indent=indent)
[docs]
def dumps(ds, properties=True, lnk=True, indent=False):
    """
    Render DMRS objects as a single DMRS-JSON string.

    Args:
        ds: iterator of :class:`~delphin.dmrs.DMRS` objects to
            serialize
        properties: if `True`, encode variable properties
        lnk: if `False`, suppress surface alignments and strings
        indent: if `True`, adaptively indent; if `False` or `None`,
            don't indent; if a non-negative integer N, indent N spaces
            per level
    Returns:
        a DMRS-JSON-serialization of the DMRS objects
    """
    # Boolean shorthands: True means two spaces, False means compact.
    if isinstance(indent, bool):
        indent = 2 if indent else None
    return json.dumps(
        [to_dict(item, properties=properties, lnk=lnk) for item in ds],
        indent=indent,
    )
[docs]
def decode(s):
    """
    Deserialize a DMRS object from a DMRS-JSON string.

    Args:
        s (str): a DMRS-JSON string describing one DMRS
    Returns:
        the result of :func:`from_dict` on the parsed JSON value
    """
    parsed = json.loads(s)
    return from_dict(parsed)
[docs]
def encode(d, properties=True, lnk=True, indent=False):
    """
    Render one DMRS object as a DMRS-JSON string.

    Args:
        d: a DMRS object
        properties (bool): if `False`, suppress variable properties
        lnk: if `False`, suppress surface alignments and strings
        indent (bool, int): if `True` or an integer value, add
            newlines and indentation
    Returns:
        a DMRS-JSON-serialization of the DMRS object
    """
    # Boolean shorthands: True means two spaces, False means compact.
    if isinstance(indent, bool):
        indent = 2 if indent else None
    payload = to_dict(d, properties=properties, lnk=lnk)
    return json.dumps(payload, indent=indent)
[docs]
def to_dict(d, properties=True, lnk=True):
    """
    Encode DMRS *d* as a dictionary suitable for JSON serialization.

    Args:
        d: a DMRS object
        properties: if `True`, include each node's non-empty
            `sortinfo` mapping
        lnk: if `False`, suppress surface alignments and strings
            (`lnk`, `surface`, and each node's `base`)
    Returns:
        a dictionary with `nodes` and `links` lists plus whichever of
        `top`, `index`, `lnk`, `surface`, and `identifier` are set
        on *d*
    """
    nodes = []
    for node in d.nodes:
        n = {"nodeid": node.id, "predicate": node.predicate}
        if properties and node.sortinfo:
            n["sortinfo"] = node.sortinfo
        if node.carg is not None:
            n["carg"] = node.carg
        if lnk:
            if node.lnk:
                n["lnk"] = {"from": node.cfrom, "to": node.cto}
            if node.surface:
                n["surface"] = node.surface
            if node.base:
                n["base"] = node.base
        nodes.append(n)

    links = [
        {
            "from": link.start,
            "to": link.end,
            "rargname": link.role,
            "post": link.post,
        }
        for link in d.links
    ]

    data = {"nodes": nodes, "links": links}
    # top and index hold node ids, and 0 is a possible id, so test
    # against None rather than relying on truthiness.
    if d.top is not None:
        data["top"] = d.top
    if d.index is not None:
        data["index"] = d.index
    if lnk:
        if d.lnk:
            data["lnk"] = {"from": d.cfrom, "to": d.cto}
        if d.surface:
            data["surface"] = d.surface
    if d.identifier is not None:
        data["identifier"] = d.identifier
    return data
[docs]
def from_dict(d):
    """
    Decode a dictionary, as from :func:`to_dict`, into a DMRS object.

    Args:
        d: a dictionary; its `nodes` and `links` entries default to
            empty lists, and `top`, `index`, `lnk`, `surface`,
            and `identifier` default to `None` when absent
    Returns:
        a DMRS object
    """

    def _span(entry):
        # A missing lnk stays None; otherwise build a character span.
        if entry is None:
            return None
        return Lnk.charspan(entry["from"], entry["to"])

    def _node(entry):
        # Copy sortinfo, coercing keys and values to strings.
        props = {
            str(name): str(value)
            for name, value in entry.get("sortinfo", {}).items()
        }
        # The CVARSORT entry is passed as the node's type, not a property.
        sort_type = props.pop(CVARSORT, None)
        return Node(
            entry["nodeid"],
            entry["predicate"],
            type=sort_type,
            properties=props,
            carg=entry.get("carg"),
            lnk=_span(entry.get("lnk")),
            surface=entry.get("surface"),
            base=entry.get("base"),
        )

    def _link(entry):
        return Link(
            entry["from"],
            entry["to"],
            entry.get("rargname"),
            entry.get("post"),
        )

    node_list = [_node(entry) for entry in d.get("nodes", [])]
    link_list = [_link(entry) for entry in d.get("links", [])]
    return DMRS(
        top=d.get("top"),
        index=d.get("index"),
        nodes=node_list,
        links=link_list,
        lnk=_span(d.get("lnk")),
        surface=d.get("surface"),
        identifier=d.get("identifier"),
    )