Source code for delphin.codecs.dmrsjson

"""
DMRS-JSON serialization and deserialization.
"""

import json
from pathlib import Path

from delphin.dmrs import (
    CVARSORT,
    DMRS,
    Link,
    Node,
)
from delphin.lnk import Lnk

CODEC_INFO = {
    "representation": "dmrs",
}

HEADER = "["
JOINER = ","
FOOTER = "]"


[docs] def load(source): """ Deserialize a DMRS-JSON file (handle or filename) to DMRS objects Args: source: filename or file object Returns: a list of DMRS objects """ if hasattr(source, "read"): data = json.load(source) else: source = Path(source).expanduser() with source.open() as fh: data = json.load(fh) return [from_dict(d) for d in data]
[docs] def loads(s): """ Deserialize a DMRS-JSON string to DMRS objects Args: s (str): a DMRS-JSON string Returns: a list of DMRS objects """ data = json.loads(s) return [from_dict(d) for d in data]
[docs] def dump(ds, destination, properties=True, lnk=True, indent=False, encoding="utf-8"): """ Serialize DMRS objects to a DMRS-JSON file. Args: destination: filename or file object ds: iterator of :class:`~delphin.dmrs.DMRS` objects to serialize properties: if `True`, encode variable properties lnk: if `False`, suppress surface alignments and strings indent: if `True`, adaptively indent; if `False` or `None`, don't indent; if a non-negative integer N, indent N spaces per level encoding (str): if *destination* is a filename, write to the file with the given encoding; otherwise it is ignored """ if indent is False: indent = None elif indent is True: indent = 2 data = [to_dict(d, properties=properties, lnk=lnk) for d in ds] if hasattr(destination, "write"): json.dump(data, destination, indent=indent) else: destination = Path(destination).expanduser() with destination.open("w", encoding=encoding) as fh: json.dump(data, fh)
[docs] def dumps(ds, properties=True, lnk=True, indent=False): """ Serialize DMRS objects to a DMRS-JSON string. Args: ds: iterator of :class:`~delphin.dmrs.DMRS` objects to serialize properties: if `True`, encode variable properties lnk: if `False`, suppress surface alignments and strings indent: if `True`, adaptively indent; if `False` or `None`, don't indent; if a non-negative integer N, indent N spaces per level Returns: a DMRS-JSON-serialization of the DMRS objects """ if indent is False: indent = None elif indent is True: indent = 2 data = [to_dict(d, properties=properties, lnk=lnk) for d in ds] return json.dumps(data, indent=indent)
[docs] def decode(s): """ Deserialize a DMRS object from a DMRS-JSON string. """ return from_dict(json.loads(s))
[docs] def encode(d, properties=True, lnk=True, indent=False): """ Serialize a DMRS object to a DMRS-JSON string. Args: d: a DMRS object properties (bool): if `False`, suppress variable properties lnk: if `False`, suppress surface alignments and strings indent (bool, int): if `True` or an integer value, add newlines and indentation Returns: a DMRS-JSON-serialization of the DMRS object """ if indent is False: indent = None elif indent is True: indent = 2 return json.dumps(to_dict(d, properties=properties, lnk=lnk), indent=indent)
[docs] def to_dict(d, properties=True, lnk=True): """ Encode DMRS *d* as a dictionary suitable for JSON serialization. """ nodes = [] for node in d.nodes: n = dict(nodeid=node.id, predicate=node.predicate) if properties and node.sortinfo: n["sortinfo"] = node.sortinfo if node.carg is not None: n["carg"] = node.carg if lnk: if node.lnk: n["lnk"] = {"from": node.cfrom, "to": node.cto} if node.surface: n["surface"] = node.surface if node.base: n["base"] = node.base nodes.append(n) links = [] for link in d.links: links.append( { "from": link.start, "to": link.end, "rargname": link.role, "post": link.post, } ) data = dict(nodes=nodes, links=links) if d.top is not None: # could be 0 data["top"] = d.top if d.index: data["index"] = d.index if lnk: if d.lnk: data["lnk"] = {"from": d.cfrom, "to": d.cto} if d.surface: data["surface"] = d.surface if d.identifier is not None: data["identifier"] = d.identifier return data
[docs] def from_dict(d): """ Decode a dictionary, as from :func:`to_dict`, into a DMRS object. """ def _lnk(x): return None if x is None else Lnk.charspan(x["from"], x["to"]) nodes = [] for node in d.get("nodes", []): properties = { str(key): str(val) # ensure keys and values are strings for key, val in node.get("sortinfo", {}).items() } # make a copy type = None if CVARSORT in properties: type = properties.pop(CVARSORT) nodes.append( Node( node["nodeid"], node["predicate"], type=type, properties=properties, carg=node.get("carg"), lnk=_lnk(node.get("lnk")), surface=node.get("surface"), base=node.get("base"), ) ) links = [] for link in d.get("links", []): links.append( Link( link["from"], link["to"], link.get("rargname"), link.get("post"), ) ) return DMRS( top=d.get("top"), index=d.get("index"), nodes=nodes, links=links, lnk=_lnk(d.get("lnk")), surface=d.get("surface"), identifier=d.get("identifier"), )