"""
DELPH-IN Web API Server
"""
import datetime
import functools
import json
import pathlib
import urllib.parse
import falcon
from delphin import ace, derivation, dmrs, eds, interface, itsdb, tokens
from delphin.codecs import (
dmrsjson,
edsjson,
mrsjson,
simplemrs,
)
[docs]
class ProcessorServer:
    """
    Falcon resource serving the output of an ACE processor over GET.

    For every request a new processor is spawned for the configured
    grammar, the ``input`` query parameter is sent to it, and the
    formatted outcome becomes the response media.

    Note:
        ``processor_class`` is unset on this base class, so it is not
        meant to be used directly. Use a subclass instead.

    Args:
        grammar: grammar passed as the first argument to the processor
        *args: command-line arguments given to every spawned processor
        **kwargs: keyword arguments forwarded to the processor class
    """

    # processor type instantiated by spawn(); subclasses override it
    processor_class: type[interface.Processor] | None = None

    def __init__(self, grammar, *args, **kwargs):
        self.grammar = grammar
        self.args = [*args]
        self.kwargs = kwargs

    def spawn(self, *args):
        """Create a processor using the stored arguments plus *args*."""
        command_args = self.args + list(args)
        return self.processor_class(
            self.grammar,
            command_args,
            **self.kwargs,
        )

    def on_get(self, req, resp):
        """Run the processor on the ``input`` parameter and reply."""
        sentence = req.get_param("input", required=True)
        # "results" caps the number of results requested via "-n"
        max_results = req.get_param_as_int("results", min_value=1, default=1)
        with self.spawn("-n", str(max_results)) as processor:
            outcome = processor.interact(sentence)
        selected = _get_args(req)
        resp.media = _make_response(sentence, outcome, selected)
        resp.status = falcon.HTTP_OK
[docs]
class ParseServer(ProcessorServer):
    """
    A processor server whose requests are handled by ``ace.ACEParser``.
    """

    # processor type spawned for each request
    processor_class = ace.ACEParser
[docs]
class GenerationServer(ProcessorServer):
    """
    A processor server whose requests are handled by ``ace.ACEGenerator``.
    """

    # processor type spawned for each request
    processor_class = ace.ACEGenerator
def _get_args(req):
args = {}
params = req.params
for name in ("tokens", "derivation", "mrs", "eds", "dmrs"):
if name in params:
val = params[name]
# handle 'json' and 'null' for ErgAPI compatibility
args[name] = val == "json" or (
val != "null" and req.get_param_as_bool(name)
)
else:
args[name] = False
return args
def _make_response(inp, ace_response, params):
tcpu = ace_response.get("tcpu")
pedges = ace_response.get("pedges")
readings = ace_response.get("readings")
if readings is None:
readings = len(ace_response.get("results", []))
results = []
for i, res in enumerate(ace_response.results()):
m = res.mrs()
d = res.derivation()
result = {"result-id": i}
if params["derivation"]:
result["derivation"] = d.to_dict(
fields=["id", "entity", "score", "form", "tokens"]
)
if params["mrs"]:
result["mrs"] = mrsjson.to_dict(m)
if params["eds"]:
e = eds.from_mrs(m, predicate_modifiers=True)
result["eds"] = edsjson.to_dict(e)
if params["dmrs"]:
_d = dmrs.from_mrs(m)
result["dmrs"] = dmrsjson.to_dict(_d)
# surface is for generation
if "surface" in res:
result["surface"] = res["surface"]
results.append(result)
response = {"input": inp, "readings": readings, "results": results}
if tcpu is not None:
response["tcpu"] = tcpu
if pedges is not None:
response["pedges"] = pedges
if params.get("tokens") == "json":
t1 = ace_response.tokens("initial")
t2 = ace_response.tokens("internal")
response["tokens"] = {"initial": t1.to_list(), "internal": t2.to_list()}
return response
[docs]
class TestSuiteServer:
    """
    A server for a collection of test suites.

    Args:
        testsuites: list of test suite descriptions; each one is a
            mapping that has at least a ``name`` and a ``path`` entry
        transforms: mapping of table names to lists of (column,
            transform) pairs. When ``None``, the module-level
            ``FIELD_TRANSFORMS`` are used; any other empty value
            disables transforms.
    """

    def __init__(self, testsuites, transforms=None):
        self.testsuites = testsuites
        # name -> description, for lookups by the per-suite handlers
        self.index = {entry["name"]: entry for entry in testsuites}
        if transforms is None:
            transforms = FIELD_TRANSFORMS
        elif not transforms:
            transforms = []
        self.transforms = dict(transforms)

    @staticmethod
    def _link(req, segment):
        """Return the request URI extended by the quoted path *segment*."""
        # req.uri carries the query string; drop it so the new segment
        # is appended to the path and not to the query
        base = req.uri.partition("?")[0]
        return "/".join([base, urllib.parse.quote(segment)])

    def _open(self, name):
        """Open the test suite called *name* or raise a 404 error."""
        try:
            entry = self.index[name]
        except KeyError as e:
            raise falcon.HTTPNotFound() from e
        return itsdb.TestSuite(entry["path"])

    def on_get(self, req, resp):
        """Respond with the name and URL of every configured test suite."""
        resp.media = [
            {"name": entry["name"], "url": self._link(req, entry["name"])}
            for entry in self.testsuites
        ]
        resp.status = falcon.HTTP_OK

    def on_get_name(self, req, resp, name):
        """Respond with a mapping of the suite's table names to URLs."""
        ts = self._open(name)
        resp.media = {
            tablename: self._link(req, tablename) for tablename in ts.schema
        }
        resp.status = falcon.HTTP_OK

    def on_get_table(self, req, resp, name, table):
        """
        Respond with the rows of one table of a test suite.

        The ``limit`` and ``page`` query parameters select a slice of
        the rows; by default the whole table is returned. Columns with
        a configured transform are converted before being returned.

        Raises:
            falcon.HTTPNotFound: if the test suite or table is unknown
        """
        ts = self._open(name)
        # an unknown table is a client error, not a server failure
        if table not in ts.schema:
            raise falcon.HTTPNotFound()
        table_ = ts[table]

        # lower bounds keep the slice from using negative indices, which
        # would silently return rows counted from the end of the table
        limit = req.get_param_as_int("limit", min_value=0, default=len(table_))
        page = req.get_param_as_int("page", min_value=1, default=1)
        rowslice = slice((page - 1) * limit, page * limit)

        # resolve column names to indices once, outside the row loop
        transforms = [
            (table_.column_index(colname), transform)
            for colname, transform in self.transforms.get(table, [])
        ]

        rows = []
        for row in table_[rowslice]:
            row = list(row)
            for colidx, transform in transforms:
                row[colidx] = transform(row[colidx])
            rows.append(row)

        resp.media = rows
        resp.status = falcon.HTTP_OK
# default field transformers
def _transform_tokens(s):
    """Read *s* as a YY token lattice and return its ``to_list()`` form."""
    lattice = tokens.YYTokenLattice.from_string(s)
    return lattice.to_list()
def _transform_mrs(s):
    """Decode *s* with the SimpleMRS codec and convert it via MRS-JSON."""
    decoded = simplemrs.decode(s)
    return mrsjson.to_dict(decoded)
def _transform_derivation(s):
    """Parse *s* as a derivation and return its ``to_dict()`` form."""
    parsed = derivation.from_string(s)
    return parsed.to_dict()
# Default transforms as (table name, [(column name, transform), ...]) pairs.
FIELD_TRANSFORMS = [
    (
        "parse",
        [
            ("p-input", _transform_tokens),
            ("p-tokens", _transform_tokens),
        ],
    ),
    (
        "result",
        [
            ("mrs", _transform_mrs),
            ("derivation", _transform_derivation),
        ],
    ),
]
# override default JSON handler so it can serialize datetime
def _datetime_default(obj):
if isinstance(obj, datetime.datetime):
return str(obj)
else:
raise TypeError(type(obj))
# JSON media handler whose encoder falls back to _datetime_default for
# objects json.dumps cannot serialize on its own; decoding is unchanged.
_json_handler = falcon.media.JSONHandler(
    loads=json.loads,
    dumps=functools.partial(json.dumps, default=_datetime_default),
)