Source code for delphin.web.server

"""
DELPH-IN Web API Server
"""

import datetime
import functools
import json
import pathlib
import urllib.parse

import falcon

from delphin import ace, derivation, dmrs, eds, interface, itsdb, tokens
from delphin.codecs import (
    dmrsjson,
    edsjson,
    mrsjson,
    simplemrs,
)


def configure(api, parser=None, generator=None, testsuites=None):
    """
    Register the standard DELPH-IN routes on the application *api*.

    This is the recommended entry point for setting up the server;
    the resource classes in this module may also be mounted by hand
    when custom routes are wanted.

    A *parser* or *generator* given as a path (``str`` or
    :class:`pathlib.Path`) is wrapped in a :class:`ParseServer` or
    :class:`GenerationServer` built with default arguments. Any other
    non-``None`` value is mounted as-is, so pass a pre-built server
    instance when the underlying processor needs custom arguments.

    Args:
        api: an instance of :class:`falcon.App`
        parser: a path to a grammar or a :class:`ParseServer`
            instance; mounted at ``/parse``
        generator: a path to a grammar or a
            :class:`GenerationServer` instance; mounted at
            ``/generate``
        testsuites: mapping of collection names to lists of test
            suite entries; each collection is mounted at
            ``/<collection>``, ``/<collection>/{name}`` and
            ``/<collection>/{name}/{table}``

    Example:
        >>> server.configure(
        ...     api,
        ...     parser="~/grammars/erg-2018-x86-64-0.9.30.dat",
        ...     testsuites={
        ...         "gold": [{"name": "mrs", "path": "~/grammars/erg/tsdb/gold/mrs"}]
        ...     },
        ... )
    """
    path_types = (str, pathlib.Path)

    if parser is not None:
        parse_resource = (
            ParseServer(parser) if isinstance(parser, path_types) else parser
        )
        api.add_route("/parse", parse_resource)

    if generator is not None:
        generate_resource = (
            GenerationServer(generator)
            if isinstance(generator, path_types)
            else generator
        )
        api.add_route("/generate", generate_resource)

    if testsuites is not None:
        for collection_name, entries in testsuites.items():
            # collection names are percent-encoded before use in a route
            prefix = "/" + urllib.parse.quote(collection_name)
            suite_resource = TestSuiteServer(entries)
            api.add_route(prefix, suite_resource)
            api.add_route(f"{prefix}/{{name}}", suite_resource, suffix="name")
            api.add_route(
                f"{prefix}/{{name}}/{{table}}", suite_resource, suffix="table"
            )

    api.req_options.strip_url_path_trailing_slash = True
    # the same datetime-aware JSON handler is used for requests and responses
    json_media_type = "application/json"
    api.req_options.media_handlers[json_media_type] = _json_handler
    api.resp_options.media_handlers[json_media_type] = _json_handler
class ProcessorServer:
    """
    Base resource that serves results from an ACE processor.

    The grammar plus any extra positional and keyword arguments are
    stored at construction time and reused every time a processor is
    spawned for a request.

    Note:
        This class is not meant to be used directly. Use a subclass
        that sets :attr:`processor_class` instead.
    """

    # concrete processor type to instantiate; subclasses override this
    processor_class: type[interface.Processor] | None = None

    def __init__(self, grammar, *args, **kwargs):
        self.grammar = grammar
        self.args = [*args]
        self.kwargs = kwargs

    def spawn(self, *args):
        """
        Instantiate :attr:`processor_class` for the stored grammar.

        The stored positional arguments followed by *args* are passed
        as the processor's command arguments; the stored keyword
        arguments are forwarded unchanged.
        """
        command_args = self.args + [*args]
        return self.processor_class(self.grammar, command_args, **self.kwargs)

    def on_get(self, req, resp):
        """
        Process the ``input`` query parameter and respond with JSON.

        ``input`` is required. ``results`` (an integer of at least 1,
        defaulting to 1) is handed to the processor as ``-n``.
        """
        text = req.get_param("input", required=True)
        max_results = req.get_param_as_int("results", min_value=1, default=1)

        # a fresh processor is spawned per request and closed on exit
        with self.spawn("-n", str(max_results)) as processor:
            ace_resp = processor.interact(text)

        output_flags = _get_args(req)
        resp.media = _make_response(text, ace_resp, output_flags)
        resp.status = falcon.HTTP_OK
class ParseServer(ProcessorServer):
    """
    A :class:`ProcessorServer` that serves parse results from ACE.

    Requests are handled by processors of type
    :class:`~delphin.ace.ACEParser`.
    """

    processor_class = ace.ACEParser
class GenerationServer(ProcessorServer):
    """
    A :class:`ProcessorServer` that serves generation results from ACE.

    Requests are handled by processors of type
    :class:`~delphin.ace.ACEGenerator`.
    """

    processor_class = ace.ACEGenerator
def _get_args(req): args = {} params = req.params for name in ("tokens", "derivation", "mrs", "eds", "dmrs"): if name in params: val = params[name] # handle 'json' and 'null' for ErgAPI compatibility args[name] = val == "json" or ( val != "null" and req.get_param_as_bool(name) ) else: args[name] = False return args def _make_response(inp, ace_response, params): tcpu = ace_response.get("tcpu") pedges = ace_response.get("pedges") readings = ace_response.get("readings") if readings is None: readings = len(ace_response.get("results", [])) results = [] for i, res in enumerate(ace_response.results()): m = res.mrs() d = res.derivation() result = {"result-id": i} if params["derivation"]: result["derivation"] = d.to_dict( fields=["id", "entity", "score", "form", "tokens"] ) if params["mrs"]: result["mrs"] = mrsjson.to_dict(m) if params["eds"]: e = eds.from_mrs(m, predicate_modifiers=True) result["eds"] = edsjson.to_dict(e) if params["dmrs"]: _d = dmrs.from_mrs(m) result["dmrs"] = dmrsjson.to_dict(_d) # surface is for generation if "surface" in res: result["surface"] = res["surface"] results.append(result) response = {"input": inp, "readings": readings, "results": results} if tcpu is not None: response["tcpu"] = tcpu if pedges is not None: response["pedges"] = pedges if params.get("tokens") == "json": t1 = ace_response.tokens("initial") t2 = ace_response.tokens("internal") response["tokens"] = {"initial": t1.to_list(), "internal": t2.to_list()} return response
class TestSuiteServer:
    """
    A server for a collection of test suites.

    Three responders are provided: :meth:`on_get` lists the test
    suites of the collection, :meth:`on_get_name` lists the tables of
    one test suite, and :meth:`on_get_table` returns the (optionally
    paginated) rows of one table.

    Args:
        testsuites: list of test suite descriptions; each entry is a
            mapping with at least the keys ``name`` and ``path``
        transforms: mapping of table names to lists of
            (column, transform) pairs. ``None`` selects
            ``FIELD_TRANSFORMS``; any other empty value disables
            transforms.
    """

    def __init__(self, testsuites, transforms=None):
        self.testsuites = testsuites
        self.index = {entry["name"]: entry for entry in testsuites}
        if transforms is None:
            transforms = FIELD_TRANSFORMS
        elif not transforms:
            transforms = []
        self.transforms = dict(transforms)

    @staticmethod
    def _base_url(req):
        """Return the URL of *req* without its query string."""
        # req.uri also carries the query string, which would end up in the
        # middle of any child URL built by appending a path segment.
        return req.prefix + req.path

    def _open_testsuite(self, name):
        """Open the test suite registered as *name* or raise a 404."""
        try:
            entry = self.index[name]
        except KeyError as e:
            raise falcon.HTTPNotFound() from e
        return itsdb.TestSuite(entry["path"])

    def on_get(self, req, resp):
        """Respond with the name and URL of every test suite."""
        quote = urllib.parse.quote
        base = self._base_url(req)
        resp.media = [
            {"name": entry["name"], "url": "/".join([base, quote(entry["name"])])}
            for entry in self.testsuites
        ]
        resp.status = falcon.HTTP_OK

    def on_get_name(self, req, resp, name):
        """
        Respond with a mapping of table names to table URLs.

        Raises:
            falcon.HTTPNotFound: if *name* is not a known test suite
        """
        ts = self._open_testsuite(name)
        quote = urllib.parse.quote
        base = self._base_url(req)
        resp.media = {
            tablename: "/".join([base, quote(tablename)]) for tablename in ts.schema
        }
        resp.status = falcon.HTTP_OK

    def on_get_table(self, req, resp, name, table):
        """
        Respond with the rows of *table* in test suite *name*.

        The ``limit`` query parameter (default: all rows) sets the
        page size and ``page`` (1-based, default 1) selects the page.
        Columns with a registered transform are converted before
        being returned.

        Raises:
            falcon.HTTPNotFound: if *name* is not a known test suite
                or *table* is not in its schema
        """
        ts = self._open_testsuite(name)
        # an unknown table is a missing resource, not a server error
        if table not in ts.schema:
            raise falcon.HTTPNotFound()
        table_ = ts[table]

        # reject values that would turn into negative slice indices
        limit = req.get_param_as_int("limit", min_value=0, default=len(table_))
        page = req.get_param_as_int("page", min_value=1, default=1)
        rowslice = slice((page - 1) * limit, page * limit)

        # resolve column names to indices once, outside the row loop
        transforms = [
            (table_.column_index(colname), transform)
            for colname, transform in self.transforms.get(table, [])
        ]
        rows = []
        for row in table_[rowslice]:
            row = list(row)
            for colidx, transform in transforms:
                row[colidx] = transform(row[colidx])
            rows.append(row)
        resp.media = rows
        resp.status = falcon.HTTP_OK
# default field transformers


def _transform_tokens(s):
    """Parse a YY token lattice string and return it as a list."""
    lattice = tokens.YYTokenLattice.from_string(s)
    return lattice.to_list()


def _transform_mrs(s):
    """Decode a SimpleMRS string and return its JSON-style dict."""
    decoded = simplemrs.decode(s)
    return mrsjson.to_dict(decoded)


def _transform_derivation(s):
    """Parse a derivation string and return it as a dict."""
    tree = derivation.from_string(s)
    return tree.to_dict()


# (table name, [(column name, transform), ...]) pairs
FIELD_TRANSFORMS = [
    (
        "parse",
        [
            ("p-input", _transform_tokens),
            ("p-tokens", _transform_tokens),
        ],
    ),
    (
        "result",
        [
            ("mrs", _transform_mrs),
            ("derivation", _transform_derivation),
        ],
    ),
]


# override default JSON handler so it can serialize datetime


def _datetime_default(obj):
    """Serialize ``datetime.datetime`` via ``str``; reject other types."""
    if not isinstance(obj, datetime.datetime):
        raise TypeError(type(obj))
    return str(obj)


_json_handler = falcon.media.JSONHandler(
    dumps=functools.partial(json.dumps, default=_datetime_default),
    loads=json.loads,
)