Source code for delphin.web.server


"""
DELPH-IN Web API Server
"""

import datetime
import functools
import json
import pathlib
import urllib.parse
from typing import Optional, Type

import falcon

from delphin import ace, derivation, dmrs, eds, interface, itsdb, tokens
from delphin.codecs import (
    dmrsjson,
    edsjson,
    mrsjson,
    simplemrs,
)


def configure(api, parser=None, generator=None, testsuites=None):
    """
    Set up the routes and JSON handling of server application *api*.

    Calling this function is the recommended way to prepare the
    application. The task-specific resource classes in this module
    remain available for building custom routes by hand.

    When *parser* or *generator* is a path (a :class:`str` or
    :class:`pathlib.Path`), it is wrapped in a :class:`ParseServer` or
    :class:`GenerationServer`, respectively, with no extra arguments.
    To customize the underlying processor, construct the
    :class:`ParseServer` or :class:`GenerationServer` yourself and
    pass that instance instead.

    Args:
        api: an instance of :class:`falcon.API`
        parser: a path to a grammar or a :class:`ParseServer` instance;
            served at ``/parse``
        generator: a path to a grammar or a :class:`GenerationServer`
            instance; served at ``/generate``
        testsuites: mapping of collection names to lists of test suite
            entries; each collection is served at ``/<collection>``,
            ``/<collection>/{name}``, and
            ``/<collection>/{name}/{table}``
    Example:
        >>> server.configure(
        ...     api,
        ...     parser='~/grammars/erg-2018-x86-64-0.9.30.dat',
        ...     testsuites={
        ...         'gold': [
        ...             {'name': 'mrs',
        ...              'path': '~/grammars/erg/tsdb/gold/mrs'}]})
    """
    grammar_path_types = (str, pathlib.Path)

    if parser is not None:
        parse_resource = (
            ParseServer(parser)
            if isinstance(parser, grammar_path_types)
            else parser
        )
        api.add_route('/parse', parse_resource)

    if generator is not None:
        generate_resource = (
            GenerationServer(generator)
            if isinstance(generator, grammar_path_types)
            else generator
        )
        api.add_route('/generate', generate_resource)

    if testsuites is not None:
        for collection_name, entries in testsuites.items():
            # one resource object answers all three route depths; the
            # suffix selects the on_get_* responder
            root = '/' + urllib.parse.quote(collection_name)
            resource = TestSuiteServer(entries)
            api.add_route(root, resource)
            api.add_route(root + '/{name}', resource, suffix='name')
            api.add_route(
                root + '/{name}/{table}', resource, suffix='table')

    request_options = api.req_options
    request_options.strip_url_path_trailing_slash = True
    # use the datetime-aware JSON handler for both directions
    request_options.media_handlers['application/json'] = _json_handler
    api.resp_options.media_handlers['application/json'] = _json_handler
class ProcessorServer:
    """
    Base resource that serves the output of an ACE processor.

    The positional and keyword arguments given at construction are
    stored and reused each time a processor is spawned.

    Note:
        Do not instantiate this class directly; use a subclass that
        sets :attr:`processor_class`.
    """

    # the processor type instantiated by spawn(); set by subclasses
    processor_class: Optional[Type[interface.Processor]] = None

    def __init__(self, grammar, *args, **kwargs):
        self.grammar = grammar
        self.args = [*args]
        self.kwargs = kwargs

    def spawn(self, *args):
        """
        Instantiate :attr:`processor_class` for the stored grammar.

        The stored command arguments are followed by *args*, and the
        stored keyword arguments are passed through.
        """
        command_args = [*self.args, *args]
        make_processor = self.processor_class
        return make_processor(self.grammar, command_args, **self.kwargs)

    def on_get(self, req, resp):
        """
        Process the required ``input`` parameter and respond with JSON.

        The optional ``results`` parameter (an integer of at least 1,
        defaulting to 1) is passed to the processor as ``-n``.
        """
        sentence = req.get_param('input', required=True)
        count = req.get_param_as_int('results', min_value=1, default=1)
        with self.spawn('-n', str(count)) as processor:
            ace_response = processor.interact(sentence)
            resp.media = _make_response(
                sentence, ace_response, _get_args(req))
            resp.status = falcon.HTTP_OK
class ParseServer(ProcessorServer):
    """
    A :class:`ProcessorServer` whose processor is
    :class:`~delphin.ace.ACEParser`.
    """

    processor_class = ace.ACEParser
class GenerationServer(ProcessorServer):
    """
    A :class:`ProcessorServer` whose processor is
    :class:`~delphin.ace.ACEGenerator`.
    """

    processor_class = ace.ACEGenerator
def _get_args(req):
    """
    Read the output-selection flags from request *req*.

    Returns:
        A dict with a boolean for each of ``tokens``, ``derivation``,
        ``mrs``, ``eds``, and ``dmrs``. A parameter that is absent
        yields `False`.
    """
    args = {}
    params = req.params
    for name in ('tokens', 'derivation', 'mrs', 'eds', 'dmrs'):
        if name in params:
            val = params[name]
            # handle 'json' and 'null' for ErgAPI compatibility
            args[name] = (
                val == 'json'
                or (val != 'null' and req.get_param_as_bool(name)))
        else:
            args[name] = False
    return args


def _make_response(inp, ace_response, params):
    """
    Build the JSON-serializable response for one processor interaction.

    Args:
        inp: the input string that was processed
        ace_response: the response object returned by the processor
        params: mapping of output flags as produced by
            :func:`_get_args`; a missing flag is treated as false
    Returns:
        A dict with ``input``, ``readings``, and ``results`` keys, plus
        ``tcpu``, ``pedges``, and ``tokens`` when available/requested.
    """
    tcpu = ace_response.get('tcpu')
    pedges = ace_response.get('pedges')
    readings = ace_response.get('readings')
    if readings is None:
        readings = len(ace_response.get('results', []))

    want_derivation = params.get('derivation')
    want_mrs = params.get('mrs')
    want_eds = params.get('eds')
    want_dmrs = params.get('dmrs')
    # the MRS is the source of all three semantic representations
    need_mrs = want_mrs or want_eds or want_dmrs

    results = []
    for i, res in enumerate(ace_response.results()):
        result = {'result-id': i}

        # only deserialize what was asked for
        if want_derivation:
            d = res.derivation()
            result['derivation'] = d.to_dict(
                fields=['id', 'entity', 'score', 'form', 'tokens'])

        if need_mrs:
            m = res.mrs()
            if want_mrs:
                result['mrs'] = mrsjson.to_dict(m)
            if want_eds:
                e = eds.from_mrs(m, predicate_modifiers=True)
                result['eds'] = edsjson.to_dict(e)
            if want_dmrs:
                _d = dmrs.from_mrs(m)
                result['dmrs'] = dmrsjson.to_dict(_d)

        # surface is for generation
        if 'surface' in res:
            result['surface'] = res['surface']

        results.append(result)

    response = {
        'input': inp,
        'readings': readings,
        'results': results
    }
    if tcpu is not None:
        response['tcpu'] = tcpu
    if pedges is not None:
        response['pedges'] = pedges

    # _get_args() has already reduced 'json'/'null'/boolean strings to
    # a bool, so test truthiness rather than comparing to 'json'
    if params.get('tokens'):
        t1 = ace_response.tokens('initial')
        t2 = ace_response.tokens('internal')
        response['tokens'] = {
            'initial': t1.to_list(),
            'internal': t2.to_list()
        }

    return response
class TestSuiteServer:
    """
    A server for a collection of test suites.

    The responders correspond to three route depths: the collection
    itself (:meth:`on_get`), one test suite (:meth:`on_get_name`), and
    one table of a test suite (:meth:`on_get_table`).

    Args:
        testsuites: list of test suite descriptions; each entry is
            read for its ``'name'`` and ``'path'`` keys
        transforms: mapping of table names to lists of (column,
            transform) pairs. If `None`, :data:`FIELD_TRANSFORMS` is
            used; any other false value disables transforms.
    """

    def __init__(self, testsuites, transforms=None):
        self.testsuites = testsuites
        self.index = {entry['name']: entry for entry in testsuites}
        if transforms is None:
            transforms = FIELD_TRANSFORMS
        elif not transforms:
            transforms = []
        self.transforms = dict(transforms)

    def _get_testsuite(self, name):
        """Open the test suite registered as *name*, or raise a 404."""
        try:
            entry = self.index[name]
        except KeyError as e:
            raise falcon.HTTPNotFound() from e
        return itsdb.TestSuite(entry['path'])

    def on_get(self, req, resp):
        """Respond with the name and URL of every test suite."""
        quote = urllib.parse.quote
        base = req.uri
        resp.media = [
            {'name': entry['name'],
             'url': '/'.join([base, quote(entry['name'])])}
            for entry in self.testsuites
        ]
        resp.status = falcon.HTTP_OK

    def on_get_name(self, req, resp, name):
        """
        Respond with a mapping of table names to URLs for suite *name*.

        Raises:
            falcon.HTTPNotFound: if *name* is not a known test suite
        """
        ts = self._get_testsuite(name)
        quote = urllib.parse.quote
        base = req.uri
        resp.media = {tablename: '/'.join([base, quote(tablename)])
                      for tablename in ts.schema}
        resp.status = falcon.HTTP_OK

    def on_get_table(self, req, resp, name, table):
        """
        Respond with the rows of *table* in test suite *name*.

        The optional ``limit`` (rows per page, default: all rows) and
        ``page`` (1-based, default: 1) parameters select a slice of
        the table. Columns with a configured transform are converted
        before being returned.

        Raises:
            falcon.HTTPNotFound: if *name* is not a known test suite
                or *table* is not in its schema
        """
        ts = self._get_testsuite(name)
        # an unknown table is a client error just like an unknown
        # test suite, so answer 404 instead of failing on lookup
        if table not in ts.schema:
            raise falcon.HTTPNotFound()
        table_ = ts[table]

        # bound the paging parameters so that zero or negative values
        # are rejected by falcon rather than yielding odd slices
        limit = req.get_param_as_int(
            'limit', min_value=0, default=len(table_))
        page = req.get_param_as_int('page', min_value=1, default=1)
        rowslice = slice((page - 1) * limit, page * limit)

        # resolve column names to indices once, outside the row loop
        transforms = [(table_.column_index(colname), transform)
                      for colname, transform
                      in self.transforms.get(table, [])]
        rows = []
        for row in table_[rowslice]:
            row = list(row)
            for colidx, transform in transforms:
                row[colidx] = transform(row[colidx])
            rows.append(row)

        resp.media = rows
        resp.status = falcon.HTTP_OK
# default field transformers

def _transform_tokens(s):
    """Parse *s* as a YY token lattice and return its list form."""
    lattice = tokens.YYTokenLattice.from_string(s)
    return lattice.to_list()


def _transform_mrs(s):
    """Decode the SimpleMRS string *s* and return its MRS-JSON dict."""
    mrs_obj = simplemrs.decode(s)
    return mrsjson.to_dict(mrs_obj)


def _transform_derivation(s):
    """Parse the derivation string *s* and return its dict form."""
    tree = derivation.from_string(s)
    return tree.to_dict()


# (table, [(column, transform), ...]) pairs; TestSuiteServer turns
# this into a dict keyed by table name
FIELD_TRANSFORMS = [
    (
        'parse',
        [
            ('p-input', _transform_tokens),
            ('p-tokens', _transform_tokens),
        ],
    ),
    (
        'result',
        [
            ('mrs', _transform_mrs),
            ('derivation', _transform_derivation),
        ],
    ),
]


# override default JSON handler so it can serialize datetime

def _datetime_default(obj):
    """Serialize datetime objects as strings; reject anything else."""
    if not isinstance(obj, datetime.datetime):
        raise TypeError(type(obj))
    return str(obj)


_json_handler = falcon.media.JSONHandler(
    loads=json.loads,
    dumps=functools.partial(json.dumps, default=_datetime_default),
)