Source code for delphin.dmrs._dmrs


from typing import Iterable, Optional

from delphin import scope, variable
from delphin.lnk import Lnk
from delphin.sembase import Predication

# Special node id: a link starting at this id names the top node rather
# than relating two real nodes.
TOP_NODE_ID = 0
# NOTE(review): presumably the id assigned to the first real node; it is
# not referenced in this part of the file -- confirm against callers.
FIRST_NODE_ID = 10000
# DMRS establishes that quantifiers have a RSTR link
RESTRICTION_ROLE = 'RSTR'
# Role of bare MOD/EQ links, which are not counted as arguments.
BARE_EQ_ROLE = 'MOD'

# Link "post" values.
# EQ: the two linked nodes are placed in the same scope.
EQ_POST = 'EQ'
# HEQ: scopal link, reported with the scope.LHEQ relation.
HEQ_POST = 'HEQ'
# NOTE(review): NEQ is not referenced in this part of the file; meaning
# to be confirmed against the DMRS specification.
NEQ_POST = 'NEQ'
# H: scopal link, reported with the scope.QEQ relation.
H_POST = 'H'
# NOTE(review): NIL is not referenced in this part of the file; meaning
# to be confirmed against the DMRS specification.
NIL_POST = 'NIL'

# Key under which Node.sortinfo stores the node type.
CVARSORT = 'cvarsort'


class Node(Predication):
    """
    A single node of a DMRS graph.

    A node is a deliberately small predication. Unlike
    :class:`delphin.mrs.EP` objects it carries no arguments and no
    label; what it keeps is a constant argument (CARG) and the
    vestigial variable type and properties, which are exposed
    together through `sortinfo`.

    Args:
        id: node identifier (coerced with `int()`)
        predicate: semantic predicate
        type: node type (corresponds to the intrinsic variable type
            in MRS)
        properties: morphosemantic properties; a falsy value is
            replaced by a new empty dict
        carg: constant value (e.g., for named entities)
        lnk: surface alignment
        surface: surface string
        base: base form
    Attributes:
        id: node identifier
        predicate: semantic predicate
        type: node type (corresponds to the intrinsic variable type
            in MRS)
        properties: morphosemantic properties
        sortinfo: properties with the node type at key `"cvarsort"`
        carg: constant value (e.g., for named entities)
        lnk: surface alignment
        cfrom: surface alignment starting position
        cto: surface alignment ending position
        surface: surface string
        base: base form
    """

    __slots__ = ('properties', 'carg')

    def __init__(self,
                 id: int,
                 predicate: str,
                 type: Optional[str] = None,
                 properties: Optional[dict] = None,
                 carg: Optional[str] = None,
                 lnk: Optional[Lnk] = None,
                 surface=None,
                 base=None):
        # ids may arrive as strings (e.g. from a serialization); store ints
        super().__init__(int(id), predicate, type, lnk, surface, base)
        # every node gets its own dict when no properties were supplied
        self.properties = properties if properties else {}
        self.carg = carg

    @property
    def sortinfo(self):
        """
        Morphosemantic property mapping with cvarsort.

        The result is a fresh dict; the node type is added under the
        cvarsort key only when the node has a type.
        """
        info = dict(self.properties)
        node_type = self.type
        if node_type is None:
            return info
        info[CVARSORT] = node_type
        return info

    def __eq__(self, other):
        # Equality looks at predicate, type, properties, and carg only;
        # the id and surface information are not compared.
        if isinstance(other, Node):
            return (self.predicate == other.predicate
                    and self.type == other.type
                    and self.properties == other.properties
                    and self.carg == other.carg)
        return NotImplemented
class DMRS(scope.ScopingSemanticStructure):
    """
    Dependency Minimal Recursion Semantics (DMRS) class.

    DMRS instances have a list of Node objects and a list of Link
    objects. The scopal top node may be set directly via a parameter
    or may be implicitly set via a `/H` Link from the special node id
    `0`. If both are given, the link is ignored. The non-scopal top
    (index) node may only be set via the *index* parameter.

    Args:
        top: the id of the scopal top node
        index: the id of the non-scopal top node
        nodes: an iterable of DMRS nodes
        links: an iterable of DMRS links
        lnk: surface alignment
        surface: surface string
        identifier: a discourse-utterance identifier
    Attributes:
        top: The scopal top node.
        index: The non-scopal top node.
        nodes: The list of Nodes (alias of
            :attr:`~delphin.sembase.SemanticStructure.predications`).
        links: The list of Links.
        lnk: The surface alignment for the whole DMRS.
        surface: The surface string represented by the DMRS.
        identifier: A discourse-utterance identifier.

    Example:

    >>> rain = Node(10000, '_rain_v_1', type='e')
    >>> heavy = Node(10001, '_heavy_a_1', type='e')
    >>> arg1_link = Link(10000, 10001, role='ARG1', post='EQ')
    >>> d = DMRS(top=10000, index=10000,
    ...          nodes=[rain, heavy], links=[arg1_link])
    """

    # A one-element tuple; a bare parenthesised string would only work
    # by accident of how __slots__ treats a single string.
    __slots__ = ('links',)

    def __init__(self,
                 top: Optional[int] = None,
                 index: Optional[int] = None,
                 nodes: Optional[Iterable[Node]] = None,
                 links: Optional[Iterable[Link]] = None,
                 lnk: Optional[Lnk] = None,
                 surface=None,
                 identifier=None):

        # Links from the special node id 0 are removed here; the first
        # one supplies *top* when it was not given explicitly.
        top, links = _normalize_top_and_links(top, links)

        if top:
            top = int(top)
        if index:
            index = int(index)
        if nodes is None:
            nodes = []

        super().__init__(top, index, list(nodes), lnk, surface, identifier)

        self.links = links

    @property
    def nodes(self):
        """The list of nodes (alias of `predications`)."""
        return self.predications

    def __eq__(self, other):
        # lnk, surface, and identifier are not part of the comparison
        if not isinstance(other, DMRS):
            return NotImplemented
        return (self.top == other.top
                and self.index == other.index
                and self.nodes == other.nodes
                and self.links == other.links)

    # SemanticStructure methods

    def properties(self, id):
        """
        Return the morphosemantic properties of the node with id *id*.
        """
        return self[id].properties

    def is_quantifier(self, id):
        """
        Return `True` if *id* is the id of a quantifier node.

        A node counts as a quantifier when at least one link starting
        at it has the restriction (`RSTR`) role.
        """
        return any(link.role == RESTRICTION_ROLE
                   for link in self.links
                   if link.start == id)

    def quantification_pairs(self):
        """
        Return a list of (node, quantifier) pairs.

        Every node that is not the start of a `RSTR` link appears once
        as the first member of a pair, in the order of :attr:`nodes`.
        The second member is the node at the start of a `RSTR` link
        ending at that node, or `None` if there is no such link.
        """
        qs = set()
        qmap = {}
        for link in self.links:
            if link.role == RESTRICTION_ROLE:
                qs.add(link.start)
                qmap[link.end] = self[link.start]
        pairs = []
        # first pair non-quantifiers to their quantifier, if any
        for node in self.nodes:
            if node.id not in qs:
                pairs.append((node, qmap.get(node.id)))
        # for MRS any unpaired quantifiers are added here, but in DMRS
        # I'm not sure what an unpaired quantifier would look like;
        # its link.end must point to something
        return pairs

    def arguments(self, types=None, expressed=None):
        """
        Return a mapping of the argument structure.

        When *types* is used, any DMRS Links with :attr:`Link.post`
        set to :data:`H_POST` or :data:`HEQ_POST` are considered to
        have a type of `'h'`, so one can exclude scopal arguments by
        omitting `'h'` on *types*. Otherwise an argument's type is the
        :attr:`Node.type` of the link's target.

        Args:
            types: an iterable of predication types to include
            expressed: if `True`, only include arguments to expressed
                predications; if `False`, only include those
                unexpressed; if `None`, include both. DMRS cannot
                encode unexpressed arguments, so `False` yields an
                empty argument list for every node.
        Returns:
            A mapping of predication ids to lists of (role, target)
            pairs for outgoing arguments for the predication.
        """
        args = {node.id: [] for node in self.nodes}
        H = variable.HANDLE

        for link in self.links:
            # MOD/EQ links are not arguments
            if link.role == BARE_EQ_ROLE:
                continue

            # ignore undesired argument types
            if types:
                if link.post in (H_POST, HEQ_POST):
                    if H not in types:
                        continue
                else:
                    node = self[link.end]
                    if node.type is None or node.type not in types:
                        continue

            # currently DMRS cannot encode unexpressed arguments
            if expressed is not None and not expressed:
                continue

            args[link.start].append((link.role, link.end))

        return args

    # ScopingSemanticStructure methods

    def scopes(self):
        """
        Return a tuple containing the top label and the scope map.

        Note that the top label is different from :attr:`top`, which
        is the top node's id. If :attr:`top` does not select a top
        node, then `None` is returned for the top label.

        The scope map is a dictionary mapping scope labels to the
        lists of predications sharing a scope.
        """
        h = variable.HANDLE
        vfac = variable.VariableFactory(starting_vid=1)
        # give every node its own label, then merge the labels of
        # nodes joined by an EQ link
        id_to_lbl = {node.id: vfac.new(h) for node in self.nodes}
        leqs = [(id_to_lbl[link.start], id_to_lbl[link.end])
                for link in self.links
                if link.post == EQ_POST]
        prescopes = {id_to_lbl[node.id]: [node] for node in self.nodes}
        scopes = scope.conjoin(prescopes, leqs)

        top = None
        if self.top is not None:
            top_node = self[self.top]
            top = next((label for label, nodes in scopes.items()
                        if top_node in nodes),
                       None)

        return top, scopes

    def scopal_arguments(self, scopes=None):
        """
        Return a mapping of the scopal argument structure.

        The return value maps node ids to lists of scopal arguments as
        (role, scope_relation, target) triples. If *scopes* is given,
        the target is the scope label, otherwise it is the target
        node's id. Note that ``MOD/EQ`` links are not included as
        scopal arguments.

        Args:
            scopes: mapping of scope labels to lists of predications
        Example:

            >>> d = DMRS(...)  # for "It doesn't rain."
            >>> d.scopal_arguments()
            {10000: [('ARG1', 'qeq', 10001)]}
            >>> top, scopes = d.scopes()
            >>> d.scopal_arguments(scopes=scopes)
            {10000: [('ARG1', 'qeq', 'h2')]}
        """
        id_to_lbl = {}
        if scopes is not None:
            for label, nodes in scopes.items():
                for node in nodes:
                    id_to_lbl[node.id] = label

        scargs = {node.id: [] for node in self.nodes}
        for link in self.links:
            if link.post == HEQ_POST:
                relation = scope.LHEQ
            elif link.post == H_POST:
                relation = scope.QEQ
            else:
                # only /HEQ and /H links are scopal arguments
                continue
            # get the label if scopes was given
            target = id_to_lbl.get(link.end, link.end)
            scargs[link.start].append((link.role, relation, target))

        return scargs
def _normalize_top_and_links(top, links):
    """
    Separate legacy top-marking links from ordinary links.

    Original DMRS had a /H link from a special node id of 0 to
    indicate the top node, but now the `top` attribute is used. Any
    link starting at that special id is dropped from the result; the
    first one encountered supplies `top` when `top` is `None`, and all
    of them are otherwise ignored.

    Args:
        top: the explicitly given top node id, or `None`
        links: an iterable of links, or `None`
    Returns:
        A `(top, links)` pair where `links` is always a new list.
    """
    if links is None:
        return top, []

    kept = []
    for link in links:
        if link.start == TOP_NODE_ID:
            # an explicit (or previously found) top takes precedence
            if top is None:
                top = link.end
            continue
        kept.append(link)
    return top, kept