Source code for delphin.dmrs._dmrs

from collections.abc import Iterable
from typing import Any

from delphin import scope, variable
from delphin.lnk import Lnk
from delphin.sembase import (
    ArgumentStructure,
    Predication,
    ScopalArguments,
    ScopeMap,
    ScopeRelation,
    ScopingSemanticStructure,
)

# Special node id used by the legacy top-marking convention: a link that
# starts at this id designates the scopal top (see _normalize_top_and_links).
TOP_NODE_ID = 0
# NOTE(review): presumably the id assigned to the first real node (the
# docstring examples in this module start numbering at 10000); it is not
# referenced in this part of the file -- confirm against callers.
FIRST_NODE_ID = 10000
RESTRICTION_ROLE = "RSTR"  # DMRS establishes that quantifiers have a RSTR link
# Role of a bare MOD/EQ link; DMRS.arguments() skips links with this role.
BARE_EQ_ROLE = "MOD"
# Values of a link's `post` attribute.
# EQ: DMRS.scopes() treats the two endpoints as sharing a scope label.
EQ_POST = "EQ"
# HEQ: DMRS.scopal_arguments() maps this to ScopeRelation.LHEQ.
HEQ_POST = "HEQ"
# NOTE(review): NEQ is not referenced in this part of the file; presumably it
# marks endpoints that do not share a scope -- confirm.
NEQ_POST = "NEQ"
# H: DMRS.scopal_arguments() maps this to ScopeRelation.QEQ.
H_POST = "H"
# NOTE(review): NIL is not referenced in this part of the file -- confirm use.
NIL_POST = "NIL"
# Key under which Node.sortinfo stores the node's type.
CVARSORT = "cvarsort"


class Node(Predication[int]):
    """
    A single node of a DMRS graph.

    A node is a minimal predication: unlike :class:`delphin.mrs.EP`
    objects it carries no arguments and no label. It does keep a
    constant argument (CARG) and, via :attr:`sortinfo`, the vestigial
    variable type and morphosemantic properties.

    Args:
        id: node identifier (coerced with ``int()``)
        predicate: semantic predicate
        type: node type (corresponds to the intrinsic variable type
            in MRS)
        properties: morphosemantic properties; a falsy value is
            replaced by a new empty dictionary
        carg: constant value (e.g., for named entities)
        lnk: surface alignment
        surface: surface string
        base: base form
    Attributes:
        id: node identifier
        predicate: semantic predicate
        type: node type (corresponds to the intrinsic variable type
            in MRS)
        properties: morphosemantic properties
        carg: constant value (e.g., for named entities)
        lnk: surface alignment
        cfrom: surface alignment starting position
        cto: surface alignment ending position
        surface: surface string
        base: base form
    """

    __slots__ = ("carg", "properties")

    properties: dict[str, str]

    def __init__(
        self,
        id: int,
        predicate: str,
        type: str | None = None,
        properties: dict[str, str] | None = None,
        carg: str | None = None,
        lnk: Lnk | None = None,
        surface=None,
        base=None,
    ) -> None:
        super().__init__(int(id), predicate, type, lnk, surface, base)
        # A missing or empty mapping becomes a fresh dict owned by this node.
        self.properties = properties if properties else {}
        self.carg = carg

    @property
    def sortinfo(self) -> dict[str, str]:
        """
        Copy of :attr:`properties` extended with the ``"cvarsort"`` key.

        The ``"cvarsort"`` entry holds :data:`Node.type`; it is only
        added when the type is not `None`.
        """
        info = dict(self.properties)
        if self.type is None:
            return info
        info[CVARSORT] = self.type
        return info

    def __eq__(self, other: Any) -> bool:
        """
        Compare two nodes by content.

        Only the predicate, type, properties, and carg take part in the
        comparison; the node id and surface information do not.
        """
        if not isinstance(other, Node):
            return NotImplemented
        compared = ("predicate", "type", "properties", "carg")
        return all(
            getattr(self, name) == getattr(other, name) for name in compared
        )
class DMRS(ScopingSemanticStructure[int, Node]):
    """
    Dependency Minimal Recursion Semantics (DMRS) class.

    DMRS instances have a list of Node objects and a list of Link
    objects. The scopal top node may be set directly via a parameter
    or may be implicitly set via a `/H` Link from the special node id
    `0`. If both are given, the link is ignored. The non-scopal top
    (index) node may only be set via the *index* parameter.

    Args:
        top: the id of the scopal top node
        index: the id of the non-scopal top node
        nodes: an iterable of DMRS nodes
        links: an iterable of DMRS links
        lnk: surface alignment
        surface: surface string
        identifier: a discourse-utterance identifier
    Attributes:
        top: The scopal top node.
        index: The non-scopal top node.
        nodes: The list of Nodes (alias of
            :attr:`~delphin.sembase.SemanticStructure.predications`).
        links: The list of Links.
        lnk: The surface alignment for the whole DMRS.
        surface: The surface string represented by the DMRS.
        identifier: A discourse-utterance identifier.
    Example:
        >>> rain = Node(10000, "_rain_v_1", type="e")
        >>> heavy = Node(10001, "_heavy_a_1", type="e")
        >>> arg1_link = Link(10000, 10001, role="ARG1", post="EQ")
        >>> d = DMRS(
        ...     top=10000,
        ...     index=10000,
        ...     nodes=[rain, heavy],
        ...     links=[arg1_link],
        ... )
    """

    __slots__ = ("links",)

    links: list[Link]

    def __init__(
        self,
        top: int | None = None,
        index: int | None = None,
        nodes: Iterable[Node] | None = None,
        links: Iterable[Link] | None = None,
        lnk: Lnk | None = None,
        surface=None,
        identifier=None,
    ) -> None:
        # Links starting at the special top node id are dropped here; the
        # first such link supplies *top* when it was not given explicitly.
        top, links = _normalize_top_and_links(top, links)
        if top:
            top = int(top)
        if index:
            index = int(index)
        if nodes is None:
            nodes = []
        super().__init__(top, index, list(nodes), lnk, surface, identifier)
        self.links = links

    @property
    def nodes(self) -> list[Node]:
        """The list of nodes (alias of ``predications``)."""
        return self.predications

    def __eq__(self, other: Any) -> bool:
        """
        Compare two DMRSs by their top, index, nodes, and links.
        """
        if not isinstance(other, DMRS):
            return NotImplemented
        return (
            self.top == other.top
            and self.index == other.index
            and self.nodes == other.nodes
            and self.links == other.links
        )

    # SemanticStructure methods

    def properties(self, id: int | None) -> dict[str, str]:
        """
        Return the morphosemantic properties of the node with id *id*.
        """
        return self[id].properties

    def is_quantifier(self, id: int | None) -> bool:
        """
        Return `True` if *id* is the id of a quantifier node.

        A node counts as a quantifier when at least one link with the
        role :data:`RESTRICTION_ROLE` starts at it.
        """
        return any(
            link.role == RESTRICTION_ROLE
            for link in self.links
            if link.start == id
        )

    def quantification_pairs(
        self,
    ) -> list[tuple[Node | None, Node | None]]:
        """
        Return a list of (node, quantifier) pairs.

        Every node that is not itself the start of a
        :data:`RESTRICTION_ROLE` link yields one pair, in node order.
        The second member is the node whose restriction link ends at
        the first member, or `None` if there is no such link.
        """
        quantifier_ids: set[int] = set()
        quantifier_of: dict[int, Node] = {}
        for link in self.links:
            if link.role == RESTRICTION_ROLE:
                quantifier_ids.add(link.start)
                quantifier_of[link.end] = self[link.start]

        # Pair non-quantifiers with their quantifier, if any.
        # For MRS any unpaired quantifiers would be added as well, but in
        # DMRS it is unclear what an unpaired quantifier would look like:
        # its link.end must point to something.
        return [
            (node, quantifier_of.get(node.id))
            for node in self.nodes
            if node.id not in quantifier_ids
        ]

    def arguments(
        self,
        types: Iterable[str] | None = None,
        expressed: bool | None = None,
    ) -> ArgumentStructure[int]:
        """
        Return a mapping of the argument structure.

        When *types* is used, any DMRS Links with :attr:`Link.post`
        set to :data:`H_POST` or :data:`HEQ_POST` are considered to
        have a type of `'h'`, so one can exclude scopal arguments by
        omitting `'h'` on *types*. Otherwise an argument's type is the
        :attr:`Node.type` of the link's target.

        Args:
            types: an iterable of predication types to include; it is
                read once up front, so one-shot iterators are accepted,
                and an empty iterable means "no filtering"
            expressed: if `True`, only include arguments to expressed
                predications; if `False`, only include those
                unexpressed; if `None`, include both
        Returns:
            A mapping of predication ids to lists of (role, target)
            pairs for outgoing arguments for the predication.
        """
        # *types* is only promised to be iterable. Membership is tested
        # once per link below, which would exhaust a generator after the
        # first test, so take a snapshot before the loop.
        wanted: tuple[str, ...] = () if types is None else tuple(types)

        args: dict[int, list[tuple[str, int]]] = {
            node.id: [] for node in self.nodes
        }
        H = variable.HANDLE

        for link in self.links:
            # MOD/EQ links are not arguments
            if link.role == BARE_EQ_ROLE:
                continue
            # ignore undesired argument types
            if wanted:
                if link.post in (H_POST, HEQ_POST):
                    if H not in wanted:
                        continue
                else:
                    node = self[link.end]
                    if node.type is None or node.type not in wanted:
                        continue
            # currently DMRS cannot encode unexpressed arguments
            if expressed is not None and not expressed:
                continue

            args[link.start].append((link.role, link.end))

        return args

    # ScopingSemanticStructure methods

    def scopes(self) -> tuple[str | None, dict[str, list[Node]]]:
        """
        Return a tuple containing the top label and the scope map.

        Note that the top label is different from :attr:`top`, which
        is the top node's id. If :attr:`top` is `None`, or no scope
        contains the top node, then `None` is returned for the top
        label.

        The scope map is a dictionary mapping scope labels to the
        lists of nodes sharing a scope.
        """
        h = variable.HANDLE
        vfac = variable.VariableFactory(starting_vid=1)
        # Give every node its own label, then merge the labels of nodes
        # joined by EQ links.
        id_to_lbl = {node.id: vfac.new(h) for node in self.nodes}
        leqs = [
            (id_to_lbl[link.start], id_to_lbl[link.end])
            for link in self.links
            if link.post == EQ_POST
        ]
        prescopes = {id_to_lbl[node.id]: [node] for node in self.nodes}
        scopes = scope.conjoin(prescopes, leqs)

        top = None
        if self.top is not None:
            top_id = self[self.top].id
            # Match on node id rather than with `in`: Node equality
            # ignores the id, so two distinct nodes with the same
            # predicate, type, properties, and carg compare equal and
            # `in` could select the wrong scope.
            top = next(
                (
                    label
                    for label, nodes in scopes.items()
                    if any(node.id == top_id for node in nodes)
                ),
                None,
            )

        return top, scopes

    def scopal_arguments(
        self,
        scopes: ScopeMap[Node] | None = None,
    ) -> ScopalArguments[int]:
        """
        Return a mapping of the scopal argument structure.

        The return value maps node ids to lists of scopal arguments as
        (role, scope_relation, scope_label) triples. If *scopes* is
        given, it is used as the source of scope labels. Otherwise,
        :meth:`scopes` is first called to generate those labels. Every
        node id is present in the result, mapped to an empty list if
        the node has no scopal arguments.

        Note that ``MOD/EQ`` links are not included as scopal
        arguments.

        Args:
            scopes: mapping of scope labels to lists of predications
        Example:
            >>> d = DMRS(...)  # for "It doesn't rain."
            >>> d.scopal_arguments()
            {10000: [('ARG1', 'qeq', 'h2')], 10001: []}
            >>> top, scopes = d.scopes()
            >>> d.scopal_arguments(scopes=scopes)
            {10000: [('ARG1', 'qeq', 'h2')], 10001: []}
        """
        if scopes is None:
            _, scopes = self.scopes()
        id_to_lbl: dict[int, str] = {
            node.id: lbl for lbl, nodes in scopes.items() for node in nodes
        }

        scargs: dict[int, list[tuple[str, ScopeRelation, str]]] = {
            node.id: [] for node in self.nodes
        }
        for link in self.links:
            # Only /HEQ and /H links express scopal arguments.
            if link.post == HEQ_POST:
                relation = ScopeRelation.LHEQ
            elif link.post == H_POST:
                relation = ScopeRelation.QEQ
            else:
                continue
            label = id_to_lbl[link.end]
            scargs[link.start].append((link.role, relation, label))

        return scargs
def _normalize_top_and_links(
    top: int | None,
    links: Iterable[Link] | None,
) -> tuple[int | None, list[Link]]:
    """
    Separate legacy top-marking links from ordinary links.

    Original DMRS marked the top node with a /H link from a special
    node id of 0; the `top` attribute is used for that now. Every link
    starting at :data:`TOP_NODE_ID` is removed from the result. If
    *top* is `None`, the end of the first such link becomes the top;
    otherwise those links are simply ignored.

    Returns:
        A (top, links) pair where *links* is a new list containing the
        remaining links in their original order.
    """
    if links is None:
        return top, []

    kept: list[Link] = []
    for link in links:
        if link.start != TOP_NODE_ID:
            kept.append(link)
        elif top is None:
            # Only the first top-marking link can set the top; an
            # explicit or already-found top takes precedence.
            top = link.end
    return top, kept