Source code for delphin.dmrs._dmrs


from typing import Iterable, Optional

from delphin import scope, variable
from delphin.lnk import Lnk
from delphin.sembase import Predication

# Special node id used by legacy top links: a link starting at this id
# designates the top node and is stripped from the list of links.
TOP_NODE_ID      = 0
# NOTE(review): presumably the conventional id of the first real node (the
# docstring examples in this module start at 10000); not referenced in the
# code visible here -- confirm against callers.
FIRST_NODE_ID    = 10000
RESTRICTION_ROLE = 'RSTR'  # DMRS establishes that quantifiers have a RSTR link
# Role of bare equality (MOD/EQ) links; links with this role are skipped
# when collecting arguments.
BARE_EQ_ROLE     = 'MOD'
# "Post-slash" labels describing the scopal relationship of a link.
# EQ links are treated as label equalities when computing scopes.
EQ_POST          = 'EQ'
# HEQ links are reported as scopal arguments with the scope.LHEQ relation.
HEQ_POST         = 'HEQ'
# NOTE(review): NEQ is not referenced in the code visible here.
NEQ_POST         = 'NEQ'
# H links are reported as scopal arguments with the scope.QEQ relation.
H_POST           = 'H'
# NOTE(review): NIL is not referenced in the code visible here.
NIL_POST         = 'NIL'
# Key under which Node.sortinfo stores the node type.
CVARSORT         = 'cvarsort'



[docs]
class Node(Predication):
    """
    A single predication in a DMRS graph.

    Unlike :class:`delphin.mrs.EP` objects, nodes carry neither
    arguments nor labels. They do keep a CARG value, and the variable
    type and properties that would belong to the intrinsic variable in
    MRS are exposed together through `sortinfo`.

    Args:
        id: node identifier
        predicate: semantic predicate
        type: node type (corresponds to the intrinsic variable type in MRS)
        properties: morphosemantic properties
        carg: constant value (e.g., for named entities)
        lnk: surface alignment
        surface: surface string
        base: base form

    Attributes:
        id: node identifier
        predicate: semantic predicate
        type: node type (corresponds to the intrinsic variable type in MRS)
        properties: morphosemantic properties
        sortinfo: properties with the node type at key `"cvarsort"`
        carg: constant value (e.g., for named entities)
        lnk: surface alignment
        cfrom: surface alignment starting position
        cto: surface alignment ending position
        surface: surface string
        base: base form
    """

    __slots__ = ('properties', 'carg')

    def __init__(self,
                 id: int,
                 predicate: str,
                 type: Optional[str] = None,
                 properties: Optional[dict] = None,
                 carg: Optional[str] = None,
                 lnk: Optional[Lnk] = None,
                 surface=None,
                 base=None):
        # the identifier is always stored as an int, even if given as a str
        super().__init__(int(id), predicate, type, lnk, surface, base)
        # any falsy value (None, empty mapping) becomes a fresh dict
        self.properties = properties if properties else {}
        self.carg = carg

    @property
    def sortinfo(self):
        """
        Copy of the properties that also records the node type.

        The type is stored under the `"cvarsort"` key and is left out
        when the node has no type.
        """
        info = dict(self.properties)
        if self.type is None:
            return info
        info[CVARSORT] = self.type
        return info

    def __eq__(self, other):
        # Equality is based on content only; the node id, surface
        # alignment, surface string and base form are not compared.
        if isinstance(other, Node):
            compared = ('predicate', 'type', 'properties', 'carg')
            return all(getattr(self, name) == getattr(other, name)
                       for name in compared)
        return NotImplemented




[docs]
class Link:
    """
    A dependency link between two DMRS nodes.

    A link stands in for an argument without using variables: it
    records the node the argument starts from, the node it points to,
    the role name, and a label describing the scopal relationship
    between the two nodes (e.g. label equality, qeq, etc).

    Args:
        start: node id of the start of the Link
        end: node id of the end of the Link
        role: role of the argument
        post: "post-slash label" indicating the scopal
            relationship between the start and end of the Link;
            possible values are `NEQ`, `EQ`, `HEQ`, and `H`

    Attributes:
        start: node id of the start of the Link
        end: node id of the end of the Link
        role: role of the argument
        post: "post-slash label" indicating the scopal
            relationship between the start and end of the Link
    """

    __slots__ = ('start', 'end', 'role', 'post')

    def __init__(self, start: int, end: int, role: str, post: str):
        # node ids are always stored as ints, even if given as strings
        self.start, self.end = int(start), int(end)
        self.role, self.post = role, post

    def __repr__(self):
        # a missing role is rendered as the empty string
        role = self.role or ''
        template = '<Link object ({} :{}/{} {}) at {}>'
        return template.format(
            self.start, role, self.post, self.end, id(self)
        )

    def __eq__(self, other):
        # two links are equal when all four fields match
        if isinstance(other, Link):
            fields = ('start', 'end', 'role', 'post')
            return all(getattr(self, name) == getattr(other, name)
                       for name in fields)
        return NotImplemented




[docs]
class DMRS(scope.ScopingSemanticStructure):
    """
    Dependency Minimal Recursion Semantics (DMRS) class.

    DMRS instances have a list of Node objects and a list of Link
    objects. The scopal top node may be set directly via a parameter
    or may be implicitly set via a `/H` Link from the special node id
    `0`. If both are given, the link is ignored. The non-scopal top
    (index) node may only be set via the *index* parameter.

    Args:
        top: the id of the scopal top node
        index: the id of the non-scopal top node
        nodes: an iterable of DMRS nodes
        links: an iterable of DMRS links
        lnk: surface alignment
        surface: surface string
        identifier: a discourse-utterance identifier

    Attributes:
        top: The scopal top node.
        index: The non-scopal top node.
        nodes: The list of Nodes (alias of
            :attr:`~delphin.sembase.SemanticStructure.predications`).
        links: The list of Links.
        lnk: The surface alignment for the whole DMRS.
        surface: The surface string represented by the DMRS.
        identifier: A discourse-utterance identifier.

    Example:

    >>> rain = Node(10000, '_rain_v_1', type='e')
    >>> heavy = Node(10001, '_heavy_a_1', type='e')
    >>> arg1_link = Link(10000, 10001, role='ARG1', post='EQ')
    >>> d = DMRS(top=10000, index=10000, nodes=[rain, heavy],
    ...          links=[arg1_link])
    """

    # one-element tuple; a bare string would also declare a single slot
    # but reads like a tuple that is missing its comma
    __slots__ = ('links',)

    def __init__(self,
                 top: Optional[int] = None,
                 index: Optional[int] = None,
                 nodes: Optional[Iterable[Node]] = None,
                 links: Optional[Iterable[Link]] = None,
                 lnk: Optional[Lnk] = None,
                 surface=None,
                 identifier=None):

        # strip legacy top links (those starting at node id 0) and use
        # one to fill in *top* if it was not given explicitly
        top, links = _normalize_top_and_links(top, links)

        if top:
            top = int(top)
        if index:
            index = int(index)
        if nodes is None:
            nodes = []

        super().__init__(top, index, list(nodes), lnk, surface, identifier)

        self.links = links

    @property
    def nodes(self):
        """The list of nodes (an alias of `predications`)."""
        return self.predications

    def __eq__(self, other):
        # lnk, surface, and identifier do not take part in equality
        if not isinstance(other, DMRS):
            return NotImplemented
        return (self.top == other.top
                and self.index == other.index
                and self.nodes == other.nodes
                and self.links == other.links)

    # SemanticStructure methods

    def properties(self, id):
        """
        Return the morphosemantic properties of the node with id *id*.
        """
        return self[id].properties

    def is_quantifier(self, id):
        """
        Return `True` if *id* is the id of a quantifier node.
        """
        return any(link.role == RESTRICTION_ROLE
                   for link in self.links if link.start == id)

    def quantification_pairs(self):
        """
        Return a list of (node, quantifier) pairs.

        Every node that is not the start of a restriction
        (:data:`RESTRICTION_ROLE`) link is paired with the node whose
        restriction link points to it, or with `None` if there is no
        such link. Nodes that start a restriction link only appear as
        the second member of a pair.
        """
        qs = set()
        qmap = {}
        for link in self.links:
            if link.role == RESTRICTION_ROLE:
                qs.add(link.start)
                qmap[link.end] = self[link.start]
        pairs = []
        # first pair non-quantifiers to their quantifier, if any
        for node in self.nodes:
            if node.id not in qs:
                pairs.append((node, qmap.get(node.id)))
        # for MRS any unpaired quantifiers are added here, but in DMRS
        # I'm not sure what an unpaired quantifier would look like;
        # its link.end must point to something
        return pairs

    def arguments(self, types=None, expressed=None):
        """
        Return a mapping of the argument structure.

        When *types* is used, any DMRS Links with :attr:`Link.post`
        set to :data:`H_POST` or :data:`HEQ_POST` are considered to
        have a type of `'h'`, so one can exclude scopal arguments by
        omitting `'h'` on *types*. Otherwise an argument's type is the
        :attr:`Node.type` of the link's target.

        Args:
            types: an iterable of predication types to include
            expressed: if `True`, only include arguments to expressed
                predications; if `False`, only include those
                unexpressed; if `None`, include both
        Returns:
            A mapping of predication ids to lists of (role, target)
            pairs for outgoing arguments for the predication.
        """

        args = {node.id: [] for node in self.nodes}
        H = variable.HANDLE

        for link in self.links:
            # MOD/EQ links are not arguments
            if link.role == BARE_EQ_ROLE:
                continue
            # ignore undesired argument types
            if types:
                if link.post in (H_POST, HEQ_POST):
                    if H not in types:
                        continue
                else:
                    node = self[link.end]
                    if node.type is None or node.type not in types:
                        continue
            # currently DMRS cannot encode unexpressed arguments
            if expressed is not None and not expressed:
                continue
            args[link.start].append((link.role, link.end))

        return args

    # ScopingSemanticStructure methods

    def scopes(self):
        """
        Return a tuple containing the top label and the scope map.

        Note that the top label is different from :attr:`top`, which
        is the top node's id. If :attr:`top` does not select a top
        node, then `None` is returned for the top label.

        The scope map is a dictionary mapping scope labels to the
        lists of predications sharing a scope.
        """

        h = variable.HANDLE
        vfac = variable.VariableFactory(starting_vid=1)

        # start with one fresh label per node...
        id_to_lbl = {node.id: vfac.new(h) for node in self.nodes}

        # ...then merge the scopes of nodes joined by */EQ links
        leqs = [(id_to_lbl[link.start], id_to_lbl[link.end])
                for link in self.links
                if link.post == EQ_POST]
        prescopes = {id_to_lbl[node.id]: [node] for node in self.nodes}

        scopes = scope.conjoin(prescopes, leqs)
        top = None
        if self.top is not None:
            top_node = self[self.top]
            # the top label is the label of the scope holding the top node
            top = next((label for label, nodes in scopes.items()
                        if top_node in nodes),
                       None)

        return top, scopes

    def scopal_arguments(self, scopes=None):
        """
        Return a mapping of the scopal argument structure.

        The return value maps node ids to lists of scopal arguments as
        (role, scope_relation, target) triples. If *scopes* is given,
        the target is the scope label, otherwise it is the target
        node's id. Note that ``MOD/EQ`` links are not included as
        scopal arguments.

        Args:
            scopes: mapping of scope labels to lists of predications
        Example:
            >>> d = DMRS(...)  # for "It doesn't rain."
            >>> d.scopal_arguments()
            {10000: [('ARG1', 'qeq', 10001)]}
            >>> top, scopes = d.scopes()
            >>> d.scopal_arguments(scopes=scopes)
            {10000: [('ARG1', 'qeq', 'h2')]}
        """
        id_to_lbl = {}
        if scopes is not None:
            for label, nodes in scopes.items():
                for node in nodes:
                    id_to_lbl[node.id] = label

        scargs = {node.id: [] for node in self.nodes}
        for link in self.links:
            # only */HEQ and */H links are scopal arguments
            if link.post == HEQ_POST:
                relation = scope.LHEQ
            elif link.post == H_POST:
                relation = scope.QEQ
            else:
                continue
            # get the label if scopes was given
            target = id_to_lbl.get(link.end, link.end)
            scargs[link.start].append((link.role, relation, target))

        return scargs




def _normalize_top_and_links(top, links):
    """
    Separate legacy top links from regular links.

    Older DMRS representations marked the top node with a /H link
    starting at the special node id 0; the `top` attribute is now used
    instead. Links starting at that id are dropped from the result,
    and the first one supplies `top` when no value was given (when
    `top` was given, such links are simply discarded).

    Returns a `(top, links)` pair where `links` is a new list.
    """
    if links is None:
        return top, []

    remaining = []
    for link in links:
        if link.start != TOP_NODE_ID:
            remaining.append(link)
        elif top is None:
            # only the first top link counts; an explicit top always wins
            top = link.end
    return top, remaining