Source code for delphin.mrs._operations

"""
Operations on MRS structures
"""

from collections.abc import Iterable

from delphin import dmrs, mrs, predicate, scope, util, variable
from delphin.sembase import ScopeMap, property_priority



[docs]
def is_connected(m: mrs.MRS) -> bool:
    """
    Return `True` if *m* is a fully-connected MRS.

    The MRS is viewed as a graph whose nodes are EP identifiers,
    labels, and intrinsic variables. EPs become linked through
    regular (non-scopal) arguments, scopal arguments (a handle
    constraint's hi handle is resolved to its lo handle), or shared
    labels. The MRS is connected when a traversal of that graph
    reaches every EP.
    """
    ep_ids = {ep.id for ep in m.rels}
    graph: dict[str, set[str | None]] = {ep_id: set() for ep_id in ep_ids}

    # link every EP with its label and (when present) its intrinsic
    # variable, in both directions
    for ep in m.rels:
        ep_id, label, iv = ep.id, ep.label, ep.iv
        graph[ep_id] |= {label, iv}
        graph.setdefault(label, set()).add(ep_id)
        if iv:
            graph.setdefault(iv, set()).add(ep_id)

    # an argument links its EP to whatever known node it targets: an
    # intrinsic variable, a label, or the lo handle behind a hi handle
    hi_to_lo = {hc.hi: hc.lo for hc in m.hcons}
    for ep_id, roleargs in m.arguments().items():
        for _role, value in roleargs:
            target = hi_to_lo.get(value, value)
            if target not in graph:
                continue  # not a label or intrinsic variable of any EP
            graph[ep_id].add(target)
            graph[target].add(ep_id)

    reachable = util._bfs(graph)
    return ep_ids.issubset(reachable)




[docs]
def has_intrinsic_variable_property(m: mrs.MRS) -> bool:
    """
    Return `True` if *m* satisfies the intrinsic variable property.

    The property holds when *m* passes both of these checks:

    - :func:`has_complete_intrinsic_variables`
    - :func:`has_unique_intrinsic_variables`

    On quantifier EPs, `ARG0` is overloaded to mean "bound variable".
    Each quantifier should have an `ARG0` that is the intrinsic
    variable of exactly one non-quantifier EP, but that condition is
    not checked by this function.
    """
    if not has_complete_intrinsic_variables(m):
        return False
    return has_unique_intrinsic_variables(m)




[docs]
def has_complete_intrinsic_variables(m: mrs.MRS) -> bool:
    """
    Return `True` if every non-quantifier EP has an intrinsic variable.

    Quantifier EPs are skipped; any other EP whose intrinsic variable
    is `None` makes the check fail.
    """
    for ep in m.rels:
        if ep.is_quantifier():
            continue
        if ep.iv is None:
            return False
    return True




[docs]
def has_unique_intrinsic_variables(m: mrs.MRS) -> bool:
    """
    Return `True` if no two EPs share an intrinsic variable.

    Quantifier EPs and EPs without an intrinsic variable are left out
    of the comparison.
    """
    seen: set[str] = set()
    for ep in m.rels:
        if ep.is_quantifier():
            continue
        iv = ep.iv
        if iv is None:
            continue
        if iv in seen:
            return False  # a second EP claims the same variable
        seen.add(iv)
    return True




[docs]
def is_well_formed(m: mrs.MRS) -> bool:
    """
    Return `True` if MRS *m* is well-formed.

    An MRS is considered well-formed when it passes all of:

    - :func:`is_connected`
    - :func:`has_intrinsic_variable_property`
    - :func:`plausibly_scopes`

    The last check is only a heuristic: it looks for immediate
    violations among handle constraints and scopal arguments (e.g., a
    scopal argument selecting the label of its own EP) rather than
    fully resolving scope.
    """
    # checks run in this order and stop at the first failure
    checks = (is_connected, has_intrinsic_variable_property, plausibly_scopes)
    return all(check(m) for check in checks)




[docs]
def plausibly_scopes(m: mrs.MRS) -> bool:
    """
    Quickly test if MRS *m* can plausibly resolve a scopal reading.

    The MRS fails the test when any of the following holds:

        - the top handle is not the hi handle of a handle constraint
        - a scopal argument of an EP is that EP's own label
        - the hi handle of a handle constraint is selected more than
          once (the top counts as one selection)
        - the lo handle of a handle constraint is not the label of
          any EP
        - a label selected directly by a scopal argument was already
          selected, directly or via a handle constraint
        - the hi handle of a handle constraint is never selected

    Transitive scopal plausibility is not tested.
    """
    labels = {ep.label for ep in m.rels}
    hi_to_lo = {hc.hi: hc.lo for hc in m.hcons}
    if m.top not in hi_to_lo:
        return False

    # handles that have been selected so far, starting with the top
    used = {m.top}
    for ep_id, roleargs in m.arguments(types="h").items():
        ep = m[ep_id]
        for _role, handle in roleargs:
            if handle == ep.label:
                return False  # EP would scope over itself
            if handle in hi_to_lo:
                lo = hi_to_lo[handle]
                if handle in used or lo not in labels:
                    return False
                used.add(lo)
            elif handle in labels and handle in used:
                return False
            used.add(handle)

    # every constraint must have been selected and must target a label
    return all(hi in used and lo in labels for hi, lo in hi_to_lo.items())




[docs]
def is_isomorphic(m1: mrs.MRS, m2: mrs.MRS, properties: bool = True) -> bool:
    """
    Return `True` if *m1* and *m2* are isomorphic MRSs.

    The comparison covers the predicates of both structures, constant
    arguments, the argument structure between predications, and (if
    `properties=True`) the morphosemantic properties of the
    predications. Non-semantic information such as identifiers and
    surface alignments plays no part.

    Args:
        m1: the left MRS to compare
        m2: the right MRS to compare
        properties: if `True`, ensure variable properties are
            equal for mapped predications
    """
    # cheap rejections: structures of different sizes cannot match
    if len(m1.rels) != len(m2.rels):
        return False
    if len(m1.hcons) != len(m2.hcons):
        return False
    if len(m1.icons) != len(m2.icons):
        return False
    if len(m1.variables) != len(m2.variables):
        return False

    graph1 = _make_mrs_isograph(m1, properties)
    graph2 = _make_mrs_isograph(m2, properties)

    # isomorphic only when the mapping covers every node of graph1
    mapping = util._vf2(graph1, graph2)
    return set(mapping) == set(graph1)



def _make_mrs_isograph(x: mrs.MRS, properties: bool) -> util._IsoGraph:
    """
    Build the labeled graph of *x* used for isomorphism testing.

    Every variable and every EP identifier becomes a node. Each EP
    node stores, under the `None` key, a string made of the normalized
    predicate followed by either the constant argument or (when
    *properties* is true) the properties of its intrinsic variable.
    Edges are labeled with role names, `"eq-scope"` (from a label to
    the EPs carrying it), or the relation of a handle or individual
    constraint.
    """
    graph: util._IsoGraph = {}
    for var in x.variables:
        graph[var] = {}
    for ep in x.rels:
        graph[ep.id] = {}

    for ep in x.rels:
        ep_id = ep.id
        edges = graph[ep_id]
        carg = ep.carg

        # scope labels (may be targets of arguments or hcons)
        graph[ep.label][ep_id] = "eq-scope"

        # node description: predicate plus constant or properties
        description = predicate.normalize(ep.predicate)
        if carg is not None:
            description += f"({carg})"
        elif properties:
            # fall back to '' for type consistency when there is no iv
            props = x.variables.get(ep.iv or "")
            if props:
                rendered = (
                    f"{name.upper()}={props[name].lower()}"
                    for name in sorted(props, key=property_priority)
                )
                description += "{" + "|".join(rendered) + "}"
        edges[None] = description

        # predicate-argument structure
        for role, value in ep.args.items():
            if role == mrs.CONSTANT_ROLE:
                continue
            # several roles may share a target (e.g., L-INDEX, L-HNDL)
            merged = edges.get(value, "").split()
            merged.append(role)
            merged.sort()
            edges[value] = " ".join(merged)

    # handle constraints
    for hcon in x.hcons:
        graph[hcon.hi][hcon.lo] = hcon.relation

    # individual constraints
    for icon in x.icons:
        graph[icon.left][icon.right] = icon.relation

    return graph



[docs]
def compare_bags(
    testbag: Iterable[mrs.MRS],
    goldbag: Iterable[mrs.MRS],
    properties: bool = True,
    count_only: bool = True,
):
    """
    Compare two bags of MRS objects.

    Each item of *testbag* is paired with the first not-yet-paired
    item of *goldbag* it is isomorphic to (see :func:`is_isomorphic`),
    if there is one.

    Args:
        testbag: An iterable of MRS objects to test
        goldbag: An iterable of MRS objects to compare against
        properties: if `True`, ensure variable properties are
            equal for mapped predications
        count_only: If `True`, the returned triple will only have the
            counts of each; if `False`, a list of MRS objects will be
            returned for each (using the ones from *testbag* for the
            shared set)
    Returns:
        A triple of (unique-in-test, shared, unique-in-gold), where
        each of the three items is an integer count if the
        *count_only* parameter is `True`, or a list of MRS objects
        otherwise.
    """
    unmatched_gold = list(goldbag)
    only_in_test = []
    in_both = []

    for candidate in testbag:
        for gold in unmatched_gold:
            if is_isomorphic(candidate, gold, properties=properties):
                # each gold item may be paired at most once; the loop
                # is left right after the removal
                unmatched_gold.remove(gold)
                in_both.append(candidate)
                break
        else:
            only_in_test.append(candidate)

    if not count_only:
        return (only_in_test, in_both, unmatched_gold)
    return (len(only_in_test), len(in_both), len(unmatched_gold))




[docs]
def from_dmrs(d: dmrs.DMRS) -> mrs.MRS:
    """
    Build an MRS from the DMRS *d*.

    Labels come from the scopes of *d*, intrinsic variables are
    created per quantification pair, and handle constraints are
    created for the top and for every qeq scopal argument.

    Args:
        d: the input DMRS
    Returns:
        MRS
    Raises:
        MRSError when conversion fails.
    """
    handle_type = variable.HANDLE
    make_qeq = mrs.HCons.qeq
    vfac = variable.VariableFactory(starting_vid=0)
    # the top handle is requested before any other variable
    top = None if d.top is None else vfac.new(handle_type)

    # do d.scopes() once to avoid potential errors if label generation
    # is ever non-deterministic
    top_label, scopes = d.scopes()
    regular_args = d.arguments(types="xeipu")
    scopal_args = d.scopal_arguments(scopes=scopes)

    id_to_lbl, id_to_iv = _dmrs_build_maps(d, scopes, vfac)
    # for index see https://github.com/delph-in/pydelphin/issues/214
    index = id_to_iv[d.index] if d.index else None

    hcons: list[mrs.HCons] = [] if top is None else [make_qeq(top, top_label)]

    icons = None  # see https://github.com/delph-in/pydelphin/issues/220

    rels: list[mrs.EP] = []
    for node in d.nodes:
        node_id = node.id
        label = id_to_lbl[node_id]
        args = {mrs.INTRINSIC_ROLE: id_to_iv[node_id]}

        # non-scopal arguments point at the target's intrinsic variable
        args.update((role, id_to_iv[tgt]) for role, tgt in regular_args[node_id])

        # scopal arguments either take the target label directly or go
        # through a fresh hole that is qeq to the target label
        for role, relation, tgt_label in scopal_args[node_id]:
            if relation == scope.QEQ:
                hole = vfac.new(handle_type)
                args[role] = hole
                hcons.append(make_qeq(hole, tgt_label))
            elif relation == scope.LHEQ:
                args[role] = tgt_label
            else:
                raise mrs.MRSError("DMRS-to-MRS: invalid scope constraint")

        if node.carg is not None:
            args[mrs.CONSTANT_ROLE] = node.carg

        # quantifiers without a body argument get a fresh handle for it
        if d.is_quantifier(node_id) and mrs.BODY_ROLE not in args:
            args[mrs.BODY_ROLE] = vfac.new(handle_type)

        ep = mrs.EP(
            node.predicate,
            label,
            args=args,
            lnk=node.lnk,
            surface=node.surface,
            base=node.base,
        )
        rels.append(ep)

    return mrs.MRS(
        top=top,
        index=index,
        rels=rels,
        hcons=hcons,
        icons=icons,
        variables=vfac.store,
        lnk=d.lnk,
        surface=d.surface,
        identifier=d.identifier,
    )



def _dmrs_build_maps(
    d: dmrs.DMRS,
    scopes: ScopeMap[dmrs.Node],
    vfac: variable.VariableFactory,
) -> tuple[dict[int, str], dict[int, str]]:
    """
    Map the node ids of *d* to MRS labels and intrinsic variables.

    Returns a pair `(id_to_lbl, id_to_iv)`. The first maps each node
    id to the label of the scope containing the node. The second maps
    each node id to a variable newly created from *vfac*; a quantifier
    paired with a node shares that node's variable.
    """
    id_to_lbl: dict[int, str] = {}
    for label, nodes in scopes.items():
        vfac.index[variable.id(label)] = label  # prevent vid reuse
        for node in nodes:
            id_to_lbl[node.id] = label

    id_to_iv: dict[int, str] = {}
    for node, quantifier in d.quantification_pairs():
        if node is None:
            continue  # ignore unpaired quantifiers (ill-formed)
        iv = vfac.new(node.type, list(node.properties.items()))
        id_to_iv[node.id] = iv
        if quantifier is not None:
            id_to_iv[quantifier.id] = iv

    return id_to_lbl, id_to_iv