"""
Surface alignment for semantic entities.
"""
# Public names exported by this module. "LnkMixin" is not defined in
# this file: it is resolved on demand by the module-level __getattr__
# (which imports it from delphin.sembase), so the "undefined name in
# __all__" lint (F822) is suppressed for that entry.
__all__ = [
"Lnk",
"LnkError",
"LnkMixin", # noqa: F822 ; for backward compatibility
]
import warnings
from collections.abc import Iterable
from typing import Any, overload
from delphin.__about__ import __version__ # noqa: F401
from delphin.exceptions import PyDelphinException, PyDelphinWarning
[docs]
class LnkError(PyDelphinException):
    """Signal an invalid Lnk value or an invalid operation on a Lnk."""
[docs]
class Lnk:
"""
Surface-alignment information for predications.
Lnk objects link predicates to the surface form in one of several
ways, the most common of which being the character span of the
original string.
Valid types and their associated *data* shown in the table below.
============= =================== =========
type data example
============= =================== =========
Lnk.CHARSPAN surface string span (0, 5)
Lnk.CHARTSPAN chart vertex span (0, 5)
Lnk.TOKENS token identifiers (0, 1, 2)
Lnk.EDGE edge identifier 1
============= =================== =========
Args:
arg: Lnk type or the string representation of a Lnk
data: alignment data (assumes *arg* is a Lnk type)
Attributes:
type: the way the Lnk relates the semantics to the surface form
data: the alignment data (depends on the Lnk type)
Example:
>>> Lnk("<0:5>").data
(0, 5)
>>> str(Lnk.charspan(0, 5))
'<0:5>'
>>> str(Lnk.chartspan(0, 5))
'<0#5>'
>>> str(Lnk.tokens([0, 1, 2]))
'<0 1 2>'
>>> str(Lnk.edge(1))
'<@1>'
"""
__slots__ = ("_data", "type")
type: int
_data: tuple[int, ...]
# These types determine how a lnk on an EP or MRS are to be
# interpreted, and thus determine the data type/structure of the
# lnk data.
UNSPECIFIED = 0
CHARSPAN = 1 # Character span; a pair of offsets
CHARTSPAN = 2 # Chart vertex span: a pair of indices
TOKENS = 3 # Token numbers: a list of indices
EDGE = 4 # An edge identifier: a number
@overload
def __init__(self, arg: str, data: None = None) -> None: ...
@overload
def __init__(
self,
arg: int,
data: None | int | tuple[int, ...] = None,
) -> None: ...
def __init__(
self,
arg: str | int,
data: None | int | tuple[int, ...] = None,
) -> None:
if isinstance(arg, str):
if data is not None:
raise LnkError("data argument should be None when arg is a string")
if (arg[:1], arg[-1:]) != ("<", ">"):
raise LnkError(f"invalid Lnk string: {arg!r}")
arg = arg[1:-1]
if arg.startswith("@"):
self.type = Lnk.EDGE
self._data = (int(arg[1:]),)
elif ":" in arg:
cfrom, cto = arg.split(":")
self.type = Lnk.CHARSPAN
self._data = (int(cfrom), int(cto))
elif "#" in arg:
vfrom, vto = arg.split("#")
self.type = Lnk.CHARTSPAN
self._data = (int(vfrom), int(vto))
else:
self.type = Lnk.TOKENS
self._data = tuple(map(int, arg.split()))
elif isinstance(arg, int):
if arg not in (
Lnk.UNSPECIFIED,
Lnk.CHARSPAN,
Lnk.CHARTSPAN,
Lnk.TOKENS,
Lnk.EDGE,
):
raise LnkError(f"invalid Lnk type {arg!r}")
self.type = arg
match data:
case tuple():
self._data = data
case int():
self._data = (data,)
case None:
self._data = ()
case _:
raise LnkError(f"invalid Lnk data: f{data}")
else:
raise LnkError(f"invalid Lnk: {(arg, data)!r}")
[docs]
@classmethod
def default(cls):
"""
Create a Lnk object for when no information is given.
"""
return cls(Lnk.UNSPECIFIED)
[docs]
@classmethod
def charspan(cls, start: str | int, end: str | int):
"""
Create a Lnk object for a character span.
Args:
start: the initial character position (cfrom)
end: the final character position (cto)
"""
return cls(Lnk.CHARSPAN, (int(start), int(end)))
[docs]
@classmethod
def chartspan(cls, start: str | int, end: str | int):
"""
Create a Lnk object for a chart span.
Args:
start: the initial chart vertex
end: the final chart vertex
"""
return cls(Lnk.CHARTSPAN, (int(start), int(end)))
[docs]
@classmethod
def tokens(cls, tokens: Iterable[str | int]):
"""
Create a Lnk object for a token range.
Args:
tokens: a list of token identifiers
"""
return cls(Lnk.TOKENS, tuple(map(int, tokens)))
[docs]
@classmethod
def edge(cls, edge: str | int):
"""
Create a Lnk object for an edge (used internally in generation).
Args:
edge: an edge identifier
"""
return cls(Lnk.EDGE, int(edge))
@property
def data(self) -> int | tuple[int, ...] | None:
match self.type:
case Lnk.UNSPECIFIED:
return None
case Lnk.CHARSPAN | Lnk.CHARTSPAN | Lnk.TOKENS:
return self._data
case Lnk.EDGE:
if len(self._data) != 1:
raise LnkError(f"invalid data for edge-type Lnk: {self._data}")
return self._data[0]
case _:
raise LnkError("invalid Lnk type")
def __str__(self) -> str:
match self.type:
case Lnk.UNSPECIFIED:
return ""
case Lnk.CHARSPAN:
return f"<{self._data[0]}:{self._data[1]}>"
case Lnk.CHARTSPAN:
return f"<{self._data[0]}#{self._data[1]}>"
case Lnk.EDGE:
return f"<@{self._data[0]}>"
case Lnk.TOKENS:
return "<{}>".format(" ".join(map(str, self._data)))
case _:
raise LnkError("invalid Lnk type")
def __repr__(self):
return f"<Lnk object {self!s} at {id(self)}>"
def __eq__(self, other):
return self.type == other.type and self._data == other._data
def __bool__(self):
if self.type == Lnk.UNSPECIFIED:
return False
if self.type == Lnk.CHARSPAN and self._data == (-1, -1):
return False
return True
# LnkMixin has been moved to delphin.sembase. To keep backward
# compatibility and avoid circular imports, load it only when
# requested.
def __getattr__(name: str) -> Any:
    """
    Resolve the relocated ``LnkMixin`` name on first access.

    Any other missing attribute gets the usual AttributeError. For
    ``LnkMixin``, the class is imported from delphin.sembase at call
    time and a PyDelphinWarning pointing to the new location is issued
    before the class is returned.
    """
    if name != "LnkMixin":
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    # Imported here rather than at module level so the import only
    # happens when the name is actually requested.
    from delphin.sembase import LnkMixin as relocated

    warnings.warn(
        "LnkMixin has been moved to delphin.sembase.LnkMixin",
        category=PyDelphinWarning,
        stacklevel=2,
    )
    return relocated