"""
Surface alignment for semantic entities.
"""
from typing import Iterable, Optional, Tuple, Union, overload
# Default modules need to import the PyDelphin version
from delphin.__about__ import __version__ # noqa: F401
from delphin.exceptions import PyDelphinException
class LnkError(PyDelphinException):
    """Error raised for invalid Lnk values or operations."""
class Lnk:
    """
    Surface-alignment information for predications.

    Lnk objects link predicates to the surface form in one of several
    ways, the most common of which is the character span of the
    original string.

    Valid types and their associated *data* are shown in the table
    below.

    =============  ===================  =========
    type           data                 example
    =============  ===================  =========
    Lnk.CHARSPAN   surface string span  (0, 5)
    Lnk.CHARTSPAN  chart vertex span    (0, 5)
    Lnk.TOKENS     token identifiers    (0, 1, 2)
    Lnk.EDGE       edge identifier      1
    =============  ===================  =========

    Args:
        arg: Lnk type or the string representation of a Lnk; a false
            value (`None`, `''`, or `Lnk.UNSPECIFIED`) yields an
            unspecified Lnk
        data: alignment data (assumes *arg* is a Lnk type)
    Attributes:
        type: the way the Lnk relates the semantics to the surface form
        data: the alignment data (depends on the Lnk type)
    Raises:
        LnkError: if *arg* is neither a false value, a `'<...>'` string
            given without *data*, nor a Lnk type given with *data*
        ValueError: if the contents of a `'<...>'` string cannot be
            converted to integers
    Example:
        >>> Lnk('<0:5>').data
        (0, 5)
        >>> str(Lnk.charspan(0,5))
        '<0:5>'
        >>> str(Lnk.chartspan(0,5))
        '<0#5>'
        >>> str(Lnk.tokens([0,1,2]))
        '<0 1 2>'
        >>> str(Lnk.edge(1))
        '<@1>'
    """

    __slots__ = ('type', 'data')

    type: int
    # NOTE: data is set to None when type is Lnk.UNSPECIFIED
    data: Union[int, Tuple[int, ...]]

    # These types determine how a lnk on an EP or MRS is to be
    # interpreted, and thus determine the data type/structure of the
    # lnk data.
    UNSPECIFIED = 0
    CHARSPAN = 1  # Character span: a pair of offsets
    CHARTSPAN = 2  # Chart vertex span: a pair of indices
    TOKENS = 3  # Token numbers: a list of indices
    EDGE = 4  # An edge identifier: a number

    @overload
    def __init__(self, arg: None, data: None = None):
        ...

    @overload
    def __init__(self, arg: str, data: None = None):
        ...

    @overload
    def __init__(self,
                 arg: int,
                 data: Union[None, int, Tuple[int, ...]] = None):
        ...

    def __init__(self, arg, data=None):
        if not arg:
            # None, '' and Lnk.UNSPECIFIED (0) all mean "no alignment"
            self.type = Lnk.UNSPECIFIED
            self.data = None
        elif (data is None
              and isinstance(arg, str)
              and (arg[:1], arg[-1:]) == ('<', '>')):
            # String form: the first character (or its absence) after
            # '<' selects the Lnk type. The isinstance() guard keeps an
            # int *arg* from being sliced, which would be a TypeError.
            body = arg[1:-1]
            if body.startswith('@'):
                self.type = Lnk.EDGE
                self.data = int(body[1:])
            elif ':' in body:
                cfrom, cto = body.split(':')
                self.type = Lnk.CHARSPAN
                self.data = (int(cfrom), int(cto))
            elif '#' in body:
                vfrom, vto = body.split('#')
                self.type = Lnk.CHARTSPAN
                self.data = (int(vfrom), int(vto))
            else:
                self.type = Lnk.TOKENS
                self.data = tuple(map(int, body.split()))
        elif (data is not None
              and arg in (Lnk.CHARSPAN, Lnk.CHARTSPAN,
                          Lnk.TOKENS, Lnk.EDGE)):
            # Explicit (type, data) form; a typed Lnk without data is
            # rejected below rather than stored half-initialized.
            self.type = arg
            self.data = data
        else:
            raise LnkError('invalid Lnk: {!r}'.format((arg, data)))

    @classmethod
    def default(cls) -> 'Lnk':
        """
        Create a Lnk object for when no information is given.
        """
        return cls(None)

    @classmethod
    def charspan(cls,
                 start: Union[str, int],
                 end: Union[str, int]) -> 'Lnk':
        """
        Create a Lnk object for a character span.

        Args:
            start: the initial character position (cfrom)
            end: the final character position (cto)
        """
        return cls(Lnk.CHARSPAN, (int(start), int(end)))

    @classmethod
    def chartspan(cls,
                  start: Union[str, int],
                  end: Union[str, int]) -> 'Lnk':
        """
        Create a Lnk object for a chart span.

        Args:
            start: the initial chart vertex
            end: the final chart vertex
        """
        return cls(Lnk.CHARTSPAN, (int(start), int(end)))

    @classmethod
    def tokens(cls, tokens: Iterable[Union[str, int]]) -> 'Lnk':
        """
        Create a Lnk object for a token range.

        Args:
            tokens: a list of token identifiers
        """
        return cls(Lnk.TOKENS, tuple(map(int, tokens)))

    @classmethod
    def edge(cls, edge: Union[str, int]) -> 'Lnk':
        """
        Create a Lnk object for an edge (used internally in generation).

        Args:
            edge: an edge identifier
        """
        return cls(Lnk.EDGE, int(edge))

    def __str__(self):
        if self.type == Lnk.UNSPECIFIED:
            return ''
        elif self.type == Lnk.CHARSPAN:
            return '<{}:{}>'.format(self.data[0], self.data[1])
        elif self.type == Lnk.CHARTSPAN:
            return '<{}#{}>'.format(self.data[0], self.data[1])
        elif self.type == Lnk.EDGE:
            return '<@{}>'.format(self.data)
        elif self.type == Lnk.TOKENS:
            return '<{}>'.format(' '.join(map(str, self.data)))
        # Only reachable if `type` was reassigned to an unknown value;
        # fail explicitly instead of implicitly returning None.
        raise LnkError('invalid Lnk type: {!r}'.format(self.type))

    def __repr__(self):
        return f'<Lnk object {self!s} at {id(self)}>'

    def __eq__(self, other):
        # Defer to the other operand for non-Lnk objects so that `==`
        # yields False (and `!=` True) instead of an AttributeError.
        if not isinstance(other, Lnk):
            return NotImplemented
        return self.type == other.type and self.data == other.data

    def __bool__(self):
        # An unspecified Lnk and the (-1, -1) character span are both
        # treated as "no alignment information".
        if self.type == Lnk.UNSPECIFIED:
            return False
        if self.type == Lnk.CHARSPAN and self.data == (-1, -1):
            return False
        return True
class LnkMixin:
    """
    Mixin that stores a `lnk` and `surface` and exposes `cfrom` and
    `cto` properties derived from the `lnk`.
    """

    __slots__ = ('lnk', 'surface')

    def __init__(self,
                 lnk: Optional[Lnk] = None,
                 surface: Optional[str] = None):
        # Fall back to an unspecified Lnk when none is supplied
        self.lnk = Lnk.default() if lnk is None else lnk
        self.surface = surface

    @property
    def cfrom(self) -> int:
        """
        The initial character position in the surface string.

        The value is -1 unless `lnk` is a character span.
        """
        try:
            lnk = self.lnk
            if lnk.type == Lnk.CHARSPAN:
                return lnk.data[0]  # type: ignore
        except AttributeError:
            pass  # missing attribute: fall through to the default
        return -1

    @property
    def cto(self) -> int:
        """
        The final character position in the surface string.

        The value is -1 unless `lnk` is a character span.
        """
        try:
            lnk = self.lnk
            if lnk.type == Lnk.CHARSPAN:
                return lnk.data[1]  # type: ignore
        except AttributeError:
            pass  # missing attribute: fall through to the default
        return -1