import re
import textwrap
from delphin.tdl._exceptions import TDLError
from delphin.tdl._model import (
AVM,
BlockComment,
ConfigEntry,
ConfigEnvironment,
Conjunction,
ConsList,
Coreference,
DiffList,
FileInclude,
InstanceEnvironment,
LetterSet,
LineComment,
Regex,
String,
Term,
TypeDefinition,
TypeEnvironment,
TypeIdentifier,
WildCard,
_Environment,
_ImplicitAVM,
_MorphSet,
)
# Values for serialization (layout knobs shared by the formatters below)
# Extra indentation applied when a type definition's AVM part is placed on
# the line after its leading (non-AVM) terms.
_base_indent = 2  # indent when an AVM starts on the next line
# Cons-lists and diff-lists with more items than this are always written
# one item per line.
_max_inline_list_items = 3  # number of list items that may appear inline
# Lists whose single-line rendering would exceed this column are broken
# across lines instead.
_line_width = 79  # try not to go beyond this number of characters
# Serialization helpers
def _format_term(term, indent):
    """Serialize a single term, preceded by its docstring if it has one.

    The formatter is chosen by the term's exact class (subclasses that are
    not listed in the table are rejected).

    Args:
        term: the term object to serialize.
        indent: column at which the term starts; passed on to the
            class-specific formatter and used to indent the term when it
            follows a docstring.

    Returns:
        The serialized term as a string.

    Raises:
        TDLError: if the term's class has no registered formatter.
    """
    dispatch = {
        TypeIdentifier: _format_id,
        String: _format_string,
        Regex: _format_regex,
        Coreference: _format_coref,
        AVM: _format_avm,
        _ImplicitAVM: _format_avm,
        ConsList: _format_conslist,
        DiffList: _format_difflist,
    }
    formatter = dispatch.get(term.__class__)
    if formatter is None:
        raise TDLError(f"not a valid term: {type(term).__name__}")

    if term.docstring is None:
        return formatter(term, indent)

    # The docstring goes on its own line(s); the term then restarts at the
    # requested column on the following line.
    doc = _format_docstring(term.docstring, indent)
    padding = " " * indent
    body = formatter(term, indent)
    return f"{doc}\n{padding}{body}"
def _format_id(term, indent):
return str(term)
def _format_string(term, indent):
return f'"{term!s}"'
def _format_regex(term, indent):
return f"^{term!s}$"
def _format_coref(term, indent):
return f"#{term!s}"
def _format_avm(avm, indent):
lines = []
for feat, val in avm.features():
val = _format_conjunction(val, indent + len(feat) + 3)
if not val.startswith("\n"):
feat += " "
lines.append(feat + val)
if not lines:
return "[ ]"
else:
return "[ {} ]".format((",\n" + " " * (indent + 2)).join(lines))
def _format_conslist(cl, indent):
values = [
_format_conjunction(val, indent + 2) # 2 = len('< ')
for val in cl.values()
]
end = ""
if not cl.terminated:
if values:
end = ", ..."
else:
values = ["..."]
elif cl._avm is not None and cl[cl._last_path] is not None:
end = " . " + values[-1]
values = values[:-1]
if not values: # only if no values and terminated
return "< >"
elif (
len(values) <= _max_inline_list_items
and sum(len(v) + 2 for v in values) + 2 + indent <= _line_width
):
return "< {} >".format(", ".join(values) + end)
else:
i = " " * (indent + 2) # 2 = len('< ')
lines = [f"< {values[0]}"]
lines.extend(i + val for val in values[1:])
return ",\n".join(lines) + end + " >"
def _format_difflist(dl, indent):
values = [
_format_conjunction(val, indent + 3) # 3 == len('<! ')
for val in dl.values()
]
if not values:
# empty
return "<! !>"
elif (
len(values) <= _max_inline_list_items
and sum(len(v) + 2 for v in values) + 4 + indent <= _line_width
):
# single-line
return "<! {} !>".format(", ".join(values))
else:
# multi-line
delim = ",\n" + " " * (indent + 3)
list_items = delim.join(values)
return f"<! {list_items} !>"
def _format_conjunction(conj, indent):
    """Serialize a conjunction of terms joined by `` & ``.

    A bare ``Term`` is formatted directly. Otherwise every term in
    ``conj._terms`` is formatted and all of them are joined on a single
    logical line (individual terms may still span several lines).

    Args:
        conj: a ``Term`` or an object with a ``_terms`` sequence.
        indent: column at which the first term starts.

    Returns:
        The serialized conjunction; ``""`` when it has no terms.
    """
    if isinstance(conj, Term):
        return _format_term(conj, indent)

    terms = conj._terms
    if len(terms) == 0:
        return ""

    column = indent
    pieces = []
    for term in terms:
        piece = _format_term(term, column)
        widest = max(map(len, piece.splitlines()))
        # The next term starts after the widest line of this one plus the
        # separator; 3 == len(' & ').
        column += widest + 3
        pieces.append(piece)

    # No line breaking between terms is attempted: everything is joined
    # with ' & ' on one line.
    return " & ".join(pieces)
def _format_typedef(td, indent):
    """Serialize a type (or instance) definition, terminated by ``.``.

    Definitions carrying an ``affix_type`` attribute are written in three
    parts: the ``identifier operator`` header, a ``%affix_type`` line
    listing the ``(a b)`` affix patterns, and the body on a further line.
    All other definitions place the body directly after the operator.

    Args:
        td: the definition; ``identifier``, ``_operator`` and (for affixing
            definitions) ``affix_type`` and ``patterns`` are read from it.
        indent: column at which the identifier is written.

    Returns:
        The serialized definition.
    """
    i = " " * indent
    if hasattr(td, "affix_type"):
        patterns = " ".join(f"({a} {b})" for a, b in td.patterns)
        # The body must be written at the very column handed to
        # _format_typedef_body, otherwise the continuation lines it
        # computes would not line up with the body's first line.
        offset = indent + 2
        body = _format_typedef_body(td, indent, offset)
        return (
            f"{i}{td.identifier} {td._operator}\n"
            f"%{td.affix_type} {patterns}\n"
            f"{' ' * offset}{body}."
        )

    # 4 == len(' := '): the spaces around a two-character operator
    body = _format_typedef_body(td, indent, indent + len(td.identifier) + 4)
    return f"{i}{td.identifier} {td._operator} {body}."
def _format_typedef_body(td, indent, offset):
    """Serialize the conjunction and docstring of a type definition.

    If the conjunction has non-AVM terms followed by an AVM, the leading
    terms are written at *offset* and everything from the first AVM onward
    starts on the next line, indented by ``_base_indent`` relative to
    *indent*. Otherwise the whole conjunction is written at *offset*. An
    empty conjunction is serialized as an empty string.

    Args:
        td: the definition; ``conjunction`` and ``docstring`` are read.
        indent: column of the definition's identifier.
        offset: column at which the body's first term is written.

    Returns:
        The serialized body, followed by the docstring on its own lines
        when the definition has one.
    """
    terms = list(td.conjunction.terms)
    # Index of the first AVM, or None when the conjunction has no AVM.
    split_at = next(
        (k for k, term in enumerate(terms) if isinstance(term, AVM)),
        None,
    )

    if not split_at:
        # No AVM at all, an AVM in first position, or no terms: there is
        # nothing to put in front of a line break.
        formatted_conj = _format_conjunction(td.conjunction, offset)
    else:
        nested = _base_indent + indent
        formatted_conj = "{} &\n{}{}".format(
            _format_conjunction(Conjunction(terms[:split_at]), offset),
            " " * nested,
            _format_conjunction(Conjunction(terms[split_at:]), nested),
        )

    if td.docstring is None:
        return formatted_conj

    # The opening quotes use the same indentation that is given to the
    # docstring's contents and closing quotes, so all three line up.
    doc_indent = 2
    docstring = "\n" + " " * doc_indent + _format_docstring(
        td.docstring, doc_indent
    )
    return formatted_conj + docstring
def _format_docstring(doc, indent):
if doc is None:
return ""
lines = textwrap.dedent(doc).splitlines()
if lines:
if lines[0].strip() == "":
lines = lines[1:]
if lines[-1].strip() == "":
lines = lines[:-1]
ind = " " * indent
contents = _escape_docstring("\n{0}{1}\n{0}".format(ind, ("\n" + ind).join(lines)))
return f'"""{contents}"""'
def _escape_docstring(s):
cs = []
cnt = 0
lastindex = len(s) - 1
for i, c in enumerate(s):
if cnt == -1 or c not in '"\\':
cnt = 0
elif c == '"':
cnt += 1
if cnt == 3 or i == lastindex:
cs.append("\\")
cnt = 0
elif c == "\\":
cnt = -1
cs.append(c)
return "".join(cs)
def _format_morphset(obj, indent):
    """Serialize a letter-set or wild-card as ``%(kind (var characters))``.

    Args:
        obj: a ``LetterSet`` or ``WildCard``; ``var`` and ``characters``
            are read from it.
        indent: number of spaces written before the ``%``.

    Returns:
        The serialized morph-set.

    Raises:
        TypeError: if *obj* is neither a ``LetterSet`` nor a ``WildCard``.
    """
    if isinstance(obj, LetterSet):
        kind = "letter-set"
    elif isinstance(obj, WildCard):
        kind = "wild-card"
    else:
        raise TypeError(f"not a valid morph-set class: {type(obj).__name__}")

    padding = " " * indent
    return f"{padding}%({kind} ({obj.var} {obj.characters}))"
def _format_environment(env, indent):
    """Serialize a ``:begin ... :end`` environment and its entries.

    The header names the environment kind (``:type``, ``:instance`` or
    ``:config``), followed by `` :status <status>`` for instance
    environments with a status, or the label for config environments with
    one. Entries are written one per line, indented two further columns.

    Args:
        env: the environment; ``entries`` and, depending on its class,
            ``status`` or ``label`` are read from it.
        indent: column at which ``:begin`` and ``:end`` are written.

    Returns:
        The serialized environment.

    Raises:
        TDLError: if *env* is not one of the three environment classes.
    """
    suffix = ""
    if isinstance(env, TypeEnvironment):
        keyword = ":type"
    elif isinstance(env, InstanceEnvironment):
        keyword = ":instance"
        if env.status:
            suffix = " :status " + env.status
    elif isinstance(env, ConfigEnvironment):
        keyword = ":config"
        if env.label:
            suffix = f" {env.label}"
    else:
        raise TDLError(f"invalid environment type: {type(env).__name__}")

    # NOTE(review): `format` is looked up in module scope; presumably this
    # is the module's own serializer defined outside this view rather than
    # the builtin -- confirm.
    rendered = [format(entry, indent + 2) for entry in env.entries]
    body = "\n".join(rendered)
    if body:
        body = body + "\n"

    padding = " " * indent
    return f"{padding}:begin {keyword}{suffix}.\n{body}{padding}:end {keyword}."
def _format_configentry(obj: ConfigEntry, indent: int) -> str:
    """Serialize a config entry as ``key := value value ... .``.

    A value is written bare only if it consists entirely of characters
    outside the excluded set in the pattern below (whitespace and various
    punctuation). Any other value, including the empty string, is
    double-quoted with backslashes and double quotes escaped.

    Args:
        obj: the entry; ``key`` and ``values`` are read from it.
        indent: number of spaces written before the key.

    Returns:
        The serialized entry.
    """
    bare_value = r"""[^\s!"#$%&'(),.\/:;<=>[\]^|]+"""
    rendered: list[str] = []
    for raw in obj.values:
        if re.fullmatch(bare_value, raw):
            rendered.append(raw)
        else:
            # Escape backslashes first so the ones added for quotes are
            # not doubled again.
            escaped = raw.replace("\\", "\\\\").replace('"', '\\"')
            rendered.append('"' + escaped + '"')

    padding = " " * indent
    joined = " ".join(rendered)
    return f"{padding}{obj.key} := {joined}."
def _format_include(fi, indent):
return '{}:include "{}".'.format(" " * indent, fi.value)
def _format_linecomment(obj, indent):
return "{};{}".format(" " * indent, str(obj))
def _format_blockcomment(obj, indent):
return "{}#|{}|#".format(" " * indent, str(obj))