# Source code for delphin.extra.highlight


"""
Pygments-based highlighting lexers for DELPH-IN formats.
"""

import re

from pygments.lexer import RegexLexer, bygroups, default, include
from pygments.token import (
    Token, Whitespace, Text, Number, String,
    Keyword, Name, Operator, Punctuation,
    Comment, Error
)

# Characters that terminate a bare TDL identifier.  The set is regex-escaped
# once here so it can be interpolated safely into character classes below
# via str.format() (e.g. r'[^\s{}]+'.format(_tdl_break_characters)).
_tdl_break_characters = re.escape(r'<>!=:.#&,[];$()^/')

class TdlLexer(RegexLexer):
    """
    A Pygments-based lexer for Typed Description Language (TDL).

    Tokenizes type definitions (``type := supertype & [ ... ].``),
    ``%(letter-set ...)`` declarations, ``%prefix``/``%suffix`` rules,
    and ``:begin``/``:end`` environment macros, along with TDL comments
    (``;`` single-line and nestable ``#| ... |#`` multi-line).
    """
    name = 'TDL'
    aliases = ['tdl']
    filenames = ['*.tdl']

    tokens = {
        'root': [
            (r'\s+', Text),
            include('comment'),
            # a type definition: identifier, then := :< or :+
            (r'(\S+?)(\s*)(:[=<+])', bygroups(Name.Class, Text, Operator),
             'typedef'),
            (r'(%)(\s*\(\s*)(letter-set)',
             bygroups(Operator, Punctuation, Name.Builtin),
             ('letterset', 'letterset')),  # need to pop twice
            (r':begin', Name.Builtin, 'macro')
        ],
        'comment': [
            (r';.*?$', Comment.Singleline),
            (r'#\|', Comment.Multiline, 'multilinecomment')
        ],
        'multilinecomment': [
            # #| ... |# comments nest, hence the #push/#pop pair
            (r'[^#|]', Comment.Multiline),
            (r'#\|', Comment.Multiline, '#push'),
            (r'\|#', Comment.Multiline, '#pop'),
            (r'[#|]', Comment.Multiline)
        ],
        'typedef': [
            (r'\s+', Text),
            (r'\.', Punctuation, '#pop'),  # terminating dot ends the typedef
            # probably ok to reuse letterset for %suffix and %prefix
            (r'(%prefix|%suffix)', Name.Builtin, 'letterset'),
            include('conjunction')
        ],
        'conjunction': [
            (r'\s+', Text),
            (r'&', Operator),
            # a docstring attached to the conjunction
            (r'"[^"\\]*(?:\\.[^"\\]*)*"', String.Doc),
            include('term'),
            # nothing matched: leave the conjunction without consuming input
            default('#pop')
        ],
        'term': [
            include('comment'),
            (r'\[', Punctuation, 'avm'),
            (r'<!', Punctuation, 'difflist'),
            (r'<', Punctuation, 'conslist'),
            # coreference tags, e.g. #1 or #coref
            (r'#[^\s{}]+'.format(_tdl_break_characters), Name.Label),
            include('strings'),
            (r'\*top\*', Keyword.Constant),
            (r'\.\.\.', Name),
            (r'[^\s{}]+'.format(_tdl_break_characters), Name),
            default('#pop')
        ],
        'avm': [
            include('comment'),
            (r'\s+', Text),
            (r'\]', Punctuation, '#pop'),
            (r',', Punctuation),
            # an attribute, possibly a dotted path (ATTR1.ATTR2...)
            (r'((?:[^\s{0}]+)(?:\s*\.\s*[^\s{0}]+)*)'
             .format(_tdl_break_characters), Name.Attribute, 'conjunction')
        ],
        'conslist': [
            (r'>', Punctuation, '#pop'),
            (r',|\.', Punctuation),
            include('conjunction')
        ],
        'difflist': [
            (r'!>', Punctuation, '#pop'),
            (r',|\.', Punctuation),
            include('conjunction')
        ],
        'strings': [
            (r'"[^"\\]*(?:\\.[^"\\]*)*"', String.Double),
            (r"'[^ \\]*(?:\\.[^ \\]*)*", String.Single),
            (r"\^[^ \\]*(?:\\.[^ \\]*)*\$", String.Regex)
        ],
        'letterset': [
            (r'\(', Punctuation, '#push'),
            (r'\)|\n', Punctuation, '#pop'),
            (r'!\w', Name.Variable),
            (r'\s+', Text),
            (r'\*', Name.Constant),
            (r'.', String.Char)
        ],
        'macro': [
            (r'\s+', Text),
            include('comment'),
            (r'(:end.*?)(\.)', bygroups(Name.Builtin, Punctuation), '#pop'),
            # :begin environments nest
            (r'(:begin.*?)(\.)', bygroups(Name.Builtin, Punctuation),
             '#push'),
            (r':[-\w]+', Name.Builtin),
            include('strings'),
            (r'[-\w]+', Name),
            (r'\.', Punctuation)
        ]
    }
# Color scheme for SimpleMRS output on ANSI terminals, mapping Pygments
# token types to (light-background, dark-background) color-name pairs as
# consumed by pygments.formatters.terminal.TerminalFormatter (via its
# `colorscheme` option).  '__' means underline, '**' bold, '*name*'
# bright variant, '_name_' underlined color.
mrs_colorscheme = {
    Token:            ('', ''),
    # Whitespace:       ('lightgray', 'darkgray'),
    # Comment:          ('lightgray', 'darkgray'),
    # Comment.Preproc:  ('teal', 'turquoise'),
    # Keyword:          ('darkblue', 'blue'),
    # Keyword.Type:     ('teal', 'turquoise'),
    Operator.Word:    ('__', '__'),            # HCONS or ICONS relations
    Name.Builtin:     ('**', '**'),            # LTOP, RELS, etc
    # used for variables
    Name.Label:       ('brown', '*yellow*'),   # handles
    Name.Function:    ('*purple*', '*fuchsia*'),  # events
    Name.Variable:    ('*darkblue*', '*blue*'),   # ref-inds (x)
    Name.Other:       ('*teal*', '*turquoise*'),  # underspecified (i, p, u)
    # role arguments
    Name.Namespace:   ('__', '__'),            # LBL
    Name.Class:       ('__', '__'),            # ARG0
    Name.Constant:    ('darkred', 'red'),      # CARG
    Name.Tag:         ('__', '__'),            # others
    # Name.Exception:   ('teal', 'turquoise'),
    # Name.Decorator:   ('darkgray', 'lightgray'),
    Name.Attribute:   ('darkgray', 'darkgray'),   # variable properties
    String:           ('brown', 'brown'),
    String.Symbol:    ('darkgreen', 'green'),
    String.Other:     ('green', 'darkgreen'),
    Number:           ('lightgray', 'lightgray'),  # lnk
    # Generic.Deleted:    ('red', 'red'),
    # Generic.Inserted:   ('darkgreen', 'green'),
    # Generic.Heading:    ('**', '**'),
    # Generic.Subheading: ('*purple*', '*fuchsia*'),
    # Generic.Error:      ('red', 'red'),
    Error:            ('_red_', '_red_'),
}
class SimpleMrsLexer(RegexLexer):
    """
    A Pygments-based lexer for the SimpleMRS serialization format.

    Tokenizes the bracketed MRS structure: TOP/LTOP/INDEX features,
    RELS/HCONS/ICONS lists, elementary predications with their roles,
    variables with property lists, surface-alignment ``<..>`` lnk
    values, and quoted strings.
    """
    name = 'SimpleMRS'
    aliases = ['mrs']
    filenames = ['*.mrs']

    tokens = {
        'root': [
            (r'\s+', Text),
            (r'\[|\]', Punctuation, 'mrs')
        ],
        'mrs': [
            (r'\s+', Text),
            include('strings'),
            include('vars'),
            (r'\]', Punctuation, '#pop'),
            (r'<', Number, 'lnk'),
            (r'(TOP|LTOP|INDEX)(\s*)(:)',
             bygroups(Name.Builtin, Text, Punctuation)),
            (r'(RELS|HCONS|ICONS)(\s*)(:)(\s*)(<)',
             bygroups(Name.Builtin, Text, Punctuation, Text, Punctuation),
             'list'),
        ],
        'strings': [
            (r'"[^"\\]*(?:\\.[^"\\]*)*"', String.Double),
            (r"'[^ \\]*(?:\\.[^ \\]*)*", String.Single),
        ],
        'vars': [
            # variables keyed by their sort; colors differ per sort
            (r'(?:h|handle)\d+', Name.Label),
            (r'(?:e|event)\d+', Name.Function, 'var'),
            (r'(?:x|ref-ind)\d+', Name.Variable, 'var'),
            (r'(?:i|individual|p|non_event|u|semarg)\d+', Name.Other, 'var'),
        ],
        'var': [
            (r'\s+', Text),
            (r'\[', Punctuation, 'proplist'),
            # no property list follows the variable; back out silently
            default('#pop')
        ],
        'proplist': [
            (r'\s+', Text),
            (r'([^:\s]+)(\s*)(:)(\s*)([^\s]+)',
             bygroups(Name.Attribute, Text, Punctuation, Text, Text)),
            (r'\]', Punctuation, '#pop'),
            (r'e|event|x|ref-ind', Name.Variable),
            (r'\w+', Name.Other)
        ],
        'lnk': [
            (r'\s+', Text),
            (r'>', Number, '#pop'),
            (r'\d+[:#]\d+|@\d+|\d+(?:\s+\d+)*', Number),
        ],
        'list': [
            (r'\s+', Text),
            (r'>', Punctuation, '#pop'),
            # an EP opens with '[' and its predicate comes first
            (r'\[', Punctuation, ('ep', 'pred')),
            include('vars'),
            (r'qeq|outscopes|lheq|[^\s]+', Operator.Word),
        ],
        'ep': [
            (r'\s+', Text),
            (r'<', Number, 'lnk'),
            (r'\]', Punctuation, '#pop'),
            include('strings'),
            (r'(LBL)(\s*)(:)', bygroups(Name.Namespace, Text, Punctuation)),
            (r'(ARG0)(\s*)(:)', bygroups(Name.Class, Text, Punctuation)),
            (r'(CARG)(\s*)(:)', bygroups(Name.Constant, Text, Punctuation)),
            (r'([^:\s]+)(\s*)(:)', bygroups(Name.Tag, Text, Punctuation)),
            include('vars')
        ],
        'pred': [
            (r'\s+', Text),
            (r'"[^"_\\]*(?:\\.[^"\\]*)*"', String.Symbol, '#pop'),
            (r"'[^ _\\]*(?:\\.[^ \\]*?)*", String.Symbol, '#pop'),
            (r'[^ <]+', String.Symbol, '#pop')
        ]
    }
[docs] def get_tokens_unprocessed(self, text): for index, token, value in RegexLexer.get_tokens_unprocessed(self, text): if token is String.Symbol and '_q_' in value: yield index, String.Other, value else: yield index, token, value