|
""" |
|
pygments.lexers.dsls |
|
~~~~~~~~~~~~~~~~~~~~ |
|
|
|
Lexers for various domain-specific languages. |
|
|
|
:copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS. |
|
:license: BSD, see LICENSE for details. |
|
""" |
|
|
|
import re |
|
|
|
from pygments.lexer import ExtendedRegexLexer, RegexLexer, bygroups, words, \ |
|
include, default, this, using, combined |
|
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ |
|
Number, Punctuation, Whitespace |
|
|
|
__all__ = ['ProtoBufLexer', 'ZeekLexer', 'PuppetLexer', 'RslLexer', |
|
'MscgenLexer', 'VGLLexer', 'AlloyLexer', 'PanLexer', |
|
'CrmshLexer', 'ThriftLexer', 'FlatlineLexer', 'SnowballLexer'] |
|
|
|
|
|
class ProtoBufLexer(RegexLexer): |
|
""" |
|
Lexer for Protocol Buffer definition files. |
|
""" |
|
|
|
name = 'Protocol Buffer' |
|
url = 'https://developers.google.com/protocol-buffers/' |
|
aliases = ['protobuf', 'proto'] |
|
filenames = ['*.proto'] |
|
version_added = '1.4' |
|
|
|
tokens = { |
|
'root': [ |
|
(r'[ \t]+', Whitespace), |
|
(r'[,;{}\[\]()<>]', Punctuation), |
|
(r'/(\\\n)?/(\n|(.|\n)*?[^\\]\n)', Comment.Single), |
|
(r'/(\\\n)?\*(.|\n)*?\*(\\\n)?/', Comment.Multiline), |
|
(words(( |
|
'import', 'option', 'optional', 'required', 'repeated', |
|
'reserved', 'default', 'packed', 'ctype', 'extensions', 'to', |
|
'max', 'rpc', 'returns', 'oneof', 'syntax'), prefix=r'\b', suffix=r'\b'), |
|
Keyword), |
|
(words(( |
|
'int32', 'int64', 'uint32', 'uint64', 'sint32', 'sint64', |
|
'fixed32', 'fixed64', 'sfixed32', 'sfixed64', |
|
'float', 'double', 'bool', 'string', 'bytes'), suffix=r'\b'), |
|
Keyword.Type), |
|
(r'(true|false)\b', Keyword.Constant), |
|
(r'(package)(\s+)', bygroups(Keyword.Namespace, Whitespace), 'package'), |
|
(r'(message|extend)(\s+)', |
|
bygroups(Keyword.Declaration, Whitespace), 'message'), |
|
(r'(enum|group|service)(\s+)', |
|
bygroups(Keyword.Declaration, Whitespace), 'type'), |
|
(r'\".*?\"', String), |
|
(r'\'.*?\'', String), |
|
(r'(\d+\.\d*|\.\d+|\d+)[eE][+-]?\d+[LlUu]*', Number.Float), |
|
(r'(\d+\.\d*|\.\d+|\d+[fF])[fF]?', Number.Float), |
|
(r'(\-?(inf|nan))\b', Number.Float), |
|
(r'0x[0-9a-fA-F]+[LlUu]*', Number.Hex), |
|
(r'0[0-7]+[LlUu]*', Number.Oct), |
|
(r'\d+[LlUu]*', Number.Integer), |
|
(r'[+-=]', Operator), |
|
(r'([a-zA-Z_][\w.]*)([ \t]*)(=)', |
|
bygroups(Name.Attribute, Whitespace, Operator)), |
|
(r'[a-zA-Z_][\w.]*', Name), |
|
], |
|
'package': [ |
|
(r'[a-zA-Z_]\w*', Name.Namespace, '#pop'), |
|
default('#pop'), |
|
], |
|
'message': [ |
|
(r'[a-zA-Z_]\w*', Name.Class, '#pop'), |
|
default('#pop'), |
|
], |
|
'type': [ |
|
(r'[a-zA-Z_]\w*', Name, '#pop'), |
|
default('#pop'), |
|
], |
|
} |
|
|
|
|
|
class ThriftLexer(RegexLexer): |
|
""" |
|
For Thrift interface definitions. |
|
""" |
|
name = 'Thrift' |
|
url = 'https://thrift.apache.org/' |
|
aliases = ['thrift'] |
|
filenames = ['*.thrift'] |
|
mimetypes = ['application/x-thrift'] |
|
version_added = '2.1' |
|
|
|
tokens = { |
|
'root': [ |
|
include('whitespace'), |
|
include('comments'), |
|
(r'"', String.Double, combined('stringescape', 'dqs')), |
|
(r'\'', String.Single, combined('stringescape', 'sqs')), |
|
(r'(namespace)(\s+)', |
|
bygroups(Keyword.Namespace, Whitespace), 'namespace'), |
|
(r'(enum|union|struct|service|exception)(\s+)', |
|
bygroups(Keyword.Declaration, Whitespace), 'class'), |
|
(r'((?:(?:[^\W\d]|\$)[\w.\[\]$<>]*\s+)+?)' |
|
r'((?:[^\W\d]|\$)[\w$]*)' |
|
r'(\s*)(\()', |
|
bygroups(using(this), Name.Function, Whitespace, Operator)), |
|
include('keywords'), |
|
include('numbers'), |
|
(r'[&=]', Operator), |
|
(r'[:;,{}()<>\[\]]', Punctuation), |
|
(r'[a-zA-Z_](\.\w|\w)*', Name), |
|
], |
|
'whitespace': [ |
|
(r'\n', Whitespace), |
|
(r'\s+', Whitespace), |
|
], |
|
'comments': [ |
|
(r'#.*$', Comment), |
|
(r'//.*?\n', Comment), |
|
(r'/\*[\w\W]*?\*/', Comment.Multiline), |
|
], |
|
'stringescape': [ |
|
(r'\\([\\nrt"\'])', String.Escape), |
|
], |
|
'dqs': [ |
|
(r'"', String.Double, '#pop'), |
|
(r'[^\\"\n]+', String.Double), |
|
], |
|
'sqs': [ |
|
(r"'", String.Single, '#pop'), |
|
(r'[^\\\'\n]+', String.Single), |
|
], |
|
'namespace': [ |
|
(r'[a-z*](\.\w|\w)*', Name.Namespace, '#pop'), |
|
default('#pop'), |
|
], |
|
'class': [ |
|
(r'[a-zA-Z_]\w*', Name.Class, '#pop'), |
|
default('#pop'), |
|
], |
|
'keywords': [ |
|
(r'(async|oneway|extends|throws|required|optional)\b', Keyword), |
|
(r'(true|false)\b', Keyword.Constant), |
|
(r'(const|typedef)\b', Keyword.Declaration), |
|
(words(( |
|
'cpp_namespace', 'cpp_include', 'cpp_type', 'java_package', |
|
'cocoa_prefix', 'csharp_namespace', 'delphi_namespace', |
|
'php_namespace', 'py_module', 'perl_package', |
|
'ruby_namespace', 'smalltalk_category', 'smalltalk_prefix', |
|
'xsd_all', 'xsd_optional', 'xsd_nillable', 'xsd_namespace', |
|
'xsd_attrs', 'include'), suffix=r'\b'), |
|
Keyword.Namespace), |
|
(words(( |
|
'void', 'bool', 'byte', 'i16', 'i32', 'i64', 'double', |
|
'string', 'binary', 'map', 'list', 'set', 'slist', |
|
'senum'), suffix=r'\b'), |
|
Keyword.Type), |
|
(words(( |
|
'BEGIN', 'END', '__CLASS__', '__DIR__', '__FILE__', |
|
'__FUNCTION__', '__LINE__', '__METHOD__', '__NAMESPACE__', |
|
'abstract', 'alias', 'and', 'args', 'as', 'assert', 'begin', |
|
'break', 'case', 'catch', 'class', 'clone', 'continue', |
|
'declare', 'def', 'default', 'del', 'delete', 'do', 'dynamic', |
|
'elif', 'else', 'elseif', 'elsif', 'end', 'enddeclare', |
|
'endfor', 'endforeach', 'endif', 'endswitch', 'endwhile', |
|
'ensure', 'except', 'exec', 'finally', 'float', 'for', |
|
'foreach', 'function', 'global', 'goto', 'if', 'implements', |
|
'import', 'in', 'inline', 'instanceof', 'interface', 'is', |
|
'lambda', 'module', 'native', 'new', 'next', 'nil', 'not', |
|
'or', 'pass', 'public', 'print', 'private', 'protected', |
|
'raise', 'redo', 'rescue', 'retry', 'register', 'return', |
|
'self', 'sizeof', 'static', 'super', 'switch', 'synchronized', |
|
'then', 'this', 'throw', 'transient', 'try', 'undef', |
|
'unless', 'unsigned', 'until', 'use', 'var', 'virtual', |
|
'volatile', 'when', 'while', 'with', 'xor', 'yield'), |
|
prefix=r'\b', suffix=r'\b'), |
|
Keyword.Reserved), |
|
], |
|
'numbers': [ |
|
(r'[+-]?(\d+\.\d+([eE][+-]?\d+)?|\.?\d+[eE][+-]?\d+)', Number.Float), |
|
(r'[+-]?0x[0-9A-Fa-f]+', Number.Hex), |
|
(r'[+-]?[0-9]+', Number.Integer), |
|
], |
|
} |
|
|
|
|
|
class ZeekLexer(RegexLexer): |
|
""" |
|
For Zeek scripts. |
|
""" |
|
name = 'Zeek' |
|
url = 'https://www.zeek.org/' |
|
aliases = ['zeek', 'bro'] |
|
filenames = ['*.zeek', '*.bro'] |
|
version_added = '2.5' |
|
|
|
_hex = r'[0-9a-fA-F]' |
|
_float = r'((\d*\.?\d+)|(\d+\.?\d*))([eE][-+]?\d+)?' |
|
_h = r'[A-Za-z0-9][-A-Za-z0-9]*' |
|
|
|
tokens = { |
|
'root': [ |
|
include('whitespace'), |
|
include('comments'), |
|
include('directives'), |
|
include('attributes'), |
|
include('types'), |
|
include('keywords'), |
|
include('literals'), |
|
include('operators'), |
|
include('punctuation'), |
|
(r'((?:[A-Za-z_]\w*)(?:::(?:[A-Za-z_]\w*))*)(?=\s*\()', |
|
Name.Function), |
|
include('identifiers'), |
|
], |
|
|
|
'whitespace': [ |
|
(r'\n', Whitespace), |
|
(r'\s+', Whitespace), |
|
(r'(\\)(\n)', bygroups(Text, Whitespace)), |
|
], |
|
|
|
'comments': [ |
|
(r'#.*$', Comment), |
|
], |
|
|
|
'directives': [ |
|
(r'@(load-plugin|load-sigs|load|unload)\b.*$', Comment.Preproc), |
|
(r'@(DEBUG|DIR|FILENAME|deprecated|if|ifdef|ifndef|else|endif)\b', Comment.Preproc), |
|
(r'(@prefixes)(\s*)((\+?=).*)$', bygroups(Comment.Preproc, |
|
Whitespace, Comment.Preproc)), |
|
], |
|
|
|
'attributes': [ |
|
(words(('redef', 'priority', 'log', 'optional', 'default', 'add_func', |
|
'delete_func', 'expire_func', 'read_expire', 'write_expire', |
|
'create_expire', 'synchronized', 'persistent', 'rotate_interval', |
|
'rotate_size', 'encrypt', 'raw_output', 'mergeable', 'error_handler', |
|
'type_column', 'deprecated'), |
|
prefix=r'&', suffix=r'\b'), |
|
Keyword.Pseudo), |
|
], |
|
|
|
'types': [ |
|
(words(('any', |
|
'enum', 'record', 'set', 'table', 'vector', |
|
'function', 'hook', 'event', |
|
'addr', 'bool', 'count', 'double', 'file', 'int', 'interval', |
|
'pattern', 'port', 'string', 'subnet', 'time'), |
|
suffix=r'\b'), |
|
Keyword.Type), |
|
|
|
(r'(opaque)(\s+)(of)(\s+)((?:[A-Za-z_]\w*)(?:::(?:[A-Za-z_]\w*))*)\b', |
|
bygroups(Keyword.Type, Whitespace, Operator.Word, Whitespace, Keyword.Type)), |
|
|
|
(r'(type)(\s+)((?:[A-Za-z_]\w*)(?:::(?:[A-Za-z_]\w*))*)(\s*)(:)(\s*)\b(record|enum)\b', |
|
bygroups(Keyword, Whitespace, Name.Class, Whitespace, Operator, Whitespace, Keyword.Type)), |
|
|
|
(r'(type)(\s+)((?:[A-Za-z_]\w*)(?:::(?:[A-Za-z_]\w*))*)(\s*)(:)', |
|
bygroups(Keyword, Whitespace, Name, Whitespace, Operator)), |
|
|
|
(r'(redef)(\s+)(record|enum)(\s+)((?:[A-Za-z_]\w*)(?:::(?:[A-Za-z_]\w*))*)\b', |
|
bygroups(Keyword, Whitespace, Keyword.Type, Whitespace, Name.Class)), |
|
], |
|
|
|
'keywords': [ |
|
(words(('redef', 'export', 'if', 'else', 'for', 'while', |
|
'return', 'break', 'next', 'continue', 'fallthrough', |
|
'switch', 'default', 'case', |
|
'add', 'delete', |
|
'when', 'timeout', 'schedule'), |
|
suffix=r'\b'), |
|
Keyword), |
|
(r'(print)\b', Keyword), |
|
(r'(global|local|const|option)\b', Keyword.Declaration), |
|
(r'(module)(\s+)(([A-Za-z_]\w*)(?:::([A-Za-z_]\w*))*)\b', |
|
bygroups(Keyword.Namespace, Whitespace, Name.Namespace)), |
|
], |
|
|
|
'literals': [ |
|
(r'"', String, 'string'), |
|
|
|
|
|
|
|
|
|
(r'/(?=.*/)', String.Regex, 'regex'), |
|
|
|
(r'(T|F)\b', Keyword.Constant), |
|
|
|
|
|
(r'\d{1,5}/(udp|tcp|icmp|unknown)\b', Number), |
|
|
|
|
|
(r'(\d{1,3}.){3}(\d{1,3})\b', Number), |
|
|
|
|
|
(r'\[([0-9a-fA-F]{0,4}:){2,7}([0-9a-fA-F]{0,4})?((\d{1,3}.){3}(\d{1,3}))?\]', Number), |
|
|
|
|
|
(r'0[xX]' + _hex + r'+\b', Number.Hex), |
|
(_float + r'\s*(day|hr|min|sec|msec|usec)s?\b', Number.Float), |
|
(_float + r'\b', Number.Float), |
|
(r'(\d+)\b', Number.Integer), |
|
|
|
|
|
(_h + r'(\.' + _h + r')+', String), |
|
], |
|
|
|
'operators': [ |
|
(r'[!%*/+<=>~|&^-]', Operator), |
|
(r'([-+=&|]{2}|[+=!><-]=)', Operator), |
|
(r'(in|as|is|of)\b', Operator.Word), |
|
(r'\??\$', Operator), |
|
], |
|
|
|
'punctuation': [ |
|
(r'[{}()\[\],;.]', Punctuation), |
|
|
|
|
|
|
|
|
|
(r'[?:]', Punctuation), |
|
], |
|
|
|
'identifiers': [ |
|
(r'([a-zA-Z_]\w*)(::)', bygroups(Name, Punctuation)), |
|
(r'[a-zA-Z_]\w*', Name) |
|
], |
|
|
|
'string': [ |
|
(r'\\.', String.Escape), |
|
(r'%-?[0-9]*(\.[0-9]+)?[DTd-gsx]', String.Escape), |
|
(r'"', String, '#pop'), |
|
(r'.', String), |
|
], |
|
|
|
'regex': [ |
|
(r'\\.', String.Escape), |
|
(r'/', String.Regex, '#pop'), |
|
(r'.', String.Regex), |
|
], |
|
} |
|
|
|
|
|
BroLexer = ZeekLexer |
|
|
|
|
|
class PuppetLexer(RegexLexer): |
|
""" |
|
For Puppet configuration DSL. |
|
""" |
|
name = 'Puppet' |
|
url = 'https://puppet.com/' |
|
aliases = ['puppet'] |
|
filenames = ['*.pp'] |
|
version_added = '1.6' |
|
|
|
tokens = { |
|
'root': [ |
|
include('comments'), |
|
include('keywords'), |
|
include('names'), |
|
include('numbers'), |
|
include('operators'), |
|
include('strings'), |
|
|
|
(r'[]{}:(),;[]', Punctuation), |
|
(r'\s+', Whitespace), |
|
], |
|
|
|
'comments': [ |
|
(r'(\s*)(#.*)$', bygroups(Whitespace, Comment)), |
|
(r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline), |
|
], |
|
|
|
'operators': [ |
|
(r'(=>|\?|<|>|=|\+|-|/|\*|~|!|\|)', Operator), |
|
(r'(in|and|or|not)\b', Operator.Word), |
|
], |
|
|
|
'names': [ |
|
(r'[a-zA-Z_]\w*', Name.Attribute), |
|
(r'(\$\S+)(\[)(\S+)(\])', bygroups(Name.Variable, Punctuation, |
|
String, Punctuation)), |
|
(r'\$\S+', Name.Variable), |
|
], |
|
|
|
'numbers': [ |
|
|
|
(r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?j?', Number.Float), |
|
(r'\d+[eE][+-]?[0-9]+j?', Number.Float), |
|
(r'0[0-7]+j?', Number.Oct), |
|
(r'0[xX][a-fA-F0-9]+', Number.Hex), |
|
(r'\d+L', Number.Integer.Long), |
|
(r'\d+j?', Number.Integer) |
|
], |
|
|
|
'keywords': [ |
|
|
|
|
|
(words(( |
|
'absent', 'alert', 'alias', 'audit', 'augeas', 'before', 'case', |
|
'check', 'class', 'computer', 'configured', 'contained', |
|
'create_resources', 'crit', 'cron', 'debug', 'default', |
|
'define', 'defined', 'directory', 'else', 'elsif', 'emerg', |
|
'err', 'exec', 'extlookup', 'fail', 'false', 'file', |
|
'filebucket', 'fqdn_rand', 'generate', 'host', 'if', 'import', |
|
'include', 'info', 'inherits', 'inline_template', 'installed', |
|
'interface', 'k5login', 'latest', 'link', 'loglevel', |
|
'macauthorization', 'mailalias', 'maillist', 'mcx', 'md5', |
|
'mount', 'mounted', 'nagios_command', 'nagios_contact', |
|
'nagios_contactgroup', 'nagios_host', 'nagios_hostdependency', |
|
'nagios_hostescalation', 'nagios_hostextinfo', 'nagios_hostgroup', |
|
'nagios_service', 'nagios_servicedependency', 'nagios_serviceescalation', |
|
'nagios_serviceextinfo', 'nagios_servicegroup', 'nagios_timeperiod', |
|
'node', 'noop', 'notice', 'notify', 'package', 'present', 'purged', |
|
'realize', 'regsubst', 'resources', 'role', 'router', 'running', |
|
'schedule', 'scheduled_task', 'search', 'selboolean', 'selmodule', |
|
'service', 'sha1', 'shellquote', 'split', 'sprintf', |
|
'ssh_authorized_key', 'sshkey', 'stage', 'stopped', 'subscribe', |
|
'tag', 'tagged', 'template', 'tidy', 'true', 'undef', 'unmounted', |
|
'user', 'versioncmp', 'vlan', 'warning', 'yumrepo', 'zfs', 'zone', |
|
'zpool'), prefix='(?i)', suffix=r'\b'), |
|
Keyword), |
|
], |
|
|
|
'strings': [ |
|
(r'"([^"])*"', String), |
|
(r"'(\\'|[^'])*'", String), |
|
], |
|
|
|
} |
|
|
|
|
|
class RslLexer(RegexLexer): |
|
""" |
|
RSL is the formal specification |
|
language used in RAISE (Rigorous Approach to Industrial Software Engineering) |
|
method. |
|
""" |
|
name = 'RSL' |
|
url = 'http://en.wikipedia.org/wiki/RAISE' |
|
aliases = ['rsl'] |
|
filenames = ['*.rsl'] |
|
mimetypes = ['text/rsl'] |
|
version_added = '2.0' |
|
|
|
flags = re.MULTILINE | re.DOTALL |
|
|
|
tokens = { |
|
'root': [ |
|
(words(( |
|
'Bool', 'Char', 'Int', 'Nat', 'Real', 'Text', 'Unit', 'abs', |
|
'all', 'always', 'any', 'as', 'axiom', 'card', 'case', 'channel', |
|
'chaos', 'class', 'devt_relation', 'dom', 'elems', 'else', 'elif', |
|
'end', 'exists', 'extend', 'false', 'for', 'hd', 'hide', 'if', |
|
'in', 'is', 'inds', 'initialise', 'int', 'inter', 'isin', 'len', |
|
'let', 'local', 'ltl_assertion', 'object', 'of', 'out', 'post', |
|
'pre', 'read', 'real', 'rng', 'scheme', 'skip', 'stop', 'swap', |
|
'then', 'theory', 'test_case', 'tl', 'transition_system', 'true', |
|
'type', 'union', 'until', 'use', 'value', 'variable', 'while', |
|
'with', 'write', '~isin', '-inflist', '-infset', '-list', |
|
'-set'), prefix=r'\b', suffix=r'\b'), |
|
Keyword), |
|
(r'(variable|value)\b', Keyword.Declaration), |
|
(r'--.*?\n', Comment), |
|
(r'<:.*?:>', Comment), |
|
(r'\{!.*?!\}', Comment), |
|
(r'/\*.*?\*/', Comment), |
|
(r'^([ \t]*)([\w]+)([ \t]*)(:[^:])', bygroups(Whitespace, |
|
Name.Function, Whitespace, Name.Function)), |
|
(r'(^[ \t]*)([\w]+)([ \t]*)(\([\w\s,]*\))([ \t]*)(is|as)', |
|
bygroups(Whitespace, Name.Function, Whitespace, Text, |
|
Whitespace, Keyword)), |
|
(r'\b[A-Z]\w*\b', Keyword.Type), |
|
(r'(true|false)\b', Keyword.Constant), |
|
(r'".*"', String), |
|
(r'\'.\'', String.Char), |
|
(r'(><|->|-m->|/\\|<=|<<=|<\.|\|\||\|\^\||-~->|-~m->|\\/|>=|>>|' |
|
r'\.>|\+\+|-\\|<->|=>|:-|~=|\*\*|<<|>>=|\+>|!!|\|=\||#)', |
|
Operator), |
|
(r'[0-9]+\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float), |
|
(r'0x[0-9a-f]+', Number.Hex), |
|
(r'[0-9]+', Number.Integer), |
|
(r'\s+', Whitespace), |
|
(r'.', Text), |
|
], |
|
} |
|
|
|
def analyse_text(text): |
|
""" |
|
Check for the most common text in the beginning of a RSL file. |
|
""" |
|
if re.search(r'scheme\s*.*?=\s*class\s*type', text, re.I) is not None: |
|
return 1.0 |
|
|
|
|
|
class MscgenLexer(RegexLexer): |
|
""" |
|
For Mscgen files. |
|
""" |
|
name = 'Mscgen' |
|
url = 'http://www.mcternan.me.uk/mscgen/' |
|
aliases = ['mscgen', 'msc'] |
|
filenames = ['*.msc'] |
|
version_added = '1.6' |
|
|
|
_var = r'(\w+|"(?:\\"|[^"])*")' |
|
|
|
tokens = { |
|
'root': [ |
|
(r'msc\b', Keyword.Type), |
|
|
|
(r'(hscale|HSCALE|width|WIDTH|wordwraparcs|WORDWRAPARCS' |
|
r'|arcgradient|ARCGRADIENT)\b', Name.Property), |
|
|
|
(r'(abox|ABOX|rbox|RBOX|box|BOX|note|NOTE)\b', Operator.Word), |
|
(r'(\.|-|\|){3}', Keyword), |
|
(r'(?:-|=|\.|:){2}' |
|
r'|<<=>>|<->|<=>|<<>>|<:>' |
|
r'|->|=>>|>>|=>|:>|-x|-X' |
|
r'|<-|<<=|<<|<=|<:|x-|X-|=', Operator), |
|
|
|
(r'\*', Name.Builtin), |
|
(_var, Name.Variable), |
|
|
|
(r'\[', Punctuation, 'attrs'), |
|
(r'\{|\}|,|;', Punctuation), |
|
include('comments') |
|
], |
|
'attrs': [ |
|
(r'\]', Punctuation, '#pop'), |
|
(_var + r'(\s*)(=)(\s*)' + _var, |
|
bygroups(Name.Attribute, Whitespace, Operator, Whitespace, |
|
String)), |
|
(r',', Punctuation), |
|
include('comments') |
|
], |
|
'comments': [ |
|
(r'(?://|#).*?\n', Comment.Single), |
|
(r'/\*(?:.|\n)*?\*/', Comment.Multiline), |
|
(r'[ \t\r\n]+', Whitespace) |
|
] |
|
} |
|
|
|
|
|
class VGLLexer(RegexLexer): |
|
""" |
|
For SampleManager VGL source code. |
|
""" |
|
name = 'VGL' |
|
url = 'http://www.thermoscientific.com/samplemanager' |
|
aliases = ['vgl'] |
|
filenames = ['*.rpf'] |
|
version_added = '1.6' |
|
|
|
flags = re.MULTILINE | re.DOTALL | re.IGNORECASE |
|
|
|
tokens = { |
|
'root': [ |
|
(r'\{[^}]*\}', Comment.Multiline), |
|
(r'declare', Keyword.Constant), |
|
(r'(if|then|else|endif|while|do|endwhile|and|or|prompt|object' |
|
r'|create|on|line|with|global|routine|value|endroutine|constant' |
|
r'|global|set|join|library|compile_option|file|exists|create|copy' |
|
r'|delete|enable|windows|name|notprotected)(?! *[=<>.,()])', |
|
Keyword), |
|
(r'(true|false|null|empty|error|locked)', Keyword.Constant), |
|
(r'[~^*#!%&\[\]()<>|+=:;,./?-]', Operator), |
|
(r'"[^"]*"', String), |
|
(r'(\.)([a-z_$][\w$]*)', bygroups(Operator, Name.Attribute)), |
|
(r'[0-9][0-9]*(\.[0-9]+(e[+\-]?[0-9]+)?)?', Number), |
|
(r'[a-z_$][\w$]*', Name), |
|
(r'[\r\n]+', Whitespace), |
|
(r'\s+', Whitespace) |
|
] |
|
} |
|
|
|
|
|
class AlloyLexer(RegexLexer): |
|
""" |
|
For Alloy source code. |
|
""" |
|
|
|
name = 'Alloy' |
|
url = 'http://alloy.mit.edu' |
|
aliases = ['alloy'] |
|
filenames = ['*.als'] |
|
mimetypes = ['text/x-alloy'] |
|
version_added = '2.0' |
|
|
|
flags = re.MULTILINE | re.DOTALL |
|
|
|
iden_rex = r'[a-zA-Z_][\w]*"*' |
|
string_rex = r'"\b(\\\\|\\[^\\]|[^"\\])*"' |
|
text_tuple = (r'[^\S\n]+', Whitespace) |
|
|
|
tokens = { |
|
'sig': [ |
|
(r'(extends)\b', Keyword, '#pop'), |
|
(iden_rex, Name), |
|
text_tuple, |
|
(r',', Punctuation), |
|
(r'\{', Operator, '#pop'), |
|
], |
|
'module': [ |
|
text_tuple, |
|
(iden_rex, Name, '#pop'), |
|
], |
|
'fun': [ |
|
text_tuple, |
|
(r'\{', Operator, '#pop'), |
|
(iden_rex, Name, '#pop'), |
|
], |
|
'fact': [ |
|
include('fun'), |
|
(string_rex, String, '#pop'), |
|
], |
|
'root': [ |
|
(r'--.*?$', Comment.Single), |
|
(r'//.*?$', Comment.Single), |
|
(r'/\*.*?\*/', Comment.Multiline), |
|
text_tuple, |
|
(r'(module|open)(\s+)', bygroups(Keyword.Namespace, Whitespace), |
|
'module'), |
|
(r'(sig|enum)(\s+)', bygroups(Keyword.Declaration, Whitespace), 'sig'), |
|
(r'(iden|univ|none)\b', Keyword.Constant), |
|
(r'(int|Int)\b', Keyword.Type), |
|
(r'(var|this|abstract|extends|set|seq|one|lone|let)\b', Keyword), |
|
(r'(all|some|no|sum|disj|when|else)\b', Keyword), |
|
(r'(run|check|for|but|exactly|expect|as|steps)\b', Keyword), |
|
(r'(always|after|eventually|until|release)\b', Keyword), |
|
(r'(historically|before|once|since|triggered)\b', Keyword), |
|
(r'(and|or|implies|iff|in)\b', Operator.Word), |
|
(r'(fun|pred|assert)(\s+)', bygroups(Keyword, Whitespace), 'fun'), |
|
(r'(fact)(\s+)', bygroups(Keyword, Whitespace), 'fact'), |
|
(r'!|#|&&|\+\+|<<|>>|>=|<=>|<=|\.\.|\.|->', Operator), |
|
(r'[-+/*%=<>&!^|~{}\[\]().\';]', Operator), |
|
(iden_rex, Name), |
|
(r'[:,]', Punctuation), |
|
(r'[0-9]+', Number.Integer), |
|
(string_rex, String), |
|
(r'\n', Whitespace), |
|
] |
|
} |
|
|
|
|
|
class PanLexer(RegexLexer): |
|
""" |
|
Lexer for pan source files. |
|
|
|
Based on tcsh lexer. |
|
""" |
|
|
|
name = 'Pan' |
|
url = 'https://github.com/quattor/pan/' |
|
aliases = ['pan'] |
|
filenames = ['*.pan'] |
|
version_added = '2.0' |
|
|
|
tokens = { |
|
'root': [ |
|
include('basic'), |
|
(r'\(', Keyword, 'paren'), |
|
(r'\{', Keyword, 'curly'), |
|
include('data'), |
|
], |
|
'basic': [ |
|
(words(( |
|
'if', 'for', 'with', 'else', 'type', 'bind', 'while', 'valid', 'final', |
|
'prefix', 'unique', 'object', 'foreach', 'include', 'template', |
|
'function', 'variable', 'structure', 'extensible', 'declaration'), |
|
prefix=r'\b', suffix=r'\b'), |
|
Keyword), |
|
(words(( |
|
'file_contents', 'format', 'index', 'length', 'match', 'matches', |
|
'replace', 'splice', 'split', 'substr', 'to_lowercase', 'to_uppercase', |
|
'debug', 'error', 'traceback', 'deprecated', 'base64_decode', |
|
'base64_encode', 'digest', 'escape', 'unescape', 'append', 'create', |
|
'first', 'nlist', 'key', 'list', 'merge', 'next', 'prepend', 'is_boolean', |
|
'is_defined', 'is_double', 'is_list', 'is_long', 'is_nlist', 'is_null', |
|
'is_number', 'is_property', 'is_resource', 'is_string', 'to_boolean', |
|
'to_double', 'to_long', 'to_string', 'clone', 'delete', 'exists', |
|
'path_exists', 'if_exists', 'return', 'value'), |
|
prefix=r'\b', suffix=r'\b'), |
|
Name.Builtin), |
|
(r'#.*', Comment), |
|
(r'\\[\w\W]', String.Escape), |
|
(r'(\b\w+)(\s*)(=)', bygroups(Name.Variable, Whitespace, Operator)), |
|
(r'[\[\]{}()=]+', Operator), |
|
(r'<<\s*(\'?)\\?(\w+)[\w\W]+?\2', String), |
|
(r';', Punctuation), |
|
], |
|
'data': [ |
|
(r'(?s)"(\\\\|\\[0-7]+|\\.|[^"\\])*"', String.Double), |
|
(r"(?s)'(\\\\|\\[0-7]+|\\.|[^'\\])*'", String.Single), |
|
(r'\s+', Whitespace), |
|
(r'[^=\s\[\]{}()$"\'`\\;#]+', Text), |
|
(r'\d+(?= |\Z)', Number), |
|
], |
|
'curly': [ |
|
(r'\}', Keyword, '#pop'), |
|
(r':-', Keyword), |
|
(r'\w+', Name.Variable), |
|
(r'[^}:"\'`$]+', Punctuation), |
|
(r':', Punctuation), |
|
include('root'), |
|
], |
|
'paren': [ |
|
(r'\)', Keyword, '#pop'), |
|
include('root'), |
|
], |
|
} |
|
|
|
|
|
class CrmshLexer(RegexLexer): |
|
""" |
|
Lexer for crmsh configuration files for Pacemaker clusters. |
|
""" |
|
name = 'Crmsh' |
|
url = 'http://crmsh.github.io/' |
|
aliases = ['crmsh', 'pcmk'] |
|
filenames = ['*.crmsh', '*.pcmk'] |
|
mimetypes = [] |
|
version_added = '2.1' |
|
|
|
elem = words(( |
|
'node', 'primitive', 'group', 'clone', 'ms', 'location', |
|
'colocation', 'order', 'fencing_topology', 'rsc_ticket', |
|
'rsc_template', 'property', 'rsc_defaults', |
|
'op_defaults', 'acl_target', 'acl_group', 'user', 'role', |
|
'tag'), suffix=r'(?![\w#$-])') |
|
sub = words(( |
|
'params', 'meta', 'operations', 'op', 'rule', |
|
'attributes', 'utilization'), suffix=r'(?![\w#$-])') |
|
acl = words(('read', 'write', 'deny'), suffix=r'(?![\w#$-])') |
|
bin_rel = words(('and', 'or'), suffix=r'(?![\w#$-])') |
|
un_ops = words(('defined', 'not_defined'), suffix=r'(?![\w#$-])') |
|
date_exp = words(('in_range', 'date', 'spec', 'in'), suffix=r'(?![\w#$-])') |
|
acl_mod = (r'(?:tag|ref|reference|attribute|type|xpath)') |
|
bin_ops = (r'(?:lt|gt|lte|gte|eq|ne)') |
|
val_qual = (r'(?:string|version|number)') |
|
rsc_role_action = (r'(?:Master|Started|Slave|Stopped|' |
|
r'start|promote|demote|stop)') |
|
|
|
tokens = { |
|
'root': [ |
|
(r'^(#.*)(\n)?', bygroups(Comment, Whitespace)), |
|
|
|
(r'([\w#$-]+)(=)("(?:""|[^"])*"|\S+)', |
|
bygroups(Name.Attribute, Punctuation, String)), |
|
|
|
|
|
|
|
(r'(node)(\s+)([\w#$-]+)(:)', |
|
bygroups(Keyword, Whitespace, Name, Punctuation)), |
|
|
|
(r'([+-]?([0-9]+|inf)):', Number), |
|
|
|
(elem, Keyword), |
|
(sub, Keyword), |
|
(acl, Keyword), |
|
|
|
(rf'(?:{val_qual}:)?({bin_ops})(?![\w#$-])', Operator.Word), |
|
|
|
(bin_rel, Operator.Word), |
|
(un_ops, Operator.Word), |
|
(date_exp, Operator.Word), |
|
|
|
(r'#[a-z]+(?![\w#$-])', Name.Builtin), |
|
|
|
(rf'({acl_mod})(:)("(?:""|[^"])*"|\S+)', |
|
bygroups(Keyword, Punctuation, Name)), |
|
|
|
|
|
(rf'([\w#$-]+)(?:(:)({rsc_role_action}))?(?![\w#$-])', |
|
bygroups(Name, Punctuation, Operator.Word)), |
|
|
|
(r'(\\(?=\n)|[\[\](){}/:@])', Punctuation), |
|
(r'\s+|\n', Whitespace), |
|
], |
|
} |
|
|
|
|
|
class FlatlineLexer(RegexLexer): |
|
""" |
|
Lexer for Flatline expressions. |
|
""" |
|
name = 'Flatline' |
|
url = 'https://github.com/bigmlcom/flatline' |
|
aliases = ['flatline'] |
|
filenames = [] |
|
mimetypes = ['text/x-flatline'] |
|
version_added = '2.2' |
|
|
|
special_forms = ('let',) |
|
|
|
builtins = ( |
|
"!=", "*", "+", "-", "<", "<=", "=", ">", ">=", "abs", "acos", "all", |
|
"all-but", "all-with-defaults", "all-with-numeric-default", "and", |
|
"asin", "atan", "avg", "avg-window", "bin-center", "bin-count", "call", |
|
"category-count", "ceil", "cond", "cond-window", "cons", "cos", "cosh", |
|
"count", "diff-window", "div", "ensure-value", "ensure-weighted-value", |
|
"epoch", "epoch-day", "epoch-fields", "epoch-hour", "epoch-millisecond", |
|
"epoch-minute", "epoch-month", "epoch-second", "epoch-weekday", |
|
"epoch-year", "exp", "f", "field", "field-prop", "fields", "filter", |
|
"first", "floor", "head", "if", "in", "integer", "language", "length", |
|
"levenshtein", "linear-regression", "list", "ln", "log", "log10", "map", |
|
"matches", "matches?", "max", "maximum", "md5", "mean", "median", "min", |
|
"minimum", "missing", "missing-count", "missing?", "missing_count", |
|
"mod", "mode", "normalize", "not", "nth", "occurrences", "or", |
|
"percentile", "percentile-label", "population", "population-fraction", |
|
"pow", "preferred", "preferred?", "quantile-label", "rand", "rand-int", |
|
"random-value", "re-quote", "real", "replace", "replace-first", "rest", |
|
"round", "row-number", "segment-label", "sha1", "sha256", "sin", "sinh", |
|
"sqrt", "square", "standard-deviation", "standard_deviation", "str", |
|
"subs", "sum", "sum-squares", "sum-window", "sum_squares", "summary", |
|
"summary-no", "summary-str", "tail", "tan", "tanh", "to-degrees", |
|
"to-radians", "variance", "vectorize", "weighted-random-value", "window", |
|
"winnow", "within-percentiles?", "z-score", |
|
) |
|
|
|
valid_name = r'(?!#)[\w!$%*+<=>?/.#-]+' |
|
|
|
tokens = { |
|
'root': [ |
|
|
|
(r'[,]+', Text), |
|
(r'\s+', Whitespace), |
|
|
|
|
|
(r'-?\d+\.\d+', Number.Float), |
|
(r'-?\d+', Number.Integer), |
|
(r'0x-?[a-f\d]+', Number.Hex), |
|
|
|
|
|
(r'"(\\\\|\\[^\\]|[^"\\])*"', String), |
|
(r"\\(.|[a-z]+)", String.Char), |
|
|
|
|
|
(r'_', String.Symbol), |
|
|
|
|
|
(words(special_forms, suffix=' '), Keyword), |
|
|
|
|
|
(words(builtins, suffix=' '), Name.Builtin), |
|
|
|
|
|
(r'(?<=\()' + valid_name, Name.Function), |
|
|
|
|
|
(valid_name, Name.Variable), |
|
|
|
|
|
(r'(\(|\))', Punctuation), |
|
], |
|
} |
|
|
|
|
|
class SnowballLexer(ExtendedRegexLexer): |
|
""" |
|
Lexer for Snowball source code. |
|
""" |
|
|
|
name = 'Snowball' |
|
url = 'https://snowballstem.org/' |
|
aliases = ['snowball'] |
|
filenames = ['*.sbl'] |
|
version_added = '2.2' |
|
|
|
_ws = r'\n\r\t ' |
|
|
|
def __init__(self, **options): |
|
self._reset_stringescapes() |
|
ExtendedRegexLexer.__init__(self, **options) |
|
|
|
def _reset_stringescapes(self): |
|
self._start = "'" |
|
self._end = "'" |
|
|
|
def _string(do_string_first): |
|
def callback(lexer, match, ctx): |
|
s = match.start() |
|
text = match.group() |
|
string = re.compile(rf'([^{re.escape(lexer._start)}]*)(.)').match |
|
escape = re.compile(rf'([^{re.escape(lexer._end)}]*)(.)').match |
|
pos = 0 |
|
do_string = do_string_first |
|
while pos < len(text): |
|
if do_string: |
|
match = string(text, pos) |
|
yield s + match.start(1), String.Single, match.group(1) |
|
if match.group(2) == "'": |
|
yield s + match.start(2), String.Single, match.group(2) |
|
ctx.stack.pop() |
|
break |
|
yield s + match.start(2), String.Escape, match.group(2) |
|
pos = match.end() |
|
match = escape(text, pos) |
|
yield s + match.start(), String.Escape, match.group() |
|
if match.group(2) != lexer._end: |
|
ctx.stack[-1] = 'escape' |
|
break |
|
pos = match.end() |
|
do_string = True |
|
ctx.pos = s + match.end() |
|
return callback |
|
|
|
def _stringescapes(lexer, match, ctx): |
|
lexer._start = match.group(3) |
|
lexer._end = match.group(5) |
|
return bygroups(Keyword.Reserved, Whitespace, String.Escape, Whitespace, |
|
String.Escape)(lexer, match, ctx) |
|
|
|
tokens = { |
|
'root': [ |
|
(r'len\b', Name.Builtin), |
|
(r'lenof\b', Operator.Word), |
|
include('root1'), |
|
], |
|
'root1': [ |
|
(rf'[{_ws}]+', Whitespace), |
|
(r'\d+', Number.Integer), |
|
(r"'", String.Single, 'string'), |
|
(r'[()]', Punctuation), |
|
(r'/\*[\w\W]*?\*/', Comment.Multiline), |
|
(r'//.*', Comment.Single), |
|
(r'[!*+\-/<=>]=|[-=]>|<[+-]|[$*+\-/<=>?\[\]]', Operator), |
|
(words(('as', 'get', 'hex', 'among', 'define', 'decimal', |
|
'backwardmode'), suffix=r'\b'), |
|
Keyword.Reserved), |
|
(words(('strings', 'booleans', 'integers', 'routines', 'externals', |
|
'groupings'), suffix=r'\b'), |
|
Keyword.Reserved, 'declaration'), |
|
(words(('do', 'or', 'and', 'for', 'hop', 'non', 'not', 'set', 'try', |
|
'fail', 'goto', 'loop', 'next', 'test', 'true', |
|
'false', 'unset', 'atmark', 'attach', 'delete', 'gopast', |
|
'insert', 'repeat', 'sizeof', 'tomark', 'atleast', |
|
'atlimit', 'reverse', 'setmark', 'tolimit', 'setlimit', |
|
'backwards', 'substring'), suffix=r'\b'), |
|
Operator.Word), |
|
(words(('size', 'limit', 'cursor', 'maxint', 'minint'), |
|
suffix=r'\b'), |
|
Name.Builtin), |
|
(rf'(stringdef\b)([{_ws}]*)([^{_ws}]+)', |
|
bygroups(Keyword.Reserved, Whitespace, String.Escape)), |
|
(rf'(stringescapes\b)([{_ws}]*)(.)([{_ws}]*)(.)', |
|
_stringescapes), |
|
(r'[A-Za-z]\w*', Name), |
|
], |
|
'declaration': [ |
|
(r'\)', Punctuation, '#pop'), |
|
(words(('len', 'lenof'), suffix=r'\b'), Name, |
|
('root1', 'declaration')), |
|
include('root1'), |
|
], |
|
'string': [ |
|
(r"[^']*'", _string(True)), |
|
], |
|
'escape': [ |
|
(r"[^']*'", _string(False)), |
|
], |
|
} |
|
|
|
def get_tokens_unprocessed(self, text=None, context=None): |
|
self._reset_stringescapes() |
|
return ExtendedRegexLexer.get_tokens_unprocessed(self, text, context) |
|
|