|
""" |
|
pygments.lexers.esoteric |
|
~~~~~~~~~~~~~~~~~~~~~~~~ |
|
|
|
Lexers for esoteric languages. |
|
|
|
:copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS. |
|
:license: BSD, see LICENSE for details. |
|
""" |
|
|
|
from pygments.lexer import RegexLexer, include, words, bygroups |
|
from pygments.token import Comment, Operator, Keyword, Name, String, Number, \ |
|
Punctuation, Error, Whitespace |
|
|
|
__all__ = ['BrainfuckLexer', 'BefungeLexer', 'RedcodeLexer', 'CAmkESLexer', |
|
'CapDLLexer', 'AheuiLexer'] |
|
|
|
|
|
class BrainfuckLexer(RegexLexer):
    """
    Lexer for the esoteric BrainFuck language.

    Runs of same-kind commands are emitted as one token, every character that
    is not one of the eight commands is treated as a comment, and bracket
    nesting is tracked on the state stack so that a ``]`` without a matching
    ``[`` is reported as an error.
    """

    name = 'Brainfuck'
    url = 'http://www.muppetlabs.com/~breadbox/bf/'
    aliases = ['brainfuck', 'bf']
    filenames = ['*.bf', '*.b']
    mimetypes = ['application/x-brainfuck']
    version_added = ''

    tokens = {
        # Rules shared by the top level and by loop bodies.  Each command
        # family gets its own token type so it renders in its own colour.
        'common': [
            (r'[.,]+', Name.Tag),
            (r'[+-]+', Name.Builtin),
            (r'[<>]+', Name.Variable),
            (r'[^.,+\-<>\[\]]+', Comment),
        ],
        # Outside any loop: '[' opens one, a bare ']' has nothing to close.
        'root': [
            (r'\[', Keyword, 'loop'),
            (r'\]', Error),
            include('common'),
        ],
        # Inside a loop: '[' nests one level deeper, ']' closes this level.
        'loop': [
            (r'\[', Keyword, '#push'),
            (r'\]', Keyword, '#pop'),
            include('common'),
        ]
    }

    def analyse_text(text):
        """Guess whether *text* is Brainfuck.

        Returns 1.0 when ``+``/``-`` or ``<``/``>`` make up more than a
        quarter of the inspected window, 0.5 when the text contains the
        ``[-]`` idiom, and 0 otherwise.
        """
        # NOTE(review): because this is max() rather than min(), the slice
        # below never truncates the text and inputs shorter than 256
        # characters are measured against a fixed 256-character window.
        # Kept as-is to preserve guessing behaviour; confirm the intent.
        window = max(256, len(text))
        sample = text[:window]
        threshold = 0.25 * window

        arithmetic_total = sample.count('+') + sample.count('-')
        pointer_total = sample.count('<') + sample.count('>')
        if arithmetic_total > threshold or pointer_total > threshold:
            return 1.0

        return 0.5 if '[-]' in text else 0
|
|
|
|
|
class BefungeLexer(RegexLexer):
    """
    Lexer for the esoteric Befunge language.

    Almost every rule matches a single character, so the rules are little
    more than a mapping from instruction characters to token types.
    """

    name = 'Befunge'
    url = 'http://en.wikipedia.org/wiki/Befunge'
    aliases = ['befunge']
    filenames = ['*.befunge']
    mimetypes = ['application/x-befunge']
    version_added = '0.7'

    tokens = {
        'root': [
            # Decimal digits and the lowercase letters a-f.
            (r'[0-9a-f]', Number),
            # Arithmetic / logic characters.
            (r'[+*/%!`-]', Operator),
            # Arrows and the other characters grouped with them.
            (r'[<>^v?\[\]rxjk]', Name.Variable),
            # Stack manipulation and output characters.
            (r'[:\\$.,n]', Name.Builtin),
            # First group of keyword characters.
            (r'[|_mw]', Keyword),
            # Curly braces.
            (r'[{}]', Name.Tag),
            # Double-quoted string, shortest match, confined to one line.
            (r'".*?"', String.Double),
            # Apostrophe followed by exactly one character.
            (r'\'.', String.Single),
            # '#' and ';' are shown as comments.
            (r'[#;]', Comment),
            # Second group of keyword characters.
            (r'[pg&~=@iotsy]', Keyword),
            # Parentheses and uppercase letters are shown as comments.
            (r'[()A-Z]', Comment),
            (r'\s+', Whitespace),
        ],
    }
|
|
|
|
|
class CAmkESLexer(RegexLexer):
    """
    Basic lexer for the input language for the CAmkES component platform.

    Rules are tried in order at each position, so more specific patterns
    (keywords, float literals) are listed before the more general ones
    (identifiers, integer literals) that would otherwise shadow them.
    """

    name = 'CAmkES'
    url = 'https://sel4.systems/CAmkES/'
    aliases = ['camkes', 'idl4']
    filenames = ['*.camkes', '*.idl4']
    version_added = '2.1'

    tokens = {
        'root': [
            # A line whose first non-blank character is '#': preprocessor
            # directive, consumed up to and including the newline.
            (r'^(\s*)(#.*)(\n)', bygroups(Whitespace, Comment.Preproc,
                                          Whitespace)),

            # Whitespace, block comments and line comments.
            (r'\s+', Whitespace),
            (r'/\*(.|\n)*?\*/', Comment),
            (r'//.*$', Comment),

            (r'[\[(){},.;\]]', Punctuation),
            (r'[~!%^&*+=|?:<>/-]', Operator),

            # Keywords.
            (words(('assembly', 'attribute', 'component', 'composition',
                    'configuration', 'connection', 'connector', 'consumes',
                    'control', 'dataport', 'Dataport', 'Dataports', 'emits',
                    'event', 'Event', 'Events', 'export', 'from', 'group',
                    'hardware', 'has', 'interface', 'Interface', 'maybe',
                    'procedure', 'Procedure', 'Procedures', 'provides',
                    'template', 'thread', 'threads', 'to', 'uses', 'with'),
                   suffix=r'\b'), Keyword),

            # Type names.  'int16_t' was previously misspelled 'int16_6',
            # which left the real type name highlighted as a plain Name.
            (words(('bool', 'boolean', 'Buf', 'char', 'character', 'double',
                    'float', 'in', 'inout', 'int', 'int16_t', 'int32_t',
                    'int64_t', 'int8_t', 'integer', 'mutex', 'out', 'real',
                    'refin', 'semaphore', 'signed', 'string', 'struct',
                    'uint16_t', 'uint32_t', 'uint64_t', 'uint8_t', 'uintptr_t',
                    'unsigned', 'void'),
                   suffix=r'\b'), Keyword.Type),

            # Keyword-like identifiers: anything ending in _priority,
            # _domain or _buffer, plus a few fixed names.
            (r'[a-zA-Z_]\w*_(priority|domain|buffer)', Keyword.Reserved),
            (words(('dma_pool', 'from_access', 'to_access'), suffix=r'\b'),
             Keyword.Reserved),

            # 'import <...>;' or 'import "...";'
            (r'(import)(\s+)((?:<[^>]*>|"[^"]*");)',
             bygroups(Comment.Preproc, Whitespace, Comment.Preproc)),

            # 'include <...>;' or 'include "...";'
            (r'(include)(\s+)((?:<[^>]*>|"[^"]*");)',
             bygroups(Comment.Preproc, Whitespace, Comment.Preproc)),

            # Literals.  The float rule must precede the integer rule:
            # otherwise the integer rule consumes the digits before the
            # decimal point and the float rule can never match.
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'-?[\d]+\.[\d]+', Number.Float),
            (r'-?[\d]+', Number),
            (r'"[^"]*"', String),
            (r'[Tt]rue|[Ff]alse', Name.Builtin),

            # Any other identifier.
            (r'[a-zA-Z_]\w*', Name),
        ],
    }
|
|
|
|
|
class CapDLLexer(RegexLexer):
    """
    Basic lexer for CapDL.

    The primary tool that consumes CapDL specifications lives at
    https://github.com/seL4/capdl/tree/master/capDL-tool. Only a subset of
    the grammar is handled here. In particular, an identifier is allowed to
    shadow a type name, yet this lexer still highlights such an identifier
    as a type; doing better would require a stateful lexer, which is
    considered unnecessarily complex for now.
    """

    name = 'CapDL'
    url = 'https://ssrg.nicta.com.au/publications/nictaabstracts/Kuz_KLW_10.abstract.pml'
    aliases = ['capdl']
    filenames = ['*.cdl']
    version_added = '2.2'

    tokens = {
        'root': [
            # A line whose first non-blank character is '#': preprocessor
            # directive, consumed up to and including the newline.
            (r'^(\s*)(#.*)(\n)',
             bygroups(Whitespace, Comment.Preproc, Whitespace)),

            # Whitespace, block comments, and '//' or '--' line comments.
            (r'\s+', Whitespace),
            (r'/\*(.|\n)*?\*/', Comment),
            (r'(//|--).*$', Comment),

            # Single-character punctuation and the '..' range separator.
            (r'[<>\[(){},:;=\]]', Punctuation),
            (r'\.\.', Punctuation),

            # Section / structure keywords.
            (words(('arch', 'arm11', 'caps', 'child_of', 'ia32', 'irq', 'maps',
                    'objects'), suffix=r'\b'), Keyword),

            # Object type names.
            (words(('aep', 'asid_pool', 'cnode', 'ep', 'frame', 'io_device',
                    'io_ports', 'io_pt', 'notification', 'pd', 'pt', 'tcb',
                    'ut', 'vcpu'), suffix=r'\b'), Keyword.Type),

            # Property names and rights strings.
            (words(('asid', 'addr', 'badge', 'cached', 'dom', 'domainID', 'elf',
                    'fault_ep', 'G', 'guard', 'guard_size', 'init', 'ip',
                    'prio', 'sp', 'R', 'RG', 'RX', 'RW', 'RWG', 'RWX', 'W',
                    'WG', 'WX', 'level', 'masked', 'master_reply', 'paddr',
                    'ports', 'reply', 'uncached'), suffix=r'\b'),
             Keyword.Reserved),

            # Numeric literals, plus a handful of words that are
            # highlighted the same way as numbers.
            (r'0[xX][\da-fA-F]+', Number.Hex),
            (r'\d+(\.\d+)?(k|M)?', Number),
            (words(('bits',), suffix=r'\b'), Number),
            (words(('cspace', 'vspace', 'reply_slot', 'caller_slot',
                    'ipc_buffer_slot'), suffix=r'\b'), Number),

            # Any other identifier; '-', '@' and '.' may appear after the
            # first character.
            (r'[a-zA-Z_][-@\.\w]*', Name),
        ],
    }
|
|
|
|
|
class RedcodeLexer(RegexLexer):
    """
    A simple Redcode lexer based on ICWS'94.
    Contributed by Adam Blinkinsop <blinks@acm.org>.

    Opcodes and instruction modifiers are matched as whole upper-case words
    before the generic identifier rule, so they take precedence over it.
    """

    name = 'Redcode'
    aliases = ['redcode']
    filenames = ['*.cw']
    url = 'https://en.wikipedia.org/wiki/Core_War'
    version_added = '0.8'

    # Words highlighted as Name.Function and Name.Decorator respectively.
    opcodes = ('DAT', 'MOV', 'ADD', 'SUB', 'MUL', 'DIV', 'MOD',
               'JMP', 'JMZ', 'JMN', 'DJN', 'CMP', 'SLT', 'SPL',
               'ORG', 'EQU', 'END')
    modifiers = ('A', 'B', 'AB', 'BA', 'F', 'X', 'I')

    tokens = {
        'root': [
            # Whitespace and ';' comments running to the end of the line.
            (r'\s+', Whitespace),
            (r';.*$', Comment.Single),

            # Opcodes, then modifiers, then any other identifier.
            (r'\b({})\b'.format('|'.join(opcodes)), Name.Function),
            (r'\b({})\b'.format('|'.join(modifiers)), Name.Decorator),
            # '\w*' rather than '\w+': the identifier rule must accept
            # single-character labels too, otherwise they match no rule
            # at all and are emitted as Error tokens.
            (r'[A-Za-z_]\w*', Name),

            # Arithmetic operators, addressing-mode characters, separators.
            (r'[-+*/%]', Operator),
            (r'[#$@<>]', Operator),
            (r'[.,]', Punctuation),

            # Integer literals.
            (r'[-+]?\d+', Number.Integer),
        ],
    }
|
|
|
|
|
def _aheui_command_class():
    """Build the regex character class of highlighted Aheui syllables.

    A precomposed Hangul syllable has the code point
    ``0xAC00 + (initial * 21 + vowel) * 28 + final``.  For each selected
    initial consonant and each selected vowel span, one ``first-last`` range
    covering all 28 finals is emitted, giving 15 * 7 ranges in total.
    """
    hangul_base = 0xAC00
    vowel_count = 21
    final_count = 28
    # Initial-consonant indices; 0, 1, 11 and 13 are deliberately left out.
    initials = (2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 15, 16, 17, 18)
    # Inclusive (first, last) vowel indices.
    vowel_spans = ((0, 0), (2, 2), (4, 4), (6, 6), (8, 8), (12, 13), (17, 19))

    ranges = []
    for initial in initials:
        for first_vowel, last_vowel in vowel_spans:
            first = hangul_base + (initial * vowel_count + first_vowel) * final_count
            last = (hangul_base
                    + (initial * vowel_count + last_vowel) * final_count
                    + final_count - 1)
            ranges.append(chr(first) + '-' + chr(last))
    return '[' + ''.join(ranges) + ']'


class AheuiLexer(RegexLexer):
    """
    Aheui is an esoteric language based on the Korean alphabet.

    Hangul syllables in the command set are highlighted as operators; every
    other character is treated as a comment.
    """

    name = 'Aheui'
    url = 'http://aheui.github.io/'
    aliases = ['aheui']
    filenames = ['*.aheui']
    version_added = ''

    tokens = {
        'root': [
            # The character class is generated from code points instead of
            # being spelled out, so it cannot be damaged by a mis-decoded
            # source file.
            (_aheui_command_class(), Operator),
            ('.', Comment),
        ],
    }
|
|