File size: 3,671 Bytes
9c6594c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
"""
    pygments.lexers.bqn
    ~~~~~~~~~~~~~~~~~~~

    Lexer for BQN.

    :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

from pygments.lexer import RegexLexer
from pygments.token import Comment, Operator, Keyword, Name, String, \
    Number, Punctuation, Whitespace

__all__ = ['BQNLexer']


class BQNLexer(RegexLexer):
    """
    A simple BQN lexer.

    The grammar is a single flat ``'root'`` state: every rule maps one
    regular expression to one token type, and no rule changes state.
    Rule order is significant because the first rule that matches at the
    current position is the one that is used.

    All BQN glyphs below are written as literal UTF-8 characters, so this
    file must be stored and read as UTF-8.
    """
    name = 'BQN'
    url = 'https://mlochbaum.github.io/BQN/index.html'
    aliases = ['bqn']
    filenames = ['*.bqn']
    mimetypes = []
    version_added = '2.16'

    # An inter_word_char. Necessary because \w matches all alphanumeric
    # Unicode characters, including ones (e.g., 𝕊) that BQN treats special.
    # The lookahead rejects those special names before \w consumes them.
    _iwc = r'((?=[^𝕎𝕏𝔽𝔾𝕊𝕨𝕩𝕗𝕘𝕤𝕣])\w)'

    tokens = {
        'root': [
            # Whitespace
            # ==========
            (r'\s+', Whitespace),
            #
            # Comment
            # =======
            # '#' is a comment that continues to the end of the line
            (r'#.*$', Comment.Single),
            #
            # Strings
            # =======
            # A doubled quote inside a literal stands for the quote itself
            (r'\'((\'\')|[^\'])*\'', String.Single),
            (r'"(("")|[^"])*"', String.Double),
            #
            # Null Character
            # ==============
            # Literal representation of the null character
            (r'@', String.Symbol),
            #
            # Punctuation
            # ===========
            # This token type is used for diamond, commas
            # and array and list brackets and strand syntax
            (r'[\.⋄,\[\]⟨⟩‿]', Punctuation),
            #
            # Expression Grouping
            # ===================
            # Since this token type is important in BQN, it is not included in
            # the punctuation token type but rather in the following one
            (r'[\(\)]', String.Regex),
            #
            # Numbers
            # =======
            # Includes the numeric literals and the Nothing character
            (r'¯?[0-9](([0-9]|_)*\.?([0-9]|_)+|([0-9]|_)*)([Ee][¯]?([0-9]|_)+)?|¯|∞|π|·', Number),
            #
            # Variables
            # =========
            (r'[a-z]' + _iwc + r'*', Name.Variable),
            #
            # 2-Modifiers
            # ===========
            # Needs to come before the 1-modifiers due to _𝕣 and _𝕣_
            (r'[∘○⊸⟜⌾⊘◶⎉⚇⍟⎊]', Name.Property),
            (r'_(𝕣|[a-zA-Z0-9]+)_', Name.Property),
            #
            # 1-Modifiers
            # ===========
            (r'[˙˜˘¨⌜⁼´˝`𝕣]', Name.Attribute),
            (r'_(𝕣|[a-zA-Z0-9]+)', Name.Attribute),
            #
            # Functions
            # =========
            # The monadic or dyadic function primitives and function
            # operands and arguments, along with function self-reference
            (r'[+\-×÷\⋆√⌊⌈∧∨¬|≤<>≥=≠≡≢⊣⊢⥊∾≍⋈↑↓↕«»⌽⍉/⍋⍒⊏⊑⊐⊒∊⍷⊔!𝕎𝕏𝔽𝔾𝕊]',
             Operator),
            # Capitalised names, and system values introduced by '•'
            (r'[A-Z]' + _iwc + r'*|•' + _iwc + r'+', Operator),
            #
            # Constant
            # ========
            # NOTE: '˙' is already listed in the 1-modifier character class
            # above, and the first matching rule wins, so this rule is
            # shadowed. It is kept as-is to leave the token output unchanged.
            (r'˙', Name.Constant),
            #
            # Define/Export/Change
            # ====================
            (r'[←↩⇐]', Keyword.Declaration),
            #
            # Blocks
            # ======
            (r'[{}]', Keyword.Type),
            #
            # Extra characters
            # ================
            # Block separators/predicates and the lowercase special names
            (r'[;:?𝕨𝕩𝕗𝕘𝕤]', Name.Entity),
            #

        ],
    }