"""
    pygments.lexers.special
    ~~~~~~~~~~~~~~~~~~~~~~~

    Special lexers.

    :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import ast

from pygments.lexer import Lexer, line_re
from pygments.token import Token, Error, Text, Generic
from pygments.util import get_choice_opt

__all__ = ['TextLexer', 'OutputLexer', 'RawTokenLexer']
|
|
class TextLexer(Lexer):
    """
    "Null" lexer, doesn't highlight anything.
    """
    name = 'Text only'
    aliases = ['text']
    filenames = ['*.txt']
    mimetypes = ['text/plain']
    url = ""
    version_added = ''

    priority = 0.01

    def get_tokens_unprocessed(self, text):
        yield 0, Text, text

    def analyse_text(text):
        return TextLexer.priority
|
|
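# Example usage (illustrative doctest-style sketch; ``highlight`` and
# ``NullFormatter`` are the usual pygments entry points): the whole input
# comes back as a single ``Token.Text`` token, so nothing is highlighted.
# The tiny ``priority`` makes this lexer the fallback of last resort when
# guessing a lexer.
#
#   >>> from pygments import highlight
#   >>> from pygments.formatters import NullFormatter
#   >>> highlight('no markup here', TextLexer(), NullFormatter())
#   'no markup here\n'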
|
|
class OutputLexer(Lexer):
    """
    Simple lexer that highlights everything as ``Token.Generic.Output``.
    """
    name = 'Text output'
    aliases = ['output']
    url = ""
    version_added = '2.10'
    _example = "output/output"

    def get_tokens_unprocessed(self, text):
        yield 0, Generic.Output, text
|
|
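# Example usage (illustrative sketch): everything is emitted as one
# ``Token.Generic.Output`` token, which styles typically render like
# program output in a console session.
#
#   >>> list(OutputLexer().get_tokens_unprocessed('$ make\nok\n'))
#   [(0, Token.Generic.Output, '$ make\nok\n')]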
|
|
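# Cache of token types already resolved from their dotted names
# (e.g. "Token.Name.Builtin"), shared across RawTokenLexer instances.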
_ttype_cache = {}


|
|
class RawTokenLexer(Lexer):
    """
    Recreate a token stream formatted with the `RawTokenFormatter`.

    Additional options accepted:

    `compress`
        If set to ``"gz"`` or ``"bz2"``, decompress the token stream with
        the given compression algorithm before lexing (default: ``""``).
    """
    name = 'Raw token data'
    aliases = []
    filenames = []
    mimetypes = ['application/x-pygments-tokens']
    url = 'https://pygments.org/docs/formatters/#RawTokenFormatter'
    version_added = ''

    def __init__(self, **options):
        self.compress = get_choice_opt(options, 'compress',
                                       ['', 'none', 'gz', 'bz2'], '')
        Lexer.__init__(self, **options)

    def get_tokens(self, text):
        if self.compress:
            if isinstance(text, str):
                text = text.encode('latin1')
            try:
                if self.compress == 'gz':
                    import gzip
                    text = gzip.decompress(text)
                elif self.compress == 'bz2':
                    import bz2
                    text = bz2.decompress(text)
            except OSError:
                yield Error, text.decode('latin1')
        if isinstance(text, bytes):
            text = text.decode('latin1')

        # do not call Lexer.get_tokens() because stripping is not optional
        text = text.strip('\n') + '\n'
        for i, t, v in self.get_tokens_unprocessed(text):
            yield t, v
|
    def get_tokens_unprocessed(self, text):
        length = 0
        for match in line_re.finditer(text):
            try:
                ttypestr, val = match.group().rstrip().split('\t', 1)
                ttype = _ttype_cache.get(ttypestr)
                if not ttype:
                    # resolve the dotted token-type name attribute by
                    # attribute, validating each component along the way
                    ttype = Token
                    ttypes = ttypestr.split('.')[1:]
                    for ttype_ in ttypes:
                        if not ttype_ or not ttype_[0].isupper():
                            raise ValueError('malformed token name')
                        ttype = getattr(ttype, ttype_)
                    _ttype_cache[ttypestr] = ttype
                val = ast.literal_eval(val)
                if not isinstance(val, str):
                    raise ValueError('expected str')
            except (SyntaxError, ValueError):
                val = match.group()
                ttype = Error
            yield length, ttype, val
            length += len(val)
|
|
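# Example usage (illustrative sketch): each line of raw token data has the
# form "<dotted token type><TAB><repr of value>", which is what
# ``RawTokenFormatter`` emits and ``get_tokens_unprocessed`` parses back.
#
#   >>> list(RawTokenLexer().get_tokens("Token.Keyword\t'def'\n"))
#   [(Token.Keyword, 'def')]
#
# With ``compress='gz'`` the input may be gzip-compressed bytes:
#
#   >>> import gzip
#   >>> data = gzip.compress(b"Token.Text\t'hi'\n")
#   >>> list(RawTokenLexer(compress='gz').get_tokens(data))
#   [(Token.Text, 'hi')]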