|
""" |
|
pygments.lexers |
|
~~~~~~~~~~~~~~~ |
|
|
|
Pygments lexers. |
|
|
|
:copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS. |
|
:license: BSD, see LICENSE for details. |
|
""" |
|
|
|
import re |
|
import sys |
|
import types |
|
import fnmatch |
|
from os.path import basename |
|
|
|
from pygments.lexers._mapping import LEXERS |
|
from pygments.modeline import get_filetype_from_buffer |
|
from pygments.plugin import find_plugin_lexers |
|
from pygments.util import ClassNotFound, guess_decode |
|
|
|
# Legacy class names that stay importable from this package, mapped to the
# name of the lexer class that is returned in their place.
COMPAT = {
    'Python3Lexer': 'PythonLexer',
    'Python3TracebackLexer': 'PythonTracebackLexer',
    'LeanLexer': 'Lean3Lexer',
}

# Public API: the lookup helpers, every key of the builtin lexer registry
# and every legacy name from COMPAT.
__all__ = [
    'get_lexer_by_name', 'get_lexer_for_filename', 'find_lexer_class',
    'guess_lexer', 'load_lexer_from_file',
    *LEXERS,
    *COMPAT,
]

# Lexer classes that have already been imported, keyed by their ``name``.
_lexer_cache = {}
# Compiled regular expressions for filename globs, keyed by the glob string.
_pattern_cache = {}
|
|
|
|
|
def _fn_matches(fn, glob):
    """Match the file name *fn* against the shell-style pattern *glob*.

    The pattern is translated to a regular expression once and memoised in
    ``_pattern_cache``.  Returns the regex match object, or ``None`` when the
    name does not match.
    """
    try:
        compiled = _pattern_cache[glob]
    except KeyError:
        # First time this glob is seen: compile it and remember the result.
        compiled = re.compile(fnmatch.translate(glob))
        _pattern_cache[glob] = compiled
    return compiled.match(fn)
|
|
|
|
|
def _load_lexers(module_name):
    """Import *module_name* and cache every lexer class it exports.

    Each name listed in the module's ``__all__`` is looked up on the module
    and stored in ``_lexer_cache`` under the class's ``name`` attribute.
    """
    # A non-empty fromlist makes __import__ return the named submodule itself
    # rather than the top-level package.
    module = __import__(module_name, None, None, ['__all__'])
    for exported in module.__all__:
        lexer_cls = getattr(module, exported)
        _lexer_cache[lexer_cls.name] = lexer_cls
|
|
|
|
|
def get_all_lexers(plugins=True):
    """Return a generator of tuples in the form ``(name, aliases,
    filenames, mimetypes)`` of all known lexers.

    If *plugins* is true (the default), plugin lexers supplied by entrypoints
    are also returned. Otherwise, only builtin ones are considered.
    """
    # Registry entries start with the module path; everything after it is
    # exactly the tuple that callers receive.
    for entry in LEXERS.values():
        yield entry[1:]

    if not plugins:
        return

    for plugin_cls in find_plugin_lexers():
        yield (plugin_cls.name, plugin_cls.aliases,
               plugin_cls.filenames, plugin_cls.mimetypes)
|
|
|
|
|
def find_lexer_class(name):
    """
    Return the `Lexer` subclass whose *name* attribute equals the *name*
    argument.

    Already-loaded lexers are served from the cache; otherwise the builtin
    registry is searched first and plugin lexers second.  Returns ``None``
    if no lexer with that name exists.
    """
    try:
        return _lexer_cache[name]
    except KeyError:
        pass

    # Builtin lexers: import the defining module on demand.
    for module_name, lexer_name, _, _, _ in LEXERS.values():
        if lexer_name != name:
            continue
        _load_lexers(module_name)
        return _lexer_cache[name]

    # Lexers contributed by plugins.
    for plugin_cls in find_plugin_lexers():
        if plugin_cls.name == name:
            return plugin_cls

    return None
|
|
|
|
|
def find_lexer_class_by_name(_alias):
    """
    Return the `Lexer` subclass that has `alias` in its aliases list, without
    instantiating it.

    Like `get_lexer_by_name`, but does not instantiate the class.

    The alias is compared in lower case.  Builtin lexers are searched before
    plugin lexers.

    Will raise :exc:`pygments.util.ClassNotFound` if no lexer with that alias is
    found.

    .. versionadded:: 2.2
    """
    if not _alias:
        raise ClassNotFound(f'no lexer for alias {_alias!r} found')

    # Lower-case once instead of once per registry entry / plugin.
    wanted = _alias.lower()

    # Builtin lexers: import the defining module on first use.
    for module_name, name, aliases, _, _ in LEXERS.values():
        if wanted in aliases:
            if name not in _lexer_cache:
                _load_lexers(module_name)
            return _lexer_cache[name]

    # Lexers contributed by plugins.
    for cls in find_plugin_lexers():
        if wanted in cls.aliases:
            return cls

    raise ClassNotFound(f'no lexer for alias {_alias!r} found')
|
|
|
|
|
def get_lexer_by_name(_alias, **options):
    """
    Return an instance of a `Lexer` subclass that has `alias` in its
    aliases list. The lexer is given the `options` at its
    instantiation.

    Will raise :exc:`pygments.util.ClassNotFound` if no lexer with that alias is
    found.
    """
    # The class lookup (empty-alias check, builtin registry, plugins) is
    # shared with find_lexer_class_by_name so both stay in agreement.
    lexer_cls = find_lexer_class_by_name(_alias)
    return lexer_cls(**options)
|
|
|
|
|
def load_lexer_from_file(filename, lexername="CustomLexer", **options):
    """Load a lexer from a file.

    This method expects a file located relative to the current working
    directory, which contains a Lexer class. By default, it expects the
    Lexer to be named CustomLexer; you can specify your own class name
    as the second argument to this function.

    Users should be very careful with the input, because this method
    is equivalent to running eval on the input file.

    The class found under *lexername* is instantiated with *options* and
    the instance is returned.

    Raises ClassNotFound if there are any problems importing the Lexer;
    the underlying exception is attached as ``__cause__``.

    .. versionadded:: 2.2
    """
    try:
        # Fresh dict used as the global namespace the file is executed in.
        custom_namespace = {}
        with open(filename, 'rb') as f:
            # SECURITY: this runs arbitrary code from *filename*; only call
            # this function with files you trust.
            exec(f.read(), custom_namespace)

        # Fetch the requested class from whatever the file defined.
        if lexername not in custom_namespace:
            raise ClassNotFound(f'no valid {lexername} class found in {filename}')
        lexer_class = custom_namespace[lexername]

        return lexer_class(**options)
    except OSError as err:
        raise ClassNotFound(f'cannot read {filename}: {err}') from err
    except ClassNotFound:
        # Already the right exception type (raised above); pass it through.
        raise
    except Exception as err:
        # Anything raised while executing the file or building the lexer.
        raise ClassNotFound(f'error when loading custom lexer: {err}') from err
|
|
|
|
|
def find_lexer_class_for_filename(_fn, code=None):
    """Get a lexer for a filename.

    Only the base name of *_fn* is matched against the lexers' filename
    patterns (builtin lexers first, then plugin lexers).

    If multiple lexers match the filename pattern, use ``analyse_text()`` to
    figure out which one is more appropriate.  *code* may be ``str`` or
    ``bytes``; bytes are decoded before analysis.  Without *code*, the
    lexers' ``priority`` attribute decides.

    Returns None if not found.
    """
    matches = []
    fn = basename(_fn)

    # Builtin lexers: import the defining module on first use.
    for modname, name, _, filenames, _ in LEXERS.values():
        for filename in filenames:
            if _fn_matches(fn, filename):
                if name not in _lexer_cache:
                    _load_lexers(modname)
                matches.append((_lexer_cache[name], filename))

    # Lexers contributed by plugins.
    for cls in find_plugin_lexers():
        for filename in cls.filenames:
            if _fn_matches(fn, filename):
                matches.append((cls, filename))

    if not matches:
        return None

    if isinstance(code, bytes):
        # guess_decode returns a (text, encoding) pair; analyse_text must be
        # given only the decoded text.
        code, _ = guess_decode(code)

    def get_rating(info):
        cls, filename = info
        # Patterns without a '*' wildcard get a 0.5 bonus.
        bonus = 0.5 if '*' not in filename else 0
        # The class name is the final tie-breaker between equal ratings.
        if code:
            return cls.analyse_text(code) + bonus, cls.__name__
        return cls.priority + bonus, cls.__name__

    # Ascending sort: the highest-rated match ends up last.
    matches.sort(key=get_rating)
    return matches[-1][0]
|
|
|
|
|
def get_lexer_for_filename(_fn, code=None, **options):
    """Get a lexer for a filename.

    Return a `Lexer` subclass instance that has a filename pattern
    matching `_fn`. The lexer is given the `options` at its
    instantiation.

    Raise :exc:`pygments.util.ClassNotFound` if no lexer for that filename
    is found.

    If multiple lexers match the filename pattern, use their ``analyse_text()``
    methods to figure out which one is more appropriate.
    """
    lexer_cls = find_lexer_class_for_filename(_fn, code)
    if lexer_cls:
        return lexer_cls(**options)
    raise ClassNotFound(f'no lexer for filename {_fn!r} found')
|
|
|
|
|
def get_lexer_for_mimetype(_mime, **options):
    """
    Return a `Lexer` subclass instance that has `mime` in its mimetype
    list. The lexer is given the `options` at its instantiation.

    Will raise :exc:`pygments.util.ClassNotFound` if no lexer for that mimetype
    is found.
    """
    # Builtin lexers: import the defining module on first use.
    for module_name, lexer_name, _, _, known_mimes in LEXERS.values():
        if _mime not in known_mimes:
            continue
        if lexer_name not in _lexer_cache:
            _load_lexers(module_name)
        return _lexer_cache[lexer_name](**options)

    # Lexers contributed by plugins.
    for plugin_cls in find_plugin_lexers():
        if _mime in plugin_cls.mimetypes:
            return plugin_cls(**options)

    raise ClassNotFound(f'no lexer for mimetype {_mime!r} found')
|
|
|
|
|
def _iter_lexerclasses(plugins=True):
    """Yield every lexer class: builtin ones first, then plugin ones.

    Builtin lexers are visited in sorted order of their registry key and
    their modules are imported on demand.  Plugin lexers are included only
    when *plugins* is true.
    """
    for registry_key in sorted(LEXERS):
        entry = LEXERS[registry_key]
        module_name, lexer_name = entry[:2]
        if lexer_name not in _lexer_cache:
            _load_lexers(module_name)
        yield _lexer_cache[lexer_name]

    if not plugins:
        return
    yield from find_plugin_lexers()
|
|
|
|
|
def guess_lexer_for_filename(_fn, _text, **options):
    """
    As :func:`guess_lexer()`, but only lexers which have a pattern in `filenames`
    or `alias_filenames` that matches the base name of `_fn` are taken into
    consideration.

    A single matching lexer is returned directly.  Otherwise each candidate's
    ``analyse_text()`` is called on `_text`; a candidate scoring exactly 1.0
    is returned immediately, else the best-ranked one wins.

    :exc:`pygments.util.ClassNotFound` is raised if no lexer thinks it can
    handle the content.
    """
    name = basename(_fn)
    is_primary = {}
    candidates = set()

    for lexer_cls in _iter_lexerclasses():
        # `filenames` hits are recorded as primary, `alias_filenames` hits as
        # secondary; the alias patterns are checked last, so their flag wins
        # when both kinds match.
        pattern_groups = (
            (lexer_cls.filenames, True),
            (lexer_cls.alias_filenames, False),
        )
        for patterns, flag in pattern_groups:
            for pattern in patterns:
                if _fn_matches(name, pattern):
                    candidates.add(lexer_cls)
                    is_primary[lexer_cls] = flag

    if not candidates:
        raise ClassNotFound(f'no lexer for filename {name!r} found')
    if len(candidates) == 1:
        return candidates.pop()(**options)

    scored = []
    for lexer_cls in candidates:
        score = lexer_cls.analyse_text(_text)
        if score == 1.0:
            # A perfect score ends the search at once.
            return lexer_cls(**options)
        scored.append((score, lexer_cls))

    def rank(pair):
        # Order by text score, then primary-pattern match, then the lexer's
        # priority, then class name as the final tie-breaker.
        score, lexer_cls = pair
        return (score, is_primary[lexer_cls], lexer_cls.priority,
                lexer_cls.__name__)

    # Ascending stable sort: the best-ranked candidate ends up last.
    best_cls = sorted(scored, key=rank)[-1][1]
    return best_cls(**options)
|
|
|
|
|
def guess_lexer(_text, **options):
    """
    Return a `Lexer` subclass instance that's guessed from the text in
    `_text`. For that, the :meth:`.analyse_text()` method of every known lexer
    class is called with the text as argument, and the lexer which returned the
    highest value will be instantiated and returned.

    Non-``str`` input is decoded first, using the ``inencoding`` or
    ``encoding`` option if one is set.  A filetype found in the text by
    ``get_filetype_from_buffer`` takes precedence over the analysis when a
    lexer with that alias exists.

    :exc:`pygments.util.ClassNotFound` is raised if no lexer thinks it can
    handle the content.
    """
    if not isinstance(_text, str):
        encoding = options.get('inencoding', options.get('encoding'))
        if encoding:
            _text = _text.decode(encoding)
        else:
            _text, _ = guess_decode(_text)

    # Try the filetype declared in the text itself first.
    filetype = get_filetype_from_buffer(_text)
    if filetype is not None:
        try:
            return get_lexer_by_name(filetype, **options)
        except ClassNotFound:
            # Unknown filetype: fall back to content analysis.
            pass

    top_score = 0.0
    top_lexer = None
    for candidate in _iter_lexerclasses():
        score = candidate.analyse_text(_text)
        if score == 1.0:
            # A perfect score ends the search at once.
            return candidate(**options)
        if score > top_score:
            top_score, top_lexer = score, candidate

    if not top_score or top_lexer is None:
        raise ClassNotFound('no lexer matching the text found')
    return top_lexer(**options)
|
|
|
|
|
class _automodule(types.ModuleType):
    """Module type that imports lexer classes the first time they are accessed."""

    def __getattr__(self, name):
        """Resolve *name* as a builtin lexer or a legacy (``COMPAT``) name.

        A builtin lexer is loaded, stored on the module with ``setattr`` and
        returned.  A legacy name is resolved through its replacement.  Any
        other name raises ``AttributeError``.
        """
        entry = LEXERS.get(name)
        if not entry:
            if name not in COMPAT:
                raise AttributeError(name)
            # Legacy alias: look up the class it now maps to.
            return getattr(self, COMPAT[name])

        module_name, lexer_name = entry[0], entry[1]
        _load_lexers(module_name)
        lexer_cls = _lexer_cache[lexer_name]
        # Store the class on the module so later lookups find it directly.
        setattr(self, name, lexer_cls)
        return lexer_cls
|
|
|
|
|
# Replace this module's entry in ``sys.modules`` with an ``_automodule``
# instance, so that attribute lookups that fail on the module go through
# ``_automodule.__getattr__``.
oldmod = sys.modules[__name__]
newmod = _automodule(__name__)
# Copy everything defined so far into the replacement module.
newmod.__dict__.update(oldmod.__dict__)
sys.modules[__name__] = newmod
# The copy above also carried over these bootstrap names; remove them from
# the replacement module's namespace.
del newmod.newmod, newmod.oldmod, newmod.sys, newmod.types
|
|