|
""" |
|
pygments.formatters.latex |
|
~~~~~~~~~~~~~~~~~~~~~~~~~ |
|
|
|
Formatter for LaTeX fancyvrb output. |
|
|
|
:copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS. |
|
:license: BSD, see LICENSE for details. |
|
""" |
|
|
|
from io import StringIO |
|
|
|
from pygments.formatter import Formatter |
|
from pygments.lexer import Lexer, do_insertions |
|
from pygments.token import Token, STANDARD_TYPES |
|
from pygments.util import get_bool_opt, get_int_opt |
|
|
|
|
|
__all__ = ['LatexFormatter'] |
|
|
|
|
|
def escape_tex(text, commandprefix):
    r"""Replace characters that are special to LaTeX with escape macros.

    Every special character is rewritten as ``\<commandprefix>Z??{}``,
    where ``Z??`` is a three-letter suffix identifying the character
    (for example ``Zus`` for the underscore).

    :param text: the raw text to escape.
    :param commandprefix: prefix used to build the macro names.
    :return: the escaped text.
    """
    # Backslash and braces occur in the replacement macros themselves, so
    # they are parked on control-character placeholders first; otherwise
    # the macros inserted for one of them would be escaped again by the
    # replacements that follow.
    for char, placeholder in (('\\', '\x00'), ('{', '\x01'), ('}', '\x02')):
        text = text.replace(char, placeholder)

    # Order matters: the substitutions are applied one after another.
    suffixes = (
        ('\x00', 'Zbs'),
        ('\x01', 'Zob'),
        ('\x02', 'Zcb'),
        ('^', 'Zca'),
        ('_', 'Zus'),
        ('&', 'Zam'),
        ('<', 'Zlt'),
        ('>', 'Zgt'),
        ('#', 'Zsh'),
        ('%', 'Zpc'),
        ('$', 'Zdl'),
        ('-', 'Zhy'),
        ("'", 'Zsq'),
        ('"', 'Zdq'),
        ('~', 'Zti'),
    )
    for char, suffix in suffixes:
        text = text.replace(char, '\\' + commandprefix + suffix + '{}')
    return text
|
|
|
|
|
# Skeleton of a complete LaTeX document.  It is filled in with
# %-formatting using the keys ``docclass``, ``encoding``, ``preamble``,
# ``styledefs``, ``title`` and ``code``; ``LatexFormatter.format_unencoded``
# does this when the ``full`` option is set.
DOC_TEMPLATE = r'''
\documentclass{%(docclass)s}
\usepackage{fancyvrb}
\usepackage{color}
\usepackage[%(encoding)s]{inputenc}
%(preamble)s

%(styledefs)s

\begin{document}

\section*{%(title)s}

%(code)s
\end{document}
'''
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# LaTeX macro definitions needed by the formatter output.  The template is
# filled in with %-formatting by ``LatexFormatter.get_style_defs``: ``cp`` is
# the command prefix and ``styles`` the per-token ``\@namedef`` lines.
# Because of the %-formatting, ``%%`` stands for a single literal ``%``.
#
# With <cp> denoting the command prefix, the macros are:
#
# * ``\<cp>@reset`` sets the six hooks ``@it``, ``@bf``, ``@ul``, ``@tc``,
#   ``@bc`` and ``@ff`` to ``\relax``.
# * ``\<cp>@tok{name}`` expands to the control sequence ``<cp>@tok@name``.
# * ``\<cp>@toks`` reads ``+``-delimited names one at a time, calling
#   ``\<cp>@tok`` on each, until it reads ``\relax``.
# * ``\<cp>@do{text}`` wraps the text in the six hooks, nested as
#   bc, tc, ul, it, bf, ff from the outside in.
# * ``\<cp>{names}{text}`` is the entry point: reset, run ``\<cp>@toks`` over
#   the names, then ``\<cp>@do`` on the text.
# * The ``\<cp>Z??`` macros print one special character each via ``\char``.
STYLE_TEMPLATE = r'''
\makeatletter
\def\%(cp)s@reset{\let\%(cp)s@it=\relax \let\%(cp)s@bf=\relax%%
\let\%(cp)s@ul=\relax \let\%(cp)s@tc=\relax%%
\let\%(cp)s@bc=\relax \let\%(cp)s@ff=\relax}
\def\%(cp)s@tok#1{\csname %(cp)s@tok@#1\endcsname}
\def\%(cp)s@toks#1+{\ifx\relax#1\empty\else%%
\%(cp)s@tok{#1}\expandafter\%(cp)s@toks\fi}
\def\%(cp)s@do#1{\%(cp)s@bc{\%(cp)s@tc{\%(cp)s@ul{%%
\%(cp)s@it{\%(cp)s@bf{\%(cp)s@ff{#1}}}}}}}
\def\%(cp)s#1#2{\%(cp)s@reset\%(cp)s@toks#1+\relax+\%(cp)s@do{#2}}

%(styles)s

\def\%(cp)sZbs{\char`\\}
\def\%(cp)sZus{\char`\_}
\def\%(cp)sZob{\char`\{}
\def\%(cp)sZcb{\char`\}}
\def\%(cp)sZca{\char`\^}
\def\%(cp)sZam{\char`\&}
\def\%(cp)sZlt{\char`\<}
\def\%(cp)sZgt{\char`\>}
\def\%(cp)sZsh{\char`\#}
\def\%(cp)sZpc{\char`\%%}
\def\%(cp)sZdl{\char`\$}
\def\%(cp)sZhy{\char`\-}
\def\%(cp)sZsq{\char`\'}
\def\%(cp)sZdq{\char`\"}
\def\%(cp)sZti{\char`\~}
%% for compatibility with earlier versions
\def\%(cp)sZat{@}
\def\%(cp)sZlb{[}
\def\%(cp)sZrb{]}
\makeatother
'''
|
|
|
|
|
def _get_ttype_name(ttype):
    """Return the short name used for *ttype* in generated macro names.

    A token type listed in ``STANDARD_TYPES`` with a non-empty name uses
    that name directly.  Otherwise the parent chain is climbed until a
    listed ancestor is found, and the last component of every type passed
    on the way up is appended to the ancestor's name, outermost first.
    """
    short = STANDARD_TYPES.get(ttype)
    if short:
        return short

    # Components are collected innermost first and reversed when joined.
    components = []
    node = ttype
    while short is None:
        components.append(node[-1])
        node = node.parent
        short = STANDARD_TYPES.get(node)
    return short + ''.join(reversed(components))
|
|
|
|
|
class LatexFormatter(Formatter):
    r"""
    Format tokens as LaTeX code. This needs the `fancyvrb` and `color`
    standard packages.

    Without the `full` option, code is formatted as one ``Verbatim``
    environment, like this:

    .. sourcecode:: latex

        \begin{Verbatim}[commandchars=\\\{\}]
        \PY{k}{def }\PY{n+nf}{foo}(\PY{n}{bar}):
            \PY{k}{pass}
        \end{Verbatim}

    Wrapping can be disabled using the `nowrap` option.

    The special command used here (``\PY``) and all the other macros it needs
    are output by the `get_style_defs` method.

    With the `full` option, a complete LaTeX document is output, including
    the command definitions in the preamble.

    The `get_style_defs()` method of a `LatexFormatter` returns a string
    containing ``\def`` commands defining the macros needed inside the
    ``Verbatim`` environments.

    Additional options accepted:

    `nowrap`
        If set to ``True``, don't wrap the tokens at all, not even inside a
        ``\begin{Verbatim}`` environment. This disables most other options
        (default: ``False``).

    `style`
        The style to use, can be a string or a Style subclass (default:
        ``'default'``).

    `full`
        Tells the formatter to output a "full" document, i.e. a complete
        self-contained document (default: ``False``).

    `title`
        If `full` is true, the title that should be used to caption the
        document (default: ``''``).

    `docclass`
        If the `full` option is enabled, this is the document class to use
        (default: ``'article'``).

    `preamble`
        If the `full` option is enabled, this can be further preamble commands,
        e.g. ``\usepackage`` (default: ``''``).

    `linenos`
        If set to ``True``, output line numbers (default: ``False``).

    `linenostart`
        The line number for the first line (default: ``1``).

    `linenostep`
        If set to a number n > 1, only every nth line number is printed.

    `verboptions`
        Additional options given to the Verbatim environment (see the *fancyvrb*
        docs for possible values) (default: ``''``).

    `commandprefix`
        The LaTeX commands used to produce colored output are constructed
        using this prefix and some letters (default: ``'PY'``).

        .. versionadded:: 0.7
        .. versionchanged:: 0.10
           The default is now ``'PY'`` instead of ``'C'``.

    `texcomments`
        If set to ``True``, enables LaTeX comment lines.  That is, LaTeX markup
        in comment tokens is not escaped so that LaTeX can render it (default:
        ``False``).

        .. versionadded:: 1.2

    `mathescape`
        If set to ``True``, enables LaTeX math mode escape in comments. That
        is, ``'$...$'`` inside a comment will trigger math mode (default:
        ``False``).

        .. versionadded:: 1.2

    `escapeinside`
        If set to a string of length 2, enables escaping to LaTeX. Text
        delimited by these 2 characters is read as LaTeX code and
        typeset accordingly. It has no effect in string literals. It has
        no effect in comments if `texcomments` or `mathescape` is
        set. (default: ``''``).

        .. versionadded:: 2.0

    `envname`
        Allows you to pick an alternative environment name replacing Verbatim.
        The alternate environment still has to support Verbatim's option syntax.
        (default: ``'Verbatim'``).

        .. versionadded:: 2.0
    """
    name = 'LaTeX'
    aliases = ['latex', 'tex']
    filenames = ['*.tex']

    def __init__(self, **options):
        """Read the formatter options and build the style command table."""
        Formatter.__init__(self, **options)
        self.nowrap = get_bool_opt(options, 'nowrap', False)
        self.docclass = options.get('docclass', 'article')
        self.preamble = options.get('preamble', '')
        self.linenos = get_bool_opt(options, 'linenos', False)
        # Negative values are folded to their absolute value.
        self.linenostart = abs(get_int_opt(options, 'linenostart', 1))
        self.linenostep = abs(get_int_opt(options, 'linenostep', 1))
        self.verboptions = options.get('verboptions', '')
        # Stored for callers; nothing in this class reads it.
        self.nobackground = get_bool_opt(options, 'nobackground', False)
        self.commandprefix = options.get('commandprefix', 'PY')
        self.texcomments = get_bool_opt(options, 'texcomments', False)
        self.mathescape = get_bool_opt(options, 'mathescape', False)
        self.escapeinside = options.get('escapeinside', '')
        if len(self.escapeinside) == 2:
            self.left = self.escapeinside[0]
            self.right = self.escapeinside[1]
        else:
            # Anything that is not exactly two characters disables the
            # feature; ``left``/``right`` are then never consulted.
            self.escapeinside = ''
        self.envname = options.get('envname', 'Verbatim')

        self._create_stylesheet()

    def _create_stylesheet(self):
        """Build the token-type -> name and name -> LaTeX definition tables.

        Fills ``self.ttype2name`` and ``self.cmd2def`` from ``self.style``
        (presumably set up by the ``Formatter`` base class -- it is not
        assigned in this class).  Token types without any styling get no
        entry in either table.
        """
        t2n = self.ttype2name = {Token: ''}
        c2d = self.cmd2def = {}
        cp = self.commandprefix

        def rgbcolor(col):
            # Convert an 'rrggbb' hex string into the 'r,g,b' form with
            # components in [0, 1]; an empty colour means white.
            if col:
                return ','.join(['%.2f' % (int(col[i] + col[i + 1], 16) / 255.0)
                                 for i in (0, 2, 4)])
            return '1,1,1'

        for ttype, ndef in self.style:
            name = _get_ttype_name(ttype)
            # '$$' is a placeholder for the command prefix; it is
            # substituted once the whole definition has been assembled.
            cmndef = ''
            if ndef['bold']:
                cmndef += r'\let\$$@bf=\textbf'
            if ndef['italic']:
                cmndef += r'\let\$$@it=\textit'
            if ndef['underline']:
                cmndef += r'\let\$$@ul=\underline'
            if ndef['roman']:
                cmndef += r'\let\$$@ff=\textrm'
            if ndef['sans']:
                cmndef += r'\let\$$@ff=\textsf'
            if ndef['mono']:
                # Monospace is the typewriter family, \texttt; using
                # \textsf here would render it as sans-serif.
                cmndef += r'\let\$$@ff=\texttt'
            if ndef['color']:
                cmndef += (r'\def\$$@tc##1{{\textcolor[rgb]{{{}}}{{##1}}}}'.format(rgbcolor(ndef['color'])))
            if ndef['border']:
                cmndef += (r'\def\$$@bc##1{{{{\setlength{{\fboxsep}}{{\string -\fboxrule}}'
                           r'\fcolorbox[rgb]{{{}}}{{{}}}{{\strut ##1}}}}}}'.format(rgbcolor(ndef['border']),
                                                                                  rgbcolor(ndef['bgcolor'])))
            elif ndef['bgcolor']:
                cmndef += (r'\def\$$@bc##1{{{{\setlength{{\fboxsep}}{{0pt}}'
                           r'\colorbox[rgb]{{{}}}{{\strut ##1}}}}}}'.format(rgbcolor(ndef['bgcolor'])))
            if cmndef == '':
                continue
            cmndef = cmndef.replace('$$', cp)
            t2n[ttype] = name
            c2d[name] = cmndef

    def get_style_defs(self, arg=''):
        """
        Return the command sequences needed to define the commands
        used to format text in the verbatim environment. ``arg`` is ignored.
        """
        cp = self.commandprefix
        styles = [rf'\@namedef{{{cp}@tok@{name}}}{{{definition}}}'
                  for name, definition in self.cmd2def.items()]
        return STYLE_TEMPLATE % {'cp': cp,
                                 'styles': '\n'.join(styles)}

    def format_unencoded(self, tokensource, outfile):
        """Write the ``(ttype, value)`` pairs of *tokensource* to *outfile*.

        Unless ``nowrap`` is set, the output is wrapped in the configured
        verbatim environment.  With ``full`` set, the code is first
        collected in a buffer and then embedded into ``DOC_TEMPLATE``.
        """
        t2n = self.ttype2name
        cp = self.commandprefix

        if self.full:
            # Collect the code separately; it is spliced into the document
            # template at the very end.
            realoutfile = outfile
            outfile = StringIO()

        if not self.nowrap:
            outfile.write('\\begin{' + self.envname + '}[commandchars=\\\\\\{\\}')
            if self.linenos:
                start, step = self.linenostart, self.linenostep
                # A value of 0 leaves the corresponding option out.
                outfile.write(',numbers=left' +
                              (',firstnumber=%d' % start if start else '') +
                              (',stepnumber=%d' % step if step else ''))
            if self.mathescape or self.texcomments or self.escapeinside:
                outfile.write(',codes={\\catcode`\\$=3\\catcode`\\^=7'
                              '\\catcode`\\_=8\\relax}')
            if self.verboptions:
                outfile.write(',' + self.verboptions)
            outfile.write(']\n')

        for ttype, value in tokensource:
            if ttype in Token.Comment:
                if self.texcomments:
                    # Escape only the leading run of identical characters
                    # (the comment marker); the rest is passed to LaTeX
                    # untouched so that markup in it is rendered.
                    rest = value.lstrip(value[:1])
                    marker = value[:len(value) - len(rest)]
                    value = escape_tex(marker, cp) + rest
                elif self.mathescape:
                    # Escape only the parts outside of $...$ pairs.
                    parts = value.split('$')
                    in_math = False
                    for i, part in enumerate(parts):
                        if not in_math:
                            parts[i] = escape_tex(part, cp)
                        in_math = not in_math
                    value = '$'.join(parts)
                elif self.escapeinside:
                    text = value
                    value = ''
                    while text:
                        a, sep1, text = text.partition(self.left)
                        if sep1:
                            b, sep2, text = text.partition(self.right)
                            if sep2:
                                # Delimited text is emitted verbatim and
                                # the delimiters themselves are dropped.
                                value += escape_tex(a, cp) + b
                            else:
                                # Unterminated escape: treat it as plain text.
                                value += escape_tex(a + sep1 + b, cp)
                        else:
                            value += escape_tex(a, cp)
                else:
                    value = escape_tex(value, cp)
            elif ttype not in Token.Escape:
                value = escape_tex(value, cp)

            # Collect the style names of the token type and all of its
            # ancestors, innermost first.
            styles = []
            while ttype is not Token:
                try:
                    styles.append(t2n[ttype])
                except KeyError:
                    # Not in the current style.
                    styles.append(_get_ttype_name(ttype))
                ttype = ttype.parent
            styleval = '+'.join(reversed(styles))
            if styleval:
                # The command is emitted once per line so that no command
                # argument spans a line break; empty lines get no command.
                spl = value.split('\n')
                for line in spl[:-1]:
                    if line:
                        outfile.write(f"\\{cp}{{{styleval}}}{{{line}}}")
                    outfile.write('\n')
                if spl[-1]:
                    outfile.write(f"\\{cp}{{{styleval}}}{{{spl[-1]}}}")
            else:
                outfile.write(value)

        if not self.nowrap:
            outfile.write('\\end{' + self.envname + '}\n')

        if self.full:
            encoding = self.encoding or 'utf8'
            # Map Python encoding names onto the names ``inputenc`` uses.
            # The lookup key is lower-cased because Python accepts
            # spellings such as 'UTF-8' while the table keys are lower-case;
            # unknown names are passed through unchanged.
            encoding = {
                'utf_8': 'utf8',
                'latin_1': 'latin1',
                'iso_8859_1': 'latin1',
            }.get(encoding.replace('-', '_').lower(), encoding)
            realoutfile.write(DOC_TEMPLATE %
                              dict(docclass=self.docclass,
                                   preamble=self.preamble,
                                   title=self.title,
                                   encoding=encoding,
                                   styledefs=self.get_style_defs(),
                                   code=outfile.getvalue()))
|
|
|
|
|
class LatexEmbeddedLexer(Lexer):
    """
    Lexer wrapper that recognises escaped LaTeX segments in source code.

    It is constructed with the left and right escape delimiters and the
    lexer of the language being formatted.

    The text is first run through the language lexer so that strings and
    comments can be set aside.  The remaining tokens are merged and
    searched for delimited segments, which become ``Token.Escape``
    tokens.  Whatever is not escaped is finally lexed again with the
    language lexer.
    """

    def __init__(self, left, right, lang, **options):
        """Store the delimiters and the wrapped language lexer."""
        self.left = left
        self.right = right
        self.lang = lang
        Lexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        """Lex *text* with the escape tokens spliced back into the stream.

        Escape (and error) tokens are cut out of the text and remembered
        together with the offset at which they were removed; the remaining
        text is lexed by the language lexer and ``do_insertions`` merges
        both streams.
        """
        plain_text = ''
        insertions = []
        pending = []
        for pos, ttype, val in self._find_safe_escape_tokens(text):
            if ttype is not None:
                pending.append((pos, ttype, val))
                continue
            if pending:
                # Tokens collected so far belong at the current end of
                # the plain text.
                insertions.append((len(plain_text), pending))
                pending = []
            plain_text += val
        if pending:
            insertions.append((len(plain_text), pending))
        lexed = self.lang.get_tokens_unprocessed(plain_text)
        return do_insertions(insertions, lexed)

    def _find_safe_escape_tokens(self, text):
        """ find escape tokens that are not in strings or comments """

        def is_protected(ttype):
            # Comments and strings are never searched for escapes.
            return ttype in Token.Comment or ttype in Token.String

        tokens = self.lang.get_tokens_unprocessed(text)
        for pos, ttype, val in self._filter_to(tokens, is_protected):
            if ttype is not None:
                # Protected token: hand it on as ordinary text.
                yield pos, None, val
                continue
            # Merged run of other tokens: shift the relative offsets of
            # whatever is found inside it to absolute positions.
            yield from ((pos + offset, found, segment)
                        for offset, found, segment
                        in self._find_escape_tokens(val))

    def _filter_to(self, it, pred):
        """ Keep only the tokens that match `pred`, merge the others together """
        merged = ''
        merged_start = 0
        for pos, ttype, val in it:
            if not pred(ttype):
                if not merged:
                    # A new merged run starts at this token.
                    merged_start = pos
                merged += val
                continue
            if merged:
                yield merged_start, None, merged
                merged = ''
            yield pos, ttype, val
        if merged:
            yield merged_start, None, merged

    def _find_escape_tokens(self, text):
        """ Find escape tokens within text, give token=None otherwise """
        pos = 0
        remaining = text
        while remaining:
            plain, opener, remaining = remaining.partition(self.left)
            if plain:
                yield pos, None, plain
                pos += len(plain)
            if not opener:
                # No further left delimiter; ``remaining`` is empty now.
                continue
            escaped, closer, remaining = remaining.partition(self.right)
            if closer:
                yield pos + len(opener), Token.Escape, escaped
                pos += len(opener) + len(escaped) + len(closer)
            else:
                # Unterminated escape: report the delimiter as an error
                # and keep scanning what follows it.
                yield pos, Token.Error, opener
                pos += len(opener)
                remaining = escaped
|
|