|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
r"""Support for regular expressions (RE). |
|
|
|
This module provides regular expression matching operations similar to those |
|
found in Perl. It supports both 8-bit and Unicode strings; both the pattern and |
|
the strings being processed can contain null bytes and characters outside the |
|
US ASCII range. |
|
|
|
Regular expressions can contain both special and ordinary characters. Most |
|
ordinary characters, like "A", "a", or "0", are the simplest regular |
|
expressions; they simply match themselves. You can concatenate ordinary |
|
characters, so last matches the string 'last'. |
|
|
|
There are a few differences between the old (legacy) behaviour and the new |
|
(enhanced) behaviour, which are indicated by VERSION0 or VERSION1. |
|
|
|
The special characters are: |
|
"." Matches any character except a newline. |
|
"^" Matches the start of the string. |
|
"$" Matches the end of the string or just before the |
|
newline at the end of the string. |
|
"*" Matches 0 or more (greedy) repetitions of the preceding |
|
RE. Greedy means that it will match as many repetitions |
|
as possible. |
|
"+" Matches 1 or more (greedy) repetitions of the preceding |
|
RE. |
|
"?" Matches 0 or 1 (greedy) of the preceding RE. |
|
*?,+?,?? Non-greedy versions of the previous three special |
|
characters. |
|
*+,++,?+ Possessive versions of the previous three special |
|
characters. |
|
{m,n} Matches from m to n repetitions of the preceding RE. |
|
{m,n}? Non-greedy version of the above. |
|
{m,n}+ Possessive version of the above. |
|
{...} Fuzzy matching constraints. |
|
"\\" Either escapes special characters or signals a special |
|
sequence. |
|
[...] Indicates a set of characters. A "^" as the first |
|
character indicates a complementing set. |
|
"|" A|B, creates an RE that will match either A or B. |
|
(...) Matches the RE inside the parentheses. The contents are |
|
captured and can be retrieved or matched later in the |
|
string. |
|
(?flags-flags) VERSION1: Sets/clears the flags for the remainder of |
|
the group or pattern; VERSION0: Sets the flags for the |
|
entire pattern. |
|
(?:...) Non-capturing version of regular parentheses. |
|
(?>...) Atomic non-capturing version of regular parentheses. |
|
(?flags-flags:...) Non-capturing version of regular parentheses with local |
|
flags. |
|
(?P<name>...) The substring matched by the group is accessible by |
|
name. |
|
(?<name>...) The substring matched by the group is accessible by |
|
name. |
|
(?P=name) Matches the text matched earlier by the group named |
|
name. |
|
(?#...) A comment; ignored. |
|
(?=...) Matches if ... matches next, but doesn't consume the |
|
string. |
|
(?!...) Matches if ... doesn't match next. |
|
(?<=...) Matches if preceded by .... |
|
(?<!...) Matches if not preceded by .... |
|
(?(id)yes|no) Matches yes pattern if group id matched, the (optional) |
|
no pattern otherwise. |
|
(?(DEFINE)...) If there's no group called "DEFINE", then ... will be |
|
ignored, but any group definitions will be available. |
|
(?|...|...) (?|A|B), creates an RE that will match either A or B, |
|
but reuses capture group numbers across the |
|
alternatives. |
|
(*FAIL) Forces matching to fail, which means immediate |
|
backtracking. |
|
(*F) Abbreviation for (*FAIL). |
|
(*PRUNE) Discards the current backtracking information. Its |
|
effect doesn't extend outside an atomic group or a |
|
lookaround. |
|
(*SKIP) Similar to (*PRUNE), except that it also sets where in |
|
the text the next attempt at matching the entire |
|
pattern will start. Its effect doesn't extend outside |
|
an atomic group or a lookaround. |
|
|
|
The fuzzy matching constraints are: "i" to permit insertions, "d" to permit |
|
deletions, "s" to permit substitutions, "e" to permit any of these. Limits are |
|
optional with "<=" and "<". If any type of error is provided then any type not |
|
provided is not permitted. |
|
|
|
A cost equation may be provided. |
|
|
|
Examples: |
|
(?:fuzzy){i<=2} |
|
(?:fuzzy){i<=1,s<=2,d<=1,1i+1s+1d<3} |
|
|
|
VERSION1: Set operators are supported, and a set can include nested sets. The |
|
set operators, in order of increasing precedence, are: |
|
|| Set union ("x||y" means "x or y"). |
|
~~ (double tilde) Symmetric set difference ("x~~y" means "x or y, but not |
|
both"). |
|
&& Set intersection ("x&&y" means "x and y"). |
|
-- (double dash) Set difference ("x--y" means "x but not y"). |
|
|
|
Implicit union, i.e., simple juxtaposition like in [ab], has the highest
|
precedence. |
|
|
|
VERSION0 and VERSION1: |
|
The special sequences consist of "\\" and a character from the list below. If |
|
the ordinary character is not on the list, then the resulting RE will match the |
|
second character. |
|
\number Matches the contents of the group of the same number if |
|
number is no more than 2 digits, otherwise the character |
|
with the 3-digit octal code. |
|
\a Matches the bell character. |
|
\A Matches only at the start of the string. |
|
\b Matches the empty string, but only at the start or end of a |
|
word. |
|
\B Matches the empty string, but not at the start or end of a |
|
word. |
|
\d Matches any decimal digit; equivalent to the set [0-9] when |
|
matching a bytestring or a Unicode string with the ASCII |
|
flag, or the whole range of Unicode digits when matching a |
|
Unicode string. |
|
\D Matches any non-digit character; equivalent to [^\d]. |
|
\f Matches the formfeed character. |
|
\g<name> Matches the text matched by the group named name. |
|
\G Matches the empty string, but only at the position where |
|
the search started. |
|
\h Matches horizontal whitespace. |
|
\K Keeps only what follows for the entire match. |
|
\L<name> Named list. The list is provided as a keyword argument. |
|
\m Matches the empty string, but only at the start of a word. |
|
\M Matches the empty string, but only at the end of a word. |
|
\n Matches the newline character. |
|
\N{name} Matches the named character. |
|
\p{name=value} Matches the character if its property has the specified |
|
value. |
|
\P{name=value} Matches the character if its property doesn't have the
specified value.
|
\r Matches the carriage-return character. |
|
\s Matches any whitespace character; equivalent to |
|
[ \t\n\r\f\v]. |
|
\S Matches any non-whitespace character; equivalent to [^\s]. |
|
\t Matches the tab character. |
|
\uXXXX Matches the Unicode codepoint with 4-digit hex code XXXX. |
|
\UXXXXXXXX Matches the Unicode codepoint with 8-digit hex code |
|
XXXXXXXX. |
|
\v Matches the vertical tab character. |
|
\w Matches any alphanumeric character; equivalent to |
|
[a-zA-Z0-9_] when matching a bytestring or a Unicode string |
|
with the ASCII flag, or the whole range of Unicode |
|
alphanumeric characters (letters plus digits plus |
|
underscore) when matching a Unicode string. With LOCALE, it |
|
will match the set [0-9_] plus characters defined as |
|
letters for the current locale. |
|
\W Matches the complement of \w; equivalent to [^\w]. |
|
\xXX Matches the character with 2-digit hex code XX. |
|
\X Matches a grapheme. |
|
\Z Matches only at the end of the string. |
|
\\ Matches a literal backslash. |
|
|
|
This module exports the following functions: |
|
match Match a regular expression pattern at the beginning of a string. |
|
fullmatch Match a regular expression pattern against all of a string. |
|
search Search a string for the presence of a pattern. |
|
sub Substitute occurrences of a pattern found in a string using a |
|
template string. |
|
subf Substitute occurrences of a pattern found in a string using a |
|
format string. |
|
subn Same as sub, but also return the number of substitutions made. |
|
subfn Same as subf, but also return the number of substitutions made. |
|
split Split a string by the occurrences of a pattern. VERSION1: will |
|
split at zero-width match; VERSION0: won't split at zero-width |
|
match. |
|
splititer Return an iterator yielding the parts of a split string. |
|
findall Find all occurrences of a pattern in a string. |
|
finditer Return an iterator yielding a match object for each match. |
|
compile Compile a pattern into a Pattern object. |
|
purge Clear the regular expression cache. |
|
escape Backslash all non-alphanumerics or special characters in a |
|
string. |
|
|
|
Most of the functions support a concurrent parameter: if True, the GIL will be |
|
released during matching, allowing other Python threads to run concurrently. If |
|
the string changes during matching, the behaviour is undefined. This parameter |
|
is not needed when working on the builtin (immutable) string classes. |
|
|
|
Some of the functions in this module take flags as optional parameters. Most of |
|
these flags can also be set within an RE: |
|
A a ASCII Make \w, \W, \b, \B, \d, and \D match the |
|
corresponding ASCII character categories. Default |
|
when matching a bytestring. |
|
B b BESTMATCH Find the best fuzzy match (default is first). |
|
D DEBUG Print the parsed pattern. |
|
E e ENHANCEMATCH Attempt to improve the fit after finding the first |
|
fuzzy match. |
|
F f FULLCASE Use full case-folding when performing |
|
case-insensitive matching in Unicode. |
|
I i IGNORECASE Perform case-insensitive matching. |
|
L L LOCALE Make \w, \W, \b, \B, \d, and \D dependent on the |
|
current locale. (One byte per character only.) |
|
M m MULTILINE "^" matches the beginning of lines (after a newline) |
|
as well as the string. "$" matches the end of lines |
|
(before a newline) as well as the end of the string. |
|
P p POSIX Perform POSIX-standard matching (leftmost longest). |
|
R r REVERSE Searches backwards. |
|
S s DOTALL "." matches any character at all, including the |
|
newline. |
|
U u UNICODE Make \w, \W, \b, \B, \d, and \D dependent on the |
|
Unicode locale. Default when matching a Unicode |
|
string. |
|
V0 V0 VERSION0 Turn on the old legacy behaviour. |
|
V1 V1 VERSION1 Turn on the new enhanced behaviour. This flag |
|
includes the FULLCASE flag. |
|
W w WORD Make \b and \B work with default Unicode word breaks |
|
and make ".", "^" and "$" work with Unicode line |
|
breaks. |
|
X x VERBOSE Ignore whitespace and comments for nicer looking REs. |
|
|
|
This module also defines an exception 'error'. |
|
|
|
""" |
|
|
|
|
|
# Names exported by "from <this module> import *". "Pattern" and "Match" are
# appended further down, once those types have been obtained.
__all__ = ["cache_all", "compile", "DEFAULT_VERSION", "escape", "findall",
  "finditer", "fullmatch", "match", "purge", "search", "split", "splititer",
  "sub", "subf", "subfn", "subn", "template", "Scanner", "A", "ASCII", "B",
  "BESTMATCH", "D", "DEBUG", "E", "ENHANCEMATCH", "S", "DOTALL", "F",
  "FULLCASE", "I", "IGNORECASE", "L", "LOCALE", "M", "MULTILINE", "P", "POSIX",
  "R", "REVERSE", "T", "TEMPLATE", "U", "UNICODE", "V0", "VERSION0", "V1",
  "VERSION1", "X", "VERBOSE", "W", "WORD", "error", "Regex", "__version__",
  "__doc__", "RegexFlag"]

# Version string of this module.
__version__ = "2.5.148"
|
|
|
|
|
|
|
|
|
def match(pattern, string, flags=0, pos=None, endpos=None, partial=False,
          concurrent=None, timeout=None, ignore_unused=False, **kwargs):
    """Match the pattern at the start of the string.

    The pattern is compiled (with caching requested) from pattern, flags and
    any extra keyword arguments, and the compiled pattern's match() method
    does the work.

    Returns a match object, or None if no match was found.
    """
    compiled = _compile(pattern, flags, ignore_unused, kwargs, True)
    return compiled.match(string, pos, endpos, concurrent, partial, timeout)
|
|
|
def fullmatch(pattern, string, flags=0, pos=None, endpos=None, partial=False,
              concurrent=None, timeout=None, ignore_unused=False, **kwargs):
    """Match the pattern against all of the string.

    The pattern is compiled (with caching requested) and the compiled
    pattern's fullmatch() method is called.

    Returns a match object, or None if no match was found.
    """
    compiled = _compile(pattern, flags, ignore_unused, kwargs, True)
    return compiled.fullmatch(string, pos, endpos, concurrent, partial,
                              timeout)
|
|
|
def search(pattern, string, flags=0, pos=None, endpos=None, partial=False,
           concurrent=None, timeout=None, ignore_unused=False, **kwargs):
    """Search the string for a match to the pattern.

    The pattern is compiled (with caching requested) and the compiled
    pattern's search() method is called.

    Returns a match object, or None if no match was found.
    """
    compiled = _compile(pattern, flags, ignore_unused, kwargs, True)
    return compiled.search(string, pos, endpos, concurrent, partial, timeout)
|
|
|
def sub(pattern, repl, string, count=0, flags=0, pos=None, endpos=None,
        concurrent=None, timeout=None, ignore_unused=False, **kwargs):
    """Replace occurrences of the pattern in string by repl.

    The leftmost (or, with a reverse pattern, rightmost) non-overlapping
    occurrences are replaced. repl is either a string, in which case backslash
    escapes in it are processed, or a callable, in which case it's passed the
    match object and must return the replacement string.

    Returns the resulting string.
    """
    compiled = _compile(pattern, flags, ignore_unused, kwargs, True)
    return compiled.sub(repl, string, count, pos, endpos, concurrent, timeout)
|
|
|
def subf(pattern, format, string, count=0, flags=0, pos=None, endpos=None,
         concurrent=None, timeout=None, ignore_unused=False, **kwargs):
    """Replace occurrences of the pattern in string using a format.

    The leftmost (or, with a reverse pattern, rightmost) non-overlapping
    occurrences are replaced. format is either a string, in which case it's
    treated as a format string, or a callable, in which case it's passed the
    match object and must return the replacement string.

    Returns the resulting string.
    """
    compiled = _compile(pattern, flags, ignore_unused, kwargs, True)
    return compiled.subf(format, string, count, pos, endpos, concurrent,
                         timeout)
|
|
|
def subn(pattern, repl, string, count=0, flags=0, pos=None, endpos=None,
         concurrent=None, timeout=None, ignore_unused=False, **kwargs):
    """Same as sub, but also report how many substitutions were made.

    The leftmost (or, with a reverse pattern, rightmost) non-overlapping
    occurrences of the pattern in the source string are replaced by repl.
    repl is either a string, in which case backslash escapes in it are
    processed, or a callable, in which case it's passed the match object and
    must return the replacement string.

    Returns a 2-tuple (new_string, number), where number is the number of
    substitutions that were made.
    """
    compiled = _compile(pattern, flags, ignore_unused, kwargs, True)
    return compiled.subn(repl, string, count, pos, endpos, concurrent,
                         timeout)
|
|
|
def subfn(pattern, format, string, count=0, flags=0, pos=None, endpos=None,
          concurrent=None, timeout=None, ignore_unused=False, **kwargs):
    """Same as subf, but also report how many substitutions were made.

    The leftmost (or, with a reverse pattern, rightmost) non-overlapping
    occurrences of the pattern in the source string are replaced using
    format. format is either a string, in which case it's treated as a format
    string, or a callable, in which case it's passed the match object and must
    return the replacement string.

    Returns a 2-tuple (new_string, number), where number is the number of
    substitutions that were made.
    """
    compiled = _compile(pattern, flags, ignore_unused, kwargs, True)
    return compiled.subfn(format, string, count, pos, endpos, concurrent,
                          timeout)
|
|
|
def split(pattern, string, maxsplit=0, flags=0, concurrent=None, timeout=None,
          ignore_unused=False, **kwargs):
    """Split the source string by the occurrences of the pattern.

    If capturing parentheses are used in pattern, the text of all groups in
    the pattern is also returned as part of the resulting list. If maxsplit
    is nonzero, at most maxsplit splits occur and the remainder of the string
    is returned as the final element of the list.

    Returns a list containing the resulting substrings.
    """
    compiled = _compile(pattern, flags, ignore_unused, kwargs, True)
    return compiled.split(string, maxsplit, concurrent, timeout)
|
|
|
def splititer(pattern, string, maxsplit=0, flags=0, concurrent=None,
              timeout=None, ignore_unused=False, **kwargs):
    """Return an iterator yielding the parts of a split string.

    The pattern is compiled (with caching requested) and the compiled
    pattern's splititer() method is called.
    """
    compiled = _compile(pattern, flags, ignore_unused, kwargs, True)
    return compiled.splititer(string, maxsplit, concurrent, timeout)
|
|
|
def findall(pattern, string, flags=0, pos=None, endpos=None, overlapped=False,
            concurrent=None, timeout=None, ignore_unused=False, **kwargs):
    """Return a list of all matches in the string.

    The matches may be overlapped if overlapped is True. If one or more
    groups are present in the pattern, a list of groups is returned; this is
    a list of tuples if the pattern has more than one group. Empty matches
    are included in the result.
    """
    compiled = _compile(pattern, flags, ignore_unused, kwargs, True)
    return compiled.findall(string, pos, endpos, overlapped, concurrent,
                            timeout)
|
|
|
def finditer(pattern, string, flags=0, pos=None, endpos=None, overlapped=False,
             partial=False, concurrent=None, timeout=None, ignore_unused=False,
             **kwargs):
    """Return an iterator over all matches in the string.

    The matches may be overlapped if overlapped is True. The iterator returns
    a match object for each match. Empty matches are included in the result.
    """
    compiled = _compile(pattern, flags, ignore_unused, kwargs, True)
    return compiled.finditer(string, pos, endpos, overlapped, concurrent,
                             partial, timeout)
|
|
|
def compile(pattern, flags=0, ignore_unused=False, cache_pattern=None, **kwargs):
    """Compile a regular expression pattern, returning a pattern object.

    cache_pattern says whether the compiled pattern should be cached; when
    it's None, the module-wide setting controlled by cache_all() is used
    instead.
    """
    should_cache = _cache_all if cache_pattern is None else cache_pattern
    return _compile(pattern, flags, ignore_unused, kwargs, should_cache)
|
|
|
def purge():
    """Clear the regular expression cache.

    Empties both the compiled-pattern cache and the record of which patterns
    are locale-sensitive.
    """
    for store in (_cache, _locale_sensitive):
        store.clear()
|
|
|
|
|
# Whether compile() caches patterns when its cache_pattern argument is None.
_cache_all = True


def cache_all(value=True):
    """Set whether to cache all patterns, even those that are compiled
    explicitly.

    Passing None leaves the setting unchanged and returns its current value;
    any other value is stored as the new setting and None is returned.
    """
    global _cache_all

    if value is not None:
        _cache_all = value
        return None

    return _cache_all
|
|
|
def template(pattern, flags=0):
    """Compile a template pattern, returning a pattern object.

    The TEMPLATE flag is added to flags, unused keyword arguments are not
    ignored (none are passed), and the result is not cached.
    """
    template_flags = flags | TEMPLATE
    return _compile(pattern, template_flags, False, {}, False)
|
|
|
def escape(pattern, special_only=True, literal_spaces=False):
    """Escape a string for use as a literal in a pattern.

    If special_only is True, only special characters (the metacharacters and
    whitespace) are escaped; otherwise every non-alphanumeric character is
    escaped. If literal_spaces is True, spaces are left unescaped.

    A bytes pattern is decoded as latin-1, escaped, and encoded back to bytes,
    so the result has the same type as the argument.
    """
    is_bytes = isinstance(pattern, bytes)
    text = pattern.decode("latin-1") if is_bytes else pattern

    # Choose the test for "this character needs a backslash" once, up front.
    if special_only:
        def needs_backslash(ch):
            return ch in _METACHARS or ch.isspace()
    else:
        def needs_backslash(ch):
            return ch not in _ALNUM

    pieces = []
    for ch in text:
        # A literal space is checked first, so it wins over the escaping test.
        if ch == " " and literal_spaces:
            pieces.append(ch)
        elif needs_backslash(ch):
            pieces.append("\\" + ch)
        else:
            pieces.append(ch)

    escaped = "".join(pieces)

    return escaped.encode("latin-1") if is_bytes else escaped
|
|
|
|
|
|
|
|
|
import regex._regex_core as _regex_core |
|
import regex._regex as _regex |
|
from threading import RLock as _RLock |
|
from locale import getpreferredencoding as _getpreferredencoding |
|
from regex._regex_core import * |
|
from regex._regex_core import (_ALL_VERSIONS, _ALL_ENCODINGS, _FirstSetError, |
|
_UnscopedFlagSet, _check_group_features, _compile_firstset, |
|
_compile_replacement, _flatten_code, _fold_case, _get_required_string, |
|
_parse_pattern, _shrink_cache) |
|
from regex._regex_core import (ALNUM as _ALNUM, Info as _Info, OP as _OP, Source |
|
as _Source, Fuzzy as _Fuzzy) |
|
|
|
|
|
|
|
|
|
# The version (behaviour) used when a pattern doesn't specify one. VERSION0 is
# described in the module docstring as the old (legacy) behaviour.
DEFAULT_VERSION = VERSION0

# The characters that escape() backslashes when special_only is true.
_METACHARS = frozenset("()[]{}?*+|^$\\.-#&~")

# Give the core module the same default version as this module.
_regex_core.DEFAULT_VERSION = DEFAULT_VERSION

# Compiled patterns, keyed by (pattern, type of pattern, flags, named lists,
# default version, locale).
_cache = {}
# Held while the pattern cache is being shrunk.
_cache_lock = _RLock()
# Maps (pattern, type of pattern, flags) to the named lists the pattern needs.
_named_args = {}
# Compiled replacement templates, keyed by (pattern string, pattern flags,
# template).
_replacement_cache = {}
# Maps (type of pattern, pattern) to the inline_locale value recorded when the
# pattern was last compiled.
_locale_sensitive = {}

# The pattern cache is shrunk once it holds this many entries.
_MAXCACHE = 500
# The replacement cache is cleared once it holds this many entries.
_MAXREPCACHE = 500
|
|
|
def _compile(pattern, flags, ignore_unused, kwargs, cache_it):
    """Compiles a regular expression to a PatternObject.

    Arguments:
        pattern: a str or bytes pattern, or an already-compiled Pattern (which
            is returned unchanged, provided that flags is 0).
        flags: the flags for the pattern.
        ignore_unused: if true, keyword arguments that the pattern doesn't use
            are not reported as an error.
        kwargs: a dict of the extra keyword arguments; the named lists used by
            the pattern are looked up in it by name.
        cache_it: whether to look the pattern up in, and store it in, the
            pattern cache. It's overridden to False if DEBUG is in flags.

    Returns the compiled pattern, either from the cache or newly built.

    Raises:
        error: the pattern is invalid, or a named list needed by a cached
            pattern wasn't supplied.
        ValueError: there's an unused keyword argument (unless ignore_unused),
            flags were given with a compiled pattern, or the flags conflict.
        TypeError: pattern is not a string, bytes or compiled pattern.
    """

    # Pick up the package's current DEFAULT_VERSION, if it can be imported,
    # rebinding this module's global of the same name.
    global DEFAULT_VERSION
    try:
        from regex import DEFAULT_VERSION
    except ImportError:
        pass

    # Don't use the cache when the DEBUG flag is set.
    if (flags & DEBUG) != 0:
        cache_it = False

    # Work out the locale to use in the cache key. A pattern that hasn't been
    # recorded in _locale_sensitive yet is treated as possibly
    # locale-sensitive (the .get() default is True).
    locale_key = (type(pattern), pattern)
    if _locale_sensitive.get(locale_key, True) or (flags & LOCALE) != 0:
        # This pattern is, or might be, locale-sensitive.
        pattern_locale = _getpreferredencoding()
    else:
        # This pattern has been recorded as not having an inline locale flag.
        pattern_locale = None

    def complain_unused_args():
        # Raises ValueError for a keyword argument that isn't one of the named
        # lists in args_needed. args_needed is a variable of the enclosing
        # function and is read when this helper is called, not when it's
        # defined.
        if ignore_unused:
            return

        # Only one of the unused keyword arguments is reported.
        unused_kwargs = set(kwargs) - {k for k, v in args_needed}
        if unused_kwargs:
            any_one = next(iter(unused_kwargs))
            raise ValueError('unused keyword argument {!a}'.format(any_one))

    if cache_it:
        try:
            # Do we know which named lists this pattern needs? A KeyError here
            # means that the pattern hasn't been cached before.
            args_key = pattern, type(pattern), flags
            args_needed = _named_args[args_key]

            # Collect the supplied values of the named lists that it needs.
            args_supplied = set()
            if args_needed:
                for k, v in args_needed:
                    try:
                        args_supplied.add((k, frozenset(kwargs[k])))
                    except KeyError:
                        raise error("missing named list: {!r}".format(k))

            complain_unused_args()

            args_supplied = frozenset(args_supplied)

            # Has this pattern been compiled with these named lists, default
            # version and locale before? A KeyError here means that it hasn't.
            pattern_key = (pattern, type(pattern), flags, args_supplied,
              DEFAULT_VERSION, pattern_locale)
            return _cache[pattern_key]
        except KeyError:
            # It's not in the cache, so fall through and compile it.
            pass

    # Guess the encoding from the type of the pattern.
    if isinstance(pattern, str):
        guess_encoding = UNICODE
    elif isinstance(pattern, bytes):
        guess_encoding = ASCII
    elif isinstance(pattern, Pattern):
        # It's already compiled, so flags can't be applied to it.
        if flags:
            raise ValueError("cannot process flags argument with a compiled pattern")

        return pattern
    else:
        raise TypeError("first argument must be a string or compiled pattern")

    # Pass the default version on to the core module.
    _regex_core.DEFAULT_VERSION = DEFAULT_VERSION

    global_flags = flags

    # Parse the pattern, starting again with the updated global flags each time
    # that the parser raises _UnscopedFlagSet.
    while True:
        caught_exception = None
        try:
            source = _Source(pattern)
            info = _Info(global_flags, source.char_type, kwargs)
            info.guess_encoding = guess_encoding
            source.ignore_space = bool(info.flags & VERBOSE)
            parsed = _parse_pattern(source, info)
            break
        except _UnscopedFlagSet:
            # Remember the global flags for the next attempt.
            global_flags = info.global_flags
        except error as e:
            caught_exception = e

        # A new error with the same details is raised outside the 'except'
        # clause (presumably to keep the traceback short; not shown here).
        if caught_exception:
            raise error(caught_exception.msg, caught_exception.pattern,
              caught_exception.pos)

    # The parser stopped before the end of the pattern.
    if not source.at_end():
        raise error("unbalanced parenthesis", pattern, source.pos)

    # Check the flags for conflicts.
    version = (info.flags & _ALL_VERSIONS) or DEFAULT_VERSION
    if version not in (0, VERSION0, VERSION1):
        raise ValueError("VERSION0 and VERSION1 flags are mutually incompatible")

    if (info.flags & _ALL_ENCODINGS) not in (0, ASCII, LOCALE, UNICODE):
        raise ValueError("ASCII, LOCALE and UNICODE flags are mutually incompatible")

    if isinstance(pattern, bytes) and (info.flags & UNICODE):
        raise ValueError("cannot use UNICODE flag with a bytes pattern")

    # If no encoding flag was given, default to UNICODE for a str pattern and
    # ASCII for a bytes pattern.
    if not (info.flags & _ALL_ENCODINGS):
        if isinstance(pattern, str):
            info.flags |= UNICODE
        else:
            info.flags |= ASCII

    reverse = bool(info.flags & REVERSE)
    fuzzy = isinstance(parsed, _Fuzzy)

    # Record the pattern's inline_locale value for later calls.
    _locale_sensitive[locale_key] = info.inline_locale

    # Fix the group references, raising any error in the same way as for the
    # parsing above.
    caught_exception = None
    try:
        parsed.fix_groups(pattern, reverse, False)
    except error as e:
        caught_exception = e

    if caught_exception:
        raise error(caught_exception.msg, caught_exception.pattern,
          caught_exception.pos)

    # Dump the parsed pattern if the DEBUG flag was passed in.
    if flags & DEBUG:
        parsed.dump(indent=0, reverse=reverse)

    # Optimise the parsed pattern.
    parsed = parsed.optimise(info, reverse)
    parsed = parsed.pack_characters(info)

    # Get the required string.
    req_offset, req_chars, req_flags = _get_required_string(parsed, info.flags)

    # Build the named lists. named_lists holds the values as supplied, whereas
    # named_list_indexes holds them case-folded where case flags apply.
    named_lists = {}
    named_list_indexes = [None] * len(info.named_lists_used)
    args_needed = set()
    for key, index in info.named_lists_used.items():
        name, case_flags = key
        values = frozenset(kwargs[name])
        if case_flags:
            items = frozenset(_fold_case(info, v) for v in values)
        else:
            items = values
        named_lists[name] = values
        named_list_indexes[index] = items
        args_needed.add((name, values))

    # Now that args_needed is known, check for unused keyword arguments.
    complain_unused_args()

    # Check the features of the groups.
    _check_group_features(info, parsed)

    # Compile the parsed pattern into code.
    code = parsed.compile(reverse)

    # If there's a call reference recorded for (0, reverse, fuzzy), wrap the
    # code in CALL_REF ... END.
    key = (0, reverse, fuzzy)
    ref = info.call_refs.get(key)
    if ref is not None:
        code = [(_OP.CALL_REF, ref)] + code + [(_OP.END, )]

    # Add the final SUCCESS opcode.
    code += [(_OP.SUCCESS, )]

    # Append the code for the additional groups.
    for group, rev, fuz in info.additional_groups:
        code += group.compile(rev, fuz)

    # Flatten the code.
    code = _flatten_code(code)

    if not parsed.has_simple_start():
        # Put the code for the first set in front, if one can be built.
        try:
            fs_code = _compile_firstset(info, parsed.get_firstset(reverse))
            fs_code = _flatten_code(fs_code)
            code = fs_code + code
        except _FirstSetError:
            pass

    # The inverse of info.group_index.
    index_group = dict((v, n) for n, v in info.group_index.items())

    # Create the pattern object.
    compiled_pattern = _regex.compile(pattern, info.flags | version, code,
      info.group_index, index_group, named_lists, named_list_indexes,
      req_offset, req_chars, req_flags, info.group_count)

    # Shrink the cache if it has reached its maximum size. This is done even
    # when this pattern isn't going to be cached.
    if len(_cache) >= _MAXCACHE:
        with _cache_lock:
            _shrink_cache(_cache, _named_args, _locale_sensitive, _MAXCACHE)

    if cache_it:
        # The locale is part of the key only if the LOCALE flag is set.
        if (info.flags & LOCALE) == 0:
            pattern_locale = None

        args_needed = frozenset(args_needed)

        # Store the compiled pattern.
        pattern_key = (pattern, type(pattern), flags, args_needed,
          DEFAULT_VERSION, pattern_locale)
        _cache[pattern_key] = compiled_pattern

        # Store which named lists it needs. args_key was set during the cache
        # lookup above, which always runs when cache_it is true.
        _named_args[args_key] = args_needed

    return compiled_pattern
|
|
|
def _compile_replacement_helper(pattern, template):
    """Compiles a replacement template.

    The result is a list in which runs of literal characters have been joined
    into strings (str or bytes, to match the template) and group references
    appear as the items returned by _compile_replacement. Results are cached
    by (pattern string, pattern flags, template); the cache is emptied when it
    reaches _MAXREPCACHE entries.
    """
    cache_key = (pattern.pattern, pattern.flags, template)
    cached = _replacement_cache.get(cache_key)
    if cached is not None:
        return cached

    # The cache is full, so start again from empty.
    if len(_replacement_cache) >= _MAXREPCACHE:
        _replacement_cache.clear()

    is_unicode = isinstance(template, str)
    source = _Source(template)

    def make_string(char_codes):
        # Turn the character codes into the same kind of string as the template.
        if is_unicode:
            return "".join(chr(c) for c in char_codes)
        return bytes(char_codes)

    parts = []
    pending = []

    def flush_pending():
        # Move any pending literal characters into the result as one string.
        if pending:
            parts.append(make_string(pending))
            pending.clear()

    ch = source.get()
    while ch:
        if ch != "\\":
            pending.append(ord(ch))
        else:
            # An escape is either a group reference or literal character(s).
            is_group, items = _compile_replacement(source, pattern, is_unicode)
            if is_group:
                # The literal before the group reference has to go in first.
                flush_pending()
                parts.extend(items)
            else:
                pending.extend(items)
        ch = source.get()

    flush_pending()

    _replacement_cache[cache_key] = parts

    return parts
|
|
|
|
|
# Compile an empty pattern (without caching it) purely to get hold of the types
# of pattern objects and match objects, then discard it.
_pat = _compile('', 0, False, {}, False)
Pattern = type(_pat)
Match = type(_pat.match(''))
del _pat

# Make the two types part of the public interface.
__all__.append("Pattern")
__all__.append("Match")

# 'Regex' is another name for compile().
Regex = compile
|
|
|
|
|
import copyreg as _copy_reg |
|
|
|
def _pickle(pattern):
    """Return the (callable, arguments) pair used to pickle a pattern object:
    _regex.compile together with the pattern's _pickled_data."""
    rebuild = _regex.compile
    return rebuild, pattern._pickled_data
|
|
|
# Register _pickle with copyreg as the reduction function for Pattern objects.
_copy_reg.pickle(Pattern, _pickle)
|
|