|
from einops import EinopsError |
|
import keyword |
|
import warnings |
|
from typing import List, Optional, Set, Tuple, Union |
|
|
|
_ellipsis: str = "β¦" |
|
|
|
|
|
class AnonymousAxis:
    """
    Unnamed axis of fixed length, written as a number in a pattern (e.g. the ``2`` in ``'(h 2)'``).

    The class deliberately defines no ``__eq__``/``__hash__``, so the default
    identity semantics apply: two instances are never equal to each other, even
    when they have the same length.

    Attributes:
        value: length of the axis as an ``int``; always greater than 1.
    """

    def __init__(self, value: str):
        """
        Args:
            value: axis length; converted with ``int(value)``.

        Raises:
            EinopsError: if the length is 1 or is not positive.
            ValueError: if ``value`` cannot be converted to ``int``.
        """
        self.value = int(value)
        if self.value == 1:
            # A length-1 axis is never supposed to reach this constructor.
            raise EinopsError("No need to create anonymous axis of length 1. Report this as an issue")
        if self.value < 1:
            raise EinopsError("Anonymous axis should have positive length, not {}".format(self.value))

    def __repr__(self):
        return "{}-axis".format(self.value)
|
|
|
|
|
class ParsedExpression:
    """
    Non-mutable structure that contains information about one side of an
    expression (e.g. 'b c (h w)') and keeps some information important for downstream.

    Attributes:
        has_ellipsis: True when the expression contains an ellipsis ("...").
        has_ellipsis_parenthesized: None when no ellipsis was parsed, otherwise
            whether the ellipsis appeared inside parentheses.
        identifiers: every axis seen: axis names, the ellipsis marker, and one
            AnonymousAxis object per numeric axis other than 1.
        has_non_unitary_anonymous_axes: True when a numeric axis other than 1
            (e.g. 2, 3, 4) is present. Axes of size 1 are exceptional and are
            represented by an empty composition instead.
        composition: structure of the expression in order of appearance. Each
            element is either a list of axes (one list per top-level axis or per
            parenthesized group; an empty list for "1" or "()") or the ellipsis
            marker itself when the ellipsis is at top level.
    """

    def __init__(self, expression: str, *, allow_underscore: bool = False, allow_duplicates: bool = False):
        """
        Parse ``expression`` and fill in the attributes described on the class.

        Args:
            expression: one side of a pattern, e.g. ``'b c (h w)'``.
            allow_underscore: accept the bare name ``_`` as an axis, and allow it to repeat.
            allow_duplicates: allow the same identifier to appear more than once.

        Raises:
            EinopsError: on dots outside a single "...", duplicate identifiers
                (unless allowed), invalid axis names, nested or unbalanced
                parentheses, or characters that cannot be part of an expression.
        """
        self.has_ellipsis: bool = False
        self.has_ellipsis_parenthesized: Optional[bool] = None
        self.identifiers: Set[str] = set()
        self.has_non_unitary_anonymous_axes: bool = False
        self.composition: List[Union[List[str], str]] = []

        if "." in expression:
            if "..." not in expression:
                raise EinopsError("Expression may contain dots only inside ellipsis (...)")
            if expression.count("...") != 1 or expression.count(".") != 3:
                raise EinopsError(
                    "Expression may contain dots only inside ellipsis (...); only one ellipsis for tensor "
                )
            # From here on the ellipsis is represented by the internal marker.
            expression = expression.replace("...", _ellipsis)
            self.has_ellipsis = True

        # None while parsing at top level; a list collecting axes while inside "( ... )".
        bracket_group: Optional[List[str]] = None

        def add_axis_name(x):
            """Register one complete token (axis name, number, or ellipsis marker)."""
            if x in self.identifiers:
                if not (allow_underscore and x == "_") and not allow_duplicates:
                    raise EinopsError('Indexing expression contains duplicate dimension "{}"'.format(x))

            if x == _ellipsis:
                self.identifiers.add(_ellipsis)
                if bracket_group is None:
                    self.composition.append(_ellipsis)
                    self.has_ellipsis_parenthesized = False
                else:
                    bracket_group.append(_ellipsis)
                    self.has_ellipsis_parenthesized = True
                return

            is_number = str.isdecimal(x)
            if is_number and int(x) == 1:
                # Anonymous axis of length 1: an empty composition at top level,
                # and simply dropped inside parentheses.
                if bracket_group is None:
                    self.composition.append([])
                return

            is_axis_name, reason = self.check_axis_name_return_reason(x, allow_underscore=allow_underscore)
            if not (is_number or is_axis_name):
                raise EinopsError("Invalid axis identifier: {}\n{}".format(x, reason))
            if is_number:
                # Numeric axes are stored as distinct objects rather than strings.
                x = AnonymousAxis(x)
                self.has_non_unitary_anonymous_axes = True
            self.identifiers.add(x)
            if bracket_group is None:
                self.composition.append([x])
            else:
                bracket_group.append(x)

        current_identifier = None
        for char in expression:
            if char in "() ":
                # A separator ends the token being accumulated, if any.
                if current_identifier is not None:
                    add_axis_name(current_identifier)
                    current_identifier = None
                if char == "(":
                    if bracket_group is not None:
                        raise EinopsError("Axis composition is one-level (brackets inside brackets not allowed)")
                    bracket_group = []
                elif char == ")":
                    if bracket_group is None:
                        raise EinopsError("Brackets are not balanced")
                    self.composition.append(bracket_group)
                    bracket_group = None
            elif str.isalnum(char) or char in ["_", _ellipsis]:
                current_identifier = char if current_identifier is None else current_identifier + char
            else:
                raise EinopsError("Unknown character '{}'".format(char))

        if bracket_group is not None:
            raise EinopsError('Imbalanced parentheses in expression: "{}"'.format(expression))
        # The expression may end without a trailing separator.
        if current_identifier is not None:
            add_axis_name(current_identifier)

    def flat_axes_order(self) -> List:
        """
        Return all axes of ``composition`` as one flat list, in order of appearance.

        Only works when every element of ``composition`` is a list, i.e. when
        there is no top-level ellipsis; otherwise the assertion fails.
        """
        result = []
        for composed_axis in self.composition:
            assert isinstance(composed_axis, list), "does not work with ellipsis"
            result.extend(composed_axis)
        return result

    def has_composed_axes(self) -> bool:
        """Return True if any group in ``composition`` contains more than one axis."""
        # Non-list elements (a top-level ellipsis marker) are ignored.
        return any(isinstance(axes, list) and len(axes) > 1 for axes in self.composition)

    @staticmethod
    def check_axis_name_return_reason(name: str, allow_underscore: bool = False) -> Tuple[bool, str]:
        """
        Check whether ``name`` may be used as an axis name.

        Args:
            name: candidate axis name.
            allow_underscore: accept the bare name ``_``.

        Returns:
            ``(is_valid, reason)``; ``reason`` is an empty string for valid names.

        Warns:
            RuntimeWarning: if ``name`` is a Python keyword (still reported as valid).
            FutureWarning: if ``name`` is ``'axis'`` (still reported as valid).
        """
        if not str.isidentifier(name):
            return False, "not a valid python identifier"
        if name[0] == "_" or name[-1] == "_":
            if name == "_" and allow_underscore:
                return True, ""
            return False, "axis name should not start or end with underscore"
        if keyword.iskeyword(name):
            warnings.warn("It is discouraged to use axes names that are keywords: {}".format(name), RuntimeWarning)
        if name == "axis":
            warnings.warn(
                "It is discouraged to use 'axis' as an axis name and will raise an error in future",
                FutureWarning,
            )
        return True, ""

    @staticmethod
    def check_axis_name(name: str) -> bool:
        """
        Return True if ``name`` is a valid axis name.

        Valid axes names are python identifiers that do not start or end with
        underscore. Keywords are accepted, but trigger a warning.
        """
        is_valid, _reason = ParsedExpression.check_axis_name_return_reason(name)
        return is_valid
|
|