pytorch/tools/codegen/code_template.py


import re
from typing import Match, Optional, Sequence, Mapping
# Match $identifier or ${identifier} and replace it with the value in env.
# If the identifier appears at the start of a line (preceded only by
# whitespace) and its value is a list, it is treated as a block
# substitution: each element of the list is placed on its own line,
# indented to the identifier's depth.
# If the identifier appears on a line after non-whitespace and its value
# is a list, the elements are comma separated; ${,foo} inserts a comma
# before the list (when the list is non-empty) and ${foo,} inserts one after.
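The two cases described above can be sketched with a minimal stand-alone example. The `demo` helper below is hypothetical (not part of this file) and only mirrors the regex defined in the class; it is an illustration of the behavior, not the implementation:

```python
import re

# Hypothetical sketch of the substitution behavior described above;
# `demo` is not part of CodeTemplate, it only illustrates the two cases.
sub = re.compile(r'(^[^\n\S]*)?\$([^\d\W]\w*)', re.MULTILINE)

def demo(template, env):
    def repl(m):
        indent, v = m.group(1), env[m.group(2)]
        if indent is not None and isinstance(v, list):
            # block substitution: one element per line at the same depth
            return "\n".join(indent + str(e) for e in v)
        if isinstance(v, list):
            # inline substitution: comma separated
            return ", ".join(str(e) for e in v)
        return str(v)
    return sub.sub(repl, template)

print(demo("  $body", {"body": ["a;", "b;"]}))  # "  a;\n  b;"
print(demo("f($args)", {"args": [1, 2]}))       # "f(1, 2)"
```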
class CodeTemplate:
    # Python 2.7.5 has a bug where the leading (^[^\n\S]*)? does not work,
    # workaround via appending another [^\n\S]? inside
    substitution_str = r'(^[^\n\S]*[^\n\S]?)?\$([^\d\W]\w*|\{,?[^\d\W]\w*\,?})'
    # older versions of Python have a bug where \w* does not work,
    # so we need to replace with the non-shortened version [a-zA-Z0-9_]*
    # https://bugs.python.org/issue18647
    substitution_str = substitution_str.replace(r'\w', r'[a-zA-Z0-9_]')
    substitution = re.compile(substitution_str, re.MULTILINE)

    pattern: str
    filename: str
    @staticmethod
    def from_file(filename: str) -> 'CodeTemplate':
        with open(filename, 'r') as f:
            return CodeTemplate(f.read(), filename)
    def __init__(self, pattern: str, filename: str = "") -> None:
        self.pattern = pattern
        self.filename = filename
    def substitute(self, env: Optional[Mapping[str, object]] = None, **kwargs: object) -> str:
        if env is None:
            env = {}

        def lookup(v: str) -> object:
            assert env is not None
            return kwargs[v] if v in kwargs else env[v]

        def indent_lines(indent: str, v: Sequence[object]) -> str:
            return "".join([indent + l + "\n" for e in v for l in str(e).splitlines()]).rstrip()
        def replace(match: Match[str]) -> str:
            indent = match.group(1)
            key = match.group(2)
            comma_before = ''
            comma_after = ''
            if key[0] == "{":
                key = key[1:-1]
                if key[0] == ",":
                    comma_before = ', '
                    key = key[1:]
                if key[-1] == ',':
                    comma_after = ', '
                    key = key[:-1]
            v = lookup(key)
            if indent is not None:
                if not isinstance(v, list):
                    v = [v]
                return indent_lines(indent, v)
            elif isinstance(v, list):
                middle = ', '.join([str(x) for x in v])
                if len(v) == 0:
                    return middle
                return comma_before + middle + comma_after
            else:
                return str(v)

        return self.substitution.sub(replace, self.pattern)
if __name__ == "__main__":
    c = CodeTemplate("""\
    int foo($args) {
        $bar
        $bar
        $a+$b
    }
    int commatest(int a${,stuff})
    int notest(int a${,empty,})
    """)
    print(c.substitute(args=["hi", 8], bar=["what", 7],
                       a=3, b=4, stuff=["things...", "others"], empty=[]))
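The `${,stuff}` / `${empty,}` cases exercised by the demo above can also be sketched in isolation. The `comma_sub` helper below is hypothetical, not the class's implementation; it only illustrates how the leading/trailing comma markers behave, including the empty-list case:

```python
import re

# Hypothetical stand-alone sketch of the ${,foo} / ${foo,} comma behavior;
# it is not the CodeTemplate implementation, only an illustration.
def comma_sub(template, env):
    pat = re.compile(r'\$\{(,?)([A-Za-z_][A-Za-z0-9_]*)(,?)\}')
    def repl(m):
        before, key, after = m.group(1), m.group(2), m.group(3)
        v = env[key]
        if not v:  # empty list: drop the surrounding commas entirely
            return ""
        middle = ", ".join(str(e) for e in v)
        return (", " if before else "") + middle + (", " if after else "")
    return pat.sub(repl, template)

print(comma_sub("int commatest(int a${,stuff})", {"stuff": ["x", "y"]}))
# int commatest(int a, x, y)
print(comma_sub("int notest(int a${,empty,})", {"empty": []}))
# int notest(int a)
```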