Canonicalize all includes in PyTorch. (#14849)
Summary:
Anywhere we used #include "foo.h", we now say #include <foo.h>
Paths are adjusted to be rooted out of aten/src, torch/lib, or
the root level directory.
I modified CMakeLists.txt by hand to remove TH and THC from
the include paths.
I used the following script to do the canonicalization:
```
import subprocess
import re
import os.path
files = subprocess.check_output(['git', 'ls-files']).decode('utf-8').rstrip().split('\n')
for fn in files:
if not any(fn.endswith(suff) for suff in ['.cu', '.cpp', '.in', '.h', '.hpp', '.cu', '.cuh', '.cc']):
continue
if not any(fn.startswith(pref) for pref in ["aten/", "torch/"]):
continue
with open(fn, 'r') as f:
c = f.read()
def fmt(p):
return "#include <{}>".format(p)
def repl(m):
p = m.group(1)
if p in ["dlfcn.h", "unistd.h", "nvrtc.h", "cuda.h", "cuda_runtime.h", "cstdint", "cudnn.h", "Python.h", "cusparse.h", "cuda_runtime_api.h", "cuda_fp16.h", "cublas_v2.h", "stdint.h", "curand_kernel.h"]:
return fmt(p)
if any(p.startswith(pref) for pref in ["torch/csrc", "c10/", "ATen/", "caffe2/", "TH/", "THC/", "Eigen/", "gtest/", "zdl/", "gloo/", "onnx/", "miopen/"]):
return fmt(p)
for root in ["aten/src", "torch/lib", ""]:
for bad_root in [os.path.dirname(fn), "aten/src/TH", "aten/src/THC", "torch/csrc"]:
new_p = os.path.relpath(os.path.join(bad_root, p), root)
if not new_p.startswith("../") and (os.path.exists(os.path.join(root, new_p)) or os.path.exists(os.path.join(root, new_p + ".in"))):
return fmt(new_p)
print("ERROR: ", fn, p)
return m.group(0)
new_c = re.sub(r'#include "([^"]+)"', repl, c)
if new_c != c:
print(fn)
with open(fn, 'w') as f:
f.write(new_c)
```
Signed-off-by: Edward Z. Yang <ezyang@fb.com>
Pull Request resolved: https://github.com/pytorch/pytorch/pull/14849
Reviewed By: dzhulgakov
Differential Revision: D13363445
Pulled By: ezyang
fbshipit-source-id: 52361f878a672785f9306c9e9ab2513128092b68
2018-12-09 03:32:01 +00:00
|
|
|
#include <torch/csrc/jit/passes/utils/subgraph_utils.h>
|
2021-01-09 22:34:42 +00:00
|
|
|
|
2020-09-24 22:22:16 +00:00
|
|
|
#include <torch/csrc/jit/passes/canonicalize.h>
|
2018-11-15 01:20:36 +00:00
|
|
|
|
|
|
|
|
namespace torch {
|
|
|
|
|
namespace jit {
|
|
|
|
|
namespace SubgraphUtils {
|
|
|
|
|
namespace {
|
|
|
|
|
|
2019-01-10 20:25:22 +00:00
|
|
|
// Returns true if `n` wraps an inner graph (e.g. a fusion group or a
// differentiable subgraph), i.e. it carries an attr::Subgraph attribute.
bool hasSubgraph(Node* n) {
  const bool has_subgraph_attr = n->hasAttribute(attr::Subgraph);
  return has_subgraph_attr;
}
|
|
|
|
|
|
2020-09-24 22:22:16 +00:00
|
|
|
// For each value, record its last use in the graph (nullopt when the value
// has no uses). Order matches the order of `values`.
std::vector<c10::optional<const Use>> gatherLastUses(
    at::ArrayRef<Value*> values) {
  std::vector<c10::optional<const Use>> last_uses;
  last_uses.reserve(values.size());
  for (Value* value : values) {
    last_uses.push_back(firstOrLastUse(value, /*find_first*/ false));
  }
  return last_uses;
}
|
|
|
|
|
|
|
|
|
|
// When merging a node into a subgraph, we wish to preserve all of the
|
|
|
|
|
// aliasing properties of the node's outputs. It is difficult to track
|
|
|
|
|
// the node or its contained nodes through all of the ir manipulation
|
|
|
|
|
// involved in merging; it is pretty easy to uniquely identify the value
|
|
|
|
|
// based on its uses. We can identify the value by its last use in the graph.
|
|
|
|
|
// Values which do not have uses or which do not have a last use
|
|
|
|
|
// outside of the subgraph to be merged into we do not need to track.
|
|
|
|
|
struct ValueMapper {
|
|
|
|
|
ValueMapper(Node* to_merge, AliasDb& db, size_t subgraph_num_outputs) {
|
|
|
|
|
last_uses_ = gatherLastUses(to_merge->outputs());
|
|
|
|
|
subgraph_num_outputs_ = subgraph_num_outputs;
|
|
|
|
|
WithInsertPoint guard(to_merge);
|
|
|
|
|
auto g = to_merge->owningGraph();
|
|
|
|
|
// temporary node to put the aliasing properties of the node before its
|
|
|
|
|
// merged and destroyed
|
|
|
|
|
placeholder_node_ = g->insertNode(g->create(prim::Uninitialized, 0));
|
|
|
|
|
for (size_t i = 0; i < to_merge->outputs().size(); ++i) {
|
|
|
|
|
Value* existing = to_merge->outputs().at(i);
|
|
|
|
|
Value* new_value = placeholder_node_->insertOutput(i)->copyMetadata(
|
|
|
|
|
to_merge->outputs().at(i));
|
|
|
|
|
db.replaceWithNewValue(existing, new_value);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool usesEqual(const Use& a, const Use& b) {
|
|
|
|
|
return a.user == b.user && a.offset == b.offset;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void copyAliasing(Node* merged_node, AliasDb& db) {
|
|
|
|
|
auto num_outputs = merged_node->outputs().size();
|
|
|
|
|
auto new_outputs = merged_node->outputs().slice(
|
|
|
|
|
subgraph_num_outputs_, num_outputs - subgraph_num_outputs_);
|
|
|
|
|
for (Value* v : new_outputs) {
|
|
|
|
|
auto maybe_last_use = firstOrLastUse(v, /*find_first*/ false);
|
2021-03-02 05:14:16 +00:00
|
|
|
if (!maybe_last_use) {
|
|
|
|
|
if (AliasDb::isMutableType(v->type())) {
|
|
|
|
|
db.createValue(v);
|
|
|
|
|
}
|
|
|
|
|
continue;
|
|
|
|
|
}
|
2020-09-24 22:22:16 +00:00
|
|
|
// if it doesnt have a use it shouldnt have been added as output
|
|
|
|
|
TORCH_INTERNAL_ASSERT(maybe_last_use);
|
|
|
|
|
const Use last_use = *maybe_last_use;
|
|
|
|
|
size_t i = 0;
|
|
|
|
|
while (i < last_uses_.size() && last_uses_.at(i).has_value() &&
|
|
|
|
|
!usesEqual(*last_uses_.at(i), last_use)) {
|
|
|
|
|
++i;
|
|
|
|
|
}
|
|
|
|
|
TORCH_INTERNAL_ASSERT(i != last_uses_.size());
|
|
|
|
|
db.replaceWithNewValue(placeholder_node_->outputs().at(i), v);
|
|
|
|
|
}
|
|
|
|
|
placeholder_node_->destroy();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::vector<c10::optional<const Use>> last_uses_;
|
|
|
|
|
size_t subgraph_num_outputs_;
|
|
|
|
|
Node* placeholder_node_;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// Runs `merge_fn` (which performs the actual subgraph merge) while keeping
// the alias database consistent.
//
// When we merge a node into a subgraph, the new subgraph outputs
// have the same aliasing properties as the original node's outputs.
// Here we create a placeholder node, transfer the aliasing properties
// to the placeholder, execute the merge, and transfer the aliasing
// properties to the appropriate fusion group outputs
Node* executeSubgraphMergeAndUpdateAliasing(
    Node* to_merge,
    c10::optional<Node*> existing,
    AliasDb& db,
    const std::function<Node*(void)>& merge_fn) {
  // Outputs already present on the destination subgraph node, if any.
  const size_t existing_output_count =
      existing ? (*existing)->outputs().size() : 0;
  ValueMapper vm(to_merge, db, existing_output_count);
  Node* fusion_group = merge_fn();
  vm.copyAliasing(fusion_group, db);
  return fusion_group;
}
|
|
|
|
|
|
2018-11-15 01:20:36 +00:00
|
|
|
// Combine the nodes in two subgraph together. The nodes will end up in
|
|
|
|
|
// `mergeTo`, and `mergeFrom` is destroyed.
|
2021-03-02 05:14:16 +00:00
|
|
|
void mergeSubgraph(Node* mergeTo, Node* mergeFrom) {
|
2019-01-10 20:25:22 +00:00
|
|
|
Node* nodeBeforeMergeFrom = mergeFrom->prev();
|
|
|
|
|
Node* nodeAfterMergeFrom = mergeFrom->next();
|
2020-08-26 01:09:55 +00:00
|
|
|
|
2021-03-02 05:14:16 +00:00
|
|
|
unmergeSubgraph(mergeFrom);
|
2020-08-26 01:09:55 +00:00
|
|
|
|
2019-01-10 20:25:22 +00:00
|
|
|
std::vector<Node*> nodes;
|
|
|
|
|
const auto end_it = nodeBeforeMergeFrom->reverseIterator();
|
|
|
|
|
auto it = nodeAfterMergeFrom->reverseIterator();
|
|
|
|
|
++it;
|
2020-08-26 01:09:55 +00:00
|
|
|
|
2021-03-02 05:14:16 +00:00
|
|
|
// defer destroying nodes until after all nodes have been merged,
|
|
|
|
|
// to make iterators easier to reason about
|
[JIT] Regularize tensorexpr fuser strategy with other fusers (#44972)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/44972
Previously, our fusion strategy would be:
- start at the end of the block, find a fusable node
- iteratively try to merge inputs into the fusion group, sorted topologically
This strategy works pretty well, but has the possibility of missing fusion groups. See my attached test case for an example where we wouldn't find all possible fusion groups. bertmaher found an example of a missed fusion groups in one of our rnn examples (jit_premul) that caused a regression from the legacy fuser.
Here, I'm updating our fusion strategy to be the same as our other fusion passes - create_autodiff_subgraphs, and graph_fuser.cpp.
The basic strategy is:
- iterate until you find a fusible node
- try to merge the nodes inputs, whenever a succesful merge occurs restart at the beginning of the nodes inputs
- after you've exhausted a node, continue searching the block for fusion opportunities from the node
- continue doing this on the block until we go through an iteration without an succesful merges
Since we create the fusion groups once, and only re-specialize within the fusion groups, we should be running this very infrequently (only re-triggers when we fail undefinedness specializations). Also bc it's the same algorithm as the existing fuser it is unlikely to cause a regression.
Test Plan: Imported from OSS
Reviewed By: Krovatkin, robieta
Differential Revision: D23821581
Pulled By: eellison
fbshipit-source-id: e513d1ef719120dadb0bfafc7a14f4254cd806ee
2020-09-24 22:22:16 +00:00
|
|
|
std::vector<Node*> merged_nodes;
|
2019-01-10 20:25:22 +00:00
|
|
|
while (it != end_it) {
|
|
|
|
|
Node* node = *it;
|
|
|
|
|
++it;
|
[JIT] Regularize tensorexpr fuser strategy with other fusers (#44972)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/44972
Previously, our fusion strategy would be:
- start at the end of the block, find a fusable node
- iteratively try to merge inputs into the fusion group, sorted topologically
This strategy works pretty well, but has the possibility of missing fusion groups. See my attached test case for an example where we wouldn't find all possible fusion groups. bertmaher found an example of a missed fusion groups in one of our rnn examples (jit_premul) that caused a regression from the legacy fuser.
Here, I'm updating our fusion strategy to be the same as our other fusion passes - create_autodiff_subgraphs, and graph_fuser.cpp.
The basic strategy is:
- iterate until you find a fusible node
- try to merge the nodes inputs, whenever a succesful merge occurs restart at the beginning of the nodes inputs
- after you've exhausted a node, continue searching the block for fusion opportunities from the node
- continue doing this on the block until we go through an iteration without an succesful merges
Since we create the fusion groups once, and only re-specialize within the fusion groups, we should be running this very infrequently (only re-triggers when we fail undefinedness specializations). Also bc it's the same algorithm as the existing fuser it is unlikely to cause a regression.
Test Plan: Imported from OSS
Reviewed By: Krovatkin, robieta
Differential Revision: D23821581
Pulled By: eellison
fbshipit-source-id: e513d1ef719120dadb0bfafc7a14f4254cd806ee
2020-09-24 22:22:16 +00:00
|
|
|
merged_nodes.push_back(node);
|
2021-03-02 05:14:16 +00:00
|
|
|
mergeNodeIntoSubgraph(node, mergeTo, /*destroyNode*/ false);
|
[JIT] Regularize tensorexpr fuser strategy with other fusers (#44972)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/44972
Previously, our fusion strategy would be:
- start at the end of the block, find a fusable node
- iteratively try to merge inputs into the fusion group, sorted topologically
This strategy works pretty well, but has the possibility of missing fusion groups. See my attached test case for an example where we wouldn't find all possible fusion groups. bertmaher found an example of a missed fusion groups in one of our rnn examples (jit_premul) that caused a regression from the legacy fuser.
Here, I'm updating our fusion strategy to be the same as our other fusion passes - create_autodiff_subgraphs, and graph_fuser.cpp.
The basic strategy is:
- iterate until you find a fusible node
- try to merge the nodes inputs, whenever a succesful merge occurs restart at the beginning of the nodes inputs
- after you've exhausted a node, continue searching the block for fusion opportunities from the node
- continue doing this on the block until we go through an iteration without an succesful merges
Since we create the fusion groups once, and only re-specialize within the fusion groups, we should be running this very infrequently (only re-triggers when we fail undefinedness specializations). Also bc it's the same algorithm as the existing fuser it is unlikely to cause a regression.
Test Plan: Imported from OSS
Reviewed By: Krovatkin, robieta
Differential Revision: D23821581
Pulled By: eellison
fbshipit-source-id: e513d1ef719120dadb0bfafc7a14f4254cd806ee
2020-09-24 22:22:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (Node* n : merged_nodes) {
|
|
|
|
|
n->destroy();
|
2020-08-26 01:09:55 +00:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-11-15 01:20:36 +00:00
|
|
|
} // namespace
|
|
|
|
|
|
|
|
|
|
// Retrieves the inner graph stored on `n` (the node must carry an
// attr::Subgraph attribute).
std::shared_ptr<Graph> getSubgraph(Node* n) {
  auto inner_graph = n->g(attr::Subgraph);
  return inner_graph;
}
|
|
|
|
|
|
2021-03-02 05:14:16 +00:00
|
|
|
// Inline the graph, replace uses of node outputs and destroy the node.
void unmergeSubgraph(Node* subgraphNode) {
  auto outerGraph = subgraphNode->owningGraph();
  WithInsertPoint guard(subgraphNode);
  const auto subgraphOutputs = insertGraph(
      *outerGraph, *getSubgraph(subgraphNode), subgraphNode->inputs());
  AT_ASSERT(subgraphOutputs.size() >= subgraphNode->outputs().size());
  // Rewire every consumer of the subgraph node's outputs to the values
  // produced by the inlined body.
  size_t out_idx = 0;
  for (Value* old_output : subgraphNode->outputs()) {
    old_output->replaceAllUsesWith(subgraphOutputs[out_idx]);
    ++out_idx;
  }
  subgraphNode->destroy();
}
|
|
|
|
|
|
2020-03-25 06:39:49 +00:00
|
|
|
void collectNestedUses(
|
|
|
|
|
std::unordered_set<Value*>& closed_over_values,
|
|
|
|
|
std::unordered_set<Value*>& new_values,
|
|
|
|
|
std::unordered_map<Value*, Value*>& inputsMap,
|
|
|
|
|
Node* input_node) {
|
|
|
|
|
for (auto input : input_node->inputs()) {
|
|
|
|
|
if (inputsMap.count(input) == 0 && new_values.count(input) == 0) {
|
|
|
|
|
closed_over_values.insert(input);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (input_node->kind() == prim::If) {
|
|
|
|
|
for (Block* block : input_node->blocks()) {
|
|
|
|
|
for (Node* node : block->nodes()) {
|
|
|
|
|
collectNestedUses(closed_over_values, new_values, inputsMap, node);
|
|
|
|
|
}
|
|
|
|
|
for (Value* v : block->outputs()) {
|
|
|
|
|
if (inputsMap.count(v) == 0 && new_values.count(v) == 0) {
|
|
|
|
|
closed_over_values.insert(v);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
} else if (input_node->kind() == prim::Loop) {
|
|
|
|
|
for (Value* v : input_node->inputs()) {
|
|
|
|
|
if (inputsMap.count(v) == 0 && new_values.count(v) == 0) {
|
|
|
|
|
closed_over_values.insert(v);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
Block* block = input_node->blocks().at(0);
|
|
|
|
|
for (Value* v : block->inputs()) {
|
|
|
|
|
new_values.insert(v);
|
|
|
|
|
}
|
|
|
|
|
for (Node* node : block->nodes()) {
|
|
|
|
|
collectNestedUses(closed_over_values, new_values, inputsMap, node);
|
|
|
|
|
}
|
|
|
|
|
} else if (input_node->blocks().size() != 0) {
|
|
|
|
|
TORCH_INTERNAL_ASSERT(false, input_node, " kind not handled yet");
|
|
|
|
|
}
|
|
|
|
|
for (Value* output : input_node->outputs()) {
|
|
|
|
|
new_values.insert(output);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Computes the set of values that `toMerge` (including nested blocks) reads
// but which are defined outside of it and not yet subgraph inputs.
std::unordered_set<Value*> closedOverValues(
    Node* toMerge,
    std::unordered_map<Value*, Value*>& inputsMap) {
  std::unordered_set<Value*> closed_over;
  std::unordered_set<Value*> defined_inside;
  collectNestedUses(closed_over, defined_inside, inputsMap, toMerge);
  return closed_over;
}
|
|
|
|
|
|
2020-08-26 01:09:55 +00:00
|
|
|
void mergeNodeIntoSubgraph(
|
|
|
|
|
Node* toMerge,
|
|
|
|
|
Node* subgraphNode,
|
[JIT] Regularize tensorexpr fuser strategy with other fusers (#44972)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/44972
Previously, our fusion strategy would be:
- start at the end of the block, find a fusable node
- iteratively try to merge inputs into the fusion group, sorted topologically
This strategy works pretty well, but has the possibility of missing fusion groups. See my attached test case for an example where we wouldn't find all possible fusion groups. bertmaher found an example of a missed fusion groups in one of our rnn examples (jit_premul) that caused a regression from the legacy fuser.
Here, I'm updating our fusion strategy to be the same as our other fusion passes - create_autodiff_subgraphs, and graph_fuser.cpp.
The basic strategy is:
- iterate until you find a fusible node
- try to merge the nodes inputs, whenever a succesful merge occurs restart at the beginning of the nodes inputs
- after you've exhausted a node, continue searching the block for fusion opportunities from the node
- continue doing this on the block until we go through an iteration without an succesful merges
Since we create the fusion groups once, and only re-specialize within the fusion groups, we should be running this very infrequently (only re-triggers when we fail undefinedness specializations). Also bc it's the same algorithm as the existing fuser it is unlikely to cause a regression.
Test Plan: Imported from OSS
Reviewed By: Krovatkin, robieta
Differential Revision: D23821581
Pulled By: eellison
fbshipit-source-id: e513d1ef719120dadb0bfafc7a14f4254cd806ee
2020-09-24 22:22:16 +00:00
|
|
|
bool destroyNode) {
|
2020-03-25 06:39:49 +00:00
|
|
|
AT_ASSERT(hasSubgraph(subgraphNode) && toMerge != subgraphNode);
|
2019-01-10 20:25:22 +00:00
|
|
|
if (hasSubgraph(toMerge)) {
|
2021-03-02 05:14:16 +00:00
|
|
|
return mergeSubgraph(subgraphNode, toMerge);
|
2018-11-15 01:20:36 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
auto subgraph = getSubgraph(subgraphNode);
|
|
|
|
|
|
|
|
|
|
// Map from values in the surrounding graph to inputs in the subgraph
|
|
|
|
|
std::unordered_map<Value*, Value*> inputsMap;
|
|
|
|
|
|
2019-01-24 19:05:07 +00:00
|
|
|
AT_ASSERT(subgraphNode->inputs().size() == subgraph->inputs().size());
|
2018-11-15 01:20:36 +00:00
|
|
|
size_t idx = 0;
|
|
|
|
|
for (auto input : subgraphNode->inputs()) {
|
|
|
|
|
inputsMap[input] = subgraph->inputs()[idx];
|
|
|
|
|
idx++;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Add n's inputs to the group's input list if we don't already have them
|
|
|
|
|
WithInsertPoint guard(*subgraph->nodes().begin());
|
2020-03-25 06:39:49 +00:00
|
|
|
std::unordered_set<Value*> closedValues =
|
|
|
|
|
closedOverValues(toMerge, inputsMap);
|
2020-03-27 05:42:25 +00:00
|
|
|
|
|
|
|
|
// There are currently downstream usage that relies on a fixed ordering
|
|
|
|
|
// of graph inputs. TODO: remove
|
|
|
|
|
std::vector<Value*> orderedClosedValues;
|
|
|
|
|
std::unordered_set<Value*> orderedSeenValues;
|
|
|
|
|
for (Value* input : toMerge->inputs()) {
|
|
|
|
|
orderedClosedValues.push_back(input);
|
|
|
|
|
orderedSeenValues.insert(input);
|
|
|
|
|
}
|
|
|
|
|
for (Value* closedValue : closedValues) {
|
|
|
|
|
if (!orderedSeenValues.count(closedValue)) {
|
|
|
|
|
orderedClosedValues.push_back(closedValue);
|
|
|
|
|
orderedSeenValues.insert(closedValue);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (auto input : orderedClosedValues) {
|
2018-11-15 01:20:36 +00:00
|
|
|
if (inputsMap.count(input) == 0) {
|
|
|
|
|
// Clone constants inside the subgraph instead of referencing them, to
|
|
|
|
|
// enable more optimizations
|
|
|
|
|
if (auto value = toIValue(input)) {
|
|
|
|
|
auto nv = subgraph->insertConstant(*value);
|
2021-03-02 05:14:16 +00:00
|
|
|
nv->copyMetadata(input);
|
2018-11-15 01:20:36 +00:00
|
|
|
inputsMap[input] = nv;
|
|
|
|
|
} else {
|
|
|
|
|
// The common case: this is a regular input, so just register it with
|
|
|
|
|
// the group node and inner subgraph
|
|
|
|
|
subgraphNode->addInput(input);
|
|
|
|
|
auto inputToGraph = subgraph->addInput();
|
2021-03-02 05:14:16 +00:00
|
|
|
inputToGraph->copyMetadata(input);
|
2018-11-15 01:20:36 +00:00
|
|
|
inputsMap[input] = inputToGraph;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Merge the node into the graph
|
|
|
|
|
auto mergedNode = subgraph->insertNode(
|
|
|
|
|
subgraph->createClone(toMerge, [&](Value* v) { return inputsMap[v]; }));
|
|
|
|
|
|
|
|
|
|
// If n's outputs were inputs to `group`, remove them since we just merged
|
|
|
|
|
// n in.
|
|
|
|
|
//
|
|
|
|
|
// i.e.,
|
|
|
|
|
// x = f(w); group(x, y, z) becomes group(w, y, z).
|
|
|
|
|
// x, y, z = f(w); group(x, y, z) becomes group(w).
|
|
|
|
|
auto inputs = subgraphNode->inputs();
|
|
|
|
|
for (size_t i = 0; i < toMerge->outputs().size(); ++i) {
|
|
|
|
|
auto it = std::find(inputs.begin(), inputs.end(), toMerge->outputs()[i]);
|
|
|
|
|
if (it != inputs.end()) {
|
|
|
|
|
size_t p = it - inputs.begin();
|
|
|
|
|
subgraphNode->removeInput(p);
|
|
|
|
|
subgraph->inputs()[p]->replaceAllUsesWith(mergedNode->outputs()[i]);
|
|
|
|
|
subgraph->eraseInput(p);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Add n's outputs to the group node and inner subgraph outputs.
|
|
|
|
|
for (size_t i = 0; i < toMerge->outputs().size(); i++) {
|
|
|
|
|
auto oldOutput = toMerge->outputs()[i];
|
|
|
|
|
|
|
|
|
|
// Only register the output in the group node if it's actually used
|
|
|
|
|
// outside the subgraph.
|
|
|
|
|
const auto hasUsesOutsideSubgraph = std::any_of(
|
|
|
|
|
oldOutput->uses().cbegin(),
|
|
|
|
|
oldOutput->uses().cend(),
|
|
|
|
|
[&](const Use& use) { return use.user->isAfter(subgraphNode); });
|
|
|
|
|
|
|
|
|
|
if (hasUsesOutsideSubgraph) {
|
|
|
|
|
auto newOutput = mergedNode->outputs()[i];
|
|
|
|
|
subgraph->registerOutput(newOutput);
|
|
|
|
|
auto groupOutput = subgraphNode->addOutput();
|
|
|
|
|
groupOutput->copyMetadata(oldOutput);
|
|
|
|
|
oldOutput->replaceAllUsesWith(groupOutput);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
// Remove the original node now that the merge is complete
|
[JIT] Regularize tensorexpr fuser strategy with other fusers (#44972)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/44972
Previously, our fusion strategy would be:
- start at the end of the block, find a fusable node
- iteratively try to merge inputs into the fusion group, sorted topologically
This strategy works pretty well, but has the possibility of missing fusion groups. See my attached test case for an example where we wouldn't find all possible fusion groups. bertmaher found an example of a missed fusion groups in one of our rnn examples (jit_premul) that caused a regression from the legacy fuser.
Here, I'm updating our fusion strategy to be the same as our other fusion passes - create_autodiff_subgraphs, and graph_fuser.cpp.
The basic strategy is:
- iterate until you find a fusible node
- try to merge the nodes inputs, whenever a succesful merge occurs restart at the beginning of the nodes inputs
- after you've exhausted a node, continue searching the block for fusion opportunities from the node
- continue doing this on the block until we go through an iteration without an succesful merges
Since we create the fusion groups once, and only re-specialize within the fusion groups, we should be running this very infrequently (only re-triggers when we fail undefinedness specializations). Also bc it's the same algorithm as the existing fuser it is unlikely to cause a regression.
Test Plan: Imported from OSS
Reviewed By: Krovatkin, robieta
Differential Revision: D23821581
Pulled By: eellison
fbshipit-source-id: e513d1ef719120dadb0bfafc7a14f4254cd806ee
2020-09-24 22:22:16 +00:00
|
|
|
if (destroyNode) {
|
|
|
|
|
toMerge->destroy();
|
|
|
|
|
}
|
2018-11-15 01:20:36 +00:00
|
|
|
}
|
[JIT] Regularize tensorexpr fuser strategy with other fusers (#44972)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/44972
Previously, our fusion strategy would be:
- start at the end of the block, find a fusable node
- iteratively try to merge inputs into the fusion group, sorted topologically
This strategy works pretty well, but has the possibility of missing fusion groups. See my attached test case for an example where we wouldn't find all possible fusion groups. bertmaher found an example of a missed fusion groups in one of our rnn examples (jit_premul) that caused a regression from the legacy fuser.
Here, I'm updating our fusion strategy to be the same as our other fusion passes - create_autodiff_subgraphs, and graph_fuser.cpp.
The basic strategy is:
- iterate until you find a fusible node
- try to merge the node's inputs; whenever a successful merge occurs, restart at the beginning of the node's inputs
- after you've exhausted a node, continue searching the block for fusion opportunities from the node
- continue doing this on the block until we go through an iteration without any successful merges
Since we create the fusion groups once, and only re-specialize within the fusion groups, we should be running this very infrequently (only re-triggers when we fail undefinedness specializations). Also bc it's the same algorithm as the existing fuser it is unlikely to cause a regression.
Test Plan: Imported from OSS
Reviewed By: Krovatkin, robieta
Differential Revision: D23821581
Pulled By: eellison
fbshipit-source-id: e513d1ef719120dadb0bfafc7a14f4254cd806ee
2020-09-24 22:22:16 +00:00
|
|
|
|
2021-03-02 05:14:16 +00:00
|
|
|
// Creates a new subgraph node of kind `subgraphKind` just before `n` and
// moves `n` into it. Returns the new subgraph node.
Node* createSingletonSubgraph(Node* n, Symbol subgraphKind) {
  auto graph = n->owningGraph();
  // Fresh, empty subgraph node sharing the current scope.
  auto subgraph = graph->create(subgraphKind, 0);
  subgraph->g_(attr::Subgraph, std::make_shared<Graph>(graph->current_scope()));
  subgraph->insertBefore(n);
  mergeNodeIntoSubgraph(n, subgraph);
  return subgraph;
}
|
2019-01-10 20:25:22 +00:00
|
|
|
|
2020-09-24 22:22:16 +00:00
|
|
|
// Like mergeNodeIntoSubgraph, but also keeps the alias database `db`
// consistent across the merge.
void mergeNodeIntoSubgraphAndUpdateAliasing(
    Node* to_merge,
    Node* subgraphNode,
    AliasDb& db) {
  auto do_merge = [&]() {
    mergeNodeIntoSubgraph(to_merge, subgraphNode);
    return subgraphNode;
  };
  executeSubgraphMergeAndUpdateAliasing(to_merge, subgraphNode, db, do_merge);
}
|
|
|
|
|
|
|
|
|
|
// Like createSingletonSubgraph, but also keeps the alias database `db`
// consistent across the merge.
Node* createSingletonSubgraphAndUpdateAliasing(
    Node* to_merge,
    Symbol subgraphKind,
    AliasDb& db) {
  auto do_create = [&]() {
    return createSingletonSubgraph(to_merge, subgraphKind);
  };
  return executeSubgraphMergeAndUpdateAliasing(
      to_merge, c10::nullopt, db, do_create);
}
|
|
|
|
|
|
2020-11-04 00:34:31 +00:00
|
|
|
std::string truncateStrWithHash(const std::string& s, size_t maxlen) {
|
|
|
|
|
if (s.size() <= maxlen) {
|
|
|
|
|
return s;
|
|
|
|
|
}
|
|
|
|
|
std::string hash_str = c10::to_string(c10::hash<std::string>{}(s));
|
|
|
|
|
// If hash-string plus '_' can fit into maxlen, then truncate the original
|
|
|
|
|
// string correspondingly so that the final string with the hash included fits
|
|
|
|
|
// into maxlen. If that's not possible, at least truncate the original string
|
|
|
|
|
// to maxlen (and appen the hash to it).
|
|
|
|
|
size_t trunc_len =
|
|
|
|
|
(maxlen > hash_str.size() + 1) ? (maxlen - hash_str.size() - 1) : maxlen;
|
|
|
|
|
std::stringstream truncated;
|
|
|
|
|
truncated << s.substr(0, trunc_len);
|
|
|
|
|
truncated << "_" << hash_str;
|
|
|
|
|
return truncated.str();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::string generateNameForGraph(
|
|
|
|
|
const std::shared_ptr<Graph>& graph,
|
|
|
|
|
size_t maxlen,
|
|
|
|
|
const std::string& prefix) {
|
|
|
|
|
std::stringstream graph_name;
|
|
|
|
|
graph_name << prefix;
|
|
|
|
|
for (Node* node : graph->nodes()) {
|
|
|
|
|
if (!node->kind().is_aten()) {
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
graph_name << "_" << node->kind().toUnqualString();
|
|
|
|
|
}
|
|
|
|
|
return truncateStrWithHash(graph_name.str(), maxlen);
|
|
|
|
|
}
|
|
|
|
|
|
2018-11-15 01:20:36 +00:00
|
|
|
} // namespace SubgraphUtils
|
|
|
|
|
} // namespace jit
|
|
|
|
|
} // namespace torch
|