mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-21 02:18:09 +00:00
Switch GSL to MS GSL 4.0.0 (#13416)
This commit is contained in:
parent
7fbfbf789f
commit
2ecd1d6622
292 changed files with 1128 additions and 4486 deletions
|
|
@ -28,8 +28,8 @@
|
|||
"component": {
|
||||
"type": "git",
|
||||
"git": {
|
||||
"commitHash": "58123b93bd7f12d17ac0c46379a0f2c0255d9213",
|
||||
"repositoryUrl": "https://github.com/martinmoene/gsl-lite.git"
|
||||
"commitHash": "a3534567187d2edc428efd3f13466ff75fe5805c",
|
||||
"repositoryUrl": "https://github.com/microsoft/gsl.git"
|
||||
}
|
||||
}
|
||||
},
|
||||
|
|
|
|||
|
|
@ -193,7 +193,7 @@
|
|||
"component": {
|
||||
"type": "git",
|
||||
"git": {
|
||||
"commitHash": "53495a2a7d6ba7e0691a7f3602e9a5324bba6e45",
|
||||
"commitHash": "58d77fa8070e8cec2dc1ed015d66b454c8d78850",
|
||||
"repositoryUrl": "https://github.com/google/googletest.git"
|
||||
},
|
||||
"comments": "git submodule at cmake/external/googletest"
|
||||
|
|
|
|||
|
|
@ -1016,27 +1016,14 @@ if (CPUINFO_SUPPORTED)
|
|||
endif()
|
||||
endif()
|
||||
|
||||
# bounds checking behavior.
|
||||
# throw instead of calling terminate if there's a bounds checking violation.
|
||||
# we make it through via a handler so CUDA does not complain
|
||||
# The following -DGSL macros are recognized by gsl-lite along with -Dgsl macros
|
||||
# no bounds checking in release build so no perf cost
|
||||
# if we enable onnxruntime_DISABLE_EXCEPTIONS, gsl will terminate
|
||||
if (onnxruntime_DISABLE_EXCEPTIONS)
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DGSL_TERMINATE_ON_CONTRACT_VIOLATION")
|
||||
else()
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DGSL_THROW_ON_CONTRACT_VIOLATION")
|
||||
endif()
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -DGSL_UNENFORCED_ON_CONTRACT_VIOLATION")
|
||||
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -DGSL_UNENFORCED_ON_CONTRACT_VIOLATION")
|
||||
set(CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS_MINSIZEREL} -DGSL_UNENFORCED_ON_CONTRACT_VIOLATION")
|
||||
include(gsl)
|
||||
|
||||
include(eigen)
|
||||
|
||||
#onnxruntime_EXTERNAL_LIBRARIES could contain onnx, onnx_proto,libprotobuf, cuda/cudnn,
|
||||
# dnnl/mklml, onnxruntime_codegen_tvm, tvm and pthread
|
||||
# pthread is always at the last
|
||||
set(onnxruntime_EXTERNAL_LIBRARIES onnx onnx_proto ${PROTOBUF_LIB} re2::re2)
|
||||
set(onnxruntime_EXTERNAL_LIBRARIES onnx onnx_proto ${PROTOBUF_LIB} re2::re2 ${GSL_TARGET})
|
||||
|
||||
if(NOT onnxruntime_DISABLE_ABSEIL)
|
||||
set(ABSEIL_LIBS absl::inlined_vector absl::flat_hash_set
|
||||
|
|
|
|||
2
cmake/external/googletest
vendored
2
cmake/external/googletest
vendored
|
|
@ -1 +1 @@
|
|||
Subproject commit 53495a2a7d6ba7e0691a7f3602e9a5324bba6e45
|
||||
Subproject commit 58d77fa8070e8cec2dc1ed015d66b454c8d78850
|
||||
16
cmake/external/gsl.cmake
vendored
Normal file
16
cmake/external/gsl.cmake
vendored
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
include(FetchContent)
|
||||
|
||||
FetchContent_Declare(
|
||||
GSL
|
||||
GIT_REPOSITORY https://github.com/microsoft/gsl
|
||||
GIT_TAG a3534567187d2edc428efd3f13466ff75fe5805c # v4.0.0
|
||||
GIT_SHALLOW ON
|
||||
)
|
||||
|
||||
FetchContent_MakeAvailable(GSL)
|
||||
|
||||
set(GSL_TARGET "Microsoft.GSL::GSL")
|
||||
set(GSL_INCLUDE_DIR "$<TARGET_PROPERTY:${GSL_TARGET},INTERFACE_INCLUDE_DIRECTORIES>")
|
||||
14
cmake/external/gsl.natvis
vendored
14
cmake/external/gsl.natvis
vendored
|
|
@ -1,14 +0,0 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010">
|
||||
<Type Name="gsl::span<*>">
|
||||
<Intrinsic Name="_size" Expression="(last_ - first_)"/>
|
||||
<DisplayString>{{ size={ _size() }}}</DisplayString>
|
||||
<Expand>
|
||||
<Item Name="[size]" ExcludeView="simple">_size()</Item>
|
||||
<IndexListItems Condition="_size() > 0">
|
||||
<Size>_size()</Size>
|
||||
<ValueNode>first_[$i]</ValueNode>
|
||||
</IndexListItems>
|
||||
</Expand>
|
||||
</Type>
|
||||
</AutoVisualizer>
|
||||
|
|
@ -78,7 +78,7 @@ file(GLOB onnxruntime_common_src CONFIGURE_DEPENDS
|
|||
# Remove new/delete intercept. To deal with memory leaks
|
||||
# Use either non-mimalloc build OR use mimalloc built-in features.
|
||||
if(WIN32 AND onnxruntime_USE_MIMALLOC)
|
||||
list(REMOVE_ITEM onnxruntime_common_src
|
||||
list(REMOVE_ITEM onnxruntime_common_src
|
||||
"${ONNXRUNTIME_ROOT}/core/platform/windows/debug_alloc.cc"
|
||||
"${ONNXRUNTIME_ROOT}/core/platform/windows/debug_alloc.h")
|
||||
endif()
|
||||
|
|
@ -116,11 +116,6 @@ if(NOT onnxruntime_DISABLE_ABSEIL)
|
|||
target_sources(
|
||||
onnxruntime_common
|
||||
INTERFACE $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/external/${ABSEIL_NATVIS_FILE}>)
|
||||
set(GSL_NATVIS_FILE "gsl.natvis")
|
||||
target_sources(
|
||||
onnxruntime_common
|
||||
INTERFACE $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/external/${GSL_NATVIS_FILE}>
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
|
@ -131,7 +126,7 @@ target_include_directories(onnxruntime_common
|
|||
PUBLIC
|
||||
${OPTIONAL_LITE_INCLUDE_DIR})
|
||||
|
||||
target_link_libraries(onnxruntime_common safeint_interface Boost::mp11)
|
||||
target_link_libraries(onnxruntime_common safeint_interface Boost::mp11 ${GSL_TARGET})
|
||||
|
||||
if(NOT WIN32)
|
||||
target_include_directories(onnxruntime_common PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/external/nsync/public")
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ file(GLOB onnxruntime_flatbuffers_srcs CONFIGURE_DEPENDS
|
|||
source_group(TREE ${REPO_ROOT} FILES ${onnxruntime_flatbuffers_srcs})
|
||||
|
||||
onnxruntime_add_static_library(onnxruntime_flatbuffers ${onnxruntime_flatbuffers_srcs})
|
||||
onnxruntime_add_include_to_target(onnxruntime_flatbuffers onnx flatbuffers)
|
||||
onnxruntime_add_include_to_target(onnxruntime_flatbuffers onnx flatbuffers ${GSL_TARGET})
|
||||
if(onnxruntime_ENABLE_INSTRUMENT)
|
||||
target_compile_definitions(onnxruntime_flatbuffers PUBLIC ONNXRUNTIME_ENABLE_INSTRUMENT)
|
||||
endif()
|
||||
|
|
@ -41,4 +41,4 @@ namespace std { using ::getenv; }
|
|||
]])
|
||||
target_compile_options(flatbuffers PRIVATE /FI${CMAKE_BINARY_DIR}/gdk_cstdlib_wrapper.h)
|
||||
target_compile_options(flatc PRIVATE /FI${CMAKE_BINARY_DIR}/gdk_cstdlib_wrapper.h)
|
||||
endif()
|
||||
endif()
|
||||
|
|
|
|||
|
|
@ -502,6 +502,7 @@ endif()
|
|||
|
||||
foreach(mlas_target ${ONNXRUNTIME_MLAS_LIBS})
|
||||
target_include_directories(${mlas_target} PRIVATE ${ONNXRUNTIME_ROOT}/core/mlas/inc ${MLAS_SRC_DIR})
|
||||
onnxruntime_add_include_to_target(${mlas_target} ${GSL_TARGET})
|
||||
endforeach()
|
||||
set_target_properties(onnxruntime_mlas PROPERTIES FOLDER "ONNXRuntime")
|
||||
if (WIN32)
|
||||
|
|
|
|||
|
|
@ -550,7 +550,7 @@ if (onnxruntime_USE_DNNL)
|
|||
add_dependencies(onnxruntime_providers_dnnl onnxruntime_providers_shared project_dnnl ${onnxruntime_EXTERNAL_DEPENDENCIES})
|
||||
target_include_directories(onnxruntime_providers_dnnl PRIVATE ${ONNXRUNTIME_ROOT} ${eigen_INCLUDE_DIRS} ${DNNL_INCLUDE_DIR} ${DNNL_OCL_INCLUDE_DIR})
|
||||
# ${CMAKE_CURRENT_BINARY_DIR} is so that #include "onnxruntime_config.h" inside tensor_shape.h is found
|
||||
target_link_libraries(onnxruntime_providers_dnnl PRIVATE dnnl ${ONNXRUNTIME_PROVIDERS_SHARED} Boost::mp11 ${ABSEIL_LIBS})
|
||||
target_link_libraries(onnxruntime_providers_dnnl PRIVATE dnnl ${ONNXRUNTIME_PROVIDERS_SHARED} Boost::mp11 ${ABSEIL_LIBS} ${GSL_TARGET})
|
||||
install(DIRECTORY ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/dnnl DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/core/providers)
|
||||
set_target_properties(onnxruntime_providers_dnnl PROPERTIES FOLDER "ONNXRuntime")
|
||||
set_target_properties(onnxruntime_providers_dnnl PROPERTIES LINKER_LANGUAGE CXX)
|
||||
|
|
|
|||
|
|
@ -1097,6 +1097,7 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
|
|||
if (onnxruntime_BUILD_SHARED_LIB)
|
||||
onnxruntime_add_static_library(onnxruntime_mocked_allocator ${TEST_SRC_DIR}/util/test_allocator.cc)
|
||||
target_include_directories(onnxruntime_mocked_allocator PUBLIC ${TEST_SRC_DIR}/util/include)
|
||||
target_link_libraries(onnxruntime_mocked_allocator PRIVATE ${GSL_TARGET})
|
||||
set_target_properties(onnxruntime_mocked_allocator PROPERTIES FOLDER "ONNXRuntimeTest")
|
||||
|
||||
#################################################################
|
||||
|
|
@ -1253,6 +1254,7 @@ else()
|
|||
onnxruntime_add_shared_library_module(custom_op_library ${TEST_SRC_DIR}/testdata/custom_op_library/custom_op_library.cc)
|
||||
endif()
|
||||
target_include_directories(custom_op_library PRIVATE ${REPO_ROOT}/include)
|
||||
target_link_libraries(custom_op_library PRIVATE ${GSL_TARGET})
|
||||
if(UNIX)
|
||||
if (APPLE)
|
||||
set(ONNXRUNTIME_CUSTOM_OP_LIB_LINK_FLAG "-Xlinker -dead_strip")
|
||||
|
|
|
|||
|
|
@ -187,6 +187,7 @@ target_include_directories(winml_lib_telemetry PRIVATE ${winml_lib_telemetry_dir
|
|||
target_include_directories(winml_lib_telemetry PRIVATE ${winml_lib_common_dir}/inc)
|
||||
target_include_directories(winml_lib_telemetry PRIVATE ${ONNXRUNTIME_INCLUDE_DIR}/core/platform/windows)
|
||||
target_include_directories(winml_lib_telemetry PRIVATE ${REPO_ROOT}/winml)
|
||||
target_include_directories(winml_lib_telemetry PRIVATE ${GSL_INCLUDE_DIR})
|
||||
|
||||
# Properties
|
||||
set_target_properties(winml_lib_telemetry
|
||||
|
|
@ -264,6 +265,7 @@ target_include_directories(winml_lib_ort PRIVATE ${winml_lib_api_ort_dir})
|
|||
target_include_directories(winml_lib_ort PRIVATE ${winml_lib_common_dir}/inc)
|
||||
target_include_directories(winml_lib_ort PRIVATE ${ONNXRUNTIME_INCLUDE_DIR})
|
||||
target_include_directories(winml_lib_ort PRIVATE ${ONNXRUNTIME_ROOT})
|
||||
target_include_directories(winml_lib_ort PRIVATE ${GSL_INCLUDE_DIR})
|
||||
|
||||
set_target_properties(winml_lib_ort
|
||||
PROPERTIES
|
||||
|
|
@ -403,13 +405,13 @@ target_include_directories(winml_lib_image PRIVATE ${winml_lib_api_image_dir})
|
|||
target_include_directories(winml_lib_image PRIVATE ${winml_lib_common_dir}/inc)
|
||||
target_include_directories(winml_lib_image PRIVATE ${ONNXRUNTIME_ROOT})
|
||||
target_include_directories(winml_lib_image PRIVATE ${ONNXRUNTIME_INCLUDE_DIR}) # for status.h
|
||||
target_include_directories(winml_lib_image PRIVATE ${REPO_ROOT}/cmake/external/gsl/include)
|
||||
target_include_directories(winml_lib_image PRIVATE ${REPO_ROOT}/cmake/external/onnx)
|
||||
target_include_directories(winml_lib_image PRIVATE ${REPO_ROOT}/cmake/external/protobuf/src)
|
||||
target_include_directories(winml_lib_image PRIVATE ${ONNXRUNTIME_INCLUDE_DIR}/core/platform/windows)
|
||||
target_include_directories(winml_lib_image PRIVATE ${REPO_ROOT}/cmake/external/flatbuffers/include)
|
||||
target_include_directories(winml_lib_image PRIVATE ${REPO_ROOT}/cmake/external/mp11/include)
|
||||
target_include_directories(winml_lib_image PRIVATE ${REPO_ROOT}/winml)
|
||||
target_include_directories(winml_lib_image PRIVATE ${GSL_INCLUDE_DIR})
|
||||
|
||||
# Properties
|
||||
set_target_properties(winml_lib_image
|
||||
|
|
@ -511,7 +513,6 @@ target_include_directories(winml_lib_api PRIVATE ${winml_lib_common_dir}/inc)
|
|||
|
||||
target_include_directories(winml_lib_api PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
|
||||
target_include_directories(winml_lib_api PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/external/date/include)
|
||||
target_include_directories(winml_lib_api PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/external/gsl/include)
|
||||
target_include_directories(winml_lib_api PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/external/onnx)
|
||||
|
||||
target_include_directories(winml_lib_api PRIVATE ${ONNXRUNTIME_INCLUDE_DIR})
|
||||
|
|
@ -521,11 +522,11 @@ target_include_directories(winml_lib_api PRIVATE ${ONNXRUNTIME_ROOT}/core/graph)
|
|||
target_include_directories(winml_lib_api PRIVATE ${REPO_ROOT}/cmake/external/eigen)
|
||||
target_include_directories(winml_lib_api PRIVATE ${REPO_ROOT}/cmake/external/onnx)
|
||||
target_include_directories(winml_lib_api PRIVATE ${REPO_ROOT}/cmake/external/protobuf/src)
|
||||
target_include_directories(winml_lib_api PRIVATE ${REPO_ROOT}/cmake/external/gsl/include)
|
||||
target_include_directories(winml_lib_api PRIVATE ${REPO_ROOT}/cmake/external/SafeInt)
|
||||
target_include_directories(winml_lib_api PRIVATE ${REPO_ROOT}/cmake/external/flatbuffers/include)
|
||||
target_include_directories(winml_lib_api PRIVATE ${REPO_ROOT}/cmake/external/mp11/include)
|
||||
target_include_directories(winml_lib_api PRIVATE ${REPO_ROOT}/winml)
|
||||
target_include_directories(winml_lib_api PRIVATE ${GSL_INCLUDE_DIR})
|
||||
|
||||
# Properties
|
||||
set_target_properties(winml_lib_api
|
||||
|
|
@ -606,7 +607,6 @@ target_include_directories(winml_lib_api_experimental PRIVATE ${winml_lib_common
|
|||
|
||||
target_include_directories(winml_lib_api_experimental PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
|
||||
target_include_directories(winml_lib_api_experimental PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/external/date/include)
|
||||
target_include_directories(winml_lib_api_experimental PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/external/gsl/include)
|
||||
target_include_directories(winml_lib_api_experimental PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/external/onnx)
|
||||
|
||||
target_include_directories(winml_lib_api_experimental PRIVATE ${ONNXRUNTIME_INCLUDE_DIR})
|
||||
|
|
@ -616,11 +616,11 @@ target_include_directories(winml_lib_api_experimental PRIVATE ${ONNXRUNTIME_ROOT
|
|||
target_include_directories(winml_lib_api_experimental PRIVATE ${REPO_ROOT}/cmake/external/eigen)
|
||||
target_include_directories(winml_lib_api_experimental PRIVATE ${REPO_ROOT}/cmake/external/onnx)
|
||||
target_include_directories(winml_lib_api_experimental PRIVATE ${REPO_ROOT}/cmake/external/protobuf/src)
|
||||
target_include_directories(winml_lib_api_experimental PRIVATE ${REPO_ROOT}/cmake/external/gsl/include)
|
||||
target_include_directories(winml_lib_api_experimental PRIVATE ${REPO_ROOT}/cmake/external/SafeInt)
|
||||
target_include_directories(winml_lib_api_experimental PRIVATE ${REPO_ROOT}/cmake/external/flatbuffers/include)
|
||||
target_include_directories(winml_lib_api_experimental PRIVATE ${REPO_ROOT}/cmake/external/mp11/include)
|
||||
target_include_directories(winml_lib_api_experimental PRIVATE ${REPO_ROOT}/winml)
|
||||
target_include_directories(winml_lib_api_experimental PRIVATE ${GSL_INCLUDE_DIR})
|
||||
|
||||
# Properties
|
||||
set_target_properties(winml_lib_api_experimental
|
||||
|
|
@ -692,8 +692,15 @@ target_include_directories(winml_lib_common PRIVATE ${winml_lib_api_dir})
|
|||
target_include_directories(winml_lib_common PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
|
||||
target_include_directories(winml_lib_common PRIVATE ${winml_lib_common_dir}/inc)
|
||||
target_include_directories(winml_lib_common PRIVATE ${REPO_ROOT}/winml)
|
||||
target_include_directories(winml_lib_common PRIVATE ${GSL_INCLUDE_DIR})
|
||||
target_precompiled_header(winml_lib_common lib/Common/inc/pch.h)
|
||||
|
||||
# Properties
|
||||
set_target_properties(winml_lib_common
|
||||
PROPERTIES
|
||||
FOLDER
|
||||
${target_folder})
|
||||
|
||||
if (onnxruntime_USE_DML)
|
||||
target_add_dml(winml_lib_common)
|
||||
endif()
|
||||
|
|
@ -762,7 +769,6 @@ target_include_directories(winml_dll PRIVATE ${winml_lib_common_dir}/inc)
|
|||
|
||||
target_include_directories(winml_dll PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
|
||||
target_include_directories(winml_dll PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/external/date/include)
|
||||
target_include_directories(winml_dll PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/external/gsl/include)
|
||||
target_include_directories(winml_dll PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/external/onnx)
|
||||
|
||||
target_include_directories(winml_dll PRIVATE ${ONNXRUNTIME_INCLUDE_DIR})
|
||||
|
|
@ -771,12 +777,12 @@ target_include_directories(winml_dll PRIVATE ${ONNXRUNTIME_ROOT})
|
|||
target_include_directories(winml_dll PRIVATE ${ONNXRUNTIME_ROOT}/core/graph)
|
||||
target_include_directories(winml_dll PRIVATE ${REPO_ROOT}/cmake/external/onnx)
|
||||
target_include_directories(winml_dll PRIVATE ${REPO_ROOT}/cmake/external/protobuf/src)
|
||||
target_include_directories(winml_dll PRIVATE ${REPO_ROOT}/cmake/external/gsl/include)
|
||||
target_include_directories(winml_dll PRIVATE ${REPO_ROOT}/cmake/external/eigen)
|
||||
target_include_directories(winml_dll PRIVATE ${REPO_ROOT}/cmake/external/SafeInt)
|
||||
target_include_directories(winml_dll PRIVATE ${REPO_ROOT}/cmake/external/flatbuffers/include)
|
||||
target_include_directories(winml_dll PRIVATE ${REPO_ROOT}/cmake/external/mp11/include)
|
||||
target_include_directories(winml_dll PRIVATE ${REPO_ROOT}/winml)
|
||||
target_include_directories(winml_dll PRIVATE ${GSL_INCLUDE_DIR})
|
||||
|
||||
# Properties
|
||||
set_target_properties(winml_dll
|
||||
|
|
|
|||
|
|
@ -183,7 +183,7 @@ add_dependencies(winml_test_common
|
|||
winml_api
|
||||
winml_dll
|
||||
)
|
||||
onnxruntime_add_include_to_target(winml_test_common onnx_proto)
|
||||
onnxruntime_add_include_to_target(winml_test_common onnx_proto ${GSL_TARGET})
|
||||
onnxruntime_add_static_library(winml_google_test_lib ${WINML_TEST_SRC_DIR}/common/googletest/main.cpp)
|
||||
set_winml_target_properties(winml_google_test_lib)
|
||||
|
||||
|
|
|
|||
|
|
@ -36,7 +36,6 @@
|
|||
#include "core/common/exceptions.h"
|
||||
#include "core/common/make_string.h"
|
||||
#include "core/common/status.h"
|
||||
#include "core/common/gsl_suppress.h"
|
||||
|
||||
|
||||
namespace onnxruntime {
|
||||
|
|
|
|||
6
include/onnxruntime/core/common/gsl.h
Normal file
6
include/onnxruntime/core/common/gsl.h
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "gsl/gsl"
|
||||
|
|
@ -1,15 +0,0 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
#pragma once
|
||||
|
||||
#ifndef GSL_SUPPRESS
|
||||
#if defined(__clang__) && !defined(__NVCC__)
|
||||
#define GSL_SUPPRESS(x) [[gsl::suppress("x")]]
|
||||
#else
|
||||
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) && !defined(__NVCC__)
|
||||
#define GSL_SUPPRESS(x) [[gsl::suppress(x)]]
|
||||
#else
|
||||
#define GSL_SUPPRESS(x)
|
||||
#endif // _MSC_VER
|
||||
#endif // __clang__
|
||||
#endif
|
||||
|
|
@ -4,7 +4,7 @@
|
|||
#pragma once
|
||||
|
||||
#include <cstdarg>
|
||||
#include <gsl/gsl>
|
||||
#include "core/common/gsl.h"
|
||||
#include "core/common/common.h"
|
||||
#include "core/common/code_location.h"
|
||||
#include "core/common/logging/severity.h"
|
||||
|
|
|
|||
77
include/onnxruntime/core/common/narrow.h
Normal file
77
include/onnxruntime/core/common/narrow.h
Normal file
|
|
@ -0,0 +1,77 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#pragma once
|
||||
|
||||
// onnxruntime::narrow() is like gsl::narrow() but it is also available when exceptions are disabled.
|
||||
|
||||
#if !defined(ORT_NO_EXCEPTIONS)
|
||||
|
||||
#include "gsl/narrow"
|
||||
|
||||
namespace onnxruntime {
|
||||
using gsl::narrow;
|
||||
} // namespace onnxruntime
|
||||
|
||||
#else // ^^ !defined(ORT_NO_EXCEPTIONS) ^^ / vv defined(ORT_NO_EXCEPTIONS) vv
|
||||
|
||||
#include <cstdio> // std::fprintf
|
||||
#include <exception> // std::terminate
|
||||
#include <type_traits>
|
||||
|
||||
#include "gsl/util" // gsl::narrow_cast
|
||||
|
||||
namespace onnxruntime {
|
||||
|
||||
namespace detail {
|
||||
[[noreturn]] inline void OnNarrowingError() noexcept {
|
||||
std::fprintf(stderr, "%s", "narrowing error\n");
|
||||
std::terminate();
|
||||
}
|
||||
} // namespace detail
|
||||
|
||||
// This implementation of onnxruntime::narrow was copied and adapted from:
|
||||
// https://github.com/microsoft/GSL/blob/a3534567187d2edc428efd3f13466ff75fe5805c/include/gsl/narrow
|
||||
|
||||
// narrow() : a checked version of narrow_cast() that terminates if the cast changed the value
|
||||
template <class T, class U, typename std::enable_if<std::is_arithmetic<T>::value>::type* = nullptr>
|
||||
// clang-format off
|
||||
GSL_SUPPRESS(type.1) // NO-FORMAT: attribute
|
||||
// clang-format on
|
||||
constexpr T narrow(U u) noexcept {
|
||||
constexpr const bool is_different_signedness =
|
||||
(std::is_signed<T>::value != std::is_signed<U>::value);
|
||||
|
||||
// clang-format off
|
||||
GSL_SUPPRESS(es.103) // NO-FORMAT: attribute // don't overflow
|
||||
GSL_SUPPRESS(es.104) // NO-FORMAT: attribute // don't underflow
|
||||
GSL_SUPPRESS(p.2) // NO-FORMAT: attribute // don't rely on undefined behavior
|
||||
// clang-format on
|
||||
const T t = gsl::narrow_cast<T>(u); // While this is technically undefined behavior in some cases (i.e., if the source value is of floating-point type
|
||||
// and cannot fit into the destination integral type), the resultant behavior is benign on the platforms
|
||||
// that we target (i.e., no hardware trap representations are hit).
|
||||
|
||||
if (static_cast<U>(t) != u || (is_different_signedness && ((t < T{}) != (u < U{})))) {
|
||||
detail::OnNarrowingError();
|
||||
}
|
||||
|
||||
return t;
|
||||
}
|
||||
|
||||
template <class T, class U, typename std::enable_if<!std::is_arithmetic<T>::value>::type* = nullptr>
|
||||
// clang-format off
|
||||
GSL_SUPPRESS(type.1) // NO-FORMAT: attribute
|
||||
// clang-format on
|
||||
constexpr T narrow(U u) noexcept {
|
||||
const T t = gsl::narrow_cast<T>(u);
|
||||
|
||||
if (static_cast<U>(t) != u) {
|
||||
detail::OnNarrowingError();
|
||||
}
|
||||
|
||||
return t;
|
||||
}
|
||||
|
||||
} // namespace onnxruntime
|
||||
|
||||
#endif // defined(ORT_NO_EXCEPTIONS)
|
||||
|
|
@ -3,23 +3,26 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <gsl/gsl>
|
||||
#include <algorithm>
|
||||
|
||||
#include "core/common/gsl.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
// Inspired by Fekir's Blog https://fekir.info/post/span-the-missing-constructor/
|
||||
|
||||
// AsSpan inspired by Fekir's Blog https://fekir.info/post/span-the-missing-constructor/
|
||||
// Used under MIT license
|
||||
|
||||
// Use AsSpan for less typing on any container including initializer list to create a span
|
||||
// (unnamed, untyped initializer list does not automatically convert to gsl::span).
|
||||
// {1, 2, 3} as such does not have a type
|
||||
// {1, 2, 3} as such does not have a type
|
||||
// (see https://scottmeyers.blogspot.com/2014/03/if-braced-initializers-have-no-type-why.html)
|
||||
//
|
||||
//
|
||||
// Example: AsSpan({1, 2, 3}) results in gsl::span<const int>
|
||||
//
|
||||
//
|
||||
// The above would deduce to std::initializer_list<int> and the result is gsl::span<const int>
|
||||
//
|
||||
// AsSpan<int64_t>({1, 2, 3}) produces gsl::span<const int64_t>
|
||||
//
|
||||
//
|
||||
// We can also do std::array<int64_t, 3>{1, 2, 3} that can be automatically converted to span
|
||||
// without memory allocation.
|
||||
//
|
||||
|
|
@ -38,7 +41,7 @@ template <class C>
|
|||
constexpr auto AsSpan(C& c) {
|
||||
return details::AsSpanImpl(c.data(), c.size());
|
||||
}
|
||||
|
||||
|
||||
template <class C>
|
||||
constexpr auto AsSpan(const C& c) {
|
||||
return details::AsSpanImpl(c.data(), c.size());
|
||||
|
|
@ -64,7 +67,22 @@ constexpr auto AsSpan(const T (&arr)[N]) {
|
|||
return details::AsSpanImpl(arr, N);
|
||||
}
|
||||
|
||||
template<class T>
|
||||
template <class T>
|
||||
inline gsl::span<const T> EmptySpan() { return gsl::span<const T>(); }
|
||||
|
||||
}
|
||||
template <class U, class T>
|
||||
[[nodiscard]] inline gsl::span<U> ReinterpretAsSpan(gsl::span<T> src) {
|
||||
// adapted from gsl-lite span::as_span():
|
||||
// https://github.com/gsl-lite/gsl-lite/blob/4720a2980a30da085b4ddb4a0ea2a71af7351a48/include/gsl/gsl-lite.hpp#L4102-L4108
|
||||
Expects(src.size_bytes() % sizeof(U) == 0);
|
||||
return gsl::span<U>(reinterpret_cast<U*>(src.data()), src.size_bytes() / sizeof(U));
|
||||
}
|
||||
|
||||
template <class T1, size_t Extent1, class T2, size_t Extent2>
|
||||
[[nodiscard]] inline bool SpanEq(gsl::span<T1, Extent1> a, gsl::span<T2, Extent2> b) {
|
||||
static_assert(std::is_same_v<std::remove_const_t<T1>, std::remove_const_t<T2>>,
|
||||
"T1 and T2 should be the same type except for const qualification");
|
||||
return std::equal(a.begin(), a.end(), b.begin(), b.end());
|
||||
}
|
||||
|
||||
} // namespace onnxruntime
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ limitations under the License.
|
|||
#ifdef _WIN32
|
||||
#include <winerror.h>
|
||||
#endif
|
||||
#include "core/common/gsl_suppress.h"
|
||||
#include "core/common/gsl.h"
|
||||
namespace onnxruntime {
|
||||
namespace common {
|
||||
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@
|
|||
#include <type_traits>
|
||||
#include <map>
|
||||
#include <unordered_map>
|
||||
#include "core/common/gsl_suppress.h"
|
||||
#include "core/common/gsl.h"
|
||||
#include "core/common/common.h"
|
||||
#include "core/common/exceptions.h"
|
||||
#include "core/framework/endian.h"
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@
|
|||
#endif
|
||||
|
||||
#if !defined(__CUDACC__) && !defined(__HIPCC__)
|
||||
#include <gsl/gsl>
|
||||
#include "core/common/narrow.h"
|
||||
#endif
|
||||
|
||||
#include "core/common/common.h"
|
||||
|
|
@ -123,7 +123,7 @@ inline ORT_HOST_DEVICE bool operator<(const BFloat16& left, const BFloat16& righ
|
|||
// E.g 10_f16 or 10_b16
|
||||
#if !defined(__CUDACC__) && !defined(__HIPCC__)
|
||||
inline MLFloat16 operator"" _f16(unsigned long long int v) {
|
||||
return MLFloat16(gsl::narrow<uint16_t>(v));
|
||||
return MLFloat16(narrow<uint16_t>(v));
|
||||
}
|
||||
|
||||
inline MLFloat16 operator"" _fp16(long double v) {
|
||||
|
|
@ -131,7 +131,7 @@ inline MLFloat16 operator"" _fp16(long double v) {
|
|||
}
|
||||
|
||||
inline BFloat16 operator"" _b16(unsigned long long int v) {
|
||||
return BFloat16(gsl::narrow<uint16_t>(v), BFloat16::FromBits());
|
||||
return BFloat16(narrow<uint16_t>(v), BFloat16::FromBits());
|
||||
}
|
||||
|
||||
inline BFloat16 operator"" _bfp16(long double v) {
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@
|
|||
#endif
|
||||
#include "onnx/onnx_pb.h"
|
||||
#include "onnx/onnx-operators_pb.h"
|
||||
#include "gsl/gsl"
|
||||
#include "core/common/gsl.h"
|
||||
namespace onnxruntime {
|
||||
class OpKernelContext;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@
|
|||
#include "core/framework/ort_value.h"
|
||||
#include "core/framework/op_node_proto_helper.h"
|
||||
#include "core/graph/graph_viewer.h"
|
||||
#include "gsl/gsl"
|
||||
#include "core/common/gsl.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@
|
|||
#include "core/common/status.h"
|
||||
#include "core/framework/tensor_shape.h"
|
||||
#include "core/graph/graph_viewer.h"
|
||||
#include "gsl/gsl"
|
||||
#include "core/common/gsl.h"
|
||||
#endif
|
||||
|
||||
#ifdef __has_attribute
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@
|
|||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "gsl/gsl"
|
||||
#include "core/common/gsl.h"
|
||||
#include "core/common/common.h"
|
||||
#include "core/framework/allocator.h"
|
||||
#include "core/framework/tensor_shape.h"
|
||||
|
|
@ -189,7 +189,7 @@ class Tensor final {
|
|||
ORT_ENFORCE(utils::IsPrimitiveDataType<T>(dtype_), "Tensor type mismatch. ",
|
||||
"T ", "!=", dtype_);
|
||||
const T* data = reinterpret_cast<const T*>(static_cast<char*>(p_data_) + byte_offset_);
|
||||
return gsl::make_span(data, static_cast<typename gsl::span<T>::index_type>(shape_.Size()));
|
||||
return gsl::make_span(data, static_cast<typename gsl::span<T>::size_type>(shape_.Size()));
|
||||
}
|
||||
|
||||
void* MutableDataRaw(MLDataType type) {
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@
|
|||
#include <algorithm>
|
||||
#include <string>
|
||||
#include <cstring>
|
||||
#include <gsl/gsl>
|
||||
#include "core/common/gsl.h"
|
||||
#include "onnxruntime_config.h"
|
||||
|
||||
#ifndef DISABLE_ABSEIL
|
||||
|
|
@ -29,6 +29,8 @@
|
|||
#endif
|
||||
#endif // DISABLE_ABSEIL
|
||||
|
||||
#include "core/common/span_utils.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
#ifdef __GNUC__
|
||||
#pragma GCC diagnostic push
|
||||
|
|
@ -96,7 +98,7 @@ class TensorShape {
|
|||
int64_t operator[](size_t idx) const { return values_[idx]; }
|
||||
int64_t& operator[](size_t idx) { return values_[idx]; }
|
||||
|
||||
bool operator==(const TensorShape& other) const noexcept { return GetDims() == other.GetDims(); }
|
||||
bool operator==(const TensorShape& other) const noexcept { return SpanEq(GetDims(), other.GetDims()); }
|
||||
bool operator!=(const TensorShape& other) const noexcept { return !(*this == other); }
|
||||
|
||||
size_t NumDimensions() const noexcept {
|
||||
|
|
|
|||
|
|
@ -29,12 +29,13 @@
|
|||
#pragma warning(pop)
|
||||
#endif
|
||||
|
||||
#include "gsl/gsl"
|
||||
#include "core/common/gsl.h"
|
||||
|
||||
#include "core/common/common.h"
|
||||
#include "core/common/const_pointer_container.h"
|
||||
#include "core/common/inlined_containers_fwd.h"
|
||||
#include "core/common/path.h"
|
||||
#include "core/common/span_utils.h"
|
||||
#include "core/common/status.h"
|
||||
#include "core/common/logging/logging.h"
|
||||
#include "core/graph/basic_types.h"
|
||||
|
|
@ -935,8 +936,8 @@ class Graph {
|
|||
const NodeAttributes* attributes = nullptr,
|
||||
const std::string& domain = kOnnxDomain) {
|
||||
return AddNode(name, op_type, description,
|
||||
gsl::make_span(input_args.begin(), input_args.end()),
|
||||
gsl::make_span(output_args.begin(), output_args.end()),
|
||||
AsSpan(input_args),
|
||||
AsSpan(output_args),
|
||||
attributes, domain);
|
||||
}
|
||||
|
||||
|
|
@ -949,7 +950,7 @@ class Graph {
|
|||
const std::string& domain = kOnnxDomain) {
|
||||
return AddNode(name, op_type, description,
|
||||
input_args,
|
||||
gsl::make_span(output_args.begin(), output_args.end()),
|
||||
AsSpan(output_args),
|
||||
attributes, domain);
|
||||
}
|
||||
|
||||
|
|
@ -961,7 +962,7 @@ class Graph {
|
|||
const NodeAttributes* attributes = nullptr,
|
||||
const std::string& domain = kOnnxDomain) {
|
||||
return AddNode(name, op_type, description,
|
||||
gsl::make_span(input_args.begin(), input_args.end()),
|
||||
AsSpan(input_args),
|
||||
output_args,
|
||||
attributes, domain);
|
||||
}
|
||||
|
|
@ -1153,7 +1154,7 @@ class Graph {
|
|||
void SetInputs(gsl::span<const NodeArg* const> inputs);
|
||||
|
||||
void SetInputs(std::initializer_list<const NodeArg*> inputs) {
|
||||
SetInputs(gsl::make_span(inputs));
|
||||
SetInputs(AsSpan(inputs));
|
||||
}
|
||||
|
||||
const Model& GetModel() const {
|
||||
|
|
@ -1171,7 +1172,7 @@ class Graph {
|
|||
void SetOutputs(gsl::span<const NodeArg* const> outputs);
|
||||
|
||||
void SetOutputs(std::initializer_list<const NodeArg*> outputs) {
|
||||
SetOutputs(gsl::make_span(outputs.begin(), outputs.end()));
|
||||
SetOutputs(AsSpan(outputs));
|
||||
}
|
||||
|
||||
#endif // !defined(ORT_MINIMAL_BUILD)
|
||||
|
|
@ -1232,7 +1233,7 @@ class Graph {
|
|||
}
|
||||
|
||||
void UpdateConsumerNodes(const std::string& node_arg_name, std::initializer_list<Node*> nodes) {
|
||||
UpdateConsumerNodes(node_arg_name, gsl::make_span(nodes));
|
||||
UpdateConsumerNodes(node_arg_name, AsSpan(nodes));
|
||||
}
|
||||
|
||||
/** During constant folding it may become possible to infer the shape for a node.
|
||||
|
|
|
|||
|
|
@ -129,7 +129,6 @@ extern "C" {
|
|||
|
||||
// Used in *.cc files. Almost as same as ORT_API_STATUS, except without ORT_MUST_USE_RESULT and ORT_EXPORT
|
||||
#define ORT_API_STATUS_IMPL(NAME, ...) \
|
||||
GSL_SUPPRESS(r .11) \
|
||||
_Success_(return == 0) _Check_return_ _Ret_maybenull_ OrtStatusPtr ORT_API_CALL NAME(__VA_ARGS__) NO_EXCEPTION
|
||||
|
||||
#define ORT_CLASS_RELEASE(X) void(ORT_API_CALL * Release##X)(_Frees_ptr_opt_ Ort##X * input)
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <gsl/gsl>
|
||||
#include "core/common/gsl.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace contrib {
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ void AttentionWrapper<T>::ProcessOutput(const gsl::span<const T>& rnn_cell_outpu
|
|||
// Get the context which is calculated within attention mechanism.
|
||||
attention_mechanism_.Compute(rnn_cell_output, prev_alignments_, attn_context_, alignments_);
|
||||
if (attention_mechanism_.NeedPrevAlignment()) {
|
||||
std::copy(alignments_.cbegin(), alignments_.cend(), prev_alignments_.begin());
|
||||
std::copy(alignments_.begin(), alignments_.end(), prev_alignments_.begin());
|
||||
}
|
||||
|
||||
if (has_attn_layer_) {
|
||||
|
|
|
|||
|
|
@ -63,11 +63,11 @@ template <typename T>
|
|||
void BahdanauAttention<T>::PrepareMemory(
|
||||
const gsl::span<const T>& memory,
|
||||
const gsl::span<const int>& memory_sequence_lengths) {
|
||||
std::copy(memory.cbegin(), memory.cend(), values_.begin());
|
||||
std::copy(memory.begin(), memory.end(), values_.begin());
|
||||
if (memory_sequence_lengths.empty()) {
|
||||
std::fill(mem_seq_lengths_.begin(), mem_seq_lengths_.end(), max_memory_steps_);
|
||||
} else {
|
||||
std::copy(memory_sequence_lengths.cbegin(), memory_sequence_lengths.cend(), mem_seq_lengths_.begin());
|
||||
std::copy(memory_sequence_lengths.begin(), memory_sequence_lengths.end(), mem_seq_lengths_.begin());
|
||||
}
|
||||
|
||||
for (int b = 0; b < batch_size_; b++) {
|
||||
|
|
@ -145,7 +145,7 @@ void BahdanauAttention<T>::Compute(
|
|||
}
|
||||
}
|
||||
|
||||
SoftmaxInplace(gsl::span<T>{alignments, gsl::narrow_cast<gsl::index>(mem_steps)});
|
||||
SoftmaxInplace(gsl::span<T>{alignments, gsl::narrow_cast<size_t>(mem_steps)});
|
||||
|
||||
// Calculate the context
|
||||
auto outspan = output.subspan(b * memory_depth_);
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@
|
|||
|
||||
#include "core/common/common.h"
|
||||
#include "core/common/logging/logging.h"
|
||||
#include "core/common/narrow.h"
|
||||
#include "core/platform/threadpool.h"
|
||||
#include "core/framework/allocator.h"
|
||||
//TODO: fix the warnings
|
||||
|
|
@ -95,9 +96,9 @@ Status DeepCpuAttnLstmOp::ComputeImpl(OpKernelContext& context) const {
|
|||
|
||||
auto& X_shape = X.Shape();
|
||||
|
||||
int seq_length = gsl::narrow<int>(X_shape[0]);
|
||||
int batch_size = gsl::narrow<int>(X_shape[1]);
|
||||
int input_size = gsl::narrow<int>(X_shape[2]);
|
||||
int seq_length = narrow<int>(X_shape[0]);
|
||||
int batch_size = narrow<int>(X_shape[1]);
|
||||
int input_size = narrow<int>(X_shape[2]);
|
||||
|
||||
// Processing attention wrapper
|
||||
constexpr int first_attn_input = 8;
|
||||
|
|
@ -113,12 +114,12 @@ Status DeepCpuAttnLstmOp::ComputeImpl(OpKernelContext& context) const {
|
|||
am_query_layer_weights, am_memory_layer_weights, am_v_weights, attn_memory, attn_memory_seq_lens, attn_layer_weights);
|
||||
ORT_RETURN_IF_ERROR(status);
|
||||
|
||||
const int max_memory_step = gsl::narrow<int>(attn_memory.Shape()[1]);
|
||||
const int memory_depth = gsl::narrow<int>(am_memory_layer_weights.Shape()[1]);
|
||||
const int am_attn_size = gsl::narrow<int>(am_memory_layer_weights.Shape()[2]);
|
||||
const int query_depth = gsl::narrow<int>(am_query_layer_weights.Shape()[1]); // it is equal to hidden_size
|
||||
const int max_memory_step = narrow<int>(attn_memory.Shape()[1]);
|
||||
const int memory_depth = narrow<int>(am_memory_layer_weights.Shape()[1]);
|
||||
const int am_attn_size = narrow<int>(am_memory_layer_weights.Shape()[2]);
|
||||
const int query_depth = narrow<int>(am_query_layer_weights.Shape()[1]); // it is equal to hidden_size
|
||||
const bool has_attention_layer = attn_layer_weights != nullptr;
|
||||
const int attn_layer_depth = has_attention_layer ? gsl::narrow<int>(attn_layer_weights->Shape()[2]) : 0;
|
||||
const int attn_layer_depth = has_attention_layer ? narrow<int>(attn_layer_weights->Shape()[2]) : 0;
|
||||
const int attention_size = has_attention_layer ? attn_layer_depth : memory_depth;
|
||||
|
||||
const gsl::span<const T> attn_layer_weights_span = (has_attention_layer) ? attn_layer_weights->DataAsSpan<T>() : gsl::span<const T>();
|
||||
|
|
@ -202,7 +203,7 @@ Status DeepCpuAttnLstmOp::ComputeImpl(OpKernelContext& context) const {
|
|||
|
||||
if (!output.empty() && !sequence_lens_span.empty()) {
|
||||
// clear tailing outputs
|
||||
int32_t max_seq_this_batch = *std::max_element(sequence_lens_span.cbegin(), sequence_lens_span.cend());
|
||||
int32_t max_seq_this_batch = *std::max_element(sequence_lens_span.begin(), sequence_lens_span.end());
|
||||
if (max_seq_this_batch >= 0 && max_seq_this_batch < seq_length) {
|
||||
auto start = max_seq_this_batch * hidden_output_size_per_direction * num_directions_;
|
||||
std::fill(output.begin() + start, output.end(), T{});
|
||||
|
|
@ -424,8 +425,8 @@ static Status ValidateRnnInputsWithExtraInputFromState(
|
|||
}
|
||||
|
||||
auto sequence_len_entries = sequence_lens->DataAsSpan<int>();
|
||||
if (std::any_of(sequence_len_entries.cbegin(),
|
||||
sequence_len_entries.cend(),
|
||||
if (std::any_of(sequence_len_entries.begin(),
|
||||
sequence_len_entries.end(),
|
||||
[seq_length](int len) { return len <= 0 || len > seq_length; })) {
|
||||
return ORT_MAKE_STATUS(
|
||||
ONNXRUNTIME, INVALID_ARGUMENT,
|
||||
|
|
@ -461,8 +462,8 @@ Status DeepCpuAttnLstmOp::ValidateInputs(
|
|||
"Attention mechanism memory shape error! Expected: {", batch_size,
|
||||
"}, actural: ", memory_shape);
|
||||
}
|
||||
const int max_memory_step = gsl::narrow<int>(memory_shape[1]);
|
||||
const int memory_depth = gsl::narrow<int>(memory_shape[2]);
|
||||
const int max_memory_step = narrow<int>(memory_shape[1]);
|
||||
const int memory_depth = narrow<int>(memory_shape[2]);
|
||||
if (attn_memory_seq_lens != nullptr) {
|
||||
auto memory_seq_lens_shape = attn_memory_seq_lens->Shape();
|
||||
if (memory_seq_lens_shape.NumDimensions() != 1 || memory_seq_lens_shape[0] != batch_size) {
|
||||
|
|
@ -472,9 +473,9 @@ Status DeepCpuAttnLstmOp::ValidateInputs(
|
|||
}
|
||||
const gsl::span<const int> mem_seq_lens_span = attn_memory_seq_lens->DataAsSpan<int>();
|
||||
auto item_not_in_range = std::find_if(
|
||||
mem_seq_lens_span.cbegin(), mem_seq_lens_span.cend(),
|
||||
mem_seq_lens_span.begin(), mem_seq_lens_span.end(),
|
||||
[max_memory_step](int len) { return len <= 0 || len > max_memory_step; });
|
||||
if (item_not_in_range != mem_seq_lens_span.cend()) {
|
||||
if (item_not_in_range != mem_seq_lens_span.end()) {
|
||||
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
|
||||
"Attention mechanism memory sequence lengths value must in (0, ",
|
||||
max_memory_step, "], while ", *item_not_in_range, " found!");
|
||||
|
|
@ -490,7 +491,7 @@ Status DeepCpuAttnLstmOp::ValidateInputs(
|
|||
"Attention memory layer weight shape error! Expected:{",
|
||||
num_directions_, ",", memory_depth, ", am_attn_size}, Got:", memory_layer_shape);
|
||||
}
|
||||
const int am_attn_size = gsl::narrow<int>(memory_layer_shape[2]);
|
||||
const int am_attn_size = narrow<int>(memory_layer_shape[2]);
|
||||
|
||||
// check query layer weights of [num_directions, query_depth(hidden_size of lstm), am_attn_size]
|
||||
auto query_layer_shape = am_query_layer_weights.Shape();
|
||||
|
|
@ -525,7 +526,7 @@ Status DeepCpuAttnLstmOp::ValidateInputs(
|
|||
"Attention layer weight shape error! Expected: {", num_directions_, ", ",
|
||||
memory_depth + hidden_size_, ", aw_attn_size}. Got:", attn_layer_shape);
|
||||
}
|
||||
aw_attn_size = gsl::narrow<int>(attn_layer_shape[2]);
|
||||
aw_attn_size = narrow<int>(attn_layer_shape[2]);
|
||||
}
|
||||
|
||||
auto status = ValidateRnnInputsWithExtraInputFromState(
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@
|
|||
|
||||
#include "attention_wrapper.h"
|
||||
|
||||
#include "core/common/narrow.h"
|
||||
#include "core/framework/op_kernel.h"
|
||||
#include "core/providers/cpu/rnn/rnn_helpers.h"
|
||||
|
||||
|
|
@ -30,7 +31,7 @@ class DeepCpuAttnLstmOp final : public OpKernel {
|
|||
|
||||
int64_t int64_value;
|
||||
ORT_ENFORCE(info.GetAttr("hidden_size", &int64_value).IsOK() && int64_value > 0);
|
||||
hidden_size_ = gsl::narrow<int>(int64_value);
|
||||
hidden_size_ = narrow<int>(int64_value);
|
||||
|
||||
// optional attributes
|
||||
std::vector<std::string> activation_func_names = info.GetAttrsOrDefault<std::string>("activations");
|
||||
|
|
|
|||
|
|
@ -162,7 +162,7 @@ void UniDirectionalAttnLstm<T>::LoadPeepholeWeights(const gsl::span<const T>& pe
|
|||
DumpMatrix("P[f]", peephole_weights.data() + (i++ * hidden_size_), 1, hidden_size_);
|
||||
|
||||
auto copy_weight = [this, &peephole_weights](int offset, gsl::span<T>& out) {
|
||||
typename gsl::span<const T>::const_iterator in_iter = peephole_weights.cbegin() + offset;
|
||||
typename gsl::span<const T>::iterator in_iter = peephole_weights.begin() + offset;
|
||||
std::copy(in_iter, in_iter + hidden_size_, out.begin());
|
||||
};
|
||||
|
||||
|
|
@ -245,9 +245,9 @@ void UniDirectionalAttnLstm<T>::Compute(const gsl::span<const T>& inputs_arg,
|
|||
}
|
||||
|
||||
// Calculate the max and min length
|
||||
int32_t max_sequence_length = *std::max_element(sequence_lengths.cbegin(), sequence_lengths.cend());
|
||||
int32_t min_sequence_length = std::min(seq_length_, *std::min_element(sequence_lengths.cbegin(),
|
||||
sequence_lengths.cend()));
|
||||
int32_t max_sequence_length = *std::max_element(sequence_lengths.begin(), sequence_lengths.end());
|
||||
int32_t min_sequence_length = std::min(seq_length_, *std::min_element(sequence_lengths.begin(),
|
||||
sequence_lengths.end()));
|
||||
|
||||
///**************************LSTM Calculations****************************/
|
||||
const int hidden_size_x4 = 4 * hidden_size_;
|
||||
|
|
@ -255,9 +255,9 @@ void UniDirectionalAttnLstm<T>::Compute(const gsl::span<const T>& inputs_arg,
|
|||
|
||||
// apply the weights to all the inputs and save to output_IOFC
|
||||
ComputeGemm(total_rows, hidden_size_x4, input_size_, T{1.0},
|
||||
inputs.cbegin(), inputs.cend(),
|
||||
inputs.begin(), inputs.end(),
|
||||
input_size_,
|
||||
input_weights.cbegin(), input_weights.cend(), // W[iofc]^T
|
||||
input_weights.begin(), input_weights.end(), // W[iofc]^T
|
||||
input_size_ + attention_size_, T{0.0},
|
||||
output_iofc_.begin(), output_iofc_.end(),
|
||||
hidden_size_x4, ttp_);
|
||||
|
|
@ -278,7 +278,7 @@ void UniDirectionalAttnLstm<T>::Compute(const gsl::span<const T>& inputs_arg,
|
|||
|
||||
// hidden state can be provided as input for first step, so need to special case that.
|
||||
// after the first step this will switch to the output from the previous step
|
||||
span_T_const_iter previous_state = batched_hidden_state_one_step.cbegin();
|
||||
span_T_const_iter previous_state = batched_hidden_state_one_step.begin();
|
||||
|
||||
//run through steps sequentially
|
||||
for (int step = 0; step < max_sequence_length; step++) {
|
||||
|
|
@ -293,9 +293,9 @@ void UniDirectionalAttnLstm<T>::Compute(const gsl::span<const T>& inputs_arg,
|
|||
|
||||
// Xt*(W[iofc]^T) = INPUTt * W[iofc]^T + At-1 * WA[iofc]
|
||||
ComputeGemm(batch_size_, hidden_size_x4, attention_size_, T{1.0},
|
||||
attention.cbegin(), attention.cend(), // At-1
|
||||
attention.begin(), attention.end(), // At-1
|
||||
attention_size_,
|
||||
input_weights.cbegin() + input_size_, input_weights.cend(), // WA[iofc]
|
||||
input_weights.begin() + input_size_, input_weights.end(), // WA[iofc]
|
||||
input_size_ + attention_size_, T{1.0},
|
||||
step_out_IOFC, output_iofc_.end(), // input contains Xt*(W[iofc]^T)
|
||||
hidden_size_x4, ttp_);
|
||||
|
|
@ -304,7 +304,7 @@ void UniDirectionalAttnLstm<T>::Compute(const gsl::span<const T>& inputs_arg,
|
|||
ComputeGemm(batch_size_, hidden_size_x4, hidden_size_, T{1.0},
|
||||
previous_state, previous_state_end, // Ht-1
|
||||
hidden_size_,
|
||||
recurrent_weights.cbegin(), recurrent_weights.cend(), // R[iofc]
|
||||
recurrent_weights.begin(), recurrent_weights.end(), // R[iofc]
|
||||
hidden_size_, T{1.0},
|
||||
step_out_IOFC, output_iofc_.end(), // input contains Xt*(W[iofc]^T)
|
||||
hidden_size_x4, ttp_);
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@
|
|||
#include "core/common/logging/logging.h"
|
||||
#include "core/framework/allocator.h"
|
||||
|
||||
#include <gsl/gsl>
|
||||
#include "core/common/gsl.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace contrib {
|
||||
|
|
@ -69,7 +69,7 @@ class UniDirectionalAttnLstm {
|
|||
}
|
||||
|
||||
private:
|
||||
using span_T_const_iter = typename gsl::span<T>::const_iterator;
|
||||
using span_T_const_iter = typename gsl::span<const T>::iterator;
|
||||
using span_T_iter = typename gsl::span<T>::iterator;
|
||||
|
||||
void SetNumThreads();
|
||||
|
|
|
|||
|
|
@ -101,7 +101,7 @@ void PrepareMask(const int32_t* mask_index,
|
|||
bool is_raw_attention_mask = (nullptr != mask_index && mask_index_dims.size() == 2);
|
||||
bool has_mask_start_position = (nullptr != mask_index &&
|
||||
mask_index_dims.size() == 1 &&
|
||||
static_cast<int>(mask_index_dims.at(0)) == 2 * batch_size);
|
||||
static_cast<int>(mask_index_dims[0]) == 2 * batch_size);
|
||||
|
||||
for (int b_i = 0; b_i < batch_size; b_i++) {
|
||||
// TODO: mask_index can be used in softmax to save some calculation.
|
||||
|
|
|
|||
|
|
@ -27,8 +27,8 @@ class BifurcationDetector : public OpKernel {
|
|||
const Tensor* pred_tokens = context->Input<Tensor>(3);
|
||||
const auto* src_tokens_data = static_cast<const int64_t*>(src_tokens->DataRaw());
|
||||
const auto* cur_tokens_data = static_cast<const int64_t*>(cur_tokens->DataRaw());
|
||||
int64_t src_tokens_len = src_tokens->Shape().GetDims().at(0);
|
||||
int64_t cur_tokens_len = cur_tokens->Shape().GetDims().at(0);
|
||||
int64_t src_tokens_len = src_tokens->Shape().GetDims()[0];
|
||||
int64_t cur_tokens_len = cur_tokens->Shape().GetDims()[0];
|
||||
|
||||
Tensor* out_tokens = nullptr;
|
||||
|
||||
|
|
@ -45,7 +45,7 @@ class BifurcationDetector : public OpKernel {
|
|||
} else {
|
||||
const auto* pred_tokens_data = static_cast<const int64_t*>(pred_tokens->DataRaw());
|
||||
const int64_t prev_suffix_match_idx_data = static_cast<const int64_t*>(prev_suffix_match_idx->DataRaw())[0];
|
||||
int64_t pred_tokens_len = pred_tokens->Shape().GetDims().at(0);
|
||||
int64_t pred_tokens_len = pred_tokens->Shape().GetDims()[0];
|
||||
// Find bifurcation index between prediction tokens, and source tokens
|
||||
// starting from previous suffix match index.
|
||||
ORT_ENFORCE(src_tokens_len >= prev_suffix_match_idx_data);
|
||||
|
|
@ -70,7 +70,7 @@ class BifurcationDetector : public OpKernel {
|
|||
// Return the index of the start of the n-gram in source tokens.
|
||||
// No matching if found if src tokens contain multiple or zero matching n-grams.
|
||||
// Return -1.
|
||||
int64_t tokens_len = out_tokens->Shape().GetDims().at(0);
|
||||
int64_t tokens_len = out_tokens->Shape().GetDims()[0];
|
||||
int64_t min_gram = min_ngram_size_;
|
||||
int64_t max_gram = max_ngram_size_;
|
||||
int64_t suffix_idx = -1;
|
||||
|
|
|
|||
|
|
@ -3,8 +3,9 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <core/common/safeint.h>
|
||||
#include "core/common/common.h"
|
||||
#include "core/common/narrow.h"
|
||||
#include "core/common/safeint.h"
|
||||
#include "core/framework/op_kernel.h"
|
||||
#include "core/platform/threadpool.h"
|
||||
|
||||
|
|
@ -36,7 +37,7 @@ class NGramRepeatBlock : public OpKernel {
|
|||
int64_t cur_len = input_ids_dims[1];
|
||||
ORT_ENFORCE(scores_dims[0] == batch_size);
|
||||
int64_t vocab_size = scores_dims[1];
|
||||
|
||||
|
||||
if (cur_len + 1 < ngram_size_) {
|
||||
return Status::OK();
|
||||
}
|
||||
|
|
@ -69,7 +70,7 @@ class NGramRepeatBlock : public OpKernel {
|
|||
|
||||
concurrency::ThreadPool* tp = context->GetOperatorThreadPool();
|
||||
concurrency::ThreadPool::TryParallelFor(
|
||||
tp, gsl::narrow<std::ptrdiff_t>(batch_size) , static_cast<double>(cur_len * ngram_size_),
|
||||
tp, narrow<std::ptrdiff_t>(batch_size), static_cast<double>(cur_len * ngram_size_),
|
||||
[&lambda](ptrdiff_t first, ptrdiff_t last) {
|
||||
for (auto b = static_cast<int64_t>(first), end = static_cast<int64_t>(last); b < end; ++b) {
|
||||
lambda(b);
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@
|
|||
#include "core/common/common.h"
|
||||
#include "core/framework/op_kernel.h"
|
||||
|
||||
#include "gsl/gsl"
|
||||
#include "core/common/gsl.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace contrib {
|
||||
|
|
|
|||
|
|
@ -3,8 +3,9 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <core/common/safeint.h>
|
||||
#include "core/common/common.h"
|
||||
#include "core/common/narrow.h"
|
||||
#include "core/common/safeint.h"
|
||||
#include "core/framework/op_kernel.h"
|
||||
#include "core/util/math_cpuonly.h"
|
||||
|
||||
|
|
@ -44,7 +45,7 @@ class ImageScaler final : public OpKernel {
|
|||
EigenArrayMap<T> Y_arr(Y->MutableData<T>(), SafeInt<size_t>(H) * W, SafeInt<size_t>(N) * C);
|
||||
|
||||
for (int64_t nc = 0; nc < N * C; ++nc) {
|
||||
Y_arr.col(gsl::narrow<size_t>(nc)) = scale_ * X_arr.col(gsl::narrow<size_t>(nc)) + bias_[gsl::narrow<size_t>(nc % C)];
|
||||
Y_arr.col(narrow<size_t>(nc)) = scale_ * X_arr.col(narrow<size_t>(nc)) + bias_[narrow<size_t>(nc % C)];
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
// Licensed under the MIT License.
|
||||
|
||||
#include "core/common/common.h"
|
||||
#include "core/common/narrow.h"
|
||||
#include "core/framework/op_kernel.h"
|
||||
#include "core/platform/threadpool.h"
|
||||
#include "core/util/math_cpuonly.h"
|
||||
|
|
@ -41,8 +42,8 @@ struct Inverse::ComputeImpl {
|
|||
const auto* input_data = input->Data<T>() + batch_offset;
|
||||
auto* output_data = output->MutableData<T>() + batch_offset;
|
||||
|
||||
Eigen::Map<const MatrixT<T>> input_matrix(input_data, gsl::narrow<size_t>(rows), gsl::narrow<size_t>(cols));
|
||||
Eigen::Map<MatrixT<T>> output_matrix(output_data, gsl::narrow<size_t>(rows), gsl::narrow<size_t>(cols));
|
||||
Eigen::Map<const MatrixT<T>> input_matrix(input_data, narrow<size_t>(rows), narrow<size_t>(cols));
|
||||
Eigen::Map<MatrixT<T>> output_matrix(output_data, narrow<size_t>(rows), narrow<size_t>(cols));
|
||||
output_matrix = input_matrix.inverse();
|
||||
}
|
||||
};
|
||||
|
|
@ -56,8 +57,8 @@ struct Inverse::ComputeImpl<MLFloat16> {
|
|||
const auto* input_data = reinterpret_cast<const Eigen::half*>(input->Data<MLFloat16>() + batch_offset);
|
||||
auto* output_data = reinterpret_cast<Eigen::half*>(output->MutableData<MLFloat16>() + batch_offset);
|
||||
|
||||
Eigen::Map<const MatrixT<Eigen::half>> input_matrix(input_data, gsl::narrow<size_t>(rows), gsl::narrow<size_t>(cols));
|
||||
Eigen::Map<MatrixT<Eigen::half>> output_matrix(output_data, gsl::narrow<size_t>(rows), gsl::narrow<size_t>(cols));
|
||||
Eigen::Map<const MatrixT<Eigen::half>> input_matrix(input_data, narrow<size_t>(rows), narrow<size_t>(cols));
|
||||
Eigen::Map<MatrixT<Eigen::half>> output_matrix(output_data, narrow<size_t>(rows), narrow<size_t>(cols));
|
||||
output_matrix = input_matrix.inverse();
|
||||
}
|
||||
};
|
||||
|
|
@ -81,7 +82,7 @@ Status Inverse::Compute(OpKernelContext* ctx) const {
|
|||
t_disp.Invoke<ComputeImpl>(input, output, batch_num, rows, cols);
|
||||
};
|
||||
|
||||
concurrency::ThreadPool::TryBatchParallelFor(ctx->GetOperatorThreadPool(), gsl::narrow<size_t>(num_batches), std::move(fn), 0);
|
||||
concurrency::ThreadPool::TryBatchParallelFor(ctx->GetOperatorThreadPool(), narrow<size_t>(num_batches), std::move(fn), 0);
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@
|
|||
#if !defined(DISABLE_SPARSE_TENSORS)
|
||||
|
||||
#include "core/framework/sparse_tensor.h"
|
||||
#include "core/common/narrow.h"
|
||||
#include "core/providers/cpu/math/gemm_matmul_common.h"
|
||||
#include "core/providers/cpu/math/matmul_helper.h"
|
||||
#include "core/util/math.h"
|
||||
|
|
@ -120,9 +121,9 @@ struct SparseToDenseCoo {
|
|||
auto coo_view = A.AsCoo();
|
||||
const auto& ind_dims = coo_view.Indices().Shape().GetDims();
|
||||
ORT_RETURN_IF_NOT(ind_dims.size() == 2, "COO indices must be 2-D, got: ", ind_dims.size());
|
||||
ConstEigenMatrixMapRowMajor<int64_t> a_indicies_map(coo_view.Indices().Data<int64_t>(), gsl::narrow<size_t>(ind_dims[0]), gsl::narrow<size_t>(ind_dims[1]));
|
||||
ConstEigenMatrixMapRowMajor<T> map_b(B.Data<T>(), gsl::narrow<size_t>(b_dims[0]), gsl::narrow<size_t>(b_dims[1]));
|
||||
EigenMatrixMapRowMajor<T> output_map(output.MutableData<T>(), gsl::narrow<size_t>(out_dims[0]), gsl::narrow<size_t>(out_dims[1]));
|
||||
ConstEigenMatrixMapRowMajor<int64_t> a_indicies_map(coo_view.Indices().Data<int64_t>(), narrow<size_t>(ind_dims[0]), narrow<size_t>(ind_dims[1]));
|
||||
ConstEigenMatrixMapRowMajor<T> map_b(B.Data<T>(), narrow<size_t>(b_dims[0]), narrow<size_t>(b_dims[1]));
|
||||
EigenMatrixMapRowMajor<T> output_map(output.MutableData<T>(), narrow<size_t>(out_dims[0]), narrow<size_t>(out_dims[1]));
|
||||
output_map.setZero();
|
||||
|
||||
const auto rhs_right = (ctx.trans_B) ? b_dims[0] : b_dims[1];
|
||||
|
|
@ -139,8 +140,8 @@ struct SparseToDenseCoo {
|
|||
ORT_RETURN_IF_NOT(m < out_left, "COO m index: ", m, " is out of bounds of out_left: ", out_left);
|
||||
const T a_value = a_values[i];
|
||||
for (int64_t n = 0; n < rhs_right; ++n) {
|
||||
const T b_value = (ctx.trans_B) ? map_b(gsl::narrow<size_t>(n), gsl::narrow<size_t>(k)) : map_b(gsl::narrow<size_t>(k), gsl::narrow<size_t>(n));
|
||||
output_map(gsl::narrow<size_t>(m), gsl::narrow<size_t>(n)) += Mul(a_value, ctx.alpha, b_value);
|
||||
const T b_value = (ctx.trans_B) ? map_b(narrow<size_t>(n), narrow<size_t>(k)) : map_b(narrow<size_t>(k), narrow<size_t>(n));
|
||||
output_map(narrow<size_t>(m), narrow<size_t>(n)) += Mul(a_value, ctx.alpha, b_value);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@
|
|||
|
||||
#pragma once
|
||||
#include "core/common/common.h"
|
||||
#include "core/common/narrow.h"
|
||||
#include "core/framework/op_kernel.h"
|
||||
#include "core/framework/tensor.h"
|
||||
#include "core/providers/cpu/nn/pool_base.h"
|
||||
|
|
@ -229,7 +230,7 @@ class MaxpoolWithMask : public OpKernel, public PoolBase {
|
|||
int64_t y_step = pooled_height;
|
||||
const int64_t total_channels = x_shape[0] * channels;
|
||||
const int64_t total_mask_channels = m_shape[0] * m_shape[1];
|
||||
RunMaxpoolLoop<MaxpoolWithMask1DTask<float>>(tp, gsl::narrow<size_t>(total_channels),
|
||||
RunMaxpoolLoop<MaxpoolWithMask1DTask<float>>(tp, narrow<size_t>(total_channels),
|
||||
{X_data, M_data, Y_data, x_step, y_step, pooled_height, stride_h(),
|
||||
height, total_mask_channels, kernel_shape, pads});
|
||||
break;
|
||||
|
|
@ -241,7 +242,7 @@ class MaxpoolWithMask : public OpKernel, public PoolBase {
|
|||
const int64_t total_channels = x_shape[0] * channels;
|
||||
const int64_t total_mask_channels = m_shape[0] * m_shape[1];
|
||||
RunMaxpoolLoop<MaxpoolWithMask2DTask<float>>(
|
||||
tp, gsl::narrow<size_t>(total_channels),
|
||||
tp, narrow<size_t>(total_channels),
|
||||
{X_data, M_data, Y_data, x_step, y_step, pooled_height, pooled_width, stride_h(), stride_w(), height, width,
|
||||
total_mask_channels, kernel_shape, pads});
|
||||
break;
|
||||
|
|
@ -252,7 +253,7 @@ class MaxpoolWithMask : public OpKernel, public PoolBase {
|
|||
const int64_t total_channels = x_shape[0] * channels;
|
||||
const int64_t total_mask_channels = m_shape[0] * m_shape[1];
|
||||
RunMaxpoolLoop<MaxpoolWithMask3DTask<float>>(
|
||||
tp, gsl::narrow<size_t>(total_channels),
|
||||
tp, narrow<size_t>(total_channels),
|
||||
{X_data, M_data, Y_data, x_step, y_step, pooled_height, pooled_width, pooled_depth, stride_h(), stride_w(),
|
||||
stride_d(), height, width, depth, total_mask_channels, kernel_shape, pads});
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -1,8 +1,9 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include <core/common/safeint.h>
|
||||
#include "nchwc_ops.h"
|
||||
#include "core/common/narrow.h"
|
||||
#include "core/common/safeint.h"
|
||||
#include "core/mlas/inc/mlas.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
|
|
@ -54,7 +55,7 @@ Status ReorderInput::Compute(OpKernelContext* context) const {
|
|||
// elements, so that operations involving a smaller number of channels will
|
||||
// process more rows per worker.
|
||||
constexpr ptrdiff_t worker_goal = 48 * 1024;
|
||||
ptrdiff_t work_per_worker = std::max<ptrdiff_t>(worker_goal / gsl::narrow<ptrdiff_t>(nchwc_channels), 1);
|
||||
ptrdiff_t work_per_worker = std::max<ptrdiff_t>(worker_goal / narrow<ptrdiff_t>(nchwc_channels), 1);
|
||||
worker_count = std::max<ptrdiff_t>(total_work / work_per_worker, 1);
|
||||
} else {
|
||||
// Each iteration produces one spatial_size chunk of NCHWc blocks.
|
||||
|
|
@ -258,27 +259,27 @@ std::vector<float> NchwcUpsample::ComputeInterpolation(int64_t input_length,
|
|||
int64_t output_length,
|
||||
int64_t scale) const {
|
||||
std::vector<float> interpolation;
|
||||
interpolation.resize(gsl::narrow<size_t>(output_length));
|
||||
interpolation.resize(narrow<size_t>(output_length));
|
||||
|
||||
if (scale == 1) {
|
||||
// Identity map for unscaled.
|
||||
for (int64_t o = 0; o < output_length; o++) {
|
||||
interpolation[gsl::narrow<size_t>(o)] = static_cast<float>(o);
|
||||
interpolation[narrow<size_t>(o)] = static_cast<float>(o);
|
||||
}
|
||||
} else if (transformation_mode_ == TransformationMode::ALIGN_CORNERS) {
|
||||
for (int64_t o = 0; o < output_length; o++) {
|
||||
interpolation[gsl::narrow<size_t>(o)] =
|
||||
interpolation[narrow<size_t>(o)] =
|
||||
static_cast<float>(o) * static_cast<float>(input_length - 1) / static_cast<float>(output_length - 1);
|
||||
}
|
||||
} else if (transformation_mode_ == TransformationMode::HALF_PIXEL) {
|
||||
for (int64_t o = 0; o < output_length; o++) {
|
||||
interpolation[gsl::narrow<size_t>(o)] =
|
||||
interpolation[narrow<size_t>(o)] =
|
||||
std::max(0.0f, (static_cast<float>(o) + 0.5f) / static_cast<float>(scale) - 0.5f);
|
||||
}
|
||||
} else {
|
||||
// Default to TransformationMode::ASYMMETRIC.
|
||||
for (int64_t o = 0; o < output_length; o++) {
|
||||
interpolation[gsl::narrow<size_t>(o)] = static_cast<float>(o) / static_cast<float>(scale);
|
||||
interpolation[narrow<size_t>(o)] = static_cast<float>(o) / static_cast<float>(scale);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -353,7 +354,7 @@ Status NchwcUpsample::Compute(OpKernelContext* context) const {
|
|||
static_cast<size_t>(input_h),
|
||||
static_cast<size_t>(input_w),
|
||||
static_cast<size_t>(output_w),
|
||||
interpolation_h[gsl::narrow<size_t>(row_index)],
|
||||
interpolation_h[narrow<size_t>(row_index)],
|
||||
interpolation_w.data(),
|
||||
x_channel_base,
|
||||
y_row);
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
#include "core/common/narrow.h"
|
||||
#include "core/providers/cpu/rnn/lstm_base.h"
|
||||
#include "core/providers/cpu/rnn/rnn_helpers.h"
|
||||
#include "core/providers/cpu/rnn/uni_directional_lstm.h"
|
||||
|
|
@ -188,8 +189,8 @@ Status DynamicQuantizeLSTM::Compute(OpKernelContext* context) const {
|
|||
ZeroPointCheck(w_zp, W_zp_shape, is_W_signed, Input);
|
||||
ZeroPointCheck(r_zp, R_zp_shape, is_R_signed, Recurrent);
|
||||
|
||||
size_t W_scale_size = W_scale_shape.NumDimensions() == 2 ? gsl::narrow<size_t>(W_scale_shape[1]) : 1;
|
||||
size_t R_scale_size = R_scale_shape.NumDimensions() == 2 ? gsl::narrow<size_t>(R_scale_shape[1]) : 1;
|
||||
size_t W_scale_size = W_scale_shape.NumDimensions() == 2 ? narrow<size_t>(W_scale_shape[1]) : 1;
|
||||
size_t R_scale_size = R_scale_shape.NumDimensions() == 2 ? narrow<size_t>(R_scale_shape[1]) : 1;
|
||||
|
||||
QuantizationParameter quant_para_W_1(w_scale->Data<float>(),
|
||||
static_cast<const uint8_t*>(w_zp->DataRaw()),
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include "core/common/narrow.h"
|
||||
#include "core/common/safeint.h"
|
||||
#include "core/mlas/inc/mlas.h"
|
||||
#include "core/providers/cpu/math/element_wise_ops.h"
|
||||
|
|
@ -102,7 +103,7 @@ Status MatMulIntegerToFloatBase::ComputeCommon(OpKernelContext* ctx,
|
|||
const float* b_scale_tensor_data = b_scale_tensor->Data<float>();
|
||||
|
||||
if (is_b_scale_per_column) {
|
||||
multipliers_per_column.reserve(gsl::narrow<size_t>(b_scale_tensor->Shape().Size()));
|
||||
multipliers_per_column.reserve(narrow<size_t>(b_scale_tensor->Shape().Size()));
|
||||
std::transform(b_scale_tensor_data,
|
||||
b_scale_tensor_data + b_scale_tensor->Shape().Size(),
|
||||
std::back_inserter(multipliers_per_column),
|
||||
|
|
@ -217,7 +218,7 @@ Status DynamicQuantizeMatMul::Compute(OpKernelContext* ctx) const {
|
|||
uint8_t* a_data_quant = static_cast<uint8_t*>(allocator->Alloc(SafeInt<size_t>(num_of_elements) * sizeof(uint8_t)));
|
||||
BufferUniquePtr a_buffer_quant_holder(a_data_quant, BufferDeleter(std::move(allocator)));
|
||||
|
||||
ParQuantizeLinear(a_data, a_data_quant, gsl::narrow<size_t>(num_of_elements), a_scale, a_zero_point, ctx->GetOperatorThreadPool());
|
||||
ParQuantizeLinear(a_data, a_data_quant, narrow<size_t>(num_of_elements), a_scale, a_zero_point, ctx->GetOperatorThreadPool());
|
||||
|
||||
bool is_b_scale_supported = IsBQuantParamSupported(b_scale_tensor->Shape(), b ? b->Shape() : b_shape_);
|
||||
ORT_RETURN_IF_ERROR(ComputeCommon(
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@
|
|||
#include "qlinear_activations.h"
|
||||
#include "qlinear_lookup_table.h"
|
||||
|
||||
#include "core/common/narrow.h"
|
||||
#include "core/mlas/inc/mlas.h"
|
||||
#include "core/platform/threadpool.h"
|
||||
|
||||
|
|
@ -53,7 +54,7 @@ Status QLinearLookupBase<T>::ComputeBase(OpKernelContext* context, Transformer f
|
|||
const uint8_t* x_data = reinterpret_cast<const uint8_t*>(X.Data<T>());
|
||||
uint8_t* y_data = reinterpret_cast<uint8_t*>(Y.MutableData<T>());
|
||||
ThreadPool::TryParallelFor(
|
||||
tp, gsl::narrow<std::ptrdiff_t>(N), TensorOpCost{1.0, 1.0, 1.0},
|
||||
tp, narrow<std::ptrdiff_t>(N), TensorOpCost{1.0, 1.0, 1.0},
|
||||
[this, x_data, y_data, &table](std::ptrdiff_t first, std::ptrdiff_t last) {
|
||||
QLinearLookupTableTransform(
|
||||
x_data + first,
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@
|
|||
#include "qlinear_concat.h"
|
||||
#include "qlinear_lookup_table.h"
|
||||
|
||||
#include "core/common/narrow.h"
|
||||
#include "core/providers/common.h"
|
||||
#include "core/mlas/inc/mlas.h"
|
||||
#include "core/platform/threadpool.h"
|
||||
|
|
@ -158,9 +159,9 @@ Status QLinearConcat::Compute(OpKernelContext* ctx) const {
|
|||
uint8_t* output = static_cast<uint8_t*>(p.output_tensor->MutableDataRaw()) + initial_output_offset;
|
||||
for (int64_t cur_in_offset = 0; cur_in_offset < prep.num_elements; cur_in_offset += input_axis_pitch) {
|
||||
if (is_copy) {
|
||||
memcpy(output, input + cur_in_offset, gsl::narrow<size_t>(input_axis_pitch));
|
||||
memcpy(output, input + cur_in_offset, narrow<size_t>(input_axis_pitch));
|
||||
} else {
|
||||
QLinearLookupTableTransform(input + cur_in_offset, table, output, gsl::narrow<size_t>(input_axis_pitch));
|
||||
QLinearLookupTableTransform(input + cur_in_offset, table, output, narrow<size_t>(input_axis_pitch));
|
||||
}
|
||||
output += p.output_axis_pitch;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
// Licensed under the MIT License.
|
||||
|
||||
#include "qlinear_global_average_pool.h"
|
||||
#include "core/common/narrow.h"
|
||||
#include "core/util/math_cpuonly.h"
|
||||
#include "core/providers/common.h"
|
||||
#include "core/platform/threadpool.h"
|
||||
|
|
@ -32,7 +33,7 @@ Status ComputeQLinearGlobalAvgPool(
|
|||
const T8Bits* input = (const T8Bits*)(x + (first * image_size));
|
||||
T8Bits* output = (T8Bits*)(y + first);
|
||||
std::vector<int32_t> acc_buffer(MlasQLinearSafePaddingElementCount(sizeof(int32_t), last - first));
|
||||
MlasQLinearGlobalAveragePoolNchw(input, x_scale, x_zero_point, output, y_scale, y_zero_point, last - first, gsl::narrow<size_t>(image_size), acc_buffer.data());
|
||||
MlasQLinearGlobalAveragePoolNchw(input, x_scale, x_zero_point, output, y_scale, y_zero_point, last - first, narrow<size_t>(image_size), acc_buffer.data());
|
||||
};
|
||||
concurrency::ThreadPool::TryParallelFor(
|
||||
tp, static_cast<std::ptrdiff_t>(N * C), {1.0 * image_size, 1.0, 8.0 * image_size}, worker);
|
||||
|
|
@ -40,11 +41,11 @@ Status ComputeQLinearGlobalAvgPool(
|
|||
auto worker = [=](std::ptrdiff_t first, std::ptrdiff_t last) {
|
||||
const T8Bits* input = x + first * C * image_size;
|
||||
T8Bits* output = y + first * C;
|
||||
std::vector<int32_t> acc_buffer(MlasQLinearSafePaddingElementCount(sizeof(int32_t), gsl::narrow<size_t>(C)));
|
||||
std::vector<T8Bits> zero_buffer(MlasQLinearSafePaddingElementCount(sizeof(T8Bits), gsl::narrow<size_t>(C)), 0);
|
||||
std::vector<int32_t> acc_buffer(MlasQLinearSafePaddingElementCount(sizeof(int32_t), narrow<size_t>(C)));
|
||||
std::vector<T8Bits> zero_buffer(MlasQLinearSafePaddingElementCount(sizeof(T8Bits), narrow<size_t>(C)), 0);
|
||||
MlasQLinearGlobalAveragePoolNhwc(
|
||||
input, x_scale, x_zero_point, output, y_scale, y_zero_point,
|
||||
last - first, gsl::narrow<size_t>(image_size), gsl::narrow<size_t>(C), gsl::narrow<size_t>(C), acc_buffer.data(), zero_buffer.data());
|
||||
last - first, narrow<size_t>(image_size), narrow<size_t>(C), narrow<size_t>(C), acc_buffer.data(), zero_buffer.data());
|
||||
};
|
||||
concurrency::ThreadPool::TryParallelFor(
|
||||
tp, static_cast<std::ptrdiff_t>(N),
|
||||
|
|
@ -79,11 +80,11 @@ Status QLinearGlobalAveragePool::Compute(OpKernelContext* context) const {
|
|||
|
||||
int64_t N = x_shape[0];
|
||||
int64_t C = (channels_last_ ? x_shape.back() : x_shape[1]);
|
||||
int64_t image_size = std::accumulate(x_shape.cbegin() + spatial_dim_start, x_shape.cbegin() + spatial_dim_end,
|
||||
int64_t image_size = std::accumulate(x_shape.begin() + spatial_dim_start, x_shape.begin() + spatial_dim_end,
|
||||
1LL, std::multiplies<int64_t>());
|
||||
|
||||
std::vector<int64_t> output_dims(x_shape.begin(), x_shape.end());
|
||||
std::transform(x_shape.cbegin() + spatial_dim_start, x_shape.cbegin() + spatial_dim_end,
|
||||
std::transform(x_shape.begin() + spatial_dim_start, x_shape.begin() + spatial_dim_end,
|
||||
output_dims.begin() + spatial_dim_start, [](const int64_t&) { return int64_t{1}; });
|
||||
Tensor& Y = *context->Output(0, output_dims);
|
||||
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@
|
|||
|
||||
#include "core/mlas/inc/mlas.h"
|
||||
#include "core/platform/threadpool.h"
|
||||
#include "gsl/gsl-lite.hpp"
|
||||
#include "core/common/gsl.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace contrib {
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
// Licensed under the MIT License.
|
||||
|
||||
#include "core/common/common.h"
|
||||
#include "core/common/narrow.h"
|
||||
#include "core/common/utf8_util.h"
|
||||
#include "core/framework/tensor.h"
|
||||
#include "core/framework/op_kernel.h"
|
||||
|
|
@ -473,10 +474,10 @@ Status Tokenizer::Compute(OpKernelContext* ctx) const {
|
|||
size_t C = 0;
|
||||
if (input_dims.size() == 1) {
|
||||
N = 1;
|
||||
C = gsl::narrow<size_t>(input_dims[0]);
|
||||
C = narrow<size_t>(input_dims[0]);
|
||||
} else if (input_dims.size() == 2) {
|
||||
N = gsl::narrow<size_t>(input_dims[0]);
|
||||
C = gsl::narrow<size_t>(input_dims[1]);
|
||||
N = narrow<size_t>(input_dims[0]);
|
||||
C = narrow<size_t>(input_dims[1]);
|
||||
} else {
|
||||
return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT,
|
||||
"Input dimensions are either [C] or [N][C] allowed");
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@
|
|||
#include "core/framework/TensorSeq.h"
|
||||
#include "core/framework/allocator.h"
|
||||
#include "core/framework/ort_value.h"
|
||||
#include "gsl/gsl"
|
||||
#include "core/common/gsl.h"
|
||||
#include "contrib_ops/cpu/transformers/beam_search.h"
|
||||
#include "contrib_ops/cpu/transformers/logits_processor.h"
|
||||
#include "contrib_ops/cpu/transformers/sequences.h"
|
||||
|
|
|
|||
|
|
@ -5,6 +5,8 @@
|
|||
|
||||
#include "contrib_ops/cpu/transformers/beam_search_impl_base.h"
|
||||
|
||||
#include "core/common/span_utils.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace contrib {
|
||||
|
||||
|
|
@ -255,8 +257,8 @@ Status BeamSearchGpt<T>::Execute(const FeedsFetchesManager& feeds_fetches_manage
|
|||
bool increase_position = (iteration_counter > 1);
|
||||
ORT_RETURN_IF_ERROR(UpdateFeeds(fetches, feeds, current_length,
|
||||
position_ids, increase_position,
|
||||
beam_next_tokens.as_span<const int32_t>(),
|
||||
beam_indices.as_span<const int32_t>()));
|
||||
ReinterpretAsSpan<const int32_t>(beam_next_tokens),
|
||||
ReinterpretAsSpan<const int32_t>(beam_indices)));
|
||||
}
|
||||
fetches.clear();
|
||||
}
|
||||
|
|
@ -280,7 +282,7 @@ Status BeamSearchGpt<T>::Execute(const FeedsFetchesManager& feeds_fetches_manage
|
|||
if (output_scores != nullptr) {
|
||||
gsl::span<float> target = output_scores->MutableDataAsSpan<float>();
|
||||
gsl::span<const float> source = gsl::span<const float>(beam_state.scores.data(), beam_state.scores.size());
|
||||
assert(target.length() == source.length());
|
||||
assert(target.size() == source.size());
|
||||
ORT_RETURN_IF_ERROR(this->device_copy_func_(target, source, nullptr, DeviceCopyDirection::deviceToDevice));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "core/common/span_utils.h"
|
||||
#include "contrib_ops/cpu/transformers/generation_shared.h" // for DEBUG_GENERATION
|
||||
#include "contrib_ops/cpu/transformers/beam_search_impl_base.h"
|
||||
#include "contrib_ops/cpu/transformers/subgraph_t5_encoder.h"
|
||||
|
|
@ -214,7 +215,7 @@ Status BeamSearchT5<T>::Execute(const FeedsFetchesManager& encoder_feeds_fetches
|
|||
cpu_state,
|
||||
iteration_counter));
|
||||
++current_length; // Increase sequence length after a new token is generated.
|
||||
ORT_RETURN_IF_ERROR(decoder_subgraph_.CreateInitialFeeds(beam_next_tokens.as_span<const int32_t>(),
|
||||
ORT_RETURN_IF_ERROR(decoder_subgraph_.CreateInitialFeeds(ReinterpretAsSpan<const int32_t>(beam_next_tokens),
|
||||
this->implicit_inputs_,
|
||||
encoder_feeds,
|
||||
encoder_fetches,
|
||||
|
|
@ -284,8 +285,8 @@ Status BeamSearchT5<T>::Execute(const FeedsFetchesManager& encoder_feeds_fetches
|
|||
decoder_fetches,
|
||||
decoder_feeds,
|
||||
num_present_outputs,
|
||||
beam_next_tokens.as_span<const int32_t>(),
|
||||
beam_indices.as_span<const int32_t>(),
|
||||
ReinterpretAsSpan<const int32_t>(beam_next_tokens),
|
||||
ReinterpretAsSpan<const int32_t>(beam_indices),
|
||||
parameters->num_beams,
|
||||
decoder_subgraph_.GetFirstPastInputIndex(),
|
||||
decoder_subgraph_.GetFirstPresentOutputIndex(),
|
||||
|
|
@ -316,7 +317,7 @@ Status BeamSearchT5<T>::Execute(const FeedsFetchesManager& encoder_feeds_fetches
|
|||
if (output_scores != nullptr) {
|
||||
gsl::span<float> target = output_scores->MutableDataAsSpan<float>();
|
||||
gsl::span<const float> source = gsl::span<const float>(beam_state.scores.data(), beam_state.scores.size());
|
||||
assert(target.length() == source.length());
|
||||
assert(target.size() == source.size());
|
||||
ORT_RETURN_IF_ERROR(this->device_copy_func_(target, source, nullptr, DeviceCopyDirection::deviceToDevice));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@
|
|||
#include <math.h>
|
||||
#include "core/common/common.h"
|
||||
#include "core/common/safeint.h"
|
||||
#include "core/common/span_utils.h"
|
||||
#include "core/framework/allocator.h"
|
||||
#include "core/framework/tensorprotoutils.h"
|
||||
#include "core/framework/utils.h"
|
||||
|
|
@ -188,7 +189,7 @@ void BeamSearchScorer::Process(ISequences* sequences,
|
|||
auto clone = hypothesis_buffer_.subspan(hypothesis_buffer_offset_, sequence_length);
|
||||
gsl::copy(src, clone);
|
||||
hypothesis_buffer_offset_ += static_cast<size_t>(sequence_length);
|
||||
auto sequence = clone.template as_span<const int32_t>();
|
||||
auto sequence = ReinterpretAsSpan<const int32_t>(clone);
|
||||
beam_hyp.Add(sequence, next_score);
|
||||
} else {
|
||||
// Add next predicted token since it is not eos_token.
|
||||
|
|
@ -209,7 +210,7 @@ void BeamSearchScorer::Process(ISequences* sequences,
|
|||
// Check if we are done so that we can save a pad step if all(done)
|
||||
if (!done_[batch]) {
|
||||
gsl::span<const float> topk_scores = next_scores.subspan(batch * num_beams_, top_k);
|
||||
const float* best_sum_logprobs = std::max_element(topk_scores.begin(), topk_scores.end());
|
||||
const auto best_sum_logprobs = std::max_element(topk_scores.begin(), topk_scores.end());
|
||||
if (beam_hyp.IsDone(*best_sum_logprobs, sequence_length)) {
|
||||
done_[batch] = true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@
|
|||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include "core/common/span_utils.h"
|
||||
#include "contrib_ops/cpu/transformers/generation_shared.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
|
|
@ -142,7 +143,7 @@ class GenerateBase {
|
|||
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
|
||||
"Input 'attention_mask' is expected to have 2 dimensions, got ", dims_attn.size());
|
||||
}
|
||||
if (dims_attn != dims) {
|
||||
if (!SpanEq(dims_attn, dims)) {
|
||||
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
|
||||
"Input 'attention_mask' is expected to have same shape as input_ids");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@
|
|||
#include "core/providers/cpu/math/top_k.h"
|
||||
#include "core/providers/cpu/math/softmax_shared.h"
|
||||
#include "core/common/safeint.h"
|
||||
#include "gsl/gsl"
|
||||
#include "core/common/gsl.h"
|
||||
#include "contrib_ops/cpu/transformers/sequences.h"
|
||||
#include "contrib_ops/cpu/transformers/beam_search_scorer.h"
|
||||
#include "contrib_ops/cpu/transformers/generation_device_helper.h"
|
||||
|
|
@ -526,7 +526,7 @@ void PickGptPastState(const std::vector<OrtValue>& last_outputs,
|
|||
|
||||
gsl::span<T> past_span = gsl::make_span<T>(past.GetMutable<Tensor>()->MutableData<T>(), past_shape.Size());
|
||||
gsl::span<const T> present_span = gsl::make_span<const T>(present.Get<Tensor>().Data<T>(), past_shape.Size());
|
||||
for (gsl::index j = 0; j < beam_indices.length(); j++) {
|
||||
for (size_t j = 0; j < beam_indices.size(); j++) {
|
||||
int32_t beam_index = beam_indices[j];
|
||||
gsl::span<const T> present_key = present_span.subspan(beam_index * block_size_per_beam, block_size_per_beam);
|
||||
gsl::span<const T> present_value = present_span.subspan(past_key_size + beam_index * block_size_per_beam,
|
||||
|
|
@ -563,7 +563,7 @@ Status UpdateGptFeeds(
|
|||
// The following updates inputs for subgraph
|
||||
|
||||
// Update input_ids with next tokens.
|
||||
int batch_beam_size = static_cast<int>(beam_next_tokens.length());
|
||||
int batch_beam_size = static_cast<int>(beam_next_tokens.size());
|
||||
int64_t dims[] = {batch_beam_size, 1};
|
||||
TensorShape input_ids_shape(&dims[0], 2);
|
||||
auto int32_type = DataTypeImpl::GetType<int32_t>();
|
||||
|
|
@ -712,7 +712,7 @@ void PickT5PastState(const std::vector<OrtValue>& last_outputs,
|
|||
|
||||
gsl::span<T> past_span = gsl::make_span<T>(past.GetMutable<Tensor>()->MutableData<T>(), past_shape.Size());
|
||||
gsl::span<const T> present_span = gsl::make_span<const T>(present.Get<Tensor>().Data<T>(), past_shape.Size());
|
||||
for (gsl::index j = 0; j < beam_indices.length(); j++) {
|
||||
for (size_t j = 0; j < beam_indices.size(); j++) {
|
||||
int32_t beam_index = beam_indices[j];
|
||||
gsl::span<const T> present_beam = present_span.subspan(beam_index * block_size_per_beam, block_size_per_beam);
|
||||
gsl::span<T> past_beam = past_span.subspan(j * block_size_per_beam, block_size_per_beam);
|
||||
|
|
@ -750,7 +750,7 @@ Status UpdateDecoderFeeds(
|
|||
// Only need copy beam next tokens to input_ids, and copy present_*_self_* to past_*_self_*,
|
||||
|
||||
// Update input_ids with next tokens.
|
||||
int batch_beam_size = static_cast<int>(beam_next_tokens.length());
|
||||
int batch_beam_size = static_cast<int>(beam_next_tokens.size());
|
||||
|
||||
// TODO(tianleiwu): Reuse buffer for input_ids to reduce memory allocation.
|
||||
OrtValue input_ids;
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@
|
|||
#endif
|
||||
|
||||
#include <vector>
|
||||
#include "gsl/gsl"
|
||||
#include "core/common/gsl.h"
|
||||
#include "contrib_ops/cpu/transformers/logits_processor.h"
|
||||
#include "contrib_ops/cpu/transformers/generation_shared.h"
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@
|
|||
#pragma once
|
||||
|
||||
#include <utility>
|
||||
#include "gsl/gsl"
|
||||
#include "core/common/gsl.h"
|
||||
#include "core/framework/allocator.h"
|
||||
#include "core/framework/ort_value.h"
|
||||
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@
|
|||
#include "core/framework/session_options.h"
|
||||
#include "core/framework/TensorSeq.h"
|
||||
#include "core/framework/ort_value.h"
|
||||
#include "gsl/gsl"
|
||||
#include "core/common/gsl.h"
|
||||
#include "contrib_ops/cpu/transformers/greedy_search.h"
|
||||
#include "contrib_ops/cpu/transformers/logits_processor.h"
|
||||
#include "contrib_ops/cpu/transformers/sequences.h"
|
||||
|
|
|
|||
|
|
@ -4,6 +4,8 @@
|
|||
#pragma once
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
|
||||
#include "core/common/span_utils.h"
|
||||
#include "contrib_ops/cpu/transformers/greedy_search_impl_base.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
|
|
@ -219,7 +221,7 @@ Status GreedySearchGpt<T>::Execute(const FeedsFetchesManager& feeds_fetches_mana
|
|||
bool increase_position = (iteration_counter > 1);
|
||||
ORT_RETURN_IF_ERROR(UpdateFeeds(fetches, feeds, current_length,
|
||||
position_ids, increase_position,
|
||||
next_tokens.as_span<const int32_t>()));
|
||||
ReinterpretAsSpan<const int32_t>(next_tokens)));
|
||||
}
|
||||
fetches.clear();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,7 +3,9 @@
|
|||
|
||||
#include <memory>
|
||||
#include <assert.h>
|
||||
#include "core/common/narrow.h"
|
||||
#include "core/common/safeint.h"
|
||||
#include "core/common/span_utils.h"
|
||||
#include "contrib_ops/cpu/transformers/logits_processor.h"
|
||||
#include "contrib_ops/cpu/transformers/dump_tensor.h"
|
||||
|
||||
|
|
@ -100,15 +102,15 @@ void NoRepeatNGramLogitsProcessor<T>::Process(const ISequences* sequences,
|
|||
gsl::span<T> beam_token_scores = next_token_scores.GetScores(i);
|
||||
gsl::span<const int32_t> sequence = sequences->GetSequence(i);
|
||||
|
||||
gsl::span<const int32_t> prefix = sequence.subspan(sequence.length() - prefix_length);
|
||||
ORT_ENFORCE(prefix.length() == prefix_length);
|
||||
gsl::span<const int32_t> prefix = sequence.subspan(sequence.size() - prefix_length);
|
||||
ORT_ENFORCE(prefix.size() == narrow<size_t>(prefix_length));
|
||||
|
||||
std::unordered_set<int32_t> blocked_word_ids;
|
||||
for (int j = 0; j <= static_cast<int>(sequence.length()) - ngram_size_; j++) {
|
||||
for (int j = 0; j <= static_cast<int>(sequence.size()) - ngram_size_; j++) {
|
||||
// Here we use naive algorithm for matching. The complexity is O(batch_beam_size * ngram_size * sequence_length)
|
||||
// TODO(tianleiwu): build N-Gram index (hash table with prefix of length NGram - 1 as key,
|
||||
// and list of last word of NGram as value) for fast matching.
|
||||
if (ngram_size_ == 1 || prefix == sequence.subspan(j, prefix_length)) {
|
||||
if (ngram_size_ == 1 || SpanEq(prefix, sequence.subspan(j, prefix_length))) {
|
||||
blocked_word_ids.insert(sequence[static_cast<gsl::index>(j) + prefix_length]);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ namespace transformers {
|
|||
|
||||
void Sequences::Init(gsl::span<int32_t> buffer, int batch_beam_size, int sequence_length, int max_length) {
|
||||
size_t sequences_size = SafeInt<size_t>(batch_beam_size) * max_length;
|
||||
assert(buffer.length() == sequences_size + sequences_size);
|
||||
assert(buffer.size() == sequences_size + sequences_size);
|
||||
|
||||
sequences[0] = buffer.subspan(0, sequences_size);
|
||||
sequences[1] = buffer.subspan(sequences_size);
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "gsl/gsl"
|
||||
#include "core/common/gsl.h"
|
||||
#include "contrib_ops/cpu/transformers/generation_shared.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@
|
|||
#include "core/framework/tensorprotoutils.h"
|
||||
#include "core/framework/utils.h"
|
||||
#include "core/providers/cpu/tensor/utils.h"
|
||||
#include "gsl/gsl"
|
||||
#include "core/common/gsl.h"
|
||||
#include "contrib_ops/cpu/transformers/subgraph_base.h"
|
||||
#include "contrib_ops/cpu/transformers/dump_tensor.h"
|
||||
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@
|
|||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include "gsl/gsl"
|
||||
#include "core/common/gsl.h"
|
||||
#include "core/framework/allocator.h"
|
||||
#include "core/framework/feeds_fetches_manager.h"
|
||||
#include "contrib_ops/cpu/transformers/generation_device_helper.h"
|
||||
|
|
@ -48,7 +48,7 @@ class Subgraph {
|
|||
Status Setup(const SessionState& session_state,
|
||||
const SessionState& subgraph_session_state);
|
||||
|
||||
FeedsFetchesManager* GetFeedsFetchesManager() {
|
||||
FeedsFetchesManager* GetFeedsFetchesManager() {
|
||||
return (feeds_fetches_manager_.has_value()) ? &*feeds_fetches_manager_ : nullptr;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@
|
|||
#include "core/framework/tensorprotoutils.h"
|
||||
#include "core/framework/utils.h"
|
||||
#include "core/providers/cpu/tensor/utils.h"
|
||||
#include "gsl/gsl"
|
||||
#include "core/common/gsl.h"
|
||||
#include "contrib_ops/cpu/transformers/subgraph_gpt.h"
|
||||
#include "contrib_ops/cpu/transformers/dump_tensor.h"
|
||||
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@
|
|||
#include "core/framework/tensorprotoutils.h"
|
||||
#include "core/framework/utils.h"
|
||||
#include "core/providers/cpu/tensor/utils.h"
|
||||
#include "gsl/gsl"
|
||||
#include "core/common/gsl.h"
|
||||
#include "contrib_ops/cpu/transformers/subgraph_t5_decoder.h"
|
||||
#include "contrib_ops/cpu/transformers/dump_tensor.h"
|
||||
#include "contrib_ops/cpu/transformers/generation_device_helper.h"
|
||||
|
|
@ -139,7 +139,7 @@ Status T5DecoderSubgraph::CreateInitialFeeds(
|
|||
AllocatorPtr allocator = session_state_->GetAllocator(encoder_feeds[0].Get<Tensor>().Location());
|
||||
|
||||
// Copy beam next tokens in CPU to input_ids in provider device (CPU for CPU EP, or GPU for CUDA EP).
|
||||
int batch_beam_size = static_cast<int>(beam_next_tokens.length());
|
||||
int batch_beam_size = static_cast<int>(beam_next_tokens.size());
|
||||
int64_t dims[] = {batch_beam_size, 1};
|
||||
TensorShape input_ids_shape(&dims[0], 2);
|
||||
OrtValue input_ids;
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@
|
|||
#include "core/framework/tensorprotoutils.h"
|
||||
#include "core/framework/utils.h"
|
||||
#include "core/providers/cpu/tensor/utils.h"
|
||||
#include "gsl/gsl"
|
||||
#include "core/common/gsl.h"
|
||||
#include "contrib_ops/cpu/transformers/subgraph_t5_encoder.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
|
|
|
|||
|
|
@ -254,7 +254,7 @@ Status QkvToContext(
|
|||
} else if (nullptr != mask_index) { // 1d mask index
|
||||
ORT_ENFORCE(mask_index_dims.size() == 1);
|
||||
// mask_index has 1D shape: either (batch_size) or (2*batch_size). Only the later one has start postions.
|
||||
const int* mask_start = (mask_index_dims.at(0) > batch_size) ? mask_index + batch_size : nullptr;
|
||||
const int* mask_start = (mask_index_dims[0] > batch_size) ? mask_index + batch_size : nullptr;
|
||||
ORT_RETURN_IF_ERROR(ComputeSoftmaxWithMask1D<T>(
|
||||
stream, total_sequence_length, sequence_length, batch_size, num_heads,
|
||||
mask_index, mask_start, data.extra_add_qk, scratch1, scratch2, parameters.is_unidirectional));
|
||||
|
|
|
|||
|
|
@ -3,7 +3,6 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "gsl/gsl"
|
||||
#include "core/providers/cuda/cuda_kernel.h"
|
||||
#include "core/providers/cuda/cuda_common.h"
|
||||
#include "core/framework/random_generator.h"
|
||||
|
|
|
|||
|
|
@ -394,7 +394,7 @@ Status ProcessLogits(const OrtValue& logits, //
|
|||
|
||||
gsl::span<const float> next_scores = gsl::make_span(
|
||||
cpu_state->topk_scores.data(),
|
||||
static_cast<typename gsl::span<float>::index_type>(topk_scores->Shape().Size()));
|
||||
static_cast<typename gsl::span<float>::size_type>(topk_scores->Shape().Size()));
|
||||
gsl::span<const int32_t> next_tokens(cpu_state->topk_tokens.data(), beam_state->next_tokens.size());
|
||||
gsl::span<const int32_t> next_indices(cpu_state->topk_indices.data(), beam_state->next_indices.size());
|
||||
|
||||
|
|
@ -579,7 +579,7 @@ Status PickGptPastState(const std::vector<OrtValue>& last_outputs,
|
|||
|
||||
gsl::span<T> past_span = gsl::make_span<T>(past.GetMutable<Tensor>()->MutableData<T>(), past_shape.Size());
|
||||
gsl::span<const T> present_span = gsl::make_span<const T>(present.Get<Tensor>().Data<T>(), past_shape.Size());
|
||||
for (gsl::index j = 0; j < beam_indices.length(); j++) {
|
||||
for (size_t j = 0; j < beam_indices.size(); j++) {
|
||||
int32_t beam_index = beam_indices[j];
|
||||
gsl::span<const T> present_key = present_span.subspan(beam_index * block_size_per_beam, block_size_per_beam);
|
||||
gsl::span<const T> present_value = present_span.subspan(past_key_size + beam_index * block_size_per_beam,
|
||||
|
|
@ -623,7 +623,7 @@ Status PickT5PastState(const std::vector<OrtValue>& last_outputs,
|
|||
|
||||
gsl::span<T> past_span = gsl::make_span<T>(past.GetMutable<Tensor>()->MutableData<T>(), past_shape.Size());
|
||||
gsl::span<const T> present_span = gsl::make_span<const T>(present.Get<Tensor>().Data<T>(), past_shape.Size());
|
||||
for (gsl::index j = 0; j < beam_indices.length(); j++) {
|
||||
for (size_t j = 0; j < beam_indices.size(); j++) {
|
||||
int32_t beam_index = beam_indices[j];
|
||||
gsl::span<const T> present_beam = present_span.subspan(beam_index * block_size_per_beam, block_size_per_beam);
|
||||
gsl::span<T> past_beam = past_span.subspan(j * block_size_per_beam, block_size_per_beam);
|
||||
|
|
@ -652,7 +652,7 @@ Status UpdateGptFeeds(
|
|||
int gpt_subgraph_first_past_input_idx,
|
||||
int gpt_subgraph_first_present_output_idx) {
|
||||
// Update input_ids with next tokens.
|
||||
int batch_beam_size = static_cast<int>(beam_next_tokens.length());
|
||||
int batch_beam_size = static_cast<int>(beam_next_tokens.size());
|
||||
int64_t dims[] = {batch_beam_size, 1};
|
||||
TensorShape input_ids_shape(&dims[0], 2);
|
||||
auto element_type = DataTypeImpl::GetType<int32_t>();
|
||||
|
|
@ -732,7 +732,7 @@ Status UpdateDecoderFeeds(
|
|||
ORT_UNUSED_PARAMETER(current_length);
|
||||
|
||||
// Update input_ids with next tokens.
|
||||
int batch_beam_size = static_cast<int>(beam_next_tokens.length());
|
||||
int batch_beam_size = static_cast<int>(beam_next_tokens.size());
|
||||
int64_t dims[] = {batch_beam_size, 1};
|
||||
TensorShape input_ids_shape(&dims[0], 2);
|
||||
auto element_type = DataTypeImpl::GetType<int32_t>();
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@
|
|||
#include "core/providers/cpu/tensor/utils.h"
|
||||
#include "core/providers/cuda/cuda_common.h"
|
||||
|
||||
#include "gsl/gsl"
|
||||
#include "core/common/gsl.h"
|
||||
#include "contrib_ops/cpu/transformers/generation_shared.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
|
|
|
|||
|
|
@ -154,7 +154,7 @@ Status QkvToContext(
|
|||
// apply softmax and store result P to scratch2: BxNxSxS*
|
||||
if (use_raw_attention_mask) { // 2d, 3d or 4d attention mask
|
||||
const int mask_dimension = static_cast<int>(mask_index_dims.size());
|
||||
const int max_sequence_length = mask_dimension == 4 ? static_cast<int>(mask_index_dims.at(3)) : 0;
|
||||
const int max_sequence_length = mask_dimension == 4 ? static_cast<int>(mask_index_dims[3]) : 0;
|
||||
|
||||
T* persistent_softmax_workspace = scratch1; // replace Q*K' in place if persistent softmax is selected.
|
||||
ORT_RETURN_IF_ERROR(
|
||||
|
|
@ -165,7 +165,7 @@ Status QkvToContext(
|
|||
} else if (nullptr != mask_index) { // 1d mask index
|
||||
ORT_ENFORCE(mask_index_dims.size() == 1);
|
||||
// mask_index has 1D shape: either (batch_size) or (2*batch_size). Only the later one has start postions.
|
||||
const int* mask_start = (mask_index_dims.at(0) > batch_size) ? mask_index + batch_size : nullptr;
|
||||
const int* mask_start = (mask_index_dims[0] > batch_size) ? mask_index + batch_size : nullptr;
|
||||
ORT_RETURN_IF_ERROR(ComputeSoftmaxWithMask1D<T>(stream, all_sequence_length, sequence_length, batch_size, num_heads,
|
||||
mask_index, mask_start, extra_add_qk, scratch1, scratch2, is_unidirectional));
|
||||
} else { // no mask
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@
|
|||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <gsl/gsl>
|
||||
#include "core/common/gsl.h"
|
||||
#include <tvm/tvm.h>
|
||||
#include "core/codegen/mti/common.h"
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@
|
|||
#include "core/codegen/mti/tensor/concat_ops.h"
|
||||
|
||||
#include "core/codegen/mti/mti_tvm_utils.h"
|
||||
#include "gsl/gsl"
|
||||
#include "core/common/gsl.h"
|
||||
#include <topi/transform.h>
|
||||
|
||||
namespace onnxruntime {
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@
|
|||
#include "core/codegen/mti/tensor/gather.h"
|
||||
|
||||
#include "core/codegen/mti/mti_tvm_utils.h"
|
||||
#include "gsl/gsl"
|
||||
#include "core/common/gsl.h"
|
||||
#include <topi/transform.h>
|
||||
|
||||
namespace onnxruntime {
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@
|
|||
|
||||
#include "core/codegen/mti/mti_tvm_utils.h"
|
||||
#include <climits>
|
||||
#include <gsl/gsl>
|
||||
#include "core/common/gsl.h"
|
||||
#include <topi/transform.h>
|
||||
#include <tvm/ir_pass.h>
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@
|
|||
#include "core/codegen/mti/tensor/split.h"
|
||||
|
||||
#include "core/codegen/mti/mti_tvm_utils.h"
|
||||
#include "gsl/gsl"
|
||||
#include "core/common/gsl.h"
|
||||
#include <topi/transform.h>
|
||||
|
||||
namespace onnxruntime {
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
#include "core/codegen/mti/tensor/tile.h"
|
||||
#include "core/codegen/mti/mti_tvm_utils.h"
|
||||
#include "gsl/gsl"
|
||||
#include "core/common/gsl.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace tvm_codegen {
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@
|
|||
#include "core/codegen/passes/utils/codegen_context.h"
|
||||
#include "core/framework/tensorprotoutils.h"
|
||||
#include "core/providers/common.h"
|
||||
#include "gsl/gsl"
|
||||
#include "core/common/gsl.h"
|
||||
|
||||
#include <topi/detail/extern.h>
|
||||
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
#include "core/common/logging/capture.h"
|
||||
#include "core/common/logging/logging.h"
|
||||
#include "gsl/gsl"
|
||||
#include "core/common/gsl.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace logging {
|
||||
|
|
@ -43,7 +43,7 @@ void Capture::ProcessPrintf(msvc_printf_check const char* format, va_list args)
|
|||
const int nbrcharacters = vsnprintf(message.data(), message.size(), format, args);
|
||||
#endif
|
||||
error = nbrcharacters < 0;
|
||||
truncated = (nbrcharacters >= 0 && static_cast<gsl::index>(nbrcharacters) > message.size());
|
||||
truncated = (nbrcharacters >= 0 && static_cast<size_t>(nbrcharacters) > message.size());
|
||||
#endif
|
||||
|
||||
if (error) {
|
||||
|
|
|
|||
|
|
@ -3,9 +3,8 @@
|
|||
|
||||
#include "core/flatbuffers/flatbuffers_utils.h"
|
||||
|
||||
#include "gsl/gsl"
|
||||
|
||||
#include "core/common/common.h"
|
||||
#include "core/common/gsl.h"
|
||||
#include "core/flatbuffers/schema/ort.fbs.h"
|
||||
#include "core/graph/constants.h"
|
||||
#include "core/graph/onnx_protobuf.h"
|
||||
|
|
|
|||
|
|
@ -605,10 +605,10 @@ class PlannerImpl {
|
|||
UseCount(name)++;
|
||||
|
||||
bool is_graph_input = (graph_inputs.find(name) != graph_inputs.cend());
|
||||
bool is_outer_scope_arg = std::find_if(outer_scope_node_args_.cbegin(), outer_scope_node_args_.cend(),
|
||||
bool is_outer_scope_arg = std::find_if(outer_scope_node_args_.begin(), outer_scope_node_args_.end(),
|
||||
[&name](const NodeArg* value) {
|
||||
return value && value->Name() == name;
|
||||
}) != outer_scope_node_args_.cend();
|
||||
}) != outer_scope_node_args_.end();
|
||||
bool is_subgraph = (parent_node_ != nullptr);
|
||||
|
||||
// If it's a graph input or outer scope node arg, set its plan.
|
||||
|
|
|
|||
|
|
@ -2,12 +2,15 @@
|
|||
// Licensed under the MIT License.
|
||||
|
||||
#include "core/framework/allocatormgr.h"
|
||||
#include "core/framework/bfc_arena.h"
|
||||
#include "core/common/logging/logging.h"
|
||||
|
||||
#include <limits>
|
||||
#include <mutex>
|
||||
#include <sstream>
|
||||
#include <unordered_map>
|
||||
#include <limits>
|
||||
|
||||
#include "core/common/logging/logging.h"
|
||||
#include "core/common/narrow.h"
|
||||
#include "core/framework/bfc_arena.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
using namespace common;
|
||||
|
|
@ -15,9 +18,9 @@ using namespace common;
|
|||
namespace {
|
||||
int32_t MakeKey(OrtMemType mem_type, OrtDevice device) {
|
||||
// shorten device id so we can fit everything
|
||||
uint8_t short_device = gsl::narrow<uint8_t>(device.Id());
|
||||
uint8_t short_device = narrow<uint8_t>(device.Id());
|
||||
// and convert mem_type. OrtMemType weirdly uses -2 as the first value so we offset by that before narrowing
|
||||
uint8_t ort_mem_type = gsl::narrow<uint8_t>(mem_type + 2);
|
||||
uint8_t ort_mem_type = narrow<uint8_t>(mem_type + 2);
|
||||
|
||||
// NOTE: OrtMemType is the type of memory for a kernel's input/output
|
||||
// OrtDevice.MemType is the device memory type.
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@
|
|||
|
||||
#include <type_traits>
|
||||
|
||||
#include "gsl/gsl"
|
||||
#include "core/common/gsl.h"
|
||||
|
||||
#include "core/common/common.h"
|
||||
#include "core/framework/tensor.h"
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@
|
|||
|
||||
#include <type_traits>
|
||||
|
||||
#include "gsl/gsl"
|
||||
#include "core/common/gsl.h"
|
||||
|
||||
#include "core/common/status.h"
|
||||
#include "core/common/common.h"
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include "core/common/gsl_suppress.h"
|
||||
#include "core/session/onnxruntime_c_api.h"
|
||||
#include "core/session/ort_apis.h"
|
||||
#include "core/common/status.h"
|
||||
|
|
|
|||
|
|
@ -3,8 +3,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <gsl/gsl>
|
||||
|
||||
#include "core/common/gsl.h"
|
||||
#include "core/common/inlined_containers_fwd.h"
|
||||
#include "core/framework/execution_provider.h" // for IExecutionProvider::IKernelLookup
|
||||
#include "core/graph/graph_viewer.h"
|
||||
|
|
|
|||
|
|
@ -7,8 +7,6 @@
|
|||
#include <unordered_set>
|
||||
#include <string>
|
||||
|
||||
#include "gsl/gsl"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace {
|
||||
|
||||
|
|
|
|||
|
|
@ -3,9 +3,8 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "gsl/gsl"
|
||||
|
||||
#include "core/common/common.h"
|
||||
#include "core/common/gsl.h"
|
||||
#include "core/framework/execution_provider.h" // for IExecutionProvider::IKernelLookup
|
||||
#include "core/framework/kernel_registry.h"
|
||||
#include "core/framework/kernel_type_str_resolver.h"
|
||||
|
|
|
|||
|
|
@ -7,8 +7,7 @@
|
|||
#include <variant>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "gsl/gsl"
|
||||
|
||||
#include "core/common/gsl.h"
|
||||
#include "core/common/inlined_containers.h"
|
||||
#include "core/common/status.h"
|
||||
#include "core/framework/kernel_type_str_resolver.h"
|
||||
|
|
|
|||
|
|
@ -7,12 +7,11 @@
|
|||
#include <string_view>
|
||||
#include <utility>
|
||||
|
||||
#include "gsl/gsl"
|
||||
|
||||
#if !defined(ORT_MINIMAL_BUILD)
|
||||
#include "onnx/defs/schema.h"
|
||||
#endif // !defined(ORT_MINIMAL_BUILD)
|
||||
|
||||
#include "core/common/gsl.h"
|
||||
#include "core/common/inlined_containers.h"
|
||||
#include "core/common/status.h"
|
||||
#include "core/graph/op_identifier.h"
|
||||
|
|
|
|||
|
|
@ -5,8 +5,7 @@
|
|||
|
||||
#if !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD)
|
||||
|
||||
#include "gsl/gsl"
|
||||
|
||||
#include "core/common/gsl.h"
|
||||
#include "core/common/status.h"
|
||||
#include "core/framework/kernel_type_str_resolver.h"
|
||||
#include "core/graph/op_identifier.h"
|
||||
|
|
|
|||
|
|
@ -3,8 +3,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <gsl/gsl>
|
||||
|
||||
#include "core/common/narrow.h"
|
||||
#include "core/framework/tensor.h"
|
||||
#include "core/util/math_cpuonly.h"
|
||||
|
||||
|
|
@ -12,12 +11,12 @@ namespace onnxruntime {
|
|||
|
||||
template <typename T>
|
||||
auto EigenMap(Tensor& t) -> EigenVectorMap<T> {
|
||||
return EigenVectorMap<T>(t.MutableData<T>(), gsl::narrow<ptrdiff_t>(t.Shape().Size()));
|
||||
return EigenVectorMap<T>(t.MutableData<T>(), narrow<ptrdiff_t>(t.Shape().Size()));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
auto EigenMap(const Tensor& t) -> ConstEigenVectorMap<T> {
|
||||
return ConstEigenVectorMap<T>(t.Data<T>(), gsl::narrow<ptrdiff_t>(t.Shape().Size()));
|
||||
return ConstEigenVectorMap<T>(t.Data<T>(), narrow<ptrdiff_t>(t.Shape().Size()));
|
||||
}
|
||||
|
||||
} // namespace onnxruntime
|
||||
|
|
|
|||
|
|
@ -4,7 +4,6 @@
|
|||
#pragma once
|
||||
#include <atomic>
|
||||
#include <string>
|
||||
#include "core/common/gsl_suppress.h"
|
||||
#include "core/session/onnxruntime_c_api.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@
|
|||
#include "core/framework/tensorprotoutils.h"
|
||||
#include "core/graph/onnx_protobuf.h"
|
||||
#include "core/graph/op.h"
|
||||
#include "gsl/gsl"
|
||||
#include "core/common/gsl.h"
|
||||
|
||||
using namespace ONNX_NAMESPACE;
|
||||
using namespace ::onnxruntime::common;
|
||||
|
|
|
|||
|
|
@ -3,14 +3,13 @@
|
|||
|
||||
#include "random_seed.h"
|
||||
#include "random_generator.h"
|
||||
#include "core/common/gsl_suppress.h"
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace utils {
|
||||
|
||||
// "Global initializer calls a non-constexpr function."
|
||||
// "Global initializer calls a non-constexpr function."
|
||||
//TODO: Fix the warning. The variable should be put in the environment class.
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
#pragma warning(push)
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@
|
|||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "core/common/gsl_suppress.h"
|
||||
#include "core/common/gsl.h"
|
||||
#include "core/common/inlined_containers.h"
|
||||
#include "core/session/onnxruntime_c_api.h"
|
||||
#include "core/optimizer/graph_transformer_level.h"
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue