Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 5 additions & 85 deletions cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -200,10 +200,8 @@ option(onnxruntime_FUZZ_TEST "Enable Fuzz testing" OFF)
# training options
option(onnxruntime_ENABLE_NVTX_PROFILE "Enable NVTX profile." OFF)
option(onnxruntime_ENABLE_MEMORY_PROFILE "Enable memory profile." OFF)
option(onnxruntime_ENABLE_TRAINING "Enable full training functionality. Includes ORTModule and ORT Training APIs" OFF)
option(onnxruntime_ENABLE_TRAINING_APIS "Enable ort training apis." OFF)
option(onnxruntime_ENABLE_TRAINING_OPS "Include training operators but no training session support." OFF)
option(onnxruntime_ENABLE_TRAINING_E2E_TESTS "Enable training end-to-end tests." OFF)
option(onnxruntime_ENABLE_CPU_FP16_OPS "Build with advanced instruction sets" ON)
option(onnxruntime_USE_NCCL "Build with NCCL support" OFF)

Expand All @@ -223,9 +221,6 @@ option(onnxruntime_ENABLE_WEBASSEMBLY_RELAXED_SIMD "Enable WebAssembly Relaxed S
# Enable bitcode for iOS
option(onnxruntime_ENABLE_BITCODE "Enable bitcode for iOS only" OFF)

# build Pytorch's LazyTensor support
cmake_dependent_option(onnxruntime_ENABLE_LAZY_TENSOR "Enable ORT as a LazyTensor backend in Pytorch." ON "onnxruntime_ENABLE_TRAINING" OFF)

# build separate library of schemas of (custom) ops used by ORT (for ONNX to MLIR translation)
option(onnxruntime_BUILD_OPSCHEMA_LIB "Build op schema library" ON)

Expand Down Expand Up @@ -254,7 +249,7 @@ option(onnxruntime_ENABLE_CPUINFO "Enable cpuinfo" ON)
option(onnxruntime_ENABLE_ATEN "Enable ATen fallback" OFF)

# dlpack support
cmake_dependent_option(onnxruntime_ENABLE_DLPACK "Enable dlpack" ON "onnxruntime_ENABLE_TRAINING OR onnxruntime_ENABLE_ATEN OR onnxruntime_ENABLE_PYTHON" OFF)
cmake_dependent_option(onnxruntime_ENABLE_DLPACK "Enable dlpack" ON "onnxruntime_ENABLE_TRAINING_APIS OR onnxruntime_ENABLE_ATEN OR onnxruntime_ENABLE_PYTHON" OFF)
option(onnxruntime_USE_AZURE "Build with azure inferencing support" OFF)
option(onnxruntime_USE_LOCK_FREE_QUEUE "Build with lock-free task queue for threadpool." OFF)
option(onnxruntime_FORCE_GENERIC_ALGORITHMS "Disable optimized arch-specific algorithms. Use only for testing and debugging generic algorithms." OFF)
Expand All @@ -281,34 +276,8 @@ endif()

include(detect_onnxruntime_target_platform.cmake)

# ENABLE_TRAINING includes all training functionality
# The following 2 entry points
# 1. ORTModule
# 2. ORT Training APIs
# It includes all the feature additions as well like
# 1. Python OP
# 2. Aten Fallback
# 3. Strided Tensors
# 4. All training ops including communication and collectives ops
# 5. ONNXBlock (Front end for training preparation when using training apis)
# Some features are only enabled when onnxruntime_ENABLE_PYTHON is ON as they are only relevant
# when using python env
if (onnxruntime_ENABLE_TRAINING)
set(onnxruntime_ENABLE_TRAINING_APIS ON)
set(onnxruntime_ENABLE_TRAINING_OPS ON)
set(onnxruntime_ENABLE_ATEN ON)
set(onnxruntime_ENABLE_TRITON ON)
if (NOT APPLE)
set(onnxruntime_ENABLE_TRAINING_TORCH_INTEROP ON)
endif()
endif()

if (onnxruntime_ENABLE_TRAINING_APIS)
set(onnxruntime_ENABLE_TRAINING_OPS ON)
if (onnxruntime_ENABLE_PYTHON AND NOT onnxruntime_ENABLE_TRAINING)
message(FATAL_ERROR "Standalone On-Device Training build is not supported with Python bindings! "
"Please use the --enable_training flag instead of the --enable_training_apis flag.")
endif()
endif()


Expand Down Expand Up @@ -378,29 +347,14 @@ endif()

set(ONNX_ML 1)

if (NOT (UNIX AND onnxruntime_ENABLE_PYTHON AND onnxruntime_ENABLE_TRAINING AND (NOT onnxruntime_BUILD_SHARED_LIB)))
if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
message(WARNING "onnxruntime_ENABLE_TRAINING_TORCH_INTEROP is turned OFF due to incompatible build combinations.")
endif()
set(onnxruntime_ENABLE_TRAINING_TORCH_INTEROP OFF)
endif()

if (NOT (UNIX AND onnxruntime_USE_CUDA AND onnxruntime_ENABLE_PYTHON AND onnxruntime_ENABLE_TRAINING AND (NOT onnxruntime_BUILD_SHARED_LIB)))
if (NOT (UNIX AND onnxruntime_USE_CUDA AND onnxruntime_ENABLE_PYTHON AND onnxruntime_ENABLE_TRAINING_APIS AND (NOT onnxruntime_BUILD_SHARED_LIB)))
if (onnxruntime_ENABLE_TRITON)
message(WARNING "onnxruntime_ENABLE_TRITON is turned OFF because it's designed to support CUDA training on Linux only currently.")
endif()
set(onnxruntime_ENABLE_TRITON OFF)
endif()

set(onnxruntime_REQUIRE_PYTHON_EMBED_LIB OFF)
if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
add_compile_definitions(ENABLE_TRAINING_TORCH_INTEROP)

# Python::Python is required for building unit test executables.
if (onnxruntime_BUILD_UNIT_TESTS)
set(onnxruntime_REQUIRE_PYTHON_EMBED_LIB ON)
endif()
endif()

if (onnxruntime_ENABLE_TRITON)
# Need SetOutputMLValue.
Expand Down Expand Up @@ -1033,25 +987,9 @@ if (onnxruntime_FORCE_GENERIC_ALGORITHMS)
add_compile_definitions(FORCE_GENERIC_ALGORITHMS)
endif()

if (onnxruntime_ENABLE_LAZY_TENSOR)
# To support LazyTensor, ORT needs to call Python function from C/C++.
# so onnxruntime_ENABLE_PYTHON is required.
if (NOT onnxruntime_ENABLE_TRAINING OR NOT onnxruntime_ENABLE_PYTHON)
message(
FATAL_ERROR
"Option onnxruntime_ENABLE_LAZY_TENSOR can only be set when onnxruntime_ENABLE_TRAINING and onnxruntime_ENABLE_PYTHON are enabled")
endif()
# TODO: In the future, we can compile LazyTensor into a standalone
# library target, onnxruntime_lazy_tensor, to make the build
# cleaner.
endif()

function(onnxruntime_set_compile_flags target_name)
if (CPUINFO_SUPPORTED)
onnxruntime_add_include_to_target(${target_name} cpuinfo::cpuinfo)
endif()
if(onnxruntime_ENABLE_LAZY_TENSOR)
target_compile_definitions(${target_name} PRIVATE ENABLE_LAZY_TENSOR)
endif()
# Enable stream for all the non-minimal build
if (NOT onnxruntime_MINIMAL_BUILD)
Expand Down Expand Up @@ -1536,15 +1474,6 @@ if (onnxruntime_ENABLE_CUDA_PROFILING)
add_compile_definitions(ENABLE_CUDA_PROFILING)
endif()

if (onnxruntime_ENABLE_TRAINING)
add_compile_definitions(ENABLE_TRAINING_CORE)
add_compile_definitions(ENABLE_STRIDED_TENSORS)
add_compile_definitions(ENABLE_TRAINING)

add_subdirectory(tensorboard EXCLUDE_FROM_ALL)
list(APPEND onnxruntime_EXTERNAL_LIBRARIES tensorboard)
endif()

# When the onnxruntime_ENABLE_DLPACK option is on, add the ENABLE_DLPACK
# preprocessor definition at directory scope so sources compiled from here
# can conditionally compile their dlpack code paths.
if (onnxruntime_ENABLE_DLPACK)
add_compile_definitions(ENABLE_DLPACK)
endif()
Expand Down Expand Up @@ -1739,15 +1668,6 @@ if (onnxruntime_BUILD_WINML_TESTS)
list(APPEND ONNXRUNTIME_CMAKE_FILES winml_unittests)
endif()

# onnxruntime_training depends on onnxruntime_unittests since onnxruntime_training.cmake uses a variable `TEST_SRC_DIR`
# that is defined in onnxruntime_unittests.cmake
if (onnxruntime_ENABLE_TRAINING)
list(APPEND ONNXRUNTIME_CMAKE_FILES onnxruntime_training)
if (onnxruntime_ENABLE_TRAINING_E2E_TESTS)
list(APPEND ONNXRUNTIME_CMAKE_FILES onnxruntime_training_e2e_tests)
endif()
endif()

if (onnxruntime_BUILD_CSHARP)
message(STATUS "CSharp Build is enabled")
# set_property(GLOBAL PROPERTY VS_DOTNET_TARGET_FRAMEWORK_VERSION "netstandard2.0")
Expand Down Expand Up @@ -1793,7 +1713,7 @@ if (BUILD_PKGCONFIG_FILES)
${CMAKE_INSTALL_LIBDIR}/pkgconfig )
endif()

if (onnxruntime_BUILD_OPSCHEMA_LIB AND onnxruntime_ENABLE_TRAINING)
if (onnxruntime_BUILD_OPSCHEMA_LIB AND onnxruntime_ENABLE_TRAINING_OPS)
# opschema library requires training ops as well
include(onnxruntime_opschema_lib.cmake)
endif()
Expand All @@ -1815,8 +1735,8 @@ if (onnxruntime_ENABLE_EXTERNAL_CUSTOM_OP_SCHEMAS)
message(FATAL_ERROR "External custom operator schemas is not supported with the user specified protoc executable")
endif()

if (NOT onnxruntime_ENABLE_TRAINING)
message(FATAL_ERROR "External custom operator schemas is supported only with --enable-training option")
if (NOT onnxruntime_ENABLE_TRAINING_OPS)
message(FATAL_ERROR "External custom operator schemas is supported only with --enable_training_ops or --enable_training_apis option")
endif()

add_custom_target(install_protobuf ALL DEPENDS ${PROTOBUF_LIB} protobuf::protoc)
Expand Down
4 changes: 1 addition & 3 deletions cmake/external/onnxruntime_external_deps.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -607,9 +607,7 @@ if(onnxruntime_ENABLE_DLPACK)
onnxruntime_fetchcontent_makeavailable(dlpack)
endif()

if(onnxruntime_ENABLE_TRAINING OR (onnxruntime_ENABLE_TRAINING_APIS AND onnxruntime_BUILD_UNIT_TESTS))
# Once code under orttraining/orttraining/models dir is removed "onnxruntime_ENABLE_TRAINING" should be removed from
# this conditional
if(onnxruntime_ENABLE_TRAINING_APIS AND onnxruntime_BUILD_UNIT_TESTS)
onnxruntime_fetchcontent_declare(
cxxopts
URL ${DEP_URL_cxxopts}
Expand Down
20 changes: 2 additions & 18 deletions cmake/onnxruntime_framework.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,7 @@ file(GLOB_RECURSE onnxruntime_framework_srcs CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/core/framework/*.cc"
)

if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
file(GLOB_RECURSE onnxruntime_training_framework_torch_srcs CONFIGURE_DEPENDS
"${ORTTRAINING_SOURCE_DIR}/core/framework/torch/*.h"
"${ORTTRAINING_SOURCE_DIR}/core/framework/torch/*.cc"
)
list(APPEND onnxruntime_framework_srcs ${onnxruntime_training_framework_torch_srcs})
if (onnxruntime_ENABLE_TRITON)
file(GLOB_RECURSE onnxruntime_training_framework_triton_srcs CONFIGURE_DEPENDS
"${ORTTRAINING_SOURCE_DIR}/core/framework/triton/*.h"
"${ORTTRAINING_SOURCE_DIR}/core/framework/triton/*.cc"
)
list(APPEND onnxruntime_framework_srcs ${onnxruntime_training_framework_triton_srcs})
endif()
elseif(onnxruntime_ENABLE_TRITON)
# Triton executor shares some code from torch_interop, such as python and dlpack related code files.
# When torch_interop is enabled, all these dependencies are already included.
# But if not, we need to include them explicitly.
if(onnxruntime_ENABLE_TRITON)
file(GLOB_RECURSE onnxruntime_training_framework_triton_srcs CONFIGURE_DEPENDS
"${ORTTRAINING_SOURCE_DIR}/core/framework/torch/dlpack_python.h"
"${ORTTRAINING_SOURCE_DIR}/core/framework/torch/dlpack_python.cc"
Expand Down Expand Up @@ -73,7 +57,7 @@ endif()
# Needed for the provider interface, as it includes training headers when training is enabled
if (onnxruntime_ENABLE_TRAINING_OPS)
target_include_directories(onnxruntime_framework PRIVATE ${ORTTRAINING_ROOT})
if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP OR onnxruntime_ENABLE_TRITON)
if (onnxruntime_ENABLE_TRITON)
onnxruntime_add_include_to_target(onnxruntime_framework Python::Module dlpack::dlpack)
endif()
endif()
Expand Down
15 changes: 1 addition & 14 deletions cmake/onnxruntime_graph.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,13 @@ file(GLOB_RECURSE onnxruntime_graph_src CONFIGURE_DEPENDS
# start with empty training srcs list
set(orttraining_graph_src)

if (onnxruntime_ENABLE_TRAINING_OPS AND NOT onnxruntime_ENABLE_TRAINING)
if (onnxruntime_ENABLE_TRAINING_OPS)
set(orttraining_graph_src
"${ORTTRAINING_SOURCE_DIR}/core/graph/training_op_defs.cc"
"${ORTTRAINING_SOURCE_DIR}/core/graph/training_op_defs.h"
)
endif()

if (onnxruntime_ENABLE_TRAINING)
file(GLOB_RECURSE orttraining_graph_src CONFIGURE_DEPENDS
"${ORTTRAINING_SOURCE_DIR}/core/graph/*.h"
"${ORTTRAINING_SOURCE_DIR}/core/graph/*.cc"
)
endif()

# create empty lists for any excludes
set(onnxruntime_graph_src_exclude_patterns)
set(orttraining_graph_src_exclude_patterns)
Expand Down Expand Up @@ -109,12 +102,6 @@ endif()
# For non-MSVC toolchains, silence the parentheses and deprecated-declaration
# warnings when compiling the onnxruntime_graph target.
if(NOT MSVC)
  target_compile_options(onnxruntime_graph
    PRIVATE
      "-Wno-parentheses"
      "-Wno-deprecated-declarations"
  )
endif()
if (onnxruntime_ENABLE_TRAINING)
#TODO: the graph library should focus on ONNX IR, it shouldn't depend on math libraries like MKLML/OpenBlas
target_include_directories(onnxruntime_graph PRIVATE ${MKLML_INCLUDE_DIR})
target_link_libraries(onnxruntime_graph PRIVATE nlohmann_json::nlohmann_json)
endif()

target_include_directories(onnxruntime_graph PRIVATE ${ONNXRUNTIME_ROOT})

if (onnxruntime_ENABLE_TRAINING_OPS)
Expand Down
18 changes: 0 additions & 18 deletions cmake/onnxruntime_optimizer.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -85,17 +85,6 @@ else()
)
endif()

if (onnxruntime_ENABLE_TRAINING)
list(APPEND onnxruntime_optimizer_src_patterns
"${ORTTRAINING_SOURCE_DIR}/core/optimizer/*.h"
"${ORTTRAINING_SOURCE_DIR}/core/optimizer/*.cc"
"${ORTTRAINING_SOURCE_DIR}/core/optimizer/compute_optimizer/*.h"
"${ORTTRAINING_SOURCE_DIR}/core/optimizer/compute_optimizer/*.cc"
"${ORTTRAINING_SOURCE_DIR}/core/optimizer/memory_optimizer/*.h"
"${ORTTRAINING_SOURCE_DIR}/core/optimizer/memory_optimizer/*.cc"
)
endif()

file(GLOB onnxruntime_optimizer_srcs CONFIGURE_DEPENDS ${onnxruntime_optimizer_src_patterns})

source_group(TREE ${REPO_ROOT} FILES ${onnxruntime_optimizer_srcs})
Expand All @@ -117,13 +106,6 @@ onnxruntime_add_static_library(onnxruntime_optimizer ${onnxruntime_optimizer_src

onnxruntime_add_include_to_target(onnxruntime_optimizer onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} flatbuffers::flatbuffers Boost::mp11 safeint_interface)
target_include_directories(onnxruntime_optimizer PRIVATE ${ONNXRUNTIME_ROOT})
if (onnxruntime_ENABLE_TRAINING)
target_include_directories(onnxruntime_optimizer PRIVATE ${ORTTRAINING_ROOT})
onnxruntime_add_include_to_target(onnxruntime_optimizer nlohmann_json::nlohmann_json)
if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
onnxruntime_add_include_to_target(onnxruntime_optimizer Python::Module)
endif()
endif()
if (onnxruntime_ENABLE_TRITON)
target_link_libraries(onnxruntime_optimizer PRIVATE nlohmann_json::nlohmann_json)
onnxruntime_add_include_to_target(onnxruntime_optimizer Python::Module)
Expand Down
40 changes: 4 additions & 36 deletions cmake/onnxruntime_providers_cpu.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ if(NOT onnxruntime_DISABLE_CONTRIB_OPS)
list(APPEND onnxruntime_providers_src ${onnxruntime_cpu_contrib_ops_srcs})
endif()

if (onnxruntime_ENABLE_TRAINING_OPS AND NOT onnxruntime_ENABLE_TRAINING)
if (onnxruntime_ENABLE_TRAINING_OPS)
file(GLOB_RECURSE onnxruntime_cpu_training_ops_srcs CONFIGURE_DEPENDS
"${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/*.h"
"${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/*.cc"
Expand Down Expand Up @@ -112,30 +112,6 @@ if (onnxruntime_ENABLE_DLPACK)
list(APPEND onnxruntime_providers_src ${onnxruntime_providers_dlpack_srcs})
endif()

if (onnxruntime_ENABLE_TRAINING)
file(GLOB_RECURSE onnxruntime_cpu_training_ops_srcs CONFIGURE_DEPENDS
"${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/*.h"
"${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/*.cc"
"${ORTTRAINING_SOURCE_DIR}/core/framework/*.h"
"${ORTTRAINING_SOURCE_DIR}/core/framework/*.cc"
"${ORTTRAINING_SOURCE_DIR}/core/framework/adasum/*"
"${ORTTRAINING_SOURCE_DIR}/core/framework/communication/*"
)

# This is already built in framework.cmake
file(GLOB_RECURSE onnxruntime_training_framework_excude_srcs CONFIGURE_DEPENDS
"${ORTTRAINING_SOURCE_DIR}/core/framework/torch/*.h"
"${ORTTRAINING_SOURCE_DIR}/core/framework/torch/*.cc"
"${ORTTRAINING_SOURCE_DIR}/core/framework/triton/*.h"
"${ORTTRAINING_SOURCE_DIR}/core/framework/triton/*.cc"
)

list(REMOVE_ITEM onnxruntime_cpu_training_ops_srcs ${onnxruntime_training_framework_excude_srcs})

source_group(TREE ${ORTTRAINING_ROOT}/ FILES ${onnxruntime_cpu_training_ops_srcs})
list(APPEND onnxruntime_providers_src ${onnxruntime_cpu_training_ops_srcs})
endif()

# For reduced-ops builds, pass the name of the accumulated provider source list
# to substitute_op_reduction_srcs.
# NOTE(review): the helper is defined elsewhere; presumably it swaps in the
# op-reduced variants of the listed sources - confirm against its definition.
if (onnxruntime_REDUCED_OPS_BUILD)
substitute_op_reduction_srcs(onnxruntime_providers_src)
endif()
Expand Down Expand Up @@ -198,19 +174,11 @@ if (onnxruntime_ENABLE_DLPACK)
onnxruntime_add_include_to_target(onnxruntime_providers dlpack::dlpack)
endif()

if (onnxruntime_ENABLE_TRAINING)
add_dependencies(onnxruntime_providers tensorboard)
onnxruntime_add_include_to_target(onnxruntime_providers tensorboard)
if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP OR onnxruntime_ENABLE_TRITON)
onnxruntime_add_include_to_target(onnxruntime_providers Python::Module)
endif()

if (onnxruntime_USE_NCCL)
target_include_directories(onnxruntime_providers PUBLIC ${MPI_CXX_INCLUDE_DIRS})
endif()
if (onnxruntime_ENABLE_TRITON)
onnxruntime_add_include_to_target(onnxruntime_providers Python::Module)
endif()

# Install the public CPU provider factory header under <includedir>/onnxruntime/.
# The FILES keyword must be separated from the first path by whitespace;
# otherwise CMake parses "FILES<path>" as a single unknown argument and the
# install() call is rejected. Only one rule is needed for this header.
install(FILES ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/cpu/cpu_provider_factory.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/)
# Install the provider-level public headers (resource.h, custom_op_context.h)
# under <includedir>/onnxruntime/core/providers.
install(FILES ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/resource.h ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/custom_op_context.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/core/providers)
set_target_properties(onnxruntime_providers PROPERTIES LINKER_LANGUAGE CXX)
set_target_properties(onnxruntime_providers PROPERTIES FOLDER "ONNXRuntime")
Expand Down
Loading
Loading