microsoft · MaanavD · Apr 13, 2026 · Apr 14, 2026 · Apr 14, 2026
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
@@ -200,10 +200,8 @@ option(onnxruntime_FUZZ_TEST "Enable Fuzz testing" OFF)
 # training options
 option(onnxruntime_ENABLE_NVTX_PROFILE "Enable NVTX profile." OFF)
 option(onnxruntime_ENABLE_MEMORY_PROFILE "Enable memory profile." OFF)
-option(onnxruntime_ENABLE_TRAINING "Enable full training functionality. Includes ORTModule and ORT Training APIs" OFF)
 option(onnxruntime_ENABLE_TRAINING_APIS "Enable ort training apis." OFF)
 option(onnxruntime_ENABLE_TRAINING_OPS "Include training operators but no training session support." OFF)
-option(onnxruntime_ENABLE_TRAINING_E2E_TESTS "Enable training end-to-end tests." OFF)
 option(onnxruntime_ENABLE_CPU_FP16_OPS "Build with advanced instruction sets" ON)
 option(onnxruntime_USE_NCCL "Build with NCCL support" OFF)
 
@@ -223,9 +221,6 @@ option(onnxruntime_ENABLE_WEBASSEMBLY_RELAXED_SIMD "Enable WebAssembly Relaxed S
 # Enable bitcode for iOS
 option(onnxruntime_ENABLE_BITCODE "Enable bitcode for iOS only" OFF)
 
-# build Pytorch's LazyTensor support
-cmake_dependent_option(onnxruntime_ENABLE_LAZY_TENSOR "Enable ORT as a LazyTensor backend in Pytorch." ON "onnxruntime_ENABLE_TRAINING" OFF)
-
 # build separate library of schemas of (custom) ops used by ORT (for ONNX to MLIR translation)
 option(onnxruntime_BUILD_OPSCHEMA_LIB "Build op schema library" ON)
 
@@ -254,7 +249,7 @@ option(onnxruntime_ENABLE_CPUINFO "Enable cpuinfo" ON)
 option(onnxruntime_ENABLE_ATEN "Enable ATen fallback" OFF)
 
 # dlpack support
-cmake_dependent_option(onnxruntime_ENABLE_DLPACK "Enable dlpack" ON "onnxruntime_ENABLE_TRAINING OR onnxruntime_ENABLE_ATEN OR onnxruntime_ENABLE_PYTHON" OFF)
+cmake_dependent_option(onnxruntime_ENABLE_DLPACK "Enable dlpack" ON "onnxruntime_ENABLE_TRAINING_APIS OR onnxruntime_ENABLE_ATEN OR onnxruntime_ENABLE_PYTHON" OFF)
 option(onnxruntime_USE_AZURE "Build with azure inferencing support" OFF)
 option(onnxruntime_USE_LOCK_FREE_QUEUE "Build with lock-free task queue for threadpool." OFF)
 option(onnxruntime_FORCE_GENERIC_ALGORITHMS "Disable optimized arch-specific algorithms. Use only for testing and debugging generic algorithms." OFF)
@@ -281,34 +276,8 @@ endif()
 
 include(detect_onnxruntime_target_platform.cmake)
 
-# ENABLE_TRAINING includes all training functionality
-# The following 2 entry points
-# 1. ORTModule
-# 2. ORT Training APIs
-# It includes all the feature additions as well like
-# 1. Python OP
-# 2. Aten Fallback
-# 3. Strided Tensors
-# 4. All training ops including communication and  collectives ops
-# 5. ONNXBlock (Front end for training preparation when using training apis)
-# Some features are only enabled when onnxruntime_ENABLE_PYTHON is ON as they are only relevant
-# when using python env
-if (onnxruntime_ENABLE_TRAINING)
-  set(onnxruntime_ENABLE_TRAINING_APIS ON)
-  set(onnxruntime_ENABLE_TRAINING_OPS ON)
-  set(onnxruntime_ENABLE_ATEN ON)
-  set(onnxruntime_ENABLE_TRITON ON)
-  if (NOT APPLE)
-    set(onnxruntime_ENABLE_TRAINING_TORCH_INTEROP ON)
-  endif()
-endif()
-
 if (onnxruntime_ENABLE_TRAINING_APIS)
   set(onnxruntime_ENABLE_TRAINING_OPS ON)
-  if (onnxruntime_ENABLE_PYTHON AND NOT onnxruntime_ENABLE_TRAINING)
-    message(FATAL_ERROR "Standalone On-Device Training build is not supported with Python bindings! "
-    "Please use the --enable_training flag instead of the --enable_training_apis flag.")
-  endif()
 endif()
 
 
@@ -378,29 +347,14 @@ endif()
 
 set(ONNX_ML 1)
 
-if (NOT (UNIX AND onnxruntime_ENABLE_PYTHON AND onnxruntime_ENABLE_TRAINING AND (NOT onnxruntime_BUILD_SHARED_LIB)))
-  if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
-    message(WARNING "onnxruntime_ENABLE_TRAINING_TORCH_INTEROP is turned OFF due to incompatible build combinations.")
-  endif()
-  set(onnxruntime_ENABLE_TRAINING_TORCH_INTEROP OFF)
-endif()
-
-if (NOT (UNIX AND onnxruntime_USE_CUDA AND onnxruntime_ENABLE_PYTHON AND onnxruntime_ENABLE_TRAINING AND (NOT onnxruntime_BUILD_SHARED_LIB)))
+if (NOT (UNIX AND onnxruntime_USE_CUDA AND onnxruntime_ENABLE_PYTHON AND onnxruntime_ENABLE_TRAINING_APIS AND (NOT onnxruntime_BUILD_SHARED_LIB)))
   if (onnxruntime_ENABLE_TRITON)
     message(WARNING "onnxruntime_ENABLE_TRITON is turned OFF because it's designed to support CUDA training on Linux only currently.")
   endif()
   set(onnxruntime_ENABLE_TRITON OFF)
 endif()
 
 set(onnxruntime_REQUIRE_PYTHON_EMBED_LIB OFF)
-if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
-  add_compile_definitions(ENABLE_TRAINING_TORCH_INTEROP)
-
-  # Python::Python is required for building unit test executables.
-  if (onnxruntime_BUILD_UNIT_TESTS)
-    set(onnxruntime_REQUIRE_PYTHON_EMBED_LIB ON)
-  endif()
-endif()
 
 if (onnxruntime_ENABLE_TRITON)
   # Need SetOutputMLValue.
@@ -1033,25 +987,9 @@ if (onnxruntime_FORCE_GENERIC_ALGORITHMS)
     add_compile_definitions(FORCE_GENERIC_ALGORITHMS)
 endif()
 
-if (onnxruntime_ENABLE_LAZY_TENSOR)
-  # To support LazyTensor, ORT needs to call Python function from C/C++.
-  # so onnxruntime_ENABLE_PYTHON is required.
-  if (NOT onnxruntime_ENABLE_TRAINING OR NOT onnxruntime_ENABLE_PYTHON)
-    message(
-        FATAL_ERROR
-        "Option onnxruntime_ENABLE_LAZY_TENSOR can only be set when onnxruntime_ENABLE_TRAINING and onnxruntime_ENABLE_PYTHON are enabled")
-  endif()
-  # TODO: In the future, we can compile LazyTensor into a standalone
-  # library target, onnxruntime_lazy_tensor, to make the buid
-  # cleaner.
-endif()
-
 function(onnxruntime_set_compile_flags target_name)
     if (CPUINFO_SUPPORTED)
       onnxruntime_add_include_to_target(${target_name} cpuinfo::cpuinfo)
-    endif()
-    if(onnxruntime_ENABLE_LAZY_TENSOR)
-      target_compile_definitions(${target_name} PRIVATE ENABLE_LAZY_TENSOR)
     endif()
 	# Enable stream for all the non-minimal build
     if (NOT onnxruntime_MINIMAL_BUILD)
@@ -1536,15 +1474,6 @@ if (onnxruntime_ENABLE_CUDA_PROFILING)
   add_compile_definitions(ENABLE_CUDA_PROFILING)
 endif()
 
-if (onnxruntime_ENABLE_TRAINING)
-  add_compile_definitions(ENABLE_TRAINING_CORE)
-  add_compile_definitions(ENABLE_STRIDED_TENSORS)
-  add_compile_definitions(ENABLE_TRAINING)
-
-  add_subdirectory(tensorboard EXCLUDE_FROM_ALL)
-  list(APPEND onnxruntime_EXTERNAL_LIBRARIES tensorboard)
-endif()
-
 if (onnxruntime_ENABLE_DLPACK)
   add_compile_definitions(ENABLE_DLPACK)
 endif()
@@ -1739,15 +1668,6 @@ if (onnxruntime_BUILD_WINML_TESTS)
   list(APPEND ONNXRUNTIME_CMAKE_FILES winml_unittests)
 endif()
 
-# onnxruntime_training depends on onnxruntime_unittests since onnxruntime_training.cmake uses a variable `TEST_SRC_DIR`
-# that is defined in onnxruntime_unittests.cmake
-if (onnxruntime_ENABLE_TRAINING)
-  list(APPEND ONNXRUNTIME_CMAKE_FILES onnxruntime_training)
-  if (onnxruntime_ENABLE_TRAINING_E2E_TESTS)
-    list(APPEND ONNXRUNTIME_CMAKE_FILES onnxruntime_training_e2e_tests)
-  endif()
-endif()
-
 if (onnxruntime_BUILD_CSHARP)
   message(STATUS "CSharp Build is enabled")
 #  set_property(GLOBAL PROPERTY VS_DOTNET_TARGET_FRAMEWORK_VERSION "netstandard2.0")
@@ -1793,7 +1713,7 @@ if (BUILD_PKGCONFIG_FILES)
     ${CMAKE_INSTALL_LIBDIR}/pkgconfig )
 endif()
 
-if (onnxruntime_BUILD_OPSCHEMA_LIB AND onnxruntime_ENABLE_TRAINING)
+if (onnxruntime_BUILD_OPSCHEMA_LIB AND onnxruntime_ENABLE_TRAINING_OPS)
   # opschema library requires training ops as well
   include(onnxruntime_opschema_lib.cmake)
 endif()
@@ -1815,8 +1735,8 @@ if (onnxruntime_ENABLE_EXTERNAL_CUSTOM_OP_SCHEMAS)
     message(FATAL_ERROR "External custom operator schemas is not supported with the user specified protoc executable")
   endif()
 
-  if (NOT onnxruntime_ENABLE_TRAINING)
-    message(FATAL_ERROR "External custom operator schemas is supported only with --enable-training option")
+  if (NOT onnxruntime_ENABLE_TRAINING_OPS)
+    message(FATAL_ERROR "External custom operator schemas is supported only with --enable_training_ops or --enable_training_apis option")
   endif()
 
   add_custom_target(install_protobuf ALL DEPENDS ${PROTOBUF_LIB} protobuf::protoc)

diff --git a/cmake/external/onnxruntime_external_deps.cmake b/cmake/external/onnxruntime_external_deps.cmake
@@ -607,9 +607,7 @@ if(onnxruntime_ENABLE_DLPACK)
   onnxruntime_fetchcontent_makeavailable(dlpack)
 endif()
 
-if(onnxruntime_ENABLE_TRAINING OR (onnxruntime_ENABLE_TRAINING_APIS AND onnxruntime_BUILD_UNIT_TESTS))
-  # Once code under orttraining/orttraining/models dir is removed "onnxruntime_ENABLE_TRAINING" should be removed from
-  # this conditional
+if(onnxruntime_ENABLE_TRAINING_APIS AND onnxruntime_BUILD_UNIT_TESTS)
   onnxruntime_fetchcontent_declare(
     cxxopts
     URL ${DEP_URL_cxxopts}

diff --git a/cmake/onnxruntime_framework.cmake b/cmake/onnxruntime_framework.cmake
@@ -7,23 +7,7 @@ file(GLOB_RECURSE onnxruntime_framework_srcs CONFIGURE_DEPENDS
     "${ONNXRUNTIME_ROOT}/core/framework/*.cc"
 )
 
-if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
-  file(GLOB_RECURSE onnxruntime_training_framework_torch_srcs CONFIGURE_DEPENDS
-      "${ORTTRAINING_SOURCE_DIR}/core/framework/torch/*.h"
-      "${ORTTRAINING_SOURCE_DIR}/core/framework/torch/*.cc"
-  )
-  list(APPEND onnxruntime_framework_srcs ${onnxruntime_training_framework_torch_srcs})
-  if (onnxruntime_ENABLE_TRITON)
-    file(GLOB_RECURSE onnxruntime_training_framework_triton_srcs CONFIGURE_DEPENDS
-      "${ORTTRAINING_SOURCE_DIR}/core/framework/triton/*.h"
-      "${ORTTRAINING_SOURCE_DIR}/core/framework/triton/*.cc"
-    )
-    list(APPEND onnxruntime_framework_srcs ${onnxruntime_training_framework_triton_srcs})
-  endif()
-elseif(onnxruntime_ENABLE_TRITON)
-  # Triton executor shares some code from torch_interop, such as python and dlpack related code files.
-  # When torch_interop is enabled, all these dependencies are already included.
-  # But if not, we need to include them explicitly.
+if(onnxruntime_ENABLE_TRITON)
   file(GLOB_RECURSE onnxruntime_training_framework_triton_srcs CONFIGURE_DEPENDS
     "${ORTTRAINING_SOURCE_DIR}/core/framework/torch/dlpack_python.h"
     "${ORTTRAINING_SOURCE_DIR}/core/framework/torch/dlpack_python.cc"
@@ -73,7 +57,7 @@ endif()
 # Needed for the provider interface, as it includes training headers when training is enabled
 if (onnxruntime_ENABLE_TRAINING_OPS)
   target_include_directories(onnxruntime_framework PRIVATE ${ORTTRAINING_ROOT})
-  if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP OR onnxruntime_ENABLE_TRITON)
+  if (onnxruntime_ENABLE_TRITON)
     onnxruntime_add_include_to_target(onnxruntime_framework Python::Module dlpack::dlpack)
   endif()
 endif()

diff --git a/cmake/onnxruntime_graph.cmake b/cmake/onnxruntime_graph.cmake
@@ -10,20 +10,13 @@ file(GLOB_RECURSE onnxruntime_graph_src CONFIGURE_DEPENDS
 # start with empty training srcs list
 set(orttraining_graph_src)
 
-if (onnxruntime_ENABLE_TRAINING_OPS AND NOT onnxruntime_ENABLE_TRAINING)
+if (onnxruntime_ENABLE_TRAINING_OPS)
   set(orttraining_graph_src
       "${ORTTRAINING_SOURCE_DIR}/core/graph/training_op_defs.cc"
       "${ORTTRAINING_SOURCE_DIR}/core/graph/training_op_defs.h"
       )
 endif()
 
-if (onnxruntime_ENABLE_TRAINING)
-  file(GLOB_RECURSE orttraining_graph_src CONFIGURE_DEPENDS
-      "${ORTTRAINING_SOURCE_DIR}/core/graph/*.h"
-      "${ORTTRAINING_SOURCE_DIR}/core/graph/*.cc"
-      )
-endif()
-
 # create empty lists for any excludes
 set(onnxruntime_graph_src_exclude_patterns)
 set(orttraining_graph_src_exclude_patterns)
@@ -109,12 +102,6 @@ endif()
 if(NOT MSVC)
   target_compile_options(onnxruntime_graph PRIVATE "-Wno-parentheses" "-Wno-deprecated-declarations")
 endif()
-if (onnxruntime_ENABLE_TRAINING)
-  #TODO: the graph library should focus on ONNX IR, it shouldn't depend on math libraries like MKLML/OpenBlas
-  target_include_directories(onnxruntime_graph PRIVATE ${MKLML_INCLUDE_DIR})
-  target_link_libraries(onnxruntime_graph PRIVATE nlohmann_json::nlohmann_json)
-endif()
-
 target_include_directories(onnxruntime_graph PRIVATE ${ONNXRUNTIME_ROOT})
 
 if (onnxruntime_ENABLE_TRAINING_OPS)

diff --git a/cmake/onnxruntime_optimizer.cmake b/cmake/onnxruntime_optimizer.cmake
@@ -85,17 +85,6 @@ else()
   )
 endif()
 
-if (onnxruntime_ENABLE_TRAINING)
-  list(APPEND onnxruntime_optimizer_src_patterns
-    "${ORTTRAINING_SOURCE_DIR}/core/optimizer/*.h"
-    "${ORTTRAINING_SOURCE_DIR}/core/optimizer/*.cc"
-    "${ORTTRAINING_SOURCE_DIR}/core/optimizer/compute_optimizer/*.h"
-    "${ORTTRAINING_SOURCE_DIR}/core/optimizer/compute_optimizer/*.cc"
-    "${ORTTRAINING_SOURCE_DIR}/core/optimizer/memory_optimizer/*.h"
-    "${ORTTRAINING_SOURCE_DIR}/core/optimizer/memory_optimizer/*.cc"
-  )
-endif()
-
 file(GLOB onnxruntime_optimizer_srcs CONFIGURE_DEPENDS ${onnxruntime_optimizer_src_patterns})
 
 source_group(TREE ${REPO_ROOT} FILES ${onnxruntime_optimizer_srcs})
@@ -117,13 +106,6 @@ onnxruntime_add_static_library(onnxruntime_optimizer ${onnxruntime_optimizer_src
 
 onnxruntime_add_include_to_target(onnxruntime_optimizer onnxruntime_common onnxruntime_framework onnx onnx_proto ${PROTOBUF_LIB} flatbuffers::flatbuffers Boost::mp11 safeint_interface)
 target_include_directories(onnxruntime_optimizer PRIVATE ${ONNXRUNTIME_ROOT})
-if (onnxruntime_ENABLE_TRAINING)
-  target_include_directories(onnxruntime_optimizer PRIVATE ${ORTTRAINING_ROOT})
-  onnxruntime_add_include_to_target(onnxruntime_optimizer nlohmann_json::nlohmann_json)
-  if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
-    onnxruntime_add_include_to_target(onnxruntime_optimizer Python::Module)
-  endif()
-endif()
 if (onnxruntime_ENABLE_TRITON)
   target_link_libraries(onnxruntime_optimizer PRIVATE nlohmann_json::nlohmann_json)
   onnxruntime_add_include_to_target(onnxruntime_optimizer Python::Module)

diff --git a/cmake/onnxruntime_providers_cpu.cmake b/cmake/onnxruntime_providers_cpu.cmake
@@ -69,7 +69,7 @@ if(NOT onnxruntime_DISABLE_CONTRIB_OPS)
   list(APPEND onnxruntime_providers_src ${onnxruntime_cpu_contrib_ops_srcs})
 endif()
 
-if (onnxruntime_ENABLE_TRAINING_OPS AND NOT onnxruntime_ENABLE_TRAINING)
+if (onnxruntime_ENABLE_TRAINING_OPS)
   file(GLOB_RECURSE onnxruntime_cpu_training_ops_srcs CONFIGURE_DEPENDS
     "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/*.h"
     "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/*.cc"
@@ -112,30 +112,6 @@ if (onnxruntime_ENABLE_DLPACK)
   list(APPEND onnxruntime_providers_src ${onnxruntime_providers_dlpack_srcs})
 endif()
 
-if (onnxruntime_ENABLE_TRAINING)
-  file(GLOB_RECURSE onnxruntime_cpu_training_ops_srcs CONFIGURE_DEPENDS
-    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/*.h"
-    "${ORTTRAINING_SOURCE_DIR}/training_ops/cpu/*.cc"
-    "${ORTTRAINING_SOURCE_DIR}/core/framework/*.h"
-    "${ORTTRAINING_SOURCE_DIR}/core/framework/*.cc"
-    "${ORTTRAINING_SOURCE_DIR}/core/framework/adasum/*"
-    "${ORTTRAINING_SOURCE_DIR}/core/framework/communication/*"
-  )
-
-  # This is already built in framework.cmake
-  file(GLOB_RECURSE onnxruntime_training_framework_excude_srcs CONFIGURE_DEPENDS
-      "${ORTTRAINING_SOURCE_DIR}/core/framework/torch/*.h"
-      "${ORTTRAINING_SOURCE_DIR}/core/framework/torch/*.cc"
-      "${ORTTRAINING_SOURCE_DIR}/core/framework/triton/*.h"
-      "${ORTTRAINING_SOURCE_DIR}/core/framework/triton/*.cc"
-  )
-
-  list(REMOVE_ITEM onnxruntime_cpu_training_ops_srcs ${onnxruntime_training_framework_excude_srcs})
-
-  source_group(TREE ${ORTTRAINING_ROOT}/ FILES ${onnxruntime_cpu_training_ops_srcs})
-  list(APPEND onnxruntime_providers_src ${onnxruntime_cpu_training_ops_srcs})
-endif()
-
 if (onnxruntime_REDUCED_OPS_BUILD)
   substitute_op_reduction_srcs(onnxruntime_providers_src)
 endif()
@@ -198,19 +174,11 @@ if (onnxruntime_ENABLE_DLPACK)
   onnxruntime_add_include_to_target(onnxruntime_providers dlpack::dlpack)
 endif()
 
-if (onnxruntime_ENABLE_TRAINING)
-  add_dependencies(onnxruntime_providers tensorboard)
-  onnxruntime_add_include_to_target(onnxruntime_providers tensorboard)
-  if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP OR onnxruntime_ENABLE_TRITON)
-    onnxruntime_add_include_to_target(onnxruntime_providers Python::Module)
-  endif()
-
-  if (onnxruntime_USE_NCCL)
-    target_include_directories(onnxruntime_providers PUBLIC ${MPI_CXX_INCLUDE_DIRS})
-  endif()
+if (onnxruntime_ENABLE_TRITON)
+  onnxruntime_add_include_to_target(onnxruntime_providers Python::Module)
 endif()
 
-install(FILES ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/cpu/cpu_provider_factory.h  DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/)
+install(FILES ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/cpu/cpu_provider_factory.h  DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/)
 install(FILES ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/resource.h ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/custom_op_context.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/core/providers)
 set_target_properties(onnxruntime_providers PROPERTIES LINKER_LANGUAGE CXX)
 set_target_properties(onnxruntime_providers PROPERTIES FOLDER "ONNXRuntime")