From a26806c73a251b8a63645d133cf225061d3ae43a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 5 Feb 2026 20:12:49 +0000 Subject: [PATCH 1/4] Initial plan From c5ae05a4c747dded9fddddbdc808fa48ed31e2f8 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 5 Feb 2026 20:18:17 +0000 Subject: [PATCH 2/4] Add Windows platform support for build system and C++ code - Add Windows platform guards in Threading.cpp (SetThreadAffinityMask) - Make setup.py cross-platform (where/which, cmake --build, MSVC flags) - Make CMakeLists.txt use generator expressions for library naming - Make FindCPUkernels.cmake support MSVC compiler flags - Make ZenDnnlFwkIntegrate.cmake support Windows library names - Add Windows build instructions to README.md Co-authored-by: Looong01 <69568351+Looong01@users.noreply.github.com> --- CMakeLists.txt | 5 +- README.md | 53 ++++++++++++++++++++ cmake/modules/FindCPUkernels.cmake | 20 ++++++-- cmake/modules/ZenDnnlFwkIntegrate.cmake | 65 +++++++++++++++++++------ setup.py | 56 ++++++++++++++++----- src/cpu/cpp/Threading.cpp | 15 +++++- 6 files changed, 178 insertions(+), 36 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7a470ec..ab9b0d4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -47,7 +47,8 @@ add_dependencies(zentorch CPUkernels zendnnl::zendnnl_archive) target_compile_features(zentorch PUBLIC cxx_std_17) set_target_properties(zentorch PROPERTIES - LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/lib/) + LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/lib/ + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/lib/) target_include_directories(zentorch PUBLIC ${ZENTORCH_INCLUDE_DIR} @@ -66,7 +67,7 @@ message(STATUS "zentorch PROJECT_NAME, ${PROJECT_NAME}") add_custom_command( TARGET zentorch POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy - ${CMAKE_CURRENT_BINARY_DIR}/lib/libzentorch.so + $ ${CMAKE_SOURCE_DIR}/${INSTALL_LIB_DIR}/${PROJECT_NAME}/) # Set default build type diff --git a/README.md b/README.md index 646d753..0fd2037 100644 --- a/README.md +++ b/README.md @@ -199,6 +199,59 @@ zentorch-5.2.0-cp310-cp310-linux_x86_64.whl ```bash python setup.py clean --all ``` + +### 2.2.3. Windows build (Experimental) + +>**Note:** Windows support is experimental. The build system has been made cross-platform, +>but full functionality depends on ZenDNN and its dependencies being available for Windows. +>An AMD EPYC™ CPU with AVX-512 support is still required. + +#### 2.2.3.1. Prerequisites + +* [Visual Studio 2019 or later](https://visualstudio.microsoft.com/) with the + "Desktop development with C++" workload (provides MSVC and CMake). +* [Git for Windows](https://gitforwindows.org/) +* Python 3.10+ (via conda or the official installer) + +#### 2.2.3.2. Create conda environment for the build + +```cmd +conda create -n pt-zentorch python=3.10 -y +conda activate pt-zentorch +``` + +#### 2.2.3.3. Install PyTorch v2.10.0 + +```cmd +pip install torch==2.10.0 --index-url https://download.pytorch.org/whl/cpu +``` + +#### 2.2.3.4. Install Dependencies + +```cmd +pip install -r requirements.txt +``` + +#### 2.2.3.5. Build from a Visual Studio Developer Command Prompt + +Open a **"x64 Native Tools Command Prompt for VS"** (or run `vcvarsall.bat x64`), then: + +```cmd +python setup.py bdist_wheel +``` + +#### 2.2.3.6. Install the wheel file + +```cmd +cd dist +pip install zentorch-5.2.0-cp310-cp310-win_amd64.whl +``` + +#### 2.2.3.7. Build Cleanup + +```cmd +python setup.py clean --all +``` # 3. Unit Tests ## 3.1 Install Unit tests Dependencies diff --git a/cmake/modules/FindCPUkernels.cmake b/cmake/modules/FindCPUkernels.cmake index 4340089..3ea2a06 100644 --- a/cmake/modules/FindCPUkernels.cmake +++ b/cmake/modules/FindCPUkernels.cmake @@ -11,10 +11,15 @@ find_package(Torch REQUIRED) file(GLOB cpu_kernels "${CMAKE_CURRENT_SOURCE_DIR}/src/cpu/cpp/kernels/*.cpp") # setting necessary flags for .cpp files -set(FLAGS "-Wall -Werror -Wno-unknown-pragmas -Wno-error=uninitialized \ - -Wno-error=maybe-uninitialized -fPIC -fopenmp -fno-math-errno \ - -fno-trapping-math -O2 -std=c++17 -mavx512f -mavx512bf16 \ - -mavx512vl -mavx512dq -DCPU_CAPABILITY_AVX512") +if(MSVC) + set(FLAGS "/W3 /WX /openmp /O2 /std:c++17 /arch:AVX512 \ + /DCPU_CAPABILITY_AVX512") +else() + set(FLAGS "-Wall -Werror -Wno-unknown-pragmas -Wno-error=uninitialized \ + -Wno-error=maybe-uninitialized -fPIC -fopenmp -fno-math-errno \ + -fno-trapping-math -O2 -std=c++17 -mavx512f -mavx512bf16 \ + -mavx512vl -mavx512dq -DCPU_CAPABILITY_AVX512") +endif() set_source_files_properties(${cpu_kernels} PROPERTIES COMPILE_FLAGS "${FLAGS}") @@ -30,9 +35,14 @@ target_include_directories(CPUkernels PUBLIC target_link_libraries(CPUkernels PUBLIC zendnnl::zendnnl_archive) -LIST(APPEND MHA_LIBRARIES ${CMAKE_CURRENT_BINARY_DIR}/lib/libCPUkernels.a) set(MHA_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src/cpu/cpp/kernels/") +if(MSVC) + LIST(APPEND MHA_LIBRARIES ${CMAKE_CURRENT_BINARY_DIR}/lib/CPUkernels.lib) +else() + LIST(APPEND MHA_LIBRARIES ${CMAKE_CURRENT_BINARY_DIR}/lib/libCPUkernels.a) +endif() + SET(MHA_FOUND ON) ENDIF (NOT MHA_FOUND) diff --git a/cmake/modules/ZenDnnlFwkIntegrate.cmake b/cmake/modules/ZenDnnlFwkIntegrate.cmake index bc2f88b..efc56d9 100644 --- a/cmake/modules/ZenDnnlFwkIntegrate.cmake +++ b/cmake/modules/ZenDnnlFwkIntegrate.cmake @@ -210,13 +210,23 @@ else() add_library(zendnnl_library STATIC IMPORTED GLOBAL) add_dependencies(zendnnl_library fwk_zendnnl) - set_target_properties(zendnnl_library - PROPERTIES - IMPORTED_LOCATION "${ZENDNNL_LIBRARY_LIB_DIR}/libzendnnl_archive.a" - INCLUDE_DIRECTORIES "${ZENDNNL_LIBRARY_INC_DIR}" - INTERFACE_INCLUDE_DIRECTORIES "${ZENDNNL_LIBRARY_INC_DIR}") + if(MSVC) + set_target_properties(zendnnl_library + PROPERTIES + IMPORTED_LOCATION "${ZENDNNL_LIBRARY_LIB_DIR}/zendnnl_archive.lib" + INCLUDE_DIRECTORIES "${ZENDNNL_LIBRARY_INC_DIR}" + INTERFACE_INCLUDE_DIRECTORIES "${ZENDNNL_LIBRARY_INC_DIR}") + else() + set_target_properties(zendnnl_library + PROPERTIES + IMPORTED_LOCATION "${ZENDNNL_LIBRARY_LIB_DIR}/libzendnnl_archive.a" + INCLUDE_DIRECTORIES "${ZENDNNL_LIBRARY_INC_DIR}" + INTERFACE_INCLUDE_DIRECTORIES "${ZENDNNL_LIBRARY_INC_DIR}") + endif() - target_link_options(zendnnl_library INTERFACE "-fopenmp") + if(NOT MSVC) + target_link_options(zendnnl_library INTERFACE "-fopenmp") + endif() target_link_libraries(zendnnl_library INTERFACE OpenMP::OpenMP_CXX INTERFACE ${CMAKE_DL_LIBS}) @@ -232,22 +242,37 @@ else() if (ZENDNNL_DEPENDS_LIBXSMM) # libxsmm dependency + if(MSVC) + set(_LIBXSMM_ARCHIVE "libxsmm.lib") + else() + set(_LIBXSMM_ARCHIVE "libxsmm.a") + endif() zendnnl_add_dependency(NAME libxsmm PATH "${ZENDNNL_INSTALL_PREFIX}/deps/libxsmm" - ARCHIVE_FILE "libxsmm.a" + ARCHIVE_FILE "${_LIBXSMM_ARCHIVE}" ALIAS "libxsmm::libxsmm" DEPENDS fwk_zendnnl) target_link_libraries(zendnnl_library INTERFACE libxsmm::libxsmm) endif() + if(MSVC) + set(_AOCLUTILS_ARCHIVE "aoclutils.lib") + set(_AUCPUID_ARCHIVE "au_cpuid.lib") + set(_DNNL_ARCHIVE "dnnl.lib") + else() + set(_AOCLUTILS_ARCHIVE "libaoclutils.a") + set(_AUCPUID_ARCHIVE "libau_cpuid.a") + set(_DNNL_ARCHIVE "libdnnl.a") + endif() + # aoclutils dependency if (DEFINED ENV{ZENDNNL_MANYLINUX_BUILD}) zendnnl_add_dependency(NAME aoclutils PATH "${ZENDNNL_INSTALL_PREFIX}/deps/aoclutils" LIB_SUFFIX lib64 - ARCHIVE_FILE "libaoclutils.a" + ARCHIVE_FILE "${_AOCLUTILS_ARCHIVE}" ALIAS "au::aoclutils" DEPENDS fwk_zendnnl) target_link_libraries(zendnnl_library INTERFACE au::aoclutils) @@ -255,7 +280,7 @@ else() zendnnl_add_dependency(NAME aucpuid PATH "${ZENDNNL_INSTALL_PREFIX}/deps/aoclutils" LIB_SUFFIX lib64 - ARCHIVE_FILE "libau_cpuid.a" + ARCHIVE_FILE "${_AUCPUID_ARCHIVE}" ALIAS "au::au_cpuid" DEPENDS fwk_zendnnl) @@ -264,7 +289,7 @@ else() zendnnl_add_dependency(NAME onednn PATH "${ZENDNNL_INSTALL_PREFIX}/deps/onednn" LIB_SUFFIX lib64 - ARCHIVE_FILE "libdnnl.a" + ARCHIVE_FILE "${_DNNL_ARCHIVE}" ALIAS "DNNL::dnnl" DEPENDS fwk_zendnnl) target_link_libraries(zendnnl_library INTERFACE DNNL::dnnl) @@ -272,7 +297,7 @@ else() else() zendnnl_add_dependency(NAME aoclutils PATH "${ZENDNNL_INSTALL_PREFIX}/deps/aoclutils" - ARCHIVE_FILE "libaoclutils.a" + ARCHIVE_FILE "${_AOCLUTILS_ARCHIVE}" ALIAS "au::aoclutils" DEPENDS fwk_zendnnl) @@ -280,7 +305,7 @@ else() zendnnl_add_dependency(NAME aucpuid PATH "${ZENDNNL_INSTALL_PREFIX}/deps/aoclutils" - ARCHIVE_FILE "libau_cpuid.a" + ARCHIVE_FILE "${_AUCPUID_ARCHIVE}" ALIAS "au::au_cpuid" DEPENDS fwk_zendnnl) @@ -288,7 +313,7 @@ else() zendnnl_add_dependency(NAME onednn PATH "${ZENDNNL_INSTALL_PREFIX}/deps/onednn" - ARCHIVE_FILE "libdnnl.a" + ARCHIVE_FILE "${_DNNL_ARCHIVE}" ALIAS "DNNL::dnnl" DEPENDS fwk_zendnnl) @@ -298,9 +323,14 @@ else() # amdblis dependency if (ZENDNNL_DEPENDS_AMDBLIS) + if(MSVC) + set(_BLIS_ARCHIVE "blis-mt.lib") + else() + set(_BLIS_ARCHIVE "libblis-mt.a") + endif() zendnnl_add_dependency(NAME amdblis PATH "${ZENDNNL_INSTALL_PREFIX}/deps/amdblis" - ARCHIVE_FILE "libblis-mt.a" + ARCHIVE_FILE "${_BLIS_ARCHIVE}" ALIAS "amdblis::amdblis_archive" DEPENDS fwk_zendnnl) @@ -308,9 +338,14 @@ else() endif() if (ZENDNNL_DEPENDS_AOCLDLP) + if(MSVC) + set(_AOCLDLP_ARCHIVE "aocl-dlp.lib") + else() + set(_AOCLDLP_ARCHIVE "libaocl-dlp.a") + endif() zendnnl_add_dependency(NAME aocldlp PATH "${ZENDNNL_INSTALL_PREFIX}/deps/aocldlp" - ARCHIVE_FILE "libaocl-dlp.a" + ARCHIVE_FILE "${_AOCLDLP_ARCHIVE}" ALIAS "aocldlp::aocldlp" DEPENDS fwk_zendnnl) diff --git a/setup.py b/setup.py index 6bca04a..59f4e56 100644 --- a/setup.py +++ b/setup.py @@ -10,9 +10,12 @@ from os.path import join as Path import os import subprocess +import sys import torch import warnings +IS_WINDOWS = sys.platform == "win32" + if parse(__version__) < parse("2.9.1"): raise ImportError( "zentorch Plugin requires torch version \ @@ -41,7 +44,9 @@ def run(self) -> None: os.makedirs(os.path.join(self.build_temp, "lib"), exist_ok=True) os.makedirs(os.path.join(self.build_lib), exist_ok=True) - rc, out, err = subproc_communicate("which python") + rc, out, err = subproc_communicate( + "where python" if IS_WINDOWS else "which python" + ) if rc == 0: out = out.split("\n")[0] os.environ["PYTHON_PATH"] = out.strip() @@ -74,7 +79,16 @@ def run(self) -> None: self.spawn(cmake_cmd) - self.spawn(["make", "-j", str(os.cpu_count()), "-C", self.build_temp]) + build_cmd = [ + "cmake", + "--build", + self.build_temp, + "--config", + build_type, + "--parallel", + str(os.cpu_count()), + ] + self.spawn(build_cmd) super().run() @@ -86,8 +100,13 @@ def build_extensions(self) -> None: extension = self.extensions[0] + if IS_WINDOWS: + zentorch_lib = "zentorch.dll" + else: + zentorch_lib = "libzentorch.so" + extra_objects = [ - Path(project_root_dir, self.build_lib, PACKAGE_NAME, "libzentorch.so"), + Path(project_root_dir, self.build_lib, PACKAGE_NAME, zentorch_lib), ] extension.extra_objects.extend(extra_objects) @@ -146,25 +165,36 @@ def get_commit_hash(base_dir): # -Wno-unknown-pragma is for [unroll pragma], to be removed # -fopenmp is needed for omp related pragmas (simd etc.) zentorch_compile_args = [ - "-Wall", - "-Werror", - "-fopenmp", - "-Wno-unknown-pragmas", "-DZENTORCH_VERSION_HASH=" + git_sha, "-DZENTORCH_VERSION=" + PACKAGE_VERSION, "-DPT_VERSION=" + PT_VERSION, ] +if IS_WINDOWS: + zentorch_compile_args += [ + "/W3", + "/WX", + "/openmp", + ] +else: + zentorch_compile_args += [ + "-Wall", + "-Werror", + "-fopenmp", + "-Wno-unknown-pragmas", + ] + # Enable C++11 ABI compilation for zentorch # if PyTorch was built with ABI support. -zentorch_compile_args += [ - f"-D_GLIBCXX_USE_CXX11_ABI={int(torch._C._GLIBCXX_USE_CXX11_ABI)}" -] +if not IS_WINDOWS: + zentorch_compile_args += [ + f"-D_GLIBCXX_USE_CXX11_ABI={int(torch._C._GLIBCXX_USE_CXX11_ABI)}" + ] -# add the "-O2" optimization only when we are doing release build +# add the optimization flag only when we are doing release build # check for release build if not os.getenv("DEBUG", 0): - zentorch_compile_args += ["-O2"] + zentorch_compile_args += ["/O2"] if IS_WINDOWS else ["-O2"] long_description = "" @@ -214,7 +244,7 @@ def main(): sources=sources, include_dirs=include_dirs, extra_compile_args=zentorch_compile_args, - extra_link_args=['-Wl,-rpath,$ORIGIN'], + extra_link_args=[] if IS_WINDOWS else ['-Wl,-rpath,$ORIGIN'], ) ], cmdclass={ diff --git a/src/cpu/cpp/Threading.cpp b/src/cpu/cpp/Threading.cpp index 21ad1b0..edb9525 100644 --- a/src/cpu/cpp/Threading.cpp +++ b/src/cpu/cpp/Threading.cpp @@ -4,10 +4,15 @@ ******************************************************************************/ #include -#include #include #include + +#ifdef _WIN32 +#include +#else +#include #include +#endif #include "Threading.hpp" @@ -19,6 +24,13 @@ void thread_bind(const std::vector &cpu_core_list) { #pragma omp parallel num_threads(cpu_core_list.size()) { int thread_index = omp_get_thread_num(); +#ifdef _WIN32 + DWORD_PTR mask = static_cast(1) + << cpu_core_list[thread_index]; + if (SetThreadAffinityMask(GetCurrentThread(), mask) == 0) { + throw std::runtime_error("Fail to bind cores."); + } +#else cpu_set_t cpuset; CPU_ZERO(&cpuset); CPU_SET(cpu_core_list[thread_index], &cpuset); @@ -26,6 +38,7 @@ void thread_bind(const std::vector &cpu_core_list) { 0) { throw std::runtime_error("Fail to bind cores."); } +#endif } } From 05ba4501afe189c41c67dccd2ffe7800e365c258 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 5 Feb 2026 20:19:50 +0000 Subject: [PATCH 3/4] Add Windows platform support for build system and C++ code Co-authored-by: Looong01 <69568351+Looong01@users.noreply.github.com> --- _codeql_detected_source_root | 1 + 1 file changed, 1 insertion(+) create mode 120000 _codeql_detected_source_root diff --git a/_codeql_detected_source_root b/_codeql_detected_source_root new file mode 120000 index 0000000..945c9b4 --- /dev/null +++ b/_codeql_detected_source_root @@ -0,0 +1 @@ +. \ No newline at end of file From 3350a7a67272b4b66d65bc843f6575002fadc13f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 5 Feb 2026 20:20:13 +0000 Subject: [PATCH 4/4] Remove codeql artifact and add to .gitignore Co-authored-by: Looong01 <69568351+Looong01@users.noreply.github.com> --- .gitignore | 1 + _codeql_detected_source_root | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) delete mode 120000 _codeql_detected_source_root diff --git a/.gitignore b/.gitignore index 4666ceb..a85b8ee 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,4 @@ __pycache__ benchmark/recsys/dlrmv2-mlperf/mlperf.conf benchmark/recsys/dlrmv2-mlperf/output .hypothesis +_codeql_detected_source_root diff --git a/_codeql_detected_source_root b/_codeql_detected_source_root deleted file mode 120000 index 945c9b4..0000000 --- a/_codeql_detected_source_root +++ /dev/null @@ -1 +0,0 @@ -. \ No newline at end of file