diff options
Diffstat (limited to 'gnu')
| -rw-r--r-- | gnu/local.mk | 4 | ||||
| -rw-r--r-- | gnu/packages/machine-learning.scm | 25 | ||||
| -rw-r--r-- | gnu/packages/patches/python-pytorch-fix-codegen-2.7.0.patch | 178 | ||||
| -rw-r--r-- | gnu/packages/patches/python-pytorch-fix-codegen.patch | 21 | ||||
| -rw-r--r-- | gnu/packages/patches/python-pytorch-runpath-2.7.0.patch | 30 | ||||
| -rw-r--r-- | gnu/packages/patches/python-pytorch-runpath.patch | 4 | ||||
| -rw-r--r-- | gnu/packages/patches/python-pytorch-system-libraries-2.7.0.patch | 442 | ||||
| -rw-r--r-- | gnu/packages/patches/python-pytorch-system-libraries.patch | 216 | ||||
| -rw-r--r-- | gnu/packages/patches/python-pytorch-without-kineto-2.7.0.patch | 64 |
9 files changed, 160 insertions, 824 deletions
diff --git a/gnu/local.mk b/gnu/local.mk index 3a1ff0ea956..e2419ea847f 100644 --- a/gnu/local.mk +++ b/gnu/local.mk @@ -2159,15 +2159,11 @@ dist_patch_DATA = \ %D%/packages/patches/python-pyan3-fix-absolute-path-bug.patch \ %D%/packages/patches/python-pyan3-fix-positional-arguments.patch \ %D%/packages/patches/python-pytorch-fix-codegen.patch \ - %D%/packages/patches/python-pytorch-fix-codegen-2.7.0.patch \ %D%/packages/patches/python-pytorch-for-r-torch-fix-codegen.patch \ %D%/packages/patches/python-pytorch-for-r-torch-system-libraries.patch \ %D%/packages/patches/python-pytorch-runpath.patch \ - %D%/packages/patches/python-pytorch-runpath-2.7.0.patch \ %D%/packages/patches/python-pytorch-system-libraries.patch \ - %D%/packages/patches/python-pytorch-system-libraries-2.7.0.patch \ %D%/packages/patches/python-pytorch-without-kineto.patch \ - %D%/packages/patches/python-pytorch-without-kineto-2.7.0.patch \ %D%/packages/patches/python-robotframework-sshlibrary-rf5-compat.patch \ %D%/packages/patches/python-unittest2-python3-compat.patch \ %D%/packages/patches/python-unittest2-remove-argparse.patch \ diff --git a/gnu/packages/machine-learning.scm b/gnu/packages/machine-learning.scm index c1621d1e0b9..3be1026bf7d 100644 --- a/gnu/packages/machine-learning.scm +++ b/gnu/packages/machine-learning.scm @@ -4968,13 +4968,13 @@ PyTorch.") (sha256 (base32 "19prdpzx34n8y2q6wx9dn9vyms6zidjvfgh58d28rfcf5z7z5ra5")) - (patches (search-patches "python-pytorch-system-libraries-2.7.0.patch" - "python-pytorch-runpath-2.7.0.patch" - "python-pytorch-without-kineto-2.7.0.patch" + (patches (search-patches "python-pytorch-system-libraries.patch" + "python-pytorch-runpath.patch" + "python-pytorch-without-kineto.patch" ;; Some autogeneration scripts depend on the ;; compile PyTorch library. Therefore, we create ;; dummy versions which are regenerated later. - "python-pytorch-fix-codegen-2.7.0.patch")) + "python-pytorch-fix-codegen.patch")) (modules '((guix build utils))) (snippet '(begin @@ -5123,11 +5123,18 @@ PyTorch.") (("entry_points\\[\"console_scripts\"\\]\\.append\\(") "(")))) (add-before 'build 'use-system-libraries (lambda _ - (substitute* '("caffe2/serialize/crc.cc" - "caffe2/serialize/inline_container.cc" - "torch/csrc/inductor/aoti_package/model_package_loader.cpp") - (("\"miniz\\.h\"") "<miniz/miniz.h>") - (("<miniz\\.h>") "<miniz/miniz.h>")) + (for-each + (lambda (file) + ;; Check whether the files exist for the + ;; python-pytorch-for-r-torch package + (when (file-exists? file) + (substitute* file + (("\"miniz\\.h\"") "<miniz/miniz.h>") + (("<miniz\\.h>") "<miniz/miniz.h>")))) + '("caffe2/serialize/crc.cc" + "caffe2/serialize/inline_container.cc" + "torch/csrc/inductor/aoti_package/model_package_loader.cpp")) + (substitute* "aten/src/ATen/native/vulkan/api/Allocator.h" (("<include/vk_mem_alloc.h>") "<vk_mem_alloc.h>")) diff --git a/gnu/packages/patches/python-pytorch-fix-codegen-2.7.0.patch b/gnu/packages/patches/python-pytorch-fix-codegen-2.7.0.patch deleted file mode 100644 index 3862339b141..00000000000 --- a/gnu/packages/patches/python-pytorch-fix-codegen-2.7.0.patch +++ /dev/null @@ -1,178 +0,0 @@ -This patch fixes some scripts for generating source files. For -gen_jit_decompositions.py, gen_mobile_upgraders.py and -gen_jit_shape_functions.py, which depend on the compiled PyTorch library, the -option to generate "dummy" source files is added for the initial build, which -is later corrected. codegen_external.py is patched to avoid duplicate -functions and add the static keyword as in the existing generated file. - -diff --git a/tools/gen_flatbuffers.sh b/tools/gen_flatbuffers.sh -index cc0263dbb..ac34e84b8 100644 ---- a/tools/gen_flatbuffers.sh -+++ b/tools/gen_flatbuffers.sh -@@ -1,13 +1,13 @@ - #!/bin/bash - ROOT=$(pwd) --FF_LOCATION="$ROOT/third_party/flatbuffers" --cd "$FF_LOCATION" || exit --mkdir build --cd build || exit --cmake .. --cmake --build . --target flatc --mkdir -p "$ROOT/build/torch/csrc/jit/serialization" --./flatc --cpp --gen-mutable --scoped-enums \ -+#FF_LOCATION="$ROOT/third_party/flatbuffers" -+#cd "$FF_LOCATION" || exit -+#mkdir build -+#cd build || exit -+#cmake .. -+#cmake --build . --target flatc -+#mkdir -p "$ROOT/build/torch/csrc/jit/serialization" -+flatc --cpp --gen-mutable --scoped-enums \ - -o "$ROOT/torch/csrc/jit/serialization" \ - -c "$ROOT/torch/csrc/jit/serialization/mobile_bytecode.fbs" - echo '// @generated' >> "$ROOT/torch/csrc/jit/serialization/mobile_bytecode_generated.h" -diff --git a/torch/csrc/jit/tensorexpr/codegen_external.py b/torch/csrc/jit/tensorexpr/codegen_external.py -index 5dcf1b284..0e20b0c10 100644 ---- a/torch/csrc/jit/tensorexpr/codegen_external.py -+++ b/torch/csrc/jit/tensorexpr/codegen_external.py -@@ -21,9 +21,14 @@ def gen_external(native_functions_path, tags_path, external_path): - native_functions = parse_native_yaml(native_functions_path, tags_path) - func_decls = [] - func_registrations = [] -- for func in native_functions: -+ done_names = set() -+ for func in native_functions[0]: - schema = func.func - name = schema.name.name.base -+ if name in done_names: -+ continue -+ else: -+ done_names.add(name) - args = schema.arguments - # Only supports extern calls for functions with out variants - if not schema.is_out_fn(): -@@ -63,7 +68,7 @@ def gen_external(native_functions_path, tags_path, external_path): - - # print(tensor_decls, name, arg_names) - func_decl = f"""\ --void nnc_aten_{name}( -+static void nnc_aten_{name}( - int64_t bufs_num, - void** buf_data, - int64_t* buf_ranks, -diff --git a/torchgen/decompositions/gen_jit_decompositions.py b/torchgen/decompositions/gen_jit_decompositions.py -index b42948045..e1cfc73a5 100644 ---- a/torchgen/decompositions/gen_jit_decompositions.py -+++ b/torchgen/decompositions/gen_jit_decompositions.py -@@ -1,8 +1,12 @@ - #!/usr/bin/env python3 - import os - from pathlib import Path -+import sys - --from torch.jit._decompositions import decomposition_table -+if len(sys.argv) < 2 or sys.argv[1] != "dummy": -+ from torch.jit._decompositions import decomposition_table -+else: -+ decomposition_table = {} - - - # from torchgen.code_template import CodeTemplate -@@ -86,7 +90,7 @@ def write_decomposition_util_file(path: str) -> None: - - - def main() -> None: -- pytorch_dir = Path(__file__).resolve().parents[3] -+ pytorch_dir = Path(__file__).resolve().parents[2] - upgrader_path = pytorch_dir / "torch" / "csrc" / "jit" / "runtime" - write_decomposition_util_file(str(upgrader_path)) - -diff --git a/torchgen/operator_versions/gen_mobile_upgraders.py b/torchgen/operator_versions/gen_mobile_upgraders.py -index 845034cb7..a1c5767c2 100644 ---- a/torchgen/operator_versions/gen_mobile_upgraders.py -+++ b/torchgen/operator_versions/gen_mobile_upgraders.py -@@ -6,10 +6,13 @@ import os - from enum import Enum - from operator import itemgetter - from pathlib import Path -+import sys - from typing import Any - --import torch --from torch.jit.generate_bytecode import generate_upgraders_bytecode -+if len(sys.argv) < 2 or sys.argv[1] != "dummy": -+ import torch -+ from torch.jit.generate_bytecode import generate_upgraders_bytecode -+ - from torchgen.code_template import CodeTemplate - from torchgen.operator_versions.gen_mobile_upgraders_constant import ( - MOBILE_UPGRADERS_HEADER_DESCRIPTION, -@@ -263,7 +266,10 @@ def construct_register_size(register_size_from_yaml: int) -> str: - def construct_version_maps( - upgrader_bytecode_function_to_index_map: dict[str, Any], - ) -> str: -- version_map = torch._C._get_operator_version_map() -+ if len(sys.argv) < 2 or sys.argv[1] != "dummy": -+ version_map = torch._C._get_operator_version_map() -+ else: -+ version_map = {} - sorted_version_map_ = sorted(version_map.items(), key=itemgetter(0)) # type: ignore[no-any-return] - sorted_version_map = dict(sorted_version_map_) - -@@ -375,7 +381,10 @@ def sort_upgrader(upgrader_list: list[dict[str, Any]]) -> list[dict[str, Any]]: - - - def main() -> None: -- upgrader_list = generate_upgraders_bytecode() -+ if len(sys.argv) < 2 or sys.argv[1] != "dummy": -+ upgrader_list = generate_upgraders_bytecode() -+ else: -+ upgrader_list = [] - sorted_upgrader_list = sort_upgrader(upgrader_list) - for up in sorted_upgrader_list: - print("after sort upgrader : ", next(iter(up))) -diff --git a/torchgen/shape_functions/gen_jit_shape_functions.py b/torchgen/shape_functions/gen_jit_shape_functions.py -index 56a3d8bf0..ffd0785fd 100644 ---- a/torchgen/shape_functions/gen_jit_shape_functions.py -+++ b/torchgen/shape_functions/gen_jit_shape_functions.py -@@ -1,6 +1,7 @@ - #!/usr/bin/env python3 - import os - import sys -+import importlib - from importlib.util import module_from_spec, spec_from_file_location - from itertools import chain - from pathlib import Path -@@ -18,17 +19,21 @@ you are in the root directory of the Pytorch git repo""" - if not file_path.exists(): - raise Exception(err_msg) # noqa: TRY002 - --spec = spec_from_file_location(module_name, file_path) --assert spec is not None --module = module_from_spec(spec) --sys.modules[module_name] = module --assert spec.loader is not None --assert module is not None --spec.loader.exec_module(module) -- --bounded_compute_graph_mapping = module.bounded_compute_graph_mapping --shape_compute_graph_mapping = module.shape_compute_graph_mapping -- -+if len(sys.argv) < 2 or sys.argv[1] != "dummy": -+ spec = importlib.util.spec_from_file_location(module_name, file_path) -+ assert spec is not None -+ module = importlib.util.module_from_spec(spec) -+ sys.modules[module_name] = module -+ assert spec.loader is not None -+ assert module is not None -+ spec.loader.exec_module(module) -+ -+ bounded_compute_graph_mapping = module.bounded_compute_graph_mapping -+ shape_compute_graph_mapping = module.shape_compute_graph_mapping -+ -+else: -+ bounded_compute_graph_mapping = {} -+ shape_compute_graph_mapping = {} - - SHAPE_HEADER = r""" - /** diff --git a/gnu/packages/patches/python-pytorch-fix-codegen.patch b/gnu/packages/patches/python-pytorch-fix-codegen.patch index 106ea7db663..3862339b141 100644 --- a/gnu/packages/patches/python-pytorch-fix-codegen.patch +++ b/gnu/packages/patches/python-pytorch-fix-codegen.patch @@ -6,7 +6,7 @@ is later corrected. codegen_external.py is patched to avoid duplicate functions and add the static keyword as in the existing generated file. diff --git a/tools/gen_flatbuffers.sh b/tools/gen_flatbuffers.sh -index cc0263dbbf..ac34e84b82 100644 +index cc0263dbb..ac34e84b8 100644 --- a/tools/gen_flatbuffers.sh +++ b/tools/gen_flatbuffers.sh @@ -1,13 +1,13 @@ @@ -32,7 +32,7 @@ index cc0263dbbf..ac34e84b82 100644 -c "$ROOT/torch/csrc/jit/serialization/mobile_bytecode.fbs" echo '// @generated' >> "$ROOT/torch/csrc/jit/serialization/mobile_bytecode_generated.h" diff --git a/torch/csrc/jit/tensorexpr/codegen_external.py b/torch/csrc/jit/tensorexpr/codegen_external.py -index 5dcf1b2840..0e20b0c102 100644 +index 5dcf1b284..0e20b0c10 100644 --- a/torch/csrc/jit/tensorexpr/codegen_external.py +++ b/torch/csrc/jit/tensorexpr/codegen_external.py @@ -21,9 +21,14 @@ def gen_external(native_functions_path, tags_path, external_path): @@ -61,7 +61,7 @@ index 5dcf1b2840..0e20b0c102 100644 void** buf_data, int64_t* buf_ranks, diff --git a/torchgen/decompositions/gen_jit_decompositions.py b/torchgen/decompositions/gen_jit_decompositions.py -index b42948045c..e1cfc73a5e 100644 +index b42948045..e1cfc73a5 100644 --- a/torchgen/decompositions/gen_jit_decompositions.py +++ b/torchgen/decompositions/gen_jit_decompositions.py @@ -1,8 +1,12 @@ @@ -88,7 +88,7 @@ index b42948045c..e1cfc73a5e 100644 write_decomposition_util_file(str(upgrader_path)) diff --git a/torchgen/operator_versions/gen_mobile_upgraders.py b/torchgen/operator_versions/gen_mobile_upgraders.py -index 362ce427d5..245056f815 100644 +index 845034cb7..a1c5767c2 100644 --- a/torchgen/operator_versions/gen_mobile_upgraders.py +++ b/torchgen/operator_versions/gen_mobile_upgraders.py @@ -6,10 +6,13 @@ import os @@ -107,9 +107,9 @@ index 362ce427d5..245056f815 100644 from torchgen.code_template import CodeTemplate from torchgen.operator_versions.gen_mobile_upgraders_constant import ( MOBILE_UPGRADERS_HEADER_DESCRIPTION, -@@ -265,7 +268,10 @@ def construct_register_size(register_size_from_yaml: int) -> str: +@@ -263,7 +266,10 @@ def construct_register_size(register_size_from_yaml: int) -> str: def construct_version_maps( - upgrader_bytecode_function_to_index_map: dict[str, Any] + upgrader_bytecode_function_to_index_map: dict[str, Any], ) -> str: - version_map = torch._C._get_operator_version_map() + if len(sys.argv) < 2 or sys.argv[1] != "dummy": @@ -119,7 +119,7 @@ index 362ce427d5..245056f815 100644 sorted_version_map_ = sorted(version_map.items(), key=itemgetter(0)) # type: ignore[no-any-return] sorted_version_map = dict(sorted_version_map_) -@@ -381,7 +387,10 @@ def sort_upgrader(upgrader_list: list[dict[str, Any]]) -> list[dict[str, Any]]: +@@ -375,7 +381,10 @@ def sort_upgrader(upgrader_list: list[dict[str, Any]]) -> list[dict[str, Any]]: def main() -> None: @@ -132,7 +132,7 @@ index 362ce427d5..245056f815 100644 for up in sorted_upgrader_list: print("after sort upgrader : ", next(iter(up))) diff --git a/torchgen/shape_functions/gen_jit_shape_functions.py b/torchgen/shape_functions/gen_jit_shape_functions.py -index 56a3d8bf0d..490a3ea2e7 100644 +index 56a3d8bf0..ffd0785fd 100644 --- a/torchgen/shape_functions/gen_jit_shape_functions.py +++ b/torchgen/shape_functions/gen_jit_shape_functions.py @@ -1,6 +1,7 @@ @@ -143,7 +143,7 @@ index 56a3d8bf0d..490a3ea2e7 100644 from importlib.util import module_from_spec, spec_from_file_location from itertools import chain from pathlib import Path -@@ -18,16 +19,21 @@ you are in the root directory of the Pytorch git repo""" +@@ -18,17 +19,21 @@ you are in the root directory of the Pytorch git repo""" if not file_path.exists(): raise Exception(err_msg) # noqa: TRY002 @@ -157,6 +157,7 @@ index 56a3d8bf0d..490a3ea2e7 100644 - -bounded_compute_graph_mapping = module.bounded_compute_graph_mapping -shape_compute_graph_mapping = module.shape_compute_graph_mapping +- +if len(sys.argv) < 2 or sys.argv[1] != "dummy": + spec = importlib.util.spec_from_file_location(module_name, file_path) + assert spec is not None @@ -173,5 +174,5 @@ index 56a3d8bf0d..490a3ea2e7 100644 + bounded_compute_graph_mapping = {} + shape_compute_graph_mapping = {} - SHAPE_HEADER = r""" + /** diff --git a/gnu/packages/patches/python-pytorch-runpath-2.7.0.patch b/gnu/packages/patches/python-pytorch-runpath-2.7.0.patch deleted file mode 100644 index cd8bea370b8..00000000000 --- a/gnu/packages/patches/python-pytorch-runpath-2.7.0.patch +++ /dev/null @@ -1,30 +0,0 @@ -Libraries (such as 'libtorch_cpu.so') and executables (such as 'torch_shm_manager') -get installed, quite surprisingly, to 'lib/python3.8/site-packages/{bin,lib}'. -Make sure RUNPATH matches that. - -diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake -index be45936a8..7b19e5359 100644 ---- a/cmake/Dependencies.cmake -+++ b/cmake/Dependencies.cmake -@@ -4,7 +4,7 @@ if(APPLE) - set(CMAKE_MACOSX_RPATH ON) - set(_rpath_portable_origin "@loader_path") - else() -- set(_rpath_portable_origin $ORIGIN) -+ set(_rpath_portable_origin $ORIGIN/../lib) - endif(APPLE) - # Use separate rpaths during build and install phases - set(CMAKE_SKIP_BUILD_RPATH FALSE) -diff --git a/functorch/CMakeLists.txt b/functorch/CMakeLists.txt -index bdfa4bfe4..2a75e3825 100644 ---- a/functorch/CMakeLists.txt -+++ b/functorch/CMakeLists.txt -@@ -26,7 +26,7 @@ target_link_libraries(${PROJECT_NAME} PRIVATE pybind::pybind11) - - set_target_properties(${PROJECT_NAME} PROPERTIES LIBRARY_OUTPUT_DIRECTORY - ${CMAKE_BINARY_DIR}/functorch) --set_target_properties(${PROJECT_NAME} PROPERTIES INSTALL_RPATH "${_rpath_portable_origin}/../torch/lib") -+set_target_properties(${PROJECT_NAME} PROPERTIES INSTALL_RPATH "$ORIGIN/../torch/lib") - - # Copy-pasted prefix/suffix logic for Python extensions from - # https://github.com/pytorch/pytorch/blob/33bb8ae350611760139457b85842b1d7edf9aa11/caffe2/CMakeLists.txt#L1975 diff --git a/gnu/packages/patches/python-pytorch-runpath.patch b/gnu/packages/patches/python-pytorch-runpath.patch index 811de9e2888..cd8bea370b8 100644 --- a/gnu/packages/patches/python-pytorch-runpath.patch +++ b/gnu/packages/patches/python-pytorch-runpath.patch @@ -3,7 +3,7 @@ get installed, quite surprisingly, to 'lib/python3.8/site-packages/{bin,lib}'. Make sure RUNPATH matches that. diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake -index f1f2eb7..cb5caea 100644 +index be45936a8..7b19e5359 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -4,7 +4,7 @@ if(APPLE) @@ -16,7 +16,7 @@ index f1f2eb7..cb5caea 100644 # Use separate rpaths during build and install phases set(CMAKE_SKIP_BUILD_RPATH FALSE) diff --git a/functorch/CMakeLists.txt b/functorch/CMakeLists.txt -index bdfa4bf..2a75e38 100644 +index bdfa4bfe4..2a75e3825 100644 --- a/functorch/CMakeLists.txt +++ b/functorch/CMakeLists.txt @@ -26,7 +26,7 @@ target_link_libraries(${PROJECT_NAME} PRIVATE pybind::pybind11) diff --git a/gnu/packages/patches/python-pytorch-system-libraries-2.7.0.patch b/gnu/packages/patches/python-pytorch-system-libraries-2.7.0.patch deleted file mode 100644 index 6d8da60e413..00000000000 --- a/gnu/packages/patches/python-pytorch-system-libraries-2.7.0.patch +++ /dev/null @@ -1,442 +0,0 @@ -Patch build files to also system libraries instead of bundled ones for the -libraries not supported or working only by specifying USE_SYSTEM_LIBS. This -includes using the clog, cpuinfo, fbgemm, foxi, fp16, fxdiv, googletest, -httlib, ideep, miniz, nnpack, oneapi-dnnl, pocketfft, pthreadpool, -qnnpack-pytorch, tensorpipe, valgrind and xnnpack packages. - -diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt -index 085af373e..3287429b4 100644 ---- a/aten/src/ATen/CMakeLists.txt -+++ b/aten/src/ATen/CMakeLists.txt -@@ -468,9 +468,9 @@ if(AT_NNPACK_ENABLED) - list(APPEND ATen_CPU_DEPENDENCY_LIBS nnpack) # cpuinfo is added below - endif() - --if(MKLDNN_FOUND) -- list(APPEND ATen_CPU_DEPENDENCY_LIBS ${MKLDNN_LIBRARIES}) --endif(MKLDNN_FOUND) -+if(USE_MKLDNN) -+ list(APPEND ATen_CPU_DEPENDENCY_LIBS DNNL::dnnl) -+endif(USE_MKLDNN) - - if(USE_MKLDNN_ACL) - list(APPEND ATen_CPU_INCLUDE ${ACL_INCLUDE_DIRS}) -diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt -index d2d23b7ab..1a7e5a042 100644 ---- a/caffe2/CMakeLists.txt -+++ b/caffe2/CMakeLists.txt -@@ -91,9 +91,6 @@ if(NOT MSVC AND USE_XNNPACK) - if(NOT TARGET fxdiv) - set(FXDIV_BUILD_TESTS OFF CACHE BOOL "") - set(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "") -- add_subdirectory( -- "${FXDIV_SOURCE_DIR}" -- "${CMAKE_BINARY_DIR}/FXdiv") - endif() - endif() - -@@ -1135,7 +1132,6 @@ if(USE_XPU) - endif() - - if(NOT MSVC AND USE_XNNPACK) -- TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv) - endif() - - # ========================================================== -@@ -1254,8 +1250,8 @@ endif() - target_include_directories(torch_cpu PRIVATE - ${TORCH_ROOT}/third_party/cpp-httplib) - --target_include_directories(torch_cpu PRIVATE -- ${TORCH_ROOT}/third_party/nlohmann/include) -+find_package(httplib REQUIRED) -+target_link_libraries(torch_cpu PUBLIC httplib::httplib) - - install(DIRECTORY "${TORCH_SRC_DIR}/csrc" - DESTINATION ${TORCH_INSTALL_INCLUDE_DIR}/torch -@@ -1494,6 +1490,7 @@ target_link_libraries(torch_cpu PUBLIC c10) - target_link_libraries(torch_cpu PUBLIC ${Caffe2_PUBLIC_DEPENDENCY_LIBS}) - target_link_libraries(torch_cpu PRIVATE ${Caffe2_DEPENDENCY_LIBS}) - target_link_libraries(torch_cpu PRIVATE ${Caffe2_DEPENDENCY_WHOLE_LINK_LIBS}) -+target_link_libraries(torch_cpu PRIVATE miniz clog) - if(USE_MPI) - target_link_libraries(torch_cpu PRIVATE MPI::MPI_CXX) - endif() -@@ -1728,7 +1725,7 @@ if(BUILD_STATIC_RUNTIME_BENCHMARK) - add_executable(static_runtime_bench "${STATIC_RUNTIME_BENCHMARK_SRCS}") - add_executable(static_runtime_test "${STATIC_RUNTIME_TEST_SRCS}") - target_link_libraries(static_runtime_bench torch_library benchmark) -- target_link_libraries(static_runtime_test torch_library gtest_main) -+ target_link_libraries(static_runtime_test torch_library gtest_main gtest) - endif() - - if(BUILD_MOBILE_BENCHMARK) -@@ -1747,7 +1744,7 @@ if(BUILD_MOBILE_TEST) - foreach(test_src ${ATen_MOBILE_TEST_SRCS}) - get_filename_component(test_name ${test_src} NAME_WE) - add_executable(${test_name} "${test_src}") -- target_link_libraries(${test_name} torch_library gtest_main) -+ target_link_libraries(${test_name} torch_library gtest_main gtest) - target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>) - target_include_directories(${test_name} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>) - target_include_directories(${test_name} PRIVATE ${ATen_CPU_INCLUDE}) -@@ -1768,7 +1765,7 @@ if(BUILD_TEST) - if(NOT MSVC) - add_executable(${test_name}_${CPU_CAPABILITY} "${test_src}" ../aten/src/ATen/native/quantized/AffineQuantizerBase.cpp) - # TODO: Get rid of c10 dependency (which is only needed for the implementation of AT_ERROR) -- target_link_libraries(${test_name}_${CPU_CAPABILITY} c10 sleef gtest_main nlohmann) -+ target_link_libraries(${test_name}_${CPU_CAPABILITY} c10 sleef gtest_main gtest nlohmann) - if(USE_FBGEMM) - target_link_libraries(${test_name}_${CPU_CAPABILITY} fbgemm) - endif() -@@ -1782,7 +1779,7 @@ if(BUILD_TEST) - endif() - else() - add_executable(${test_name}_${CPU_CAPABILITY} "${test_src}") -- target_link_libraries(${test_name}_${CPU_CAPABILITY} torch_library sleef gtest_main) -+ target_link_libraries(${test_name}_${CPU_CAPABILITY} torch_library sleef gtest_main gtest) - endif() - target_include_directories(${test_name}_${CPU_CAPABILITY} PRIVATE $<INSTALL_INTERFACE:include>) - target_include_directories(${test_name}_${CPU_CAPABILITY} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>) -@@ -1799,7 +1796,7 @@ if(BUILD_TEST) - foreach(test_src ${Caffe2_CPU_TEST_SRCS}) - get_filename_component(test_name ${test_src} NAME_WE) - add_executable(${test_name} "${test_src}") -- target_link_libraries(${test_name} torch_library gtest_main) -+ target_link_libraries(${test_name} torch_library gtest_main gtest) - if(NOT MSVC) - target_link_libraries(${test_name} stdc++) - endif() -@@ -1823,7 +1820,7 @@ if(BUILD_TEST) - add_executable(${test_name} "${test_src}") - find_library(metal NAMES Metal) - find_library(foundation NAMES Foundation) -- target_link_libraries(${test_name} torch_library gtest_main ${metal} ${foundation}) -+ target_link_libraries(${test_name} torch_library gtest_main gtest ${metal} ${foundation}) - target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>) - target_include_directories(${test_name} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>) - target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE}) -@@ -1843,7 +1840,7 @@ if(BUILD_TEST) - foreach(test_src ${Caffe2_GPU_TEST_SRCS}) - get_filename_component(test_name ${test_src} NAME_WE) - add_executable(${test_name} "${test_src}") -- target_link_libraries(${test_name} torch_library gtest_main) -+ target_link_libraries(${test_name} torch_library gtest_main gtest) - if(USE_CUDNN AND ${test_name} MATCHES "cudnn") - target_link_libraries(${test_name} torch::cudnn) - endif() -@@ -1865,7 +1862,7 @@ if(BUILD_TEST) - foreach(test_src ${Caffe2_XPU_TEST_SRCS}) - get_filename_component(test_name ${test_src} NAME_WE) - add_executable(${test_name} "${test_src}") -- target_link_libraries(${test_name} torch_library gtest_main) -+ target_link_libraries(${test_name} torch_library gtest_main gtest) - target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>) - target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE}) - add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>) -@@ -1880,7 +1877,7 @@ if(BUILD_TEST) - foreach(test_src ${Caffe2_VULKAN_TEST_SRCS}) - get_filename_component(test_name ${test_src} NAME_WE) - add_executable(${test_name} "${test_src}") -- target_link_libraries(${test_name} torch_library gtest_main) -+ target_link_libraries(${test_name} torch_library gtest_main gtest) - target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>) - target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE}) - add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>) -@@ -1899,7 +1896,7 @@ if(BUILD_TEST) - foreach(test_src ${Caffe2_HIP_TEST_SRCS}) - get_filename_component(test_name ${test_src} NAME_WE) - add_executable(${test_name} "${test_src}") -- target_link_libraries(${test_name} torch_library gtest_main) -+ target_link_libraries(${test_name} torch_library gtest_main gtest) - target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>) - target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE} ${Caffe2_HIP_INCLUDE}) - target_compile_options(${test_name} PRIVATE ${HIP_CXX_FLAGS}) -diff --git a/caffe2/serialize/CMakeLists.txt b/caffe2/serialize/CMakeLists.txt -index ebbff0f29..dcded2590 100644 ---- a/caffe2/serialize/CMakeLists.txt -+++ b/caffe2/serialize/CMakeLists.txt -@@ -2,7 +2,6 @@ file(GLOB tmp *_test.cc) - - set(Caffe2_CPU_TEST_SRCS ${Caffe2_CPU_TEST_SRCS} ${tmp}) - list(APPEND Caffe2_CPU_SRCS -- ${PROJECT_SOURCE_DIR}/third_party/miniz-3.0.2/miniz.c - ${CMAKE_CURRENT_SOURCE_DIR}/inline_container.cc - ${CMAKE_CURRENT_SOURCE_DIR}/istream_adapter.cc - ${CMAKE_CURRENT_SOURCE_DIR}/file_adapter.cc -diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake -index be45936a8..bb1aa1cc1 100644 ---- a/cmake/Dependencies.cmake -+++ b/cmake/Dependencies.cmake -@@ -276,7 +276,7 @@ endif() - # --- [ PocketFFT - set(AT_POCKETFFT_ENABLED 0) - if(NOT AT_MKL_ENABLED) -- set(POCKETFFT_INCLUDE_DIR "${Torch_SOURCE_DIR}/third_party/pocketfft/") -+ set(POCKETFFT_INCLUDE_DIR "#POCKETFFT_INCLUDE_DIR") - if(NOT EXISTS "${POCKETFFT_INCLUDE_DIR}") - message(FATAL_ERROR "pocketfft directory not found, expected ${POCKETFFT_INCLUDE_DIR}") - elif(NOT EXISTS "${POCKETFFT_INCLUDE_DIR}/pocketfft_hdronly.h") -@@ -460,15 +460,6 @@ if(USE_PYTORCH_QNNPACK) - set(PYTORCH_QNNPACK_BUILD_TESTS OFF CACHE BOOL "") - set(PYTORCH_QNNPACK_BUILD_BENCHMARKS OFF CACHE BOOL "") - set(PYTORCH_QNNPACK_LIBRARY_TYPE "static" CACHE STRING "") -- add_subdirectory( -- "${PYTORCH_QNNPACK_SOURCE_DIR}" -- "${CONFU_DEPENDENCIES_BINARY_DIR}/pytorch_qnnpack") -- # We build static versions of QNNPACK and pthreadpool but link -- # them into a shared library for Caffe2, so they need PIC. -- set_property(TARGET pytorch_qnnpack PROPERTY POSITION_INDEPENDENT_CODE ON) -- set_property(TARGET cpuinfo PROPERTY POSITION_INDEPENDENT_CODE ON) -- # QNNPACK depends on gemmlowp headers -- target_include_directories(pytorch_qnnpack PRIVATE "${CAFFE2_THIRD_PARTY_ROOT}/gemmlowp") - endif() - - list(APPEND Caffe2_DEPENDENCY_LIBS pytorch_qnnpack) -@@ -558,16 +549,15 @@ if(USE_XNNPACK AND NOT USE_SYSTEM_XNNPACK) - list(APPEND Caffe2_DEPENDENCY_LIBS XNNPACK microkernels-prod) - elseif(NOT TARGET XNNPACK AND USE_SYSTEM_XNNPACK) - add_library(XNNPACK SHARED IMPORTED) -- add_library(microkernels-prod SHARED IMPORTED) -+ add_library(microkernels-prod INTERFACE IMPORTED) - find_library(XNNPACK_LIBRARY XNNPACK) -- find_library(microkernels-prod_LIBRARY microkernels-prod) - set_property(TARGET XNNPACK PROPERTY IMPORTED_LOCATION "${XNNPACK_LIBRARY}") -- set_property(TARGET microkernels-prod PROPERTY IMPORTED_LOCATION "${microkernels-prod_LIBRARY}") -- if(NOT XNNPACK_LIBRARY or NOT microkernels-prod_LIBRARY) -+ set_property(TARGET microkernels-prod PROPERTY INTERFACE_LINK_LIBRARIES XNNPACK) -+ if(NOT XNNPACK_LIBRARY) - message(FATAL_ERROR "Cannot find XNNPACK") - endif() - message("-- Found XNNPACK: ${XNNPACK_LIBRARY}") -- list(APPEND Caffe2_DEPENDENCY_LIBS XNNPACK microkernels-prod) -+ list(APPEND Caffe2_DEPENDENCY_LIBS XNNPACK) - endif() - - # ---[ Vulkan deps -@@ -650,11 +640,6 @@ if(BUILD_TEST OR BUILD_MOBILE_BENCHMARK OR BUILD_MOBILE_TEST) - # this shouldn't be necessary anymore. - get_property(INC_DIR_temp DIRECTORY PROPERTY INCLUDE_DIRECTORIES) - set_property(DIRECTORY PROPERTY INCLUDE_DIRECTORIES "") -- add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/googletest) -- set_property(DIRECTORY PROPERTY INCLUDE_DIRECTORIES ${INC_DIR_temp}) -- -- include_directories(BEFORE SYSTEM ${CMAKE_CURRENT_LIST_DIR}/../third_party/googletest/googletest/include) -- include_directories(BEFORE SYSTEM ${CMAKE_CURRENT_LIST_DIR}/../third_party/googletest/googlemock/include) - - # We will not need to test benchmark lib itself. - set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "Disable benchmark testing as we don't need it.") -@@ -732,16 +717,6 @@ if(USE_FBGEMM) - if(USE_ASAN) - set(USE_SANITIZER "address,undefined" CACHE STRING "-fsanitize options for FBGEMM") - endif() -- add_subdirectory("${FBGEMM_SOURCE_DIR}") -- set_property(TARGET fbgemm_generic PROPERTY POSITION_INDEPENDENT_CODE ON) -- set_property(TARGET fbgemm_avx2 PROPERTY POSITION_INDEPENDENT_CODE ON) -- set_property(TARGET fbgemm_avx512 PROPERTY POSITION_INDEPENDENT_CODE ON) -- set_property(TARGET fbgemm PROPERTY POSITION_INDEPENDENT_CODE ON) -- if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 13.0.0) -- # See https://github.com/pytorch/pytorch/issues/74352 -- target_compile_options_if_supported(asmjit -Wno-deprecated-copy) -- target_compile_options_if_supported(asmjit -Wno-unused-but-set-variable) -- endif() - if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") - target_compile_options_if_supported(asmjit -Wno-extra-semi) - target_compile_options_if_supported(fbgemm -Wno-extra-semi) -@@ -829,7 +804,7 @@ if(NOT TARGET fp16 AND NOT USE_SYSTEM_FP16) - "${CONFU_DEPENDENCIES_BINARY_DIR}/FP16") - endif() - elseif(NOT TARGET fp16 AND USE_SYSTEM_FP16) -- add_library(fp16 STATIC "/usr/include/fp16.h") -+ add_library(fp16 STATIC "#FP16_INCLUDE_DIR") - set_target_properties(fp16 PROPERTIES LINKER_LANGUAGE C) - endif() - list(APPEND Caffe2_DEPENDENCY_LIBS fp16) -@@ -1170,7 +1145,6 @@ if(USE_DISTRIBUTED AND USE_TENSORPIPE) - message(WARNING "Archived TensorPipe forces CMake compatibility mode") - set(CMAKE_POLICY_VERSION_MINIMUM 3.5) - endif() -- add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/tensorpipe) - if(CMAKE_VERSION VERSION_GREATER_EQUAL "4.0.0") - unset(CMAKE_POLICY_VERSION_MINIMUM) - endif() -@@ -1340,7 +1314,7 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX) - endif() - set_property(TARGET onnx_proto PROPERTY IMPORTED_LOCATION ${ONNX_PROTO_LIBRARY}) - message("-- Found onnx: ${ONNX_LIBRARY} ${ONNX_PROTO_LIBRARY}") -- list(APPEND Caffe2_DEPENDENCY_LIBS onnx_proto onnx) -+ list(APPEND Caffe2_DEPENDENCY_LIBS onnx_proto onnx onnx_optimizer) - endif() - # Recover the build shared libs option. - set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS}) -@@ -1500,9 +1474,8 @@ if(NOT INTERN_BUILD_MOBILE) - endif() - if(USE_MKLDNN) - include(${CMAKE_CURRENT_LIST_DIR}/public/mkldnn.cmake) -- if(MKLDNN_FOUND) -+ if(DNNL_FOUND) - set(AT_MKLDNN_ENABLED 1) -- include_directories(AFTER SYSTEM ${MKLDNN_INCLUDE_DIR}) - else() - message(WARNING "MKLDNN could not be found.") - caffe2_update_option(USE_MKLDNN OFF) -@@ -1583,7 +1556,7 @@ endif() - # - set(TEMP_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) - set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libs" FORCE) --add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt) -+find_package(fmt) - - # Disable compiler feature checks for `fmt`. - # -@@ -1592,7 +1565,6 @@ add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt) - # CMAKE_CXX_FLAGS in ways that break feature checks. Since we already know - # `fmt` is compatible with a superset of the compilers that PyTorch is, it - # shouldn't be too bad to just disable the checks. --set_target_properties(fmt-header-only PROPERTIES INTERFACE_COMPILE_FEATURES "") - - list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only) - set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS} CACHE BOOL "Build shared libs" FORCE) -diff --git a/cmake/External/nnpack.cmake b/cmake/External/nnpack.cmake -index 8a4a310d6..f413d2e61 100644 ---- a/cmake/External/nnpack.cmake -+++ b/cmake/External/nnpack.cmake -@@ -40,7 +40,7 @@ endif() - # (3) Android, iOS, Linux, macOS - supported - ############################################################################## - --if(ANDROID OR IOS OR ${CMAKE_SYSTEM_NAME} STREQUAL "Linux" OR ${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") -+if(FALSE) - message(STATUS "Brace yourself, we are building NNPACK") - set(CAFFE2_THIRD_PARTY_ROOT ${PROJECT_SOURCE_DIR}/third_party) - -@@ -94,6 +94,5 @@ endif() - # (4) Catch-all: not supported. - ############################################################################## - --message(WARNING "Unknown platform - I don't know how to build NNPACK. " -- "See cmake/External/nnpack.cmake for details.") --set(USE_NNPACK OFF) -+set(NNPACK_FOUND TRUE) -+set(USE_NNPACK ON) -diff --git a/cmake/public/mkldnn.cmake b/cmake/public/mkldnn.cmake -index 87935625f..9f8fa3df8 100644 ---- a/cmake/public/mkldnn.cmake -+++ b/cmake/public/mkldnn.cmake -@@ -4,7 +4,7 @@ if(CPU_AARCH64) - include(${CMAKE_CURRENT_LIST_DIR}/ComputeLibrary.cmake) - endif() - --find_package(MKLDNN QUIET) -+find_package(DNNL REQUIRED) - - if(NOT TARGET caffe2::mkldnn) - add_library(caffe2::mkldnn INTERFACE IMPORTED) -@@ -15,4 +15,4 @@ set_property( - ${MKLDNN_INCLUDE_DIR}) - set_property( - TARGET caffe2::mkldnn PROPERTY INTERFACE_LINK_LIBRARIES -- ${MKLDNN_LIBRARIES}) -+ DNNL::dnnl) -diff --git a/setup.py b/setup.py -index 61ee9363f..3691cc35c 100644 ---- a/setup.py -+++ b/setup.py -@@ -508,13 +508,9 @@ def build_deps(): - # Windows has very poor support for them. - sym_files = [ - "tools/shared/_utils_internal.py", -- "torch/utils/benchmark/utils/valgrind_wrapper/callgrind.h", -- "torch/utils/benchmark/utils/valgrind_wrapper/valgrind.h", - ] - orig_files = [ - "torch/_utils_internal.py", -- "third_party/valgrind-headers/callgrind.h", -- "third_party/valgrind-headers/valgrind.h", - ] - for sym_file, orig_file in zip(sym_files, orig_files): - same = False -diff --git a/test/cpp/c10d/CMakeLists.txt b/test/cpp/c10d/CMakeLists.txt -index 5b423241d..e069accd6 100644 ---- a/test/cpp/c10d/CMakeLists.txt -+++ b/test/cpp/c10d/CMakeLists.txt -@@ -26,17 +26,17 @@ function(c10d_add_test test_src) - endif() - endfunction() - --c10d_add_test(BackoffTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST OFF) --c10d_add_test(FileStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST ${INSTALL_TEST}) --c10d_add_test(TCPStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST ${INSTALL_TEST}) -+c10d_add_test(BackoffTest.cpp LINK_LIBRARIES torch_cpu gtest_main gtest INSTALL_TEST OFF) -+c10d_add_test(FileStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main gtest INSTALL_TEST ${INSTALL_TEST}) -+c10d_add_test(TCPStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main gtest INSTALL_TEST ${INSTALL_TEST}) - if(NOT WIN32) -- c10d_add_test(HashStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST ${INSTALL_TEST}) -+ c10d_add_test(HashStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main gtest INSTALL_TEST ${INSTALL_TEST}) - endif() - - if(USE_CUDA) - if(USE_GLOO AND USE_C10D_GLOO) -- c10d_add_test(ProcessGroupGlooTest.cpp LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main INSTALL_TEST ${INSTALL_TEST}) -- c10d_add_test(ProcessGroupGlooAsyncTest.cpp LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main INSTALL_TEST ${INSTALL_TEST}) -+ c10d_add_test(ProcessGroupGlooTest.cpp LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest INSTALL_TEST ${INSTALL_TEST}) -+ c10d_add_test(ProcessGroupGlooAsyncTest.cpp LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest INSTALL_TEST ${INSTALL_TEST}) - endif() - if(USE_NCCL AND USE_C10D_NCCL) - # NCCL is a private dependency of libtorch, but the tests include some -@@ -45,10 +45,10 @@ if(USE_CUDA) - # a private dependency of the tests as well. - c10d_add_test( - ProcessGroupNCCLTest.cpp -- LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main __caffe2_nccl INSTALL_TEST ${INSTALL_TEST}) -+ LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest __caffe2_nccl INSTALL_TEST ${INSTALL_TEST}) - c10d_add_test( - ProcessGroupNCCLErrorsTest.cpp -- LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main __caffe2_nccl INSTALL_TEST ${INSTALL_TEST}) -+ LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest __caffe2_nccl INSTALL_TEST ${INSTALL_TEST}) - if(INSTALL_TEST) - install(TARGETS c10d_cuda_test DESTINATION lib) - endif() -@@ -60,14 +60,14 @@ if(USE_CUDA) - # a private dependency of the tests as well. - c10d_add_test( - ProcessGroupUCCTest.cpp -- LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main __caffe2_ucc INSTALL_TEST ${INSTALL_TEST}) -+ LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest __caffe2_ucc INSTALL_TEST ${INSTALL_TEST}) - if(INSTALL_TEST) - install(TARGETS c10d_cuda_test DESTINATION lib) - endif() - endif() - else() - if(USE_GLOO AND USE_C10D_GLOO) -- c10d_add_test(ProcessGroupGlooTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST OFF) -+ c10d_add_test(ProcessGroupGlooTest.cpp LINK_LIBRARIES torch_cpu gtest_main gtest INSTALL_TEST OFF) - endif() - endif() - -diff --git a/test/cpp/tensorexpr/CMakeLists.txt b/test/cpp/tensorexpr/CMakeLists.txt -index 9c409e078..6cddd8de4 100644 ---- a/test/cpp/tensorexpr/CMakeLists.txt -+++ b/test/cpp/tensorexpr/CMakeLists.txt -@@ -51,7 +51,7 @@ target_include_directories(tutorial_tensorexpr PRIVATE ${ATen_CPU_INCLUDE}) - # pthreadpool header. For some build environment we need add the dependency - # explicitly. - if(USE_PTHREADPOOL) -- target_link_libraries(test_tensorexpr PRIVATE pthreadpool_interface) -+ target_link_libraries(test_tensorexpr PRIVATE pthreadpool) - endif() - if(USE_CUDA) - target_compile_definitions(test_tensorexpr PRIVATE USE_CUDA) -diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt -index 8b8ebdc6e..034b5e56c 100644 ---- a/torch/CMakeLists.txt -+++ b/torch/CMakeLists.txt -@@ -82,8 +82,6 @@ set(TORCH_PYTHON_LINK_LIBRARIES - Python::Module - pybind::pybind11 - opentelemetry::api -- httplib -- nlohmann - shm - fmt::fmt-header-only - ATEN_CPU_FILES_GEN_LIB) diff --git a/gnu/packages/patches/python-pytorch-system-libraries.patch b/gnu/packages/patches/python-pytorch-system-libraries.patch index 6b3f36f3f0c..6d8da60e413 100644 --- a/gnu/packages/patches/python-pytorch-system-libraries.patch +++ b/gnu/packages/patches/python-pytorch-system-libraries.patch @@ -5,10 +5,10 @@ httlib, ideep, miniz, nnpack, oneapi-dnnl, pocketfft, pthreadpool, qnnpack-pytorch, tensorpipe, valgrind and xnnpack packages. diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt -index 6d9152a4d0..97cb945722 100644 +index 085af373e..3287429b4 100644 --- a/aten/src/ATen/CMakeLists.txt +++ b/aten/src/ATen/CMakeLists.txt -@@ -414,9 +414,9 @@ if(AT_NNPACK_ENABLED) +@@ -468,9 +468,9 @@ if(AT_NNPACK_ENABLED) list(APPEND ATen_CPU_DEPENDENCY_LIBS nnpack) # cpuinfo is added below endif() @@ -19,10 +19,10 @@ index 6d9152a4d0..97cb945722 100644 + list(APPEND ATen_CPU_DEPENDENCY_LIBS DNNL::dnnl) +endif(USE_MKLDNN) - if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "^(s390x|ppc64le)$") - list(APPEND ATen_CPU_DEPENDENCY_LIBS cpuinfo) + if(USE_MKLDNN_ACL) + list(APPEND ATen_CPU_INCLUDE ${ACL_INCLUDE_DIRS}) diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt -index 9be7f3732f..111215dacc 100644 +index d2d23b7ab..1a7e5a042 100644 --- a/caffe2/CMakeLists.txt +++ b/caffe2/CMakeLists.txt @@ -91,9 +91,6 @@ if(NOT MSVC AND USE_XNNPACK) @@ -35,7 +35,7 @@ index 9be7f3732f..111215dacc 100644 endif() endif() -@@ -1081,7 +1078,6 @@ if(USE_XPU) +@@ -1135,7 +1132,6 @@ if(USE_XPU) endif() if(NOT MSVC AND USE_XNNPACK) @@ -43,7 +43,7 @@ index 9be7f3732f..111215dacc 100644 endif() # ========================================================== -@@ -1195,8 +1191,8 @@ endif() +@@ -1254,8 +1250,8 @@ endif() target_include_directories(torch_cpu PRIVATE ${TORCH_ROOT}/third_party/cpp-httplib) @@ -54,7 +54,7 @@ index 9be7f3732f..111215dacc 100644 install(DIRECTORY "${TORCH_SRC_DIR}/csrc" DESTINATION ${TORCH_INSTALL_INCLUDE_DIR}/torch -@@ -1425,6 +1421,7 @@ target_link_libraries(torch_cpu PUBLIC c10) +@@ -1494,6 +1490,7 @@ target_link_libraries(torch_cpu PUBLIC c10) target_link_libraries(torch_cpu PUBLIC ${Caffe2_PUBLIC_DEPENDENCY_LIBS}) target_link_libraries(torch_cpu PRIVATE ${Caffe2_DEPENDENCY_LIBS}) target_link_libraries(torch_cpu PRIVATE ${Caffe2_DEPENDENCY_WHOLE_LINK_LIBS}) @@ -62,7 +62,7 @@ index 9be7f3732f..111215dacc 100644 if(USE_MPI) target_link_libraries(torch_cpu PRIVATE MPI::MPI_CXX) endif() -@@ -1659,7 +1656,7 @@ if(BUILD_STATIC_RUNTIME_BENCHMARK) +@@ -1728,7 +1725,7 @@ if(BUILD_STATIC_RUNTIME_BENCHMARK) add_executable(static_runtime_bench "${STATIC_RUNTIME_BENCHMARK_SRCS}") add_executable(static_runtime_test "${STATIC_RUNTIME_TEST_SRCS}") target_link_libraries(static_runtime_bench torch_library benchmark) @@ -71,7 +71,7 @@ index 9be7f3732f..111215dacc 100644 endif() if(BUILD_MOBILE_BENCHMARK) -@@ -1678,7 +1675,7 @@ if(BUILD_MOBILE_TEST) +@@ -1747,7 +1744,7 @@ if(BUILD_MOBILE_TEST) foreach(test_src ${ATen_MOBILE_TEST_SRCS}) get_filename_component(test_name ${test_src} NAME_WE) add_executable(${test_name} "${test_src}") @@ -80,7 +80,7 @@ index 9be7f3732f..111215dacc 100644 target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>) target_include_directories(${test_name} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>) target_include_directories(${test_name} PRIVATE ${ATen_CPU_INCLUDE}) -@@ -1699,7 +1696,7 @@ if(BUILD_TEST) +@@ -1768,7 +1765,7 @@ if(BUILD_TEST) if(NOT MSVC) add_executable(${test_name}_${CPU_CAPABILITY} "${test_src}" ../aten/src/ATen/native/quantized/AffineQuantizerBase.cpp) # TODO: Get rid of c10 dependency (which is only needed for the implementation of AT_ERROR) @@ -89,7 +89,7 @@ index 9be7f3732f..111215dacc 100644 if(USE_FBGEMM) target_link_libraries(${test_name}_${CPU_CAPABILITY} fbgemm) endif() -@@ -1713,7 +1710,7 @@ if(BUILD_TEST) +@@ -1782,7 +1779,7 @@ if(BUILD_TEST) endif() else() add_executable(${test_name}_${CPU_CAPABILITY} "${test_src}") @@ -98,7 +98,7 @@ index 9be7f3732f..111215dacc 100644 endif() target_include_directories(${test_name}_${CPU_CAPABILITY} PRIVATE $<INSTALL_INTERFACE:include>) target_include_directories(${test_name}_${CPU_CAPABILITY} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>) -@@ -1730,7 +1727,7 @@ if(BUILD_TEST) +@@ -1799,7 +1796,7 @@ if(BUILD_TEST) foreach(test_src ${Caffe2_CPU_TEST_SRCS}) get_filename_component(test_name ${test_src} NAME_WE) add_executable(${test_name} "${test_src}") @@ -107,7 +107,34 @@ index 9be7f3732f..111215dacc 100644 if(NOT MSVC) target_link_libraries(${test_name} stdc++) endif() -@@ -1810,7 +1807,7 @@ if(BUILD_TEST) +@@ -1823,7 +1820,7 @@ if(BUILD_TEST) + add_executable(${test_name} "${test_src}") + find_library(metal NAMES Metal) + find_library(foundation NAMES Foundation) +- target_link_libraries(${test_name} torch_library gtest_main ${metal} ${foundation}) ++ target_link_libraries(${test_name} torch_library gtest_main gtest ${metal} ${foundation}) + target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>) + target_include_directories(${test_name} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>) + target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE}) +@@ -1843,7 +1840,7 @@ if(BUILD_TEST) + foreach(test_src ${Caffe2_GPU_TEST_SRCS}) + get_filename_component(test_name ${test_src} NAME_WE) + add_executable(${test_name} "${test_src}") +- target_link_libraries(${test_name} torch_library gtest_main) ++ target_link_libraries(${test_name} torch_library gtest_main gtest) + if(USE_CUDNN AND ${test_name} MATCHES "cudnn") + target_link_libraries(${test_name} torch::cudnn) + endif() +@@ -1865,7 +1862,7 @@ if(BUILD_TEST) + foreach(test_src ${Caffe2_XPU_TEST_SRCS}) + get_filename_component(test_name ${test_src} NAME_WE) + add_executable(${test_name} "${test_src}") +- target_link_libraries(${test_name} torch_library gtest_main) ++ target_link_libraries(${test_name} torch_library gtest_main gtest) + target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>) + target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE}) + add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>) +@@ -1880,7 +1877,7 @@ if(BUILD_TEST) foreach(test_src ${Caffe2_VULKAN_TEST_SRCS}) get_filename_component(test_name ${test_src} NAME_WE) add_executable(${test_name} "${test_src}") @@ -116,23 +143,32 @@ index 9be7f3732f..111215dacc 100644 target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>) target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE}) add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>) +@@ -1899,7 +1896,7 @@ if(BUILD_TEST) + foreach(test_src ${Caffe2_HIP_TEST_SRCS}) + get_filename_component(test_name ${test_src} NAME_WE) + add_executable(${test_name} "${test_src}") +- target_link_libraries(${test_name} torch_library gtest_main) ++ target_link_libraries(${test_name} torch_library gtest_main gtest) + target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>) + target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE} ${Caffe2_HIP_INCLUDE}) + target_compile_options(${test_name} PRIVATE ${HIP_CXX_FLAGS}) diff --git a/caffe2/serialize/CMakeLists.txt b/caffe2/serialize/CMakeLists.txt -index 1552b59d0d..67e1a9a1a3 100644 +index ebbff0f29..dcded2590 100644 --- a/caffe2/serialize/CMakeLists.txt +++ b/caffe2/serialize/CMakeLists.txt @@ -2,7 +2,6 @@ file(GLOB tmp *_test.cc) set(Caffe2_CPU_TEST_SRCS ${Caffe2_CPU_TEST_SRCS} ${tmp}) list(APPEND Caffe2_CPU_SRCS -- ${PROJECT_SOURCE_DIR}/third_party/miniz-2.1.0/miniz.c +- ${PROJECT_SOURCE_DIR}/third_party/miniz-3.0.2/miniz.c ${CMAKE_CURRENT_SOURCE_DIR}/inline_container.cc ${CMAKE_CURRENT_SOURCE_DIR}/istream_adapter.cc ${CMAKE_CURRENT_SOURCE_DIR}/file_adapter.cc diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake -index e78305e0a8..5b3c3d7bf2 100644 +index be45936a8..bb1aa1cc1 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake -@@ -270,7 +270,7 @@ endif() +@@ -276,7 +276,7 @@ endif() # --- [ PocketFFT set(AT_POCKETFFT_ENABLED 0) if(NOT AT_MKL_ENABLED) @@ -141,7 +177,7 @@ index e78305e0a8..5b3c3d7bf2 100644 if(NOT EXISTS "${POCKETFFT_INCLUDE_DIR}") message(FATAL_ERROR "pocketfft directory not found, expected ${POCKETFFT_INCLUDE_DIR}") elif(NOT EXISTS "${POCKETFFT_INCLUDE_DIR}/pocketfft_hdronly.h") -@@ -465,15 +465,6 @@ if(USE_PYTORCH_QNNPACK) +@@ -460,15 +460,6 @@ if(USE_PYTORCH_QNNPACK) set(PYTORCH_QNNPACK_BUILD_TESTS OFF CACHE BOOL "") set(PYTORCH_QNNPACK_BUILD_BENCHMARKS OFF CACHE BOOL "") set(PYTORCH_QNNPACK_LIBRARY_TYPE "static" CACHE STRING "") @@ -154,10 +190,31 @@ index e78305e0a8..5b3c3d7bf2 100644 - set_property(TARGET cpuinfo PROPERTY POSITION_INDEPENDENT_CODE ON) - # QNNPACK depends on gemmlowp headers - target_include_directories(pytorch_qnnpack PRIVATE "${CAFFE2_THIRD_PARTY_ROOT}/gemmlowp") + endif() - if(PYTORCH_QNNPACK_CUSTOM_THREADPOOL) - target_compile_definitions( -@@ -660,11 +651,6 @@ if(BUILD_TEST OR BUILD_MOBILE_BENCHMARK OR BUILD_MOBILE_TEST) + list(APPEND Caffe2_DEPENDENCY_LIBS pytorch_qnnpack) +@@ -558,16 +549,15 @@ if(USE_XNNPACK AND NOT USE_SYSTEM_XNNPACK) + list(APPEND Caffe2_DEPENDENCY_LIBS XNNPACK microkernels-prod) + elseif(NOT TARGET XNNPACK AND USE_SYSTEM_XNNPACK) + add_library(XNNPACK SHARED IMPORTED) +- add_library(microkernels-prod SHARED IMPORTED) ++ add_library(microkernels-prod INTERFACE IMPORTED) + find_library(XNNPACK_LIBRARY XNNPACK) +- find_library(microkernels-prod_LIBRARY microkernels-prod) + set_property(TARGET XNNPACK PROPERTY IMPORTED_LOCATION "${XNNPACK_LIBRARY}") +- set_property(TARGET microkernels-prod PROPERTY IMPORTED_LOCATION "${microkernels-prod_LIBRARY}") +- if(NOT XNNPACK_LIBRARY or NOT microkernels-prod_LIBRARY) ++ set_property(TARGET microkernels-prod PROPERTY INTERFACE_LINK_LIBRARIES XNNPACK) ++ if(NOT XNNPACK_LIBRARY) + message(FATAL_ERROR "Cannot find XNNPACK") + endif() + message("-- Found XNNPACK: ${XNNPACK_LIBRARY}") +- list(APPEND Caffe2_DEPENDENCY_LIBS XNNPACK microkernels-prod) ++ list(APPEND Caffe2_DEPENDENCY_LIBS XNNPACK) + endif() + + # ---[ Vulkan deps +@@ -650,11 +640,6 @@ if(BUILD_TEST OR BUILD_MOBILE_BENCHMARK OR BUILD_MOBILE_TEST) # this shouldn't be necessary anymore. get_property(INC_DIR_temp DIRECTORY PROPERTY INCLUDE_DIRECTORIES) set_property(DIRECTORY PROPERTY INCLUDE_DIRECTORIES "") @@ -169,7 +226,7 @@ index e78305e0a8..5b3c3d7bf2 100644 # We will not need to test benchmark lib itself. set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "Disable benchmark testing as we don't need it.") -@@ -742,16 +728,6 @@ if(USE_FBGEMM) +@@ -732,16 +717,6 @@ if(USE_FBGEMM) if(USE_ASAN) set(USE_SANITIZER "address,undefined" CACHE STRING "-fsanitize options for FBGEMM") endif() @@ -183,27 +240,27 @@ index e78305e0a8..5b3c3d7bf2 100644 - target_compile_options_if_supported(asmjit -Wno-deprecated-copy) - target_compile_options_if_supported(asmjit -Wno-unused-but-set-variable) - endif() + if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") + target_compile_options_if_supported(asmjit -Wno-extra-semi) + target_compile_options_if_supported(fbgemm -Wno-extra-semi) +@@ -829,7 +804,7 @@ if(NOT TARGET fp16 AND NOT USE_SYSTEM_FP16) + "${CONFU_DEPENDENCIES_BINARY_DIR}/FP16") endif() - - if(USE_FBGEMM) -@@ -819,7 +795,7 @@ if(NOT TARGET fp16 AND NOT USE_SYSTEM_FP16) - "${FP16_SOURCE_DIR}" - "${CONFU_DEPENDENCIES_BINARY_DIR}/FP16") elseif(NOT TARGET fp16 AND USE_SYSTEM_FP16) - add_library(fp16 STATIC "/usr/include/fp16.h") + add_library(fp16 STATIC "#FP16_INCLUDE_DIR") set_target_properties(fp16 PROPERTIES LINKER_LANGUAGE C) endif() list(APPEND Caffe2_DEPENDENCY_LIBS fp16) -@@ -1161,7 +1137,6 @@ if(USE_DISTRIBUTED AND USE_TENSORPIPE) - - # Tensorpipe uses cuda_add_library - torch_update_find_cuda_flags() +@@ -1170,7 +1145,6 @@ if(USE_DISTRIBUTED AND USE_TENSORPIPE) + message(WARNING "Archived TensorPipe forces CMake compatibility mode") + set(CMAKE_POLICY_VERSION_MINIMUM 3.5) + endif() - add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/tensorpipe) - - list(APPEND Caffe2_DEPENDENCY_LIBS tensorpipe) - list(APPEND Caffe2_DEPENDENCY_LIBS nlohmann) -@@ -1317,7 +1292,7 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX) + if(CMAKE_VERSION VERSION_GREATER_EQUAL "4.0.0") + unset(CMAKE_POLICY_VERSION_MINIMUM) + endif() +@@ -1340,7 +1314,7 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX) endif() set_property(TARGET onnx_proto PROPERTY IMPORTED_LOCATION ${ONNX_PROTO_LIBRARY}) message("-- Found onnx: ${ONNX_LIBRARY} ${ONNX_PROTO_LIBRARY}") @@ -212,7 +269,7 @@ index e78305e0a8..5b3c3d7bf2 100644 endif() # Recover the build shared libs option. set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS}) -@@ -1465,9 +1440,8 @@ if(NOT INTERN_BUILD_MOBILE) +@@ -1500,9 +1474,8 @@ if(NOT INTERN_BUILD_MOBILE) endif() if(USE_MKLDNN) include(${CMAKE_CURRENT_LIST_DIR}/public/mkldnn.cmake) @@ -223,7 +280,7 @@ index e78305e0a8..5b3c3d7bf2 100644 else() message(WARNING "MKLDNN could not be found.") caffe2_update_option(USE_MKLDNN OFF) -@@ -1519,7 +1493,7 @@ endif() +@@ -1583,7 +1556,7 @@ endif() # set(TEMP_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libs" FORCE) @@ -232,7 +289,7 @@ index e78305e0a8..5b3c3d7bf2 100644 # Disable compiler feature checks for `fmt`. # -@@ -1528,7 +1502,6 @@ add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt) +@@ -1592,7 +1565,6 @@ add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt) # CMAKE_CXX_FLAGS in ways that break feature checks. Since we already know # `fmt` is compatible with a superset of the compilers that PyTorch is, it # shouldn't be too bad to just disable the checks. @@ -241,7 +298,7 @@ index e78305e0a8..5b3c3d7bf2 100644 list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only) set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS} CACHE BOOL "Build shared libs" FORCE) diff --git a/cmake/External/nnpack.cmake b/cmake/External/nnpack.cmake -index 9d5f0643ec..c3624e582a 100644 +index 8a4a310d6..f413d2e61 100644 --- a/cmake/External/nnpack.cmake +++ b/cmake/External/nnpack.cmake @@ -40,7 +40,7 @@ endif() @@ -253,7 +310,7 @@ index 9d5f0643ec..c3624e582a 100644 message(STATUS "Brace yourself, we are building NNPACK") set(CAFFE2_THIRD_PARTY_ROOT ${PROJECT_SOURCE_DIR}/third_party) -@@ -112,6 +112,5 @@ endif() +@@ -94,6 +94,5 @@ endif() # (4) Catch-all: not supported. ############################################################################## @@ -263,7 +320,7 @@ index 9d5f0643ec..c3624e582a 100644 +set(NNPACK_FOUND TRUE) +set(USE_NNPACK ON) diff --git a/cmake/public/mkldnn.cmake b/cmake/public/mkldnn.cmake -index 87935625f9..9f8fa3df81 100644 +index 87935625f..9f8fa3df8 100644 --- a/cmake/public/mkldnn.cmake +++ b/cmake/public/mkldnn.cmake @@ -4,7 +4,7 @@ if(CPU_AARCH64) @@ -282,10 +339,10 @@ index 87935625f9..9f8fa3df81 100644 - ${MKLDNN_LIBRARIES}) + DNNL::dnnl) diff --git a/setup.py b/setup.py -index 2b0cfa99d7..2d76b0d035 100644 +index 61ee9363f..3691cc35c 100644 --- a/setup.py +++ b/setup.py -@@ -491,13 +491,9 @@ def build_deps(): +@@ -508,13 +508,9 @@ def build_deps(): # Windows has very poor support for them. sym_files = [ "tools/shared/_utils_internal.py", @@ -300,40 +357,30 @@ index 2b0cfa99d7..2d76b0d035 100644 for sym_file, orig_file in zip(sym_files, orig_files): same = False diff --git a/test/cpp/c10d/CMakeLists.txt b/test/cpp/c10d/CMakeLists.txt -index 0874852517..58ad3e9d66 100644 +index 5b423241d..e069accd6 100644 --- a/test/cpp/c10d/CMakeLists.txt +++ b/test/cpp/c10d/CMakeLists.txt -@@ -16,15 +16,15 @@ function(c10d_add_test test_src) - add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>) +@@ -26,17 +26,17 @@ function(c10d_add_test test_src) + endif() endfunction() --c10d_add_test(BackoffTest.cpp torch_cpu gtest_main) --c10d_add_test(FileStoreTest.cpp torch_cpu gtest_main) --c10d_add_test(TCPStoreTest.cpp torch_cpu gtest_main) -+c10d_add_test(BackoffTest.cpp torch_cpu gtest_main gtest) -+c10d_add_test(FileStoreTest.cpp torch_cpu gtest_main gtest) -+c10d_add_test(TCPStoreTest.cpp torch_cpu gtest_main gtest) - if(INSTALL_TEST) - install(TARGETS FileStoreTest DESTINATION bin) - install(TARGETS TCPStoreTest DESTINATION bin) - endif() +-c10d_add_test(BackoffTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST OFF) +-c10d_add_test(FileStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST ${INSTALL_TEST}) +-c10d_add_test(TCPStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST ${INSTALL_TEST}) ++c10d_add_test(BackoffTest.cpp LINK_LIBRARIES torch_cpu gtest_main gtest INSTALL_TEST OFF) ++c10d_add_test(FileStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main gtest INSTALL_TEST ${INSTALL_TEST}) ++c10d_add_test(TCPStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main gtest INSTALL_TEST ${INSTALL_TEST}) if(NOT WIN32) -- c10d_add_test(HashStoreTest.cpp torch_cpu gtest_main) -+ c10d_add_test(HashStoreTest.cpp torch_cpu gtest_main gtest) - if(INSTALL_TEST) - install(TARGETS HashStoreTest DESTINATION bin) - endif() -@@ -32,11 +32,11 @@ endif() +- c10d_add_test(HashStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST ${INSTALL_TEST}) ++ c10d_add_test(HashStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main gtest INSTALL_TEST ${INSTALL_TEST}) + endif() if(USE_CUDA) if(USE_GLOO AND USE_C10D_GLOO) -- c10d_add_test(ProcessGroupGlooTest.cpp torch_cpu c10d_cuda_test gtest_main) -+ c10d_add_test(ProcessGroupGlooTest.cpp torch_cpu c10d_cuda_test gtest_main gtest) - if(INSTALL_TEST) - install(TARGETS ProcessGroupGlooTest DESTINATION bin) - endif() -- c10d_add_test(ProcessGroupGlooAsyncTest.cpp torch_cpu c10d_cuda_test gtest_main) -+ c10d_add_test(ProcessGroupGlooAsyncTest.cpp torch_cpu c10d_cuda_test gtest_main gtest) +- c10d_add_test(ProcessGroupGlooTest.cpp LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main INSTALL_TEST ${INSTALL_TEST}) +- c10d_add_test(ProcessGroupGlooAsyncTest.cpp LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main INSTALL_TEST ${INSTALL_TEST}) ++ c10d_add_test(ProcessGroupGlooTest.cpp LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest INSTALL_TEST ${INSTALL_TEST}) ++ c10d_add_test(ProcessGroupGlooAsyncTest.cpp LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest INSTALL_TEST ${INSTALL_TEST}) endif() if(USE_NCCL AND USE_C10D_NCCL) # NCCL is a private dependency of libtorch, but the tests include some @@ -341,35 +388,34 @@ index 0874852517..58ad3e9d66 100644 # a private dependency of the tests as well. c10d_add_test( ProcessGroupNCCLTest.cpp -- torch_cpu c10d_cuda_test gtest_main __caffe2_nccl) -+ torch_cpu c10d_cuda_test gtest_main gtest __caffe2_nccl) +- LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main __caffe2_nccl INSTALL_TEST ${INSTALL_TEST}) ++ LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest __caffe2_nccl INSTALL_TEST ${INSTALL_TEST}) c10d_add_test( ProcessGroupNCCLErrorsTest.cpp -- torch_cpu c10d_cuda_test gtest_main __caffe2_nccl) -+ torch_cpu c10d_cuda_test gtest_main gtest __caffe2_nccl) +- LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main __caffe2_nccl INSTALL_TEST ${INSTALL_TEST}) ++ LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest __caffe2_nccl INSTALL_TEST ${INSTALL_TEST}) if(INSTALL_TEST) - install(TARGETS ProcessGroupNCCLTest DESTINATION bin) - install(TARGETS ProcessGroupNCCLErrorsTest DESTINATION bin) -@@ -62,7 +62,7 @@ if(USE_CUDA) + install(TARGETS c10d_cuda_test DESTINATION lib) + endif() +@@ -60,14 +60,14 @@ if(USE_CUDA) # a private dependency of the tests as well. c10d_add_test( ProcessGroupUCCTest.cpp -- torch_cpu c10d_cuda_test gtest_main __caffe2_ucc) -+ torch_cpu c10d_cuda_test gtest_main gtest __caffe2_ucc) +- LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main __caffe2_ucc INSTALL_TEST ${INSTALL_TEST}) ++ LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest __caffe2_ucc INSTALL_TEST ${INSTALL_TEST}) if(INSTALL_TEST) - install(TARGETS ProcessGroupUCCTest DESTINATION bin) install(TARGETS c10d_cuda_test DESTINATION lib) -@@ -70,7 +70,7 @@ if(USE_CUDA) + endif() endif() else() if(USE_GLOO AND USE_C10D_GLOO) -- c10d_add_test(ProcessGroupGlooTest.cpp torch_cpu gtest_main) -+ c10d_add_test(ProcessGroupGlooTest.cpp torch_cpu gtest_main gtest) +- c10d_add_test(ProcessGroupGlooTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST OFF) ++ c10d_add_test(ProcessGroupGlooTest.cpp LINK_LIBRARIES torch_cpu gtest_main gtest INSTALL_TEST OFF) endif() endif() diff --git a/test/cpp/tensorexpr/CMakeLists.txt b/test/cpp/tensorexpr/CMakeLists.txt -index 179270c4a4..72f5582e81 100644 +index 9c409e078..6cddd8de4 100644 --- a/test/cpp/tensorexpr/CMakeLists.txt +++ b/test/cpp/tensorexpr/CMakeLists.txt @@ -51,7 +51,7 @@ target_include_directories(tutorial_tensorexpr PRIVATE ${ATen_CPU_INCLUDE}) @@ -382,7 +428,7 @@ index 179270c4a4..72f5582e81 100644 if(USE_CUDA) target_compile_definitions(test_tensorexpr PRIVATE USE_CUDA) diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt -index c74b45431c..5b5d0919d0 100644 +index 8b8ebdc6e..034b5e56c 100644 --- a/torch/CMakeLists.txt +++ b/torch/CMakeLists.txt @@ -82,8 +82,6 @@ set(TORCH_PYTHON_LINK_LIBRARIES diff --git a/gnu/packages/patches/python-pytorch-without-kineto-2.7.0.patch b/gnu/packages/patches/python-pytorch-without-kineto-2.7.0.patch deleted file mode 100644 index 1b10f18d5a6..00000000000 --- a/gnu/packages/patches/python-pytorch-without-kineto-2.7.0.patch +++ /dev/null @@ -1,64 +0,0 @@ -Even when building without Kineto, the <ActivityType.h> header is still -imported and the ActivityType type is used. This patch was copied from -https://github.com/pytorch/pytorch/pull/111048 and adapted. - -diff --git a/torch/csrc/profiler/kineto_shim.h b/torch/csrc/profiler/kineto_shim.h -index c4efd7785..2caef1f1e 100644 ---- a/torch/csrc/profiler/kineto_shim.h -+++ b/torch/csrc/profiler/kineto_shim.h -@@ -12,7 +12,55 @@ - #undef USE_KINETO - #endif - -+#ifdef USE_KINETO - #include <ActivityType.h> -+#else -+namespace libkineto { -+// copied from header -+/* -+ * Copyright (c) Meta Platforms, Inc. and affiliates. -+ * All rights reserved. -+ * -+ * This source code is licensed under the BSD-style license found in the -+ * LICENSE file in the root directory of this source tree. -+ */ -+ -+// Note : All activity types are not enabled by default. Please add them -+// at correct position in the enum -+enum class ActivityType { -+ // Activity types enabled by default -+ CPU_OP = 0, // cpu side ops -+ USER_ANNOTATION, -+ GPU_USER_ANNOTATION, -+ GPU_MEMCPY, -+ GPU_MEMSET, -+ CONCURRENT_KERNEL, // on-device kernels -+ EXTERNAL_CORRELATION, -+ CUDA_RUNTIME, // host side cuda runtime events -+ CUDA_DRIVER, // host side cuda driver events -+ CPU_INSTANT_EVENT, // host side point-like events -+ PYTHON_FUNCTION, -+ OVERHEAD, // CUPTI induced overhead events sampled from its overhead API. -+ -+ // Optional Activity types -+ CUDA_SYNC, // synchronization events between runtime and kernels -+ GLOW_RUNTIME, // host side glow runtime events -+ MTIA_RUNTIME, // host side MTIA runtime events -+ CUDA_PROFILER_RANGE, // CUPTI Profiler range for performance metrics -+ MTIA_CCP_EVENTS, // MTIA ondevice CCP events -+ HPU_OP, // HPU host side runtime event -+ XPU_RUNTIME, // host side xpu runtime events -+ MTIA_WORKLOADD, -+ -+ PRIVATEUSE1_RUNTIME, -+ PRIVATEUSE1_DRIVER, -+ -+ ENUM_COUNT, // This is to add buffer and not used for any profiling logic. Add your new type before it. -+ OPTIONAL_ACTIVITY_TYPE_START = CUDA_SYNC, -+}; -+} -+ -+#endif - - #include <torch/csrc/Export.h> - #include <torch/csrc/profiler/api.h> |
