9 files changed, 160 insertions, 824 deletions
diff --git a/gnu/local.mk b/gnu/local.mk
index 3a1ff0ea956..e2419ea847f 100644
--- a/gnu/local.mk
+++ b/gnu/local.mk
@@ -2159,15 +2159,11 @@ dist_patch_DATA =						\
   %D%/packages/patches/python-pyan3-fix-absolute-path-bug.patch \
   %D%/packages/patches/python-pyan3-fix-positional-arguments.patch \
   %D%/packages/patches/python-pytorch-fix-codegen.patch		\
-  %D%/packages/patches/python-pytorch-fix-codegen-2.7.0.patch		\
   %D%/packages/patches/python-pytorch-for-r-torch-fix-codegen.patch \
   %D%/packages/patches/python-pytorch-for-r-torch-system-libraries.patch \
   %D%/packages/patches/python-pytorch-runpath.patch		\
-  %D%/packages/patches/python-pytorch-runpath-2.7.0.patch		\
   %D%/packages/patches/python-pytorch-system-libraries.patch	\
-  %D%/packages/patches/python-pytorch-system-libraries-2.7.0.patch	\
   %D%/packages/patches/python-pytorch-without-kineto.patch	\
-  %D%/packages/patches/python-pytorch-without-kineto-2.7.0.patch	\
   %D%/packages/patches/python-robotframework-sshlibrary-rf5-compat.patch \
   %D%/packages/patches/python-unittest2-python3-compat.patch	\
   %D%/packages/patches/python-unittest2-remove-argparse.patch	\
diff --git a/gnu/packages/machine-learning.scm b/gnu/packages/machine-learning.scm
index c1621d1e0b9..3be1026bf7d 100644
--- a/gnu/packages/machine-learning.scm
+++ b/gnu/packages/machine-learning.scm
@@ -4968,13 +4968,13 @@ PyTorch.")
     (sha256
      (base32
       "19prdpzx34n8y2q6wx9dn9vyms6zidjvfgh58d28rfcf5z7z5ra5"))
-    (patches (search-patches "python-pytorch-system-libraries-2.7.0.patch"
-                             "python-pytorch-runpath-2.7.0.patch"
-                             "python-pytorch-without-kineto-2.7.0.patch"
+    (patches (search-patches "python-pytorch-system-libraries.patch"
+                             "python-pytorch-runpath.patch"
+                             "python-pytorch-without-kineto.patch"
                              ;; Some autogeneration scripts depend on the
                              ;; compile PyTorch library. Therefore, we create
                              ;; dummy versions which are regenerated later.
-                             "python-pytorch-fix-codegen-2.7.0.patch"))
+                             "python-pytorch-fix-codegen.patch"))
     (modules '((guix build utils)))
     (snippet
      '(begin
@@ -5123,11 +5123,18 @@ PyTorch.")
                 (("entry_points\\[\"console_scripts\"\\]\\.append\\(") "("))))
           (add-before 'build 'use-system-libraries
             (lambda _
-              (substitute* '("caffe2/serialize/crc.cc"
-                             "caffe2/serialize/inline_container.cc"
-                             "torch/csrc/inductor/aoti_package/model_package_loader.cpp")
-                (("\"miniz\\.h\"") "<miniz/miniz.h>")
-                (("<miniz\\.h>") "<miniz/miniz.h>"))
+              (for-each
+               (lambda (file)
+                 ;; Skip missing files so that this phase also works for
+                 ;; the python-pytorch-for-r-torch package.
+                 (when (file-exists? file)
+                   (substitute* file
+                     (("\"miniz\\.h\"") "<miniz/miniz.h>")
+                     (("<miniz\\.h>") "<miniz/miniz.h>"))))
+               '("caffe2/serialize/crc.cc"
+                 "caffe2/serialize/inline_container.cc"
+                 "torch/csrc/inductor/aoti_package/model_package_loader.cpp"))
+
               (substitute* "aten/src/ATen/native/vulkan/api/Allocator.h"
                 (("<include/vk_mem_alloc.h>")
                  "<vk_mem_alloc.h>"))
diff --git a/gnu/packages/patches/python-pytorch-fix-codegen-2.7.0.patch b/gnu/packages/patches/python-pytorch-fix-codegen-2.7.0.patch
deleted file mode 100644
index 3862339b141..00000000000
--- a/gnu/packages/patches/python-pytorch-fix-codegen-2.7.0.patch
+++ /dev/null
@@ -1,178 +0,0 @@
-This patch fixes some scripts for generating source files.  For
-gen_jit_decompositions.py, gen_mobile_upgraders.py and
-gen_jit_shape_functions.py, which depend on the compiled PyTorch library, the
-option to generate "dummy" source files is added for the initial build, which
-is later corrected.  codegen_external.py is patched to avoid duplicate
-functions and add the static keyword as in the existing generated file.
-
-diff --git a/tools/gen_flatbuffers.sh b/tools/gen_flatbuffers.sh
-index cc0263dbb..ac34e84b8 100644
---- a/tools/gen_flatbuffers.sh
-+++ b/tools/gen_flatbuffers.sh
-@@ -1,13 +1,13 @@
- #!/bin/bash
- ROOT=$(pwd)
--FF_LOCATION="$ROOT/third_party/flatbuffers"
--cd "$FF_LOCATION" || exit
--mkdir build
--cd build || exit
--cmake ..
--cmake --build . --target flatc
--mkdir -p "$ROOT/build/torch/csrc/jit/serialization"
--./flatc --cpp --gen-mutable --scoped-enums \
-+#FF_LOCATION="$ROOT/third_party/flatbuffers"
-+#cd "$FF_LOCATION" || exit
-+#mkdir build
-+#cd build || exit
-+#cmake ..
-+#cmake --build . --target flatc
-+#mkdir -p "$ROOT/build/torch/csrc/jit/serialization"
-+flatc --cpp --gen-mutable --scoped-enums \
-      -o "$ROOT/torch/csrc/jit/serialization" \
-      -c "$ROOT/torch/csrc/jit/serialization/mobile_bytecode.fbs"
- echo '// @generated' >> "$ROOT/torch/csrc/jit/serialization/mobile_bytecode_generated.h"
-diff --git a/torch/csrc/jit/tensorexpr/codegen_external.py b/torch/csrc/jit/tensorexpr/codegen_external.py
-index 5dcf1b284..0e20b0c10 100644
---- a/torch/csrc/jit/tensorexpr/codegen_external.py
-+++ b/torch/csrc/jit/tensorexpr/codegen_external.py
-@@ -21,9 +21,14 @@ def gen_external(native_functions_path, tags_path, external_path):
-     native_functions = parse_native_yaml(native_functions_path, tags_path)
-     func_decls = []
-     func_registrations = []
--    for func in native_functions:
-+    done_names = set()
-+    for func in native_functions[0]:
-         schema = func.func
-         name = schema.name.name.base
-+        if name in done_names:
-+            continue
-+        else:
-+            done_names.add(name)
-         args = schema.arguments
-         # Only supports extern calls for functions with out variants
-         if not schema.is_out_fn():
-@@ -63,7 +68,7 @@ def gen_external(native_functions_path, tags_path, external_path):
- 
-         # print(tensor_decls, name, arg_names)
-         func_decl = f"""\
--void nnc_aten_{name}(
-+static void nnc_aten_{name}(
-     int64_t bufs_num,
-     void** buf_data,
-     int64_t* buf_ranks,
-diff --git a/torchgen/decompositions/gen_jit_decompositions.py b/torchgen/decompositions/gen_jit_decompositions.py
-index b42948045..e1cfc73a5 100644
---- a/torchgen/decompositions/gen_jit_decompositions.py
-+++ b/torchgen/decompositions/gen_jit_decompositions.py
-@@ -1,8 +1,12 @@
- #!/usr/bin/env python3
- import os
- from pathlib import Path
-+import sys
- 
--from torch.jit._decompositions import decomposition_table
-+if len(sys.argv) < 2 or sys.argv[1] != "dummy":
-+    from torch.jit._decompositions import decomposition_table
-+else:
-+    decomposition_table = {}
- 
- 
- # from torchgen.code_template import CodeTemplate
-@@ -86,7 +90,7 @@ def write_decomposition_util_file(path: str) -> None:
- 
- 
- def main() -> None:
--    pytorch_dir = Path(__file__).resolve().parents[3]
-+    pytorch_dir = Path(__file__).resolve().parents[2]
-     upgrader_path = pytorch_dir / "torch" / "csrc" / "jit" / "runtime"
-     write_decomposition_util_file(str(upgrader_path))
- 
-diff --git a/torchgen/operator_versions/gen_mobile_upgraders.py b/torchgen/operator_versions/gen_mobile_upgraders.py
-index 845034cb7..a1c5767c2 100644
---- a/torchgen/operator_versions/gen_mobile_upgraders.py
-+++ b/torchgen/operator_versions/gen_mobile_upgraders.py
-@@ -6,10 +6,13 @@ import os
- from enum import Enum
- from operator import itemgetter
- from pathlib import Path
-+import sys
- from typing import Any
- 
--import torch
--from torch.jit.generate_bytecode import generate_upgraders_bytecode
-+if len(sys.argv) < 2 or sys.argv[1] != "dummy":
-+    import torch
-+    from torch.jit.generate_bytecode import generate_upgraders_bytecode
-+
- from torchgen.code_template import CodeTemplate
- from torchgen.operator_versions.gen_mobile_upgraders_constant import (
-     MOBILE_UPGRADERS_HEADER_DESCRIPTION,
-@@ -263,7 +266,10 @@ def construct_register_size(register_size_from_yaml: int) -> str:
- def construct_version_maps(
-     upgrader_bytecode_function_to_index_map: dict[str, Any],
- ) -> str:
--    version_map = torch._C._get_operator_version_map()
-+    if len(sys.argv) < 2 or sys.argv[1] != "dummy":
-+        version_map = torch._C._get_operator_version_map()
-+    else:
-+        version_map = {}
-     sorted_version_map_ = sorted(version_map.items(), key=itemgetter(0))  # type: ignore[no-any-return]
-     sorted_version_map = dict(sorted_version_map_)
- 
-@@ -375,7 +381,10 @@ def sort_upgrader(upgrader_list: list[dict[str, Any]]) -> list[dict[str, Any]]:
- 
- 
- def main() -> None:
--    upgrader_list = generate_upgraders_bytecode()
-+    if len(sys.argv) < 2 or sys.argv[1] != "dummy":
-+        upgrader_list = generate_upgraders_bytecode()
-+    else:
-+        upgrader_list = []
-     sorted_upgrader_list = sort_upgrader(upgrader_list)
-     for up in sorted_upgrader_list:
-         print("after sort upgrader : ", next(iter(up)))
-diff --git a/torchgen/shape_functions/gen_jit_shape_functions.py b/torchgen/shape_functions/gen_jit_shape_functions.py
-index 56a3d8bf0..ffd0785fd 100644
---- a/torchgen/shape_functions/gen_jit_shape_functions.py
-+++ b/torchgen/shape_functions/gen_jit_shape_functions.py
-@@ -1,6 +1,7 @@
- #!/usr/bin/env python3
- import os
- import sys
-+import importlib
- from importlib.util import module_from_spec, spec_from_file_location
- from itertools import chain
- from pathlib import Path
-@@ -18,17 +19,21 @@ you are in the root directory of the Pytorch git repo"""
- if not file_path.exists():
-     raise Exception(err_msg)  # noqa: TRY002
- 
--spec = spec_from_file_location(module_name, file_path)
--assert spec is not None
--module = module_from_spec(spec)
--sys.modules[module_name] = module
--assert spec.loader is not None
--assert module is not None
--spec.loader.exec_module(module)
--
--bounded_compute_graph_mapping = module.bounded_compute_graph_mapping
--shape_compute_graph_mapping = module.shape_compute_graph_mapping
--
-+if len(sys.argv) < 2 or sys.argv[1] != "dummy":
-+    spec = importlib.util.spec_from_file_location(module_name, file_path)
-+    assert spec is not None
-+    module = importlib.util.module_from_spec(spec)
-+    sys.modules[module_name] = module
-+    assert spec.loader is not None
-+    assert module is not None
-+    spec.loader.exec_module(module)
-+
-+    bounded_compute_graph_mapping = module.bounded_compute_graph_mapping
-+    shape_compute_graph_mapping = module.shape_compute_graph_mapping
-+
-+else:
-+    bounded_compute_graph_mapping = {}
-+    shape_compute_graph_mapping = {}
- 
- SHAPE_HEADER = r"""
- /**
diff --git a/gnu/packages/patches/python-pytorch-fix-codegen.patch b/gnu/packages/patches/python-pytorch-fix-codegen.patch
index 106ea7db663..3862339b141 100644
--- a/gnu/packages/patches/python-pytorch-fix-codegen.patch
+++ b/gnu/packages/patches/python-pytorch-fix-codegen.patch
@@ -6,7 +6,7 @@ is later corrected.  codegen_external.py is patched to avoid duplicate
 functions and add the static keyword as in the existing generated file.
 
 diff --git a/tools/gen_flatbuffers.sh b/tools/gen_flatbuffers.sh
-index cc0263dbbf..ac34e84b82 100644
+index cc0263dbb..ac34e84b8 100644
 --- a/tools/gen_flatbuffers.sh
 +++ b/tools/gen_flatbuffers.sh
 @@ -1,13 +1,13 @@
@@ -32,7 +32,7 @@ index cc0263dbbf..ac34e84b82 100644
       -c "$ROOT/torch/csrc/jit/serialization/mobile_bytecode.fbs"
  echo '// @generated' >> "$ROOT/torch/csrc/jit/serialization/mobile_bytecode_generated.h"
 diff --git a/torch/csrc/jit/tensorexpr/codegen_external.py b/torch/csrc/jit/tensorexpr/codegen_external.py
-index 5dcf1b2840..0e20b0c102 100644
+index 5dcf1b284..0e20b0c10 100644
 --- a/torch/csrc/jit/tensorexpr/codegen_external.py
 +++ b/torch/csrc/jit/tensorexpr/codegen_external.py
 @@ -21,9 +21,14 @@ def gen_external(native_functions_path, tags_path, external_path):
@@ -61,7 +61,7 @@ index 5dcf1b2840..0e20b0c102 100644
      void** buf_data,
      int64_t* buf_ranks,
 diff --git a/torchgen/decompositions/gen_jit_decompositions.py b/torchgen/decompositions/gen_jit_decompositions.py
-index b42948045c..e1cfc73a5e 100644
+index b42948045..e1cfc73a5 100644
 --- a/torchgen/decompositions/gen_jit_decompositions.py
 +++ b/torchgen/decompositions/gen_jit_decompositions.py
 @@ -1,8 +1,12 @@
@@ -88,7 +88,7 @@ index b42948045c..e1cfc73a5e 100644
      write_decomposition_util_file(str(upgrader_path))
  
 diff --git a/torchgen/operator_versions/gen_mobile_upgraders.py b/torchgen/operator_versions/gen_mobile_upgraders.py
-index 362ce427d5..245056f815 100644
+index 845034cb7..a1c5767c2 100644
 --- a/torchgen/operator_versions/gen_mobile_upgraders.py
 +++ b/torchgen/operator_versions/gen_mobile_upgraders.py
 @@ -6,10 +6,13 @@ import os
@@ -107,9 +107,9 @@ index 362ce427d5..245056f815 100644
  from torchgen.code_template import CodeTemplate
  from torchgen.operator_versions.gen_mobile_upgraders_constant import (
      MOBILE_UPGRADERS_HEADER_DESCRIPTION,
-@@ -265,7 +268,10 @@ def construct_register_size(register_size_from_yaml: int) -> str:
+@@ -263,7 +266,10 @@ def construct_register_size(register_size_from_yaml: int) -> str:
  def construct_version_maps(
-     upgrader_bytecode_function_to_index_map: dict[str, Any]
+     upgrader_bytecode_function_to_index_map: dict[str, Any],
  ) -> str:
 -    version_map = torch._C._get_operator_version_map()
 +    if len(sys.argv) < 2 or sys.argv[1] != "dummy":
@@ -119,7 +119,7 @@ index 362ce427d5..245056f815 100644
      sorted_version_map_ = sorted(version_map.items(), key=itemgetter(0))  # type: ignore[no-any-return]
      sorted_version_map = dict(sorted_version_map_)
  
-@@ -381,7 +387,10 @@ def sort_upgrader(upgrader_list: list[dict[str, Any]]) -> list[dict[str, Any]]:
+@@ -375,7 +381,10 @@ def sort_upgrader(upgrader_list: list[dict[str, Any]]) -> list[dict[str, Any]]:
  
  
  def main() -> None:
@@ -132,7 +132,7 @@ index 362ce427d5..245056f815 100644
      for up in sorted_upgrader_list:
          print("after sort upgrader : ", next(iter(up)))
 diff --git a/torchgen/shape_functions/gen_jit_shape_functions.py b/torchgen/shape_functions/gen_jit_shape_functions.py
-index 56a3d8bf0d..490a3ea2e7 100644
+index 56a3d8bf0..ffd0785fd 100644
 --- a/torchgen/shape_functions/gen_jit_shape_functions.py
 +++ b/torchgen/shape_functions/gen_jit_shape_functions.py
 @@ -1,6 +1,7 @@
@@ -143,7 +143,7 @@ index 56a3d8bf0d..490a3ea2e7 100644
  from importlib.util import module_from_spec, spec_from_file_location
  from itertools import chain
  from pathlib import Path
-@@ -18,16 +19,21 @@ you are in the root directory of the Pytorch git repo"""
+@@ -18,17 +19,21 @@ you are in the root directory of the Pytorch git repo"""
  if not file_path.exists():
      raise Exception(err_msg)  # noqa: TRY002
  
@@ -157,6 +157,7 @@ index 56a3d8bf0d..490a3ea2e7 100644
 -
 -bounded_compute_graph_mapping = module.bounded_compute_graph_mapping
 -shape_compute_graph_mapping = module.shape_compute_graph_mapping
+-
 +if len(sys.argv) < 2 or sys.argv[1] != "dummy":
 +    spec = importlib.util.spec_from_file_location(module_name, file_path)
 +    assert spec is not None
@@ -173,5 +174,5 @@ index 56a3d8bf0d..490a3ea2e7 100644
 +    bounded_compute_graph_mapping = {}
 +    shape_compute_graph_mapping = {}
  
- 
  SHAPE_HEADER = r"""
+ /**
diff --git a/gnu/packages/patches/python-pytorch-runpath-2.7.0.patch b/gnu/packages/patches/python-pytorch-runpath-2.7.0.patch
deleted file mode 100644
index cd8bea370b8..00000000000
--- a/gnu/packages/patches/python-pytorch-runpath-2.7.0.patch
+++ /dev/null
@@ -1,30 +0,0 @@
-Libraries (such as 'libtorch_cpu.so') and executables (such as 'torch_shm_manager')
-get installed, quite surprisingly, to 'lib/python3.8/site-packages/{bin,lib}'.
-Make sure RUNPATH matches that.
-
-diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
-index be45936a8..7b19e5359 100644
---- a/cmake/Dependencies.cmake
-+++ b/cmake/Dependencies.cmake
-@@ -4,7 +4,7 @@ if(APPLE)
-   set(CMAKE_MACOSX_RPATH ON)
-   set(_rpath_portable_origin "@loader_path")
- else()
--  set(_rpath_portable_origin $ORIGIN)
-+  set(_rpath_portable_origin $ORIGIN/../lib)
- endif(APPLE)
- # Use separate rpaths during build and install phases
- set(CMAKE_SKIP_BUILD_RPATH  FALSE)
-diff --git a/functorch/CMakeLists.txt b/functorch/CMakeLists.txt
-index bdfa4bfe4..2a75e3825 100644
---- a/functorch/CMakeLists.txt
-+++ b/functorch/CMakeLists.txt
-@@ -26,7 +26,7 @@ target_link_libraries(${PROJECT_NAME} PRIVATE pybind::pybind11)
- 
- set_target_properties(${PROJECT_NAME} PROPERTIES LIBRARY_OUTPUT_DIRECTORY
-       ${CMAKE_BINARY_DIR}/functorch)
--set_target_properties(${PROJECT_NAME} PROPERTIES INSTALL_RPATH "${_rpath_portable_origin}/../torch/lib")
-+set_target_properties(${PROJECT_NAME} PROPERTIES INSTALL_RPATH "$ORIGIN/../torch/lib")
- 
- # Copy-pasted prefix/suffix logic for Python extensions from
- # https://github.com/pytorch/pytorch/blob/33bb8ae350611760139457b85842b1d7edf9aa11/caffe2/CMakeLists.txt#L1975
diff --git a/gnu/packages/patches/python-pytorch-runpath.patch b/gnu/packages/patches/python-pytorch-runpath.patch
index 811de9e2888..cd8bea370b8 100644
--- a/gnu/packages/patches/python-pytorch-runpath.patch
+++ b/gnu/packages/patches/python-pytorch-runpath.patch
@@ -3,7 +3,7 @@ get installed, quite surprisingly, to 'lib/python3.8/site-packages/{bin,lib}'.
 Make sure RUNPATH matches that.
 
 diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
-index f1f2eb7..cb5caea 100644
+index be45936a8..7b19e5359 100644
 --- a/cmake/Dependencies.cmake
 +++ b/cmake/Dependencies.cmake
 @@ -4,7 +4,7 @@ if(APPLE)
@@ -16,7 +16,7 @@ index f1f2eb7..cb5caea 100644
  # Use separate rpaths during build and install phases
  set(CMAKE_SKIP_BUILD_RPATH  FALSE)
 diff --git a/functorch/CMakeLists.txt b/functorch/CMakeLists.txt
-index bdfa4bf..2a75e38 100644
+index bdfa4bfe4..2a75e3825 100644
 --- a/functorch/CMakeLists.txt
 +++ b/functorch/CMakeLists.txt
 @@ -26,7 +26,7 @@ target_link_libraries(${PROJECT_NAME} PRIVATE pybind::pybind11)
diff --git a/gnu/packages/patches/python-pytorch-system-libraries-2.7.0.patch b/gnu/packages/patches/python-pytorch-system-libraries-2.7.0.patch
deleted file mode 100644
index 6d8da60e413..00000000000
--- a/gnu/packages/patches/python-pytorch-system-libraries-2.7.0.patch
+++ /dev/null
@@ -1,442 +0,0 @@
-Patch build files to also system libraries instead of bundled ones for the
-libraries not supported or working only by specifying USE_SYSTEM_LIBS.  This
-includes using the clog, cpuinfo, fbgemm, foxi, fp16, fxdiv, googletest,
-httlib, ideep, miniz, nnpack, oneapi-dnnl, pocketfft, pthreadpool,
-qnnpack-pytorch, tensorpipe, valgrind and xnnpack packages.
-
-diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt
-index 085af373e..3287429b4 100644
---- a/aten/src/ATen/CMakeLists.txt
-+++ b/aten/src/ATen/CMakeLists.txt
-@@ -468,9 +468,9 @@ if(AT_NNPACK_ENABLED)
-   list(APPEND ATen_CPU_DEPENDENCY_LIBS nnpack) # cpuinfo is added below
- endif()
- 
--if(MKLDNN_FOUND)
--  list(APPEND ATen_CPU_DEPENDENCY_LIBS ${MKLDNN_LIBRARIES})
--endif(MKLDNN_FOUND)
-+if(USE_MKLDNN)
-+  list(APPEND ATen_CPU_DEPENDENCY_LIBS DNNL::dnnl)
-+endif(USE_MKLDNN)
- 
- if(USE_MKLDNN_ACL)
-     list(APPEND ATen_CPU_INCLUDE ${ACL_INCLUDE_DIRS})
-diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
-index d2d23b7ab..1a7e5a042 100644
---- a/caffe2/CMakeLists.txt
-+++ b/caffe2/CMakeLists.txt
-@@ -91,9 +91,6 @@ if(NOT MSVC AND USE_XNNPACK)
-   if(NOT TARGET fxdiv)
-     set(FXDIV_BUILD_TESTS OFF CACHE BOOL "")
-     set(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "")
--    add_subdirectory(
--      "${FXDIV_SOURCE_DIR}"
--      "${CMAKE_BINARY_DIR}/FXdiv")
-   endif()
- endif()
- 
-@@ -1135,7 +1132,6 @@ if(USE_XPU)
- endif()
- 
- if(NOT MSVC AND USE_XNNPACK)
--  TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv)
- endif()
- 
- # ==========================================================
-@@ -1254,8 +1250,8 @@ endif()
- target_include_directories(torch_cpu PRIVATE
-   ${TORCH_ROOT}/third_party/cpp-httplib)
- 
--target_include_directories(torch_cpu PRIVATE
--  ${TORCH_ROOT}/third_party/nlohmann/include)
-+find_package(httplib REQUIRED)
-+target_link_libraries(torch_cpu PUBLIC httplib::httplib)
- 
- install(DIRECTORY "${TORCH_SRC_DIR}/csrc"
-   DESTINATION ${TORCH_INSTALL_INCLUDE_DIR}/torch
-@@ -1494,6 +1490,7 @@ target_link_libraries(torch_cpu PUBLIC c10)
- target_link_libraries(torch_cpu PUBLIC ${Caffe2_PUBLIC_DEPENDENCY_LIBS})
- target_link_libraries(torch_cpu PRIVATE ${Caffe2_DEPENDENCY_LIBS})
- target_link_libraries(torch_cpu PRIVATE ${Caffe2_DEPENDENCY_WHOLE_LINK_LIBS})
-+target_link_libraries(torch_cpu PRIVATE miniz clog)
- if(USE_MPI)
-   target_link_libraries(torch_cpu PRIVATE MPI::MPI_CXX)
- endif()
-@@ -1728,7 +1725,7 @@ if(BUILD_STATIC_RUNTIME_BENCHMARK)
-   add_executable(static_runtime_bench "${STATIC_RUNTIME_BENCHMARK_SRCS}")
-   add_executable(static_runtime_test "${STATIC_RUNTIME_TEST_SRCS}")
-   target_link_libraries(static_runtime_bench torch_library benchmark)
--  target_link_libraries(static_runtime_test torch_library gtest_main)
-+  target_link_libraries(static_runtime_test torch_library gtest_main gtest)
- endif()
- 
- if(BUILD_MOBILE_BENCHMARK)
-@@ -1747,7 +1744,7 @@ if(BUILD_MOBILE_TEST)
-   foreach(test_src ${ATen_MOBILE_TEST_SRCS})
-     get_filename_component(test_name ${test_src} NAME_WE)
-     add_executable(${test_name} "${test_src}")
--    target_link_libraries(${test_name} torch_library gtest_main)
-+    target_link_libraries(${test_name} torch_library gtest_main gtest)
-     target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
-     target_include_directories(${test_name} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
-     target_include_directories(${test_name} PRIVATE ${ATen_CPU_INCLUDE})
-@@ -1768,7 +1765,7 @@ if(BUILD_TEST)
-         if(NOT MSVC)
-           add_executable(${test_name}_${CPU_CAPABILITY} "${test_src}" ../aten/src/ATen/native/quantized/AffineQuantizerBase.cpp)
-           # TODO: Get rid of c10 dependency (which is only needed for the implementation of AT_ERROR)
--          target_link_libraries(${test_name}_${CPU_CAPABILITY} c10 sleef gtest_main nlohmann)
-+          target_link_libraries(${test_name}_${CPU_CAPABILITY} c10 sleef gtest_main gtest nlohmann)
-           if(USE_FBGEMM)
-             target_link_libraries(${test_name}_${CPU_CAPABILITY} fbgemm)
-           endif()
-@@ -1782,7 +1779,7 @@ if(BUILD_TEST)
-           endif()
-         else()
-           add_executable(${test_name}_${CPU_CAPABILITY} "${test_src}")
--          target_link_libraries(${test_name}_${CPU_CAPABILITY} torch_library sleef gtest_main)
-+          target_link_libraries(${test_name}_${CPU_CAPABILITY} torch_library sleef gtest_main gtest)
-         endif()
-         target_include_directories(${test_name}_${CPU_CAPABILITY} PRIVATE $<INSTALL_INTERFACE:include>)
-         target_include_directories(${test_name}_${CPU_CAPABILITY} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
-@@ -1799,7 +1796,7 @@ if(BUILD_TEST)
-   foreach(test_src ${Caffe2_CPU_TEST_SRCS})
-     get_filename_component(test_name ${test_src} NAME_WE)
-     add_executable(${test_name} "${test_src}")
--    target_link_libraries(${test_name} torch_library gtest_main)
-+    target_link_libraries(${test_name} torch_library gtest_main gtest)
-     if(NOT MSVC)
-       target_link_libraries(${test_name} stdc++)
-     endif()
-@@ -1823,7 +1820,7 @@ if(BUILD_TEST)
-       add_executable(${test_name} "${test_src}")
-       find_library(metal NAMES Metal)
-       find_library(foundation NAMES Foundation)
--      target_link_libraries(${test_name} torch_library gtest_main ${metal} ${foundation})
-+      target_link_libraries(${test_name} torch_library gtest_main gtest ${metal} ${foundation})
-       target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
-       target_include_directories(${test_name} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
-       target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE})
-@@ -1843,7 +1840,7 @@ if(BUILD_TEST)
-     foreach(test_src ${Caffe2_GPU_TEST_SRCS})
-       get_filename_component(test_name ${test_src} NAME_WE)
-       add_executable(${test_name} "${test_src}")
--      target_link_libraries(${test_name} torch_library gtest_main)
-+      target_link_libraries(${test_name} torch_library gtest_main gtest)
-       if(USE_CUDNN AND ${test_name} MATCHES "cudnn")
-         target_link_libraries(${test_name} torch::cudnn)
-       endif()
-@@ -1865,7 +1862,7 @@ if(BUILD_TEST)
-     foreach(test_src ${Caffe2_XPU_TEST_SRCS})
-       get_filename_component(test_name ${test_src} NAME_WE)
-       add_executable(${test_name} "${test_src}")
--      target_link_libraries(${test_name} torch_library gtest_main)
-+      target_link_libraries(${test_name} torch_library gtest_main gtest)
-       target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
-       target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE})
-       add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
-@@ -1880,7 +1877,7 @@ if(BUILD_TEST)
-     foreach(test_src ${Caffe2_VULKAN_TEST_SRCS})
-       get_filename_component(test_name ${test_src} NAME_WE)
-       add_executable(${test_name} "${test_src}")
--      target_link_libraries(${test_name} torch_library gtest_main)
-+      target_link_libraries(${test_name} torch_library gtest_main gtest)
-       target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
-       target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE})
-       add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
-@@ -1899,7 +1896,7 @@ if(BUILD_TEST)
-     foreach(test_src ${Caffe2_HIP_TEST_SRCS})
-       get_filename_component(test_name ${test_src} NAME_WE)
-       add_executable(${test_name} "${test_src}")
--      target_link_libraries(${test_name} torch_library gtest_main)
-+      target_link_libraries(${test_name} torch_library gtest_main gtest)
-       target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
-       target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE} ${Caffe2_HIP_INCLUDE})
-       target_compile_options(${test_name} PRIVATE ${HIP_CXX_FLAGS})
-diff --git a/caffe2/serialize/CMakeLists.txt b/caffe2/serialize/CMakeLists.txt
-index ebbff0f29..dcded2590 100644
---- a/caffe2/serialize/CMakeLists.txt
-+++ b/caffe2/serialize/CMakeLists.txt
-@@ -2,7 +2,6 @@ file(GLOB tmp *_test.cc)
- 
- set(Caffe2_CPU_TEST_SRCS ${Caffe2_CPU_TEST_SRCS} ${tmp})
- list(APPEND Caffe2_CPU_SRCS
--  ${PROJECT_SOURCE_DIR}/third_party/miniz-3.0.2/miniz.c
-   ${CMAKE_CURRENT_SOURCE_DIR}/inline_container.cc
-   ${CMAKE_CURRENT_SOURCE_DIR}/istream_adapter.cc
-   ${CMAKE_CURRENT_SOURCE_DIR}/file_adapter.cc
-diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
-index be45936a8..bb1aa1cc1 100644
---- a/cmake/Dependencies.cmake
-+++ b/cmake/Dependencies.cmake
-@@ -276,7 +276,7 @@ endif()
- # --- [ PocketFFT
- set(AT_POCKETFFT_ENABLED 0)
- if(NOT AT_MKL_ENABLED)
--  set(POCKETFFT_INCLUDE_DIR "${Torch_SOURCE_DIR}/third_party/pocketfft/")
-+  set(POCKETFFT_INCLUDE_DIR "#POCKETFFT_INCLUDE_DIR")
-   if(NOT EXISTS "${POCKETFFT_INCLUDE_DIR}")
-     message(FATAL_ERROR "pocketfft directory not found, expected ${POCKETFFT_INCLUDE_DIR}")
-   elif(NOT EXISTS "${POCKETFFT_INCLUDE_DIR}/pocketfft_hdronly.h")
-@@ -460,15 +460,6 @@ if(USE_PYTORCH_QNNPACK)
-       set(PYTORCH_QNNPACK_BUILD_TESTS OFF CACHE BOOL "")
-       set(PYTORCH_QNNPACK_BUILD_BENCHMARKS OFF CACHE BOOL "")
-       set(PYTORCH_QNNPACK_LIBRARY_TYPE "static" CACHE STRING "")
--      add_subdirectory(
--        "${PYTORCH_QNNPACK_SOURCE_DIR}"
--        "${CONFU_DEPENDENCIES_BINARY_DIR}/pytorch_qnnpack")
--      # We build static versions of QNNPACK and pthreadpool but link
--      # them into a shared library for Caffe2, so they need PIC.
--      set_property(TARGET pytorch_qnnpack PROPERTY POSITION_INDEPENDENT_CODE ON)
--      set_property(TARGET cpuinfo PROPERTY POSITION_INDEPENDENT_CODE ON)
--      # QNNPACK depends on gemmlowp headers
--      target_include_directories(pytorch_qnnpack PRIVATE "${CAFFE2_THIRD_PARTY_ROOT}/gemmlowp")
-     endif()
- 
-     list(APPEND Caffe2_DEPENDENCY_LIBS pytorch_qnnpack)
-@@ -558,16 +549,15 @@ if(USE_XNNPACK AND NOT USE_SYSTEM_XNNPACK)
-   list(APPEND Caffe2_DEPENDENCY_LIBS XNNPACK microkernels-prod)
- elseif(NOT TARGET XNNPACK AND USE_SYSTEM_XNNPACK)
-   add_library(XNNPACK SHARED IMPORTED)
--  add_library(microkernels-prod SHARED IMPORTED)
-+  add_library(microkernels-prod INTERFACE IMPORTED)
-   find_library(XNNPACK_LIBRARY XNNPACK)
--  find_library(microkernels-prod_LIBRARY microkernels-prod)
-   set_property(TARGET XNNPACK PROPERTY IMPORTED_LOCATION "${XNNPACK_LIBRARY}")
--  set_property(TARGET microkernels-prod PROPERTY IMPORTED_LOCATION "${microkernels-prod_LIBRARY}")
--  if(NOT XNNPACK_LIBRARY or NOT microkernels-prod_LIBRARY)
-+  set_property(TARGET microkernels-prod PROPERTY INTERFACE_LINK_LIBRARIES XNNPACK)
-+  if(NOT XNNPACK_LIBRARY)
-     message(FATAL_ERROR "Cannot find XNNPACK")
-   endif()
-   message("-- Found XNNPACK: ${XNNPACK_LIBRARY}")
--  list(APPEND Caffe2_DEPENDENCY_LIBS XNNPACK microkernels-prod)
-+  list(APPEND Caffe2_DEPENDENCY_LIBS XNNPACK)
- endif()
- 
- # ---[ Vulkan deps
-@@ -650,11 +640,6 @@ if(BUILD_TEST OR BUILD_MOBILE_BENCHMARK OR BUILD_MOBILE_TEST)
-   # this shouldn't be necessary anymore.
-   get_property(INC_DIR_temp DIRECTORY PROPERTY INCLUDE_DIRECTORIES)
-   set_property(DIRECTORY PROPERTY INCLUDE_DIRECTORIES "")
--  add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/googletest)
--  set_property(DIRECTORY PROPERTY INCLUDE_DIRECTORIES ${INC_DIR_temp})
--
--  include_directories(BEFORE SYSTEM ${CMAKE_CURRENT_LIST_DIR}/../third_party/googletest/googletest/include)
--  include_directories(BEFORE SYSTEM ${CMAKE_CURRENT_LIST_DIR}/../third_party/googletest/googlemock/include)
- 
-   # We will not need to test benchmark lib itself.
-   set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "Disable benchmark testing as we don't need it.")
-@@ -732,16 +717,6 @@ if(USE_FBGEMM)
-     if(USE_ASAN)
-       set(USE_SANITIZER "address,undefined" CACHE STRING "-fsanitize options for FBGEMM")
-     endif()
--    add_subdirectory("${FBGEMM_SOURCE_DIR}")
--    set_property(TARGET fbgemm_generic PROPERTY POSITION_INDEPENDENT_CODE ON)
--    set_property(TARGET fbgemm_avx2 PROPERTY POSITION_INDEPENDENT_CODE ON)
--    set_property(TARGET fbgemm_avx512 PROPERTY POSITION_INDEPENDENT_CODE ON)
--    set_property(TARGET fbgemm PROPERTY POSITION_INDEPENDENT_CODE ON)
--    if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 13.0.0)
--      # See https://github.com/pytorch/pytorch/issues/74352
--      target_compile_options_if_supported(asmjit -Wno-deprecated-copy)
--      target_compile_options_if_supported(asmjit -Wno-unused-but-set-variable)
--    endif()
-     if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
-       target_compile_options_if_supported(asmjit -Wno-extra-semi)
-       target_compile_options_if_supported(fbgemm -Wno-extra-semi)
-@@ -829,7 +804,7 @@ if(NOT TARGET fp16 AND NOT USE_SYSTEM_FP16)
-       "${CONFU_DEPENDENCIES_BINARY_DIR}/FP16")
-   endif()
- elseif(NOT TARGET fp16 AND USE_SYSTEM_FP16)
--  add_library(fp16 STATIC "/usr/include/fp16.h")
-+  add_library(fp16 STATIC "#FP16_INCLUDE_DIR")
-   set_target_properties(fp16 PROPERTIES LINKER_LANGUAGE C)
- endif()
- list(APPEND Caffe2_DEPENDENCY_LIBS fp16)
-@@ -1170,7 +1145,6 @@ if(USE_DISTRIBUTED AND USE_TENSORPIPE)
-       message(WARNING "Archived TensorPipe forces CMake compatibility mode")
-       set(CMAKE_POLICY_VERSION_MINIMUM 3.5)
-     endif()
--    add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/tensorpipe)
-     if(CMAKE_VERSION VERSION_GREATER_EQUAL "4.0.0")
-       unset(CMAKE_POLICY_VERSION_MINIMUM)
-     endif()
-@@ -1340,7 +1314,7 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX)
-     endif()
-     set_property(TARGET onnx_proto PROPERTY IMPORTED_LOCATION ${ONNX_PROTO_LIBRARY})
-     message("-- Found onnx: ${ONNX_LIBRARY} ${ONNX_PROTO_LIBRARY}")
--    list(APPEND Caffe2_DEPENDENCY_LIBS onnx_proto onnx)
-+    list(APPEND Caffe2_DEPENDENCY_LIBS onnx_proto onnx onnx_optimizer)
-   endif()
-   # Recover the build shared libs option.
-   set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS})
-@@ -1500,9 +1474,8 @@ if(NOT INTERN_BUILD_MOBILE)
-   endif()
-   if(USE_MKLDNN)
-     include(${CMAKE_CURRENT_LIST_DIR}/public/mkldnn.cmake)
--    if(MKLDNN_FOUND)
-+    if(DNNL_FOUND)
-       set(AT_MKLDNN_ENABLED 1)
--      include_directories(AFTER SYSTEM ${MKLDNN_INCLUDE_DIR})
-     else()
-       message(WARNING "MKLDNN could not be found.")
-       caffe2_update_option(USE_MKLDNN OFF)
-@@ -1583,7 +1556,7 @@ endif()
- #
- set(TEMP_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS})
- set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libs" FORCE)
--add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)
-+find_package(fmt)
- 
- # Disable compiler feature checks for `fmt`.
- #
-@@ -1592,7 +1565,6 @@ add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)
- # CMAKE_CXX_FLAGS in ways that break feature checks. Since we already know
- # `fmt` is compatible with a superset of the compilers that PyTorch is, it
- # shouldn't be too bad to just disable the checks.
--set_target_properties(fmt-header-only PROPERTIES INTERFACE_COMPILE_FEATURES "")
- 
- list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only)
- set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS} CACHE BOOL "Build shared libs" FORCE)
-diff --git a/cmake/External/nnpack.cmake b/cmake/External/nnpack.cmake
-index 8a4a310d6..f413d2e61 100644
---- a/cmake/External/nnpack.cmake
-+++ b/cmake/External/nnpack.cmake
-@@ -40,7 +40,7 @@ endif()
- # (3) Android, iOS, Linux, macOS - supported
- ##############################################################################
- 
--if(ANDROID OR IOS OR ${CMAKE_SYSTEM_NAME} STREQUAL "Linux" OR ${CMAKE_SYSTEM_NAME} STREQUAL "Darwin")
-+if(FALSE)
-   message(STATUS "Brace yourself, we are building NNPACK")
-   set(CAFFE2_THIRD_PARTY_ROOT ${PROJECT_SOURCE_DIR}/third_party)
- 
-@@ -94,6 +94,5 @@ endif()
- # (4) Catch-all: not supported.
- ##############################################################################
- 
--message(WARNING "Unknown platform - I don't know how to build NNPACK. "
--                "See cmake/External/nnpack.cmake for details.")
--set(USE_NNPACK OFF)
-+set(NNPACK_FOUND TRUE)
-+set(USE_NNPACK ON)
-diff --git a/cmake/public/mkldnn.cmake b/cmake/public/mkldnn.cmake
-index 87935625f..9f8fa3df8 100644
---- a/cmake/public/mkldnn.cmake
-+++ b/cmake/public/mkldnn.cmake
-@@ -4,7 +4,7 @@ if(CPU_AARCH64)
-   include(${CMAKE_CURRENT_LIST_DIR}/ComputeLibrary.cmake)
- endif()
- 
--find_package(MKLDNN QUIET)
-+find_package(DNNL REQUIRED)
- 
- if(NOT TARGET caffe2::mkldnn)
-   add_library(caffe2::mkldnn INTERFACE IMPORTED)
-@@ -15,4 +15,4 @@ set_property(
-   ${MKLDNN_INCLUDE_DIR})
- set_property(
-   TARGET caffe2::mkldnn PROPERTY INTERFACE_LINK_LIBRARIES
--  ${MKLDNN_LIBRARIES})
-+  DNNL::dnnl)
-diff --git a/setup.py b/setup.py
-index 61ee9363f..3691cc35c 100644
---- a/setup.py
-+++ b/setup.py
-@@ -508,13 +508,9 @@ def build_deps():
-     # Windows has very poor support for them.
-     sym_files = [
-         "tools/shared/_utils_internal.py",
--        "torch/utils/benchmark/utils/valgrind_wrapper/callgrind.h",
--        "torch/utils/benchmark/utils/valgrind_wrapper/valgrind.h",
-     ]
-     orig_files = [
-         "torch/_utils_internal.py",
--        "third_party/valgrind-headers/callgrind.h",
--        "third_party/valgrind-headers/valgrind.h",
-     ]
-     for sym_file, orig_file in zip(sym_files, orig_files):
-         same = False
-diff --git a/test/cpp/c10d/CMakeLists.txt b/test/cpp/c10d/CMakeLists.txt
-index 5b423241d..e069accd6 100644
---- a/test/cpp/c10d/CMakeLists.txt
-+++ b/test/cpp/c10d/CMakeLists.txt
-@@ -26,17 +26,17 @@ function(c10d_add_test test_src)
-   endif()
- endfunction()
- 
--c10d_add_test(BackoffTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST OFF)
--c10d_add_test(FileStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST ${INSTALL_TEST})
--c10d_add_test(TCPStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST ${INSTALL_TEST})
-+c10d_add_test(BackoffTest.cpp LINK_LIBRARIES torch_cpu gtest_main gtest INSTALL_TEST OFF)
-+c10d_add_test(FileStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main gtest INSTALL_TEST ${INSTALL_TEST})
-+c10d_add_test(TCPStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main gtest INSTALL_TEST ${INSTALL_TEST})
- if(NOT WIN32)
--  c10d_add_test(HashStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST ${INSTALL_TEST})
-+  c10d_add_test(HashStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main gtest INSTALL_TEST ${INSTALL_TEST})
- endif()
- 
- if(USE_CUDA)
-   if(USE_GLOO AND USE_C10D_GLOO)
--    c10d_add_test(ProcessGroupGlooTest.cpp LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main INSTALL_TEST ${INSTALL_TEST})
--    c10d_add_test(ProcessGroupGlooAsyncTest.cpp LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main INSTALL_TEST ${INSTALL_TEST})
-+    c10d_add_test(ProcessGroupGlooTest.cpp LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest INSTALL_TEST ${INSTALL_TEST})
-+    c10d_add_test(ProcessGroupGlooAsyncTest.cpp LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest INSTALL_TEST ${INSTALL_TEST})
-   endif()
-   if(USE_NCCL AND USE_C10D_NCCL)
-     # NCCL is a private dependency of libtorch, but the tests include some
-@@ -45,10 +45,10 @@ if(USE_CUDA)
-     # a private dependency of the tests as well.
-     c10d_add_test(
-       ProcessGroupNCCLTest.cpp
--      LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main __caffe2_nccl INSTALL_TEST ${INSTALL_TEST})
-+      LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest __caffe2_nccl INSTALL_TEST ${INSTALL_TEST})
-     c10d_add_test(
-       ProcessGroupNCCLErrorsTest.cpp
--      LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main __caffe2_nccl INSTALL_TEST ${INSTALL_TEST})
-+      LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest __caffe2_nccl INSTALL_TEST ${INSTALL_TEST})
-     if(INSTALL_TEST)
-       install(TARGETS c10d_cuda_test DESTINATION lib)
-     endif()
-@@ -60,14 +60,14 @@ if(USE_CUDA)
-     # a private dependency of the tests as well.
-     c10d_add_test(
-       ProcessGroupUCCTest.cpp
--      LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main __caffe2_ucc INSTALL_TEST ${INSTALL_TEST})
-+      LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest __caffe2_ucc INSTALL_TEST ${INSTALL_TEST})
-     if(INSTALL_TEST)
-       install(TARGETS c10d_cuda_test DESTINATION lib)
-     endif()
-   endif()
- else()
-   if(USE_GLOO AND USE_C10D_GLOO)
--    c10d_add_test(ProcessGroupGlooTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST OFF)
-+    c10d_add_test(ProcessGroupGlooTest.cpp LINK_LIBRARIES torch_cpu gtest_main gtest INSTALL_TEST OFF)
-   endif()
- endif()
- 
-diff --git a/test/cpp/tensorexpr/CMakeLists.txt b/test/cpp/tensorexpr/CMakeLists.txt
-index 9c409e078..6cddd8de4 100644
---- a/test/cpp/tensorexpr/CMakeLists.txt
-+++ b/test/cpp/tensorexpr/CMakeLists.txt
-@@ -51,7 +51,7 @@ target_include_directories(tutorial_tensorexpr PRIVATE ${ATen_CPU_INCLUDE})
- # pthreadpool header. For some build environment we need add the dependency
- # explicitly.
- if(USE_PTHREADPOOL)
--  target_link_libraries(test_tensorexpr PRIVATE pthreadpool_interface)
-+  target_link_libraries(test_tensorexpr PRIVATE pthreadpool)
- endif()
- if(USE_CUDA)
-   target_compile_definitions(test_tensorexpr PRIVATE USE_CUDA)
-diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt
-index 8b8ebdc6e..034b5e56c 100644
---- a/torch/CMakeLists.txt
-+++ b/torch/CMakeLists.txt
-@@ -82,8 +82,6 @@ set(TORCH_PYTHON_LINK_LIBRARIES
-     Python::Module
-     pybind::pybind11
-     opentelemetry::api
--    httplib
--    nlohmann
-     shm
-     fmt::fmt-header-only
-     ATEN_CPU_FILES_GEN_LIB)
diff --git a/gnu/packages/patches/python-pytorch-system-libraries.patch b/gnu/packages/patches/python-pytorch-system-libraries.patch
index 6b3f36f3f0c..6d8da60e413 100644
--- a/gnu/packages/patches/python-pytorch-system-libraries.patch
+++ b/gnu/packages/patches/python-pytorch-system-libraries.patch
@@ -5,10 +5,10 @@ httlib, ideep, miniz, nnpack, oneapi-dnnl, pocketfft, pthreadpool,
 qnnpack-pytorch, tensorpipe, valgrind and xnnpack packages.
 
 diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt
-index 6d9152a4d0..97cb945722 100644
+index 085af373e..3287429b4 100644
 --- a/aten/src/ATen/CMakeLists.txt
 +++ b/aten/src/ATen/CMakeLists.txt
-@@ -414,9 +414,9 @@ if(AT_NNPACK_ENABLED)
+@@ -468,9 +468,9 @@ if(AT_NNPACK_ENABLED)
    list(APPEND ATen_CPU_DEPENDENCY_LIBS nnpack) # cpuinfo is added below
  endif()
  
@@ -19,10 +19,10 @@ index 6d9152a4d0..97cb945722 100644
 +  list(APPEND ATen_CPU_DEPENDENCY_LIBS DNNL::dnnl)
 +endif(USE_MKLDNN)
  
- if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "^(s390x|ppc64le)$")
-   list(APPEND ATen_CPU_DEPENDENCY_LIBS cpuinfo)
+ if(USE_MKLDNN_ACL)
+     list(APPEND ATen_CPU_INCLUDE ${ACL_INCLUDE_DIRS})
 diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
-index 9be7f3732f..111215dacc 100644
+index d2d23b7ab..1a7e5a042 100644
 --- a/caffe2/CMakeLists.txt
 +++ b/caffe2/CMakeLists.txt
 @@ -91,9 +91,6 @@ if(NOT MSVC AND USE_XNNPACK)
@@ -35,7 +35,7 @@ index 9be7f3732f..111215dacc 100644
    endif()
  endif()
  
-@@ -1081,7 +1078,6 @@ if(USE_XPU)
+@@ -1135,7 +1132,6 @@ if(USE_XPU)
  endif()
  
  if(NOT MSVC AND USE_XNNPACK)
@@ -43,7 +43,7 @@ index 9be7f3732f..111215dacc 100644
  endif()
  
  # ==========================================================
-@@ -1195,8 +1191,8 @@ endif()
+@@ -1254,8 +1250,8 @@ endif()
  target_include_directories(torch_cpu PRIVATE
    ${TORCH_ROOT}/third_party/cpp-httplib)
  
@@ -54,7 +54,7 @@ index 9be7f3732f..111215dacc 100644
  
  install(DIRECTORY "${TORCH_SRC_DIR}/csrc"
    DESTINATION ${TORCH_INSTALL_INCLUDE_DIR}/torch
-@@ -1425,6 +1421,7 @@ target_link_libraries(torch_cpu PUBLIC c10)
+@@ -1494,6 +1490,7 @@ target_link_libraries(torch_cpu PUBLIC c10)
  target_link_libraries(torch_cpu PUBLIC ${Caffe2_PUBLIC_DEPENDENCY_LIBS})
  target_link_libraries(torch_cpu PRIVATE ${Caffe2_DEPENDENCY_LIBS})
  target_link_libraries(torch_cpu PRIVATE ${Caffe2_DEPENDENCY_WHOLE_LINK_LIBS})
@@ -62,7 +62,7 @@ index 9be7f3732f..111215dacc 100644
  if(USE_MPI)
    target_link_libraries(torch_cpu PRIVATE MPI::MPI_CXX)
  endif()
-@@ -1659,7 +1656,7 @@ if(BUILD_STATIC_RUNTIME_BENCHMARK)
+@@ -1728,7 +1725,7 @@ if(BUILD_STATIC_RUNTIME_BENCHMARK)
    add_executable(static_runtime_bench "${STATIC_RUNTIME_BENCHMARK_SRCS}")
    add_executable(static_runtime_test "${STATIC_RUNTIME_TEST_SRCS}")
    target_link_libraries(static_runtime_bench torch_library benchmark)
@@ -71,7 +71,7 @@ index 9be7f3732f..111215dacc 100644
  endif()
  
  if(BUILD_MOBILE_BENCHMARK)
-@@ -1678,7 +1675,7 @@ if(BUILD_MOBILE_TEST)
+@@ -1747,7 +1744,7 @@ if(BUILD_MOBILE_TEST)
    foreach(test_src ${ATen_MOBILE_TEST_SRCS})
      get_filename_component(test_name ${test_src} NAME_WE)
      add_executable(${test_name} "${test_src}")
@@ -80,7 +80,7 @@ index 9be7f3732f..111215dacc 100644
      target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
      target_include_directories(${test_name} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
      target_include_directories(${test_name} PRIVATE ${ATen_CPU_INCLUDE})
-@@ -1699,7 +1696,7 @@ if(BUILD_TEST)
+@@ -1768,7 +1765,7 @@ if(BUILD_TEST)
          if(NOT MSVC)
            add_executable(${test_name}_${CPU_CAPABILITY} "${test_src}" ../aten/src/ATen/native/quantized/AffineQuantizerBase.cpp)
            # TODO: Get rid of c10 dependency (which is only needed for the implementation of AT_ERROR)
@@ -89,7 +89,7 @@ index 9be7f3732f..111215dacc 100644
            if(USE_FBGEMM)
              target_link_libraries(${test_name}_${CPU_CAPABILITY} fbgemm)
            endif()
-@@ -1713,7 +1710,7 @@ if(BUILD_TEST)
+@@ -1782,7 +1779,7 @@ if(BUILD_TEST)
            endif()
          else()
            add_executable(${test_name}_${CPU_CAPABILITY} "${test_src}")
@@ -98,7 +98,7 @@ index 9be7f3732f..111215dacc 100644
          endif()
          target_include_directories(${test_name}_${CPU_CAPABILITY} PRIVATE $<INSTALL_INTERFACE:include>)
          target_include_directories(${test_name}_${CPU_CAPABILITY} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
-@@ -1730,7 +1727,7 @@ if(BUILD_TEST)
+@@ -1799,7 +1796,7 @@ if(BUILD_TEST)
    foreach(test_src ${Caffe2_CPU_TEST_SRCS})
      get_filename_component(test_name ${test_src} NAME_WE)
      add_executable(${test_name} "${test_src}")
@@ -107,7 +107,34 @@ index 9be7f3732f..111215dacc 100644
      if(NOT MSVC)
        target_link_libraries(${test_name} stdc++)
      endif()
-@@ -1810,7 +1807,7 @@ if(BUILD_TEST)
+@@ -1823,7 +1820,7 @@ if(BUILD_TEST)
+       add_executable(${test_name} "${test_src}")
+       find_library(metal NAMES Metal)
+       find_library(foundation NAMES Foundation)
+-      target_link_libraries(${test_name} torch_library gtest_main ${metal} ${foundation})
++      target_link_libraries(${test_name} torch_library gtest_main gtest ${metal} ${foundation})
+       target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
+       target_include_directories(${test_name} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
+       target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE})
+@@ -1843,7 +1840,7 @@ if(BUILD_TEST)
+     foreach(test_src ${Caffe2_GPU_TEST_SRCS})
+       get_filename_component(test_name ${test_src} NAME_WE)
+       add_executable(${test_name} "${test_src}")
+-      target_link_libraries(${test_name} torch_library gtest_main)
++      target_link_libraries(${test_name} torch_library gtest_main gtest)
+       if(USE_CUDNN AND ${test_name} MATCHES "cudnn")
+         target_link_libraries(${test_name} torch::cudnn)
+       endif()
+@@ -1865,7 +1862,7 @@ if(BUILD_TEST)
+     foreach(test_src ${Caffe2_XPU_TEST_SRCS})
+       get_filename_component(test_name ${test_src} NAME_WE)
+       add_executable(${test_name} "${test_src}")
+-      target_link_libraries(${test_name} torch_library gtest_main)
++      target_link_libraries(${test_name} torch_library gtest_main gtest)
+       target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
+       target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE})
+       add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
+@@ -1880,7 +1877,7 @@ if(BUILD_TEST)
      foreach(test_src ${Caffe2_VULKAN_TEST_SRCS})
        get_filename_component(test_name ${test_src} NAME_WE)
        add_executable(${test_name} "${test_src}")
@@ -116,23 +143,32 @@ index 9be7f3732f..111215dacc 100644
        target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
        target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE})
        add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
+@@ -1899,7 +1896,7 @@ if(BUILD_TEST)
+     foreach(test_src ${Caffe2_HIP_TEST_SRCS})
+       get_filename_component(test_name ${test_src} NAME_WE)
+       add_executable(${test_name} "${test_src}")
+-      target_link_libraries(${test_name} torch_library gtest_main)
++      target_link_libraries(${test_name} torch_library gtest_main gtest)
+       target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
+       target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE} ${Caffe2_HIP_INCLUDE})
+       target_compile_options(${test_name} PRIVATE ${HIP_CXX_FLAGS})
 diff --git a/caffe2/serialize/CMakeLists.txt b/caffe2/serialize/CMakeLists.txt
-index 1552b59d0d..67e1a9a1a3 100644
+index ebbff0f29..dcded2590 100644
 --- a/caffe2/serialize/CMakeLists.txt
 +++ b/caffe2/serialize/CMakeLists.txt
 @@ -2,7 +2,6 @@ file(GLOB tmp *_test.cc)
  
  set(Caffe2_CPU_TEST_SRCS ${Caffe2_CPU_TEST_SRCS} ${tmp})
  list(APPEND Caffe2_CPU_SRCS
--  ${PROJECT_SOURCE_DIR}/third_party/miniz-2.1.0/miniz.c
+-  ${PROJECT_SOURCE_DIR}/third_party/miniz-3.0.2/miniz.c
    ${CMAKE_CURRENT_SOURCE_DIR}/inline_container.cc
    ${CMAKE_CURRENT_SOURCE_DIR}/istream_adapter.cc
    ${CMAKE_CURRENT_SOURCE_DIR}/file_adapter.cc
 diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
-index e78305e0a8..5b3c3d7bf2 100644
+index be45936a8..bb1aa1cc1 100644
 --- a/cmake/Dependencies.cmake
 +++ b/cmake/Dependencies.cmake
-@@ -270,7 +270,7 @@ endif()
+@@ -276,7 +276,7 @@ endif()
  # --- [ PocketFFT
  set(AT_POCKETFFT_ENABLED 0)
  if(NOT AT_MKL_ENABLED)
@@ -141,7 +177,7 @@ index e78305e0a8..5b3c3d7bf2 100644
    if(NOT EXISTS "${POCKETFFT_INCLUDE_DIR}")
      message(FATAL_ERROR "pocketfft directory not found, expected ${POCKETFFT_INCLUDE_DIR}")
    elif(NOT EXISTS "${POCKETFFT_INCLUDE_DIR}/pocketfft_hdronly.h")
-@@ -465,15 +465,6 @@ if(USE_PYTORCH_QNNPACK)
+@@ -460,15 +460,6 @@ if(USE_PYTORCH_QNNPACK)
        set(PYTORCH_QNNPACK_BUILD_TESTS OFF CACHE BOOL "")
        set(PYTORCH_QNNPACK_BUILD_BENCHMARKS OFF CACHE BOOL "")
        set(PYTORCH_QNNPACK_LIBRARY_TYPE "static" CACHE STRING "")
@@ -154,10 +190,31 @@ index e78305e0a8..5b3c3d7bf2 100644
 -      set_property(TARGET cpuinfo PROPERTY POSITION_INDEPENDENT_CODE ON)
 -      # QNNPACK depends on gemmlowp headers
 -      target_include_directories(pytorch_qnnpack PRIVATE "${CAFFE2_THIRD_PARTY_ROOT}/gemmlowp")
+     endif()
  
-       if(PYTORCH_QNNPACK_CUSTOM_THREADPOOL)
-         target_compile_definitions(
-@@ -660,11 +651,6 @@ if(BUILD_TEST OR BUILD_MOBILE_BENCHMARK OR BUILD_MOBILE_TEST)
+     list(APPEND Caffe2_DEPENDENCY_LIBS pytorch_qnnpack)
+@@ -558,16 +549,15 @@ if(USE_XNNPACK AND NOT USE_SYSTEM_XNNPACK)
+   list(APPEND Caffe2_DEPENDENCY_LIBS XNNPACK microkernels-prod)
+ elseif(NOT TARGET XNNPACK AND USE_SYSTEM_XNNPACK)
+   add_library(XNNPACK SHARED IMPORTED)
+-  add_library(microkernels-prod SHARED IMPORTED)
++  add_library(microkernels-prod INTERFACE IMPORTED)
+   find_library(XNNPACK_LIBRARY XNNPACK)
+-  find_library(microkernels-prod_LIBRARY microkernels-prod)
+   set_property(TARGET XNNPACK PROPERTY IMPORTED_LOCATION "${XNNPACK_LIBRARY}")
+-  set_property(TARGET microkernels-prod PROPERTY IMPORTED_LOCATION "${microkernels-prod_LIBRARY}")
+-  if(NOT XNNPACK_LIBRARY or NOT microkernels-prod_LIBRARY)
++  set_property(TARGET microkernels-prod PROPERTY INTERFACE_LINK_LIBRARIES XNNPACK)
++  if(NOT XNNPACK_LIBRARY)
+     message(FATAL_ERROR "Cannot find XNNPACK")
+   endif()
+   message("-- Found XNNPACK: ${XNNPACK_LIBRARY}")
+-  list(APPEND Caffe2_DEPENDENCY_LIBS XNNPACK microkernels-prod)
++  list(APPEND Caffe2_DEPENDENCY_LIBS XNNPACK)
+ endif()
+ 
+ # ---[ Vulkan deps
+@@ -650,11 +640,6 @@ if(BUILD_TEST OR BUILD_MOBILE_BENCHMARK OR BUILD_MOBILE_TEST)
    # this shouldn't be necessary anymore.
    get_property(INC_DIR_temp DIRECTORY PROPERTY INCLUDE_DIRECTORIES)
    set_property(DIRECTORY PROPERTY INCLUDE_DIRECTORIES "")
@@ -169,7 +226,7 @@ index e78305e0a8..5b3c3d7bf2 100644
  
    # We will not need to test benchmark lib itself.
    set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "Disable benchmark testing as we don't need it.")
-@@ -742,16 +728,6 @@ if(USE_FBGEMM)
+@@ -732,16 +717,6 @@ if(USE_FBGEMM)
      if(USE_ASAN)
        set(USE_SANITIZER "address,undefined" CACHE STRING "-fsanitize options for FBGEMM")
      endif()
@@ -183,27 +240,27 @@ index e78305e0a8..5b3c3d7bf2 100644
 -      target_compile_options_if_supported(asmjit -Wno-deprecated-copy)
 -      target_compile_options_if_supported(asmjit -Wno-unused-but-set-variable)
 -    endif()
+     if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+       target_compile_options_if_supported(asmjit -Wno-extra-semi)
+       target_compile_options_if_supported(fbgemm -Wno-extra-semi)
+@@ -829,7 +804,7 @@ if(NOT TARGET fp16 AND NOT USE_SYSTEM_FP16)
+       "${CONFU_DEPENDENCIES_BINARY_DIR}/FP16")
    endif()
- 
-   if(USE_FBGEMM)
-@@ -819,7 +795,7 @@ if(NOT TARGET fp16 AND NOT USE_SYSTEM_FP16)
-     "${FP16_SOURCE_DIR}"
-     "${CONFU_DEPENDENCIES_BINARY_DIR}/FP16")
  elseif(NOT TARGET fp16 AND USE_SYSTEM_FP16)
 -  add_library(fp16 STATIC "/usr/include/fp16.h")
 +  add_library(fp16 STATIC "#FP16_INCLUDE_DIR")
    set_target_properties(fp16 PROPERTIES LINKER_LANGUAGE C)
  endif()
  list(APPEND Caffe2_DEPENDENCY_LIBS fp16)
-@@ -1161,7 +1137,6 @@ if(USE_DISTRIBUTED AND USE_TENSORPIPE)
- 
-     # Tensorpipe uses cuda_add_library
-     torch_update_find_cuda_flags()
+@@ -1170,7 +1145,6 @@ if(USE_DISTRIBUTED AND USE_TENSORPIPE)
+       message(WARNING "Archived TensorPipe forces CMake compatibility mode")
+       set(CMAKE_POLICY_VERSION_MINIMUM 3.5)
+     endif()
 -    add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/tensorpipe)
- 
-     list(APPEND Caffe2_DEPENDENCY_LIBS tensorpipe)
-     list(APPEND Caffe2_DEPENDENCY_LIBS nlohmann)
-@@ -1317,7 +1292,7 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX)
+     if(CMAKE_VERSION VERSION_GREATER_EQUAL "4.0.0")
+       unset(CMAKE_POLICY_VERSION_MINIMUM)
+     endif()
+@@ -1340,7 +1314,7 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX)
      endif()
      set_property(TARGET onnx_proto PROPERTY IMPORTED_LOCATION ${ONNX_PROTO_LIBRARY})
      message("-- Found onnx: ${ONNX_LIBRARY} ${ONNX_PROTO_LIBRARY}")
@@ -212,7 +269,7 @@ index e78305e0a8..5b3c3d7bf2 100644
    endif()
    # Recover the build shared libs option.
    set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS})
-@@ -1465,9 +1440,8 @@ if(NOT INTERN_BUILD_MOBILE)
+@@ -1500,9 +1474,8 @@ if(NOT INTERN_BUILD_MOBILE)
    endif()
    if(USE_MKLDNN)
      include(${CMAKE_CURRENT_LIST_DIR}/public/mkldnn.cmake)
@@ -223,7 +280,7 @@ index e78305e0a8..5b3c3d7bf2 100644
      else()
        message(WARNING "MKLDNN could not be found.")
        caffe2_update_option(USE_MKLDNN OFF)
-@@ -1519,7 +1493,7 @@ endif()
+@@ -1583,7 +1556,7 @@ endif()
  #
  set(TEMP_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS})
  set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libs" FORCE)
@@ -232,7 +289,7 @@ index e78305e0a8..5b3c3d7bf2 100644
  
  # Disable compiler feature checks for `fmt`.
  #
-@@ -1528,7 +1502,6 @@ add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)
+@@ -1592,7 +1565,6 @@ add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)
  # CMAKE_CXX_FLAGS in ways that break feature checks. Since we already know
  # `fmt` is compatible with a superset of the compilers that PyTorch is, it
  # shouldn't be too bad to just disable the checks.
@@ -241,7 +298,7 @@ index e78305e0a8..5b3c3d7bf2 100644
  list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only)
  set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS} CACHE BOOL "Build shared libs" FORCE)
 diff --git a/cmake/External/nnpack.cmake b/cmake/External/nnpack.cmake
-index 9d5f0643ec..c3624e582a 100644
+index 8a4a310d6..f413d2e61 100644
 --- a/cmake/External/nnpack.cmake
 +++ b/cmake/External/nnpack.cmake
 @@ -40,7 +40,7 @@ endif()
@@ -253,7 +310,7 @@ index 9d5f0643ec..c3624e582a 100644
    message(STATUS "Brace yourself, we are building NNPACK")
    set(CAFFE2_THIRD_PARTY_ROOT ${PROJECT_SOURCE_DIR}/third_party)
  
-@@ -112,6 +112,5 @@ endif()
+@@ -94,6 +94,5 @@ endif()
  # (4) Catch-all: not supported.
  ##############################################################################
  
@@ -263,7 +320,7 @@ index 9d5f0643ec..c3624e582a 100644
 +set(NNPACK_FOUND TRUE)
 +set(USE_NNPACK ON)
 diff --git a/cmake/public/mkldnn.cmake b/cmake/public/mkldnn.cmake
-index 87935625f9..9f8fa3df81 100644
+index 87935625f..9f8fa3df8 100644
 --- a/cmake/public/mkldnn.cmake
 +++ b/cmake/public/mkldnn.cmake
 @@ -4,7 +4,7 @@ if(CPU_AARCH64)
@@ -282,10 +339,10 @@ index 87935625f9..9f8fa3df81 100644
 -  ${MKLDNN_LIBRARIES})
 +  DNNL::dnnl)
 diff --git a/setup.py b/setup.py
-index 2b0cfa99d7..2d76b0d035 100644
+index 61ee9363f..3691cc35c 100644
 --- a/setup.py
 +++ b/setup.py
-@@ -491,13 +491,9 @@ def build_deps():
+@@ -508,13 +508,9 @@ def build_deps():
      # Windows has very poor support for them.
      sym_files = [
          "tools/shared/_utils_internal.py",
@@ -300,40 +357,30 @@ index 2b0cfa99d7..2d76b0d035 100644
      for sym_file, orig_file in zip(sym_files, orig_files):
          same = False
 diff --git a/test/cpp/c10d/CMakeLists.txt b/test/cpp/c10d/CMakeLists.txt
-index 0874852517..58ad3e9d66 100644
+index 5b423241d..e069accd6 100644
 --- a/test/cpp/c10d/CMakeLists.txt
 +++ b/test/cpp/c10d/CMakeLists.txt
-@@ -16,15 +16,15 @@ function(c10d_add_test test_src)
-   add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
+@@ -26,17 +26,17 @@ function(c10d_add_test test_src)
+   endif()
  endfunction()
  
--c10d_add_test(BackoffTest.cpp torch_cpu gtest_main)
--c10d_add_test(FileStoreTest.cpp torch_cpu gtest_main)
--c10d_add_test(TCPStoreTest.cpp torch_cpu gtest_main)
-+c10d_add_test(BackoffTest.cpp torch_cpu gtest_main gtest)
-+c10d_add_test(FileStoreTest.cpp torch_cpu gtest_main gtest)
-+c10d_add_test(TCPStoreTest.cpp torch_cpu gtest_main gtest)
- if(INSTALL_TEST)
-   install(TARGETS FileStoreTest DESTINATION bin)
-   install(TARGETS TCPStoreTest DESTINATION bin)
- endif()
+-c10d_add_test(BackoffTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST OFF)
+-c10d_add_test(FileStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST ${INSTALL_TEST})
+-c10d_add_test(TCPStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST ${INSTALL_TEST})
++c10d_add_test(BackoffTest.cpp LINK_LIBRARIES torch_cpu gtest_main gtest INSTALL_TEST OFF)
++c10d_add_test(FileStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main gtest INSTALL_TEST ${INSTALL_TEST})
++c10d_add_test(TCPStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main gtest INSTALL_TEST ${INSTALL_TEST})
  if(NOT WIN32)
--  c10d_add_test(HashStoreTest.cpp torch_cpu gtest_main)
-+  c10d_add_test(HashStoreTest.cpp torch_cpu gtest_main gtest)
-   if(INSTALL_TEST)
-     install(TARGETS HashStoreTest DESTINATION bin)
-   endif()
-@@ -32,11 +32,11 @@ endif()
+-  c10d_add_test(HashStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST ${INSTALL_TEST})
++  c10d_add_test(HashStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main gtest INSTALL_TEST ${INSTALL_TEST})
+ endif()
  
  if(USE_CUDA)
    if(USE_GLOO AND USE_C10D_GLOO)
--    c10d_add_test(ProcessGroupGlooTest.cpp torch_cpu c10d_cuda_test gtest_main)
-+    c10d_add_test(ProcessGroupGlooTest.cpp torch_cpu c10d_cuda_test gtest_main gtest)
-     if(INSTALL_TEST)
-       install(TARGETS ProcessGroupGlooTest DESTINATION bin)
-     endif()
--    c10d_add_test(ProcessGroupGlooAsyncTest.cpp torch_cpu c10d_cuda_test gtest_main)
-+    c10d_add_test(ProcessGroupGlooAsyncTest.cpp torch_cpu c10d_cuda_test gtest_main gtest)
+-    c10d_add_test(ProcessGroupGlooTest.cpp LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main INSTALL_TEST ${INSTALL_TEST})
+-    c10d_add_test(ProcessGroupGlooAsyncTest.cpp LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main INSTALL_TEST ${INSTALL_TEST})
++    c10d_add_test(ProcessGroupGlooTest.cpp LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest INSTALL_TEST ${INSTALL_TEST})
++    c10d_add_test(ProcessGroupGlooAsyncTest.cpp LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest INSTALL_TEST ${INSTALL_TEST})
    endif()
    if(USE_NCCL AND USE_C10D_NCCL)
      # NCCL is a private dependency of libtorch, but the tests include some
@@ -341,35 +388,34 @@ index 0874852517..58ad3e9d66 100644
      # a private dependency of the tests as well.
      c10d_add_test(
        ProcessGroupNCCLTest.cpp
--      torch_cpu c10d_cuda_test gtest_main __caffe2_nccl)
-+      torch_cpu c10d_cuda_test gtest_main gtest __caffe2_nccl)
+-      LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main __caffe2_nccl INSTALL_TEST ${INSTALL_TEST})
++      LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest __caffe2_nccl INSTALL_TEST ${INSTALL_TEST})
      c10d_add_test(
        ProcessGroupNCCLErrorsTest.cpp
--      torch_cpu c10d_cuda_test gtest_main __caffe2_nccl)
-+      torch_cpu c10d_cuda_test gtest_main gtest __caffe2_nccl)
+-      LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main __caffe2_nccl INSTALL_TEST ${INSTALL_TEST})
++      LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest __caffe2_nccl INSTALL_TEST ${INSTALL_TEST})
      if(INSTALL_TEST)
-       install(TARGETS ProcessGroupNCCLTest DESTINATION bin)
-       install(TARGETS ProcessGroupNCCLErrorsTest DESTINATION bin)
-@@ -62,7 +62,7 @@ if(USE_CUDA)
+       install(TARGETS c10d_cuda_test DESTINATION lib)
+     endif()
+@@ -60,14 +60,14 @@ if(USE_CUDA)
      # a private dependency of the tests as well.
      c10d_add_test(
        ProcessGroupUCCTest.cpp
--      torch_cpu c10d_cuda_test gtest_main __caffe2_ucc)
-+      torch_cpu c10d_cuda_test gtest_main gtest __caffe2_ucc)
+-      LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main __caffe2_ucc INSTALL_TEST ${INSTALL_TEST})
++      LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest __caffe2_ucc INSTALL_TEST ${INSTALL_TEST})
      if(INSTALL_TEST)
-       install(TARGETS ProcessGroupUCCTest DESTINATION bin)
        install(TARGETS c10d_cuda_test DESTINATION lib)
-@@ -70,7 +70,7 @@ if(USE_CUDA)
+     endif()
    endif()
  else()
    if(USE_GLOO AND USE_C10D_GLOO)
--    c10d_add_test(ProcessGroupGlooTest.cpp torch_cpu gtest_main)
-+    c10d_add_test(ProcessGroupGlooTest.cpp torch_cpu gtest_main gtest)
+-    c10d_add_test(ProcessGroupGlooTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST OFF)
++    c10d_add_test(ProcessGroupGlooTest.cpp LINK_LIBRARIES torch_cpu gtest_main gtest INSTALL_TEST OFF)
    endif()
  endif()
  
 diff --git a/test/cpp/tensorexpr/CMakeLists.txt b/test/cpp/tensorexpr/CMakeLists.txt
-index 179270c4a4..72f5582e81 100644
+index 9c409e078..6cddd8de4 100644
 --- a/test/cpp/tensorexpr/CMakeLists.txt
 +++ b/test/cpp/tensorexpr/CMakeLists.txt
 @@ -51,7 +51,7 @@ target_include_directories(tutorial_tensorexpr PRIVATE ${ATen_CPU_INCLUDE})
@@ -382,7 +428,7 @@ index 179270c4a4..72f5582e81 100644
  if(USE_CUDA)
    target_compile_definitions(test_tensorexpr PRIVATE USE_CUDA)
 diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt
-index c74b45431c..5b5d0919d0 100644
+index 8b8ebdc6e..034b5e56c 100644
 --- a/torch/CMakeLists.txt
 +++ b/torch/CMakeLists.txt
 @@ -82,8 +82,6 @@ set(TORCH_PYTHON_LINK_LIBRARIES
diff --git a/gnu/packages/patches/python-pytorch-without-kineto-2.7.0.patch b/gnu/packages/patches/python-pytorch-without-kineto-2.7.0.patch
deleted file mode 100644
index 1b10f18d5a6..00000000000
--- a/gnu/packages/patches/python-pytorch-without-kineto-2.7.0.patch
+++ /dev/null
@@ -1,64 +0,0 @@
-Even when building without Kineto, the <ActivityType.h> header is still
-imported and the ActivityType type is used. This patch was copied from
-https://github.com/pytorch/pytorch/pull/111048 and adapted.
-
-diff --git a/torch/csrc/profiler/kineto_shim.h b/torch/csrc/profiler/kineto_shim.h
-index c4efd7785..2caef1f1e 100644
---- a/torch/csrc/profiler/kineto_shim.h
-+++ b/torch/csrc/profiler/kineto_shim.h
-@@ -12,7 +12,55 @@
- #undef USE_KINETO
- #endif
- 
-+#ifdef USE_KINETO
- #include <ActivityType.h>
-+#else
-+namespace libkineto {
-+// copied from header
-+/*
-+ * Copyright (c) Meta Platforms, Inc. and affiliates.
-+ * All rights reserved.
-+ *
-+ * This source code is licensed under the BSD-style license found in the
-+ * LICENSE file in the root directory of this source tree.
-+ */
-+
-+// Note : All activity types are not enabled by default. Please add them
-+// at correct position in the enum
-+enum class ActivityType {
-+    // Activity types enabled by default
-+    CPU_OP = 0, // cpu side ops
-+    USER_ANNOTATION,
-+    GPU_USER_ANNOTATION,
-+    GPU_MEMCPY,
-+    GPU_MEMSET,
-+    CONCURRENT_KERNEL, // on-device kernels
-+    EXTERNAL_CORRELATION,
-+    CUDA_RUNTIME, // host side cuda runtime events
-+    CUDA_DRIVER, // host side cuda driver events
-+    CPU_INSTANT_EVENT, // host side point-like events
-+    PYTHON_FUNCTION,
-+    OVERHEAD, // CUPTI induced overhead events sampled from its overhead API.
-+
-+    // Optional Activity types
-+    CUDA_SYNC, // synchronization events between runtime and kernels
-+    GLOW_RUNTIME, // host side glow runtime events
-+    MTIA_RUNTIME, // host side MTIA runtime events
-+    CUDA_PROFILER_RANGE, // CUPTI Profiler range for performance metrics
-+    MTIA_CCP_EVENTS, // MTIA ondevice CCP events
-+    HPU_OP, // HPU host side runtime event
-+    XPU_RUNTIME, // host side xpu runtime events
-+    MTIA_WORKLOADD,
-+
-+    PRIVATEUSE1_RUNTIME,
-+    PRIVATEUSE1_DRIVER,
-+
-+    ENUM_COUNT, // This is to add buffer and not used for any profiling logic. Add your new type before it.
-+    OPTIONAL_ACTIVITY_TYPE_START = CUDA_SYNC,
-+};
-+}
-+
-+#endif
- 
- #include <torch/csrc/Export.h>
- #include <torch/csrc/profiler/api.h>