Fix CoreML EP issue with external weight path handling. (#28062)

skottmckay · Copilot · web-flow · commit 7fdc60e70b70 · 2026-04-16T16:29:46.000-07:00
### Description

GitHub issue #28005: CoreML EP crashes with SIGBUS when loading a model with external data via CreateSession from a file path.

**Root Cause**

In tensorprotoutils.cc:362, `TensorProtoWithExternalDataToTensorProto()` passes `model_path` directly to `ReadExternalDataForTensor()`, but `ReadExternalDataForTensor()` expects a directory (it joins the path with the external data filename). This causes path construction like `/path/to/model.onnx/model.onnx_data` instead of `/path/to/model.onnx_data`. The CPU EP's `UnpackInitializerData()` at line ~2572 correctly uses `model_path.parent_path()`. The CoreML EP is the only caller that passes a full model file path (via `graph_viewer_.ModelPath()` in model_builder.cc:791), triggering the bug.

**Changes**

- Fix (tensorprotoutils.cc:364): `ReadExternalDataForTensor(ten_proto, model_path, ...)` is now called with a directory. When `model_path` is an existing regular file, `model_path.parent_path()` is passed, matching the pattern used by `UnpackInitializerData()`; otherwise `model_path` is passed unchanged, preserving behavior for callers that already supply a directory.
- Test (coreml_basic_test.cc:444): Added `CoreMLExecutionProviderTest.ExternalDataInitializer` — creates a model with external data, saves it to disk, and loads it from a file path with CoreML EP to verify the fix. The test passes with the fix applied.

### Motivation and Context

#28005

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
diff --git a/onnxruntime/core/framework/tensorprotoutils.cc b/onnxruntime/core/framework/tensorprotoutils.cc
@@ -359,8 +359,17 @@ Status TensorProtoWithExternalDataToTensorProto(
   } else {
     // Load the external data into memory
     std::vector<uint8_t> unpacked_data;
-    ORT_RETURN_IF_ERROR(ReadExternalDataForTensor(ten_proto, model_path, unpacked_data));
+    // ReadExternalDataForTensor expects a directory. Preserve existing behavior for callers that
+    // already pass a directory, and only use parent_path() when model_path is a confirmed file.
+    std::filesystem::path external_data_path = model_path;
+    std::error_code ec;
+    if (std::filesystem::is_regular_file(model_path, ec)) {
+      external_data_path = model_path.parent_path();
+    } else if (ec) {
+      ec.clear();
+    }
 
+    ORT_RETURN_IF_ERROR(ReadExternalDataForTensor(ten_proto, external_data_path, unpacked_data));
     // Set the raw data in the new tensor
     onnxruntime::utils::SetRawDataInTensorProto(result, unpacked_data.data(), unpacked_data.size());
   }
diff --git a/onnxruntime/test/providers/coreml/coreml_basic_test.cc b/onnxruntime/test/providers/coreml/coreml_basic_test.cc
@@ -1,7 +1,12 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
 
+#include <algorithm>
+#include <cstdint>
+#include <filesystem>
+#include <fstream>
 #include <memory>
+#include <vector>
 
 #include "core/common/logging/logging.h"
 #include "core/graph/constants.h"
@@ -17,6 +22,7 @@
 #include "test/util/include/current_test_name.h"
 #include "test/util/include/default_providers.h"
 #include "test/util/include/inference_session_wrapper.h"
+#include "test/util/include/temp_dir.h"
 #include "test/util/include/test_environment.h"
 #include "test/util/include/test_utils.h"
 #include "core/graph/onnx_protobuf.h"
@@ -430,5 +436,138 @@ TEST(CoreMLExecutionProviderTest, TestModelCache) {
   TestModelLoad(model_data, MakeCoreMLExecutionProvider(), ExpectedEPNodeAssignment::All);
 #endif
 }
+
+// Test that CoreML EP can load a model with initializers stored in an external data file.
+// Regression test for https://github.com/microsoft/onnxruntime/issues/28005
+// The bug was that TensorProtoWithExternalDataToTensorProto passed a model file path
+// (e.g. "/path/to/model.onnx") to ReadExternalDataForTensor which expects a directory,
+// causing it to construct an invalid path like "/path/to/model.onnx/model.onnx_data".
+#if !defined(ORT_MINIMAL_BUILD)
+TEST(CoreMLExecutionProviderTest, ExternalDataInitializer) {
+  // Create a temp directory for the model and external data file
+  TemporaryDirectory tmp_dir(ORT_TSTR("coreml_external_data_test"));
+  const auto model_path = std::filesystem::path(tmp_dir.Path()) / ORT_TSTR("model.onnx");
+  const auto external_data_path = std::filesystem::path(tmp_dir.Path()) / ORT_TSTR("model.onnx_data");
+
+  // Write external data file: 6 floats for a {1,1,3,2} initializer
+  const std::vector<float> initializer_data = {0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f};
+  {
+    std::ofstream ofs(external_data_path, std::ios::binary);
+    ASSERT_TRUE(ofs.is_open());
+    ofs.write(reinterpret_cast<const char*>(initializer_data.data()),
+              initializer_data.size() * sizeof(float));
+    ofs.close();
+  }
+
+  // Build a simple model: output = X + initializer (Add op)
+  {
+    ONNX_NAMESPACE::ModelProto model_proto;
+    model_proto.set_ir_version(ONNX_NAMESPACE::IR_VERSION);
+    auto* opset = model_proto.add_opset_import();
+    opset->set_domain("");
+    opset->set_version(13);
+
+    auto* graph_proto = model_proto.mutable_graph();
+    graph_proto->set_name("test_external_data");
+
+    // Input X: {1,1,3,2} float tensor
+    auto* input = graph_proto->add_input();
+    input->set_name("X");
+    auto* input_type = input->mutable_type()->mutable_tensor_type();
+    input_type->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT);
+    auto* input_shape = input_type->mutable_shape();
+    input_shape->add_dim()->set_dim_value(1);
+    input_shape->add_dim()->set_dim_value(1);
+    input_shape->add_dim()->set_dim_value(3);
+    input_shape->add_dim()->set_dim_value(2);
+
+    // Output Y: {1,1,3,2} float tensor
+    auto* output = graph_proto->add_output();
+    output->set_name("Y");
+    auto* output_type = output->mutable_type()->mutable_tensor_type();
+    output_type->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT);
+    auto* output_shape = output_type->mutable_shape();
+    output_shape->add_dim()->set_dim_value(1);
+    output_shape->add_dim()->set_dim_value(1);
+    output_shape->add_dim()->set_dim_value(3);
+    output_shape->add_dim()->set_dim_value(2);
+
+    // Initializer W with external data
+    auto* initializer = graph_proto->add_initializer();
+    initializer->set_name("W");
+    initializer->set_data_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT);
+    initializer->add_dims(1);
+    initializer->add_dims(1);
+    initializer->add_dims(3);
+    initializer->add_dims(2);
+    initializer->set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation_EXTERNAL);
+
+    auto* ext_location = initializer->add_external_data();
+    ext_location->set_key("location");
+    ext_location->set_value("model.onnx_data");
+    auto* ext_offset = initializer->add_external_data();
+    ext_offset->set_key("offset");
+    ext_offset->set_value("0");
+    auto* ext_length = initializer->add_external_data();
+    ext_length->set_key("length");
+    ext_length->set_value(std::to_string(initializer_data.size() * sizeof(float)));
+
+    // Add node: Y = X + W
+    auto* node = graph_proto->add_node();
+    node->set_op_type("Add");
+    node->add_input("X");
+    node->add_input("W");
+    node->add_output("Y");
+
+    // Save model
+    std::ofstream ofs(model_path, std::ios::binary);
+    ASSERT_TRUE(ofs.is_open());
+    ASSERT_TRUE(model_proto.SerializeToOstream(&ofs));
+    ofs.close();
+  }
+
+  // Input data
+  std::vector<int64_t> dims = {1, 1, 3, 2};
+  std::vector<float> input_data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
+  OrtValue ml_value_x;
+  AllocatorPtr allocator = CPUAllocator::DefaultInstance();
+  CreateMLValue<float>(allocator, dims, input_data, &ml_value_x);
+
+  NameMLValMap feeds;
+  feeds.insert(std::make_pair("X", ml_value_x));
+
+  RunOptions run_options;
+  run_options.run_tag = "ExternalDataInitializer";
+  std::vector<std::string> output_names = {"Y"};
+
+  // Load the model from a file path (not from memory) with the CoreML EP.
+  // This is the scenario that triggers the bug: CoreML EP must resolve external data
+  // relative to the model file's directory, not treat the model path as a directory.
+  SessionOptions so;
+  so.session_logid = "ExternalDataInitializer";
+  InferenceSessionWrapper session{so, GetEnvironment()};
+  ASSERT_STATUS_OK(session.RegisterExecutionProvider(MakeCoreMLExecutionProvider()));
+  ASSERT_STATUS_OK(session.Load(model_path.native()));
+  ASSERT_STATUS_OK(session.Initialize());
+
+#if defined(__APPLE__)
+  const auto& provider_types = session.GetRegisteredProviderTypes();
+  EXPECT_NE(std::find(provider_types.begin(), provider_types.end(), kCoreMLExecutionProvider), provider_types.end());
+  std::vector<OrtValue> fetches;
+  ASSERT_STATUS_OK(session.Run(run_options, feeds, output_names, &fetches));
+
+  // Verify the output: Y = X + W = {1.1, 2.2, 3.3, 4.4, 5.5, 6.6}
+  ASSERT_EQ(fetches.size(), 1u);
+  const auto& output_tensor = fetches[0].Get<Tensor>();
+  auto output_data = output_tensor.DataAsSpan<float>();
+  ASSERT_EQ(static_cast<size_t>(output_data.size()), input_data.size());
+  for (size_t i = 0; i < input_data.size(); ++i) {
+    EXPECT_NEAR(output_data[i], input_data[i] + initializer_data[i], 1e-5f)
+        << "Mismatch at index " << i;
+  }
+#endif  // defined(__APPLE__)
+}
+#endif  // !defined(ORT_MINIMAL_BUILD)
+
 }  // namespace test
 }  // namespace onnxruntime