Skip to content

Commit 7fdc60e

Browse files
skottmckayCopilot
andauthored
Fix CoreML EP issue with external weight path handling. (#28062)
### Description <!-- Describe your changes. --> GitHub issue #28005: CoreML EP crashes with SIGBUS when loading a model with external data via CreateSession from a file path. Root Cause In tensorprotoutils.cc:362, TensorProtoWithExternalDataToTensorProto() passes model_path directly to ReadExternalDataForTensor(), but ReadExternalDataForTensor() expects a directory (it joins the path with the external data filename). This causes path construction like /path/to/model.onnx/model.onnx_data instead of /path/to/model.onnx_data. The CPU EP's UnpackInitializerData() at line ~2572 correctly uses model_path.parent_path(). The CoreML EP is the only caller that passes a full model file path (via graph_viewer_.ModelPath() in model_builder.cc:791), triggering the bug. Changes Fix (tensorprotoutils.cc:364): Changed ReadExternalDataForTensor(ten_proto, model_path, ...) to ReadExternalDataForTensor(ten_proto, model_path.parent_path(), ...), matching the pattern used by UnpackInitializerData(). Test (coreml_basic_test.cc:444): Added CoreMLExecutionProviderTest.ExternalDataInitializer — creates a model with external data, saves it to disk, and loads it from a file path with CoreML EP to verify the fix. The test passes with the fix applied. ### Motivation and Context <!-- - Why is this change required? What problem does it solve? - If it fixes an open issue, please link to the issue here. --> #28005 --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent cafbd7c commit 7fdc60e

File tree

2 files changed

+149
-1
lines changed

2 files changed

+149
-1
lines changed

onnxruntime/core/framework/tensorprotoutils.cc

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -359,8 +359,17 @@ Status TensorProtoWithExternalDataToTensorProto(
359359
} else {
360360
// Load the external data into memory
361361
std::vector<uint8_t> unpacked_data;
362-
ORT_RETURN_IF_ERROR(ReadExternalDataForTensor(ten_proto, model_path, unpacked_data));
362+
// ReadExternalDataForTensor expects a directory. Preserve existing behavior for callers that
363+
// already pass a directory, and only use parent_path() when model_path is a confirmed file.
364+
std::filesystem::path external_data_path = model_path;
365+
std::error_code ec;
366+
if (std::filesystem::is_regular_file(model_path, ec)) {
367+
external_data_path = model_path.parent_path();
368+
} else if (ec) {
369+
ec.clear();
370+
}
363371

372+
ORT_RETURN_IF_ERROR(ReadExternalDataForTensor(ten_proto, external_data_path, unpacked_data));
364373
// Set the raw data in the new tensor
365374
onnxruntime::utils::SetRawDataInTensorProto(result, unpacked_data.data(), unpacked_data.size());
366375
}

onnxruntime/test/providers/coreml/coreml_basic_test.cc

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,12 @@
11
// Copyright (c) Microsoft Corporation. All rights reserved.
22
// Licensed under the MIT License.
33

4+
#include <algorithm>
5+
#include <cstdint>
6+
#include <filesystem>
7+
#include <fstream>
48
#include <memory>
9+
#include <vector>
510

611
#include "core/common/logging/logging.h"
712
#include "core/graph/constants.h"
@@ -17,6 +22,7 @@
1722
#include "test/util/include/current_test_name.h"
1823
#include "test/util/include/default_providers.h"
1924
#include "test/util/include/inference_session_wrapper.h"
25+
#include "test/util/include/temp_dir.h"
2026
#include "test/util/include/test_environment.h"
2127
#include "test/util/include/test_utils.h"
2228
#include "core/graph/onnx_protobuf.h"
@@ -430,5 +436,138 @@ TEST(CoreMLExecutionProviderTest, TestModelCache) {
430436
TestModelLoad(model_data, MakeCoreMLExecutionProvider(), ExpectedEPNodeAssignment::All);
431437
#endif
432438
}
439+
440+
// Test that CoreML EP can load a model with initializers stored in an external data file.
441+
// Regression test for https://github.com/microsoft/onnxruntime/issues/28005
442+
// The bug was that TensorProtoWithExternalDataToTensorProto passed a model file path
443+
// (e.g. "/path/to/model.onnx") to ReadExternalDataForTensor which expects a directory,
444+
// causing it to construct an invalid path like "/path/to/model.onnx/model.onnx_data".
445+
#if !defined(ORT_MINIMAL_BUILD)
446+
TEST(CoreMLExecutionProviderTest, ExternalDataInitializer) {
447+
// Create a temp directory for the model and external data file
448+
TemporaryDirectory tmp_dir(ORT_TSTR("coreml_external_data_test"));
449+
const auto model_path = std::filesystem::path(tmp_dir.Path()) / ORT_TSTR("model.onnx");
450+
const auto external_data_path = std::filesystem::path(tmp_dir.Path()) / ORT_TSTR("model.onnx_data");
451+
452+
// Write external data file: 6 floats for a {1,1,3,2} initializer
453+
const std::vector<float> initializer_data = {0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f};
454+
{
455+
std::ofstream ofs(external_data_path, std::ios::binary);
456+
ASSERT_TRUE(ofs.is_open());
457+
ofs.write(reinterpret_cast<const char*>(initializer_data.data()),
458+
initializer_data.size() * sizeof(float));
459+
ofs.close();
460+
}
461+
462+
// Build a simple model: output = X + initializer (Add op)
463+
{
464+
ONNX_NAMESPACE::ModelProto model_proto;
465+
model_proto.set_ir_version(ONNX_NAMESPACE::IR_VERSION);
466+
auto* opset = model_proto.add_opset_import();
467+
opset->set_domain("");
468+
opset->set_version(13);
469+
470+
auto* graph_proto = model_proto.mutable_graph();
471+
graph_proto->set_name("test_external_data");
472+
473+
// Input X: {1,1,3,2} float tensor
474+
auto* input = graph_proto->add_input();
475+
input->set_name("X");
476+
auto* input_type = input->mutable_type()->mutable_tensor_type();
477+
input_type->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT);
478+
auto* input_shape = input_type->mutable_shape();
479+
input_shape->add_dim()->set_dim_value(1);
480+
input_shape->add_dim()->set_dim_value(1);
481+
input_shape->add_dim()->set_dim_value(3);
482+
input_shape->add_dim()->set_dim_value(2);
483+
484+
// Output Y: {1,1,3,2} float tensor
485+
auto* output = graph_proto->add_output();
486+
output->set_name("Y");
487+
auto* output_type = output->mutable_type()->mutable_tensor_type();
488+
output_type->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT);
489+
auto* output_shape = output_type->mutable_shape();
490+
output_shape->add_dim()->set_dim_value(1);
491+
output_shape->add_dim()->set_dim_value(1);
492+
output_shape->add_dim()->set_dim_value(3);
493+
output_shape->add_dim()->set_dim_value(2);
494+
495+
// Initializer W with external data
496+
auto* initializer = graph_proto->add_initializer();
497+
initializer->set_name("W");
498+
initializer->set_data_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT);
499+
initializer->add_dims(1);
500+
initializer->add_dims(1);
501+
initializer->add_dims(3);
502+
initializer->add_dims(2);
503+
initializer->set_data_location(ONNX_NAMESPACE::TensorProto_DataLocation_EXTERNAL);
504+
505+
auto* ext_location = initializer->add_external_data();
506+
ext_location->set_key("location");
507+
ext_location->set_value("model.onnx_data");
508+
auto* ext_offset = initializer->add_external_data();
509+
ext_offset->set_key("offset");
510+
ext_offset->set_value("0");
511+
auto* ext_length = initializer->add_external_data();
512+
ext_length->set_key("length");
513+
ext_length->set_value(std::to_string(initializer_data.size() * sizeof(float)));
514+
515+
// Add node: Y = X + W
516+
auto* node = graph_proto->add_node();
517+
node->set_op_type("Add");
518+
node->add_input("X");
519+
node->add_input("W");
520+
node->add_output("Y");
521+
522+
// Save model
523+
std::ofstream ofs(model_path, std::ios::binary);
524+
ASSERT_TRUE(ofs.is_open());
525+
ASSERT_TRUE(model_proto.SerializeToOstream(&ofs));
526+
ofs.close();
527+
}
528+
529+
// Input data
530+
std::vector<int64_t> dims = {1, 1, 3, 2};
531+
std::vector<float> input_data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
532+
OrtValue ml_value_x;
533+
AllocatorPtr allocator = CPUAllocator::DefaultInstance();
534+
CreateMLValue<float>(allocator, dims, input_data, &ml_value_x);
535+
536+
NameMLValMap feeds;
537+
feeds.insert(std::make_pair("X", ml_value_x));
538+
539+
RunOptions run_options;
540+
run_options.run_tag = "ExternalDataInitializer";
541+
std::vector<std::string> output_names = {"Y"};
542+
543+
// Load the model from a file path (not from memory) with the CoreML EP.
544+
// This is the scenario that triggers the bug: CoreML EP must resolve external data
545+
// relative to the model file's directory, not treat the model path as a directory.
546+
SessionOptions so;
547+
so.session_logid = "ExternalDataInitializer";
548+
InferenceSessionWrapper session{so, GetEnvironment()};
549+
ASSERT_STATUS_OK(session.RegisterExecutionProvider(MakeCoreMLExecutionProvider()));
550+
ASSERT_STATUS_OK(session.Load(model_path.native()));
551+
ASSERT_STATUS_OK(session.Initialize());
552+
553+
#if defined(__APPLE__)
554+
const auto& provider_types = session.GetRegisteredProviderTypes();
555+
EXPECT_NE(std::find(provider_types.begin(), provider_types.end(), kCoreMLExecutionProvider), provider_types.end());
556+
std::vector<OrtValue> fetches;
557+
ASSERT_STATUS_OK(session.Run(run_options, feeds, output_names, &fetches));
558+
559+
// Verify the output: Y = X + W = {1.1, 2.2, 3.3, 4.4, 5.5, 6.6}
560+
ASSERT_EQ(fetches.size(), 1u);
561+
const auto& output_tensor = fetches[0].Get<Tensor>();
562+
auto output_data = output_tensor.DataAsSpan<float>();
563+
ASSERT_EQ(static_cast<size_t>(output_data.size()), input_data.size());
564+
for (size_t i = 0; i < input_data.size(); ++i) {
565+
EXPECT_NEAR(output_data[i], input_data[i] + initializer_data[i], 1e-5f)
566+
<< "Mismatch at index " << i;
567+
}
568+
#endif // defined(__APPLE__)
569+
}
570+
#endif // !(ORT_MINIMAL_BUILD)
571+
433572
} // namespace test
434573
} // namespace onnxruntime

0 commit comments

Comments
 (0)