Skip to content

Commit 1b5a088

Browse files
Merge branch 'main' into dev/asolovev_tbb_upd
2 parents 558754a + 867c643 commit 1b5a088

File tree

9 files changed

+212
-7
lines changed

9 files changed

+212
-7
lines changed

cpp/oneapi/dal/algo/covariance/backend/cpu/compute_kernel_dense.cpp

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,14 @@
1818

1919
#include "oneapi/dal/algo/covariance/backend/cpu/compute_kernel.hpp"
2020
#include "oneapi/dal/algo/covariance/backend/cpu/compute_kernel_common.hpp"
21+
#include "oneapi/dal/algo/covariance/backend/cpu/partial_compute_kernel.hpp"
22+
#include "oneapi/dal/algo/covariance/backend/cpu/finalize_compute_kernel.hpp"
2123
#include "oneapi/dal/backend/interop/common.hpp"
2224
#include "oneapi/dal/backend/interop/error_converter.hpp"
2325
#include "oneapi/dal/backend/interop/table_conversion.hpp"
2426

27+
#include "oneapi/dal/backend/primitives/utils.hpp"
28+
2529
#include "oneapi/dal/table/row_accessor.hpp"
2630

2731
namespace oneapi::dal::covariance::backend {
@@ -30,13 +34,83 @@ using dal::backend::context_cpu;
3034
using descriptor_t = detail::descriptor_base<task::compute>;
3135
using parameters_t = detail::compute_parameters<task::compute>;
3236

37+
namespace be = dal::backend;
38+
namespace pr = be::primitives;
3339
namespace daal_covariance = daal::algorithms::covariance;
3440
namespace interop = dal::backend::interop;
3541

3642
template <typename Float, daal::internal::CpuType Cpu>
3743
using daal_covariance_kernel_t = daal_covariance::internal::
3844
CovarianceDenseBatchKernel<Float, daal_covariance::Method::defaultDense, Cpu>;
3945

46+
template <typename Float, typename Task>
47+
static compute_result<Task> call_daal_spmd_kernel(const context_cpu& ctx,
48+
const detail::descriptor_base<Task>& desc,
49+
const detail::compute_parameters<Task>& params,
50+
const table& data) {
51+
auto& comm = ctx.get_communicator();
52+
const std::int64_t component_count = data.get_column_count();
53+
54+
// Compute partial results locally on this rank's data
55+
partial_compute_input<Task> partial_input(data);
56+
auto partial_result =
57+
partial_compute_kernel_cpu<Float, method::by_default, Task>{}(ctx, desc, partial_input);
58+
59+
// Extract partial results as mutable arrays
60+
auto nobs_nd = pr::table2ndarray<Float>(partial_result.get_partial_n_rows());
61+
auto sums_nd = pr::table2ndarray<Float>(partial_result.get_partial_sum());
62+
auto crossproduct_nd = pr::table2ndarray<Float>(partial_result.get_partial_crossproduct());
63+
64+
auto nobs_ary = dal::array<Float>::wrap(nobs_nd.get_mutable_data(), nobs_nd.get_count());
65+
auto sums_ary = dal::array<Float>::wrap(sums_nd.get_mutable_data(), sums_nd.get_count());
66+
auto crossproduct_ary =
67+
dal::array<Float>::wrap(crossproduct_nd.get_mutable_data(), crossproduct_nd.get_count());
68+
69+
// The DAAL online kernel stores centered crossproducts:
70+
// cp = X^T*X - sums*sums^T/nobs
71+
// Simple allreduce of centered crossproducts is incorrect because each
72+
// rank uses its local mean. Un-center before allreduce, then re-center
73+
// with global statistics after.
74+
const Float local_nobs = *nobs_ary.get_data();
75+
if (!desc.get_assume_centered() && local_nobs >= 1.0) {
76+
Float* cp_ptr = crossproduct_ary.get_mutable_data();
77+
const Float* sums_ptr = sums_ary.get_data();
78+
const Float inv_nobs = Float(1) / local_nobs;
79+
for (std::int64_t i = 0; i < component_count; ++i) {
80+
for (std::int64_t j = 0; j < component_count; ++j) {
81+
cp_ptr[i * component_count + j] += inv_nobs * sums_ptr[i] * sums_ptr[j];
82+
}
83+
}
84+
}
85+
86+
// Allreduce raw crossproduct, sums, and nobs across all ranks
87+
comm.allreduce(nobs_ary).wait();
88+
comm.allreduce(sums_ary).wait();
89+
comm.allreduce(crossproduct_ary).wait();
90+
91+
// Re-center with global statistics
92+
const Float global_nobs = *nobs_ary.get_data();
93+
if (!desc.get_assume_centered() && global_nobs >= 1.0) {
94+
Float* cp_ptr = crossproduct_ary.get_mutable_data();
95+
const Float* sums_ptr = sums_ary.get_data();
96+
const Float inv_nobs = Float(1) / global_nobs;
97+
for (std::int64_t i = 0; i < component_count; ++i) {
98+
for (std::int64_t j = 0; j < component_count; ++j) {
99+
cp_ptr[i * component_count + j] -= inv_nobs * sums_ptr[i] * sums_ptr[j];
100+
}
101+
}
102+
}
103+
104+
// Reconstruct aggregated partial result and finalize
105+
partial_compute_result<Task> aggregated;
106+
aggregated.set_partial_n_rows(homogen_table::wrap(nobs_ary, 1, 1));
107+
aggregated.set_partial_sum(homogen_table::wrap(sums_ary, 1, component_count));
108+
aggregated.set_partial_crossproduct(
109+
homogen_table::wrap(crossproduct_ary, component_count, component_count));
110+
111+
return finalize_compute_kernel_cpu<Float, method::by_default, Task>{}(ctx, desc, aggregated);
112+
}
113+
40114
template <typename Float, typename Task>
41115
static compute_result<Task> call_daal_kernel(const context_cpu& ctx,
42116
const detail::descriptor_base<Task>& desc,
@@ -121,6 +195,9 @@ static compute_result<Task> compute(const context_cpu& ctx,
121195
const detail::descriptor_base<Task>& desc,
122196
const detail::compute_parameters<Task>& params,
123197
const compute_input<Task>& input) {
198+
if (ctx.get_communicator().get_rank_count() > 1) {
199+
return call_daal_spmd_kernel<Float, Task>(ctx, desc, params, input.get_data());
200+
}
124201
return call_daal_kernel<Float, Task>(ctx, desc, params, input.get_data());
125202
}
126203

cpp/oneapi/dal/algo/covariance/detail/compute_ops.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ struct compute_ops_dispatcher<Policy, Float, Method, Task> {
3434
compute_parameters<Task> select_parameters(const Policy& ctx,
3535
const descriptor_base<Task>& desc,
3636
const compute_input<Task>& input) const {
37-
using kernel_dispatcher_t = dal::backend::kernel_dispatcher<KERNEL_SINGLE_NODE_CPU(
37+
using kernel_dispatcher_t = dal::backend::kernel_dispatcher<KERNEL_UNIVERSAL_SPMD_CPU(
3838
parameters::compute_parameters_cpu<Float, Method, Task>)>;
3939
return kernel_dispatcher_t{}(ctx, desc, input);
4040
}
@@ -51,7 +51,7 @@ struct compute_ops_dispatcher<Policy, Float, Method, Task> {
5151
const descriptor_base<Task>& desc,
5252
const compute_parameters<Task>& params,
5353
const compute_input<Task>& input) const {
54-
using kernel_dispatcher_t = dal::backend::kernel_dispatcher<KERNEL_SINGLE_NODE_CPU(
54+
using kernel_dispatcher_t = dal::backend::kernel_dispatcher<KERNEL_UNIVERSAL_SPMD_CPU(
5555
backend::compute_kernel_cpu<Float, Method, Task>)>;
5656
return kernel_dispatcher_t()(ctx, desc, params, input);
5757
}

cpp/oneapi/dal/algo/covariance/test/spmd.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,6 @@ TEMPLATE_LIST_TEST_M(covariance_spmd_test,
8484
"covariance common flow",
8585
"[covariance][integration][spmd]",
8686
covariance_types) {
87-
SKIP_IF(this->get_policy().is_cpu());
8887
SKIP_IF(this->not_float64_friendly());
8988

9089
using Float = std::tuple_element_t<0, TestType>;

dev/bazel/daal.bzl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ def daal_module(name, features=[], lib_tag="daal",
5454
srcs = auto_srcs + srcs,
5555
copts = copts + select({
5656
"@platforms//os:windows": [],
57-
"//conditions:default": ["-fvisibility=hidden"],
57+
"//conditions:default": ["-fvisibility=hidden", "-fvisibility-inlines-hidden"],
5858
}),
5959
local_defines = select({
6060
"@config//:assert_enabled": local_defines + ["__DAAL_IMPLEMENTATION", "DEBUG_ASSERT=1"],

dev/bazel/dal.bzl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -496,7 +496,7 @@ def _dal_module(name, lib_tag="dal", is_dpc=False, features=[],
496496
},
497497
copts = copts + select({
498498
"@platforms//os:windows": [],
499-
"//conditions:default": ["-fvisibility=hidden"],
499+
"//conditions:default": ["-fvisibility=hidden", "-fvisibility-inlines-hidden"],
500500
}),
501501
local_defines = local_defines + [
502502
# Enable ONEDAL_EXPORT visibility annotations, matching Make's

dev/bazel/deps/opencl.tpl.BUILD

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ cc_library(
88
"*.so.*",
99
],
1010
allow_empty = True,
11-
exclude = ["*.py", "*.cmake", "*.a"],
11+
exclude = ["*.py", "*.cmake", "*.a", "*.so.*-*"],
1212
),
1313
linkopts = ["-lOpenCL"],
1414
visibility = ["//visibility:public"],

makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ y := $(notdir $(filter $(_OS)/%,lnx/so win/dll mac/dylib))
131131
-cxx17 := $(if $(COMPILER_is_vc),/std:c++17,$(-Q)std=c++17)
132132
-optlevel := $(-optlevel.$(COMPILER))
133133
-fPIC := $(if $(OS_is_win),,-fPIC)
134-
-visibility := $(if $(OS_is_win),,-fvisibility=hidden)
134+
-visibility := $(if $(OS_is_win),,-fvisibility=hidden -fvisibility-inlines-hidden)
135135
-DMKL_ILP64 := $(if $(filter mkl,$(BACKEND_CONFIG)),-DMKL_ILP64)
136136
-DMKL_LP64 := $(if $(filter mkl,$(BACKEND_CONFIG)),-DMKL_LP64)
137137
-Zl := $(-Zl.$(COMPILER))
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
/*******************************************************************************
2+
* Copyright contributors to the oneDAL project
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*******************************************************************************/
16+
17+
#include <iomanip>
18+
#include <iostream>
19+
20+
#include "oneapi/dal/algo/covariance.hpp"
21+
#include "oneapi/dal/io/csv.hpp"
22+
#include "oneapi/dal/spmd/ccl/communicator.hpp"
23+
24+
#include "utils.hpp"
25+
26+
namespace dal = oneapi::dal;
27+
28+
void run() {
29+
const auto data_file_name = get_data_path("data/covcormoments_dense.csv");
30+
31+
const auto data = dal::read<dal::table>(dal::csv::data_source{ data_file_name });
32+
33+
const auto cov_desc = dal::covariance::descriptor<float>{}.set_result_options(
34+
dal::covariance::result_options::cov_matrix | dal::covariance::result_options::means);
35+
36+
auto comm = dal::preview::spmd::make_communicator<dal::preview::spmd::backend::ccl>();
37+
auto rank_id = comm.get_rank();
38+
auto rank_count = comm.get_rank_count();
39+
40+
auto input_vec = split_table_by_rows<float>(data, rank_count);
41+
42+
const auto result = dal::preview::compute(comm, cov_desc, input_vec[rank_id]);
43+
44+
if (comm.get_rank() == 0) {
45+
std::cout << "Sample covariance:\n" << result.get_cov_matrix() << std::endl;
46+
47+
std::cout << "Means:\n" << result.get_means() << std::endl;
48+
}
49+
}
50+
51+
int main(int argc, char const *argv[]) {
52+
ccl::init();
53+
int status = MPI_Init(nullptr, nullptr);
54+
if (status != MPI_SUCCESS) {
55+
throw std::runtime_error{ "Problem occurred during MPI init" };
56+
}
57+
58+
run();
59+
60+
status = MPI_Finalize();
61+
if (status != MPI_SUCCESS) {
62+
throw std::runtime_error{ "Problem occurred during MPI finalize" };
63+
}
64+
return 0;
65+
}
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
/*******************************************************************************
2+
* Copyright contributors to the oneDAL project
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*******************************************************************************/
16+
17+
#include <iomanip>
18+
#include <iostream>
19+
20+
#include "oneapi/dal/algo/covariance.hpp"
21+
#include "oneapi/dal/io/csv.hpp"
22+
#include "oneapi/dal/spmd/mpi/communicator.hpp"
23+
24+
#include "utils.hpp"
25+
26+
namespace dal = oneapi::dal;
27+
28+
void run() {
29+
const auto data_file_name = get_data_path("data/covcormoments_dense.csv");
30+
31+
const auto data = dal::read<dal::table>(dal::csv::data_source{ data_file_name });
32+
33+
const auto cov_desc = dal::covariance::descriptor<float>{}.set_result_options(
34+
dal::covariance::result_options::cov_matrix | dal::covariance::result_options::means);
35+
36+
auto comm = dal::preview::spmd::make_communicator<dal::preview::spmd::backend::mpi>();
37+
auto rank_id = comm.get_rank();
38+
auto rank_count = comm.get_rank_count();
39+
40+
auto input_vec = split_table_by_rows<float>(data, rank_count);
41+
42+
const auto result = dal::preview::compute(comm, cov_desc, input_vec[rank_id]);
43+
44+
if (comm.get_rank() == 0) {
45+
std::cout << "Sample covariance:\n" << result.get_cov_matrix() << std::endl;
46+
47+
std::cout << "Means:\n" << result.get_means() << std::endl;
48+
}
49+
}
50+
51+
int main(int argc, char const *argv[]) {
52+
int status = MPI_Init(nullptr, nullptr);
53+
if (status != MPI_SUCCESS) {
54+
throw std::runtime_error{ "Problem occurred during MPI init" };
55+
}
56+
57+
run();
58+
59+
status = MPI_Finalize();
60+
if (status != MPI_SUCCESS) {
61+
throw std::runtime_error{ "Problem occurred during MPI finalize" };
62+
}
63+
return 0;
64+
}

0 commit comments

Comments
 (0)