Skip to content

Commit 0f1648e

Browse files
committed
Less overhead for reductions (macro backend), reduction identifiers.
Count constructor calls per reduction statement instead of using a nestable lock. Drop the ID parameter from the declaration of custom reductions. Add getCriticalIdentifier and getReductionIdentifier to the backend interface for querying mutex identifiers. Reductions use the address of the parallel data as their mutex identifier (both backends).
1 parent 0963042 commit 0f1648e

7 files changed

Lines changed: 51 additions & 58 deletions

File tree

include/opdi/backend/backendInterface.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ namespace opdi {
4040

4141
virtual std::size_t getLockIdentifier(omp_lock_t* lock) = 0;
4242
virtual std::size_t getNestLockIdentifier(omp_nest_lock_t* lock) = 0;
43+
virtual std::size_t getCriticalIdentifier(std::string const& name) = 0;
44+
virtual std::size_t getReductionIdentifier() = 0;
4345

4446
virtual void* getParallelData() = 0;
4547
virtual void* getTaskData() = 0;

include/opdi/backend/macro/macroBackend.cpp

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,8 @@ std::stack<bool> opdi::ReductionTools::needsBarrierBeforeReductions;
4141
std::stack<bool> opdi::ReductionTools::needsBarrierAfterReductions;
4242
int opdi::ReductionTools::implicitTaskNestingDepth = 0;
4343

44-
template<typename Type, int identifier>
45-
omp_nest_lock_t opdi::Reducer<Type, identifier>::reductionLock;
46-
47-
template<typename Type, int identifier>
48-
bool opdi::Reducer<Type, identifier>::isInitialized = false;
44+
template<typename Type>
45+
size_t opdi::Reducer<Type>::nConstructorCalls = 0;
4946

5047
// global macro backend variables
5148

include/opdi/backend/macro/macros.hpp

Lines changed: 10 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -165,15 +165,13 @@
165165
#define OPDI_CRITICAL_NAME(name) \
166166
OPDI_PRAGMA(omp critical (name)) \
167167
{ \
168-
std::size_t const opdiInternalCriticalIdentifier = \
169-
dynamic_cast<opdi::MacroBackend*>(opdi::backend)->getCriticalIdentifier(std::string(#name)); \
168+
std::size_t const opdiInternalCriticalIdentifier = opdi::backend->getCriticalIdentifier(std::string(#name)); \
170169
opdi::logic->onMutexAcquired(opdi::LogicInterface::MutexKind::Critical, opdiInternalCriticalIdentifier);
171170

172171
#define OPDI_CRITICAL_NAME_ARGS(name, ...) \
173172
OPDI_PRAGMA(omp critical (name) __VA_ARGS__) \
174173
{ \
175-
std::size_t const opdiInternalCriticalIdentifier = \
176-
dynamic_cast<opdi::MacroBackend*>(opdi::backend)->getCriticalIdentifier(std::string(#name)); \
174+
std::size_t const opdiInternalCriticalIdentifier = opdi::backend->getCriticalIdentifier(std::string(#name)); \
177175
opdi::logic->onMutexAcquired(opdi::LogicInterface::MutexKind::Critical, opdiInternalCriticalIdentifier);
178176

179177
#define OPDI_END_CRITICAL \
@@ -236,31 +234,27 @@
236234
// reduction macros
237235

238236
#if _OPENMP >= 202411
239-
#define OPDI_INTERNAL_DECLARE_REDUCTION(OP_NAME, TYPE, OP, INIT, ID) \
240-
TYPE operator OP (opdi::Reducer<TYPE, ID> const& lhs, \
241-
opdi::Reducer<TYPE, ID> const& rhs) { \
237+
#define OPDI_DECLARE_REDUCTION(OP_NAME, TYPE, OP, INIT) \
238+
TYPE operator OP (opdi::Reducer<TYPE> const& lhs, \
239+
opdi::Reducer<TYPE> const& rhs) { \
242240
return lhs.value OP rhs.value; \
243241
} \
244242
\
245243
OPDI_PRAGMA(omp declare_reduction(OP_NAME : TYPE) \
246-
combiner(opdi::Reducer<TYPE, ID>(omp_out) = \
247-
opdi::Reducer<TYPE, ID>(omp_out) OP opdi::Reducer<TYPE, ID>(omp_in)) \
244+
combiner(opdi::Reducer(omp_out) = opdi::Reducer(omp_out) OP opdi::Reducer(omp_in)) \
248245
initializer(omp_priv = INIT))
249246
#else
250-
#define OPDI_INTERNAL_DECLARE_REDUCTION(OP_NAME, TYPE, OP, INIT, ID) \
251-
TYPE operator OP (opdi::Reducer<TYPE, ID> const& lhs, \
252-
opdi::Reducer<TYPE, ID> const& rhs) { \
247+
#define OPDI_DECLARE_REDUCTION(OP_NAME, TYPE, OP, INIT) \
248+
TYPE operator OP (opdi::Reducer<TYPE> const& lhs, \
249+
opdi::Reducer<TYPE> const& rhs) { \
253250
return lhs.value OP rhs.value; \
254251
} \
255252
\
256253
OPDI_PRAGMA(omp declare reduction(OP_NAME : TYPE : \
257-
opdi::Reducer<TYPE, ID>(omp_out) = opdi::Reducer<TYPE, ID>(omp_out) OP opdi::Reducer<TYPE, ID>(omp_in)) \
254+
opdi::Reducer(omp_out) = opdi::Reducer(omp_out) OP opdi::Reducer(omp_in)) \
258255
initializer(omp_priv = INIT))
259256
#endif
260257

261-
#define OPDI_DECLARE_REDUCTION(OP_NAME, TYPE, OP, INIT) \
262-
OPDI_INTERNAL_DECLARE_REDUCTION(OP_NAME, TYPE, OP, INIT, __COUNTER__)
263-
264258
#define OPDI_REDUCTION private(opdi::internalReductionProbe)
265259

266260

include/opdi/backend/macro/mutexIdentifiers.hpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,7 @@ namespace opdi {
3636
private:
3737

3838
omp_lock_t criticalLock;
39-
4039
std::map<std::string, std::size_t> criticalIdentifiers;
41-
4240
std::size_t nextCriticalIdentifier;
4341

4442
public:
@@ -67,6 +65,10 @@ namespace opdi {
6765
return result;
6866
}
6967

68+
std::size_t getReductionIdentifier() {
69+
return reinterpret_cast<std::size_t>(opdi::backend->getParallelData());
70+
}
71+
7072
std::size_t getLockIdentifier(omp_lock_t* lock) {
7173
return reinterpret_cast<std::size_t>(lock);
7274
}

include/opdi/backend/macro/reductionTools.hpp

Lines changed: 19 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@
3535

3636
#include "../runtime.hpp"
3737

38+
#include "mutexIdentifiers.hpp"
39+
3840
namespace opdi {
3941

4042
struct ReductionTools {
@@ -109,51 +111,35 @@ namespace opdi {
109111
}
110112
};
111113

112-
template<typename Type, int identifier>
114+
template<typename Type>
113115
struct Reducer {
114116
public:
115-
static omp_nest_lock_t reductionLock;
116-
static bool isInitialized;
117+
static size_t nConstructorCalls;
118+
#pragma omp threadprivate(nConstructorCalls)
117119

118120
Type& value;
119121

120-
void checkInitialized() {
121-
122-
bool initialized;
123-
#pragma omp atomic read
124-
initialized = Reducer::isInitialized;
125-
126-
if (!initialized) {
127-
128-
opdi_set_lock(&ReductionTools::globalReductionLock);
129-
130-
#pragma omp atomic read
131-
initialized = Reducer::isInitialized;
132-
133-
if (!initialized) {
134-
opdi_init_nest_lock(&Reducer::reductionLock);
135-
ReductionTools::individualReductionLocks.push_back(&Reducer::reductionLock);
136-
137-
#pragma omp atomic write
138-
Reducer::isInitialized = true;
139-
}
140-
141-
opdi_unset_lock(&ReductionTools::globalReductionLock);
142-
}
143-
}
144-
145122
Reducer(Type& value) : value(value) {
146123
/* push barrier prior to first reduction-related operation */
147124
ReductionTools::addBarrierBeforeReductionsIfNeeded();
148-
this->checkInitialized();
149-
opdi_set_nest_lock(&reductionLock);
125+
126+
/* first constructor call in the course of a statement acquires the mutex */
127+
if (nConstructorCalls == 0) {
128+
opdi::logic->onMutexAcquired(opdi::LogicInterface::MutexKind::Reduction,
129+
opdi::backend->getReductionIdentifier());
130+
}
131+
++nConstructorCalls;
150132
}
151133

152134
Reducer& operator=(Type const& rhs) {
153135
value = rhs;
154-
opdi_unset_nest_lock(&reductionLock);
155-
opdi_unset_nest_lock(&reductionLock);
156-
opdi_unset_nest_lock(&reductionLock);
136+
137+
opdi::logic->onMutexReleased(opdi::LogicInterface::MutexKind::Reduction,
138+
opdi::backend->getReductionIdentifier());
139+
140+
assert(nConstructorCalls == 3);
141+
nConstructorCalls = 0;
142+
157143
return *this;
158144
}
159145
};

include/opdi/backend/ompt/omptBackend.hpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,16 @@ namespace opdi {
162162
return (std::size_t) waitId;
163163
}
164164

165+
std::size_t getCriticalIdentifier(std::string const& name) {
166+
OPDI_UNUSED(name);
167+
OPDI_ERROR("OMPT backend does not support explicit queries of mutex identifiers of critical regions.");
168+
return 0;
169+
}
170+
171+
std::size_t getReductionIdentifier() {
172+
return reinterpret_cast<std::size_t>(getParallelData());
173+
}
174+
165175
void* getParallelData() {
166176
ompt_data_t* parallelData;
167177
int teamSize;

include/opdi/backend/ompt/reductionCallbacks.hpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@
3333
#include "../../helpers/macros.hpp"
3434
#include "../../logic/logicInterface.hpp"
3535

36+
#include "../backendInterface.hpp"
37+
3638
#include "callbacksBase.hpp"
3739

3840
namespace opdi {
@@ -55,10 +57,10 @@ namespace opdi {
5557
OPDI_UNUSED(codeptr);
5658

5759
if (ompt_scope_begin == _endpoint) {
58-
logic->onMutexAcquired(LogicInterface::MutexKind::Reduction, 0);
60+
logic->onMutexAcquired(LogicInterface::MutexKind::Reduction, opdi::backend->getReductionIdentifier());
5961
}
6062
else {
61-
logic->onMutexReleased(LogicInterface::MutexKind::Reduction, 0);
63+
logic->onMutexReleased(LogicInterface::MutexKind::Reduction, opdi::backend->getReductionIdentifier());
6264
}
6365
}
6466

0 commit comments

Comments
 (0)