Skip to content

Commit 3bf83ff

Browse files
committed
Create task data for initial implicit tasks.
Backend support for querying the current task's data. Support mutexes prior to tool initialization (OMPT backend). Adapt output instrument. Merge branch 'feature/initialImplicitTask' into develop
2 parents 6875a3b + 44d61f6 commit 3bf83ff

20 files changed

Lines changed: 210 additions & 86 deletions

include/opdi/backend/backendInterface.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,9 @@ namespace opdi {
4242
virtual std::size_t getNestedLockIdentifier(omp_nest_lock_t* lock) = 0;
4343

4444
virtual void* getParallelData() = 0;
45+
virtual void* getTaskData() = 0;
46+
47+
virtual void setInitialImplicitTaskData(void* data) = 0;
4548
};
4649

4750
extern BackendInterface* backend;

include/opdi/backend/macro/macroBackend.hpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525

2626
#pragma once
2727

28+
#include <cassert>
29+
2830
#include "../../config.hpp"
2931

3032
#ifdef OPDI_BACKEND
@@ -57,9 +59,15 @@ namespace opdi {
5759

5860
// Backend initialization for the macro backend: initializes
// ReductionTools::globalReducerLock via opdi_init_lock.
// No task data is pushed here; as the comment in the body states, the task data of the
// initial implicit task is created by the logic layer instead.
void init() {
5961
opdi_init_lock(&ReductionTools::globalReducerLock);
62+
63+
// task data for initial implicit task is created in the logic layer
6064
}
6165

6266
void finalize() {
67+
// pop task data associated with initial implicit task
68+
DataTools::popTaskData();
69+
assert(DataTools::getTaskData() == nullptr);
70+
6371
opdi_set_lock(&ReductionTools::globalReducerLock);
6472

6573
for (auto lock : ReductionTools::individualReducerLocks) {
@@ -74,6 +82,15 @@ namespace opdi {
7482
// Returns whatever DataTools::getParallelData() reports, unchanged.
void* getParallelData() {
7583
return DataTools::getParallelData();
7684
}
85+
86+
// Returns whatever DataTools::getTaskData() reports, unchanged.
// NOTE(review): finalize() and setInitialImplicitTaskData() in this backend assert that this
// value is nullptr when no task data has been pushed, so callers should expect nullptr.
void* getTaskData() {
87+
return DataTools::getTaskData();
88+
}
89+
90+
// Registers `data` as the task data of the initial implicit task (macro backend).
// Precondition: DataTools::getTaskData() is nullptr, i.e. no task data has been pushed yet.
// The precondition is checked with assert only, so it is not enforced when NDEBUG is defined.
// The pushed entry is presumably the one popped again in finalize() - confirm against callers.
void setInitialImplicitTaskData(void* data) {
91+
assert(DataTools::getTaskData() == nullptr);
92+
DataTools::pushTaskData(data);
93+
}
7794
};
7895

7996
}

include/opdi/backend/macro/probes.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,8 @@ namespace opdi {
7070
tool->getTapePosition(oldTape, currentPosition);
7171

7272
DataTools::pushParallelData(this->parallelData);
73-
this->taskData = logic->onImplicitTaskBegin(omp_get_num_threads(), omp_get_thread_num(), this->parallelData);
73+
this->taskData = logic->onImplicitTaskBegin(false, omp_get_num_threads(), omp_get_thread_num(),
74+
this->parallelData);
7475
DataTools::pushTaskData(this->taskData);
7576

7677
// check if copy statements have been recorded before the correct tape was set

include/opdi/backend/ompt/implicitTaskCallbacks.hpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,13 +50,14 @@ namespace opdi {
5050
unsigned int index,
5151
int flags) {
5252

53-
// no callback treatment for initial task
53+
// logic layer is in general not yet set up when the initial implicit task is created
54+
// initial implicit task handling takes place independently in the logic layer
5455
if (flags & ompt_task_initial) {
5556
return;
5657
}
5758

5859
if (ompt_scope_begin == endpoint) {
59-
taskData->ptr = logic->onImplicitTaskBegin(actualParallelism, index, parallelData->ptr);
60+
taskData->ptr = logic->onImplicitTaskBegin(false, actualParallelism, index, parallelData->ptr);
6061
}
6162
else {
6263
logic->onImplicitTaskEnd(taskData->ptr);

include/opdi/backend/ompt/omptBackend.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ ompt_set_callback_t opdi::CallbacksBase::setCallback;
3636
ompt_get_callback_t opdi::CallbacksBase::getCallback;
3737

3838
ompt_get_parallel_info_t opdi::OmptBackend::getParallelInfo = NULL;
39+
ompt_get_task_info_t opdi::OmptBackend::getTaskInfo = NULL;
3940
ompt_finalize_tool_t opdi::OmptBackend::finalizeTool = NULL;
4041

4142
// ompt entry point

include/opdi/backend/ompt/omptBackend.hpp

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#define OPDI_BACKEND OPDI_OMPT_BACKEND
3434
#endif
3535

36+
#include <cassert>
3637
#include <iostream>
3738

3839
#include "../../helpers/exceptions.hpp"
@@ -72,6 +73,7 @@ namespace opdi {
7273
// runtime entry points to be queried in addition to the ones stored in CallbacksBase
7374

7475
static ompt_get_parallel_info_t getParallelInfo;
76+
static ompt_get_task_info_t getTaskInfo;
7577
static ompt_finalize_tool_t finalizeTool;
7678

7779
public:
@@ -89,6 +91,7 @@ namespace opdi {
8991
ompt_set_callback_t setCallback = (ompt_set_callback_t) lookup("ompt_set_callback");
9092
ompt_get_callback_t getCallback = (ompt_get_callback_t) lookup("ompt_get_callback");
9193
OmptBackend::getParallelInfo = (ompt_get_parallel_info_t) lookup("ompt_get_parallel_info");
94+
OmptBackend::getTaskInfo = (ompt_get_task_info_t) lookup("ompt_get_task_info");
9295
OmptBackend::finalizeTool = (ompt_finalize_tool_t) lookup("ompt_finalize_tool");
9396

9497
// initialize base
@@ -157,12 +160,40 @@ namespace opdi {
157160

158161
int result = getParallelInfo(0, &parallelData, &teamSize);
159162

160-
if (result != 2) {
161-
return nullptr;
162-
}
163+
assert(result == 2);
163164

164165
return parallelData->ptr;
165166
}
167+
168+
// Returns the tool data pointer (ompt_data_t::ptr) of the task reported by the OMPT runtime
// entry point stored in getTaskInfo, queried with first argument 0.
// Per the OMPT specification of ompt_get_task_info, that argument is the ancestor level and
// 0 denotes the current task - TODO confirm against the OpenMP version in use.
// All other outputs of the query (flags, frame, parallel data, thread number) are ignored.
void* getTaskData() {
169+
int flags;
170+
ompt_data_t* taskData;
171+
ompt_frame_t* taskFrame;
172+
ompt_data_t* parallelData;
173+
int threadNum;
174+
175+
int result = getTaskInfo(0, &flags, &taskData, &taskFrame, &parallelData, &threadNum);
176+
177+
// A return value of 2 is expected; in the OMPT specification it signals that the task exists
// and its information is available.
// NOTE(review): this assert is the only check. With NDEBUG it is compiled out, and taskData
// (declared above without an initializer) is then dereferenced even if the query failed.
assert(result == 2);
178+
179+
return taskData->ptr;
180+
}
181+
182+
// Stores `data` in the tool data slot (ompt_data_t::ptr) of the task reported by the OMPT
// runtime entry point stored in getTaskInfo, queried with first argument 0
// (presumably the current task, see the ompt_get_task_info specification - TODO confirm).
// Preconditions, all checked with assert only and therefore not enforced under NDEBUG:
//   - the query returns 2,
//   - the reported task flags contain ompt_task_initial,
//   - the task's data slot is still nullptr (it is never overwritten silently in debug builds).
void setInitialImplicitTaskData(void* data) {
183+
int flags;
184+
ompt_data_t* taskData;
185+
ompt_frame_t* taskFrame;
186+
ompt_data_t* parallelData;
187+
int threadNum;
188+
189+
int result = getTaskInfo(0, &flags, &taskData, &taskFrame, &parallelData, &threadNum);
190+
191+
// NOTE(review): flags and taskData are declared without initializers; if the query does not
// fill them and asserts are disabled, the statements below operate on indeterminate values.
assert(result == 2);
192+
assert(flags & ompt_task_initial);
193+
assert(taskData->ptr == nullptr);
194+
195+
// attach the caller-provided data to the initial implicit task
taskData->ptr = data;
196+
}
166197
};
167198
}
168199

include/opdi/logic/logicInterface.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,8 @@ namespace opdi {
5858
virtual void* onParallelBegin(void* encounteringTaskData, int maxThreads) = 0;
5959
virtual void onParallelEnd(void* data) = 0;
6060

61-
virtual void* onImplicitTaskBegin(int actualParallelism, int index, void* parallelData) = 0;
61+
virtual void* onImplicitTaskBegin(bool initialImplicitTask, int actualParallelism, int index,
62+
void* parallelData) = 0;
6263
virtual void onImplicitTaskEnd(void* data) = 0;
6364

6465
virtual void onMutexDestroyed(MutexKind kind, std::size_t waitId) = 0;

include/opdi/logic/omp/implicitTaskOmpLogic.cpp

Lines changed: 61 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -40,45 +40,62 @@ void opdi::ImplicitTaskOmpLogic::internalFinalize() {
4040
this->tapePool.finalize();
4141
}
4242

43-
void* opdi::ImplicitTaskOmpLogic::onImplicitTaskBegin(int actualParallelism, int index, void* parallelDataPtr) {
43+
void* opdi::ImplicitTaskOmpLogic::onImplicitTaskBegin(bool initialImplicitTask, int actualParallelism, int index,
44+
void* parallelDataPtr) {
4445

4546
ParallelData* parallelData = (ParallelData*) parallelDataPtr;
4647

47-
if (parallelData != nullptr) {
48-
if (index == 0) {
49-
parallelData->actualThreads = actualParallelism;
50-
}
48+
// check if the handling of the parallel region was skipped
49+
if (parallelData != nullptr || initialImplicitTask) {
5150

5251
Data* data = new Data;
52+
data->initialImplicitTask = initialImplicitTask;
5353
data->level = omp_get_level();
5454
data->index = index;
55-
data->oldTape = tool->getThreadLocalTape();
56-
data->parallelData = parallelData;
5755

58-
void* newTape = this->tapePool.getTape(parallelData->parentTape, index);
56+
// OpDiLib does not interfere with the initial implicit task AD-wise, e.g., does not track its tape / does not assume
57+
// that the tape does not change. OpDiLib uses the initial implicit task's data primarily to track its adjoint access
58+
// mode.
59+
if (!initialImplicitTask) {
60+
if (index == 0) {
61+
parallelData->actualThreads = actualParallelism;
62+
}
5963

60-
if (parallelData->activeParallelRegion) {
61-
tool->setActive(newTape, true);
62-
}
64+
data->oldTape = tool->getThreadLocalTape();
65+
data->parallelData = parallelData;
6366

64-
data->tape = newTape;
67+
void* newTape = this->tapePool.getTape(parallelData->parentTape, index);
6568

66-
data->positions.push_back(tool->allocPosition());
67-
tool->getTapePosition(newTape, data->positions.back());
69+
if (parallelData->activeParallelRegion) {
70+
tool->setActive(newTape, true);
71+
}
6872

69-
tool->setThreadLocalTape(newTape);
73+
data->tape = newTape;
7074

71-
AdjointAccessControl::pushMode(parallelData->parentAdjointAccessMode);
72-
data->adjointAccessModes.push_back(parallelData->parentAdjointAccessMode);
75+
data->positions.push_back(tool->allocPosition());
76+
tool->getTapePosition(newTape, data->positions.back());
77+
78+
tool->setThreadLocalTape(newTape);
79+
80+
AdjointAccessControl::pushMode(parallelData->parentAdjointAccessMode);
81+
data->adjointAccessModes.push_back(parallelData->parentAdjointAccessMode);
82+
83+
parallelData->childTasks[index] = data;
84+
}
85+
else {
86+
data->oldTape = nullptr;
87+
data->tape = nullptr;
88+
data->parallelData = nullptr;
89+
90+
data->adjointAccessModes.push_back(ImplicitTaskOmpLogic::defaultAdjointAccessMode);
91+
}
7392

7493
#if OPDI_OMP_LOGIC_INSTRUMENT
7594
for (auto& instrument : ompLogicInstruments) {
7695
instrument->onImplicitTaskBegin(data);
7796
}
7897
#endif
7998

80-
parallelData->childTasks[index] = data;
81-
8299
return data;
83100
}
84101

@@ -90,35 +107,41 @@ void opdi::ImplicitTaskOmpLogic::onImplicitTaskEnd(void* dataPtr) {
90107
if (dataPtr != nullptr) {
91108
Data* data = (Data*) dataPtr;
92109

110+
#if OPDI_OMP_LOGIC_INSTRUMENT
111+
for (auto& instrument : ompLogicInstruments) {
112+
instrument->onImplicitTaskEnd(data);
113+
}
114+
#endif
115+
93116
AdjointAccessMode lastAccessMode = AdjointAccessControl::currentMode();
94117
AdjointAccessControl::popMode();
95118
AdjointAccessControl::currentMode() = lastAccessMode;
96119

97-
tool->setThreadLocalTape(data->oldTape);
120+
if (!data->initialImplicitTask) {
121+
tool->setThreadLocalTape(data->oldTape);
98122

99-
data->positions.push_back(tool->allocPosition());
100-
tool->getTapePosition(data->tape, data->positions.back());
123+
data->positions.push_back(tool->allocPosition());
124+
tool->getTapePosition(data->tape, data->positions.back());
101125

102-
if (!data->parallelData->activeParallelRegion) {
103-
if (tool->comparePosition(data->positions.front(), data->positions.back()) != 0) {
104-
OPDI_WARNING("Something became active during a passive parallel region. This is not supported and will not be ",
105-
"differentiated correctly.");
126+
if (!data->parallelData->activeParallelRegion) {
127+
if (tool->comparePosition(data->positions.front(), data->positions.back()) != 0) {
128+
OPDI_WARNING("Something became active during a passive parallel region. This is not supported and will not be ",
129+
"differentiated correctly.");
130+
}
106131
}
107-
}
108132

109-
#if OPDI_OMP_LOGIC_INSTRUMENT
110-
for (auto& instrument : ompLogicInstruments) {
111-
instrument->onImplicitTaskEnd(data);
112-
}
113-
#endif
133+
tool->setActive(data->tape, false);
114134

115-
tool->setActive(data->tape, false);
135+
// ensure that the most recent activity change *per thread* reflects the current activity
136+
if (data->oldTape == data->parallelData->parentTape && data->parallelData->activeParallelRegion) {
137+
tool->setActive(data->oldTape, true);
138+
}
116139

117-
// ensure that the most recent activity change *per thread* reflects the current activity
118-
if (data->oldTape == data->parallelData->parentTape && data->parallelData->activeParallelRegion) {
119-
tool->setActive(data->oldTape, true);
140+
// do not delete data, it is deleted as part of parallel regions
141+
}
142+
else {
143+
// delete task data, there is no parallel region to do so
144+
delete data;
120145
}
121-
122-
// do not delete data, it is deleted as part of parallel regions
123146
}
124147
}

include/opdi/logic/omp/implicitTaskOmpLogic.hpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,10 +48,13 @@ namespace opdi {
4848

4949
using LogicInterface::AdjointAccessMode;
5050

51+
static AdjointAccessMode const defaultAdjointAccessMode;
52+
5153
using ParallelData = typename ParallelOmpLogic::Data;
5254

5355
struct Data {
5456
public:
57+
bool initialImplicitTask;
5558
int level;
5659
int index;
5760
void* oldTape;
@@ -61,7 +64,8 @@ namespace opdi {
6164
std::deque<AdjointAccessMode> adjointAccessModes;
6265
};
6366

64-
virtual void* onImplicitTaskBegin(int actualParallelism, int index, void* parallelDataPtr);
67+
virtual void* onImplicitTaskBegin(bool initialImplicitTask, int actualParallelism, int index,
68+
void* parallelDataPtr);
6569
virtual void onImplicitTaskEnd(void* dataPtr);
6670
};
6771
}

0 commit comments

Comments
 (0)