|
66 | 66 | bool constexpr opdiInternalBarrierIndicator = true; \ |
67 | 67 | void* opdiInternalTapePosition1 = opdi::tool->allocPosition(); \ |
68 | 68 | opdi::tool->getTapePosition(opdi::tool->getThreadLocalTape(), opdiInternalTapePosition1); \ |
| 69 | + /* broadcast-related barrier */ \ |
69 | 70 | opdi::logic->onSyncRegion(opdi::LogicInterface::SyncRegionKind::BarrierImplementation, \ |
70 | 71 | opdi::LogicInterface::ScopeEndpoint::Begin); \ |
| 72 | + opdi::logic->onSyncRegion(opdi::LogicInterface::SyncRegionKind::BarrierImplementation, \ |
| 73 | + opdi::LogicInterface::ScopeEndpoint::End); \ |
71 | 74 | void* opdiInternalTapePosition2 = opdi::tool->allocPosition(); \ |
72 | 75 | opdi::tool->getTapePosition(opdi::tool->getThreadLocalTape(), opdiInternalTapePosition2); \ |
73 | 76 | opdi::ImplicitBarrierTools::beginRegionWithImplicitBarrier(); \ |
74 | 77 | { \ |
75 | | - opdi::SingleProbe localSingleProbe; \ |
| 78 | + opdi::SingleProbe localSingleProbe; /* worksharing events */ \ |
76 | 79 | OPDI_PRAGMA(omp single __VA_ARGS__) \ |
77 | 80 | { \ |
| 81 | + /* delay broadcast-related barrier for executor */ \ |
78 | 82 | opdi::tool->erase(opdi::tool->getThreadLocalTape(), opdiInternalTapePosition1, opdiInternalTapePosition2); |
79 | 83 |
|
80 | 84 | #define OPDI_SINGLE_NOWAIT(...) \ |
81 | 85 | { /* start macro for an `omp single nowait` region; opens three scopes that OPDI_END_SINGLE closes */ \ |
82 | 86 | bool constexpr opdiInternalBarrierIndicator = false; /* nowait variant; OPDI_END_SINGLE assigns this to the top of the implicit barrier stack */ \ |
83 | 87 | void* opdiInternalTapePosition1 = opdi::tool->allocPosition(); \ |
84 | | - void* opdiInternalTapePosition2 = opdi::tool->allocPosition(); /* for consistency with the end macro */ \ |
85 | 88 | opdi::tool->getTapePosition(opdi::tool->getThreadLocalTape(), opdiInternalTapePosition1); /* thread-local tape position before the barrier events below */ \ |
| 89 | + /* broadcast-related barrier: Begin and End are reported back to back, before the single construct is entered */ \ |
86 | 90 | opdi::logic->onSyncRegion(opdi::LogicInterface::SyncRegionKind::BarrierImplementation, \ |
87 | 91 | opdi::LogicInterface::ScopeEndpoint::Begin); \ |
| 92 | + opdi::logic->onSyncRegion(opdi::LogicInterface::SyncRegionKind::BarrierImplementation, \ |
| 93 | + opdi::LogicInterface::ScopeEndpoint::End); \ |
| 94 | + void* opdiInternalTapePosition2 = opdi::tool->allocPosition(); \ |
| 95 | + opdi::tool->getTapePosition(opdi::tool->getThreadLocalTape(), opdiInternalTapePosition2); /* thread-local tape position after the barrier events above */ \ |
88 | 96 | opdi::ImplicitBarrierTools::beginRegionWithImplicitBarrier(); \ |
89 | 97 | { /* scope that bounds the lifetime of localSingleProbe */ \ |
90 | 98 | opdi::SingleProbe localSingleProbe; /* worksharing events */ \ |
91 | 99 | OPDI_PRAGMA(omp single nowait __VA_ARGS__) \ |
92 | 100 | { /* block attached to the single pragma; user code follows the macro and OPDI_END_SINGLE closes this scope */ \ |
93 | | - opdi::tool->reset(opdi::tool->getThreadLocalTape(), opdiInternalTapePosition1);
| 101 | + /* delay broadcast-related barrier for executor: presumably erase removes what was recorded between the two positions (the barrier events above); OPDI_END_SINGLE reports Begin/End again at the end of this block */ \ |
| 102 | + opdi::tool->erase(opdi::tool->getThreadLocalTape(), opdiInternalTapePosition1, opdiInternalTapePosition2);
94 | 103 |
|
95 | 104 | #define OPDI_END_SINGLE \ |
| 105 | + /* end macro for a single region; uses opdiInternalTapePosition1/2 and opdiInternalBarrierIndicator declared by the start macro. broadcast-related barrier: Begin and End are reported back to back while still inside the innermost scope opened by the start macro */ \ |
96 | 106 | opdi::logic->onSyncRegion(opdi::LogicInterface::SyncRegionKind::BarrierImplementation, \ |
97 | 107 | opdi::LogicInterface::ScopeEndpoint::Begin); \ |
| 108 | + opdi::logic->onSyncRegion(opdi::LogicInterface::SyncRegionKind::BarrierImplementation, \ |
| 109 | + opdi::LogicInterface::ScopeEndpoint::End); \ |
98 | 110 | } /* closes the block attached to the single pragma in the start macro */ \ |
99 | | - opdi::logic->onSyncRegion(opdi::LogicInterface::SyncRegionKind::BarrierImplementation, \ |
100 | | - opdi::LogicInterface::ScopeEndpoint::End); \ |
101 | 111 | } /* closes the scope holding localSingleProbe */ \ |
102 | 112 | opdi::tool->freePosition(opdiInternalTapePosition1); /* release both positions allocated by the start macro */ \ |
103 | 113 | opdi::tool->freePosition(opdiInternalTapePosition2); \ |
| 114 | + /* implicit barrier: store the start macro's indicator (false in OPDI_SINGLE_NOWAIT) on top of the implicit barrier stack, then end the region */ \ |
104 | 115 | opdi::ImplicitBarrierTools::implicitBarrierStack.top() = opdiInternalBarrierIndicator; \ |
105 | 116 | opdi::ImplicitBarrierTools::endRegionWithImplicitBarrier(); \ |
106 | 117 | }
|
0 commit comments