|
/**
 * Opens an instrumented OpenMP `single` region without adding a `nowait` clause and without broadcast handling.
 *
 * Sets the barrier indicator to true and the broadcast indicator to false. The expansion leaves three scopes
 * open (outer scope, probe scope, body of the single construct); it must be paired with OPDI_END_SINGLE, which
 * closes them and reads the indicator constants and tape position variables declared here.
 *
 * @param ... Additional clauses, forwarded verbatim to the `omp single` pragma.
 */
#define OPDI_SINGLE(...) \
  { \
    bool constexpr opdiInternalBarrierIndicator = true; \
    bool constexpr opdiInternalBroadcastIndicator = false; \
    /* No positions are allocated in this variant. The variables exist only because the end macro names them; */ \
    /* they are null-initialized so that no indeterminate pointer value appears in the expansion. */ \
    void* opdiInternalTapePosition1 = nullptr; \
    void* opdiInternalTapePosition2 = nullptr; \
    OPDI_UNUSED(opdiInternalTapePosition1); \
    OPDI_UNUSED(opdiInternalTapePosition2); \
    opdi::ImplicitBarrierTools::beginRegionWithImplicitBarrier(); \
    { \
      opdi::SingleProbe localSingleProbe; /* worksharing events */ \
      OPDI_PRAGMA(omp single __VA_ARGS__) \
      {
| 77 | + |
/**
 * Opens an instrumented OpenMP `single nowait` region without broadcast handling.
 *
 * Sets both the barrier indicator and the broadcast indicator to false. The expansion leaves three scopes open
 * (outer scope, probe scope, body of the single construct); it must be paired with OPDI_END_SINGLE, which
 * closes them and reads the indicator constants and tape position variables declared here.
 *
 * @param ... Additional clauses, forwarded verbatim after `omp single nowait`.
 */
#define OPDI_SINGLE_NOWAIT(...) \
  { \
    bool constexpr opdiInternalBarrierIndicator = false; \
    bool constexpr opdiInternalBroadcastIndicator = false; \
    /* No positions are allocated in this variant. The variables exist only because the end macro names them; */ \
    /* they are null-initialized so that no indeterminate pointer value appears in the expansion. */ \
    void* opdiInternalTapePosition1 = nullptr; \
    void* opdiInternalTapePosition2 = nullptr; \
    OPDI_UNUSED(opdiInternalTapePosition1); \
    OPDI_UNUSED(opdiInternalTapePosition2); \
    opdi::ImplicitBarrierTools::beginRegionWithImplicitBarrier(); \
    { \
      opdi::SingleProbe localSingleProbe; /* worksharing events */ \
      OPDI_PRAGMA(omp single nowait __VA_ARGS__) \
      {
| 91 | + |
/**
 * Opens an instrumented OpenMP `single` region with broadcast handling enabled.
 *
 * Sets both the barrier indicator and the broadcast indicator to true. Before entering the single construct,
 * every thread captures a tape position, reports a Begin/End pair of BarrierImplementation sync-region events,
 * and captures a second tape position. Inside the single construct, the tape range between the two positions
 * is erased again; OPDI_END_SINGLE reports the Begin/End pair at the end of the single body instead.
 *
 * The expansion leaves three scopes open and allocates two positions; it must be paired with OPDI_END_SINGLE,
 * which closes the scopes and frees the positions.
 *
 * @param ... Additional clauses, forwarded verbatim to the `omp single` pragma.
 */
#define OPDI_SINGLE_COPYPRIVATE(...) \
  { \
    constexpr bool opdiInternalBarrierIndicator = true; \
    constexpr bool opdiInternalBroadcastIndicator = true; \
    /* position before the broadcast-related barrier */ \
    void* opdiInternalTapePosition1 = opdi::tool->allocPosition(); \
    opdi::tool->getTapePosition(opdi::tool->getThreadLocalTape(), opdiInternalTapePosition1); \
    /* broadcast-related barrier */ \
    opdi::logic->onSyncRegion(opdi::LogicInterface::SyncRegionKind::BarrierImplementation, \
                              opdi::LogicInterface::ScopeEndpoint::Begin); \
    opdi::logic->onSyncRegion(opdi::LogicInterface::SyncRegionKind::BarrierImplementation, \
                              opdi::LogicInterface::ScopeEndpoint::End); \
    /* position after the broadcast-related barrier */ \
    void* opdiInternalTapePosition2 = opdi::tool->allocPosition(); \
    opdi::tool->getTapePosition(opdi::tool->getThreadLocalTape(), opdiInternalTapePosition2); \
    opdi::ImplicitBarrierTools::beginRegionWithImplicitBarrier(); \
    { \
      opdi::SingleProbe localSingleProbe; /* worksharing events */ \
      OPDI_PRAGMA(omp single __VA_ARGS__) \
      { \
        /* delay broadcast-related barrier for executor */ \
        opdi::tool->erase(opdi::tool->getThreadLocalTape(), opdiInternalTapePosition1, opdiInternalTapePosition2);
79 | 112 |
|
/**
 * Opens an instrumented OpenMP `single nowait` region with broadcast handling enabled.
 *
 * Sets the barrier indicator to false and the broadcast indicator to true. Before entering the single
 * construct, every thread captures a tape position, reports a Begin/End pair of BarrierImplementation
 * sync-region events, and captures a second tape position. Inside the single construct, the tape range between
 * the two positions is erased again; OPDI_END_SINGLE reports the Begin/End pair at the end of the single body
 * instead.
 *
 * The expansion leaves three scopes open and allocates two positions; it must be paired with OPDI_END_SINGLE,
 * which closes the scopes and frees the positions.
 *
 * NOTE(review): OpenMP 5.x restricts combining `copyprivate` with `nowait` on a single construct. Presumably
 * callers do not pass a literal copyprivate clause through __VA_ARGS__ here - TODO confirm against call sites.
 *
 * @param ... Additional clauses, forwarded verbatim after `omp single nowait`.
 */
#define OPDI_SINGLE_COPYPRIVATE_NOWAIT(...) \
  { \
    constexpr bool opdiInternalBarrierIndicator = false; \
    constexpr bool opdiInternalBroadcastIndicator = true; \
    /* position before the broadcast-related barrier */ \
    void* opdiInternalTapePosition1 = opdi::tool->allocPosition(); \
    opdi::tool->getTapePosition(opdi::tool->getThreadLocalTape(), opdiInternalTapePosition1); \
    /* broadcast-related barrier */ \
    opdi::logic->onSyncRegion(opdi::LogicInterface::SyncRegionKind::BarrierImplementation, \
                              opdi::LogicInterface::ScopeEndpoint::Begin); \
    opdi::logic->onSyncRegion(opdi::LogicInterface::SyncRegionKind::BarrierImplementation, \
                              opdi::LogicInterface::ScopeEndpoint::End); \
    /* position after the broadcast-related barrier */ \
    void* opdiInternalTapePosition2 = opdi::tool->allocPosition(); \
    opdi::tool->getTapePosition(opdi::tool->getThreadLocalTape(), opdiInternalTapePosition2); \
    opdi::ImplicitBarrierTools::beginRegionWithImplicitBarrier(); \
    { \
      opdi::SingleProbe localSingleProbe; /* worksharing events */ \
      OPDI_PRAGMA(omp single nowait __VA_ARGS__) \
      { \
        /* delay broadcast-related barrier for executor */ \
        opdi::tool->erase(opdi::tool->getThreadLocalTape(), opdiInternalTapePosition1, opdiInternalTapePosition2);
94 | 133 |
|
/**
 * Closes a region opened by OPDI_SINGLE, OPDI_SINGLE_NOWAIT, OPDI_SINGLE_COPYPRIVATE or
 * OPDI_SINGLE_COPYPRIVATE_NOWAIT.
 *
 * Relies on the names declared by the opening macro: opdiInternalBarrierIndicator,
 * opdiInternalBroadcastIndicator, opdiInternalTapePosition1 and opdiInternalTapePosition2.
 *
 * Steps, in order:
 *  - still inside the single body: if the broadcast indicator is set, report a Begin/End pair of
 *    BarrierImplementation sync-region events;
 *  - close the single body and the probe scope;
 *  - if the broadcast indicator is set, free both tape positions;
 *  - store the barrier indicator at the top of the implicit barrier stack and end the region;
 *  - close the outer scope.
 */
#define OPDI_END_SINGLE \
        /* broadcast-related barrier */ \
        if (opdiInternalBroadcastIndicator) { \
          opdi::logic->onSyncRegion(opdi::LogicInterface::SyncRegionKind::BarrierImplementation, \
                                    opdi::LogicInterface::ScopeEndpoint::Begin); \
          opdi::logic->onSyncRegion(opdi::LogicInterface::SyncRegionKind::BarrierImplementation, \
                                    opdi::LogicInterface::ScopeEndpoint::End); \
        } \
      } /* end of the single body */ \
    } /* end of the probe scope */ \
    /* positions are only allocated by the broadcast variants */ \
    if (opdiInternalBroadcastIndicator) { \
      opdi::tool->freePosition(opdiInternalTapePosition1); \
      opdi::tool->freePosition(opdiInternalTapePosition2); \
    } \
    /* implicit barrier */ \
    opdi::ImplicitBarrierTools::implicitBarrierStack.top() = opdiInternalBarrierIndicator; \
    opdi::ImplicitBarrierTools::endRegionWithImplicitBarrier(); \
  }
|
0 commit comments