@@ -116,7 +116,6 @@ void TrackerTraitsGPU<nLayers>::computeLayerTracklets(const int iteration, int i
116116 conf.nBlocksLayerTracklets [iteration],
117117 conf.nThreadsLayerTracklets [iteration],
118118 mTimeFrameGPU ->getStreams ());
119- mTimeFrameGPU ->syncStreams (); // TODO evaluate if this can be removed
120119}
121120
122121template <int nLayers>
@@ -125,18 +124,30 @@ void TrackerTraitsGPU<nLayers>::computeLayerCells(const int iteration)
125124 mTimeFrameGPU ->createCellsLUTDevice ();
126125 auto & conf = o2::its::ITSGpuTrackingParamConfig::Instance ();
127126
127+ std::vector<bool > isTrackletStreamSynched (this ->mTrkParams [iteration].TrackletsPerRoad ()); // one flag per index in [0, TrackletsPerRoad()), all value-initialised to false
128+ auto syncOnce = [&](const int iLayer) { // calls syncStream(iLayer) only the first time it is invoked for a given iLayer; iLayer is not range-checked against the flag vector
129+ if (!isTrackletStreamSynched[iLayer]) {
130+ mTimeFrameGPU ->syncStream (iLayer); // NOTE(review): presumably waits for the work queued on the stream of this layer — confirm in TimeFrameGPU
131+ isTrackletStreamSynched[iLayer] = true ; // remember the call so later invocations for this iLayer are no-ops
132+ }
133+ };
134+
128135 for (int iLayer = 0 ; iLayer < this ->mTrkParams [iteration].CellsPerRoad (); ++iLayer) {
129- if (!mTimeFrameGPU ->getNTracklets ()[iLayer + 1 ] || !mTimeFrameGPU ->getNTracklets ()[iLayer]) {
136+ // need to ensure that trackleting on layers iLayer and iLayer + 1 are done (only once)
137+ syncOnce (iLayer);
138+ syncOnce (iLayer + 1 );
139+ // if there are no tracklets skip entirely
140+ const int currentLayerTrackletsNum{static_cast <int >(mTimeFrameGPU ->getNTracklets ()[iLayer])};
141+ if (!currentLayerTrackletsNum || !mTimeFrameGPU ->getNTracklets ()[iLayer + 1 ]) {
130142 mTimeFrameGPU ->getNCells ()[iLayer] = 0 ;
131143 continue ;
132144 }
133- const int currentLayerTrackletsNum{static_cast <int >(mTimeFrameGPU ->getNTracklets ()[iLayer])};
134145 countCellsHandler (mTimeFrameGPU ->getDeviceArrayClusters (),
135146 mTimeFrameGPU ->getDeviceArrayUnsortedClusters (),
136147 mTimeFrameGPU ->getDeviceArrayTrackingFrameInfo (),
137148 mTimeFrameGPU ->getDeviceArrayTracklets (),
138149 mTimeFrameGPU ->getDeviceArrayTrackletsLUT (),
139- mTimeFrameGPU -> getNTracklets ()[iLayer] ,
150+ currentLayerTrackletsNum ,
140151 iLayer,
141152 nullptr ,
142153 mTimeFrameGPU ->getDeviceArrayCellsLUT (),
@@ -147,14 +158,15 @@ void TrackerTraitsGPU<nLayers>::computeLayerCells(const int iteration)
147158 this ->mTrkParams [iteration].CellDeltaTanLambdaSigma ,
148159 this ->mTrkParams [iteration].NSigmaCut ,
149160 conf.nBlocksLayerCells [iteration],
150- conf.nThreadsLayerCells [iteration]);
161+ conf.nThreadsLayerCells [iteration],
162+ mTimeFrameGPU ->getStreams ());
151163 mTimeFrameGPU ->createCellsBuffers (iLayer);
152164 computeCellsHandler (mTimeFrameGPU ->getDeviceArrayClusters (),
153165 mTimeFrameGPU ->getDeviceArrayUnsortedClusters (),
154166 mTimeFrameGPU ->getDeviceArrayTrackingFrameInfo (),
155167 mTimeFrameGPU ->getDeviceArrayTracklets (),
156168 mTimeFrameGPU ->getDeviceArrayTrackletsLUT (),
157- mTimeFrameGPU -> getNTracklets ()[iLayer] ,
169+ currentLayerTrackletsNum ,
158170 iLayer,
159171 mTimeFrameGPU ->getDeviceCells ()[iLayer],
160172 mTimeFrameGPU ->getDeviceArrayCellsLUT (),
@@ -165,8 +177,10 @@ void TrackerTraitsGPU<nLayers>::computeLayerCells(const int iteration)
165177 this ->mTrkParams [iteration].CellDeltaTanLambdaSigma ,
166178 this ->mTrkParams [iteration].NSigmaCut ,
167179 conf.nBlocksLayerCells [iteration],
168- conf.nThreadsLayerCells [iteration]);
180+ conf.nThreadsLayerCells [iteration],
181+ mTimeFrameGPU ->getStreams ());
169182 }
183+ mTimeFrameGPU ->syncStreams (); // TODO evaluate if this can be removed
170184}
171185
172186template <int nLayers>
0 commit comments