AliceO2/GPU/GPUTracking/DataCompression/GPUTPCCompression.h at 9044202913d11c6287ea93d13057b6a1b948b277 · davidrohr/AliceO2 · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.

/// \file GPUTPCCompression.h
/// \author David Rohr

#ifndef GPUTPCCOMPRESSION_H
#define GPUTPCCOMPRESSION_H

#include "GPUDef.h"
#include "GPUProcessor.h"
#include "GPUCommonMath.h"
#include "GPUParam.h"
#include "DataFormatsTPC/CompressedClusters.h"

namespace o2::gpu
{
class GPUTPCGMMerger;

/// Processor (derived from GPUProcessor) that holds the output pointers, size limits and
/// value-truncation helpers declared below for the TPC cluster compression.
class GPUTPCCompression : public GPUProcessor
{
  // These classes are granted access to the protected members declared below.
  friend class GPUTPCCompressionKernels;
  friend class GPUTPCCompressionGatherKernels;
  friend class GPUChainTracking;

 public:
  // The following declarations are only visible when GPUCA_GPUCODE is not defined.
#ifndef GPUCA_GPUCODE
  void InitializeProcessor();
  void RegisterMemoryAllocation();
  void SetMaxData(const GPUTrackingInOutPointers& io);

  // Pointer-setup routines, defined elsewhere. Each takes a raw memory pointer and returns one
  // (presumably the position after the buffers it laid out - confirm against the implementation).
  void* SetPointersOutputGPU(void* mem);
  void* SetPointersOutputHost(void* mem);
  void* SetPointersOutputPtrs(void* mem);
  void* SetPointersOutput(void* mem);
  void* SetPointersScratch(void* mem);
  void* SetPointersMemory(void* mem);
#endif

  // Value-range limits. P_MAX_QTOT, P_MAX_QMAX and P_MAX_SIGMA are passed below as the `max`
  // argument of truncateSignificantBits, which clamps results to `max - 1`.
  // The remaining limits are not referenced anywhere else in this header.
  static constexpr uint32_t P_MAX_QMAX = 1 << 10;
  static constexpr uint32_t P_MAX_QTOT = 5 * 5 * P_MAX_QMAX;
  static constexpr uint32_t P_MAX_TIME = 1 << 24;
  static constexpr uint32_t P_MAX_PAD = 1 << 16;
  static constexpr uint32_t P_MAX_SIGMA = 1 << 8;
  static constexpr uint32_t P_MAX_FLAGS = 1 << 8;
  static constexpr uint32_t P_MAX_QPT = 1 << 8;

  // Convenience wrappers around truncateSignificantBits. They modify the value in place, taking
  // the number of significant bits from param.rec.tpc (sigBitsCharge / sigBitsWidth) and the
  // clamp limit from the matching P_MAX_* constant.
  GPUd() static void truncateSignificantBitsCharge(uint16_t& charge, const GPUParam& param) { truncateSignificantBits(charge, param.rec.tpc.sigBitsCharge, P_MAX_QTOT); }
  GPUd() static void truncateSignificantBitsChargeMax(uint16_t& charge, const GPUParam& param) { truncateSignificantBits(charge, param.rec.tpc.sigBitsCharge, P_MAX_QMAX); }
  GPUd() static void truncateSignificantBitsWidth(uint8_t& width, const GPUParam& param) { truncateSignificantBits(width, param.rec.tpc.sigBitsWidth, P_MAX_SIGMA); }

  // Only declared when GPUCA_GPUCODE is not defined; writes to the given std::ostream.
#ifndef GPUCA_GPUCODE
  void DumpCompressedClusters(std::ostream& out);
#endif

 protected:
  // Counters, all zero-initialized; accessed through the mMemory pointer below.
  struct memory {
    uint32_t nStoredTracks = 0;
    uint32_t nStoredAttachedClusters = 0;
    uint32_t nStoredUnattachedClusters = 0;
  };

  // Local alias for the GPUCA_NSECTORS macro value.
  constexpr static uint32_t NSECTORS = GPUCA_NSECTORS;

  // Compressed-cluster output structures; the pointers start out as nullptr.
  o2::tpc::CompressedClustersPtrs mPtrs;
  o2::tpc::CompressedClusters* mOutput = nullptr;
  o2::tpc::CompressedClusters* mOutputA = nullptr; // Always points to host buffer
  o2::tpc::CompressedClustersFlat* mOutputFlat = nullptr;

  // Working buffers; all nullptr until assigned (presumably by the SetPointers* routines - confirm).
  memory* mMemory = nullptr;
  uint32_t* mAttachedClusterFirstIndex = nullptr;
  uint8_t* mClusterStatus = nullptr;

  // Size limits, zero until set (presumably by SetMaxData - confirm against the implementation).
  uint32_t mMaxTracks = 0;
  uint32_t mMaxClusters = 0;
  uint32_t mMaxTrackClusters = 0;
  uint32_t mMaxClustersInCache = 0;
  size_t mMaxClusterFactorBase1024 = 0; // NOTE(review): name suggests a factor scaled by 1024 - confirm

  // Defined elsewhere: sets up the pointers of a compressed-clusters structure `c` inside `mem`.
  template <class T>
  void SetPointersCompressedClusters(void*& mem, T& c, uint32_t nClA, uint32_t nTr, uint32_t nClU, bool reducedClA);
  // Rounds `val` to `nBits` significant bits and clamps it to `max - 1`; defined after this class.
  template <class T>
  GPUd() static void truncateSignificantBits(T& val, uint32_t nBits, uint32_t max);

  // Initialized to -1 (presumably ids of registered memory resources, -1 meaning "not registered" - confirm).
  int16_t mMemoryResOutputHost = -1;
  int16_t mMemoryResOutputGPU = -1;
};

/// Reduce \p v in place to its \p nBits most significant bits, rounding to nearest.
///
/// The value is left untouched when \p nBits is 0, when it is zero, or when it already
/// fits into \p nBits bits. Otherwise the bits below the kept ones are cleared (after
/// rounding on the highest dropped bit) and the result is clamped to `max - 1`.
///
/// \param v     value to truncate; overwritten with the result
/// \param nBits number of significant bits to keep, 0 disables the truncation
/// \param max   exclusive upper limit for the stored result
///
/// The shifts operate on `int` literals; the callers in this class only pass 8 and 16 bit values.
template <class T>
GPUdi() void GPUTPCCompression::truncateSignificantBits(T& v, uint32_t nBits, uint32_t max)
{
  if (nBits == 0) {
    return;
  }

  uint32_t rounded = v;
  // Index of the highest set bit plus one, i.e. the number of bits the value occupies.
  uint32_t width = sizeof(uint32_t) * 8 - CAMath::Clz(rounded);
  if (!rounded || width <= nBits) {
    return; // nothing would be dropped
  }

  // Round to nearest: when the highest bit about to be dropped is set, add it so that it
  // carries into the kept bits. The carry can make the value one bit wider.
  const uint32_t roundingBit = 1 << (width - nBits - 1);
  if (rounded & roundingBit) {
    rounded += roundingBit;
    width = sizeof(uint32_t) * 8 - CAMath::Clz(rounded);
  }

  // Keep the upper nBits of the occupied range and clear everything below.
  const uint32_t occupiedMask = (1 << width) - 1;
  const uint32_t droppedMask = (1 << (width - nBits)) - 1;
  rounded &= occupiedMask ^ droppedMask;

  if (rounded >= max) {
    rounded = max - 1;
  }
  // GPUInfo("CHANGING X %x --> %x", (uint32_t) v, rounded);
  v = rounded;
}
} // namespace o2::gpu

#endif