forked from AliceO2Group/AliceO2
-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathSemiregularSpline2D3D.h
More file actions
475 lines (376 loc) · 16.2 KB
/
SemiregularSpline2D3D.h
File metadata and controls
475 lines (376 loc) · 16.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.
/// \file SemiregularSpline2D3D.h
/// \brief Definition of SemiregularSpline2D3D class
///
/// \author Felix Lapp
/// \author Sergey Gorbunov <sergey.gorbunov@cern.ch>
#ifndef ALICEO2_GPUCOMMON_TPCFASTTRANSFORMATION_SEMIREGULARSPLINE2D3D_H
#define ALICEO2_GPUCOMMON_TPCFASTTRANSFORMATION_SEMIREGULARSPLINE2D3D_H
#include "GPUCommonDef.h"
#include "RegularSpline1D.h"
#include "FlatObject.h"
#if !defined(__CLING__) && !defined(G__ROOT) && !defined(GPUCA_GPUCODE) && !defined(GPUCA_NO_VC)
#include <Vc/Vc>
#include <Vc/SimdArray>
#endif
namespace o2
{
namespace gpu
{
///
/// The SemiregularSpline2D3D class represents two-dimensional spline interpolation on a semi-uniform grid.
///
/// The class is flat C structure. No virtual methods, no ROOT types are used.
/// It is designed for spline parameterisation of TPC transformation.
///
/// ---
/// The spline interpolates a generic function F:(u,v)->(x,y,z),
/// where (u,v) belongs to [0,1]x[0,1]
///
/// It is a variation of IrregularSpline2D3D class, see IrregularSpline2D3D.h for more details.
///
/// Important:
/// -- The number of knots may change during initialisation
/// -- Don't forget to call correctEdges() for the array of F values (see IrregularSpline2D3D.h )
///
///
class SemiregularSpline2D3D : public FlatObject
{
public:
/// _____________ Constructors / destructors __________________________
/// Default constructor. Creates an empty uninitialised object
SemiregularSpline2D3D();
/// Copy constructor: disabled to avoid ambiguity. Use cloneFromObject() instead
SemiregularSpline2D3D(const SemiregularSpline2D3D&) = delete;
/// Assignment operator: disabled to avoid ambiguity. Use cloneFromObject() instead
SemiregularSpline2D3D& operator=(const SemiregularSpline2D3D&) = delete;
/// Destructor
~SemiregularSpline2D3D() = default;
/// _____________ FlatObject functionality, see FlatObject class for description ____________
/// Memory alignment
using FlatObject::getBufferAlignmentBytes;
using FlatObject::getClassAlignmentBytes;
/// Construction interface
void cloneFromObject(const SemiregularSpline2D3D& obj, char* newFlatBufferPtr);
void destroy();
/// Making the data buffer external
using FlatObject::releaseInternalBuffer;
void moveBufferTo(char* newBufferPtr);
/// Moving the class with its external buffer to another location
void setActualBufferAddress(char* actualFlatBufferPtr);
void setFutureBufferAddress(char* futureFlatBufferPtr);
/// _______________ Construction interface ________________________
/// Constructor
///
/// \param numberOfRows   number of rows of the grid
/// \param numbersOfKnots array with the requested number of knots for each row
///                       (presumably numberOfRows entries - confirm against the implementation)
void construct(const int32_t numberOfRows, const int32_t numbersOfKnots[]);
/// _______________ Main functionality ________________________
/// Correction of data values at edge knots.
///
/// It is needed for the fast spline mathematics to work correctly. See explanation in IrregularSpline1D.h header
///
/// \param data array of function values, three consecutive values per knot. It is modified in place
template <typename T>
void correctEdges(T* data) const;
/// Get interpolated value for f(u,v) using data array correctedData[getNumberOfKnots()] with corrected edges
/// \param correctedData function values at the knots (three consecutive values per knot), edges already corrected
/// \param u,v           coordinates of the point to evaluate
/// \param x,y,z         output: the three interpolated values
template <typename T>
void getSpline(const T* correctedData, float u, float v, T& x, T& y, T& z) const;
/// Same as getSpline, but using vectorized calculation.
/// \param correctedData should be at least 128-bit aligned
void getSplineVec(const float* correctedData, float u, float v, float& x, float& y, float& z) const;
/// Get total number of knots
int32_t getNumberOfKnots() const { return mNumberOfKnots; }
/// Get number of rows. Always the same as gridV's number of knots
int32_t getNumberOfRows() const { return mNumberOfRows; }
/// Get 1-D grid for V coordinate
const RegularSpline1D& getGridV() const { return mGridV; }
/// Get 1-D grid for U coordinate of the i-th row.
/// The row grids are stored as an array at the beginning of the flat buffer, see getSplineArray()
const RegularSpline1D& getGridU(const int32_t i) const { return getSplineArray()[i]; }
/// Get u,v of i-th knot
void getKnotUV(int32_t iKnot, float& u, float& v) const;
/// Get size of the mFlatBuffer data
size_t getFlatBufferSize() const { return mFlatBufferSize; }
/// Gets the position in the data array of the first of the three values of a knot.
/// Note the argument order used by the inline definitions: the FIRST argument is the knot index
/// along u inside the row, the SECOND argument is the row index (v).
/// getDataIndex() returns 3 * (index of the knot), getDataIndex0() returns the index of the knot itself
int32_t getDataIndex(int32_t i, int32_t j) const;
int32_t getDataIndex0(int32_t i, int32_t j) const;
/// Gets the offset for the data index map inside the flat buffer
int32_t getDataIndexMapOffset() const { return mDataIndexMapOffset; }
/// Get pointer to the flat buffer
const char* getFlatBufferPtr() const { return mFlatBufferPtr; }
/// Get minimal required alignment for the class
static constexpr size_t getClassAlignmentBytes() { return 8; }
/// Get minimal required alignment for the flat buffer
static constexpr size_t getBufferAlignmentBytes() { return 8; }
/// Get minimal required alignment for the spline data
static constexpr size_t getDataAlignmentBytes() { return 8; }
/// Gets the array of per-row splines for the u-coordinate; it starts at the beginning of the flat buffer
const RegularSpline1D* getSplineArray() const
{
return reinterpret_cast<const RegularSpline1D*>(mFlatBufferPtr);
}
/// Gets the data index map; it is located mDataIndexMapOffset bytes after the beginning of the flat buffer.
/// It is indexed by the row number, see getDataIndex()
const int32_t* getDataIndexMap() const
{
return reinterpret_cast<const int32_t*>(mFlatBufferPtr + mDataIndexMapOffset);
}
private:
/// NOTE(review): presumably re-bases pointers into the flat buffer after it was moved - implementation is not in this header
void relocateBufferPointers(const char* oldBuffer, char* newBuffer);
/// Non-const access to the array of per-row splines (same location as getSplineArray())
RegularSpline1D* getSplineArrayNonConst()
{
return reinterpret_cast<RegularSpline1D*>(mFlatBufferPtr);
}
/// Non-const access to the data index map (same location as getDataIndexMap())
int32_t* getDataIndexMapNonConst()
{
return reinterpret_cast<int32_t*>(mFlatBufferPtr + mDataIndexMapOffset);
}
///
/// ==== Data members ====
///
RegularSpline1D mGridV; ///< grid for V axis
int32_t mNumberOfRows; ///< number of rows, see getNumberOfRows()
int32_t mNumberOfKnots; ///< total number of knots, see getNumberOfKnots()
int32_t mDataIndexMapOffset; ///< offset in bytes of the data index map inside the flat buffer
ClassDefNV(SemiregularSpline2D3D, 1);
};
/// ====================================================
/// Inline implementations of some methods
/// ====================================================
/// Position inside the data array of the first of the three values stored for a knot.
/// \param u index of the knot inside its row
/// \param v index of the row
inline int32_t SemiregularSpline2D3D::getDataIndex(int32_t u, int32_t v) const
{
  // the map holds, for every row, the index of the first knot of that row
  const int32_t* rowStart = getDataIndexMap();
  const int32_t knot = rowStart[v] + u;
  // three values are stored per knot
  return knot * 3;
}
/// Index of a knot, counted in knots (not multiplied by the three values stored per knot).
/// \param u index of the knot inside its row
/// \param v index of the row
inline int32_t SemiregularSpline2D3D::getDataIndex0(int32_t u, int32_t v) const
{
  // the map holds, for every row, the index of the first knot of that row
  const int32_t firstKnotOfRow = getDataIndexMap()[v];
  return firstKnotOfRow + u;
}
inline void SemiregularSpline2D3D::getKnotUV(int32_t iKnot, float& u, float& v) const
{
// iterate through all RegularSpline1D's
for (int32_t i = 0; i < mNumberOfRows; i++) {
const RegularSpline1D& gridU = getGridU(i);
const int32_t nk = gridU.getNumberOfKnots();
// if the searched index is less or equal as the number of knots in the current spline
// the searched u-v-coordinates have to be in this spline.
if (iKnot <= nk - 1) {
// in that case v is the current index
v = mGridV.knotIndexToU(i);
// and u the coordinate of the given index
u = gridU.knotIndexToU(iKnot);
break;
}
// if iKnot is greater than number of knots the searched u-v cannot be in the current gridU
// so we search for nk less indizes and continue with the next v-coordinate
iKnot -= nk;
}
}
/// Correct the data values at the edge knots, in place.
///
/// Every edge value is replaced by a fixed linear combination of itself and three
/// neighbouring values taken along the direction perpendicular to the edge:
///   first point of four:  f0 <- 0.5*f0 + 1.5*f1 - 1.5*f2 + 0.5*f3
///   last point of four:   f3 <- 0.5*f0 - 1.5*f1 + 1.5*f2 + 0.5*f3
/// The code accesses four consecutive knots in every row and the rows nv-4 .. nv-1,
/// i.e. it assumes at least 4 knots per row and at least 4 rows.
///
/// \param data array of function values, three consecutive values per knot; modified in place
template <typename T>
inline void SemiregularSpline2D3D::correctEdges(T* data) const
{
  // Regular v-Grid (vertical)
  const RegularSpline1D& gridV = getGridV();
  const int32_t nv = mNumberOfRows;

  // Correction of the first of four points; f0 is updated, three values per point.
  const auto correctFirst = [](T* f0, const T* f1, const T* f2, const T* f3) {
    for (int32_t idim = 0; idim < 3; idim++) {
      f0[idim] = static_cast<T>(0.5 * f0[idim] + 1.5 * f1[idim] - 1.5 * f2[idim] + 0.5 * f3[idim]);
    }
  };
  // Correction of the last of four points; f3 is updated, three values per point.
  const auto correctLast = [](const T* f0, const T* f1, const T* f2, T* f3) {
    for (int32_t idim = 0; idim < 3; idim++) {
      f3[idim] = static_cast<T>(0.5 * f0[idim] - 1.5 * f1[idim] + 1.5 * f2[idim] + 0.5 * f3[idim]);
    }
  };

  // Original author's note (translated from German): strictly speaking V should be done
  // before U, but because of the splines U is done before V.
  // The order of the sections below matters: the V edges are computed with getSpline()
  // from data which already has its U edges corrected.

  { // ==== left edge of U ====
    // loop through all rows except the first and the last one
    for (int32_t iv = 1; iv < nv - 1; iv++) {
      T* f0 = data + getDataIndex(0, iv);
      correctFirst(f0, f0 + 3, f0 + 6, f0 + 9);
    }
  }

  { // ==== right edge of U ====
    // loop through all rows except the first and the last one
    for (int32_t iv = 1; iv < nv - 1; iv++) {
      const int32_t nu = getGridU(iv).getNumberOfKnots();
      T* f0 = data + getDataIndex(nu - 4, iv);
      correctLast(f0, f0 + 3, f0 + 6, f0 + 9);
    }
  }

  { // ==== low edge of V ====
    const RegularSpline1D& gridU = getGridU(0);
    const int32_t nu = gridU.getNumberOfKnots();
    for (int32_t iu = 0; iu < nu; iu++) {
      // f0 to f3 are the x,y,z values of 4 points in the grid along the v axis.
      // The rows may have different numbers of knots, so there is in general no knot at this u
      // in the other rows; the values there are obtained with getSpline().
      T* f0 = data + getDataIndex(iu, 0);
      const float u = gridU.knotIndexToU(iu);
      // The temporaries must have type T: getSpline() deduces T from both the data pointer and
      // the output references, so temporaries of another type would not compile for T != float.
      T x1 = 0, y1 = 0, z1 = 0, x2 = 0, y2 = 0, z2 = 0, x3 = 0, y3 = 0, z3 = 0;
      getSpline(data, u, gridV.knotIndexToU(1), x1, y1, z1);
      getSpline(data, u, gridV.knotIndexToU(2), x2, y2, z2);
      getSpline(data, u, gridV.knotIndexToU(3), x3, y3, z3);
      const T f1[3] = {x1, y1, z1};
      const T f2[3] = {x2, y2, z2};
      const T f3[3] = {x3, y3, z3};
      correctFirst(f0, f1, f2, f3);
    }
  }

  { // ==== high edge of V ====
    const RegularSpline1D& gridU = getGridU(nv - 1);
    const int32_t nu = gridU.getNumberOfKnots();
    for (int32_t iu = 0; iu < nu; iu++) {
      const float u = gridU.knotIndexToU(iu);
      // see the low edge for why the temporaries have type T
      T x1 = 0, y1 = 0, z1 = 0, x2 = 0, y2 = 0, z2 = 0, x3 = 0, y3 = 0, z3 = 0;
      getSpline(data, u, gridV.knotIndexToU(nv - 4), x1, y1, z1);
      getSpline(data, u, gridV.knotIndexToU(nv - 3), x2, y2, z2);
      getSpline(data, u, gridV.knotIndexToU(nv - 2), x3, y3, z3);
      const T f0[3] = {x1, y1, z1};
      const T f1[3] = {x2, y2, z2};
      const T f2[3] = {x3, y3, z3};
      T* f3 = data + getDataIndex(iu, nv - 1);
      correctLast(f0, f1, f2, f3);
    }
  }

  // =============== CORRECT CORNERS ==============
  // The corners were corrected along v above; now they are also corrected along u.

  { // === Lower left corner with u-direction ===
    T* f0 = data;
    correctFirst(f0, f0 + 3, f0 + 6, f0 + 9);
  }

  { // ==== Lower right corner with u-direction ===
    const int32_t nu = getGridU(0).getNumberOfKnots();
    T* f0 = data + getDataIndex(nu - 4, 0);
    correctLast(f0, f0 + 3, f0 + 6, f0 + 9);
  }

  { // === upper left corner with u-direction ===
    T* f0 = data + getDataIndex(0, nv - 1);
    correctFirst(f0, f0 + 3, f0 + 6, f0 + 9);
  }

  { // ==== upper right corner with u-direction ===
    const int32_t nu = getGridU(nv - 1).getNumberOfKnots();
    T* f0 = data + getDataIndex(nu - 4, nv - 1);
    correctLast(f0, f0 + 3, f0 + 6, f0 + 9);
  }
}
template <typename T>
inline void SemiregularSpline2D3D::getSpline(const T* correctedData, float u, float v, T& x, T& y, T& z) const
{
// Get interpolated value for f(u,v) using data array correctedData[getNumberOfKnots()] with corrected edges
// find the v indizes of the u-splines that are needed.
int32_t iknotv = mGridV.getKnotIndex(v);
// to save the index positions of u-coordinates we create an array
T dataVx[12];
// int32_t dataOffset0 = getDataIndex0(0, iknotv-1); //index of the very left point in the vi-1-th gridU
// we loop through the 4 needed u-Splines
int32_t vxIndex = 0;
for (int32_t vi = 0; vi < 4; vi++, vxIndex += 3) {
const int32_t vDelta = iknotv + vi - 1;
const RegularSpline1D& gridU = getGridU(vDelta);
// and find at which index in that specific spline the u-coordinate must lay.
const int32_t ui = gridU.getKnotIndex(u);
const int32_t dataOffset = getDataIndex(ui - 1, vDelta); //(dataOffset0 + (ui-1))*3;
dataVx[vxIndex + 0] = gridU.getSpline(ui, correctedData[dataOffset], correctedData[dataOffset + 3], correctedData[dataOffset + 6], correctedData[dataOffset + 9], u);
dataVx[vxIndex + 1] = gridU.getSpline(ui, correctedData[dataOffset + 1], correctedData[dataOffset + 4], correctedData[dataOffset + 7], correctedData[dataOffset + 10], u);
dataVx[vxIndex + 2] = gridU.getSpline(ui, correctedData[dataOffset + 2], correctedData[dataOffset + 5], correctedData[dataOffset + 8], correctedData[dataOffset + 11], u);
}
// return results
x = mGridV.getSpline(iknotv, dataVx[0], dataVx[3], dataVx[6], dataVx[9], v);
y = mGridV.getSpline(iknotv, dataVx[1], dataVx[4], dataVx[7], dataVx[10], v);
z = mGridV.getSpline(iknotv, dataVx[2], dataVx[5], dataVx[8], dataVx[11], v);
}
inline void SemiregularSpline2D3D::getSplineVec(const float* correctedData, float u, float v, float& x, float& y, float& z) const
{
// Same as getSpline, but using vectorized calculation.
// \param correctedData should be at least 128-bit aligned
#if !defined(__CLING__) && !defined(G__ROOT) && !defined(GPUCA_GPUCODE) && !defined(GPUCA_NO_VC)
//&& !defined(__CLING__)
/*
Idea: There are 16 knots important for (u, v).
a,b,c,d := Knots in first u-grid
e,f,g,h := Knots in second u-grid
i,j,k,l := Knots in third u-grid
m,n,o,p := Knots in fourth u-grid.
It could be possible to calculate the spline in 3 dimentions for a,b,c,d at the same time as e,f,g,h etc.
3 of the 4 parallel threads of the vector would calculate x,y,z for one row and the last task already calculates x for the next one.
=> 4x faster
Problem:To do this, we need vectors where every i-th element is used for the calculation. So what we need is:
[a,e,i,m]
[b,f,j,n]
[c,g,k,o]
[d,h,l,p]
This is barely possible to do with a good performance because e.g. a,e,i,m do not lay beside each other in data.
Work around 1:
Don't calculate knots parrallel but the dimensions. But you can only be 3x faster this way because the 4th thread would be the x-dimension of the next point.
Work around 2:
Try to create a matrix as it was mentioned earlier ([a,e,i,m][b,f,..]...) by copying data.
This may be less efficient than Work around 1 but needs to be measured.
*/
// workaround 1:
int32_t vGridi = mGridV.getKnotIndex(v);
float dataU[12];
int32_t vOffset = 0;
for (int32_t vi = 0; vi < 4; vi++, vOffset += 3) {
const RegularSpline1D& gridU = getGridU(vi + vGridi - 1);
// and find at which index in that specific spline the u-coordinate must lay.
int32_t ui = gridU.getKnotIndex(u);
// using getDataIndex we know at which position knot (ui, vi) is saved.
// dataU0 to U3 are 4 points along the u-spline surrounding the u coordinate.
const float* dataU0 = correctedData + getDataIndex(ui - 1, vGridi + vi - 1);
Vc::float_v dt0(dataU0 + 0);
Vc::float_v dt1(dataU0 + 3);
Vc::float_v dt2(dataU0 + 6);
Vc::float_v dt3(dataU0 + 9);
Vc::float_v resU = gridU.getSpline(ui, dt0, dt1, dt2, dt3, u);
// save the results in dataVx array
dataU[vOffset + 0] = resU[0];
dataU[vOffset + 1] = resU[1];
dataU[vOffset + 2] = resU[2];
}
Vc::float_v dataV0(dataU + 0);
Vc::float_v dataV1(dataU + 3);
Vc::float_v dataV2(dataU + 6);
Vc::float_v dataV3(dataU + 9);
Vc::float_v res = mGridV.getSpline(vGridi, dataV0, dataV1, dataV2, dataV3, v);
x = res[0];
y = res[1];
z = res[2];
// getSpline( correctedData, u, v, x, y, z );
#else
getSpline(correctedData, u, v, x, y, z);
#endif
}
} // namespace gpu
} // namespace o2
#endif