Skip to content

Commit 766bb7a

Browse files
committed
[ntuple] fix up type name normalization
1 parent 4d38c0b commit 766bb7a

3 files changed

Lines changed: 248 additions & 88 deletions

File tree

tree/ntuple/inc/ROOT/RFieldUtils.hxx

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,10 @@ std::tuple<std::string, std::vector<std::size_t>> ParseArrayType(const std::stri
5757

5858
/// Used in RFieldBase::Create() in order to get the comma-separated list of template types
5959
/// E.g., gets {"int", "std::variant<double,int>"} from "int,std::variant<double,int>".
60+
/// If maxArgs > 0, stop tokenizing after the given number of tokens are found. Used to strip
61+
/// STL allocator and other optional arguments.
6062
/// TODO(jblomer): Try to merge with TClassEdit::TSplitType
61-
std::vector<std::string> TokenizeTypeList(std::string_view templateType);
63+
std::vector<std::string> TokenizeTypeList(std::string_view templateType, std::size_t maxArgs = 0);
6264

6365
} // namespace Internal
6466
} // namespace ROOT

tree/ntuple/src/RFieldUtils.cxx

Lines changed: 198 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -118,25 +118,14 @@ std::vector<AnglePos> FindTemplateAngleBrackets(const std::string &typeName)
118118
return result;
119119
}
120120

121-
} // namespace
122-
123-
std::string ROOT::Internal::GetCanonicalTypePrefix(const std::string &typeName)
121+
// TClassEdit::CleanType and the name demangling insert blanks between closing angle brackets,
122+
// as they were required before C++11. We want to remove them for RNTuple.
123+
void RemoveSpaceBeforeClosingAngleBracket(std::string &typeName)
124124
{
125-
std::string canonicalType{TClassEdit::CleanType(typeName.c_str(), /*mode=*/1)};
126-
if (canonicalType.substr(0, 7) == "struct ") {
127-
canonicalType.erase(0, 7);
128-
} else if (canonicalType.substr(0, 5) == "enum ") {
129-
canonicalType.erase(0, 5);
130-
} else if (canonicalType.substr(0, 2) == "::") {
131-
canonicalType.erase(0, 2);
132-
}
133-
134-
// TClassEdit::CleanType inserts blanks between closing angle brackets, as they were required before C++11. We want
135-
// to remove them for RNTuple.
136-
auto angle = canonicalType.find('<');
125+
auto angle = typeName.find('<');
137126
if (angle != std::string::npos) {
138-
auto dst = canonicalType.begin() + angle;
139-
auto end = canonicalType.end();
127+
auto dst = typeName.begin() + angle;
128+
auto end = typeName.end();
140129
for (auto src = dst; src != end; ++src) {
141130
if (*src == ' ') {
142131
auto next = src + 1;
@@ -147,9 +136,185 @@ std::string ROOT::Internal::GetCanonicalTypePrefix(const std::string &typeName)
147136
}
148137
*(dst++) = *src;
149138
}
150-
canonicalType.erase(dst, end);
139+
typeName.erase(dst, end);
140+
}
141+
}
142+
143+
// The demangled name adds spaces after commas
144+
void RemoveSpaceAfterComma(std::string &typeName)
145+
{
146+
auto itr = typeName.begin();
147+
while (itr != typeName.end()) {
148+
auto c = *itr;
149+
itr++;
150+
151+
if (c != ',')
152+
continue;
153+
154+
R__ASSERT(itr != typeName.end());
155+
if (*itr == ' ') {
156+
itr = typeName.erase(itr);
157+
}
158+
}
159+
}
160+
161+
// Map fundamental integer types to stdint integer types (e.g. int --> std::int32_t)
162+
void MapIntegerType(std::string &typeName)
163+
{
164+
if (typeName == "signed char") {
165+
typeName = ROOT::RField<signed char>::TypeName();
166+
} else if (typeName == "unsigned char") {
167+
typeName = ROOT::RField<unsigned char>::TypeName();
168+
} else if (typeName == "short" || typeName == "short int" || typeName == "signed short" ||
169+
typeName == "signed short int") {
170+
typeName = ROOT::RField<short int>::TypeName();
171+
} else if (typeName == "unsigned short" || typeName == "unsigned short int") {
172+
typeName = ROOT::RField<unsigned short int>::TypeName();
173+
} else if (typeName == "int" || typeName == "signed" || typeName == "signed int") {
174+
typeName = ROOT::RField<int>::TypeName();
175+
} else if (typeName == "unsigned" || typeName == "unsigned int") {
176+
typeName = ROOT::RField<unsigned int>::TypeName();
177+
} else if (typeName == "long" || typeName == "long int" || typeName == "signed long" ||
178+
typeName == "signed long int") {
179+
typeName = ROOT::RField<long int>::TypeName();
180+
} else if (typeName == "unsigned long" || typeName == "unsigned long int") {
181+
typeName = ROOT::RField<unsigned long int>::TypeName();
182+
} else if (typeName == "long long" || typeName == "long long int" || typeName == "signed long long" ||
183+
typeName == "signed long long int") {
184+
typeName = ROOT::RField<long long int>::TypeName();
185+
} else if (typeName == "unsigned long long" || typeName == "unsigned long long int") {
186+
typeName = ROOT::RField<unsigned long long int>::TypeName();
187+
}
188+
}
189+
190+
std::string GetRenormalizedMetaTypeName(const std::string &metaNormalizedName)
191+
{
192+
const std::string canonicalTypePrefix{ROOT::Internal::GetCanonicalTypePrefix(metaNormalizedName)};
193+
// RNTuple resolves Double32_t for the normalized type name but keeps Double32_t for the type alias
194+
// (also in template parameters)
195+
if (canonicalTypePrefix == "Double32_t")
196+
return "double";
197+
198+
if (canonicalTypePrefix.find('<') == std::string::npos) {
199+
// If there are no templates, the function is done.
200+
return canonicalTypePrefix;
201+
}
202+
203+
const auto angleBrackets = FindTemplateAngleBrackets(canonicalTypePrefix);
204+
R__ASSERT(!angleBrackets.empty());
205+
206+
std::string normName;
207+
std::string::size_type currentPos = 0;
208+
for (std::size_t i = 0; i < angleBrackets.size(); i++) {
209+
const auto [posOpen, posClose] = angleBrackets[i];
210+
// Append the type prefix until the open angle bracket.
211+
normName += canonicalTypePrefix.substr(currentPos, posOpen + 1 - currentPos);
212+
213+
const auto argList = canonicalTypePrefix.substr(posOpen + 1, posClose - posOpen - 1);
214+
const auto templateArgs = ROOT::Internal::TokenizeTypeList(argList);
215+
R__ASSERT(!templateArgs.empty());
216+
217+
for (const auto &a : templateArgs) {
218+
normName += GetNormalizedTemplateArg(a, GetRenormalizedMetaTypeName) + ",";
219+
}
220+
221+
normName[normName.size() - 1] = '>';
222+
currentPos = posClose + 1;
223+
}
224+
225+
// Append the rest of the type from the last closing angle bracket.
226+
const auto lastClosePos = angleBrackets.back().second;
227+
normName += canonicalTypePrefix.substr(lastClosePos + 1);
228+
229+
return normName;
230+
}
231+
232+
std::string GetRenormalizedDemangledTypeName(const std::string &demangledName)
233+
{
234+
std::string canonicalTypePrefix{demangledName};
235+
MapIntegerType(canonicalTypePrefix);
236+
237+
if (canonicalTypePrefix.find('<') == std::string::npos) {
238+
// If there are no templates, the function is done.
239+
return canonicalTypePrefix;
240+
}
241+
RemoveSpaceBeforeClosingAngleBracket(canonicalTypePrefix);
242+
RemoveSpaceAfterComma(canonicalTypePrefix);
243+
244+
const auto angleBrackets = FindTemplateAngleBrackets(canonicalTypePrefix);
245+
R__ASSERT(!angleBrackets.empty());
246+
247+
// Remove optional stdlib template arguments
248+
int maxTemplateArgs = 0;
249+
if (canonicalTypePrefix.rfind("std::vector<", 0) == 0 || canonicalTypePrefix.rfind("std::set<", 0) == 0 ||
250+
canonicalTypePrefix.rfind("std::unordered_set<", 0) == 0 ||
251+
canonicalTypePrefix.rfind("std::multiset<", 0) == 0 ||
252+
canonicalTypePrefix.rfind("std::unordered_multiset<", 0) == 0 ||
253+
canonicalTypePrefix.rfind("std::unique_ptr<", 0) == 0) {
254+
maxTemplateArgs = 1;
255+
} else if (canonicalTypePrefix.rfind("std::map<", 0) == 0 ||
256+
canonicalTypePrefix.rfind("std::unordered_map<", 0) == 0 ||
257+
canonicalTypePrefix.rfind("std::multimap<", 0) == 0 ||
258+
canonicalTypePrefix.rfind("std::unordered_multimap<", 0) == 0) {
259+
maxTemplateArgs = 2;
260+
}
261+
262+
std::string normName;
263+
std::string::size_type currentPos = 0;
264+
for (std::size_t i = 0; i < angleBrackets.size(); i++) {
265+
const auto [posOpen, posClose] = angleBrackets[i];
266+
// Append the type prefix until the open angle bracket.
267+
normName += canonicalTypePrefix.substr(currentPos, posOpen + 1 - currentPos);
268+
269+
const auto argList = canonicalTypePrefix.substr(posOpen + 1, posClose - posOpen - 1);
270+
auto templateArgs = ROOT::Internal::TokenizeTypeList(argList, maxTemplateArgs);
271+
R__ASSERT(!templateArgs.empty());
272+
273+
for (const auto &a : templateArgs) {
274+
normName += GetNormalizedTemplateArg(a, GetRenormalizedDemangledTypeName) + ",";
275+
}
276+
277+
normName[normName.size() - 1] = '>';
278+
currentPos = posClose + 1;
279+
}
280+
281+
// Append the rest of the type from the last closing angle bracket.
282+
const auto lastClosePos = angleBrackets.back().second;
283+
normName += canonicalTypePrefix.substr(lastClosePos + 1);
284+
285+
// Reverse std::string --> std::basic_string<char> typedef
286+
static std::string stringName = []() {
287+
int e;
288+
char *str = TClassEdit::DemangleName(typeid(std::string).name(), e);
289+
R__ASSERT(str && e == 0);
290+
std::string res{str};
291+
free(str);
292+
res.erase(std::remove(res.begin(), res.end(), ' '), res.end());
293+
return res;
294+
}();
295+
if (normName == stringName)
296+
return "std::string";
297+
298+
return normName;
299+
}
300+
301+
} // namespace
302+
303+
std::string ROOT::Internal::GetCanonicalTypePrefix(const std::string &typeName)
304+
{
305+
// Remove outer cv qualifiers
306+
std::string canonicalType{TClassEdit::CleanType(typeName.c_str(), /*mode=*/1)};
307+
308+
if (canonicalType.substr(0, 7) == "struct ") {
309+
canonicalType.erase(0, 7);
310+
} else if (canonicalType.substr(0, 5) == "enum ") {
311+
canonicalType.erase(0, 5);
312+
} else if (canonicalType.substr(0, 2) == "::") {
313+
canonicalType.erase(0, 2);
151314
}
152315

316+
RemoveSpaceBeforeClosingAngleBracket(canonicalType);
317+
153318
if (canonicalType.substr(0, 6) == "array<") {
154319
canonicalType = "std::" + canonicalType;
155320
} else if (canonicalType.substr(0, 7) == "atomic<") {
@@ -191,77 +356,27 @@ std::string ROOT::Internal::GetCanonicalTypePrefix(const std::string &typeName)
191356
canonicalType = it->second;
192357
}
193358

194-
// Map fundamental integer types to stdint integer types (e.g. int --> std::int32_t)
195-
if (canonicalType == "signed char") {
196-
canonicalType = RField<signed char>::TypeName();
197-
} else if (canonicalType == "unsigned char") {
198-
canonicalType = RField<unsigned char>::TypeName();
199-
} else if (canonicalType == "short" || canonicalType == "short int" || canonicalType == "signed short" ||
200-
canonicalType == "signed short int") {
201-
canonicalType = RField<short int>::TypeName();
202-
} else if (canonicalType == "unsigned short" || canonicalType == "unsigned short int") {
203-
canonicalType = RField<unsigned short int>::TypeName();
204-
} else if (canonicalType == "int" || canonicalType == "signed" || canonicalType == "signed int") {
205-
canonicalType = RField<int>::TypeName();
206-
} else if (canonicalType == "unsigned" || canonicalType == "unsigned int") {
207-
canonicalType = RField<unsigned int>::TypeName();
208-
} else if (canonicalType == "long" || canonicalType == "long int" || canonicalType == "signed long" ||
209-
canonicalType == "signed long int") {
210-
canonicalType = RField<long int>::TypeName();
211-
} else if (canonicalType == "unsigned long" || canonicalType == "unsigned long int") {
212-
canonicalType = RField<unsigned long int>::TypeName();
213-
} else if (canonicalType == "long long" || canonicalType == "long long int" || canonicalType == "signed long long" ||
214-
canonicalType == "signed long long int") {
215-
canonicalType = RField<long long int>::TypeName();
216-
} else if (canonicalType == "unsigned long long" || canonicalType == "unsigned long long int") {
217-
canonicalType = RField<unsigned long long int>::TypeName();
218-
}
359+
MapIntegerType(canonicalType);
219360

220361
return canonicalType;
221362
}
222363

223-
std::string ROOT::Internal::GetRenormalizedTypeName(const std::string &metaNormalizedName)
364+
std::string ROOT::Internal::GetRenormalizedTypeName(const std::type_info &ti)
224365
{
225-
const std::string canonicalTypePrefix{GetCanonicalTypePrefix(metaNormalizedName)};
226-
// RNTuple resolves Double32_t for the normalized type name but keeps Double32_t for the type alias
227-
// (also in template parameters)
228-
if (canonicalTypePrefix == "Double32_t")
229-
return "double";
230-
231-
if (canonicalTypePrefix.find('<') == std::string::npos) {
232-
// If there are no templates, the function is done.
233-
return canonicalTypePrefix;
234-
}
235-
236-
const auto angleBrackets = FindTemplateAngleBrackets(canonicalTypePrefix);
237-
R__ASSERT(!angleBrackets.empty());
238-
239-
std::string normName;
240-
std::string::size_type currentPos = 0;
241-
for (std::size_t i = 0; i < angleBrackets.size(); i++) {
242-
const auto [posOpen, posClose] = angleBrackets[i];
243-
// Append the type prefix until the open angle bracket.
244-
normName += canonicalTypePrefix.substr(currentPos, posOpen + 1 - currentPos);
245-
246-
const auto argList = canonicalTypePrefix.substr(posOpen + 1, posClose - posOpen - 1);
247-
const auto templateArgs = TokenizeTypeList(argList);
248-
R__ASSERT(!templateArgs.empty());
249-
250-
for (const auto &a : templateArgs) {
251-
normName += GetNormalizedTemplateArg(a, [](const std::string &n){ return GetRenormalizedTypeName(n); }) + ",";
252-
}
253-
254-
normName[normName.size() - 1] = '>';
255-
currentPos = posClose + 1;
256-
}
257-
258-
// Append the rest of the type from the last closing angle bracket.
259-
const auto lastClosePos = angleBrackets.back().second;
260-
normName += canonicalTypePrefix.substr(lastClosePos + 1);
366+
int errCode;
367+
char *cstrDemangledName = TClassEdit::DemangleName(ti.name(), errCode);
368+
R__ASSERT(cstrDemangledName && errCode == 0);
369+
std::string normName{GetRenormalizedDemangledTypeName(cstrDemangledName)};
370+
free(cstrDemangledName);
261371

262372
return normName;
263373
}
264374

375+
/// Overload taking the type name as a string. It forwards the argument unchanged to the
/// file-local helper GetRenormalizedMetaTypeName() and returns that helper's result.
std::string ROOT::Internal::GetRenormalizedTypeName(const std::string &metaNormalizedName)
376+
{
377+
return GetRenormalizedMetaTypeName(metaNormalizedName);
378+
}
379+
265380
std::string ROOT::Internal::GetNormalizedUnresolvedTypeName(const std::string &origName)
266381
{
267382
const TClassEdit::EModType modType = static_cast<TClassEdit::EModType>(
@@ -435,7 +550,7 @@ std::tuple<std::string, std::vector<std::size_t>> ROOT::Internal::ParseArrayType
435550
return std::make_tuple(prefix, sizeVec);
436551
}
437552

438-
std::vector<std::string> ROOT::Internal::TokenizeTypeList(std::string_view templateType)
553+
std::vector<std::string> ROOT::Internal::TokenizeTypeList(std::string_view templateType, std::size_t maxArgs)
439554
{
440555
std::vector<std::string> result;
441556
if (templateType.empty())
@@ -452,6 +567,8 @@ std::vector<std::string> ROOT::Internal::TokenizeTypeList(std::string_view templ
452567
case ',':
453568
if (nestingLevel == 0) {
454569
result.push_back(std::string(typeBegin, typeCursor - typeBegin));
570+
if (maxArgs && result.size() == maxArgs)
571+
return result;
455572
typeBegin = typeCursor + 1;
456573
}
457574
break;
@@ -461,8 +578,3 @@ std::vector<std::string> ROOT::Internal::TokenizeTypeList(std::string_view templ
461578
result.push_back(std::string(typeBegin, typeCursor - typeBegin));
462579
return result;
463580
}
464-
465-
std::string ROOT::Internal::GetRenormalizedTypeName(const std::type_info &ti)
466-
{
467-
return ROOT::Internal::GetRenormalizedTypeName(ROOT::Internal::GetDemangledTypeName(ti));
468-
}

0 commit comments

Comments
 (0)