Skip to content

Commit f77a812

Browse files
authored
[lldb] Add a GetSubsetExtractorSP method to DataExtractor (llvm#177309)
We have many places where an ObjectFile subclass will take the DataExtractor representing the entire binary and create a subsection of that in a new DataExtractor for processing. For instance, an object file might have symbol table entries with offsets into the string table. A common code pattern is to create a DataExtractor representing the string table, and then pull out the c-strings based on those offsets from the string table DataExtractor. When code does this, it creates a new DataExtractor, copies the Endianness and Wordsize from the original, copies the DataBufferSP from the original, and specifies a new start offset and length into the DataBuffer. However, if the binary is actually stored in a VirtualDataExtractor, this code pattern loses the correct virtual-to-physical table translation and will not work correctly. This new method simplifies this common pattern, and correctly takes a subset of a VirtualDataExtractor. The current implementation only allows a subset of a VirtualDataExtractor that is contained within a single virtual entry (LookupTable entry) and returns a DataExtractor with the correct offsets calculated from the LookupTable. If we need a VirtualDataExtractor to create a Subset DataExtractor representing multiple separate virtual ranges of data, we'll need to copy over the LookupTable entries that cover all the bytes, and update them to be relative to the new VirtualDataExtractor. It's a bit of work, and it's not needed right now, so I'm not tackling that. I am working on a larger PR which needs this new method. This PR contains a unit test that uses it. rdar://148939795
1 parent d7ac1fb commit f77a812

6 files changed

Lines changed: 208 additions & 6 deletions

File tree

lldb/include/lldb/Utility/DataExtractor.h

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "lldb/lldb-forward.h"
1717
#include "lldb/lldb-types.h"
1818
#include "llvm/ADT/ArrayRef.h"
19+
#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
1920
#include "llvm/Support/DataExtractor.h"
2021
#include "llvm/Support/SwapByteOrder.h"
2122

@@ -818,6 +819,33 @@ class DataExtractor {
818819
/// The extracted unsigned integer value.
819820
uint64_t GetULEB128(lldb::offset_t *offset_ptr) const;
820821

822+
/// Return a new DataExtractor which represents a subset of an existing
823+
/// data extractor's bytes, copying all other fields from the existing
824+
/// data extractor.
825+
///
826+
/// \param[in] offset
827+
/// The starting byte offset, relative to the start of this extractor's data.
828+
/// \param[in] length
829+
/// The length of bytes that the new extractor can operate on.
830+
///
831+
/// \return
832+
/// A shared pointer to a new DataExtractor.
833+
virtual lldb::DataExtractorSP GetSubsetExtractorSP(lldb::offset_t offset,
834+
lldb::offset_t length);
835+
836+
/// Return a new DataExtractor which represents a subset of an existing
837+
/// data extractor's bytes, copying all other fields from the existing
838+
/// data extractor. The length will be the largest contiguous region that
839+
/// can be provided starting at \a offset; it is safe to read any bytes
840+
/// within the returned subset Extractor.
841+
///
842+
/// \param[in] offset
843+
/// The starting byte offset, relative to the start of this extractor's data.
844+
///
845+
/// \return
846+
/// A shared pointer to a new DataExtractor.
847+
virtual lldb::DataExtractorSP GetSubsetExtractorSP(lldb::offset_t offset);
848+
821849
lldb::DataBufferSP &GetSharedDataBuffer() { return m_data_sp; }
822850

823851
bool HasData() { return m_start && m_end && m_end - m_start > 0; }
@@ -997,10 +1025,16 @@ class DataExtractor {
9971025

9981026
void Checksum(llvm::SmallVectorImpl<uint8_t> &dest, uint64_t max_data = 0);
9991027

1000-
llvm::ArrayRef<uint8_t> GetData() const {
1028+
/// View this extractor's bytes as an ArrayRef spanning GetDataStart() for
/// GetByteSize() bytes. Virtual so that subclasses can narrow the range.
virtual llvm::ArrayRef<uint8_t> GetData() const {
  const uint8_t *first_byte = GetDataStart();
  const size_t byte_count = size_t(GetByteSize());
  return llvm::ArrayRef<uint8_t>(first_byte, byte_count);
}
10031031

1032+
/// Wrap the bytes returned by GetData() in an llvm::DWARFDataExtractor that
/// carries this extractor's byte order and address size.
llvm::DWARFDataExtractor GetAsLLVMDWARF() const {
  const bool is_little_endian = GetByteOrder() == lldb::eByteOrderLittle;
  return llvm::DWARFDataExtractor(GetData(), is_little_endian,
                                  GetAddressByteSize());
}
1037+
10041038
llvm::DataExtractor GetAsLLVM() const {
10051039
return {GetData(), GetByteOrder() == lldb::eByteOrderLittle,
10061040
uint8_t(GetAddressByteSize())};

lldb/include/lldb/Utility/VirtualDataExtractor.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,12 +43,22 @@ class VirtualDataExtractor : public DataExtractor {
4343
lldb::ByteOrder byte_order, uint32_t addr_size,
4444
LookupTable lookup_table);
4545

46+
VirtualDataExtractor(const lldb::DataBufferSP &data_sp,
47+
LookupTable lookup_table);
48+
4649
const void *GetData(lldb::offset_t *offset_ptr,
4750
lldb::offset_t length) const override;
4851

4952
const uint8_t *PeekData(lldb::offset_t offset,
5053
lldb::offset_t length) const override;
5154

55+
lldb::DataExtractorSP GetSubsetExtractorSP(lldb::offset_t offset,
56+
lldb::offset_t length) override;
57+
58+
lldb::DataExtractorSP GetSubsetExtractorSP(lldb::offset_t offset) override;
59+
60+
llvm::ArrayRef<uint8_t> GetData() const override;
61+
5262
/// Unchecked overrides
5363
/// @{
5464
uint8_t GetU8_unchecked(lldb::offset_t *offset_ptr) const override;

lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -417,14 +417,14 @@ ObjectFileSP ObjectContainerBSDArchive::GetObjectFile(const FileSpec *file) {
417417
lldb::offset_t data_offset = 0;
418418
DataExtractorSP extractor_sp =
419419
std::make_shared<DataExtractor>(child_data_sp);
420-
return ObjectFile::FindPlugin(
420+
return lldb_private::ObjectFile::FindPlugin(
421421
module_sp, &child, m_offset + object->file_offset,
422422
object->file_size, extractor_sp, data_offset);
423423
}
424424
lldb::offset_t data_offset = object->file_offset;
425425
DataExtractorSP extractor_sp =
426426
std::make_shared<DataExtractor>(m_archive_sp->GetData());
427-
return ObjectFile::FindPlugin(
427+
return lldb_private::ObjectFile::FindPlugin(
428428
module_sp, file, m_offset + object->file_offset, object->file_size,
429429
extractor_sp, data_offset);
430430
}
@@ -476,8 +476,8 @@ size_t ObjectContainerBSDArchive::GetModuleSpecifications(
476476
continue;
477477
FileSpec child = GetChildFileSpecificationsFromThin(
478478
object->ar_name.GetStringRef(), file);
479-
if (ObjectFile::GetModuleSpecifications(child, 0, object->file_size,
480-
specs)) {
479+
if (lldb_private::ObjectFile::GetModuleSpecifications(
480+
child, 0, object->file_size, specs)) {
481481
ModuleSpec &spec =
482482
specs.GetModuleSpecRefAtIndex(specs.GetSize() - 1);
483483
llvm::sys::TimePoint<> object_mod_time(
@@ -492,7 +492,7 @@ size_t ObjectContainerBSDArchive::GetModuleSpecifications(
492492
const lldb::offset_t object_file_offset =
493493
file_offset + object->file_offset;
494494
if (object->file_offset < file_size && file_size > object_file_offset) {
495-
if (ObjectFile::GetModuleSpecifications(
495+
if (lldb_private::ObjectFile::GetModuleSpecifications(
496496
file, object_file_offset, file_size - object_file_offset,
497497
specs)) {
498498
ModuleSpec &spec =

lldb/source/Utility/DataExtractor.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1050,3 +1050,16 @@ void DataExtractor::Checksum(llvm::SmallVectorImpl<uint8_t> &dest,
10501050
dest.clear();
10511051
dest.append(result.begin(), result.end());
10521052
}
1053+
1054+
// Create a new DataExtractor that shares this extractor's data buffer, byte
// order and address size, but is positioned `offset` bytes past the start of
// this extractor's data and asked to cover `length` bytes.
DataExtractorSP DataExtractor::GetSubsetExtractorSP(offset_t offset,
                                                    offset_t length) {
  auto &shared_buffer = GetSharedDataBuffer();
  auto subset_sp = std::make_shared<DataExtractor>(
      shared_buffer, GetByteOrder(), GetAddressByteSize());

  // `offset` is relative to this extractor; translate it into an offset
  // relative to the start of the shared buffer before handing it to SetData.
  const offset_t buffer_offset = GetSharedDataOffset() + offset;
  subset_sp->SetData(shared_buffer, buffer_offset, length);
  return subset_sp;
}
1062+
1063+
// Create a new DataExtractor covering everything from `offset` (relative to
// the start of this extractor's data) through the end of this extractor's
// data.
DataExtractorSP DataExtractor::GetSubsetExtractorSP(offset_t offset) {
  // GetByteSize() - offset is unsigned arithmetic: an offset past the end of
  // this extractor would wrap around to an enormous length instead of
  // describing an empty range. Clamp the remaining length to zero in that
  // case so the subset can never be asked to cover more than this extractor.
  const offset_t byte_size = GetByteSize();
  const offset_t remaining = offset < byte_size ? byte_size - offset : 0;
  return GetSubsetExtractorSP(offset, remaining);
}

lldb/source/Utility/VirtualDataExtractor.cpp

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,12 @@ VirtualDataExtractor::VirtualDataExtractor(const DataBufferSP &data_sp,
3131
m_lookup_table.Sort();
3232
}
3333

34+
// Construct from a shared data buffer and a virtual-to-physical lookup table.
// Only the buffer is forwarded to the DataExtractor base class, so byte order
// and address size are whatever that base constructor chooses by default.
VirtualDataExtractor::VirtualDataExtractor(const DataBufferSP &data_sp,
                                           LookupTable lookup_table)
    : DataExtractor(data_sp), m_lookup_table(std::move(lookup_table)) {
  // Callers may supply entries in any order; sort once up front so that
  // FindEntry() can binary-search the table.
  m_lookup_table.Sort();
}
39+
3440
const VirtualDataExtractor::LookupTable::Entry *
3541
VirtualDataExtractor::FindEntry(offset_t virtual_addr) const {
3642
// Use RangeDataVector's binary search instead of linear search.
@@ -137,3 +143,70 @@ uint64_t VirtualDataExtractor::GetU64_unchecked(offset_t *offset_ptr) const {
137143
*offset_ptr += 8;
138144
return result;
139145
}
146+
147+
// Create a plain (non-virtual) DataExtractor for `virtual_length` bytes
// starting at virtual address `virtual_offset`.
//
// The requested range must lie entirely inside a single LookupTable entry.
// If `virtual_offset` is not covered by any entry, or the range would run
// past the end of its entry, this asserts in debug builds and returns an
// empty DataExtractorSP otherwise.
DataExtractorSP
VirtualDataExtractor::GetSubsetExtractorSP(offset_t virtual_offset,
                                           offset_t virtual_length) {
  const LookupTable::Entry *entry = FindEntry(virtual_offset);
  assert(
      entry &&
      "VirtualDataExtractor subset extractor requires valid virtual address");
  if (!entry)
    return {};

  // Entry::base is the virtual address at the start of this region of data.
  // Entry::data is the physical offset of that region, relative to the start
  // of this extractor's data (GetData() computes m_start + entry->data).
  const offset_t offset_into_entry_range = virtual_offset - entry->base;

  // Compare against the bytes remaining in the entry instead of computing
  // offset + length, which could wrap around for a very large length and
  // slip past the check.
  const bool fits_in_entry =
      offset_into_entry_range <= entry->size &&
      virtual_length <= entry->size - offset_into_entry_range;
  assert(
      fits_in_entry &&
      "VirtualDataExtractor subset may not span multiple LookupTable entries");
  if (!fits_in_entry)
    return {};

  // We could support a Subset VirtualDataExtractor which covered
  // multiple LookupTable virtual entries, but we'd need to mutate
  // all of the LookupTable entries that were properly included in
  // the Subset, a bit tricky.  So we won't implement that until it's
  // needed.

  // SetData() takes an offset relative to the start of the shared buffer
  // (the base-class implementation passes GetSharedDataOffset() + offset),
  // so include this extractor's own offset into the buffer as well.
  const offset_t physical_start =
      GetSharedDataOffset() + entry->data + offset_into_entry_range;
  std::shared_ptr<DataExtractor> new_sp = std::make_shared<DataExtractor>(
      GetSharedDataBuffer(), GetByteOrder(), GetAddressByteSize());
  new_sp->SetData(GetSharedDataBuffer(), physical_start, virtual_length);
  return new_sp;
}
178+
179+
// Return a DataExtractorSP that contains a single LookupTable's entry; all
180+
// bytes are guaranteed to be readable.
181+
DataExtractorSP
182+
VirtualDataExtractor::GetSubsetExtractorSP(offset_t virtual_offset) {
183+
const LookupTable::Entry *entry = FindEntry(virtual_offset);
184+
assert(
185+
entry &&
186+
"VirtualDataExtractor subset extractor requires valid virtual address");
187+
if (!entry)
188+
return {};
189+
190+
// Entry::data is the offset into the DataBuffer's actual start/end range
191+
// Entry::base is the virtual address at the start of this region of data
192+
offset_t offset_into_entry_range = virtual_offset - entry->base;
193+
194+
offset_t physical_start = entry->data + offset_into_entry_range;
195+
std::shared_ptr<DataExtractor> new_sp = std::make_shared<DataExtractor>(
196+
GetSharedDataBuffer(), GetByteOrder(), GetAddressByteSize());
197+
new_sp->SetData(GetSharedDataBuffer(), physical_start,
198+
entry->size - offset_into_entry_range);
199+
return new_sp;
200+
}
201+
202+
// Return an ArrayRef to the first contiguous region of the LookupTable
203+
// only. The LookupTable entries may have gaps of unmapped data, and we
204+
// can't include those in the ArrayRef or something may touch those pages.
205+
llvm::ArrayRef<uint8_t> VirtualDataExtractor::GetData() const {
206+
const LookupTable::Entry *entry = FindEntry(0);
207+
assert(entry &&
208+
"VirtualDataExtractor GetData requires valid virtual address");
209+
if (!entry)
210+
return {};
211+
return {m_start + entry->data, entry->size};
212+
}

lldb/unittests/Utility/VirtualDataExtractorTest.cpp

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -581,3 +581,75 @@ TEST(VirtualDataExtractorTest, ReadExactlyAtEntryEnd) {
581581
EXPECT_EQ(extractor->GetU8(&virtual_offset), 0x04U);
582582
EXPECT_EQ(virtual_offset, 0x1004U);
583583
}
584+
585+
// Exercises GetSubsetExtractorSP() on a VirtualDataExtractor whose four
// LookupTable entries map consecutive virtual ranges onto shuffled physical
// quarters of a 16-word buffer.
TEST(VirtualDataExtractorTest, SubsetExtractorGetU32) {
  // buffer[i] is the hex digit i repeated eight times:
  // 0x00000000 0x11111111 ... 0xffffffff
  // The multiplication is done in unsigned arithmetic; building the value
  // with `i << 28` on a signed int overflows for i >= 8.
  uint32_t buffer[16];
  for (uint32_t i = 0; i < 16; i++)
    buffer[i] = 0x11111111U * i;
  DataBufferSP buffer_sp = std::make_shared<DataBufferUnowned>(
      reinterpret_cast<uint8_t *>(buffer), sizeof(buffer));
  lldb::DataExtractorSP extractor = std::make_shared<VirtualDataExtractor>(
      buffer_sp, eByteOrderLittle, 8,
      Table{Entry(0x0, 4 * sizeof(uint32_t), 12 * sizeof(uint32_t)),
            Entry(0x10, 4 * sizeof(uint32_t), 0 * sizeof(uint32_t)),
            Entry(0x20, 4 * sizeof(uint32_t), 8 * sizeof(uint32_t)),
            Entry(0x30, 4 * sizeof(uint32_t), 4 * sizeof(uint32_t))});

  // Sanity-check the virtual mapping itself before taking subsets of it.
  offset_t virtual_offset = 0;
  // Entry(0x0, 4*sizeof(uint32_t), 12*sizeof(uint32_t))
  EXPECT_EQ(extractor->GetU32(&virtual_offset), 0xccccccccU);
  EXPECT_EQ(extractor->GetU32(&virtual_offset), 0xddddddddU);
  EXPECT_EQ(extractor->GetU32(&virtual_offset), 0xeeeeeeeeU);
  EXPECT_EQ(extractor->GetU32(&virtual_offset), 0xffffffffU);
  // Entry(0x10, 4*sizeof(uint32_t), 0*sizeof(uint32_t))
  EXPECT_EQ(extractor->GetU32(&virtual_offset), 0x00000000U);
  EXPECT_EQ(extractor->GetU32(&virtual_offset), 0x11111111U);
  EXPECT_EQ(extractor->GetU32(&virtual_offset), 0x22222222U);
  EXPECT_EQ(extractor->GetU32(&virtual_offset), 0x33333333U);
  // Entry(0x20, 4*sizeof(uint32_t), 8*sizeof(uint32_t))
  EXPECT_EQ(extractor->GetU32(&virtual_offset), 0x88888888U);
  EXPECT_EQ(extractor->GetU32(&virtual_offset), 0x99999999U);
  EXPECT_EQ(extractor->GetU32(&virtual_offset), 0xAAAAAAAAU);
  EXPECT_EQ(extractor->GetU32(&virtual_offset), 0xBBBBBBBBU);
  // Entry(0x30, 4*sizeof(uint32_t), 4*sizeof(uint32_t))
  EXPECT_EQ(extractor->GetU32(&virtual_offset), 0x44444444U);
  EXPECT_EQ(extractor->GetU32(&virtual_offset), 0x55555555U);
  EXPECT_EQ(extractor->GetU32(&virtual_offset), 0x66666666U);
  EXPECT_EQ(extractor->GetU32(&virtual_offset), 0x77777777U);

  // sub_extractor starts at virtual offset 0x10 for 4 uint32_t's, aligned
  // to the start of a LookupTable entry.
  lldb::DataExtractorSP aligned_sub_extractor = extractor->GetSubsetExtractorSP(
      4 * sizeof(uint32_t), 4 * sizeof(uint32_t));
  // GetSubsetExtractorSP returns an empty pointer on failure; stop here
  // rather than dereferencing it.
  ASSERT_TRUE(static_cast<bool>(aligned_sub_extractor));

  virtual_offset = 0;
  // Entry(0x10, 4*sizeof(uint32_t), 0*sizeof(uint32_t))
  // {subset virtual offset: 0x0}
  EXPECT_EQ(aligned_sub_extractor->GetU32(&virtual_offset), 0x00000000U);
  EXPECT_EQ(aligned_sub_extractor->GetU32(&virtual_offset), 0x11111111U);
  EXPECT_EQ(aligned_sub_extractor->GetU32(&virtual_offset), 0x22222222U);
  EXPECT_EQ(aligned_sub_extractor->GetU32(&virtual_offset), 0x33333333U);

  // sub_extractor starts at virtual offset 0x28 for 2 uint32_t's,
  // only PART of a LookupTable entry.
  lldb::DataExtractorSP misaligned_sub_extractor =
      extractor->GetSubsetExtractorSP(10 * sizeof(uint32_t),
                                      2 * sizeof(uint32_t));
  ASSERT_TRUE(static_cast<bool>(misaligned_sub_extractor));
  virtual_offset = 0;
  EXPECT_EQ(misaligned_sub_extractor->GetU32(&virtual_offset), 0xAAAAAAAAU);
  EXPECT_EQ(misaligned_sub_extractor->GetU32(&virtual_offset), 0xBBBBBBBBU);

  // The single-argument overload returns the rest of the containing entry.
  lldb::DataExtractorSP contiguous_subset = extractor->GetSubsetExtractorSP(0);
  ASSERT_TRUE(static_cast<bool>(contiguous_subset));
  EXPECT_EQ(contiguous_subset->GetByteSize(), 4 * sizeof(uint32_t));

  lldb::DataExtractorSP misaligned_contiguous_subset =
      extractor->GetSubsetExtractorSP(2 * sizeof(uint32_t));
  ASSERT_TRUE(static_cast<bool>(misaligned_contiguous_subset));
  EXPECT_EQ(misaligned_contiguous_subset->GetByteSize(), 2 * sizeof(uint32_t));

  // Ask for a subset in the second LookupTable entry.
  lldb::DataExtractorSP middle_contiguous_subset =
      extractor->GetSubsetExtractorSP(4 * sizeof(uint32_t));
  ASSERT_TRUE(static_cast<bool>(middle_contiguous_subset));
  EXPECT_EQ(middle_contiguous_subset->GetByteSize(), 4 * sizeof(uint32_t));
}

0 commit comments

Comments
 (0)