Skip to content

Commit 2343d4b

Browse files
committed
[CARBONDATA-4063] Refactor getBlockId and getShortBlcokId functions
Why is this PR needed? Currently, getBlockId and getShortBlockId functions are too complex and unreadable, which can be more simpler and readable. What changes were proposed in this PR? Refactor the getBlockId and getShortBlockId functions without changing the blockid format and shortblockid format, There is no complatiability issue after the rafactoring. Does this PR introduce any user interface change? No Is any new testcase added? No
1 parent 32c2306 commit 2343d4b

8 files changed

Lines changed: 91 additions & 161 deletions

File tree

core/src/main/java/org/apache/carbondata/core/indexstore/blockletindex/BlockIndex.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,6 @@
6363
import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf;
6464
import org.apache.carbondata.core.util.BlockletIndexUtil;
6565
import org.apache.carbondata.core.util.ByteUtil;
66-
import org.apache.carbondata.core.util.CarbonUtil;
6766
import org.apache.carbondata.core.util.DataFileFooterConverter;
6867
import org.apache.carbondata.core.util.path.CarbonTablePath;
6968

@@ -797,8 +796,8 @@ private boolean addBlockBasedOnMinMaxValue(FilterExecutor filterExecutor, byte[]
797796
// replaced with '#', to support multi level partitioning. For example, BlockId will be
798797
// look like `part1=1#part2=2/xxxxxxxxx`. During query also, blockId should be
799798
// replaced by '#' in place of '/', to match and prune data on SI table.
800-
uniqueBlockPath = CarbonUtil
801-
.getBlockId(carbonTable.getAbsoluteTableIdentifier(), filePath, "", true, false, true);
799+
uniqueBlockPath = CarbonTablePath
800+
.getBlockId(carbonTable.getAbsoluteTableIdentifier(), filePath, "", true);
802801
} else {
803802
uniqueBlockPath = filePath.substring(filePath.lastIndexOf("/Part") + 1);
804803
}

core/src/main/java/org/apache/carbondata/core/mutate/CarbonUpdateUtil.java

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -822,9 +822,7 @@ public static void createBlockDetailsMap(BlockMappingVO blockMappingVO,
822822
SegmentUpdateDetails detail = segmentUpdateStatusManager.getDetailsForABlock(key);
823823

824824
if (null != detail) {
825-
826825
alreadyDeletedCount = Long.parseLong(detail.getDeletedRowsInBlock());
827-
828826
}
829827

830828
RowCountDetailsVO rowCountDetailsVO =
@@ -845,16 +843,7 @@ public static void createBlockDetailsMap(BlockMappingVO blockMappingVO,
845843
*/
846844
public static String getSegmentBlockNameKey(String segID, String blockName,
847845
boolean isPartitionTable) {
848-
String blockNameWithOutPartAndBatchNo = blockName
849-
.substring(blockName.indexOf(CarbonCommonConstants.HYPHEN) + 1,
850-
blockName.lastIndexOf(CarbonTablePath.getCarbonDataExtension()))
851-
.replace(CarbonTablePath.BATCH_PREFIX, CarbonCommonConstants.UNDERSCORE);
852-
// to remove compressor name
853-
int index = blockNameWithOutPartAndBatchNo.lastIndexOf(CarbonCommonConstants.POINT);
854-
if (index != -1) {
855-
blockNameWithOutPartAndBatchNo = blockNameWithOutPartAndBatchNo
856-
.replace(blockNameWithOutPartAndBatchNo.substring(index), "");
857-
}
846+
String blockNameWithOutPartAndBatchNo = CarbonTablePath.getShortBlockId(blockName);
858847
if (isPartitionTable) {
859848
return blockNameWithOutPartAndBatchNo;
860849
} else {

core/src/main/java/org/apache/carbondata/core/scan/executor/impl/AbstractQueryExecutor.java

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -446,11 +446,9 @@ private BlockExecutionInfo getBlockExecutionInfoForBlock(QueryModel queryModel,
446446
segmentProperties.getComplexDimensions(),
447447
blockExecutionInfo.getActualQueryMeasures().length,
448448
queryModel.getTable().getTableInfo().isTransactionalTable());
449-
boolean isStandardTable = CarbonUtil.isStandardCarbonTable(queryModel.getTable());
450-
String blockId = CarbonUtil
449+
String blockId = CarbonTablePath
451450
.getBlockId(queryModel.getAbsoluteTableIdentifier(), filePath, segment.getSegmentNo(),
452-
queryModel.getTable().getTableInfo().isTransactionalTable(),
453-
isStandardTable, queryModel.getTable().isHivePartitionTable());
451+
queryModel.getTable().isHivePartitionTable());
454452
blockExecutionInfo.setBlockId(CarbonTablePath.getShortBlockId(blockId));
455453
blockExecutionInfo.setDeleteDeltaFilePath(deleteDeltaFiles);
456454
blockExecutionInfo.setStartBlockletIndex(0);

core/src/main/java/org/apache/carbondata/core/scan/expression/conditional/ImplicitExpression.java

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717

1818
package org.apache.carbondata.core.scan.expression.conditional;
1919

20-
import java.io.File;
2120
import java.util.HashMap;
2221
import java.util.HashSet;
2322
import java.util.List;
@@ -60,7 +59,8 @@ public ImplicitExpression(Map<String, Set<Integer>> blockIdToBlockletIdMapping)
6059
}
6160

6261
private void addBlockEntry(String blockletPath) {
63-
String blockId = blockletPath.substring(0, blockletPath.lastIndexOf(File.separator));
62+
String blockId = blockletPath.substring(0,
63+
blockletPath.lastIndexOf(CarbonCommonConstants.FILE_SEPARATOR));
6464
// Check if blockId contains old tuple id format, and convert it to compatible format.
6565
// Example for non-partition table:
6666
// Old tuple id format: 0/0/0-0_batchno0-0-0-1599806689305.snappy
@@ -71,9 +71,9 @@ private void addBlockEntry(String blockletPath) {
7171
if (blockId.contains(CarbonTablePath.BATCH_PREFIX)) {
7272
blockId = CarbonTablePath.getShortBlockId(blockId);
7373
// In case of non-partition table, remove index of part prefix (Part0) from blockId.
74-
if (!blockId.substring(0, blockId.indexOf(File.separator))
74+
if (!blockId.substring(0, blockId.indexOf(CarbonCommonConstants.FILE_SEPARATOR))
7575
.contains(CarbonCommonConstants.EQUALS)) {
76-
blockId = blockId.substring(blockId.indexOf(File.separator) + 1);
76+
blockId = blockId.substring(blockId.indexOf(CarbonCommonConstants.FILE_SEPARATOR) + 1);
7777
}
7878
}
7979
Set<Integer> blockletIds = blockIdToBlockletIdMapping.get(blockId);
@@ -82,7 +82,8 @@ private void addBlockEntry(String blockletPath) {
8282
blockIdToBlockletIdMapping.put(blockId, blockletIds);
8383
}
8484
blockletIds.add(
85-
Integer.parseInt(blockletPath.substring(blockletPath.lastIndexOf(File.separator) + 1)));
85+
Integer.parseInt(blockletPath.substring(
86+
blockletPath.lastIndexOf(CarbonCommonConstants.FILE_SEPARATOR) + 1)));
8687
}
8788

8889
@Override

core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java

Lines changed: 0 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -2737,72 +2737,6 @@ private static long getMaxOfBlockAndFileSize(long blockSize, long fileSize) {
27372737
return maxSize;
27382738
}
27392739

2740-
/**
2741-
* Generate the blockId as per the block path
2742-
*
2743-
* @param identifier
2744-
* @param filePath
2745-
* @param segmentId
2746-
* @param isTransactionalTable
2747-
* @param isStandardTable
2748-
* @return
2749-
*/
2750-
public static String getBlockId(AbsoluteTableIdentifier identifier, String filePath,
2751-
String segmentId, boolean isTransactionalTable, boolean isStandardTable) {
2752-
return getBlockId(identifier, filePath, segmentId, isTransactionalTable, isStandardTable,
2753-
false);
2754-
}
2755-
2756-
/**
2757-
* Generate the blockId as per the block path
2758-
*
2759-
* @return
2760-
*/
2761-
public static String getBlockId(AbsoluteTableIdentifier identifier, String filePath,
2762-
String segmentId, boolean isTransactionalTable, boolean isStandardTable,
2763-
boolean isPartitionTable) {
2764-
String blockId;
2765-
String blockName = filePath.substring(filePath.lastIndexOf("/") + 1);
2766-
String tablePath = identifier.getTablePath();
2767-
2768-
if (filePath.startsWith(tablePath)) {
2769-
if (!isTransactionalTable || isStandardTable) {
2770-
blockId = "Part0" + CarbonCommonConstants.FILE_SEPARATOR + "Segment_" + segmentId
2771-
+ CarbonCommonConstants.FILE_SEPARATOR + blockName;
2772-
} else {
2773-
// This is the case with partition table.
2774-
String partitionDir;
2775-
int partLength = filePath.length() - blockName.length() - 1;
2776-
if (tablePath.length() + 1 < partLength) {
2777-
partitionDir =
2778-
filePath.substring(tablePath.length() + 1, partLength);
2779-
} else {
2780-
partitionDir = "";
2781-
}
2782-
if (isPartitionTable) {
2783-
blockId =
2784-
partitionDir.replace(CarbonCommonConstants.FILE_SEPARATOR, "#")
2785-
+ CarbonCommonConstants.FILE_SEPARATOR + blockName;
2786-
} else {
2787-
// Replace / with # on partition director to support multi level partitioning. And access
2788-
// them all as a single entity.
2789-
if (partitionDir.isEmpty()) {
2790-
blockId = segmentId + CarbonCommonConstants.FILE_SEPARATOR + blockName;
2791-
} else {
2792-
blockId = partitionDir.replace(CarbonCommonConstants.FILE_SEPARATOR, "#")
2793-
+ CarbonCommonConstants.FILE_SEPARATOR + segmentId
2794-
+ CarbonCommonConstants.FILE_SEPARATOR + blockName;
2795-
}
2796-
2797-
}
2798-
}
2799-
} else {
2800-
blockId = filePath.substring(0, filePath.length() - blockName.length()).replace("/", "#")
2801-
+ CarbonCommonConstants.FILE_SEPARATOR + "Segment_" + segmentId
2802-
+ CarbonCommonConstants.FILE_SEPARATOR + blockName;
2803-
}
2804-
return blockId;
2805-
}
28062740

28072741
/**
28082742
* sets the local dictionary columns to wrapper schema, if the table property

core/src/main/java/org/apache/carbondata/core/util/path/CarbonTablePath.java

Lines changed: 73 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import org.apache.carbondata.core.datastore.filesystem.CarbonFileFilter;
2626
import org.apache.carbondata.core.datastore.impl.FileFactory;
2727
import org.apache.carbondata.core.locks.LockUsage;
28+
import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
2829
import org.apache.carbondata.core.metadata.ColumnarFormatVersion;
2930

3031
import static org.apache.carbondata.core.constants.CarbonCommonConstants.DASH;
@@ -215,38 +216,6 @@ public static String getTableStatusFilePathWithUUID(String tablePath, String uui
215216
}
216217
}
217218

218-
/**
219-
* Below method will be used to get the index file present in the segment folder
220-
* based on task id
221-
*
222-
* @param taskId task id of the file
223-
* @param segmentId segment number
224-
* @return full qualified carbon index path
225-
*/
226-
private static String getCarbonIndexFilePath(final String tablePath, final String taskId,
227-
final String segmentId, final String bucketNumber) {
228-
String segmentDir = getSegmentPath(tablePath, segmentId);
229-
CarbonFile carbonFile =
230-
FileFactory.getCarbonFile(segmentDir);
231-
232-
CarbonFile[] files = carbonFile.listFiles(new CarbonFileFilter() {
233-
@Override
234-
public boolean accept(CarbonFile file) {
235-
if (bucketNumber.equals("-1")) {
236-
return file.getName().startsWith(taskId) && file.getName().endsWith(INDEX_FILE_EXT);
237-
}
238-
return file.getName().startsWith(taskId + DASH + bucketNumber) && file.getName()
239-
.endsWith(INDEX_FILE_EXT);
240-
}
241-
});
242-
if (files.length > 0) {
243-
return files[0].getAbsolutePath();
244-
} else {
245-
throw new RuntimeException("Missing Carbon index file for Segment[" + segmentId + "], "
246-
+ "taskId[" + taskId + "]");
247-
}
248-
}
249-
250219
/**
251220
* Below method will be used to get the carbon index file path
252221
* @param taskId
@@ -635,42 +604,86 @@ public static String getCarbonMergeIndexExtension() {
635604
return MERGE_INDEX_FILE_EXT;
636605
}
637606

607+
/**
608+
* Generate the blockId as per the block path
609+
*
610+
* @param identifier
611+
* @param filePath
612+
* @param segmentId
613+
* @param isPartitionTable
614+
* @return blockid, which is the identify of a block
615+
*/
616+
public static String getBlockId(AbsoluteTableIdentifier identifier, String filePath,
617+
String segmentId, boolean isPartitionTable) {
618+
String blockName = filePath.substring(filePath.lastIndexOf(
619+
CarbonCommonConstants.FILE_SEPARATOR) + 1);
620+
String tablePath = identifier.getTablePath();
621+
622+
String partitionDir = "";
623+
// 1. For block of Added Segments, The BlockId consistsof
624+
// <partitionPath><segmentId><blockName>
625+
if (!filePath.startsWith(tablePath)) {
626+
partitionDir = getPartitionDir(tablePath, filePath, blockName);
627+
return partitionDir.replace(CarbonCommonConstants.FILE_SEPARATOR, "#")
628+
+ CarbonCommonConstants.FILE_SEPARATOR + segmentId
629+
+ CarbonCommonConstants.FILE_SEPARATOR + blockName;
630+
}
631+
632+
// 2. For block of partitiontable, The BlockId consistsof
633+
// <partitionPath><blockName>
634+
if (isPartitionTable) {
635+
partitionDir = getPartitionDir(tablePath, filePath, blockName);
636+
return partitionDir.replace(CarbonCommonConstants.FILE_SEPARATOR, "#")
637+
+ CarbonCommonConstants.FILE_SEPARATOR + blockName;
638+
}
639+
640+
// 3. For nonpartitiontable, The BlockId consistsof
641+
// <segmentId><blockName>
642+
return segmentId + CarbonCommonConstants.FILE_SEPARATOR + blockName;
643+
}
644+
638645
/**
639646
* This method will remove strings in path and return short block id
640647
*
641648
* @param blockId
642649
* @return shortBlockId
643650
*/
644651
public static String getShortBlockId(String blockId) {
645-
String blockIdWithCompressorName =
646-
blockId.replace(PARTITION_PREFIX + "0" + CarbonCommonConstants.FILE_SEPARATOR, "")
647-
.replace(SEGMENT_PREFIX, "").replace(BATCH_PREFIX, CarbonCommonConstants.UNDERSCORE)
648-
.replace(DATA_PART_PREFIX, "").replace(CARBON_DATA_EXT, "");
649-
// to remove compressor name
650-
if (!blockId.equalsIgnoreCase(blockIdWithCompressorName)) {
651-
int index = blockIdWithCompressorName.lastIndexOf(POINT);
652-
int fileSeperatorIndex = blockIdWithCompressorName.lastIndexOf(File.separator);
653-
if (index != -1) {
654-
String modifiedBlockId;
655-
if (index > fileSeperatorIndex) {
656-
// Default case when path ends with compressor name.
657-
// Example: 0/0-0_0-0-0-1600789595862.snappy
658-
modifiedBlockId =
659-
blockIdWithCompressorName.replace(blockIdWithCompressorName.substring(index), "");
660-
} else {
661-
// in case of CACHE_LEVEL = BLOCKLET, blockId path contains both block id and blocklet id
662-
// so check for next file seperator and remove compressor name.
663-
// Example: 0/0-0_0-0-0-1600789595862.snappy/0
664-
modifiedBlockId = blockIdWithCompressorName
665-
.replace(blockIdWithCompressorName.substring(index, fileSeperatorIndex), "");
666-
}
667-
return modifiedBlockId;
668-
} else {
669-
return blockIdWithCompressorName;
670-
}
671-
} else {
672-
return blockIdWithCompressorName;
652+
// 1. If the blockid is already shortblockid, return directly
653+
int suffixIndex = blockId.lastIndexOf(CARBON_DATA_EXT);
654+
if (suffixIndex < 0) {
655+
return blockId;
656+
}
657+
// 2. get the filepath. in the type of part=a/part-0-0_batchno0-0-0-1597409791503.snappy
658+
String filePath = blockId.substring(0, suffixIndex);
659+
660+
// 3. get the compressor name, in the type of '.snappy'/'.zstd'/'.gzip'
661+
String compressorName = filePath.substring(filePath.lastIndexOf(POINT));
662+
663+
// 4. get rid of 'Part0/' 'Segment_' '_batchno' 'part-' '.carbondata' and compressorname
664+
return blockId.replace(compressorName, "")
665+
.replace(PARTITION_PREFIX + "0" + CarbonCommonConstants.FILE_SEPARATOR, "")
666+
.replace(SEGMENT_PREFIX, "")
667+
.replace(BATCH_PREFIX, CarbonCommonConstants.UNDERSCORE)
668+
.replace(DATA_PART_PREFIX, "")
669+
.replace(CARBON_DATA_EXT, "");
670+
}
671+
672+
/**
673+
* get the partition path in the block path
674+
*
675+
* @param tablePath
676+
* @param filePath
677+
* @param blockName
678+
* @return blockid, which is the identify of a block
679+
*/
680+
public static String getPartitionDir(String tablePath, String filePath, String blockName) {
681+
// The filepath is consist with <tablePath><partitionPath><blockName>
682+
// The partitionPath is the string truncated between tablePath and blockName
683+
if (!filePath.startsWith(tablePath)) {
684+
return filePath.substring(0, filePath.length() - blockName.length());
673685
}
686+
return filePath.substring(tablePath.length() + 1, filePath.length() - blockName.length() - 1);
674687
}
675688

676689
/**

integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/iud/DeleteCarbonTableTestCase.scala

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -288,21 +288,17 @@ class DeleteCarbonTableTestCase extends QueryTest with BeforeAndAfterAll {
288288

289289
val carbonDataFilename = new File(carbonTable.getTablePath + "/Fact/Part0/Segment_0/")
290290
.listFiles().filter(fn => fn.getName.endsWith(".carbondata"))
291-
val blockId = CarbonUtil.getBlockId(carbonTable.getAbsoluteTableIdentifier,
291+
val blockId = CarbonTablePath.getBlockId(carbonTable.getAbsoluteTableIdentifier,
292292
carbonDataFilename(0).getAbsolutePath,
293-
"0",
294-
carbonTable.isTransactionalTable,
295-
CarbonUtil.isStandardCarbonTable(carbonTable))
293+
"0", false)
296294

297-
assert(blockId.startsWith("Part0/Segment_0/part-0-0_batchno0-0-0-"))
295+
assert(blockId.startsWith("0/part-0-0_batchno0-0-0-"))
298296
val carbonDataFilename_part = new File(carbonTable_part.getTablePath + "/c3=aa").listFiles()
299297
.filter(fn => fn.getName.endsWith(".carbondata"))
300-
val blockId_part = CarbonUtil.getBlockId(carbonTable.getAbsoluteTableIdentifier,
298+
val blockId_part = CarbonTablePath.getBlockId(carbonTable.getAbsoluteTableIdentifier,
301299
carbonDataFilename_part(0).getAbsolutePath,
302-
"0",
303-
carbonTable.isTransactionalTable,
304-
CarbonUtil.isStandardCarbonTable(carbonTable))
305-
assert(blockId_part.startsWith("Part0/Segment_0/part-0-100100000100001_batchno0-0-0-"))
300+
"0", false)
301+
assert(blockId_part.startsWith("0/part-0-100100000100001_batchno0-0-0-"))
306302
val tableBlockPath = CarbonUpdateUtil
307303
.getTableBlockPath(listOfTupleId(0),
308304
carbonTable.getTablePath,

integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/iud/UpdateCarbonTableTestCase.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1024,7 +1024,7 @@ class UpdateCarbonTableTestCase extends QueryTest with BeforeAndAfterAll {
10241024
}
10251025

10261026
test("check data after update with row.filter pushdown as false") {
1027-
sql("""drop table if exists iud.dest33_flat""")
1027+
sql("""drop table if exists iud.dest33_part""")
10281028
sql(
10291029
"""create table iud.dest33_part (c1 int,c2 string, c3 short) STORED AS carbondata"""
10301030
.stripMargin)

0 commit comments

Comments
 (0)