Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 86 additions & 0 deletions fe/fe-core/src/main/java/org/apache/doris/catalog/Partition.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,20 @@
import org.apache.doris.cloud.catalog.CloudPartition;
import org.apache.doris.common.Config;
import org.apache.doris.common.FeConstants;
import org.apache.doris.common.util.Util;
import org.apache.doris.rpc.RpcException;

import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.gson.annotations.SerializedName;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.security.MessageDigest;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
Expand Down Expand Up @@ -92,6 +97,8 @@ public enum PartitionState {
@SerializedName(value = "di", alternate = {"distributionInfo"})
private DistributionInfo distributionInfo;

private transient volatile String remoteMetaChecksum;

// No-argument constructor; being protected, it is only callable from subclasses
// and from code in the same package.
// NOTE(review): presumably kept for reflective/JSON deserialization — confirm.
protected Partition() {
}

Expand Down Expand Up @@ -258,6 +265,85 @@ public List<MaterializedIndex> getMaterializedIndices(IndexExtState extState) {
return indices;
}

public String getMetaChecksum() {
MessageDigest digest = DigestUtils.getSha256Digest();
// Include partition-level fields whose changes should invalidate the cached
// remote partition payload, even when visibleVersion / visibleVersionTime
// remain unchanged (e.g. ALTER TABLE ... RENAME PARTITION only mutates name).
updateMetaChecksum(digest, (byte) 11, id);
updateMetaChecksumString(digest, (byte) 12, name);
updateMetaChecksum(digest, (byte) 13, state == null ? -1L : state.ordinal());
updateMetaChecksum(digest, (byte) 14, visibleVersion);
updateMetaChecksum(digest, (byte) 15, visibleVersionTime);
updateMetaChecksum(digest, (byte) 16, nextVersion);
if (distributionInfo != null) {
DistributionInfoType distType = distributionInfo.getType();
updateMetaChecksum(digest, (byte) 17, distType == null ? -1L : distType.ordinal());
updateMetaChecksum(digest, (byte) 18, distributionInfo.getBucketNum());
updateMetaChecksum(digest, (byte) 19, distributionInfo.getAutoBucket() ? 1L : 0L);
} else {
updateMetaChecksum(digest, (byte) 17, -1L);
}
List<MaterializedIndex> indexes = getMaterializedIndices(IndexExtState.VISIBLE);
indexes.sort(Comparator.comparingLong(MaterializedIndex::getId));
for (MaterializedIndex index : indexes) {
updateMetaChecksum(digest, (byte) 1, index.getId());
Comment thread
HonestManXin marked this conversation as resolved.
List<Tablet> tablets = Lists.newArrayList(index.getTablets());
tablets.sort(Comparator.comparingLong(Tablet::getId));
for (Tablet tablet : tablets) {
updateMetaChecksum(digest, (byte) 2, tablet.getId());
List<Replica> replicas = Lists.newArrayList(tablet.getReplicas());
replicas.sort(Comparator.comparingLong(Replica::getId)
.thenComparingLong(Replica::getBackendIdWithoutException));
for (Replica replica : replicas) {
Comment thread
HonestManXin marked this conversation as resolved.
updateMetaChecksum(digest, (byte) 3, replica.getId());
updateMetaChecksum(digest, (byte) 4, replica.getBackendIdWithoutException());
// Include all replica fields that affect getQueryableReplicas() filtering,
// so a stale remote cache is invalidated whenever any of them changes
// (e.g. replica becomes bad, lastFailedVersion is set, version/state changes).
updateMetaChecksum(digest, (byte) 5, replica.getVersion());
updateMetaChecksum(digest, (byte) 6, replica.getLastFailedVersion());
updateMetaChecksum(digest, (byte) 7, replica.getPathHash());
Replica.ReplicaState state = replica.getState();
updateMetaChecksum(digest, (byte) 8, state == null ? -1L : state.ordinal());
updateMetaChecksum(digest, (byte) 9, replica.isBad() ? 1L : 0L);
updateMetaChecksum(digest, (byte) 10, replica.isUserDrop() ? 1L : 0L);
}
}
}
return Hex.encodeHexString(digest.digest());
}

/**
 * Returns the checksum most recently stored through
 * {@link #setRemoteMetaChecksum(String)}.
 *
 * @return the stored checksum, or {@code null} if none has been stored
 */
public String getRemoteMetaChecksum() {
    return this.remoteMetaChecksum;
}

/**
 * Stores the given checksum for this partition.
 *
 * <p>A {@code null} argument is ignored, so a previously stored checksum is never
 * cleared through this method.
 *
 * @param checksum the checksum to remember; {@code null} leaves the current value as is
 */
public void setRemoteMetaChecksum(String checksum) {
    if (checksum == null) {
        return;
    }
    remoteMetaChecksum = checksum;
}

/**
 * Feeds one tagged numeric field into the digest: first the tag byte, then the value.
 *
 * @param digest the digest being accumulated
 * @param tag    marker identifying which field the value belongs to
 * @param value  the field value
 */
private void updateMetaChecksum(MessageDigest digest, byte tag, long value) {
    // The tag goes in ahead of the value so each field is labelled in the stream.
    Util.updateMessageDigest(digest, tag);
    Util.updateMessageDigest(digest, value);
}

/**
 * Feeds one tagged string field into the digest.
 *
 * <p>After the tag byte, a {@code null} string is written as the length marker
 * {@code -1}. Any other string is written as its length in chars followed by each
 * char as two bytes, high byte first.
 *
 * @param digest the digest being accumulated
 * @param tag    marker identifying which field the value belongs to
 * @param value  the string to add; may be {@code null}
 */
private void updateMetaChecksumString(MessageDigest digest, byte tag, String value) {
    Util.updateMessageDigest(digest, tag);
    if (value != null) {
        char[] codeUnits = value.toCharArray();
        // The length prefix keeps the string's bytes delimited from whatever follows.
        Util.updateMessageDigest(digest, (long) codeUnits.length);
        for (char codeUnit : codeUnits) {
            digest.update((byte) (codeUnit >>> 8));
            digest.update((byte) codeUnit);
        }
    } else {
        Util.updateMessageDigest(digest, -1L);
    }
}

/**
 * Returns the sum of {@code getDataSize(singleReplica)} and {@code getRemoteDataSize()}.
 *
 * @param singleReplica passed through unchanged to {@code getDataSize}
 * @return the combined size
 */
public long getAllDataSize(boolean singleReplica) {
    long dataSize = getDataSize(singleReplica);
    long remoteDataSize = getRemoteDataSize();
    return dataSize + remoteDataSize;
}
Expand Down
15 changes: 15 additions & 0 deletions fe/fe-core/src/main/java/org/apache/doris/common/util/Util.java
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,21 @@ public static LongUnaryOperator overflowSafeIncrement() {
};
}

/**
 * Appends a single byte to the given digest.
 *
 * @param digest the digest to update
 * @param value  the byte to append
 */
public static void updateMessageDigest(MessageDigest digest, byte value) {
    digest.update(value);
}

/**
 * Appends a {@code long} to the given digest as eight bytes, most significant byte first.
 *
 * @param digest the digest to update
 * @param value  the value to append
 */
public static void updateMessageDigest(MessageDigest digest, long value) {
    // Walk from the top byte (shift 56) down to the lowest byte (shift 0).
    for (int shift = Long.SIZE - Byte.SIZE; shift >= 0; shift -= Byte.SIZE) {
        digest.update((byte) (value >>> shift));
    }
}


// Get a string represent the schema signature, contains:
// list of columns and bloom filter column info.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Random;
import java.util.concurrent.TimeUnit;
Expand Down Expand Up @@ -266,18 +267,10 @@ public RemoteOlapTable getOlapTable(String dbName, String table, long tableId, L
request.setPasswd(password);
request.setVersion(FeConstants.meta_version);
for (Partition partition : partitions) {
TPartitionMeta meta = new TPartitionMeta();
meta.setId(partition.getId());
meta.setVisibleVersion(partition.getVisibleVersion());
meta.setVisibleVersionTime(partition.getVisibleVersionTime());
request.addToPartitions(meta);
request.addToPartitions(buildPartitionMeta(partition));
}
for (Partition partition : tempPartitions) {
TPartitionMeta meta = new TPartitionMeta();
meta.setId(partition.getId());
meta.setVisibleVersion(partition.getVisibleVersion());
meta.setVisibleVersionTime(partition.getVisibleVersionTime());
request.addToTempPartitions(meta);
request.addToTempPartitions(buildPartitionMeta(partition));
}
String msg = String.format("failed to get table meta from remote doris:%s", name);
return randomCallWithRetry(client -> {
Expand All @@ -291,13 +284,17 @@ public RemoteOlapTable getOlapTable(String dbName, String table, long tableId, L
remoteOlapTable = RemoteOlapTable.fromOlapTable(olapTable);
}
List<Partition> updatedPartitions = new ArrayList<>(result.getUpdatedPartitionsSize());
List<String> updatedPartitionChecksums = result.isSetUpdatedPartitionChecksums()
? result.getUpdatedPartitionChecksums() : Collections.emptyList();
if (result.getUpdatedPartitionsSize() > 0) {
for (ByteBuffer buffer : result.getUpdatedPartitions()) {
for (int i = 0; i < result.getUpdatedPartitionsSize(); i++) {
ByteBuffer buffer = result.getUpdatedPartitions().get(i);
try (ByteArrayInputStream in =
new ByteArrayInputStream(buffer.array(), buffer.position(), buffer.remaining());
DataInputStream dataInputStream = new DataInputStream(in)) {
String partitionStr = Text.readString(dataInputStream);
Partition partition = GsonUtils.GSON.fromJson(partitionStr, Partition.class);
setRemoteMetaChecksum(partition, updatedPartitionChecksums, i);
updatedPartitions.add(partition);
}
}
Expand All @@ -308,27 +305,51 @@ public RemoteOlapTable getOlapTable(String dbName, String table, long tableId, L
}
remoteOlapTable.rebuildPartitions(partitions, updatedPartitions, removedPartitions);
// rebuild temp partitions
List<Partition> updatedTempPartitions = new ArrayList<>();
if (result.isSetUpdatedTempPartitions() && result.getUpdatedTempPartitionsSize() > 0) {
updatedPartitions = new ArrayList<>(result.getUpdatedTempPartitionsSize());
for (ByteBuffer buffer : result.getUpdatedTempPartitions()) {
List<String> updatedTempPartitionChecksums = result.isSetUpdatedTempPartitionChecksums()
? result.getUpdatedTempPartitionChecksums() : Collections.emptyList();
for (int i = 0; i < result.getUpdatedTempPartitionsSize(); i++) {
ByteBuffer buffer = result.getUpdatedTempPartitions().get(i);
try (ByteArrayInputStream in =
new ByteArrayInputStream(buffer.array(), buffer.position(), buffer.remaining());
DataInputStream dataInputStream = new DataInputStream(in)) {
String partitionStr = Text.readString(dataInputStream);
Partition partition = GsonUtils.GSON.fromJson(partitionStr, Partition.class);
updatedPartitions.add(partition);
setRemoteMetaChecksum(partition, updatedTempPartitionChecksums, i);
updatedTempPartitions.add(partition);
}
}
}
removedPartitions = result.getRemovedTempPartitions();
if (removedPartitions == null) {
removedPartitions = new ArrayList<>();
List<Long> removedTempPartitions = result.getRemovedTempPartitions();
if (removedTempPartitions == null) {
removedTempPartitions = new ArrayList<>();
}
remoteOlapTable.rebuildTempPartitions(tempPartitions, updatedPartitions, removedPartitions);
remoteOlapTable.rebuildTempPartitions(tempPartitions, updatedTempPartitions, removedTempPartitions);
return remoteOlapTable;
}, msg, timeoutMs);
}

/**
 * Builds the request entry describing one locally held partition.
 *
 * <p>The entry carries the partition id, visible version, visible version time and a
 * meta checksum. If the partition already has a stored remote checksum it is sent as is.
 * Otherwise the checksum is computed from the local partition object, stored on the
 * partition, and then sent.
 *
 * @param partition the partition to describe
 * @return the populated {@code TPartitionMeta}
 */
private TPartitionMeta buildPartitionMeta(Partition partition) {
    TPartitionMeta partitionMeta = new TPartitionMeta();
    partitionMeta.setId(partition.getId());
    partitionMeta.setVisibleVersion(partition.getVisibleVersion());
    partitionMeta.setVisibleVersionTime(partition.getVisibleVersionTime());

    String storedChecksum = partition.getRemoteMetaChecksum();
    if (storedChecksum != null) {
        partitionMeta.setMetaChecksum(storedChecksum);
        return partitionMeta;
    }

    // Nothing stored yet: compute the checksum locally and remember it on the partition.
    String computedChecksum = partition.getMetaChecksum();
    partition.setRemoteMetaChecksum(computedChecksum);
    partitionMeta.setMetaChecksum(computedChecksum);
    return partitionMeta;
}

/**
 * Copies the checksum at position {@code index} onto the partition, if the list is long
 * enough to contain one.
 *
 * @param partition the partition to update
 * @param checksums the checksum list; may be shorter than the partition list or empty
 * @param index     position of the partition's checksum in {@code checksums}
 */
private void setRemoteMetaChecksum(Partition partition, List<String> checksums, int index) {
    if (index >= checksums.size()) {
        // No checksum at this position; leave the partition untouched.
        return;
    }
    String checksumAtIndex = checksums.get(index);
    partition.setRemoteMetaChecksum(checksumAtIndex);
}

public TBeginRemoteTxnResult beginRemoteTxn(TBeginRemoteTxnRequest request) throws Exception {
request.setUser(user);
request.setPasswd(password);
Expand Down
Loading
Loading