Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
62 commits
Select commit Hold shift + click to select a range
264a379
Add forward index compression ratio and per-column compression stats …
jsol-splunk Apr 11, 2026
123a388
Add comprehensive tests and fix redundant loop for compression stats …
jsol-splunk Apr 11, 2026
92a187e
Restructure compression stats API: nested DTOs, feature flag gating, …
jsol-splunk Apr 11, 2026
c5a4bf5
Add writer tracking gating, per-column compression stats in metadata …
jsol-splunk Apr 11, 2026
316cd09
Restructure columnCompressionStats to standard array DTO and gate on …
jsol-splunk Apr 11, 2026
645e52f
Fix default codec persistence, dictionary column stats, stale metrics…
jsol-splunk Apr 11, 2026
994dc6c
Fix compression stats API structure, field names, and dict-only suppr…
jsol-splunk Apr 11, 2026
4e70c19
Add compressionStats and storageBreakdown as proper DTO fields on met…
jsol-splunk Apr 11, 2026
19207d1
Fix metadata endpoint DTO immutability, coverage fields, and writer t…
jsol-splunk Apr 11, 2026
9d165fc
Exclude old segments without stats from compression ratio denominator
jsol-splunk Apr 11, 2026
3466115
Fix writer uncompressed size tracking and harden compression stats API
jsol-splunk Apr 11, 2026
fde3bc7
Fix negative ratio, tier gauge leak, and CLP codec persistence
jsol-splunk Apr 11, 2026
4193273
Consolidate CLP codec compression resolution into ForwardIndexType
jsol-splunk Apr 11, 2026
649f7cd
Preserve tier info when compression stats flag is off, skip stale col…
jsol-splunk Apr 12, 2026
4810bcc
Fix replica normalization for dict column sentinels and empty summaries
jsol-splunk Apr 12, 2026
5a10ca2
Fix tier gauge leader check and exclude old raw segments from server …
jsol-splunk Apr 12, 2026
b50f6c9
Fix post-rebase import ordering, constant rename, and deprecated API
jsol-splunk May 12, 2026
7f6ae31
Add missing test coverage for ColumnMetadataImpl and CompressionStats…
jsol-splunk May 12, 2026
627bbb4
Expand test coverage for compression stats across all affected modules
jsol-splunk May 12, 2026
1f83507
Polish TableSizeResourceTest: remove redundant queryParam and align a…
jsol-splunk May 12, 2026
5355b5d
Add ServerSegmentMetadataReader coverage: CompressionStatsSummary/Sto…
jsol-splunk May 12, 2026
27c12f0
Fix missing @Nullable import in ColumnMetadata after rebase onto #18470
jsol-splunk May 12, 2026
6bbf140
Add CLPForwardIndexCreatorV2 getUncompressedSize and setTrackUncompre…
jsol-splunk May 12, 2026
fd562d6
Retrigger CI
jsol-splunk May 13, 2026
af45e52
Fix hasDictionary=true wrongly reported for raw (no-dict) columns in …
jsol-splunk May 19, 2026
a7b9912
Revert unrelated ZkStarter timing changes from hasDictionary fix commit
jsol-splunk May 19, 2026
4f21fd7
Fix isHasDictionary asymmetric JSON serialization and resolveCompress…
jsol-splunk May 19, 2026
f1a0365
Add default impl for isCompressionStatsEnabled in IndexCreationContex…
jsol-splunk May 19, 2026
edb3a05
Fix tier gauge using wrong metric API — use setOrUpdateTableGauge/rem…
jsol-splunk May 19, 2026
3dd9afa
Retrigger CI
jsol-splunk May 13, 2026
029ae04
Move CompressionStatsRealtimeIngestionIntegrationTest to CustomDataQu…
jsol-splunk May 29, 2026
f568715
Add /// Javadoc to compression stats public API classes and config field
jsol-splunk May 29, 2026
5945869
Extend compression stats to dict columns with codecBreakdown and fiel…
jsol-splunk Jun 1, 2026
f05ca6e
Gate dict ingest byte tracking behind compression stats flag; fix Tab…
jsol-splunk Jun 1, 2026
f8a4011
Fix ServerSegmentMetadataReader skip guard to drop codec=null entries…
jsol-splunk Jun 2, 2026
2d7b1a3
Fix realtime integration test schema name to match table name
jsol-splunk Jun 2, 2026
caec779
Add 5-param SegmentDictionaryCreator constructor with trackRawIngestB…
jsol-splunk Jun 2, 2026
3f2a498
Fix TablesResource to collect compression stats over all segment colu…
jsol-splunk Jun 2, 2026
01da108
Fix dict-only summary in TableSizeReader; split compression loop in T…
jsol-splunk Jun 2, 2026
c840f66
Rename SegmentSizeInfo fields rawForwardIndexSizeBytes/compressedForw…
jsol-splunk Jun 2, 2026
8a972bb
Fix CompressionStatsRealtimeIngestionIntegrationTest time column to D…
jsol-splunk Jun 2, 2026
5e31b8c
Fix typo: getUnonDiskSizeBytes -> getUncompressedForwardIndexSizeByte…
jsol-splunk Jun 2, 2026
d440713
Fix CompressionStatsRealtimeIngestionIntegrationTest: override getSor…
jsol-splunk Jun 2, 2026
fa3bdb2
Add /// Javadoc to StorageBreakdownInfo (new class added in this PR)
jsol-splunk Jun 2, 2026
946bd3f
Fix controllerUrl null in CompressionStatsRealtimeIngestionIntegratio…
jsol-splunk Jun 2, 2026
a01c3cf
Guard null rawIngestSizeBytes in integration test assertions for cons…
jsol-splunk Jun 2, 2026
a15f1db
Default _trackUncompressedSize to false — enabled externally via setT…
jsol-splunk Jun 10, 2026
ff3030f
Add ?includeColumnStats=false param to /size and /metadata endpoints …
jsol-splunk Jun 10, 2026
26dba0f
Fix writer tracking tests: explicitly call setTrackUncompressedSize(t…
jsol-splunk Jun 10, 2026
233f600
Fix checkstyle indentation in setTrackUncompressedSize calls inside t…
jsol-splunk Jun 10, 2026
3b10c14
Fix checkstyle line-length violation in TablesResource comment
jsol-splunk Jun 11, 2026
81f92c3
Fix checkstyle line-length violations in pinot-controller
jsol-splunk Jun 11, 2026
7faa702
Infer uncompressed size from _chunkDataOffset in writeChunk() for Fix…
jsol-splunk Jun 11, 2026
1788b55
Fix getUncompressedSize() to include in-flight bytes from unflushed p…
jsol-splunk Jun 11, 2026
709fc38
Add coverage tests for compression stats tracking in MV creators, leg…
jsol-splunk Jun 12, 2026
26e2981
Fix compilation: add assertTrue import to MultiValueVarByteRawIndexCr…
jsol-splunk Jun 12, 2026
944baf8
Add coverage tests: SingleValueVarByteRawIndexCreator tracking, dict …
jsol-splunk Jun 12, 2026
bb679b5
Fix testCodecNotUpdatedWhenCompressionStatsDisabled: correct expected…
jsol-splunk Jun 12, 2026
a983200
Fix testGetTableMetadataMixedDictRawCodec: add includeColumnStats=tru…
jsol-splunk Jun 15, 2026
2b5947f
Clear LARGEST_SEGMENT_SIZE_ON_SERVER gauge when all segment fetches e…
jsol-splunk Jun 16, 2026
52d4520
Fix compressionStats summary absent when includeColumnStats=false in …
jsol-splunk Jun 16, 2026
c6fce9e
Fix compilation: replace non-existent setUpHttpMocks with testRunner …
jsol-splunk Jun 16, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,18 @@ public enum ControllerGauge implements AbstractMetrics.Gauge {
// Percentage of segments we failed to get size for
TABLE_STORAGE_EST_MISSING_SEGMENT_PERCENT("TableStorageEstMissingSegmentPercent", false),

// Forward index compression ratio scaled by 100 (e.g., 4.5x ratio → 450). Divide by 100 to get actual ratio.
TABLE_COMPRESSION_RATIO_PERCENT("TableCompressionRatioPercent", false),

// Raw (uncompressed) forward index size per replica
TABLE_RAW_FORWARD_INDEX_SIZE_PER_REPLICA("TableRawForwardIndexSizePerReplica", false),

// Compressed forward index size per replica
TABLE_COMPRESSED_FORWARD_INDEX_SIZE_PER_REPLICA("TableCompressedForwardIndexSizePerReplica", false),

// Size per replica broken down by storage tier
TABLE_TIERED_STORAGE_SIZE("TableTieredStorageSize", false),

// Number of scheduled Cron jobs
CRON_SCHEDULER_JOB_SCHEDULED("cronSchedulerJobScheduled", false),

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.pinot.common.restlet.resources;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;
import java.util.List;
import java.util.Map;
import javax.annotation.Nullable;


/// Per-column forward index compression statistics, as reported by each server for a given segment.
///
/// For raw (non-dictionary) columns, both `rawIngestSizeInBytes` and `onDiskSizeInBytes` are populated
/// and `codec` reflects the compression algorithm. For dictionary-encoded columns, `codec` is
/// {@link #CODEC_DICT_ENCODED} and only `onDiskSizeInBytes` reflects the on-disk forward index size. Columns without a
/// forward index (forward-index-disabled) are excluded entirely.
@JsonIgnoreProperties(ignoreUnknown = true)
public class ColumnCompressionStatsInfo {
/// Sentinel codec value for dictionary-encoded columns.
public static final String CODEC_DICT_ENCODED = "DICT_ENCODED";

private final String _column;

/// Total uncompressed byte size of values written to the forward index during segment creation.
/// `-1` (sentinel) when unavailable — e.g. for dictionary-encoded columns or old segments built before
/// stats tracking was enabled.
private final long _rawIngestSizeInBytes;

/// On-disk byte size of the forward index file for this column in this segment.
private final long _onDiskSizeInBytes;

/// Compression ratio (`rawIngestSizeInBytes / onDiskSizeInBytes`). `0` when unavailable.
private final double _compressionRatio;

/// Compression codec name (e.g. `"ZSTANDARD"`, `"LZ4"`, `"SNAPPY"`, `"PASS_THROUGH"`),
/// {@link #CODEC_DICT_ENCODED} for dictionary-encoded columns, or `"MIXED"` when segments in the same table use
/// different codecs for this column.
private final String _codec;

/// Names of all indexes present on this column in this segment (e.g. `["forward_index", "inverted_index"]`).
private final List<String> _indexes;

/// Per-codec breakdown. Null unless codec is `"MIXED"` — in that case maps codec name to sizes and segment count.
private final Map<String, CodecBreakdownEntry> _codecBreakdown;

@JsonCreator
public ColumnCompressionStatsInfo(
@JsonProperty("column") String column,
@JsonProperty("rawIngestSizeInBytes") long rawIngestSizeInBytes,
@JsonProperty("onDiskSizeInBytes") long onDiskSizeInBytes,
@JsonProperty("compressionRatio") double compressionRatio,
@JsonProperty("codec") @Nullable String codec,
@JsonProperty("indexes") @Nullable List<String> indexes,
@JsonProperty("codecBreakdown") @Nullable Map<String, CodecBreakdownEntry> codecBreakdown) {
_column = column;
_rawIngestSizeInBytes = rawIngestSizeInBytes;
_onDiskSizeInBytes = onDiskSizeInBytes;
_compressionRatio = compressionRatio;
_codec = codec;
_indexes = indexes;
_codecBreakdown = codecBreakdown;
}

public String getColumn() {
return _column;
}

public long getRawIngestSizeInBytes() {
return _rawIngestSizeInBytes;
}

public long getOnDiskSizeInBytes() {
return _onDiskSizeInBytes;
}

public double getCompressionRatio() {
return _compressionRatio;
}

@Nullable
public String getCodec() {
return _codec;
}

@Nullable
@JsonInclude(JsonInclude.Include.NON_NULL)
public List<String> getIndexes() {
return _indexes;
}

@Nullable
@JsonInclude(JsonInclude.Include.NON_NULL)
public Map<String, CodecBreakdownEntry> getCodecBreakdown() {
return _codecBreakdown;
}

/// Per-codec breakdown entry in the {@code codecBreakdown} map. Only present when {@code codec="MIXED"}.
@JsonIgnoreProperties(ignoreUnknown = true)
public static class CodecBreakdownEntry {
private final int _segments;
private final long _rawIngestSizeInBytes;
private final long _onDiskSizeInBytes;

@JsonCreator
public CodecBreakdownEntry(
@JsonProperty("segments") int segments,
@JsonProperty("rawIngestSizeInBytes") long rawIngestSizeInBytes,
@JsonProperty("onDiskSizeInBytes") long onDiskSizeInBytes) {
_segments = segments;
_rawIngestSizeInBytes = rawIngestSizeInBytes;
_onDiskSizeInBytes = onDiskSizeInBytes;
}

public int getSegments() {
return _segments;
}

public long getRawIngestSizeInBytes() {
return _rawIngestSizeInBytes;
}

public long getOnDiskSizeInBytes() {
return _onDiskSizeInBytes;
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.pinot.common.restlet.resources;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.annotation.JsonProperty;


/// Table-level compression statistics summary, aggregated across all servers for a sub-table type
/// (offline or realtime). Reported under the `compressionStats` key in the
/// `GET /tables/{tableName}/size` response.
///
/// Sizes are "per replica" — each segment's contribution is the maximum reported across its replicas,
/// so the total reflects a single logical copy of the data rather than the physical replication factor.
///
/// `isPartialCoverage` is true when one or more segments lack stats (e.g. built before
/// `compressionStatsEnabled` was set), meaning the ratio is computed from a subset of segments only.
@JsonIgnoreProperties(ignoreUnknown = true)
public class CompressionStatsSummary {
/// Sum of per-replica max raw ingest forward index sizes across all segments that have stats.
private final long _rawIngestSizePerReplicaInBytes;

/// Sum of per-replica max on-disk forward index sizes across all segments that have stats.
private final long _onDiskSizePerReplicaInBytes;

/// Overall ratio of raw to compressed size (`rawSize / compressedSize`). `0` when no segments have stats.
private final double _compressionRatio;

/// Number of segments that have compression stats (built with `compressionStatsEnabled=true`).
private final int _segmentsWithStats;

/// Total number of segments in the sub-table.
private final int _totalSegments;

/// True when `segmentsWithStats < totalSegments`, meaning the ratio covers only a subset of segments.
private final boolean _isPartialCoverage;

@JsonCreator
public CompressionStatsSummary(
@JsonProperty("rawIngestSizePerReplicaInBytes") long rawIngestSizePerReplicaInBytes,
@JsonProperty("onDiskSizePerReplicaInBytes") long onDiskSizePerReplicaInBytes,
@JsonProperty("compressionRatio") double compressionRatio,
@JsonProperty("segmentsWithStats") int segmentsWithStats,
@JsonProperty("totalSegments") int totalSegments,
@JsonProperty("isPartialCoverage") boolean isPartialCoverage) {
_rawIngestSizePerReplicaInBytes = rawIngestSizePerReplicaInBytes;
_onDiskSizePerReplicaInBytes = onDiskSizePerReplicaInBytes;
_compressionRatio = compressionRatio;
_segmentsWithStats = segmentsWithStats;
_totalSegments = totalSegments;
_isPartialCoverage = isPartialCoverage;
}

public long getRawIngestSizePerReplicaInBytes() {
return _rawIngestSizePerReplicaInBytes;
}

public long getOnDiskSizePerReplicaInBytes() {
return _onDiskSizePerReplicaInBytes;
}

public double getCompressionRatio() {
return _compressionRatio;
}

public int getSegmentsWithStats() {
return _segmentsWithStats;
}

public int getTotalSegments() {
return _totalSegments;
}

@JsonProperty("isPartialCoverage")
public boolean isPartialCoverage() {
return _isPartialCoverage;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,44 @@
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.annotation.JsonProperty;
import java.util.Map;
import javax.annotation.Nullable;


@JsonIgnoreProperties(ignoreUnknown = true)
public class SegmentSizeInfo {
private final String _segmentName;
private final long _diskSizeInBytes;
/// Segment-level aggregate raw ingest size across all columns that have stats. `-1` when unavailable.
private final long _rawIngestSizeBytes;
/// Segment-level aggregate on-disk size across all columns that have stats. `-1` when unavailable.
private final long _onDiskSizeBytes;
private final String _tier;
private final Map<String, ColumnCompressionStatsInfo> _columnCompressionStats;

public SegmentSizeInfo(String segmentName, long sizeBytes) {
this(segmentName, sizeBytes, -1, -1, null, null);
}

public SegmentSizeInfo(String segmentName, long sizeBytes, long rawIngestSizeBytes,
long onDiskSizeBytes, @Nullable String tier) {
this(segmentName, sizeBytes, rawIngestSizeBytes, onDiskSizeBytes, tier, null);
}

@JsonCreator
public SegmentSizeInfo(@JsonProperty("segmentName") String segmentName,
@JsonProperty("diskSizeInBytes") long sizeBytes) {
@JsonProperty("diskSizeInBytes") long sizeBytes,
@JsonProperty("rawIngestSizeBytes") long rawIngestSizeBytes,
@JsonProperty("onDiskSizeBytes") long onDiskSizeBytes,
@JsonProperty("tier") @Nullable String tier,
@JsonProperty("columnCompressionStats") @Nullable Map<String, ColumnCompressionStatsInfo>

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This per column stats may blow up the response, please make sure the REST API has an explicit param to ask for this, default should be off.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice Idea! Added a query param includeColumnStats. Including per column stats only if this param is passed and set to true.

columnCompressionStats) {
_segmentName = segmentName;
_diskSizeInBytes = sizeBytes;
_rawIngestSizeBytes = rawIngestSizeBytes;
_onDiskSizeBytes = onDiskSizeBytes;
_tier = tier;
_columnCompressionStats = columnCompressionStats;
}

public String getSegmentName() {
Expand All @@ -43,6 +69,24 @@ public long getDiskSizeInBytes() {
return _diskSizeInBytes;
}

public long getRawIngestSizeBytes() {
return _rawIngestSizeBytes;
}

public long getOnDiskSizeBytes() {
return _onDiskSizeBytes;
}

@Nullable
public String getTier() {
return _tier;
}

@Nullable
public Map<String, ColumnCompressionStatsInfo> getColumnCompressionStats() {
return _columnCompressionStats;
}

@Override
public boolean equals(Object o) {
if (this == o) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.pinot.common.restlet.resources;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.annotation.JsonProperty;
import java.util.Map;


/// Storage breakdown by tier, reported under the `storageBreakdown` key in the
/// `GET /tables/{tableName}/size` and `GET /tables/{tableName}/metadata` responses.
/// Maps tier name (e.g. `"default"`, `"hotTier"`) to segment count and per-replica size.
@JsonIgnoreProperties(ignoreUnknown = true)
public class StorageBreakdownInfo {

private final Map<String, TierInfo> _tiers;

@JsonCreator
public StorageBreakdownInfo(@JsonProperty("tiers") Map<String, TierInfo> tiers) {
_tiers = tiers;
}

public Map<String, TierInfo> getTiers() {
return _tiers;
}

/// Segment count and per-replica on-disk size for a single storage tier.
@JsonIgnoreProperties(ignoreUnknown = true)
public static class TierInfo {
private final int _count;
private final long _sizePerReplicaInBytes;

@JsonCreator
public TierInfo(@JsonProperty("count") int count,
@JsonProperty("sizePerReplicaInBytes") long sizePerReplicaInBytes) {
_count = count;
_sizePerReplicaInBytes = sizePerReplicaInBytes;
}

public int getCount() {
return _count;
}

public long getSizePerReplicaInBytes() {
return _sizePerReplicaInBytes;
}
}
}
Loading
Loading