apache · peter-toth · May 27, 2026 · Jun 8, 2026 · Jun 9, 2026
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java
@@ -1489,9 +1489,34 @@ public ColumnIndexStore getColumnIndexStore(int blockIndex) {
     return ciStore;
   }
 
-  private RowRanges getRowRanges(int blockIndex) {
-    assert FilterCompat.isFilteringRequired(options.getRecordFilter())
-        : "Should not be invoked if filter is null or NOOP";
+  /**
+   * Computes the {@link RowRanges} within the given row group that may pass the configured filter
+   * (set via {@link ParquetReadOptions} or {@link ParquetInputFormat#setFilterPredicate}). If no
+   * filter is configured, returns a {@link RowRanges} covering all rows in the row group. If the
+   * row group has no rows, returns {@link RowRanges#EMPTY}.
+   *
+   * <p>This computation is metadata-only: it consults each filter-referenced column's column
+   * index from the file footer; no column data is read from disk. The result can be passed to
+   * {@link #readFilteredRowGroup(int, RowRanges)} (intersected with any caller-supplied row
+   * ranges if desired) to read only the matching pages.
+   *
+   * @param blockIndex the row group (block) index
+   * @return row ranges within the block that may pass the configured filter
+   * @throws IllegalArgumentException if {@code blockIndex} is out of range
+   */
+  public RowRanges getRowRanges(int blockIndex) {
+    if (blockIndex < 0 || blockIndex >= blocks.size()) {
+      throw new IllegalArgumentException(String.format(
+          "Invalid block index %s, the valid block index range are: [%s, %s]",
+          blockIndex, 0, blocks.size() - 1));
+    }
+    long rowCount = blocks.get(blockIndex).getRowCount();
+    if (rowCount == 0L) {
+      return RowRanges.EMPTY;
+    }
+    if (!FilterCompat.isFilteringRequired(options.getRecordFilter())) {
+      return RowRanges.createSingle(rowCount);
+    }
     RowRanges rowRanges = blockRowRanges.get(blockIndex);
     if (rowRanges == null) {
       rowRanges = ColumnIndexFilter.calculateRowRanges(

diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetFileReaderRowRanges.java b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetFileReaderRowRanges.java
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.hadoop;
+
+import static org.apache.parquet.hadoop.ParquetFileWriter.Mode.OVERWRITE;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertThrows;
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.io.IOException;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.parquet.HadoopReadOptions;
+import org.apache.parquet.ParquetReadOptions;
+import org.apache.parquet.example.data.Group;
+import org.apache.parquet.example.data.simple.SimpleGroupFactory;
+import org.apache.parquet.hadoop.example.ExampleParquetWriter;
+import org.apache.parquet.hadoop.metadata.BlockMetaData;
+import org.apache.parquet.hadoop.util.HadoopInputFile;
+import org.apache.parquet.internal.filter2.columnindex.RowRanges;
+import org.apache.parquet.schema.MessageType;
+import org.apache.parquet.schema.MessageTypeParser;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+
+/** Tests {@link ParquetFileReader#getRowRanges(int)} using a reader whose options set no record filter. */
+public class TestParquetFileReaderRowRanges {
+
+  private static final MessageType SCHEMA =
+      MessageTypeParser.parseMessageType("message test { required int64 id; required int64 grp; }");
+  private static final int ROW_COUNT = 10_000;
+
+  @Rule
+  public final TemporaryFolder temp = new TemporaryFolder();
+
+  private Path file;
+
+  /** Writes {@code ROW_COUNT} rows to a new file in the temporary folder before each test. */
+  @Before
+  public void writeFile() throws IOException {
+    File placeholder = temp.newFile();
+    placeholder.delete();
+    file = new Path(placeholder.toURI());
+    SimpleGroupFactory groups = new SimpleGroupFactory(SCHEMA);
+
+    // The small page size makes each column chunk span many pages.
+    try (ParquetWriter<Group> out = ExampleParquetWriter.builder(file)
+        .withType(SCHEMA)
+        .withWriteMode(OVERWRITE)
+        .withRowGroupSize(64L * 1024 * 1024)
+        .withPageSize(4 * 1024)
+        .build()) {
+      for (long row = 0; row < ROW_COUNT; row++) {
+        out.write(groups.newGroup().append("id", row).append("grp", row % 8));
+      }
+    }
+  }
+
+  /** Opens the written file with read options built from a freshly created Hadoop configuration. */
+  private ParquetFileReader openReader() throws IOException {
+    Configuration hadoopConf = new Configuration();
+    ParquetReadOptions readOptions = HadoopReadOptions.builder(hadoopConf).build();
+    return ParquetFileReader.open(HadoopInputFile.fromPath(file, hadoopConf), readOptions);
+  }
+
+  /** With no filter set, the returned ranges must account for every row of the only row group. */
+  @Test
+  public void getRowRangesWithoutFilterCoversAllRows() throws IOException {
+    try (ParquetFileReader reader = openReader()) {
+      assertEquals(1, reader.getRowGroups().size());
+      BlockMetaData onlyRowGroup = reader.getRowGroups().get(0);
+      RowRanges allRows = reader.getRowRanges(0);
+      assertEquals(onlyRowGroup.getRowCount(), allRows.rowCount());
+      assertTrue(allRows.isOverlapping(0L, onlyRowGroup.getRowCount() - 1));
+    }
+  }
+
+  /** A negative index, or one equal to the number of row groups, must be rejected. */
+  @Test
+  public void getRowRangesRejectsOutOfRangeBlockIndex() throws IOException {
+    try (ParquetFileReader reader = openReader()) {
+      int onePastLast = reader.getRowGroups().size();
+      assertThrows(IllegalArgumentException.class, () -> reader.getRowRanges(-1));
+      assertThrows(IllegalArgumentException.class, () -> reader.getRowRanges(onePastLast));
+    }
+  }
+}