diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml
index 6db82dfb51fc..a55243c6d0f2 100644
--- a/hadoop-hdds/common/src/main/resources/ozone-default.xml
+++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml
@@ -3659,6 +3659,16 @@
       If it exceeds pending tasks will be cancelled.
     </description>
   </property>
+  <property>
+    <name>ozone.recon.dn.metrics.collection.thread.count</name>
+    <value>0</value>
+    <tag>OZONE, RECON, DN</tag>
+    <description>
+      Size of the thread pool Recon uses to collect JMX metrics from DataNodes.
+      A value of 0 (or any non-positive value) means "auto" and selects
+      2 x Runtime.availableProcessors() at startup.
+    </description>
+  </property>
   <property>
     <name>ozone.scm.datanode.admin.monitor.interval</name>
     <value>30s</value>
diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconServerConfigKeys.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconServerConfigKeys.java
index b4da42d8f03a..0130cbbab7ee 100644
--- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconServerConfigKeys.java
+++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconServerConfigKeys.java
@@ -221,6 +221,11 @@ public final class ReconServerConfigKeys {
"ozone.recon.dn.metrics.collection.timeout";
public static final String OZONE_RECON_DN_METRICS_COLLECTION_TIMEOUT_DEFAULT = "10m";
+ public static final String OZONE_RECON_DN_METRICS_COLLECTION_THREAD_COUNT =
+ "ozone.recon.dn.metrics.collection.thread.count";
+ public static final int OZONE_RECON_DN_METRICS_COLLECTION_THREAD_COUNT_DEFAULT =
+ Runtime.getRuntime().availableProcessors() * 2;
+
/**
* Application-level ceiling on the number of ContainerIDs fetched from SCM
* per RPC call during container sync. The effective batch size is
diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/TarExtractor.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/TarExtractor.java
index b3bd17bdece4..49f2cdc40a3d 100644
--- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/TarExtractor.java
+++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/TarExtractor.java
@@ -34,10 +34,9 @@
import java.util.UUID;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
import java.util.concurrent.Future;
-import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadFactory;
-import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
@@ -73,7 +72,7 @@ public class TarExtractor {
public TarExtractor(int threadPoolSize, String threadNamePrefix) {
this.threadPoolSize = threadPoolSize;
this.threadFactory =
- new ThreadFactoryBuilder().setNameFormat("FetchOMDBTar-%d" + threadNamePrefix)
+ new ThreadFactoryBuilder().setNameFormat(threadNamePrefix + "FetchOMDBTar-%d")
.build();
}
@@ -163,8 +162,7 @@ private void writeFile(Path outputDir, String fileName, byte[] fileData) {
public void start() {
if (executorServiceStarted.compareAndSet(false, true)) {
- this.executor =
- new ThreadPoolExecutor(0, threadPoolSize, 60L, TimeUnit.SECONDS, new LinkedBlockingQueue<>(), threadFactory);
+ this.executor = Executors.newFixedThreadPool(threadPoolSize, threadFactory);
}
}
diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/DataNodeMetricsService.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/DataNodeMetricsService.java
index 6b3adf302daf..4ba64be1a64a 100644
--- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/DataNodeMetricsService.java
+++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/api/DataNodeMetricsService.java
@@ -19,6 +19,8 @@
import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_DN_METRICS_COLLECTION_MINIMUM_API_DELAY;
import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_DN_METRICS_COLLECTION_MINIMUM_API_DELAY_DEFAULT;
+import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_DN_METRICS_COLLECTION_THREAD_COUNT;
+import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_DN_METRICS_COLLECTION_THREAD_COUNT_DEFAULT;
import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_DN_METRICS_COLLECTION_TIMEOUT;
import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_DN_METRICS_COLLECTION_TIMEOUT_DEFAULT;
@@ -32,9 +34,10 @@
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
import java.util.concurrent.Future;
-import java.util.concurrent.LinkedBlockingQueue;
-import java.util.concurrent.ThreadPoolExecutor;
+import java.util.concurrent.ThreadFactory;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
@@ -61,11 +64,9 @@
public class DataNodeMetricsService {
private static final Logger LOG = LoggerFactory.getLogger(DataNodeMetricsService.class);
- private static final int MAX_POOL_SIZE = 500;
- private static final int KEEP_ALIVE_TIME = 5;
private static final int POLL_INTERVAL_MS = 200;
- private final ThreadPoolExecutor executorService;
+ private final ExecutorService executorService;
private final ReconNodeManager reconNodeManager;
private final boolean httpsEnabled;
private final int minimumApiDelayMs;
@@ -96,14 +97,15 @@ public DataNodeMetricsService(
OZONE_RECON_DN_METRICS_COLLECTION_TIMEOUT_DEFAULT, TimeUnit.MILLISECONDS);
this.metricsServiceProviderFactory = metricsServiceProviderFactory;
this.lastCollectionEndTime.set(-minimumApiDelayMs);
- int corePoolSize = Runtime.getRuntime().availableProcessors() * 2;
- this.executorService = new ThreadPoolExecutor(
- corePoolSize, MAX_POOL_SIZE,
- KEEP_ALIVE_TIME, TimeUnit.SECONDS,
- new LinkedBlockingQueue<>(),
- new ThreadFactoryBuilder()
- .setNameFormat("DataNodeMetricsCollector-%d")
- .build());
+ int corePoolSize = config.getInt(OZONE_RECON_DN_METRICS_COLLECTION_THREAD_COUNT,
+ OZONE_RECON_DN_METRICS_COLLECTION_THREAD_COUNT_DEFAULT);
+ corePoolSize = corePoolSize > 0
+ ? corePoolSize
+ : OZONE_RECON_DN_METRICS_COLLECTION_THREAD_COUNT_DEFAULT;
+ ThreadFactory threadFactory = new ThreadFactoryBuilder()
+ .setNameFormat("DataNodeMetricsCollector-%d")
+ .build();
+ this.executorService = Executors.newFixedThreadPool(corePoolSize, threadFactory);
}
/**
diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/OzoneManagerServiceProviderImpl.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/OzoneManagerServiceProviderImpl.java
index dca33c759b80..cd62b2160daf 100644
--- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/OzoneManagerServiceProviderImpl.java
+++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/OzoneManagerServiceProviderImpl.java
@@ -229,7 +229,7 @@ public OzoneManagerServiceProviderImpl(
new ThreadFactoryBuilder().setNameFormat(threadNamePrefix + "SyncOM-%d")
.build();
// Number of parallel workers
- int omDBTarProcessorThreadCount = Math.max(64, Runtime.getRuntime().availableProcessors());
+ int omDBTarProcessorThreadCount = Math.min(64, Runtime.getRuntime().availableProcessors());
this.reconContext = reconContext;
this.taskStatusUpdaterManager = taskStatusUpdaterManager;
this.omDBLagThreshold = configuration.getLong(RECON_OM_DELTA_UPDATE_LAG_THRESHOLD,
diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/TestTarExtractor.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/TestTarExtractor.java
new file mode 100644
index 000000000000..5a21a2f20323
--- /dev/null
+++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/TestTarExtractor.java
@@ -0,0 +1,148 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.recon;
+
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.mockito.Mockito.any;
+import static org.mockito.Mockito.eq;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.mockStatic;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ThreadFactory;
+import java.util.concurrent.TimeUnit;
+import org.junit.jupiter.api.Test;
+import org.mockito.ArgumentCaptor;
+import org.mockito.MockedStatic;
+
+/**
+ * Tests for {@link TarExtractor}.
+ */
+public class TestTarExtractor {
+
+  /**
+   * Verifies that {@code start()} requests a fixed-size pool from
+   * {@link Executors} using the pool size given to the constructor.
+   */
+  @Test
+  public void testStartCreatesFixedThreadPoolWithConfiguredSize() {
+    int poolSize = 8;
+    String threadPrefix = "TestPrefix-";
+    ExecutorService mockExecutor = mock(ExecutorService.class);
+
+    // Construct outside mockStatic block so ThreadFactoryBuilder can
+    // use the real Executors.defaultThreadFactory() internally.
+    TarExtractor extractor = new TarExtractor(poolSize, threadPrefix);
+
+    try (MockedStatic<Executors> executorsMock = mockStatic(Executors.class)) {
+      executorsMock.when(() -> Executors.newFixedThreadPool(
+          eq(poolSize), any(ThreadFactory.class))).thenReturn(mockExecutor);
+
+      extractor.start();
+
+      executorsMock.verify(() -> Executors.newFixedThreadPool(
+          eq(poolSize), any(ThreadFactory.class)));
+    }
+  }
+
+  /**
+   * Verifies that the thread factory handed to the pool names its threads
+   * with the prefix given to the constructor.
+   */
+  @Test
+  public void testThreadFactoryUsesConfiguredPrefix() {
+    int poolSize = 4;
+    String threadPrefix = "MyCustomPrefix-";
+    ExecutorService mockExecutor = mock(ExecutorService.class);
+    TarExtractor extractor = new TarExtractor(poolSize, threadPrefix);
+
+    ArgumentCaptor<ThreadFactory> factoryCaptor =
+        ArgumentCaptor.forClass(ThreadFactory.class);
+
+    try (MockedStatic<Executors> executorsMock = mockStatic(Executors.class)) {
+      executorsMock.when(() -> Executors.newFixedThreadPool(
+          eq(poolSize), any(ThreadFactory.class))).thenReturn(mockExecutor);
+
+      extractor.start();
+
+      executorsMock.verify(() -> Executors.newFixedThreadPool(
+          eq(poolSize), factoryCaptor.capture()));
+
+      ThreadFactory capturedFactory = factoryCaptor.getValue();
+      Thread thread = capturedFactory.newThread(() -> {
+      });
+      assertTrue(thread.getName().startsWith(threadPrefix),
+          "Thread name should start with configured prefix, but was: "
+              + thread.getName());
+    }
+  }
+
+  /**
+   * Verifies that {@code stop()} shuts down the executor obtained by
+   * {@code start()}.
+   */
+  @Test
+  public void testStopShutsDownExecutor() throws InterruptedException {
+    int poolSize = 4;
+    String threadPrefix = "ShutdownTest-";
+    ExecutorService mockExecutor = mock(ExecutorService.class);
+
+    // Construct outside mockStatic block.
+    TarExtractor extractor = new TarExtractor(poolSize, threadPrefix);
+
+    try (MockedStatic<Executors> executorsMock = mockStatic(Executors.class)) {
+      executorsMock.when(() -> Executors.newFixedThreadPool(
+          eq(poolSize), any(ThreadFactory.class))).thenReturn(mockExecutor);
+      // Report prompt termination in case stop() waits on the executor.
+      when(mockExecutor.awaitTermination(60, TimeUnit.SECONDS))
+          .thenReturn(true);
+
+      extractor.start();
+      extractor.stop();
+
+      verify(mockExecutor).shutdown();
+    }
+  }
+
+  /**
+   * Verifies that calling {@code start()} twice creates the pool only once.
+   */
+  @Test
+  public void testStartIsIdempotent() {
+    int poolSize = 4;
+    String threadPrefix = "IdempotentTest-";
+    ExecutorService mockExecutor = mock(ExecutorService.class);
+    TarExtractor extractor = new TarExtractor(poolSize, threadPrefix);
+
+    try (MockedStatic<Executors> executorsMock = mockStatic(Executors.class)) {
+      executorsMock.when(() -> Executors.newFixedThreadPool(
+          eq(poolSize), any(ThreadFactory.class))).thenReturn(mockExecutor);
+
+      extractor.start();
+      extractor.start(); // second call should be a no-op
+
+      // newFixedThreadPool should only be called once
+      // (MockedStatic.verify without a mode checks for exactly one call).
+      executorsMock.verify(() -> Executors.newFixedThreadPool(
+          eq(poolSize), any(ThreadFactory.class)));
+    }
+  }
+}