Skip to content

Commit ad891ec

Browse files
HDDS-12607. Parallelize recon tasks to speed up OM rocksdb reading tasks (#9243).
1 parent a55efa1 commit ad891ec

24 files changed

+933
-256
lines changed

hadoop-hdds/common/src/main/resources/ozone-default.xml

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4318,6 +4318,43 @@
43184318
recon rocks DB containerKeyTable
43194319
</description>
43204320
</property>
4321+
4322+
<property>
4323+
<name>ozone.recon.filesizecount.flush.db.max.threshold</name>
4324+
<value>200000</value>
4325+
<tag>OZONE, RECON, PERFORMANCE</tag>
4326+
<description>
4327+
Maximum number of entries the File Size Count task holds in its in-memory hash map before flushing to
4328+
the Recon Derby DB.
4329+
</description>
4330+
</property>
4331+
4332+
<property>
4333+
<name>ozone.recon.task.reprocess.max.iterators</name>
4334+
<value>5</value>
4335+
<tag>OZONE, RECON, PERFORMANCE</tag>
4336+
<description>
4337+
Maximum number of iterator threads to use for parallel table iteration during reprocess
4338+
</description>
4339+
</property>
4340+
4341+
<property>
4342+
<name>ozone.recon.task.reprocess.max.workers</name>
4343+
<value>20</value>
4344+
<tag>OZONE, RECON, PERFORMANCE</tag>
4345+
<description>
4346+
Maximum number of worker threads to use for parallel table processing during reprocess
4347+
</description>
4348+
</property>
4349+
4350+
<property>
4351+
<name>ozone.recon.task.reprocess.max.keys.in.memory</name>
4352+
<value>2000</value>
4353+
<tag>OZONE, RECON, PERFORMANCE</tag>
4354+
<description>
4355+
Maximum number of keys to batch in memory before handing them off to worker threads during parallel reprocess
4356+
</description>
4357+
</property>
43214358

43224359
<property>
43234360
<name>ozone.recon.heatmap.provider</name>

hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconAndAdminContainerCLI.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -457,6 +457,8 @@ private static void setupConfigKeys() {
457457
1, SECONDS);
458458
CONF.setTimeDuration(HddsConfigKeys.HDDS_SCM_WAIT_TIME_AFTER_SAFE_MODE_EXIT,
459459
0, SECONDS);
460+
// Configure multiple task threads for concurrent task execution
461+
CONF.setInt("ozone.recon.task.thread.count", 6);
460462
CONF.set(OzoneConfigKeys.OZONE_SCM_CLOSE_CONTAINER_WAIT_DURATION, "2s");
461463
CONF.set(ScmConfigKeys.OZONE_SCM_PIPELINE_SCRUB_INTERVAL, "2s");
462464
CONF.set(ScmConfigKeys.OZONE_SCM_PIPELINE_DESTROY_TIMEOUT, "5s");

hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconContainerEndpoint.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,8 @@ public void init() throws Exception {
6565
OzoneConfiguration conf = new OzoneConfiguration();
6666
conf.set(OMConfigKeys.OZONE_DEFAULT_BUCKET_LAYOUT,
6767
OMConfigKeys.OZONE_BUCKET_LAYOUT_FILE_SYSTEM_OPTIMIZED);
68+
// Configure multiple task threads for concurrent task execution
69+
conf.setInt("ozone.recon.task.thread.count", 6);
6870
recon = new ReconService(conf);
6971
cluster = MiniOzoneCluster.newBuilder(conf)
7072
.setNumDatanodes(3)

hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconConstants.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,9 @@ public final class ReconConstants {
9393
// For file-size count reprocessing: ensure only one task truncates the table.
9494
public static final AtomicBoolean FILE_SIZE_COUNT_TABLE_TRUNCATED = new AtomicBoolean(false);
9595

96-
public static final AtomicBoolean CONTAINER_KEY_TABLES_TRUNCATED = new AtomicBoolean(false);
96+
// For container key mapper reprocessing: ensure only one task performs initialization
97+
// (truncates tables + clears shared map)
98+
public static final AtomicBoolean CONTAINER_KEY_MAPPER_INITIALIZED = new AtomicBoolean(false);
9799

98100
private ReconConstants() {
99101
// Never Constructed
@@ -105,6 +107,6 @@ private ReconConstants() {
105107
*/
106108
public static void resetTableTruncatedFlags() {
107109
FILE_SIZE_COUNT_TABLE_TRUNCATED.set(false);
108-
CONTAINER_KEY_TABLES_TRUNCATED.set(false);
110+
CONTAINER_KEY_MAPPER_INITIALIZED.set(false);
109111
}
110112
}

hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconServerConfigKeys.java

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,28 @@ public final class ReconServerConfigKeys {
162162
public static final long
163163
OZONE_RECON_CONTAINER_KEY_FLUSH_TO_DB_MAX_THRESHOLD_DEFAULT = 150 * 1000L;
164164

165+
public static final String
166+
OZONE_RECON_FILESIZECOUNT_FLUSH_TO_DB_MAX_THRESHOLD =
167+
"ozone.recon.filesizecount.flush.db.max.threshold";
168+
169+
public static final long
170+
OZONE_RECON_FILESIZECOUNT_FLUSH_TO_DB_MAX_THRESHOLD_DEFAULT = 200 * 1000L;
171+
172+
public static final String
173+
OZONE_RECON_TASK_REPROCESS_MAX_ITERATORS = "ozone.recon.task.reprocess.max.iterators";
174+
175+
public static final int OZONE_RECON_TASK_REPROCESS_MAX_ITERATORS_DEFAULT = 5;
176+
177+
public static final String
178+
OZONE_RECON_TASK_REPROCESS_MAX_WORKERS = "ozone.recon.task.reprocess.max.workers";
179+
180+
public static final int OZONE_RECON_TASK_REPROCESS_MAX_WORKERS_DEFAULT = 20;
181+
182+
public static final String
183+
OZONE_RECON_TASK_REPROCESS_MAX_KEYS_IN_MEMORY = "ozone.recon.task.reprocess.max.keys.in.memory";
184+
185+
public static final int OZONE_RECON_TASK_REPROCESS_MAX_KEYS_IN_MEMORY_DEFAULT = 2000;
186+
165187
public static final String OZONE_RECON_SCM_SNAPSHOT_TASK_INTERVAL_DELAY =
166188
"ozone.recon.scm.snapshot.task.interval.delay";
167189

hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/OzoneManagerServiceProviderImpl.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -750,11 +750,11 @@ public boolean syncDataFromOM() {
750750
fullSnapshotReconTaskUpdater.updateDetails();
751751
// Update the current OM metadata manager in task controller
752752
reconTaskController.updateOMMetadataManager(omMetadataManager);
753-
753+
754754
// Pass on DB update events to tasks that are listening.
755755
reconTaskController.consumeOMEvents(new OMUpdateEventBatch(
756756
omdbUpdatesHandler.getEvents(), omdbUpdatesHandler.getLatestSequenceNumber()), omMetadataManager);
757-
757+
758758
// Check if task reinitialization is needed due to buffer overflow or task failures
759759
boolean bufferOverflowed = reconTaskController.hasEventBufferOverflowed();
760760
boolean tasksFailed = reconTaskController.hasTasksFailed();

0 commit comments

Comments
 (0)