diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionRemoteProcedureBase.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionRemoteProcedureBase.java index fe8c9bfdd7c1..978fb419ac21 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionRemoteProcedureBase.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionRemoteProcedureBase.java @@ -287,7 +287,12 @@ private TransitRegionStateProcedure getParent(MasterProcedureEnv env) { } private void unattach(MasterProcedureEnv env) { - getParent(env).unattachRemoteProc(this); + TransitRegionStateProcedure parent = getParent(env); + if (parent != null) { + parent.unattachRemoteProc(this); + } + // If parent is null, it means parent has already completed and been moved to 'completed' map. + // No need to unattach in this case. } private CompletableFuture getFuture() { @@ -429,7 +434,21 @@ protected void deserializeStateData(ProcedureStateSerializer serializer) throws @Override protected void afterReplay(MasterProcedureEnv env) { - getParent(env).attachRemoteProc(this); + TransitRegionStateProcedure parent = getParent(env); + if (parent != null) { + parent.attachRemoteProc(this); + } else { + // Parent procedure has already completed. This can happen if: + // 1. Parent TRSP completed and was moved to 'completed' map + // 2. Master crashed before this child procedure was cleaned up + // 3. On restart, parent is in 'completed' map, child is in 'procedures' map + // 4. getProcedure() only checks 'procedures' map, so returns null + // This orphaned child procedure will be cleaned up by the procedure executor. + LOG.warn( + "Parent procedure {} not found for {}, region={}. " + + "Parent may have already completed. This procedure will be cleaned up.", + getParentProcId(), this, region.getEncodedName()); + } } @Override diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionRemoteProcedureBaseOrphanAfterReplay.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionRemoteProcedureBaseOrphanAfterReplay.java new file mode 100644 index 000000000000..522e28f2a3c6 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionRemoteProcedureBaseOrphanAfterReplay.java @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.assignment; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.RegionInfoBuilder; +import org.apache.hadoop.hbase.master.MasterServices; +import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; +import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; +import org.apache.hadoop.hbase.testclassification.MasterTests; +import org.apache.hadoop.hbase.testclassification.SmallTests; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Test for HBASE-29806: RegionRemoteProcedureBase.afterReplay() causes NPE when parent procedure + * has already completed. + *

+ * This test verifies that afterReplay() handles the case where the parent + * TransitRegionStateProcedure has already completed and been moved to the 'completed' map, while + * the child RegionRemoteProcedureBase is still in the 'procedures' map during restart. + */ +@Category({ MasterTests.class, SmallTests.class }) +public class TestRegionRemoteProcedureBaseOrphanAfterReplay { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestRegionRemoteProcedureBaseOrphanAfterReplay.class); + + private static final Logger LOG = + LoggerFactory.getLogger(TestRegionRemoteProcedureBaseOrphanAfterReplay.class); + + /** + * Test that afterReplay() does not throw NPE when the parent procedure is not found. + *

+ * This simulates the scenario where: + *

    + *
  1. Parent TransitRegionStateProcedure has completed and been moved to 'completed' map
  2. + *
  3. Child RegionRemoteProcedureBase is still pending in 'procedures' map
  4. + *
  5. During restart, afterReplay() is called on the child
  6. + *
  7. getProcedure(parentProcId) returns null because it only checks 'procedures' map
  8. + *
+ * Before the fix, this would cause a NullPointerException. After the fix, it should log a warning + * and return gracefully. + */ + @Test + public void testAfterReplayWithOrphanedChildProcedure() { + // Create mock environment + MasterProcedureEnv env = mock(MasterProcedureEnv.class); + MasterServices masterServices = mock(MasterServices.class); + @SuppressWarnings("unchecked") + ProcedureExecutor procExecutor = mock(ProcedureExecutor.class); + + when(env.getMasterServices()).thenReturn(masterServices); + when(masterServices.getMasterProcedureExecutor()).thenReturn(procExecutor); + + // When getProcedure is called with any procId, return null + // This simulates the parent being in the 'completed' map instead of 'procedures' map + when(procExecutor.getProcedure(1L)).thenReturn(null); + + // Create an OpenRegionProcedure (a concrete subclass of RegionRemoteProcedureBase) + // Use the no-arg constructor and set fields via reflection or by directly testing + RegionInfo regionInfo = RegionInfoBuilder.newBuilder(TableName.valueOf("test-table")).build(); + ServerName serverName = ServerName.valueOf("localhost", 12345, 1); + + // Create a test subclass that allows us to set the parent proc id + TestableOpenRegionProcedure proc = new TestableOpenRegionProcedure(regionInfo, serverName); + proc.setParentProcId(1L); + + // Call afterReplay - this should NOT throw NPE + // Before the fix, this would throw: java.lang.NullPointerException + LOG.info("Calling afterReplay on orphaned child procedure..."); + proc.afterReplay(env); + LOG.info("afterReplay completed successfully without NPE"); + + // If we reach here without exception, the test passes + } + + /** + * A testable subclass of OpenRegionProcedure that allows us to set the parent proc id for testing + * purposes. + */ + private static class TestableOpenRegionProcedure extends OpenRegionProcedure { + private long parentProcId; + + public TestableOpenRegionProcedure(RegionInfo region, ServerName targetServer) { + super(); + this.region = region; + this.targetServer = targetServer; + } + + public void setParentProcId(long parentProcId) { + this.parentProcId = parentProcId; + } + + @Override + public long getParentProcId() { + return this.parentProcId; + } + + @Override + public boolean hasParent() { + return true; + } + } +}