diff --git a/src/edu/umass/cs/gigapaxos/PaxosAcceptor.java b/src/edu/umass/cs/gigapaxos/PaxosAcceptor.java index b274e10c..a847a908 100644 --- a/src/edu/umass/cs/gigapaxos/PaxosAcceptor.java +++ b/src/edu/umass/cs/gigapaxos/PaxosAcceptor.java @@ -76,15 +76,15 @@ public class PaxosAcceptor { .isLoggingEnabled() || SQLPaxosLogger.isJournalingEnabled(); // active but never run for coordinator yet -protected boolean notRunYet() { - return this.state == (byte)STATES.ACTIVE_1.ordinal(); -} + protected boolean notRunYet() { + return this.state == (byte)STATES.ACTIVE_1.ordinal(); + } -protected void setActive2() { - this.state = (byte)STATES.ACTIVE_2.ordinal(); -} + protected void setActive2() { + this.state = (byte)STATES.ACTIVE_2.ordinal(); + } -protected static enum STATES { + protected static enum STATES { RECOVERY, ACTIVE_1, // active, haven't yet run for coordinator ACTIVE_2, // active, have run for coordinator at least once diff --git a/src/edu/umass/cs/gigapaxos/PaxosConfig.java b/src/edu/umass/cs/gigapaxos/PaxosConfig.java index ba0f4556..ac64aed2 100644 --- a/src/edu/umass/cs/gigapaxos/PaxosConfig.java +++ b/src/edu/umass/cs/gigapaxos/PaxosConfig.java @@ -40,7 +40,6 @@ import edu.umass.cs.nio.SSLDataProcessingWorker; import edu.umass.cs.nio.SSLDataProcessingWorker.SSL_MODES; import edu.umass.cs.nio.interfaces.NodeConfig; -import edu.umass.cs.reconfiguration.interfaces.ReconfigurableNodeConfig; import edu.umass.cs.reconfiguration.interfaces.ReplicableRequest; import edu.umass.cs.utils.Config; import edu.umass.cs.utils.DiskMap; @@ -919,6 +918,24 @@ public static enum PC implements Config.ConfigurableEnum, FORWARD_PREEMPTED_REQUESTS(true), + /** + * If true, an Active Replica will start the coordinator election process + * (i.e., Phase 1 of Paxos or Leader Election) during startup, when an + * Active has not yet run for coordinator. + * See {@link PaxosInstanceStateMachine#notRunYet()}. 
+ * Note that this option improves liveness since replica groups + * will have an elected coordinator sooner. However, this option can cause + * flaky leadership during replica group startup, i.e., rapid leader changes + * before a long-running coordinator is elected. + *

+ * If false, there will be a coordinator chosen deterministically during + * startup, even when all the Nodes do not start with Phase 1 of Paxos. + * This option is more stable but can cause a liveness issue when the + * deterministically chosen coordinator suddenly crashes during startup, + * forcing all the Nodes to wait until the coordinator timeout. + */ + ENABLE_STARTUP_COORDINATOR_ELECTION(true), + /** * FIXME: The options below only exist for testing stringification diff --git a/src/edu/umass/cs/gigapaxos/PaxosInstanceStateMachine.java b/src/edu/umass/cs/gigapaxos/PaxosInstanceStateMachine.java index 956a7d82..5e5111b9 100644 --- a/src/edu/umass/cs/gigapaxos/PaxosInstanceStateMachine.java +++ b/src/edu/umass/cs/gigapaxos/PaxosInstanceStateMachine.java @@ -135,6 +135,9 @@ public class PaxosInstanceStateMachine implements Keyable, Pausable { private static final boolean ENABLE_INSTRUMENTATION = Config .getGlobalBoolean(PC.ENABLE_INSTRUMENTATION); + private static final boolean ENABLE_STARTUP_COORDINATOR_ELECTION = Config. + getGlobalBoolean(PC.ENABLE_STARTUP_COORDINATOR_ELECTION); + private static final boolean instrument() { return ENABLE_INSTRUMENTATION; } @@ -2152,11 +2155,14 @@ private synchronized PaxosCoordinator tryMakeCoordinator(Ballot newBallot) { this.paxosState.getSlot(), false); } -private boolean notRunYet() { + private boolean notRunYet() { + if (!ENABLE_STARTUP_COORDINATOR_ELECTION) { + return false; + } return this.paxosState.notRunYet(); -} + } -private String getBallots() { + private String getBallots() { return "[" + (this.coordinator != null ? "C:(" + (this.coordinator != null ? this.coordinator