From b4321ebfe92c75a0914362fa1df1d035e274847d Mon Sep 17 00:00:00 2001 From: Tsukilc <153273766+Tsukilc@users.noreply.github.com> Date: Thu, 6 Nov 2025 18:53:28 +0800 Subject: [PATCH 01/31] fix(server): disable server-role in StandardTaskScheduler --- .../hugegraph/task/StandardTaskScheduler.java | 43 ++------ .../apache/hugegraph/task/TaskManager.java | 97 +++---------------- 2 files changed, 21 insertions(+), 119 deletions(-) diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/StandardTaskScheduler.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/StandardTaskScheduler.java index 5f60792af1..577512059a 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/StandardTaskScheduler.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/StandardTaskScheduler.java @@ -17,18 +17,7 @@ package org.apache.hugegraph.task; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.concurrent.Callable; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; -import java.util.concurrent.TimeoutException; - +import com.google.common.collect.ImmutableMap; import org.apache.hugegraph.HugeException; import org.apache.hugegraph.HugeGraph; import org.apache.hugegraph.HugeGraphParams; @@ -57,7 +46,8 @@ import org.apache.tinkerpop.gremlin.structure.Vertex; import org.slf4j.Logger; -import com.google.common.collect.ImmutableMap; +import java.util.*; +import java.util.concurrent.*; public class StandardTaskScheduler implements TaskScheduler { @@ -211,30 +201,9 @@ public Future schedule(HugeTask task) { return this.submitTask(task); } - // Check this is on master for normal task schedule - this.checkOnMasterNode("schedule"); - if (this.serverManager().onlySingleNode() && !task.computer()) { - /* - * Speed up for single node, submit the task immediately, - * this code can be removed without affecting code logic - */ - task.status(TaskStatus.QUEUED); - task.server(this.serverManager().selfNodeId()); - this.save(task); - return this.submitTask(task); - } else { - /* - * Just set the SCHEDULING status and save the task, - * it will be scheduled by periodic scheduler worker - */ - task.status(TaskStatus.SCHEDULING); - this.save(task); - - // Notify master server to schedule and execute immediately - TaskManager.instance().notifyNewTask(task); - - return task; - } + task.status(TaskStatus.QUEUED); + this.save(task); + return this.submitTask(task); } private Future submitTask(HugeTask task) { diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/TaskManager.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/TaskManager.java index 277822a386..67c2831bd6 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/TaskManager.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/TaskManager.java @@ -17,15 +17,6 @@ package org.apache.hugegraph.task; -import java.util.Map; -import java.util.Queue; -import java.util.concurrent.Callable; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.ThreadPoolExecutor; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; - import 
org.apache.hugegraph.HugeException; import org.apache.hugegraph.HugeGraphParams; import org.apache.hugegraph.concurrent.PausableScheduledThreadPool; @@ -33,10 +24,13 @@ import org.apache.hugegraph.util.Consumers; import org.apache.hugegraph.util.E; import org.apache.hugegraph.util.ExecutorUtil; -import org.apache.hugegraph.util.LockUtil; import org.apache.hugegraph.util.Log; import org.slf4j.Logger; +import java.util.Map; +import java.util.Queue; +import java.util.concurrent.*; + /** * Central task management system that coordinates task scheduling and execution. * Manages task schedulers for different graphs and handles role-based execution. @@ -102,11 +96,6 @@ private TaskManager(int pool) { // For a schedule task to run, just one thread is ok this.schedulerExecutor = ExecutorUtil.newPausableScheduledThreadPool( 1, TASK_SCHEDULER); - // Start after 10x period time waiting for HugeGraphServer startup - this.schedulerExecutor.scheduleWithFixedDelay(this::scheduleOrExecuteJob, - 10 * SCHEDULE_PERIOD, - SCHEDULE_PERIOD, - TimeUnit.MILLISECONDS); } public void addScheduler(HugeGraphParams graph) { @@ -401,73 +390,17 @@ void notifyNewTask(HugeTask task) { } private void scheduleOrExecuteJob() { - // Called by scheduler timer - try { - for (TaskScheduler entry : this.schedulers.values()) { - // Maybe other threads close&remove scheduler at the same time - synchronized (entry) { - this.scheduleOrExecuteJobForGraph(entry); - } - } - } catch (Throwable e) { - LOG.error("Exception occurred when schedule job", e); - } - } - - private void scheduleOrExecuteJobForGraph(TaskScheduler scheduler) { - E.checkNotNull(scheduler, "scheduler"); - - if (scheduler instanceof StandardTaskScheduler) { - StandardTaskScheduler standardTaskScheduler = (StandardTaskScheduler) (scheduler); - ServerInfoManager serverManager = scheduler.serverManager(); - String spaceGraphName = scheduler.spaceGraphName(); - - LockUtil.lock(spaceGraphName, LockUtil.GRAPH_LOCK); - try { - /* - * Skip if: - * graph is closed (iterate schedulers before graph is closing) - * or - * graph is not initialized(maybe truncated or cleared). - * - * If graph is closing by other thread, current thread get - * serverManager and try lock graph, at the same time other - * thread deleted the lock-group, current thread would get - * exception 'LockGroup xx does not exists'. - * If graph is closed, don't call serverManager.initialized() - * due to it will reopen graph tx. - */ - if (!serverManager.graphIsReady()) { - return; - } - - // Update server heartbeat - serverManager.heartbeat(); - - /* - * Master will schedule tasks to suitable servers. - * Note a Worker may become to a Master, so elected-Master also needs to - * execute tasks assigned by previous Master when enableRoleElected=true. - * However, when enableRoleElected=false, a Master is only set by the - * config assignment, assigned-Master always stays the same state. 
- */ - if (serverManager.selfIsMaster()) { - standardTaskScheduler.scheduleTasksOnMaster(); - if (!this.enableRoleElected && !serverManager.onlySingleNode()) { - // assigned-Master + non-single-node don't need to execute tasks - return; - } - } - - // Execute queued tasks scheduled to current server - standardTaskScheduler.executeTasksOnWorker(serverManager.selfNodeId()); - - // Cancel tasks scheduled to current server - standardTaskScheduler.cancelTasksOnWorker(serverManager.selfNodeId()); - } finally { - LockUtil.unlock(spaceGraphName, LockUtil.GRAPH_LOCK); - } - } + //// Called by scheduler timer + //try { + // for (TaskScheduler entry : this.schedulers.values()) { + // // Maybe other threads close&remove scheduler at the same time + // synchronized (entry) { + // this.scheduleOrExecuteJobForGraph(entry); + // } + // } + //} catch (Throwable e) { + // LOG.error("Exception occurred when schedule job", e); + //} } private static final ThreadLocal CONTEXTS = new ThreadLocal<>(); From d19096df584763d5676dbd9bd4907db31fbe7cc5 Mon Sep 17 00:00:00 2001 From: Tsukilc <153273766+Tsukilc@users.noreply.github.com> Date: Thu, 6 Nov 2025 19:51:10 +0800 Subject: [PATCH 02/31] fix(server): disable server-role in StandardTaskScheduler --- .../hugegraph/task/StandardTaskScheduler.java | 113 +----- .../apache/hugegraph/core/TaskCoreTest.java | 325 +++++++++--------- 2 files changed, 176 insertions(+), 262 deletions(-) diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/StandardTaskScheduler.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/StandardTaskScheduler.java index 577512059a..6a0a9a018a 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/StandardTaskScheduler.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/StandardTaskScheduler.java @@ -46,7 +46,10 @@ import org.apache.tinkerpop.gremlin.structure.Vertex; import org.slf4j.Logger; -import java.util.*; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Map; import java.util.concurrent.*; public class StandardTaskScheduler implements TaskScheduler { @@ -242,7 +245,6 @@ public void initTaskCallable(HugeTask task) { @Override public synchronized void cancel(HugeTask task) { E.checkArgumentNotNull(task, "Task can't be null"); - this.checkOnMasterNode("cancel"); if (task.completed() || task.cancelling()) { return; @@ -250,36 +252,20 @@ public synchronized void cancel(HugeTask task) { LOG.info("Cancel task '{}' in status {}", task.id(), task.status()); - if (task.server() == null) { - // The task not scheduled to workers, set canceled immediately - assert task.status().code() < TaskStatus.QUEUED.code(); - if (task.status(TaskStatus.CANCELLED)) { - this.save(task); - return; - } - } else if (task.status(TaskStatus.CANCELLING)) { - // The task scheduled to workers, let the worker node to cancel + HugeTask memTask = this.tasks.get(task.id()); + if (memTask != null) { + boolean cancelled = memTask.cancel(true); + LOG.info("Task '{}' cancel result: {}", task.id(), cancelled); + return; + } + + if (task.status(TaskStatus.CANCELLED)) { this.save(task); - assert task.server() != null : task; - assert this.serverManager().selfIsMaster(); - if (!task.server().equals(this.serverManager().selfNodeId())) { - /* - * Remove the task from memory if it's running on worker node, - * but keep the task in memory if it's running on master node. 
- * Cancel-scheduling will read the task from backend store, if - * removed this instance from memory, there will be two task - * instances with the same id, and can't cancel the real task that - * is running but removed from memory. - */ - this.remove(task); - } - // Notify master server to schedule and execute immediately - TaskManager.instance().notifyNewTask(task); return; } throw new HugeException("Can't cancel task '%s' in status %s", - task.id(), task.status()); + task.id(), task.status()); } @Override @@ -287,79 +273,6 @@ public ServerInfoManager serverManager() { return this.serverManager; } - protected synchronized void scheduleTasksOnMaster() { - // Master server schedule all scheduling tasks to suitable worker nodes - Collection serverInfos = this.serverManager().allServerInfos(); - String page = this.supportsPaging() ? PageInfo.PAGE_NONE : null; - do { - Iterator> tasks = this.tasks(TaskStatus.SCHEDULING, PAGE_SIZE, page); - while (tasks.hasNext()) { - HugeTask task = tasks.next(); - if (task.server() != null) { - // Skip if already scheduled - continue; - } - - if (!this.serverManager.selfIsMaster()) { - return; - } - - HugeServerInfo server = this.serverManager().pickWorkerNode(serverInfos, task); - if (server == null) { - LOG.info("The master can't find suitable servers to " + - "execute task '{}', wait for next schedule", task.id()); - continue; - } - - // Found suitable server, update task status - assert server.id() != null; - task.server(server.id()); - task.status(TaskStatus.SCHEDULED); - this.save(task); - - // Update server load in memory, it will be saved at the ending - server.increaseLoad(task.load()); - - LOG.info("Scheduled task '{}' to server '{}'", task.id(), server.id()); - } - if (page != null) { - page = PageInfo.pageInfo(tasks); - } - } while (page != null); - - // Save to store - this.serverManager().updateServerInfos(serverInfos); - } - - protected void executeTasksOnWorker(Id server) { - String page = this.supportsPaging() ? PageInfo.PAGE_NONE : null; - do { - Iterator> tasks = this.tasks(TaskStatus.SCHEDULED, PAGE_SIZE, page); - while (tasks.hasNext()) { - HugeTask task = tasks.next(); - this.initTaskCallable(task); - Id taskServer = task.server(); - if (taskServer == null) { - LOG.warn("Task '{}' may not be scheduled", task.id()); - continue; - } - HugeTask memTask = this.tasks.get(task.id()); - if (memTask != null) { - assert memTask.status().code() > task.status().code(); - continue; - } - if (taskServer.equals(server)) { - task.status(TaskStatus.QUEUED); - this.save(task); - this.submitTask(task); - } - } - if (page != null) { - page = PageInfo.pageInfo(tasks); - } - } while (page != null); - } - protected void cancelTasksOnWorker(Id server) { String page = this.supportsPaging() ? 
PageInfo.PAGE_NONE : null; do { diff --git a/hugegraph-server/hugegraph-test/src/main/java/org/apache/hugegraph/core/TaskCoreTest.java b/hugegraph-server/hugegraph-test/src/main/java/org/apache/hugegraph/core/TaskCoreTest.java index 212ccc0588..e608fc28b6 100644 --- a/hugegraph-server/hugegraph-test/src/main/java/org/apache/hugegraph/core/TaskCoreTest.java +++ b/hugegraph-server/hugegraph-test/src/main/java/org/apache/hugegraph/core/TaskCoreTest.java @@ -17,11 +17,8 @@ package org.apache.hugegraph.core; -import java.util.Arrays; -import java.util.Iterator; -import java.util.Random; -import java.util.concurrent.TimeoutException; - +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; import org.apache.hugegraph.HugeException; import org.apache.hugegraph.HugeGraph; import org.apache.hugegraph.api.job.GremlinAPI.GremlinRequest; @@ -32,17 +29,16 @@ import org.apache.hugegraph.job.EphemeralJobBuilder; import org.apache.hugegraph.job.GremlinJob; import org.apache.hugegraph.job.JobBuilder; -import org.apache.hugegraph.task.HugeTask; -import org.apache.hugegraph.task.TaskCallable; -import org.apache.hugegraph.task.TaskScheduler; -import org.apache.hugegraph.task.TaskStatus; +import org.apache.hugegraph.task.*; import org.apache.hugegraph.testutil.Assert; import org.apache.hugegraph.testutil.Whitebox; import org.junit.Before; import org.junit.Test; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; +import java.util.Iterator; +import java.util.List; +import java.util.Random; +import java.util.concurrent.TimeoutException; public class TaskCoreTest extends BaseCoreTest { @@ -80,7 +76,7 @@ public void testTask() throws TimeoutException { scheduler.delete(id, false); }, e -> { Assert.assertContains("Can't delete incomplete task '88888'", - e.getMessage()); + e.getMessage()); }); task = scheduler.waitUntilTaskCompleted(task.id(), 10); @@ -89,8 +85,8 @@ public void testTask() throws TimeoutException { Assert.assertEquals(TaskStatus.SUCCESS, task.status()); Assert.assertEquals("test-task", scheduler.task(id).name()); - Assert.assertEquals("test-task", scheduler.tasks(Arrays.asList(id)) - .next().name()); + Assert.assertEquals("test-task", scheduler.tasks(List.of(id)) + .next().name()); Iterator> iter = scheduler.tasks(ImmutableList.of(id)); Assert.assertTrue(iter.hasNext()); @@ -144,7 +140,7 @@ protected void done() { new HugeTask<>(id, null, callable); }, e -> { Assert.assertContains("Invalid task id type, it must be number", - e.getMessage()); + e.getMessage()); }); Assert.assertThrows(NullPointerException.class, () -> { @@ -178,18 +174,18 @@ public void testEphemeralJob() throws TimeoutException { EphemeralJobBuilder builder = EphemeralJobBuilder.of(graph); builder.name("test-job-ephemeral") - .job(new EphemeralJob() { - @Override - public String type() { - return "test"; - } - - @Override - public Object execute() throws Exception { - sleepAWhile(); - return ImmutableMap.of("k1", 13579, "k2", "24680"); - } - }); + .job(new EphemeralJob() { + @Override + public String type() { + return "test"; + } + + @Override + public Object execute() throws Exception { + sleepAWhile(); + return ImmutableMap.of("k1", 13579, "k2", "24680"); + } + }); HugeTask task = builder.schedule(); Assert.assertEquals("test-job-ephemeral", task.name()); @@ -221,8 +217,8 @@ public void testGremlinJob() throws TimeoutException { JobBuilder builder = JobBuilder.of(graph); builder.name("test-job-gremlin") - .input(request.toJson()) - .job(new 
GremlinJob()); + .input(request.toJson()) + .job(new GremlinJob()); HugeTask task = builder.schedule(); Assert.assertEquals("test-job-gremlin", task.name()); @@ -249,22 +245,22 @@ public void testGremlinJobWithScript() throws TimeoutException { TaskScheduler scheduler = graph.taskScheduler(); String script = "schema=graph.schema();" + - "schema.propertyKey('name').asText().ifNotExist().create();" + - "schema.propertyKey('age').asInt().ifNotExist().create();" + - "schema.propertyKey('lang').asText().ifNotExist().create();" + - "schema.propertyKey('date').asDate().ifNotExist().create();" + - "schema.propertyKey('price').asInt().ifNotExist().create();" + - "schema.vertexLabel('person1').properties('name','age').ifNotExist()" + - ".create();" + - "schema.vertexLabel('person2').properties('name','age').ifNotExist()" + - ".create();" + - "schema.edgeLabel('knows').sourceLabel('person1').targetLabel('person2')." + - "properties('date').ifNotExist().create();" + - "for(int i = 0; i < 1000; i++) {" + - " p1=graph.addVertex(T.label,'person1','name','p1-'+i,'age',29);" + - " p2=graph.addVertex(T.label,'person2','name','p2-'+i,'age',27);" + - " p1.addEdge('knows',p2,'date','2016-01-10');" + - "}"; + "schema.propertyKey('name').asText().ifNotExist().create();" + + "schema.propertyKey('age').asInt().ifNotExist().create();" + + "schema.propertyKey('lang').asText().ifNotExist().create();" + + "schema.propertyKey('date').asDate().ifNotExist().create();" + + "schema.propertyKey('price').asInt().ifNotExist().create();" + + "schema.vertexLabel('person1').properties('name','age').ifNotExist()" + + ".create();" + + "schema.vertexLabel('person2').properties('name','age').ifNotExist()" + + ".create();" + + "schema.edgeLabel('knows').sourceLabel('person1').targetLabel('person2')." 
+ + "properties('date').ifNotExist().create();" + + "for(int i = 0; i < 1000; i++) {" + + " p1=graph.addVertex(T.label,'person1','name','p1-'+i,'age',29);" + + " p2=graph.addVertex(T.label,'person2','name','p2-'+i,'age',27);" + + " p1.addEdge('knows',p2,'date','2016-01-10');" + + "}"; HugeTask task = runGremlinJob(script); task = scheduler.waitUntilTaskCompleted(task.id(), 10); @@ -310,27 +306,27 @@ public void testGremlinJobWithSerializedResults() throws TimeoutException { TaskScheduler scheduler = graph.taskScheduler(); String script = "schema=graph.schema();" + - "schema.propertyKey('name').asText().ifNotExist().create();" + - "schema.vertexLabel('char').useCustomizeNumberId()" + - " .properties('name').ifNotExist().create();" + - "schema.edgeLabel('next').sourceLabel('char').targetLabel('char')" + - " .properties('name').ifNotExist().create();" + - "g.addV('char').property(id,1).property('name','A').as('a')" + - " .addV('char').property(id,2).property('name','B').as('b')" + - " .addV('char').property(id,3).property('name','C').as('c')" + - " .addV('char').property(id,4).property('name','D').as('d')" + - " .addV('char').property(id,5).property('name','E').as('e')" + - " .addV('char').property(id,6).property('name','F').as('f')" + - " .addE('next').from('a').to('b').property('name','ab')" + - " .addE('next').from('b').to('c').property('name','bc')" + - " .addE('next').from('b').to('d').property('name','bd')" + - " .addE('next').from('c').to('d').property('name','cd')" + - " .addE('next').from('c').to('e').property('name','ce')" + - " .addE('next').from('d').to('e').property('name','de')" + - " .addE('next').from('e').to('f').property('name','ef')" + - " .addE('next').from('f').to('d').property('name','fd')" + - " .iterate();" + - "g.tx().commit(); g.E().count();"; + "schema.propertyKey('name').asText().ifNotExist().create();" + + "schema.vertexLabel('char').useCustomizeNumberId()" + + " .properties('name').ifNotExist().create();" + + "schema.edgeLabel('next').sourceLabel('char').targetLabel('char')" + + " .properties('name').ifNotExist().create();" + + "g.addV('char').property(id,1).property('name','A').as('a')" + + " .addV('char').property(id,2).property('name','B').as('b')" + + " .addV('char').property(id,3).property('name','C').as('c')" + + " .addV('char').property(id,4).property('name','D').as('d')" + + " .addV('char').property(id,5).property('name','E').as('e')" + + " .addV('char').property(id,6).property('name','F').as('f')" + + " .addE('next').from('a').to('b').property('name','ab')" + + " .addE('next').from('b').to('c').property('name','bc')" + + " .addE('next').from('b').to('d').property('name','bd')" + + " .addE('next').from('c').to('d').property('name','cd')" + + " .addE('next').from('c').to('e').property('name','ce')" + + " .addE('next').from('d').to('e').property('name','de')" + + " .addE('next').from('e').to('f').property('name','ef')" + + " .addE('next').from('f').to('d').property('name','fd')" + + " .iterate();" + + "g.tx().commit(); g.E().count();"; HugeTask task = runGremlinJob(script); task = scheduler.waitUntilTaskCompleted(task.id(), 10); @@ -346,15 +342,15 @@ public void testGremlinJobWithSerializedResults() throws TimeoutException { task = scheduler.waitUntilTaskCompleted(task.id(), 10); Assert.assertEquals(TaskStatus.SUCCESS, task.status()); String expected = String.format("[{\"labels\":[[],[],[]],\"objects\":[" + - "{\"id\":1,\"label\":\"char\",\"type\":\"vertex\"," + - "\"properties\":{\"name\":\"A\"}}," + - "{\"id\":\"L1>%s>%s>>L2\",\"label\":\"next\"," + - 
"\"type\":\"edge\",\"outV\":1," + - "\"outVLabel\":\"char\",\"inV\":2,\"" + - "inVLabel\":\"char\",\"properties\":{\"name\":\"ab\"}}," + - "{\"id\":2,\"label\":\"char\",\"type\":\"vertex\"," + - "\"properties\":{\"name\":\"B\"}}" + - "]}]", edgeLabelId, edgeLabelId); + "{\"id\":1,\"label\":\"char\",\"type\":\"vertex\"," + + "\"properties\":{\"name\":\"A\"}}," + + "{\"id\":\"L1>%s>%s>>L2\",\"label\":\"next\"," + + "\"type\":\"edge\",\"outV\":1," + + "\"outVLabel\":\"char\",\"inV\":2,\"" + + "inVLabel\":\"char\",\"properties\":{\"name\":\"ab\"}}," + + "{\"id\":2,\"label\":\"char\",\"type\":\"vertex\"," + + "\"properties\":{\"name\":\"B\"}}" + + "]}]", edgeLabelId, edgeLabelId); Assert.assertEquals(expected, task.result()); script = "g.V(1).out().out().path()"; @@ -362,19 +358,19 @@ public void testGremlinJobWithSerializedResults() throws TimeoutException { task = scheduler.waitUntilTaskCompleted(task.id(), 10); Assert.assertEquals(TaskStatus.SUCCESS, task.status()); expected = "[{\"labels\":[[],[],[]],\"objects\":[" + - "{\"id\":1,\"label\":\"char\",\"type\":\"vertex\"," + - "\"properties\":{\"name\":\"A\"}}," + - "{\"id\":2,\"label\":\"char\",\"type\":\"vertex\"," + - "\"properties\":{\"name\":\"B\"}}," + - "{\"id\":3,\"label\":\"char\",\"type\":\"vertex\"," + - "\"properties\":{\"name\":\"C\"}}]}," + - "{\"labels\":[[],[],[]],\"objects\":[" + - "{\"id\":1,\"label\":\"char\",\"type\":\"vertex\"," + - "\"properties\":{\"name\":\"A\"}}," + - "{\"id\":2,\"label\":\"char\",\"type\":\"vertex\"," + - "\"properties\":{\"name\":\"B\"}}," + - "{\"id\":4,\"label\":\"char\",\"type\":\"vertex\"," + - "\"properties\":{\"name\":\"D\"}}]}]"; + "{\"id\":1,\"label\":\"char\",\"type\":\"vertex\"," + + "\"properties\":{\"name\":\"A\"}}," + + "{\"id\":2,\"label\":\"char\",\"type\":\"vertex\"," + + "\"properties\":{\"name\":\"B\"}}," + + "{\"id\":3,\"label\":\"char\",\"type\":\"vertex\"," + + "\"properties\":{\"name\":\"C\"}}]}," + + "{\"labels\":[[],[],[]],\"objects\":[" + + "{\"id\":1,\"label\":\"char\",\"type\":\"vertex\"," + + "\"properties\":{\"name\":\"A\"}}," + + "{\"id\":2,\"label\":\"char\",\"type\":\"vertex\"," + + "\"properties\":{\"name\":\"B\"}}," + + "{\"id\":4,\"label\":\"char\",\"type\":\"vertex\"," + + "\"properties\":{\"name\":\"D\"}}]}]"; Assert.assertEquals(expected, task.result()); script = "g.V(1).outE().inV().tree()"; @@ -382,16 +378,16 @@ public void testGremlinJobWithSerializedResults() throws TimeoutException { task = scheduler.waitUntilTaskCompleted(task.id(), 10); Assert.assertEquals(TaskStatus.SUCCESS, task.status()); expected = String.format("[[{\"key\":{\"id\":1,\"label\":\"char\",\"type\":\"vertex\"," + - "\"properties\":{\"name\":\"A\"}}," + - "\"value\":[" + - "{\"key\":{\"id\":\"L1>%s>%s>>L2\",\"label\":\"next\"," + - "\"type\":\"edge\",\"outV\":1," + - "\"outVLabel\":\"char\",\"inV\":2,\"inVLabel\":\"char\"," + - "\"properties\":{\"name\":\"ab\"}}," + - "\"value\":[{\"key\":{\"id\":2,\"label\":\"char\"," + - "\"type\":\"vertex\"," + - "\"properties\":{\"name\":\"B\"}},\"value\":[]}]}]}]]", - edgeLabelId, edgeLabelId); + "\"properties\":{\"name\":\"A\"}}," + + "\"value\":[" + + "{\"key\":{\"id\":\"L1>%s>%s>>L2\",\"label\":\"next\"," + + "\"type\":\"edge\",\"outV\":1," + + "\"outVLabel\":\"char\",\"inV\":2,\"inVLabel\":\"char\"," + + "\"properties\":{\"name\":\"ab\"}}," + + "\"value\":[{\"key\":{\"id\":2,\"label\":\"char\"," + + "\"type\":\"vertex\"," + + "\"properties\":{\"name\":\"B\"}},\"value\":[]}]}]}]]", + edgeLabelId, edgeLabelId); Assert.assertEquals(expected, 
task.result()); script = "g.V(1).out().out().tree()"; @@ -399,14 +395,14 @@ public void testGremlinJobWithSerializedResults() throws TimeoutException { task = scheduler.waitUntilTaskCompleted(task.id(), 10); Assert.assertEquals(TaskStatus.SUCCESS, task.status()); expected = "[[{\"key\":{\"id\":1,\"label\":\"char\",\"type\":\"vertex\"," + - "\"properties\":{\"name\":\"A\"}}," + - "\"value\":[{\"key\":{\"id\":2,\"label\":\"char\",\"type\":\"vertex\"," + - "\"properties\":{\"name\":\"B\"}}," + - "\"value\":[" + - "{\"key\":{\"id\":3,\"label\":\"char\",\"type\":\"vertex\",\"properties\":" + - "{\"name\":\"C\"}},\"value\":[]}," + - "{\"key\":{\"id\":4,\"label\":\"char\",\"type\":\"vertex\",\"properties\":" + - "{\"name\":\"D\"}},\"value\":[]}]}]}]]"; + "\"properties\":{\"name\":\"A\"}}," + + "\"value\":[{\"key\":{\"id\":2,\"label\":\"char\",\"type\":\"vertex\"," + + "\"properties\":{\"name\":\"B\"}}," + + "\"value\":[" + + "{\"key\":{\"id\":3,\"label\":\"char\",\"type\":\"vertex\",\"properties\":" + + "{\"name\":\"C\"}},\"value\":[]}," + + "{\"key\":{\"id\":4,\"label\":\"char\",\"type\":\"vertex\",\"properties\":" + + "{\"name\":\"D\"}},\"value\":[]}]}]}]]"; Assert.assertEquals(expected, task.result()); } @@ -417,8 +413,8 @@ public void testGremlinJobWithFailure() throws TimeoutException { JobBuilder builder = JobBuilder.of(graph); builder.name("test-job-gremlin") - .input("") - .job(new GremlinJob()); + .input("") + .job(new GremlinJob()); HugeTask task = builder.schedule(); task = scheduler.waitUntilTaskCompleted(task.id(), 10); Assert.assertEquals(TaskStatus.FAILED, task.status()); @@ -426,7 +422,7 @@ public void testGremlinJobWithFailure() throws TimeoutException { builder = JobBuilder.of(graph); builder.name("test-job-gremlin") - .job(new GremlinJob()); + .job(new GremlinJob()); task = builder.schedule(); scheduler.waitUntilTaskCompleted(task.id(), 10); task = scheduler.task(task.id()); @@ -435,8 +431,8 @@ public void testGremlinJobWithFailure() throws TimeoutException { builder = JobBuilder.of(graph); builder.name("test-job-gremlin") - .input("{}") - .job(new GremlinJob()); + .input("{}") + .job(new GremlinJob()); task = builder.schedule(); task = scheduler.waitUntilTaskCompleted(task.id(), 10); Assert.assertEquals(TaskStatus.FAILED, task.status()); @@ -444,8 +440,8 @@ public void testGremlinJobWithFailure() throws TimeoutException { builder = JobBuilder.of(graph); builder.name("test-job-gremlin") - .input("{\"gremlin\":8}") - .job(new GremlinJob()); + .input("{\"gremlin\":8}") + .job(new GremlinJob()); task = builder.schedule(); task = scheduler.waitUntilTaskCompleted(task.id(), 10); Assert.assertEquals(TaskStatus.FAILED, task.status()); @@ -453,8 +449,8 @@ public void testGremlinJobWithFailure() throws TimeoutException { builder = JobBuilder.of(graph); builder.name("test-job-gremlin") - .input("{\"gremlin\":\"\"}") - .job(new GremlinJob()); + .input("{\"gremlin\":\"\"}") + .job(new GremlinJob()); task = builder.schedule(); task = scheduler.waitUntilTaskCompleted(task.id(), 10); Assert.assertEquals(TaskStatus.FAILED, task.status()); @@ -462,8 +458,8 @@ public void testGremlinJobWithFailure() throws TimeoutException { builder = JobBuilder.of(graph); builder.name("test-job-gremlin") - .input("{\"gremlin\":\"\", \"bindings\":\"\"}") - .job(new GremlinJob()); + .input("{\"gremlin\":\"\", \"bindings\":\"\"}") + .job(new GremlinJob()); task = builder.schedule(); task = scheduler.waitUntilTaskCompleted(task.id(), 10); Assert.assertEquals(TaskStatus.FAILED, task.status()); @@ -471,8 +467,8 @@ 
public void testGremlinJobWithFailure() throws TimeoutException { builder = JobBuilder.of(graph); builder.name("test-job-gremlin") - .input("{\"gremlin\":\"\", \"bindings\":{}}") - .job(new GremlinJob()); + .input("{\"gremlin\":\"\", \"bindings\":{}}") + .job(new GremlinJob()); task = builder.schedule(); task = scheduler.waitUntilTaskCompleted(task.id(), 10); Assert.assertEquals(TaskStatus.FAILED, task.status()); @@ -480,8 +476,8 @@ public void testGremlinJobWithFailure() throws TimeoutException { builder = JobBuilder.of(graph); builder.name("test-job-gremlin") - .input("{\"gremlin\":\"\", \"bindings\":{}, \"language\":{}}") - .job(new GremlinJob()); + .input("{\"gremlin\":\"\", \"bindings\":{}, \"language\":{}}") + .job(new GremlinJob()); task = builder.schedule(); task = scheduler.waitUntilTaskCompleted(task.id(), 10); Assert.assertEquals(TaskStatus.FAILED, task.status()); @@ -489,8 +485,8 @@ public void testGremlinJobWithFailure() throws TimeoutException { builder = JobBuilder.of(graph); builder.name("test-job-gremlin") - .input("{\"gremlin\":\"\", \"bindings\":{}, \"language\":\"\"}") - .job(new GremlinJob()); + .input("{\"gremlin\":\"\", \"bindings\":{}, \"language\":\"\"}") + .job(new GremlinJob()); task = builder.schedule(); task = scheduler.waitUntilTaskCompleted(task.id(), 10); Assert.assertEquals(TaskStatus.FAILED, task.status()); @@ -498,14 +494,14 @@ public void testGremlinJobWithFailure() throws TimeoutException { builder = JobBuilder.of(graph); builder.name("test-job-gremlin") - .input("{\"gremlin\":\"\", \"bindings\":{}, " + - "\"language\":\"test\", \"aliases\":{}}") - .job(new GremlinJob()); + .input("{\"gremlin\":\"\", \"bindings\":{}, " + + "\"language\":\"test\", \"aliases\":{}}") + .job(new GremlinJob()); task = builder.schedule(); task = scheduler.waitUntilTaskCompleted(task.id(), 10); Assert.assertEquals(TaskStatus.FAILED, task.status()); Assert.assertContains("test is not an available GremlinScriptEngine", - task.result()); + task.result()); } @Test @@ -514,16 +510,16 @@ public void testGremlinJobWithError() throws TimeoutException { Assert.assertThrows(IllegalArgumentException.class, () -> { JobBuilder.of(graph) - .job(new GremlinJob()) - .schedule(); + .job(new GremlinJob()) + .schedule(); }, e -> { Assert.assertContains("Job name can't be null", e.getMessage()); }); Assert.assertThrows(IllegalArgumentException.class, () -> { JobBuilder.of(graph) - .name("test-job-gremlin") - .schedule(); + .name("test-job-gremlin") + .schedule(); }, e -> { Assert.assertContains("Job callable can't be null", e.getMessage()); }); @@ -541,7 +537,7 @@ public void testGremlinJobWithError() throws TimeoutException { }, e -> { Assert.assertContains("Task input size", e.getMessage()); Assert.assertContains("exceeded limit 16777216 bytes", - e.getMessage()); + e.getMessage()); }); } @@ -557,13 +553,16 @@ public void testGremlinJobAndCancel() throws TimeoutException { scheduler.cancel(task); task = scheduler.task(task.id()); - Assert.assertEquals(TaskStatus.CANCELLING, task.status()); + System.out.println(scheduler.getClass()); + if (scheduler.getClass().equals(DistributedTaskScheduler.class)) { + Assert.assertEquals(TaskStatus.CANCELLING, task.status()); + } task = scheduler.waitUntilTaskCompleted(task.id(), 10); Assert.assertEquals(TaskStatus.CANCELLED, task.status()); Assert.assertEquals("test-gremlin-job", task.name()); Assert.assertTrue(task.result(), task.result() == null || - task.result().endsWith("InterruptedException")); + task.result().endsWith("InterruptedException")); // 
Cancel success task HugeTask task2 = runGremlinJob("1+2"); @@ -583,22 +582,22 @@ public void testGremlinJobAndCancel() throws TimeoutException { task3 = scheduler.task(task3.id()); Assert.assertEquals(TaskStatus.FAILED, task3.status()); Assert.assertContains("LimitExceedException: Job results size 800001 " + - "has exceeded the max limit 800000", - task3.result()); + "has exceeded the max limit 800000", + task3.result()); // Cancel failure task with big results (task exceeded limit 16M) String bigResults = "def random = new Random(); def rs=[];" + - "for (i in 0..4) {" + - " def len = 1024 * 1024;" + - " def item = new StringBuilder(len);" + - " for (j in 0..len) { " + - " item.append(\"node:\"); " + - " item.append((char) random.nextInt(256)); " + - " item.append(\",\"); " + - " };" + - " rs.add(item);" + - "};" + - "rs;"; + "for (i in 0..4) {" + + " def len = 1024 * 1024;" + + " def item = new StringBuilder(len);" + + " for (j in 0..len) { " + + " item.append(\"node:\"); " + + " item.append((char) random.nextInt(256)); " + + " item.append(\",\"); " + + " };" + + " rs.add(item);" + + "};" + + "rs;"; HugeTask task4 = runGremlinJob(bigResults); task4 = scheduler.waitUntilTaskCompleted(task4.id(), 10); Assert.assertEquals(TaskStatus.FAILED, task4.status()); @@ -606,9 +605,9 @@ public void testGremlinJobAndCancel() throws TimeoutException { task4 = scheduler.task(task4.id()); Assert.assertEquals(TaskStatus.FAILED, task4.status()); Assert.assertContains("LimitExceedException: Task result size", - task4.result()); + task4.result()); Assert.assertContains("exceeded limit 16777216 bytes", - task4.result()); + task4.result()); } @Test @@ -617,11 +616,11 @@ public void testGremlinJobAndRestore() throws Exception { TaskScheduler scheduler = graph.taskScheduler(); String gremlin = "println('task start');" + - "for(int i=gremlinJob.progress(); i<=10; i++) {" + - " gremlinJob.updateProgress(i);" + - " Thread.sleep(200); " + - " println('sleep=>'+i);" + - "}; 100;"; + "for(int i=gremlinJob.progress(); i<=10; i++) {" + + " gremlinJob.updateProgress(i);" + + " Thread.sleep(200); " + + " println('sleep=>'+i);" + + "}; 100;"; HugeTask task = runGremlinJob(gremlin); sleepAWhile(200 * 6); @@ -629,22 +628,24 @@ public void testGremlinJobAndRestore() throws Exception { scheduler.cancel(task); task = scheduler.task(task.id()); - Assert.assertEquals(TaskStatus.CANCELLING, task.status()); + if (scheduler.getClass().equals(DistributedTaskScheduler.class)) { + Assert.assertEquals(TaskStatus.CANCELLING, task.status()); + } task = scheduler.waitUntilTaskCompleted(task.id(), 10); Assert.assertEquals(TaskStatus.CANCELLED, task.status()); Assert.assertTrue("progress=" + task.progress(), - 0 < task.progress() && task.progress() < 10); + 0 < task.progress() && task.progress() < 10); Assert.assertEquals(0, task.retries()); - Assert.assertEquals(null, task.result()); + Assert.assertNull(task.result()); HugeTask finalTask = task; Assert.assertThrows(IllegalArgumentException.class, () -> { Whitebox.invoke(scheduler.getClass(), "restore", scheduler, - finalTask); + finalTask); }, e -> { Assert.assertContains("No need to restore completed task", - e.getMessage()); + e.getMessage()); }); HugeTask task2 = scheduler.task(task.id()); @@ -652,7 +653,7 @@ public void testGremlinJobAndRestore() throws Exception { Whitebox.invoke(scheduler.getClass(), "restore", scheduler, task2); }, e -> { Assert.assertContains("No need to restore completed task", - e.getMessage()); + e.getMessage()); }); Whitebox.setInternalState(task2, "status", 
TaskStatus.RUNNING); @@ -679,8 +680,8 @@ private HugeTask runGremlinJob(String gremlin) { JobBuilder builder = JobBuilder.of(graph); builder.name("test-gremlin-job") - .input(request.toJson()) - .job(new GremlinJob()); + .input(request.toJson()) + .job(new GremlinJob()); return builder.schedule(); } From a32901a06391bb835fcd6e74f6d278377a8b05f8 Mon Sep 17 00:00:00 2001 From: Tsukilc <153273766+Tsukilc@users.noreply.github.com> Date: Fri, 7 Nov 2025 18:38:45 +0800 Subject: [PATCH 03/31] fix(server): disable server-role in StandardTaskScheduler --- .../apache/hugegraph/task/HugeServerInfo.java | 16 +-- .../hugegraph/task/ServerInfoManager.java | 106 ++---------------- .../hugegraph/task/StandardTaskScheduler.java | 66 ++--------- 3 files changed, 18 insertions(+), 170 deletions(-) diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/HugeServerInfo.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/HugeServerInfo.java index 71feb3f688..6bc789f873 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/HugeServerInfo.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/HugeServerInfo.java @@ -17,12 +17,6 @@ package org.apache.hugegraph.task; -import java.util.ArrayList; -import java.util.Date; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - import org.apache.hugegraph.HugeGraph; import org.apache.hugegraph.HugeGraphParams; import org.apache.hugegraph.backend.id.Id; @@ -43,6 +37,8 @@ import org.apache.tinkerpop.gremlin.structure.Vertex; import org.apache.tinkerpop.gremlin.structure.VertexProperty; +import java.util.*; + public class HugeServerInfo { // Unit millisecond @@ -209,14 +205,6 @@ public static HugeServerInfo fromVertex(Vertex vertex) { return serverInfo; } - public boolean suitableFor(HugeTask task, long now) { - if (task.computer() != this.role.computer()) { - return false; - } - return this.updateTime.getTime() + EXPIRED_INTERVAL >= now && - this.load() + task.load() <= this.maxLoad; - } - public static Schema schema(HugeGraphParams graph) { return new Schema(graph); } diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/ServerInfoManager.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/ServerInfoManager.java index bcef869017..af579bb124 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/ServerInfoManager.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/ServerInfoManager.java @@ -17,14 +17,7 @@ package org.apache.hugegraph.task; -import static org.apache.hugegraph.backend.query.Query.NO_LIMIT; - -import java.util.Collection; -import java.util.Iterator; -import java.util.Map; -import java.util.concurrent.Callable; -import java.util.concurrent.ExecutorService; - +import com.google.common.collect.ImmutableMap; import org.apache.hugegraph.HugeException; import org.apache.hugegraph.HugeGraph; import org.apache.hugegraph.HugeGraphParams; @@ -35,7 +28,6 @@ import org.apache.hugegraph.backend.query.QueryResults; import org.apache.hugegraph.backend.tx.GraphTransaction; import org.apache.hugegraph.exception.ConnectionException; -import org.apache.hugegraph.iterator.ListIterator; import org.apache.hugegraph.iterator.MapperIterator; import org.apache.hugegraph.masterelection.GlobalMasterInfo; import org.apache.hugegraph.schema.PropertyKey; @@ -50,7 +42,12 @@ import org.apache.tinkerpop.gremlin.structure.Vertex; import 
org.slf4j.Logger; -import com.google.common.collect.ImmutableMap; +import java.util.Iterator; +import java.util.Map; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutorService; + +import static org.apache.hugegraph.backend.query.Query.NO_LIMIT; public class ServerInfoManager { @@ -64,7 +61,7 @@ public class ServerInfoManager { private volatile GlobalMasterInfo globalNodeInfo; - private volatile boolean onlySingleNode; + private final boolean onlySingleNode; private volatile boolean closed; public ServerInfoManager(HugeGraphParams graph, ExecutorService dbExecutor) { @@ -228,48 +225,6 @@ protected boolean graphIsReady() { return !this.closed && this.graph.started() && this.graph.initialized(); } - protected synchronized HugeServerInfo pickWorkerNode(Collection servers, - HugeTask task) { - HugeServerInfo master = null; - HugeServerInfo serverWithMinLoad = null; - int minLoad = Integer.MAX_VALUE; - boolean hasWorkerNode = false; - long now = DateUtil.now().getTime(); - - // Iterate servers to find suitable one - for (HugeServerInfo server : servers) { - if (!server.alive()) { - continue; - } - if (server.role().master()) { - master = server; - continue; - } - hasWorkerNode = true; - if (!server.suitableFor(task, now)) { - continue; - } - if (server.load() < minLoad) { - minLoad = server.load(); - serverWithMinLoad = server; - } - } - - boolean singleNode = !hasWorkerNode; - if (singleNode != this.onlySingleNode) { - LOG.info("Switch only_single_node to {}", singleNode); - this.onlySingleNode = singleNode; - } - - // Only schedule to master if there are no workers and master are suitable - if (!hasWorkerNode) { - if (master != null && master.suitableFor(task, now)) { - serverWithMinLoad = master; - } - } - return serverWithMinLoad; - } - private GraphTransaction tx() { assert Thread.currentThread().getName().contains("server-info-db-worker"); return this.graph.systemTransaction(); @@ -299,33 +254,6 @@ private Id save(HugeServerInfo serverInfo) { }); } - private int save(Collection serverInfos) { - return this.call(() -> { - if (serverInfos.isEmpty()) { - return 0; - } - HugeServerInfo.Schema schema = HugeServerInfo.schema(this.graph); - if (!schema.existVertexLabel(HugeServerInfo.P.SERVER)) { - throw new HugeException("Schema is missing for %s", HugeServerInfo.P.SERVER); - } - // Save server info in batch - GraphTransaction tx = this.tx(); - int updated = 0; - for (HugeServerInfo server : serverInfos) { - if (!server.updated()) { - continue; - } - HugeVertex vertex = tx.constructVertex(false, server.asArray()); - tx.addVertex(vertex); - updated++; - } - // NOTE: actually it is auto-commit, to be improved - tx.commitOrRollback(); - - return updated; - }); - } - private V call(Callable callable) { assert !Thread.currentThread().getName().startsWith( "server-info-db-worker") : "can't call by itself"; @@ -388,24 +316,6 @@ private HugeServerInfo removeServerInfo(Id serverId) { }); } - protected void updateServerInfos(Collection serverInfos) { - this.save(serverInfos); - } - - protected Collection allServerInfos() { - Iterator infos = this.serverInfos(NO_LIMIT, null); - try (ListIterator iter = new ListIterator<>( - MAX_SERVERS, infos)) { - return iter.list(); - } catch (Exception e) { - throw new HugeException("Failed to close server info iterator", e); - } - } - - protected Iterator serverInfos(String page) { - return this.serverInfos(ImmutableMap.of(), PAGE_SIZE, page); - } - protected Iterator serverInfos(long limit, String page) { return 
this.serverInfos(ImmutableMap.of(), limit, page); } diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/StandardTaskScheduler.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/StandardTaskScheduler.java index 6a0a9a018a..36b9c871ea 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/StandardTaskScheduler.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/StandardTaskScheduler.java @@ -118,11 +118,9 @@ private TaskTransaction tx() { // NOTE: only the owner thread can access task tx if (this.taskTx == null) { /* - * NOTE: don't synchronized(this) due to scheduler thread hold - * this lock through scheduleTasks(), then query tasks and wait - * for db-worker thread after call(), the tx may not be initialized - * but can't catch this lock, then cause deadlock. - * We just use this.serverManager as a monitor here + * NOTE: don't synchronized(this) to avoid potential deadlock + * when multiple threads are accessing task transaction. + * We use this.serverManager as a monitor here for thread safety. */ synchronized (this.serverManager) { if (this.taskTx == null) { @@ -139,9 +137,9 @@ private TaskTransaction tx() { @Override public void restoreTasks() { - Id selfServer = this.serverManager().selfNodeId(); List> taskList = new ArrayList<>(); // Restore 'RESTORING', 'RUNNING' and 'QUEUED' tasks in order. + // Single-node mode: restore all pending tasks without server filtering for (TaskStatus status : TaskStatus.PENDING_STATUSES) { String page = this.supportsPaging() ? PageInfo.PAGE_NONE : null; do { @@ -149,9 +147,7 @@ public void restoreTasks() { for (iter = this.findTask(status, PAGE_SIZE, page); iter.hasNext(); ) { HugeTask task = iter.next(); - if (selfServer.equals(task.server())) { - taskList.add(task); - } + taskList.add(task); } if (page != null) { page = PageInfo.pageInfo(iter); @@ -273,55 +269,11 @@ public ServerInfoManager serverManager() { return this.serverManager; } - protected void cancelTasksOnWorker(Id server) { - String page = this.supportsPaging() ? PageInfo.PAGE_NONE : null; - do { - Iterator> tasks = this.tasks(TaskStatus.CANCELLING, PAGE_SIZE, page); - while (tasks.hasNext()) { - HugeTask task = tasks.next(); - Id taskServer = task.server(); - if (taskServer == null) { - LOG.warn("Task '{}' may not be scheduled", task.id()); - continue; - } - if (!taskServer.equals(server)) { - continue; - } - /* - * Task may be loaded from backend store and not initialized. - * like: A task is completed but failed to save in the last - * step, resulting in the status of the task not being - * updated to storage, the task is not in memory, so it's not - * initialized when canceled. 
- */ - HugeTask memTask = this.tasks.get(task.id()); - if (memTask != null) { - task = memTask; - } else { - this.initTaskCallable(task); - } - boolean cancelled = task.cancel(true); - LOG.info("Server '{}' cancel task '{}' with cancelled={}", - server, task.id(), cancelled); - } - if (page != null) { - page = PageInfo.pageInfo(tasks); - } - } while (page != null); - } - @Override public void taskDone(HugeTask task) { this.remove(task); - - Id selfServerId = this.serverManager().selfNodeId(); - try { - this.serverManager().decreaseLoad(task.load()); - } catch (Throwable e) { - LOG.error("Failed to decrease load for task '{}' on server '{}'", - task.id(), selfServerId, e); - } - LOG.debug("Task '{}' done on server '{}'", task.id(), selfServerId); + // Single-node mode: no need to manage load + LOG.debug("Task '{}' done", task.id()); } protected void remove(HugeTask task) { @@ -621,9 +573,7 @@ public V call(Callable callable) { } private void checkOnMasterNode(String op) { - if (!this.serverManager().selfIsMaster()) { - throw new HugeException("Can't %s task on non-master server", op); - } + // Single-node mode: all operations are allowed, no role check needed } private boolean supportsPaging() { From 42ee2ba5a05d154167a26b05b2dc31215f05bb58 Mon Sep 17 00:00:00 2001 From: Tsukilc <153273766+Tsukilc@users.noreply.github.com> Date: Fri, 7 Nov 2025 18:41:46 +0800 Subject: [PATCH 04/31] fix(server): disable server-role in StandardTaskScheduler --- .../main/java/org/apache/hugegraph/task/TaskManager.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/TaskManager.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/TaskManager.java index 67c2831bd6..7d49ab4041 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/TaskManager.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/TaskManager.java @@ -29,7 +29,12 @@ import java.util.Map; import java.util.Queue; -import java.util.concurrent.*; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.Callable; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.ThreadPoolExecutor; /** * Central task management system that coordinates task scheduling and execution. 
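Editor's note (not part of the patch series): the tx() hunk in PATCH 03 above keeps the lazy initialization but deliberately synchronizes on this.serverManager instead of this, so the scheduler thread that already holds the scheduler's own lock cannot deadlock against tx() callers. A minimal self-contained sketch of that double-checked-locking shape is shown below; LazyTxHolder, the nested TaskTransaction stand-in and the monitor field are illustrative names only, not HugeGraph code.

// Sketch only: same double-checked lazy-init shape as the patched tx(),
// with a dedicated monitor object standing in for this.serverManager.
final class LazyTxHolder {

    static final class TaskTransaction {
        // stand-in for the real task transaction type
    }

    private final Object monitor = new Object(); // plays the role of serverManager
    private volatile TaskTransaction taskTx;     // volatile for safe publication

    TaskTransaction tx() {
        if (this.taskTx == null) {               // fast path, no lock taken
            synchronized (this.monitor) {        // never synchronized(this) here
                if (this.taskTx == null) {       // re-check under the lock
                    this.taskTx = new TaskTransaction();
                }
            }
        }
        return this.taskTx;
    }
}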
From c976caaee397167bdd9eb7075b2b6244ec7472aa Mon Sep 17 00:00:00 2001 From: Tsukilc <153273766+Tsukilc@users.noreply.github.com> Date: Fri, 7 Nov 2025 18:47:58 +0800 Subject: [PATCH 05/31] fix(server): disable server-role in StandardTaskScheduler --- .../apache/hugegraph/task/TaskManager.java | 45 +------------------ 1 file changed, 1 insertion(+), 44 deletions(-) diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/TaskManager.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/TaskManager.java index 7d49ab4041..d57a674c42 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/TaskManager.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/TaskManager.java @@ -28,13 +28,7 @@ import org.slf4j.Logger; import java.util.Map; -import java.util.Queue; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.Callable; -import java.util.concurrent.TimeoutException; -import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.*; /** * Central task management system that coordinates task scheduling and execution. @@ -224,14 +218,6 @@ private void closeDistributedSchedulerTx(HugeGraphParams graph) { } } - public void pauseScheduledThreadPool() { - this.schedulerExecutor.pauseSchedule(); - } - - public void resumeScheduledThreadPool() { - this.schedulerExecutor.resumeSchedule(); - } - public TaskScheduler getScheduler(HugeGraphParams graph) { return this.schedulers.get(graph); } @@ -379,35 +365,6 @@ public void onAsRoleWorker() { } } - void notifyNewTask(HugeTask task) { - Queue queue = this.schedulerExecutor - .getQueue(); - if (queue.size() <= 1) { - /* - * Notify to schedule tasks initiatively when have new task - * It's OK to not notify again if there are more than one task in - * queue(like two, one is timer task, one is immediate task), - * we don't want too many immediate tasks to be inserted into queue, - * one notify will cause all the tasks to be processed. 
- */ - this.schedulerExecutor.submit(this::scheduleOrExecuteJob); - } - } - - private void scheduleOrExecuteJob() { - //// Called by scheduler timer - //try { - // for (TaskScheduler entry : this.schedulers.values()) { - // // Maybe other threads close&remove scheduler at the same time - // synchronized (entry) { - // this.scheduleOrExecuteJobForGraph(entry); - // } - // } - //} catch (Throwable e) { - // LOG.error("Exception occurred when schedule job", e); - //} - } - private static final ThreadLocal CONTEXTS = new ThreadLocal<>(); public static void setContext(String context) { From 570d67066a34c05c2eafe39d67bab15b5a8f6628 Mon Sep 17 00:00:00 2001 From: Tsukilc <153273766+Tsukilc@users.noreply.github.com> Date: Mon, 10 Nov 2025 13:13:03 +0800 Subject: [PATCH 06/31] fix(server): disable server-role in StandardTaskScheduler --- .../masterelection/StandardRoleListener.java | 5 ++-- .../hugegraph/task/ServerInfoManager.java | 26 +++++-------------- .../apache/hugegraph/task/TaskManager.java | 6 ----- 3 files changed, 8 insertions(+), 29 deletions(-) diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/masterelection/StandardRoleListener.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/masterelection/StandardRoleListener.java index dbbea6d91e..74515dacec 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/masterelection/StandardRoleListener.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/masterelection/StandardRoleListener.java @@ -17,12 +17,12 @@ package org.apache.hugegraph.masterelection; -import java.util.Objects; - import org.apache.hugegraph.task.TaskManager; import org.apache.hugegraph.util.Log; import org.slf4j.Logger; +import java.util.Objects; + public class StandardRoleListener implements RoleListener { private static final Logger LOG = Log.logger(StandardRoleListener.class); @@ -36,7 +36,6 @@ public class StandardRoleListener implements RoleListener { public StandardRoleListener(TaskManager taskManager, GlobalMasterInfo roleInfo) { this.taskManager = taskManager; - this.taskManager.enableRoleElection(); this.roleInfo = roleInfo; this.selfIsMaster = false; } diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/ServerInfoManager.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/ServerInfoManager.java index af579bb124..6fc8c52802 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/ServerInfoManager.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/ServerInfoManager.java @@ -61,7 +61,6 @@ public class ServerInfoManager { private volatile GlobalMasterInfo globalNodeInfo; - private final boolean onlySingleNode; private volatile boolean closed; public ServerInfoManager(HugeGraphParams graph, ExecutorService dbExecutor) { @@ -73,7 +72,6 @@ public ServerInfoManager(HugeGraphParams graph, ExecutorService dbExecutor) { this.globalNodeInfo = null; - this.onlySingleNode = false; this.closed = false; } @@ -112,11 +110,11 @@ public synchronized void initServerInfo(GlobalMasterInfo nodeInfo) { try { Thread.sleep(existed.expireTime() - now + 1); } catch (InterruptedException e) { - throw new HugeException("Interrupted when waiting for server info expired", e); + throw new HugeException("Interrupted when waiting for server info expired", e); } } E.checkArgument(existed == null || !existed.alive(), - "The server with name '%s' already in cluster", serverId); + "The 
server with name '%s' already in cluster", serverId); if (nodeInfo.nodeRole().master()) { String page = this.supportsPaging() ? PageInfo.PAGE_NONE : null; @@ -125,8 +123,8 @@ public synchronized void initServerInfo(GlobalMasterInfo nodeInfo) { while (servers.hasNext()) { existed = servers.next(); E.checkArgument(!existed.role().master() || !existed.alive(), - "Already existed master '%s' in current cluster", - existed.id()); + "Already existed master '%s' in current cluster", + existed.id()); } if (page != null) { page = PageInfo.pageInfo(servers); @@ -173,11 +171,6 @@ public boolean selfIsMaster() { return this.selfNodeRole() != null && this.selfNodeRole().master(); } - public boolean onlySingleNode() { - // Only exists one node in the whole master - return this.onlySingleNode; - } - public synchronized void heartbeat() { assert this.graphIsReady(); @@ -209,13 +202,6 @@ public synchronized void heartbeat() { assert serverInfo != null; } - public synchronized void decreaseLoad(int load) { - assert load > 0 : load; - HugeServerInfo serverInfo = this.selfServerInfo(); - serverInfo.increaseLoad(-load); - this.save(serverInfo); - } - public int calcMaxLoad() { // TODO: calc max load based on CPU and Memory resources return 10000; @@ -245,7 +231,7 @@ private Id save(HugeServerInfo serverInfo) { HugeServerInfo.Schema schema = HugeServerInfo.schema(this.graph); if (!schema.existVertexLabel(HugeServerInfo.P.SERVER)) { throw new HugeException("Schema is missing for %s '%s'", - HugeServerInfo.P.SERVER, serverInfo); + HugeServerInfo.P.SERVER, serverInfo); } HugeVertex vertex = this.tx().constructVertex(false, serverInfo.asArray()); // Add or update server info in backend store @@ -264,7 +250,7 @@ private V call(Callable callable) { return this.dbExecutor.submit(callable).get(); } catch (Throwable e) { throw new HugeException("Failed to update/query server info: %s", - e, e.toString()); + e, e.toString()); } } diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/TaskManager.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/TaskManager.java index d57a674c42..07ec28e55b 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/TaskManager.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/TaskManager.java @@ -69,8 +69,6 @@ public final class TaskManager { private final ExecutorService ephemeralTaskExecutor; private final PausableScheduledThreadPool distributedSchedulerExecutor; - private boolean enableRoleElected = false; - public static TaskManager instance() { return MANAGER; } @@ -329,10 +327,6 @@ public int pendingTasks() { return size; } - public void enableRoleElection() { - this.enableRoleElected = true; - } - public void onAsRoleMaster() { try { for (TaskScheduler entry : this.schedulers.values()) { From 59154280a52ca9e1b618af28a1d1d3773ccac496 Mon Sep 17 00:00:00 2001 From: Tsukilc <153273766+Tsukilc@users.noreply.github.com> Date: Thu, 6 Nov 2025 18:53:28 +0800 Subject: [PATCH 07/31] fix(server): disable server-role in StandardTaskScheduler --- .../hugegraph/task/StandardTaskScheduler.java | 43 ++------ .../apache/hugegraph/task/TaskManager.java | 97 +++---------------- 2 files changed, 21 insertions(+), 119 deletions(-) diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/StandardTaskScheduler.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/StandardTaskScheduler.java index 5f60792af1..577512059a 100644 --- 
a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/StandardTaskScheduler.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/StandardTaskScheduler.java @@ -17,18 +17,7 @@ package org.apache.hugegraph.task; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.concurrent.Callable; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; -import java.util.concurrent.TimeoutException; - +import com.google.common.collect.ImmutableMap; import org.apache.hugegraph.HugeException; import org.apache.hugegraph.HugeGraph; import org.apache.hugegraph.HugeGraphParams; @@ -57,7 +46,8 @@ import org.apache.tinkerpop.gremlin.structure.Vertex; import org.slf4j.Logger; -import com.google.common.collect.ImmutableMap; +import java.util.*; +import java.util.concurrent.*; public class StandardTaskScheduler implements TaskScheduler { @@ -211,30 +201,9 @@ public Future schedule(HugeTask task) { return this.submitTask(task); } - // Check this is on master for normal task schedule - this.checkOnMasterNode("schedule"); - if (this.serverManager().onlySingleNode() && !task.computer()) { - /* - * Speed up for single node, submit the task immediately, - * this code can be removed without affecting code logic - */ - task.status(TaskStatus.QUEUED); - task.server(this.serverManager().selfNodeId()); - this.save(task); - return this.submitTask(task); - } else { - /* - * Just set the SCHEDULING status and save the task, - * it will be scheduled by periodic scheduler worker - */ - task.status(TaskStatus.SCHEDULING); - this.save(task); - - // Notify master server to schedule and execute immediately - TaskManager.instance().notifyNewTask(task); - - return task; - } + task.status(TaskStatus.QUEUED); + this.save(task); + return this.submitTask(task); } private Future submitTask(HugeTask task) { diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/TaskManager.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/TaskManager.java index 277822a386..67c2831bd6 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/TaskManager.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/TaskManager.java @@ -17,15 +17,6 @@ package org.apache.hugegraph.task; -import java.util.Map; -import java.util.Queue; -import java.util.concurrent.Callable; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.ThreadPoolExecutor; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; - import org.apache.hugegraph.HugeException; import org.apache.hugegraph.HugeGraphParams; import org.apache.hugegraph.concurrent.PausableScheduledThreadPool; @@ -33,10 +24,13 @@ import org.apache.hugegraph.util.Consumers; import org.apache.hugegraph.util.E; import org.apache.hugegraph.util.ExecutorUtil; -import org.apache.hugegraph.util.LockUtil; import org.apache.hugegraph.util.Log; import org.slf4j.Logger; +import java.util.Map; +import java.util.Queue; +import java.util.concurrent.*; + /** * Central task management system that coordinates task scheduling and execution. * Manages task schedulers for different graphs and handles role-based execution. 
@@ -102,11 +96,6 @@ private TaskManager(int pool) { // For a schedule task to run, just one thread is ok this.schedulerExecutor = ExecutorUtil.newPausableScheduledThreadPool( 1, TASK_SCHEDULER); - // Start after 10x period time waiting for HugeGraphServer startup - this.schedulerExecutor.scheduleWithFixedDelay(this::scheduleOrExecuteJob, - 10 * SCHEDULE_PERIOD, - SCHEDULE_PERIOD, - TimeUnit.MILLISECONDS); } public void addScheduler(HugeGraphParams graph) { @@ -401,73 +390,17 @@ void notifyNewTask(HugeTask task) { } private void scheduleOrExecuteJob() { - // Called by scheduler timer - try { - for (TaskScheduler entry : this.schedulers.values()) { - // Maybe other threads close&remove scheduler at the same time - synchronized (entry) { - this.scheduleOrExecuteJobForGraph(entry); - } - } - } catch (Throwable e) { - LOG.error("Exception occurred when schedule job", e); - } - } - - private void scheduleOrExecuteJobForGraph(TaskScheduler scheduler) { - E.checkNotNull(scheduler, "scheduler"); - - if (scheduler instanceof StandardTaskScheduler) { - StandardTaskScheduler standardTaskScheduler = (StandardTaskScheduler) (scheduler); - ServerInfoManager serverManager = scheduler.serverManager(); - String spaceGraphName = scheduler.spaceGraphName(); - - LockUtil.lock(spaceGraphName, LockUtil.GRAPH_LOCK); - try { - /* - * Skip if: - * graph is closed (iterate schedulers before graph is closing) - * or - * graph is not initialized(maybe truncated or cleared). - * - * If graph is closing by other thread, current thread get - * serverManager and try lock graph, at the same time other - * thread deleted the lock-group, current thread would get - * exception 'LockGroup xx does not exists'. - * If graph is closed, don't call serverManager.initialized() - * due to it will reopen graph tx. - */ - if (!serverManager.graphIsReady()) { - return; - } - - // Update server heartbeat - serverManager.heartbeat(); - - /* - * Master will schedule tasks to suitable servers. - * Note a Worker may become to a Master, so elected-Master also needs to - * execute tasks assigned by previous Master when enableRoleElected=true. - * However, when enableRoleElected=false, a Master is only set by the - * config assignment, assigned-Master always stays the same state. 
- */ - if (serverManager.selfIsMaster()) { - standardTaskScheduler.scheduleTasksOnMaster(); - if (!this.enableRoleElected && !serverManager.onlySingleNode()) { - // assigned-Master + non-single-node don't need to execute tasks - return; - } - } - - // Execute queued tasks scheduled to current server - standardTaskScheduler.executeTasksOnWorker(serverManager.selfNodeId()); - - // Cancel tasks scheduled to current server - standardTaskScheduler.cancelTasksOnWorker(serverManager.selfNodeId()); - } finally { - LockUtil.unlock(spaceGraphName, LockUtil.GRAPH_LOCK); - } - } + //// Called by scheduler timer + //try { + // for (TaskScheduler entry : this.schedulers.values()) { + // // Maybe other threads close&remove scheduler at the same time + // synchronized (entry) { + // this.scheduleOrExecuteJobForGraph(entry); + // } + // } + //} catch (Throwable e) { + // LOG.error("Exception occurred when schedule job", e); + //} } private static final ThreadLocal CONTEXTS = new ThreadLocal<>(); From e7da7e51fe303bd5128129a20d8c5cb49c8d92c8 Mon Sep 17 00:00:00 2001 From: Tsukilc <153273766+Tsukilc@users.noreply.github.com> Date: Thu, 6 Nov 2025 19:51:10 +0800 Subject: [PATCH 08/31] fix(server): disable server-role in StandardTaskScheduler --- .../hugegraph/task/StandardTaskScheduler.java | 113 +----- .../apache/hugegraph/core/TaskCoreTest.java | 325 +++++++++--------- 2 files changed, 176 insertions(+), 262 deletions(-) diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/StandardTaskScheduler.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/StandardTaskScheduler.java index 577512059a..6a0a9a018a 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/StandardTaskScheduler.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/StandardTaskScheduler.java @@ -46,7 +46,10 @@ import org.apache.tinkerpop.gremlin.structure.Vertex; import org.slf4j.Logger; -import java.util.*; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Map; import java.util.concurrent.*; public class StandardTaskScheduler implements TaskScheduler { @@ -242,7 +245,6 @@ public void initTaskCallable(HugeTask task) { @Override public synchronized void cancel(HugeTask task) { E.checkArgumentNotNull(task, "Task can't be null"); - this.checkOnMasterNode("cancel"); if (task.completed() || task.cancelling()) { return; @@ -250,36 +252,20 @@ public synchronized void cancel(HugeTask task) { LOG.info("Cancel task '{}' in status {}", task.id(), task.status()); - if (task.server() == null) { - // The task not scheduled to workers, set canceled immediately - assert task.status().code() < TaskStatus.QUEUED.code(); - if (task.status(TaskStatus.CANCELLED)) { - this.save(task); - return; - } - } else if (task.status(TaskStatus.CANCELLING)) { - // The task scheduled to workers, let the worker node to cancel + HugeTask memTask = this.tasks.get(task.id()); + if (memTask != null) { + boolean cancelled = memTask.cancel(true); + LOG.info("Task '{}' cancel result: {}", task.id(), cancelled); + return; + } + + if (task.status(TaskStatus.CANCELLED)) { this.save(task); - assert task.server() != null : task; - assert this.serverManager().selfIsMaster(); - if (!task.server().equals(this.serverManager().selfNodeId())) { - /* - * Remove the task from memory if it's running on worker node, - * but keep the task in memory if it's running on master node. 
- * Cancel-scheduling will read the task from backend store, if - * removed this instance from memory, there will be two task - * instances with the same id, and can't cancel the real task that - * is running but removed from memory. - */ - this.remove(task); - } - // Notify master server to schedule and execute immediately - TaskManager.instance().notifyNewTask(task); return; } throw new HugeException("Can't cancel task '%s' in status %s", - task.id(), task.status()); + task.id(), task.status()); } @Override @@ -287,79 +273,6 @@ public ServerInfoManager serverManager() { return this.serverManager; } - protected synchronized void scheduleTasksOnMaster() { - // Master server schedule all scheduling tasks to suitable worker nodes - Collection serverInfos = this.serverManager().allServerInfos(); - String page = this.supportsPaging() ? PageInfo.PAGE_NONE : null; - do { - Iterator> tasks = this.tasks(TaskStatus.SCHEDULING, PAGE_SIZE, page); - while (tasks.hasNext()) { - HugeTask task = tasks.next(); - if (task.server() != null) { - // Skip if already scheduled - continue; - } - - if (!this.serverManager.selfIsMaster()) { - return; - } - - HugeServerInfo server = this.serverManager().pickWorkerNode(serverInfos, task); - if (server == null) { - LOG.info("The master can't find suitable servers to " + - "execute task '{}', wait for next schedule", task.id()); - continue; - } - - // Found suitable server, update task status - assert server.id() != null; - task.server(server.id()); - task.status(TaskStatus.SCHEDULED); - this.save(task); - - // Update server load in memory, it will be saved at the ending - server.increaseLoad(task.load()); - - LOG.info("Scheduled task '{}' to server '{}'", task.id(), server.id()); - } - if (page != null) { - page = PageInfo.pageInfo(tasks); - } - } while (page != null); - - // Save to store - this.serverManager().updateServerInfos(serverInfos); - } - - protected void executeTasksOnWorker(Id server) { - String page = this.supportsPaging() ? PageInfo.PAGE_NONE : null; - do { - Iterator> tasks = this.tasks(TaskStatus.SCHEDULED, PAGE_SIZE, page); - while (tasks.hasNext()) { - HugeTask task = tasks.next(); - this.initTaskCallable(task); - Id taskServer = task.server(); - if (taskServer == null) { - LOG.warn("Task '{}' may not be scheduled", task.id()); - continue; - } - HugeTask memTask = this.tasks.get(task.id()); - if (memTask != null) { - assert memTask.status().code() > task.status().code(); - continue; - } - if (taskServer.equals(server)) { - task.status(TaskStatus.QUEUED); - this.save(task); - this.submitTask(task); - } - } - if (page != null) { - page = PageInfo.pageInfo(tasks); - } - } while (page != null); - } - protected void cancelTasksOnWorker(Id server) { String page = this.supportsPaging() ? 
PageInfo.PAGE_NONE : null; do { diff --git a/hugegraph-server/hugegraph-test/src/main/java/org/apache/hugegraph/core/TaskCoreTest.java b/hugegraph-server/hugegraph-test/src/main/java/org/apache/hugegraph/core/TaskCoreTest.java index 212ccc0588..e608fc28b6 100644 --- a/hugegraph-server/hugegraph-test/src/main/java/org/apache/hugegraph/core/TaskCoreTest.java +++ b/hugegraph-server/hugegraph-test/src/main/java/org/apache/hugegraph/core/TaskCoreTest.java @@ -17,11 +17,8 @@ package org.apache.hugegraph.core; -import java.util.Arrays; -import java.util.Iterator; -import java.util.Random; -import java.util.concurrent.TimeoutException; - +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; import org.apache.hugegraph.HugeException; import org.apache.hugegraph.HugeGraph; import org.apache.hugegraph.api.job.GremlinAPI.GremlinRequest; @@ -32,17 +29,16 @@ import org.apache.hugegraph.job.EphemeralJobBuilder; import org.apache.hugegraph.job.GremlinJob; import org.apache.hugegraph.job.JobBuilder; -import org.apache.hugegraph.task.HugeTask; -import org.apache.hugegraph.task.TaskCallable; -import org.apache.hugegraph.task.TaskScheduler; -import org.apache.hugegraph.task.TaskStatus; +import org.apache.hugegraph.task.*; import org.apache.hugegraph.testutil.Assert; import org.apache.hugegraph.testutil.Whitebox; import org.junit.Before; import org.junit.Test; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; +import java.util.Iterator; +import java.util.List; +import java.util.Random; +import java.util.concurrent.TimeoutException; public class TaskCoreTest extends BaseCoreTest { @@ -80,7 +76,7 @@ public void testTask() throws TimeoutException { scheduler.delete(id, false); }, e -> { Assert.assertContains("Can't delete incomplete task '88888'", - e.getMessage()); + e.getMessage()); }); task = scheduler.waitUntilTaskCompleted(task.id(), 10); @@ -89,8 +85,8 @@ public void testTask() throws TimeoutException { Assert.assertEquals(TaskStatus.SUCCESS, task.status()); Assert.assertEquals("test-task", scheduler.task(id).name()); - Assert.assertEquals("test-task", scheduler.tasks(Arrays.asList(id)) - .next().name()); + Assert.assertEquals("test-task", scheduler.tasks(List.of(id)) + .next().name()); Iterator> iter = scheduler.tasks(ImmutableList.of(id)); Assert.assertTrue(iter.hasNext()); @@ -144,7 +140,7 @@ protected void done() { new HugeTask<>(id, null, callable); }, e -> { Assert.assertContains("Invalid task id type, it must be number", - e.getMessage()); + e.getMessage()); }); Assert.assertThrows(NullPointerException.class, () -> { @@ -178,18 +174,18 @@ public void testEphemeralJob() throws TimeoutException { EphemeralJobBuilder builder = EphemeralJobBuilder.of(graph); builder.name("test-job-ephemeral") - .job(new EphemeralJob() { - @Override - public String type() { - return "test"; - } - - @Override - public Object execute() throws Exception { - sleepAWhile(); - return ImmutableMap.of("k1", 13579, "k2", "24680"); - } - }); + .job(new EphemeralJob() { + @Override + public String type() { + return "test"; + } + + @Override + public Object execute() throws Exception { + sleepAWhile(); + return ImmutableMap.of("k1", 13579, "k2", "24680"); + } + }); HugeTask task = builder.schedule(); Assert.assertEquals("test-job-ephemeral", task.name()); @@ -221,8 +217,8 @@ public void testGremlinJob() throws TimeoutException { JobBuilder builder = JobBuilder.of(graph); builder.name("test-job-gremlin") - .input(request.toJson()) - .job(new 
GremlinJob()); + .input(request.toJson()) + .job(new GremlinJob()); HugeTask task = builder.schedule(); Assert.assertEquals("test-job-gremlin", task.name()); @@ -249,22 +245,22 @@ public void testGremlinJobWithScript() throws TimeoutException { TaskScheduler scheduler = graph.taskScheduler(); String script = "schema=graph.schema();" + - "schema.propertyKey('name').asText().ifNotExist().create();" + - "schema.propertyKey('age').asInt().ifNotExist().create();" + - "schema.propertyKey('lang').asText().ifNotExist().create();" + - "schema.propertyKey('date').asDate().ifNotExist().create();" + - "schema.propertyKey('price').asInt().ifNotExist().create();" + - "schema.vertexLabel('person1').properties('name','age').ifNotExist()" + - ".create();" + - "schema.vertexLabel('person2').properties('name','age').ifNotExist()" + - ".create();" + - "schema.edgeLabel('knows').sourceLabel('person1').targetLabel('person2')." + - "properties('date').ifNotExist().create();" + - "for(int i = 0; i < 1000; i++) {" + - " p1=graph.addVertex(T.label,'person1','name','p1-'+i,'age',29);" + - " p2=graph.addVertex(T.label,'person2','name','p2-'+i,'age',27);" + - " p1.addEdge('knows',p2,'date','2016-01-10');" + - "}"; + "schema.propertyKey('name').asText().ifNotExist().create();" + + "schema.propertyKey('age').asInt().ifNotExist().create();" + + "schema.propertyKey('lang').asText().ifNotExist().create();" + + "schema.propertyKey('date').asDate().ifNotExist().create();" + + "schema.propertyKey('price').asInt().ifNotExist().create();" + + "schema.vertexLabel('person1').properties('name','age').ifNotExist()" + + ".create();" + + "schema.vertexLabel('person2').properties('name','age').ifNotExist()" + + ".create();" + + "schema.edgeLabel('knows').sourceLabel('person1').targetLabel('person2')." 
+ + "properties('date').ifNotExist().create();" + + "for(int i = 0; i < 1000; i++) {" + + " p1=graph.addVertex(T.label,'person1','name','p1-'+i,'age',29);" + + " p2=graph.addVertex(T.label,'person2','name','p2-'+i,'age',27);" + + " p1.addEdge('knows',p2,'date','2016-01-10');" + + "}"; HugeTask task = runGremlinJob(script); task = scheduler.waitUntilTaskCompleted(task.id(), 10); @@ -310,27 +306,27 @@ public void testGremlinJobWithSerializedResults() throws TimeoutException { TaskScheduler scheduler = graph.taskScheduler(); String script = "schema=graph.schema();" + - "schema.propertyKey('name').asText().ifNotExist().create();" + - "schema.vertexLabel('char').useCustomizeNumberId()" + - " .properties('name').ifNotExist().create();" + - "schema.edgeLabel('next').sourceLabel('char').targetLabel('char')" + - " .properties('name').ifNotExist().create();" + - "g.addV('char').property(id,1).property('name','A').as('a')" + - " .addV('char').property(id,2).property('name','B').as('b')" + - " .addV('char').property(id,3).property('name','C').as('c')" + - " .addV('char').property(id,4).property('name','D').as('d')" + - " .addV('char').property(id,5).property('name','E').as('e')" + - " .addV('char').property(id,6).property('name','F').as('f')" + - " .addE('next').from('a').to('b').property('name','ab')" + - " .addE('next').from('b').to('c').property('name','bc')" + - " .addE('next').from('b').to('d').property('name','bd')" + - " .addE('next').from('c').to('d').property('name','cd')" + - " .addE('next').from('c').to('e').property('name','ce')" + - " .addE('next').from('d').to('e').property('name','de')" + - " .addE('next').from('e').to('f').property('name','ef')" + - " .addE('next').from('f').to('d').property('name','fd')" + - " .iterate();" + - "g.tx().commit(); g.E().count();"; + "schema.propertyKey('name').asText().ifNotExist().create();" + + "schema.vertexLabel('char').useCustomizeNumberId()" + + " .properties('name').ifNotExist().create();" + + "schema.edgeLabel('next').sourceLabel('char').targetLabel('char')" + + " .properties('name').ifNotExist().create();" + + "g.addV('char').property(id,1).property('name','A').as('a')" + + " .addV('char').property(id,2).property('name','B').as('b')" + + " .addV('char').property(id,3).property('name','C').as('c')" + + " .addV('char').property(id,4).property('name','D').as('d')" + + " .addV('char').property(id,5).property('name','E').as('e')" + + " .addV('char').property(id,6).property('name','F').as('f')" + + " .addE('next').from('a').to('b').property('name','ab')" + + " .addE('next').from('b').to('c').property('name','bc')" + + " .addE('next').from('b').to('d').property('name','bd')" + + " .addE('next').from('c').to('d').property('name','cd')" + + " .addE('next').from('c').to('e').property('name','ce')" + + " .addE('next').from('d').to('e').property('name','de')" + + " .addE('next').from('e').to('f').property('name','ef')" + + " .addE('next').from('f').to('d').property('name','fd')" + + " .iterate();" + + "g.tx().commit(); g.E().count();"; HugeTask task = runGremlinJob(script); task = scheduler.waitUntilTaskCompleted(task.id(), 10); @@ -346,15 +342,15 @@ public void testGremlinJobWithSerializedResults() throws TimeoutException { task = scheduler.waitUntilTaskCompleted(task.id(), 10); Assert.assertEquals(TaskStatus.SUCCESS, task.status()); String expected = String.format("[{\"labels\":[[],[],[]],\"objects\":[" + - "{\"id\":1,\"label\":\"char\",\"type\":\"vertex\"," + - "\"properties\":{\"name\":\"A\"}}," + - "{\"id\":\"L1>%s>%s>>L2\",\"label\":\"next\"," + - 
"\"type\":\"edge\",\"outV\":1," + - "\"outVLabel\":\"char\",\"inV\":2,\"" + - "inVLabel\":\"char\",\"properties\":{\"name\":\"ab\"}}," + - "{\"id\":2,\"label\":\"char\",\"type\":\"vertex\"," + - "\"properties\":{\"name\":\"B\"}}" + - "]}]", edgeLabelId, edgeLabelId); + "{\"id\":1,\"label\":\"char\",\"type\":\"vertex\"," + + "\"properties\":{\"name\":\"A\"}}," + + "{\"id\":\"L1>%s>%s>>L2\",\"label\":\"next\"," + + "\"type\":\"edge\",\"outV\":1," + + "\"outVLabel\":\"char\",\"inV\":2,\"" + + "inVLabel\":\"char\",\"properties\":{\"name\":\"ab\"}}," + + "{\"id\":2,\"label\":\"char\",\"type\":\"vertex\"," + + "\"properties\":{\"name\":\"B\"}}" + + "]}]", edgeLabelId, edgeLabelId); Assert.assertEquals(expected, task.result()); script = "g.V(1).out().out().path()"; @@ -362,19 +358,19 @@ public void testGremlinJobWithSerializedResults() throws TimeoutException { task = scheduler.waitUntilTaskCompleted(task.id(), 10); Assert.assertEquals(TaskStatus.SUCCESS, task.status()); expected = "[{\"labels\":[[],[],[]],\"objects\":[" + - "{\"id\":1,\"label\":\"char\",\"type\":\"vertex\"," + - "\"properties\":{\"name\":\"A\"}}," + - "{\"id\":2,\"label\":\"char\",\"type\":\"vertex\"," + - "\"properties\":{\"name\":\"B\"}}," + - "{\"id\":3,\"label\":\"char\",\"type\":\"vertex\"," + - "\"properties\":{\"name\":\"C\"}}]}," + - "{\"labels\":[[],[],[]],\"objects\":[" + - "{\"id\":1,\"label\":\"char\",\"type\":\"vertex\"," + - "\"properties\":{\"name\":\"A\"}}," + - "{\"id\":2,\"label\":\"char\",\"type\":\"vertex\"," + - "\"properties\":{\"name\":\"B\"}}," + - "{\"id\":4,\"label\":\"char\",\"type\":\"vertex\"," + - "\"properties\":{\"name\":\"D\"}}]}]"; + "{\"id\":1,\"label\":\"char\",\"type\":\"vertex\"," + + "\"properties\":{\"name\":\"A\"}}," + + "{\"id\":2,\"label\":\"char\",\"type\":\"vertex\"," + + "\"properties\":{\"name\":\"B\"}}," + + "{\"id\":3,\"label\":\"char\",\"type\":\"vertex\"," + + "\"properties\":{\"name\":\"C\"}}]}," + + "{\"labels\":[[],[],[]],\"objects\":[" + + "{\"id\":1,\"label\":\"char\",\"type\":\"vertex\"," + + "\"properties\":{\"name\":\"A\"}}," + + "{\"id\":2,\"label\":\"char\",\"type\":\"vertex\"," + + "\"properties\":{\"name\":\"B\"}}," + + "{\"id\":4,\"label\":\"char\",\"type\":\"vertex\"," + + "\"properties\":{\"name\":\"D\"}}]}]"; Assert.assertEquals(expected, task.result()); script = "g.V(1).outE().inV().tree()"; @@ -382,16 +378,16 @@ public void testGremlinJobWithSerializedResults() throws TimeoutException { task = scheduler.waitUntilTaskCompleted(task.id(), 10); Assert.assertEquals(TaskStatus.SUCCESS, task.status()); expected = String.format("[[{\"key\":{\"id\":1,\"label\":\"char\",\"type\":\"vertex\"," + - "\"properties\":{\"name\":\"A\"}}," + - "\"value\":[" + - "{\"key\":{\"id\":\"L1>%s>%s>>L2\",\"label\":\"next\"," + - "\"type\":\"edge\",\"outV\":1," + - "\"outVLabel\":\"char\",\"inV\":2,\"inVLabel\":\"char\"," + - "\"properties\":{\"name\":\"ab\"}}," + - "\"value\":[{\"key\":{\"id\":2,\"label\":\"char\"," + - "\"type\":\"vertex\"," + - "\"properties\":{\"name\":\"B\"}},\"value\":[]}]}]}]]", - edgeLabelId, edgeLabelId); + "\"properties\":{\"name\":\"A\"}}," + + "\"value\":[" + + "{\"key\":{\"id\":\"L1>%s>%s>>L2\",\"label\":\"next\"," + + "\"type\":\"edge\",\"outV\":1," + + "\"outVLabel\":\"char\",\"inV\":2,\"inVLabel\":\"char\"," + + "\"properties\":{\"name\":\"ab\"}}," + + "\"value\":[{\"key\":{\"id\":2,\"label\":\"char\"," + + "\"type\":\"vertex\"," + + "\"properties\":{\"name\":\"B\"}},\"value\":[]}]}]}]]", + edgeLabelId, edgeLabelId); Assert.assertEquals(expected, 
task.result()); script = "g.V(1).out().out().tree()"; @@ -399,14 +395,14 @@ public void testGremlinJobWithSerializedResults() throws TimeoutException { task = scheduler.waitUntilTaskCompleted(task.id(), 10); Assert.assertEquals(TaskStatus.SUCCESS, task.status()); expected = "[[{\"key\":{\"id\":1,\"label\":\"char\",\"type\":\"vertex\"," + - "\"properties\":{\"name\":\"A\"}}," + - "\"value\":[{\"key\":{\"id\":2,\"label\":\"char\",\"type\":\"vertex\"," + - "\"properties\":{\"name\":\"B\"}}," + - "\"value\":[" + - "{\"key\":{\"id\":3,\"label\":\"char\",\"type\":\"vertex\",\"properties\":" + - "{\"name\":\"C\"}},\"value\":[]}," + - "{\"key\":{\"id\":4,\"label\":\"char\",\"type\":\"vertex\",\"properties\":" + - "{\"name\":\"D\"}},\"value\":[]}]}]}]]"; + "\"properties\":{\"name\":\"A\"}}," + + "\"value\":[{\"key\":{\"id\":2,\"label\":\"char\",\"type\":\"vertex\"," + + "\"properties\":{\"name\":\"B\"}}," + + "\"value\":[" + + "{\"key\":{\"id\":3,\"label\":\"char\",\"type\":\"vertex\",\"properties\":" + + "{\"name\":\"C\"}},\"value\":[]}," + + "{\"key\":{\"id\":4,\"label\":\"char\",\"type\":\"vertex\",\"properties\":" + + "{\"name\":\"D\"}},\"value\":[]}]}]}]]"; Assert.assertEquals(expected, task.result()); } @@ -417,8 +413,8 @@ public void testGremlinJobWithFailure() throws TimeoutException { JobBuilder builder = JobBuilder.of(graph); builder.name("test-job-gremlin") - .input("") - .job(new GremlinJob()); + .input("") + .job(new GremlinJob()); HugeTask task = builder.schedule(); task = scheduler.waitUntilTaskCompleted(task.id(), 10); Assert.assertEquals(TaskStatus.FAILED, task.status()); @@ -426,7 +422,7 @@ public void testGremlinJobWithFailure() throws TimeoutException { builder = JobBuilder.of(graph); builder.name("test-job-gremlin") - .job(new GremlinJob()); + .job(new GremlinJob()); task = builder.schedule(); scheduler.waitUntilTaskCompleted(task.id(), 10); task = scheduler.task(task.id()); @@ -435,8 +431,8 @@ public void testGremlinJobWithFailure() throws TimeoutException { builder = JobBuilder.of(graph); builder.name("test-job-gremlin") - .input("{}") - .job(new GremlinJob()); + .input("{}") + .job(new GremlinJob()); task = builder.schedule(); task = scheduler.waitUntilTaskCompleted(task.id(), 10); Assert.assertEquals(TaskStatus.FAILED, task.status()); @@ -444,8 +440,8 @@ public void testGremlinJobWithFailure() throws TimeoutException { builder = JobBuilder.of(graph); builder.name("test-job-gremlin") - .input("{\"gremlin\":8}") - .job(new GremlinJob()); + .input("{\"gremlin\":8}") + .job(new GremlinJob()); task = builder.schedule(); task = scheduler.waitUntilTaskCompleted(task.id(), 10); Assert.assertEquals(TaskStatus.FAILED, task.status()); @@ -453,8 +449,8 @@ public void testGremlinJobWithFailure() throws TimeoutException { builder = JobBuilder.of(graph); builder.name("test-job-gremlin") - .input("{\"gremlin\":\"\"}") - .job(new GremlinJob()); + .input("{\"gremlin\":\"\"}") + .job(new GremlinJob()); task = builder.schedule(); task = scheduler.waitUntilTaskCompleted(task.id(), 10); Assert.assertEquals(TaskStatus.FAILED, task.status()); @@ -462,8 +458,8 @@ public void testGremlinJobWithFailure() throws TimeoutException { builder = JobBuilder.of(graph); builder.name("test-job-gremlin") - .input("{\"gremlin\":\"\", \"bindings\":\"\"}") - .job(new GremlinJob()); + .input("{\"gremlin\":\"\", \"bindings\":\"\"}") + .job(new GremlinJob()); task = builder.schedule(); task = scheduler.waitUntilTaskCompleted(task.id(), 10); Assert.assertEquals(TaskStatus.FAILED, task.status()); @@ -471,8 +467,8 @@ 
public void testGremlinJobWithFailure() throws TimeoutException { builder = JobBuilder.of(graph); builder.name("test-job-gremlin") - .input("{\"gremlin\":\"\", \"bindings\":{}}") - .job(new GremlinJob()); + .input("{\"gremlin\":\"\", \"bindings\":{}}") + .job(new GremlinJob()); task = builder.schedule(); task = scheduler.waitUntilTaskCompleted(task.id(), 10); Assert.assertEquals(TaskStatus.FAILED, task.status()); @@ -480,8 +476,8 @@ public void testGremlinJobWithFailure() throws TimeoutException { builder = JobBuilder.of(graph); builder.name("test-job-gremlin") - .input("{\"gremlin\":\"\", \"bindings\":{}, \"language\":{}}") - .job(new GremlinJob()); + .input("{\"gremlin\":\"\", \"bindings\":{}, \"language\":{}}") + .job(new GremlinJob()); task = builder.schedule(); task = scheduler.waitUntilTaskCompleted(task.id(), 10); Assert.assertEquals(TaskStatus.FAILED, task.status()); @@ -489,8 +485,8 @@ public void testGremlinJobWithFailure() throws TimeoutException { builder = JobBuilder.of(graph); builder.name("test-job-gremlin") - .input("{\"gremlin\":\"\", \"bindings\":{}, \"language\":\"\"}") - .job(new GremlinJob()); + .input("{\"gremlin\":\"\", \"bindings\":{}, \"language\":\"\"}") + .job(new GremlinJob()); task = builder.schedule(); task = scheduler.waitUntilTaskCompleted(task.id(), 10); Assert.assertEquals(TaskStatus.FAILED, task.status()); @@ -498,14 +494,14 @@ public void testGremlinJobWithFailure() throws TimeoutException { builder = JobBuilder.of(graph); builder.name("test-job-gremlin") - .input("{\"gremlin\":\"\", \"bindings\":{}, " + - "\"language\":\"test\", \"aliases\":{}}") - .job(new GremlinJob()); + .input("{\"gremlin\":\"\", \"bindings\":{}, " + + "\"language\":\"test\", \"aliases\":{}}") + .job(new GremlinJob()); task = builder.schedule(); task = scheduler.waitUntilTaskCompleted(task.id(), 10); Assert.assertEquals(TaskStatus.FAILED, task.status()); Assert.assertContains("test is not an available GremlinScriptEngine", - task.result()); + task.result()); } @Test @@ -514,16 +510,16 @@ public void testGremlinJobWithError() throws TimeoutException { Assert.assertThrows(IllegalArgumentException.class, () -> { JobBuilder.of(graph) - .job(new GremlinJob()) - .schedule(); + .job(new GremlinJob()) + .schedule(); }, e -> { Assert.assertContains("Job name can't be null", e.getMessage()); }); Assert.assertThrows(IllegalArgumentException.class, () -> { JobBuilder.of(graph) - .name("test-job-gremlin") - .schedule(); + .name("test-job-gremlin") + .schedule(); }, e -> { Assert.assertContains("Job callable can't be null", e.getMessage()); }); @@ -541,7 +537,7 @@ public void testGremlinJobWithError() throws TimeoutException { }, e -> { Assert.assertContains("Task input size", e.getMessage()); Assert.assertContains("exceeded limit 16777216 bytes", - e.getMessage()); + e.getMessage()); }); } @@ -557,13 +553,16 @@ public void testGremlinJobAndCancel() throws TimeoutException { scheduler.cancel(task); task = scheduler.task(task.id()); - Assert.assertEquals(TaskStatus.CANCELLING, task.status()); + System.out.println(scheduler.getClass()); + if (scheduler.getClass().equals(DistributedTaskScheduler.class)) { + Assert.assertEquals(TaskStatus.CANCELLING, task.status()); + } task = scheduler.waitUntilTaskCompleted(task.id(), 10); Assert.assertEquals(TaskStatus.CANCELLED, task.status()); Assert.assertEquals("test-gremlin-job", task.name()); Assert.assertTrue(task.result(), task.result() == null || - task.result().endsWith("InterruptedException")); + task.result().endsWith("InterruptedException")); // 
Cancel success task HugeTask task2 = runGremlinJob("1+2"); @@ -583,22 +582,22 @@ public void testGremlinJobAndCancel() throws TimeoutException { task3 = scheduler.task(task3.id()); Assert.assertEquals(TaskStatus.FAILED, task3.status()); Assert.assertContains("LimitExceedException: Job results size 800001 " + - "has exceeded the max limit 800000", - task3.result()); + "has exceeded the max limit 800000", + task3.result()); // Cancel failure task with big results (task exceeded limit 16M) String bigResults = "def random = new Random(); def rs=[];" + - "for (i in 0..4) {" + - " def len = 1024 * 1024;" + - " def item = new StringBuilder(len);" + - " for (j in 0..len) { " + - " item.append(\"node:\"); " + - " item.append((char) random.nextInt(256)); " + - " item.append(\",\"); " + - " };" + - " rs.add(item);" + - "};" + - "rs;"; + "for (i in 0..4) {" + + " def len = 1024 * 1024;" + + " def item = new StringBuilder(len);" + + " for (j in 0..len) { " + + " item.append(\"node:\"); " + + " item.append((char) random.nextInt(256)); " + + " item.append(\",\"); " + + " };" + + " rs.add(item);" + + "};" + + "rs;"; HugeTask task4 = runGremlinJob(bigResults); task4 = scheduler.waitUntilTaskCompleted(task4.id(), 10); Assert.assertEquals(TaskStatus.FAILED, task4.status()); @@ -606,9 +605,9 @@ public void testGremlinJobAndCancel() throws TimeoutException { task4 = scheduler.task(task4.id()); Assert.assertEquals(TaskStatus.FAILED, task4.status()); Assert.assertContains("LimitExceedException: Task result size", - task4.result()); + task4.result()); Assert.assertContains("exceeded limit 16777216 bytes", - task4.result()); + task4.result()); } @Test @@ -617,11 +616,11 @@ public void testGremlinJobAndRestore() throws Exception { TaskScheduler scheduler = graph.taskScheduler(); String gremlin = "println('task start');" + - "for(int i=gremlinJob.progress(); i<=10; i++) {" + - " gremlinJob.updateProgress(i);" + - " Thread.sleep(200); " + - " println('sleep=>'+i);" + - "}; 100;"; + "for(int i=gremlinJob.progress(); i<=10; i++) {" + + " gremlinJob.updateProgress(i);" + + " Thread.sleep(200); " + + " println('sleep=>'+i);" + + "}; 100;"; HugeTask task = runGremlinJob(gremlin); sleepAWhile(200 * 6); @@ -629,22 +628,24 @@ public void testGremlinJobAndRestore() throws Exception { scheduler.cancel(task); task = scheduler.task(task.id()); - Assert.assertEquals(TaskStatus.CANCELLING, task.status()); + if (scheduler.getClass().equals(DistributedTaskScheduler.class)) { + Assert.assertEquals(TaskStatus.CANCELLING, task.status()); + } task = scheduler.waitUntilTaskCompleted(task.id(), 10); Assert.assertEquals(TaskStatus.CANCELLED, task.status()); Assert.assertTrue("progress=" + task.progress(), - 0 < task.progress() && task.progress() < 10); + 0 < task.progress() && task.progress() < 10); Assert.assertEquals(0, task.retries()); - Assert.assertEquals(null, task.result()); + Assert.assertNull(task.result()); HugeTask finalTask = task; Assert.assertThrows(IllegalArgumentException.class, () -> { Whitebox.invoke(scheduler.getClass(), "restore", scheduler, - finalTask); + finalTask); }, e -> { Assert.assertContains("No need to restore completed task", - e.getMessage()); + e.getMessage()); }); HugeTask task2 = scheduler.task(task.id()); @@ -652,7 +653,7 @@ public void testGremlinJobAndRestore() throws Exception { Whitebox.invoke(scheduler.getClass(), "restore", scheduler, task2); }, e -> { Assert.assertContains("No need to restore completed task", - e.getMessage()); + e.getMessage()); }); Whitebox.setInternalState(task2, "status", 
TaskStatus.RUNNING); @@ -679,8 +680,8 @@ private HugeTask runGremlinJob(String gremlin) { JobBuilder builder = JobBuilder.of(graph); builder.name("test-gremlin-job") - .input(request.toJson()) - .job(new GremlinJob()); + .input(request.toJson()) + .job(new GremlinJob()); return builder.schedule(); } From b0f381cb199a1015b56bc2ff01f0a264f0919205 Mon Sep 17 00:00:00 2001 From: Tsukilc <153273766+Tsukilc@users.noreply.github.com> Date: Fri, 7 Nov 2025 18:38:45 +0800 Subject: [PATCH 09/31] fix(server): disable server-role in StandardTaskScheduler --- .../apache/hugegraph/task/HugeServerInfo.java | 16 +-- .../hugegraph/task/ServerInfoManager.java | 106 ++---------------- .../hugegraph/task/StandardTaskScheduler.java | 66 ++--------- 3 files changed, 18 insertions(+), 170 deletions(-) diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/HugeServerInfo.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/HugeServerInfo.java index 71feb3f688..6bc789f873 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/HugeServerInfo.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/HugeServerInfo.java @@ -17,12 +17,6 @@ package org.apache.hugegraph.task; -import java.util.ArrayList; -import java.util.Date; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - import org.apache.hugegraph.HugeGraph; import org.apache.hugegraph.HugeGraphParams; import org.apache.hugegraph.backend.id.Id; @@ -43,6 +37,8 @@ import org.apache.tinkerpop.gremlin.structure.Vertex; import org.apache.tinkerpop.gremlin.structure.VertexProperty; +import java.util.*; + public class HugeServerInfo { // Unit millisecond @@ -209,14 +205,6 @@ public static HugeServerInfo fromVertex(Vertex vertex) { return serverInfo; } - public boolean suitableFor(HugeTask task, long now) { - if (task.computer() != this.role.computer()) { - return false; - } - return this.updateTime.getTime() + EXPIRED_INTERVAL >= now && - this.load() + task.load() <= this.maxLoad; - } - public static Schema schema(HugeGraphParams graph) { return new Schema(graph); } diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/ServerInfoManager.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/ServerInfoManager.java index bcef869017..af579bb124 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/ServerInfoManager.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/ServerInfoManager.java @@ -17,14 +17,7 @@ package org.apache.hugegraph.task; -import static org.apache.hugegraph.backend.query.Query.NO_LIMIT; - -import java.util.Collection; -import java.util.Iterator; -import java.util.Map; -import java.util.concurrent.Callable; -import java.util.concurrent.ExecutorService; - +import com.google.common.collect.ImmutableMap; import org.apache.hugegraph.HugeException; import org.apache.hugegraph.HugeGraph; import org.apache.hugegraph.HugeGraphParams; @@ -35,7 +28,6 @@ import org.apache.hugegraph.backend.query.QueryResults; import org.apache.hugegraph.backend.tx.GraphTransaction; import org.apache.hugegraph.exception.ConnectionException; -import org.apache.hugegraph.iterator.ListIterator; import org.apache.hugegraph.iterator.MapperIterator; import org.apache.hugegraph.masterelection.GlobalMasterInfo; import org.apache.hugegraph.schema.PropertyKey; @@ -50,7 +42,12 @@ import org.apache.tinkerpop.gremlin.structure.Vertex; import 
org.slf4j.Logger; -import com.google.common.collect.ImmutableMap; +import java.util.Iterator; +import java.util.Map; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutorService; + +import static org.apache.hugegraph.backend.query.Query.NO_LIMIT; public class ServerInfoManager { @@ -64,7 +61,7 @@ public class ServerInfoManager { private volatile GlobalMasterInfo globalNodeInfo; - private volatile boolean onlySingleNode; + private final boolean onlySingleNode; private volatile boolean closed; public ServerInfoManager(HugeGraphParams graph, ExecutorService dbExecutor) { @@ -228,48 +225,6 @@ protected boolean graphIsReady() { return !this.closed && this.graph.started() && this.graph.initialized(); } - protected synchronized HugeServerInfo pickWorkerNode(Collection servers, - HugeTask task) { - HugeServerInfo master = null; - HugeServerInfo serverWithMinLoad = null; - int minLoad = Integer.MAX_VALUE; - boolean hasWorkerNode = false; - long now = DateUtil.now().getTime(); - - // Iterate servers to find suitable one - for (HugeServerInfo server : servers) { - if (!server.alive()) { - continue; - } - if (server.role().master()) { - master = server; - continue; - } - hasWorkerNode = true; - if (!server.suitableFor(task, now)) { - continue; - } - if (server.load() < minLoad) { - minLoad = server.load(); - serverWithMinLoad = server; - } - } - - boolean singleNode = !hasWorkerNode; - if (singleNode != this.onlySingleNode) { - LOG.info("Switch only_single_node to {}", singleNode); - this.onlySingleNode = singleNode; - } - - // Only schedule to master if there are no workers and master are suitable - if (!hasWorkerNode) { - if (master != null && master.suitableFor(task, now)) { - serverWithMinLoad = master; - } - } - return serverWithMinLoad; - } - private GraphTransaction tx() { assert Thread.currentThread().getName().contains("server-info-db-worker"); return this.graph.systemTransaction(); @@ -299,33 +254,6 @@ private Id save(HugeServerInfo serverInfo) { }); } - private int save(Collection serverInfos) { - return this.call(() -> { - if (serverInfos.isEmpty()) { - return 0; - } - HugeServerInfo.Schema schema = HugeServerInfo.schema(this.graph); - if (!schema.existVertexLabel(HugeServerInfo.P.SERVER)) { - throw new HugeException("Schema is missing for %s", HugeServerInfo.P.SERVER); - } - // Save server info in batch - GraphTransaction tx = this.tx(); - int updated = 0; - for (HugeServerInfo server : serverInfos) { - if (!server.updated()) { - continue; - } - HugeVertex vertex = tx.constructVertex(false, server.asArray()); - tx.addVertex(vertex); - updated++; - } - // NOTE: actually it is auto-commit, to be improved - tx.commitOrRollback(); - - return updated; - }); - } - private V call(Callable callable) { assert !Thread.currentThread().getName().startsWith( "server-info-db-worker") : "can't call by itself"; @@ -388,24 +316,6 @@ private HugeServerInfo removeServerInfo(Id serverId) { }); } - protected void updateServerInfos(Collection serverInfos) { - this.save(serverInfos); - } - - protected Collection allServerInfos() { - Iterator infos = this.serverInfos(NO_LIMIT, null); - try (ListIterator iter = new ListIterator<>( - MAX_SERVERS, infos)) { - return iter.list(); - } catch (Exception e) { - throw new HugeException("Failed to close server info iterator", e); - } - } - - protected Iterator serverInfos(String page) { - return this.serverInfos(ImmutableMap.of(), PAGE_SIZE, page); - } - protected Iterator serverInfos(long limit, String page) { return 
this.serverInfos(ImmutableMap.of(), limit, page); } diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/StandardTaskScheduler.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/StandardTaskScheduler.java index 6a0a9a018a..36b9c871ea 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/StandardTaskScheduler.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/StandardTaskScheduler.java @@ -118,11 +118,9 @@ private TaskTransaction tx() { // NOTE: only the owner thread can access task tx if (this.taskTx == null) { /* - * NOTE: don't synchronized(this) due to scheduler thread hold - * this lock through scheduleTasks(), then query tasks and wait - * for db-worker thread after call(), the tx may not be initialized - * but can't catch this lock, then cause deadlock. - * We just use this.serverManager as a monitor here + * NOTE: don't synchronized(this) to avoid potential deadlock + * when multiple threads are accessing task transaction. + * We use this.serverManager as a monitor here for thread safety. */ synchronized (this.serverManager) { if (this.taskTx == null) { @@ -139,9 +137,9 @@ private TaskTransaction tx() { @Override public void restoreTasks() { - Id selfServer = this.serverManager().selfNodeId(); List> taskList = new ArrayList<>(); // Restore 'RESTORING', 'RUNNING' and 'QUEUED' tasks in order. + // Single-node mode: restore all pending tasks without server filtering for (TaskStatus status : TaskStatus.PENDING_STATUSES) { String page = this.supportsPaging() ? PageInfo.PAGE_NONE : null; do { @@ -149,9 +147,7 @@ public void restoreTasks() { for (iter = this.findTask(status, PAGE_SIZE, page); iter.hasNext(); ) { HugeTask task = iter.next(); - if (selfServer.equals(task.server())) { - taskList.add(task); - } + taskList.add(task); } if (page != null) { page = PageInfo.pageInfo(iter); @@ -273,55 +269,11 @@ public ServerInfoManager serverManager() { return this.serverManager; } - protected void cancelTasksOnWorker(Id server) { - String page = this.supportsPaging() ? PageInfo.PAGE_NONE : null; - do { - Iterator> tasks = this.tasks(TaskStatus.CANCELLING, PAGE_SIZE, page); - while (tasks.hasNext()) { - HugeTask task = tasks.next(); - Id taskServer = task.server(); - if (taskServer == null) { - LOG.warn("Task '{}' may not be scheduled", task.id()); - continue; - } - if (!taskServer.equals(server)) { - continue; - } - /* - * Task may be loaded from backend store and not initialized. - * like: A task is completed but failed to save in the last - * step, resulting in the status of the task not being - * updated to storage, the task is not in memory, so it's not - * initialized when canceled. 
- */ - HugeTask memTask = this.tasks.get(task.id()); - if (memTask != null) { - task = memTask; - } else { - this.initTaskCallable(task); - } - boolean cancelled = task.cancel(true); - LOG.info("Server '{}' cancel task '{}' with cancelled={}", - server, task.id(), cancelled); - } - if (page != null) { - page = PageInfo.pageInfo(tasks); - } - } while (page != null); - } - @Override public void taskDone(HugeTask task) { this.remove(task); - - Id selfServerId = this.serverManager().selfNodeId(); - try { - this.serverManager().decreaseLoad(task.load()); - } catch (Throwable e) { - LOG.error("Failed to decrease load for task '{}' on server '{}'", - task.id(), selfServerId, e); - } - LOG.debug("Task '{}' done on server '{}'", task.id(), selfServerId); + // Single-node mode: no need to manage load + LOG.debug("Task '{}' done", task.id()); } protected void remove(HugeTask task) { @@ -621,9 +573,7 @@ public V call(Callable callable) { } private void checkOnMasterNode(String op) { - if (!this.serverManager().selfIsMaster()) { - throw new HugeException("Can't %s task on non-master server", op); - } + // Single-node mode: all operations are allowed, no role check needed } private boolean supportsPaging() { From 804055db5edb34eadbfaab9dabc3fbd05d30d8b7 Mon Sep 17 00:00:00 2001 From: Tsukilc <153273766+Tsukilc@users.noreply.github.com> Date: Fri, 7 Nov 2025 18:41:46 +0800 Subject: [PATCH 10/31] fix(server): disable server-role in StandardTaskScheduler --- .../main/java/org/apache/hugegraph/task/TaskManager.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/TaskManager.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/TaskManager.java index 67c2831bd6..7d49ab4041 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/TaskManager.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/TaskManager.java @@ -29,7 +29,12 @@ import java.util.Map; import java.util.Queue; -import java.util.concurrent.*; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.Callable; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.ThreadPoolExecutor; /** * Central task management system that coordinates task scheduling and execution. 
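Net effect of the StandardTaskScheduler changes above: with master election, worker picking and load bookkeeping removed, every task is queued and executed on the local node. A minimal sketch of the resulting schedule() path, with the generic signature restored and editorial comments added; the ephemeral-job fast path that precedes it in the class is unchanged and omitted here:

    public <V> Future<V> schedule(HugeTask<V> task) {
        // No SCHEDULING state, no server assignment, no notifyNewTask():
        // persist the task as QUEUED and hand it to the local task executor.
        task.status(TaskStatus.QUEUED);
        this.save(task);
        return this.submitTask(task);
    }

Callers are unaffected; jobs are still built and awaited as in TaskCoreTest, e.g. JobBuilder.of(graph).name(...).input(...).job(new GremlinJob()).schedule() followed by scheduler.waitUntilTaskCompleted(task.id(), seconds).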
From 2e8578ecd014f7cccf4c4aaf259692234d0404b0 Mon Sep 17 00:00:00 2001 From: Tsukilc <153273766+Tsukilc@users.noreply.github.com> Date: Fri, 7 Nov 2025 18:47:58 +0800 Subject: [PATCH 11/31] fix(server): disable server-role in StandardTaskScheduler --- .../apache/hugegraph/task/TaskManager.java | 45 +------------------ 1 file changed, 1 insertion(+), 44 deletions(-) diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/TaskManager.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/TaskManager.java index 7d49ab4041..d57a674c42 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/TaskManager.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/TaskManager.java @@ -28,13 +28,7 @@ import org.slf4j.Logger; import java.util.Map; -import java.util.Queue; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.Callable; -import java.util.concurrent.TimeoutException; -import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.*; /** * Central task management system that coordinates task scheduling and execution. @@ -224,14 +218,6 @@ private void closeDistributedSchedulerTx(HugeGraphParams graph) { } } - public void pauseScheduledThreadPool() { - this.schedulerExecutor.pauseSchedule(); - } - - public void resumeScheduledThreadPool() { - this.schedulerExecutor.resumeSchedule(); - } - public TaskScheduler getScheduler(HugeGraphParams graph) { return this.schedulers.get(graph); } @@ -379,35 +365,6 @@ public void onAsRoleWorker() { } } - void notifyNewTask(HugeTask task) { - Queue queue = this.schedulerExecutor - .getQueue(); - if (queue.size() <= 1) { - /* - * Notify to schedule tasks initiatively when have new task - * It's OK to not notify again if there are more than one task in - * queue(like two, one is timer task, one is immediate task), - * we don't want too many immediate tasks to be inserted into queue, - * one notify will cause all the tasks to be processed. 
- */ - this.schedulerExecutor.submit(this::scheduleOrExecuteJob); - } - } - - private void scheduleOrExecuteJob() { - //// Called by scheduler timer - //try { - // for (TaskScheduler entry : this.schedulers.values()) { - // // Maybe other threads close&remove scheduler at the same time - // synchronized (entry) { - // this.scheduleOrExecuteJobForGraph(entry); - // } - // } - //} catch (Throwable e) { - // LOG.error("Exception occurred when schedule job", e); - //} - } - private static final ThreadLocal CONTEXTS = new ThreadLocal<>(); public static void setContext(String context) { From 2c44ceeb23756cf6742a23bb8f13fa2c08f83e88 Mon Sep 17 00:00:00 2001 From: Tsukilc <153273766+Tsukilc@users.noreply.github.com> Date: Mon, 10 Nov 2025 13:13:03 +0800 Subject: [PATCH 12/31] fix(server): disable server-role in StandardTaskScheduler --- .../masterelection/StandardRoleListener.java | 5 ++-- .../hugegraph/task/ServerInfoManager.java | 26 +++++-------------- .../apache/hugegraph/task/TaskManager.java | 6 ----- 3 files changed, 8 insertions(+), 29 deletions(-) diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/masterelection/StandardRoleListener.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/masterelection/StandardRoleListener.java index dbbea6d91e..74515dacec 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/masterelection/StandardRoleListener.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/masterelection/StandardRoleListener.java @@ -17,12 +17,12 @@ package org.apache.hugegraph.masterelection; -import java.util.Objects; - import org.apache.hugegraph.task.TaskManager; import org.apache.hugegraph.util.Log; import org.slf4j.Logger; +import java.util.Objects; + public class StandardRoleListener implements RoleListener { private static final Logger LOG = Log.logger(StandardRoleListener.class); @@ -36,7 +36,6 @@ public class StandardRoleListener implements RoleListener { public StandardRoleListener(TaskManager taskManager, GlobalMasterInfo roleInfo) { this.taskManager = taskManager; - this.taskManager.enableRoleElection(); this.roleInfo = roleInfo; this.selfIsMaster = false; } diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/ServerInfoManager.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/ServerInfoManager.java index af579bb124..6fc8c52802 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/ServerInfoManager.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/ServerInfoManager.java @@ -61,7 +61,6 @@ public class ServerInfoManager { private volatile GlobalMasterInfo globalNodeInfo; - private final boolean onlySingleNode; private volatile boolean closed; public ServerInfoManager(HugeGraphParams graph, ExecutorService dbExecutor) { @@ -73,7 +72,6 @@ public ServerInfoManager(HugeGraphParams graph, ExecutorService dbExecutor) { this.globalNodeInfo = null; - this.onlySingleNode = false; this.closed = false; } @@ -112,11 +110,11 @@ public synchronized void initServerInfo(GlobalMasterInfo nodeInfo) { try { Thread.sleep(existed.expireTime() - now + 1); } catch (InterruptedException e) { - throw new HugeException("Interrupted when waiting for server info expired", e); + throw new HugeException("Interrupted when waiting for server info expired", e); } } E.checkArgument(existed == null || !existed.alive(), - "The server with name '%s' already in cluster", serverId); + "The 
server with name '%s' already in cluster", serverId); if (nodeInfo.nodeRole().master()) { String page = this.supportsPaging() ? PageInfo.PAGE_NONE : null; @@ -125,8 +123,8 @@ public synchronized void initServerInfo(GlobalMasterInfo nodeInfo) { while (servers.hasNext()) { existed = servers.next(); E.checkArgument(!existed.role().master() || !existed.alive(), - "Already existed master '%s' in current cluster", - existed.id()); + "Already existed master '%s' in current cluster", + existed.id()); } if (page != null) { page = PageInfo.pageInfo(servers); @@ -173,11 +171,6 @@ public boolean selfIsMaster() { return this.selfNodeRole() != null && this.selfNodeRole().master(); } - public boolean onlySingleNode() { - // Only exists one node in the whole master - return this.onlySingleNode; - } - public synchronized void heartbeat() { assert this.graphIsReady(); @@ -209,13 +202,6 @@ public synchronized void heartbeat() { assert serverInfo != null; } - public synchronized void decreaseLoad(int load) { - assert load > 0 : load; - HugeServerInfo serverInfo = this.selfServerInfo(); - serverInfo.increaseLoad(-load); - this.save(serverInfo); - } - public int calcMaxLoad() { // TODO: calc max load based on CPU and Memory resources return 10000; @@ -245,7 +231,7 @@ private Id save(HugeServerInfo serverInfo) { HugeServerInfo.Schema schema = HugeServerInfo.schema(this.graph); if (!schema.existVertexLabel(HugeServerInfo.P.SERVER)) { throw new HugeException("Schema is missing for %s '%s'", - HugeServerInfo.P.SERVER, serverInfo); + HugeServerInfo.P.SERVER, serverInfo); } HugeVertex vertex = this.tx().constructVertex(false, serverInfo.asArray()); // Add or update server info in backend store @@ -264,7 +250,7 @@ private V call(Callable callable) { return this.dbExecutor.submit(callable).get(); } catch (Throwable e) { throw new HugeException("Failed to update/query server info: %s", - e, e.toString()); + e, e.toString()); } } diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/TaskManager.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/TaskManager.java index d57a674c42..07ec28e55b 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/TaskManager.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/TaskManager.java @@ -69,8 +69,6 @@ public final class TaskManager { private final ExecutorService ephemeralTaskExecutor; private final PausableScheduledThreadPool distributedSchedulerExecutor; - private boolean enableRoleElected = false; - public static TaskManager instance() { return MANAGER; } @@ -329,10 +327,6 @@ public int pendingTasks() { return size; } - public void enableRoleElection() { - this.enableRoleElected = true; - } - public void onAsRoleMaster() { try { for (TaskScheduler entry : this.schedulers.values()) { From 925c384cc4464ee7b7d6da04b88e7ac31c7a5602 Mon Sep 17 00:00:00 2001 From: Tsukilc <153273766+Tsukilc@users.noreply.github.com> Date: Sun, 11 Jan 2026 17:27:22 +0800 Subject: [PATCH 13/31] fix(server): fix npe in non-auth mode --- .../src/assembly/static/conf/hugegraph.properties.template | 1 - .../main/java/org/apache/hugegraph/StandardHugeGraph.java | 7 ++++--- .../assembly/static/conf/graphs/hstore.properties.template | 1 - .../src/assembly/static/conf/graphs/hugegraph.properties | 1 - 4 files changed, 4 insertions(+), 6 deletions(-) diff --git a/hugegraph-cluster-test/hugegraph-clustertest-dist/src/assembly/static/conf/hugegraph.properties.template 
b/hugegraph-cluster-test/hugegraph-clustertest-dist/src/assembly/static/conf/hugegraph.properties.template index 2a086bd325..84800bd0a9 100644 --- a/hugegraph-cluster-test/hugegraph-clustertest-dist/src/assembly/static/conf/hugegraph.properties.template +++ b/hugegraph-cluster-test/hugegraph-clustertest-dist/src/assembly/static/conf/hugegraph.properties.template @@ -46,7 +46,6 @@ store=hugegraph pd.peers=$PD_PEERS_LIST$ # task config -task.scheduler_type=local task.schedule_period=10 task.retry=0 task.wait_timeout=10 diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/StandardHugeGraph.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/StandardHugeGraph.java index faf97aa8d6..cb085ae310 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/StandardHugeGraph.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/StandardHugeGraph.java @@ -176,7 +176,6 @@ public class StandardHugeGraph implements HugeGraph { private final BackendStoreProvider storeProvider; private final TinkerPopTransaction tx; private final RamTable ramtable; - private final String schedulerType; private volatile boolean started; private volatile boolean closed; private volatile GraphMode mode; @@ -229,7 +228,6 @@ public StandardHugeGraph(HugeConfig config) { this.closed = false; this.mode = GraphMode.NONE; this.readMode = GraphReadMode.OLTP_ONLY; - this.schedulerType = config.get(CoreOptions.SCHEDULER_TYPE); LockUtil.init(this.spaceGraphName()); @@ -315,6 +313,7 @@ public String backend() { return this.storeProvider.type(); } + @Override public BackendStoreInfo backendStoreInfo() { // Just for trigger Tx.getOrNewTransaction, then load 3 stores // TODO: pass storeProvider.metaStore() @@ -465,6 +464,7 @@ public void updateTime(Date updateTime) { this.updateTime = updateTime; } + @Override public void waitStarted() { // Just for trigger Tx.getOrNewTransaction, then load 3 stores this.schemaTransaction(); @@ -1629,7 +1629,8 @@ public void submitEphemeralJob(EphemeralJob job) { @Override public String schedulerType() { - return StandardHugeGraph.this.schedulerType; + // Use distributed scheduler for hstore backend, otherwise use local + return StandardHugeGraph.this.isHstore() ? 
"distributed" : "local"; } } diff --git a/hugegraph-server/hugegraph-dist/src/assembly/static/conf/graphs/hstore.properties.template b/hugegraph-server/hugegraph-dist/src/assembly/static/conf/graphs/hstore.properties.template index d3834baf5c..fd2782a87d 100644 --- a/hugegraph-server/hugegraph-dist/src/assembly/static/conf/graphs/hstore.properties.template +++ b/hugegraph-server/hugegraph-dist/src/assembly/static/conf/graphs/hstore.properties.template @@ -31,7 +31,6 @@ store=hugegraph pd.peers=127.0.0.1:8686 # task config -task.scheduler_type=local task.schedule_period=10 task.retry=0 task.wait_timeout=10 diff --git a/hugegraph-server/hugegraph-dist/src/assembly/static/conf/graphs/hugegraph.properties b/hugegraph-server/hugegraph-dist/src/assembly/static/conf/graphs/hugegraph.properties index b77cacb2de..3727919bbb 100644 --- a/hugegraph-server/hugegraph-dist/src/assembly/static/conf/graphs/hugegraph.properties +++ b/hugegraph-server/hugegraph-dist/src/assembly/static/conf/graphs/hugegraph.properties @@ -30,7 +30,6 @@ store=hugegraph #pd.peers=127.0.0.1:8686 # task config -task.scheduler_type=local task.schedule_period=10 task.retry=0 task.wait_timeout=10 From 3974048daf3dfe315bc8cceb936abe283dccbcaa Mon Sep 17 00:00:00 2001 From: Tsukilc <153273766+Tsukilc@users.noreply.github.com> Date: Sun, 11 Jan 2026 17:30:43 +0800 Subject: [PATCH 14/31] fix(server): fix npe in non-auth mode --- .../java/org/apache/hugegraph/config/CoreOptions.java | 8 +------- .../java/org/apache/hugegraph/options/CoreOptions.java | 8 +------- 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/config/CoreOptions.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/config/CoreOptions.java index ba4d4a1c0e..72a2da9324 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/config/CoreOptions.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/config/CoreOptions.java @@ -303,13 +303,7 @@ public class CoreOptions extends OptionHolder { rangeInt(1, 500), 1 ); - public static final ConfigOption SCHEDULER_TYPE = - new ConfigOption<>( - "task.scheduler_type", - "The type of scheduler used in distribution system.", - allowValues("local", "distributed"), - "local" - ); + public static final ConfigOption TASK_SYNC_DELETION = new ConfigOption<>( "task.sync_deletion", diff --git a/hugegraph-struct/src/main/java/org/apache/hugegraph/options/CoreOptions.java b/hugegraph-struct/src/main/java/org/apache/hugegraph/options/CoreOptions.java index 849539419b..caf0146bb9 100644 --- a/hugegraph-struct/src/main/java/org/apache/hugegraph/options/CoreOptions.java +++ b/hugegraph-struct/src/main/java/org/apache/hugegraph/options/CoreOptions.java @@ -295,13 +295,7 @@ public class CoreOptions extends OptionHolder { rangeInt(1, 500), 1 ); - public static final ConfigOption SCHEDULER_TYPE = - new ConfigOption<>( - "task.scheduler_type", - "The type of scheduler used in distribution system.", - allowValues("local", "distributed"), - "local" - ); + public static final ConfigOption TASK_SYNC_DELETION = new ConfigOption<>( "task.sync_deletion", From a349f629035a7897d201ec363adece98829aa5a7 Mon Sep 17 00:00:00 2001 From: Tsukilc <153273766+Tsukilc@users.noreply.github.com> Date: Sun, 11 Jan 2026 17:58:28 +0800 Subject: [PATCH 15/31] fix(server): fix npe in non-auth mode --- .../src/assembly/static/conf/rest-server.properties | 3 --- 1 file changed, 3 deletions(-) diff --git 
a/hugegraph-server/hugegraph-dist/src/assembly/static/conf/rest-server.properties b/hugegraph-server/hugegraph-dist/src/assembly/static/conf/rest-server.properties index 0dce972719..721c22461b 100644 --- a/hugegraph-server/hugegraph-dist/src/assembly/static/conf/rest-server.properties +++ b/hugegraph-server/hugegraph-dist/src/assembly/static/conf/rest-server.properties @@ -23,9 +23,6 @@ arthas.disabled_commands=jad #auth.admin_pa=pa #auth.graph_store=hugegraph -# lightweight load balancing (TODO: legacy mode, remove soon) -server.id=server-1 -server.role=master # use pd # usePD=true From 3e7bc6f720843b156b8b7209e5f9cde9c8af67af Mon Sep 17 00:00:00 2001 From: Tsukilc <153273766+Tsukilc@users.noreply.github.com> Date: Sun, 11 Jan 2026 18:30:30 +0800 Subject: [PATCH 16/31] fix(server): remove server.id --- .../org/apache/hugegraph/config/ServerOptions.java | 6 +++--- .../java/org/apache/hugegraph/core/GraphManager.java | 11 +++++++---- .../hugegraph/masterelection/GlobalMasterInfo.java | 2 +- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/hugegraph-server/hugegraph-api/src/main/java/org/apache/hugegraph/config/ServerOptions.java b/hugegraph-server/hugegraph-api/src/main/java/org/apache/hugegraph/config/ServerOptions.java index 5a785eef4d..f998b1abbc 100644 --- a/hugegraph-server/hugegraph-api/src/main/java/org/apache/hugegraph/config/ServerOptions.java +++ b/hugegraph-server/hugegraph-api/src/main/java/org/apache/hugegraph/config/ServerOptions.java @@ -576,9 +576,9 @@ public class ServerOptions extends OptionHolder { public static final ConfigOption SERVER_ID = new ConfigOption<>( "server.id", - "The id of hugegraph-server.", - disallowEmpty(), - "server-1" + "The id of hugegraph-server, auto-generated if not specified.", + null, + "" ); public static final ConfigOption SERVER_ROLE = new ConfigOption<>( diff --git a/hugegraph-server/hugegraph-api/src/main/java/org/apache/hugegraph/core/GraphManager.java b/hugegraph-server/hugegraph-api/src/main/java/org/apache/hugegraph/core/GraphManager.java index a2659641be..1168030d4a 100644 --- a/hugegraph-server/hugegraph-api/src/main/java/org/apache/hugegraph/core/GraphManager.java +++ b/hugegraph-server/hugegraph-api/src/main/java/org/apache/hugegraph/core/GraphManager.java @@ -37,6 +37,7 @@ import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; +import java.util.UUID; import java.util.function.Consumer; import java.util.stream.Collectors; @@ -1637,10 +1638,12 @@ private void checkBackendVersionOrExit(HugeConfig config) { private void initNodeRole() { String id = config.get(ServerOptions.SERVER_ID); String role = config.get(ServerOptions.SERVER_ROLE); - E.checkArgument(StringUtils.isNotEmpty(id), - "The server name can't be null or empty"); - E.checkArgument(StringUtils.isNotEmpty(role), - "The server role can't be null or empty"); + + // Auto-generate server.id if not configured (for single-node mode) + if (StringUtils.isEmpty(id)) { + id = "server-" + UUID.randomUUID().toString().substring(0, 8); + LOG.info("Auto-generated server.id: {}", id); + } NodeRole nodeRole = NodeRole.valueOf(role.toUpperCase()); boolean supportRoleElection = !nodeRole.computer() && diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/masterelection/GlobalMasterInfo.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/masterelection/GlobalMasterInfo.java index c345c50e60..4856744459 100644 --- 
a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/masterelection/GlobalMasterInfo.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/masterelection/GlobalMasterInfo.java @@ -22,7 +22,7 @@ import org.apache.hugegraph.type.define.NodeRole; import org.apache.hugegraph.util.E; -// TODO: rename to GlobalNodeRoleInfo +// TODO: We need to completely delete the startup of master-worker public final class GlobalMasterInfo { private static final NodeInfo NO_MASTER = new NodeInfo(false, ""); From e6f64871800da13c8b5a7e63e7aeddf336b7e3f6 Mon Sep 17 00:00:00 2001 From: Tsukilc <153273766+Tsukilc@users.noreply.github.com> Date: Sun, 11 Jan 2026 19:39:22 +0800 Subject: [PATCH 17/31] fix(server): remove task.scheduler_type --- docker/configs/server2-conf/graphs/hugegraph.properties | 1 - 1 file changed, 1 deletion(-) diff --git a/docker/configs/server2-conf/graphs/hugegraph.properties b/docker/configs/server2-conf/graphs/hugegraph.properties index 66cbccb731..b48bab5ea8 100644 --- a/docker/configs/server2-conf/graphs/hugegraph.properties +++ b/docker/configs/server2-conf/graphs/hugegraph.properties @@ -13,7 +13,6 @@ serializer=binary pd.peers=127.0.0.1:8686,127.0.0.1:8687,127.0.0.1:8688 # task config -task.scheduler_type=local task.schedule_period=10 task.retry=0 task.wait_timeout=10 From b325dba686b2d8db53298074808fa901b0062821 Mon Sep 17 00:00:00 2001 From: Tsukilc <153273766+Tsukilc@users.noreply.github.com> Date: Sun, 11 Jan 2026 20:20:57 +0800 Subject: [PATCH 18/31] fix(server): remove task.scheduler_type --- .../org/apache/hugegraph/core/GraphManager.java | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/hugegraph-server/hugegraph-api/src/main/java/org/apache/hugegraph/core/GraphManager.java b/hugegraph-server/hugegraph-api/src/main/java/org/apache/hugegraph/core/GraphManager.java index 1168030d4a..06c34c8038 100644 --- a/hugegraph-server/hugegraph-api/src/main/java/org/apache/hugegraph/core/GraphManager.java +++ b/hugegraph-server/hugegraph-api/src/main/java/org/apache/hugegraph/core/GraphManager.java @@ -196,7 +196,14 @@ public final class GraphManager { public GraphManager(HugeConfig conf, EventHub hub) { LOG.info("Init graph manager"); E.checkArgumentNotNull(conf, "The config can't be null"); + + // Auto-generate server.id if not configured String server = conf.get(ServerOptions.SERVER_ID); + if (StringUtils.isEmpty(server)) { + server = "server-" + UUID.randomUUID().toString().substring(0, 8); + LOG.info("Auto-generated server.id: {}", server); + conf.setProperty(ServerOptions.SERVER_ID.name(), server); + } String role = conf.get(ServerOptions.SERVER_ROLE); this.config = conf; @@ -207,10 +214,6 @@ public GraphManager(HugeConfig conf, EventHub hub) { conf.get(ServerOptions.SERVER_DEPLOY_IN_K8S); this.startIgnoreSingleGraphError = conf.get( ServerOptions.SERVER_START_IGNORE_SINGLE_GRAPH_ERROR); - E.checkArgument(server != null && !server.isEmpty(), - "The server name can't be null or empty"); - E.checkArgument(role != null && !role.isEmpty(), - "The server role can't be null or empty"); this.graphsDir = conf.get(ServerOptions.GRAPHS); this.cluster = conf.get(ServerOptions.CLUSTER); this.graphSpaces = new ConcurrentHashMap<>(); @@ -1639,12 +1642,6 @@ private void initNodeRole() { String id = config.get(ServerOptions.SERVER_ID); String role = config.get(ServerOptions.SERVER_ROLE); - // Auto-generate server.id if not configured (for single-node mode) - if (StringUtils.isEmpty(id)) { - id = "server-" + 
UUID.randomUUID().toString().substring(0, 8); - LOG.info("Auto-generated server.id: {}", id); - } - NodeRole nodeRole = NodeRole.valueOf(role.toUpperCase()); boolean supportRoleElection = !nodeRole.computer() && this.supportRoleElection() && From 1113520c394ff0335db07d1faf873a40fcbd69f0 Mon Sep 17 00:00:00 2001 From: Tsukilc <153273766+Tsukilc@users.noreply.github.com> Date: Thu, 15 Jan 2026 17:02:05 +0800 Subject: [PATCH 19/31] fix(server): fix some issues of the distributed scheduler --- .../task/DistributedTaskScheduler.java | 96 +++++++++++++++---- .../org/apache/hugegraph/task/HugeTask.java | 29 +++--- .../hugegraph/core/MultiGraphsTest.java | 2 +- .../apache/hugegraph/core/TaskCoreTest.java | 86 +++++++++-------- 4 files changed, 142 insertions(+), 71 deletions(-) diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/DistributedTaskScheduler.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/DistributedTaskScheduler.java index b4bba2ea12..cab0b9c14a 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/DistributedTaskScheduler.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/DistributedTaskScheduler.java @@ -19,7 +19,9 @@ import java.util.Iterator; import java.util.concurrent.Callable; +import java.util.concurrent.CancellationException; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; @@ -118,6 +120,11 @@ private static boolean sleep(long ms) { public void cronSchedule() { // Perform periodic scheduling tasks + // Check closed flag first to exit early + if (this.closed.get()) { + return; + } + if (!this.graph.started() || this.graph.closed()) { return; } @@ -253,6 +260,10 @@ public Future schedule(HugeTask task) { return this.ephemeralTaskExecutor.submit(task); } + // Validate task state before saving to ensure correct exception type + E.checkState(task.type() != null, "Task type can't be null"); + E.checkState(task.name() != null, "Task name can't be null"); + // Process schema task // Handle gremlin task // Handle OLAP calculation tasks @@ -286,13 +297,25 @@ protected void initTaskParams(HugeTask task) { @Override public void cancel(HugeTask task) { - // Update status to CANCELLING - if (!task.completed()) { - // Task not completed, can only execute status not CANCELLING - this.updateStatus(task.id(), null, TaskStatus.CANCELLING); - } else { - LOG.info("cancel task({}) error, task has completed", task.id()); + E.checkArgumentNotNull(task, "Task can't be null"); + + if (task.completed() || task.cancelling()) { + return; } + + LOG.info("Cancel task '{}' in status {}", task.id(), task.status()); + + // Check if task is running locally, cancel it directly if so + HugeTask runningTask = this.runningTasks.get(task.id()); + if (runningTask != null) { + boolean cancelled = runningTask.cancel(true); + LOG.info("Cancel local running task '{}' result: {}", task.id(), cancelled); + return; + } + + // Task not running locally, update status to CANCELLING + // for cronSchedule() or other nodes to handle + this.updateStatus(task.id(), null, TaskStatus.CANCELLING); } @Override @@ -316,14 +339,25 @@ protected HugeTask deleteFromDB(Id id) { @Override public HugeTask delete(Id id, boolean force) { - if (!force) { - // Change status to DELETING, perform the deletion operation through automatic - // scheduling. 
- this.updateStatus(id, null, TaskStatus.DELETING); + HugeTask task = this.taskWithoutResult(id); + if (task == null) { return null; - } else { - return this.deleteFromDB(id); } + + if (!force) { + // Check task status: can't delete running tasks without force + if (!task.completed() && task.status() != TaskStatus.DELETING) { + throw new IllegalArgumentException( + String.format("Can't delete incomplete task '%s' in status %s, " + + "Please try to cancel the task first", + id, task.status())); + } + // Already in DELETING status, delete directly from DB + // Completed tasks can also be deleted directly + } + + // Delete from DB directly for completed/DELETING tasks or force=true + return this.deleteFromDB(id); } @Override @@ -353,6 +387,18 @@ public boolean close() { cronFuture.cancel(false); } + // Wait for cron task to complete to ensure all transactions are closed + try { + cronFuture.get(schedulePeriod + 5, TimeUnit.SECONDS); + } catch (CancellationException e) { + // Task was cancelled, this is expected + LOG.debug("Cron task was cancelled"); + } catch (TimeoutException e) { + LOG.warn("Cron task did not complete in time when closing scheduler"); + } catch (ExecutionException | InterruptedException e) { + LOG.warn("Exception while waiting for cron task to complete", e); + } + if (!this.taskDbExecutor.isShutdown()) { this.call(() -> { try { @@ -363,7 +409,10 @@ public boolean close() { this.graph.closeTx(); }); } - return true; + + // TODO: the ServerInfoManager handling here should be removed in the future + return this.serverManager().close(); + //return true; } @Override @@ -387,15 +436,17 @@ private HugeTask waitUntilTaskCompleted(Id id, long seconds, long passes = seconds * 1000 / intervalMs; HugeTask task = null; for (long pass = 0; ; pass++) { - try { - task = this.taskWithoutResult(id); - } catch (NotFoundException e) { - if (task != null && task.completed()) { - assert task.id().asLong() < 0L : task.id(); + HugeTask previousTask = task; + task = this.taskWithoutResult(id); + if (task == null) { + // Task not found in DB + if (previousTask != null && previousTask.completed()) { + // Task was completed and then deleted (ephemeral task case) + assert previousTask.id().asLong() < 0L : previousTask.id(); sleep(intervalMs); - return task; + return previousTask; } - throw e; + throw new NotFoundException("Can't find task with id '%s'", id); } if (task.completed()) { // Wait for task result being set after status is completed @@ -466,6 +517,11 @@ private V call(Callable callable, ExecutorService executor) { protected boolean updateStatus(Id id, TaskStatus prestatus, TaskStatus status) { HugeTask task = this.taskWithoutResult(id); + if (task == null) { + // Task was already deleted by cronSchedule or another thread + LOG.info("Task '{}' not found, may have been deleted", id); + return false; + } initTaskParams(task); if (prestatus == null || task.status() == prestatus) { task.overwriteStatus(status); diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/HugeTask.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/HugeTask.java index f9e4f120f4..5defd42742 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/HugeTask.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/HugeTask.java @@ -371,9 +371,7 @@ protected void done() { protected void set(V v) { String result = JsonUtil.toJson(v); checkPropertySize(result, P.RESULT); - if (!this.result(TaskStatus.SUCCESS, result)) { -
assert this.completed(); - } + assert this.result(TaskStatus.SUCCESS, result) || this.completed(); // Will call done() and may cause to save to store super.set(v); } @@ -742,19 +740,26 @@ private void checkPropertySize(int propertyLength, String propertyName) { public void syncWait() { // This method is just called by tests + /* + * For ephemeral tasks (negative ID), directly wait on the Future. + * Ephemeral tasks are not saved to DB, so we can't query them by ID. + * Since HugeTask extends FutureTask, we can directly wait for completion. + */ + if (this.id().asLong() < 0) { + try { + this.get(); + } catch (Exception e) { + throw new HugeException("Failed to wait for task '%s' completed", + e, this.id); + } + return; + } + + // For normal tasks, wait through scheduler HugeTask task = null; try { task = this.scheduler().waitUntilTaskCompleted(this.id()); } catch (Throwable e) { - if (this.callable() instanceof EphemeralJob && - e.getClass() == NotFoundException.class && - e.getMessage().contains("Can't find task with id")) { - /* - * The task with EphemeralJob won't saved in backends and - * will be removed from memory when completed - */ - return; - } throw new HugeException("Failed to wait for task '%s' completed", e, this.id); } diff --git a/hugegraph-server/hugegraph-test/src/main/java/org/apache/hugegraph/core/MultiGraphsTest.java b/hugegraph-server/hugegraph-test/src/main/java/org/apache/hugegraph/core/MultiGraphsTest.java index 4fae0f76c6..5c34236857 100644 --- a/hugegraph-server/hugegraph-test/src/main/java/org/apache/hugegraph/core/MultiGraphsTest.java +++ b/hugegraph-server/hugegraph-test/src/main/java/org/apache/hugegraph/core/MultiGraphsTest.java @@ -248,7 +248,7 @@ public void testCreateGraphsWithInvalidNames() { @Test public void testCreateGraphsWithSameName() { - List graphs = openGraphs("g", "g", "G"); + List graphs = openGraphs("gg", "gg", "GG"); HugeGraph g1 = graphs.get(0); HugeGraph g2 = graphs.get(1); HugeGraph g3 = graphs.get(2); diff --git a/hugegraph-server/hugegraph-test/src/main/java/org/apache/hugegraph/core/TaskCoreTest.java b/hugegraph-server/hugegraph-test/src/main/java/org/apache/hugegraph/core/TaskCoreTest.java index e608fc28b6..cc5c421c0a 100644 --- a/hugegraph-server/hugegraph-test/src/main/java/org/apache/hugegraph/core/TaskCoreTest.java +++ b/hugegraph-server/hugegraph-test/src/main/java/org/apache/hugegraph/core/TaskCoreTest.java @@ -192,13 +192,18 @@ public Object execute() throws Exception { Assert.assertEquals("test", task.type()); Assert.assertFalse(task.completed()); - HugeTask task2 = scheduler.waitUntilTaskCompleted(task.id(), 10); + // Ephemeral tasks are node-local and not persisted to DB. + // Use Future.get() to wait for completion instead of ID-based lookup. 
+ try { + task.get(10, java.util.concurrent.TimeUnit.SECONDS); + } catch (Exception e) { + throw new RuntimeException("Ephemeral task execution failed", e); + } + Assert.assertEquals(TaskStatus.SUCCESS, task.status()); Assert.assertEquals("{\"k1\":13579,\"k2\":\"24680\"}", task.result()); - Assert.assertEquals(TaskStatus.SUCCESS, task2.status()); - Assert.assertEquals("{\"k1\":13579,\"k2\":\"24680\"}", task2.result()); - + // Ephemeral tasks are not stored in DB, so these should throw NotFoundException Assert.assertThrows(NotFoundException.class, () -> { scheduler.waitUntilTaskCompleted(task.id(), 10); }); @@ -553,10 +558,12 @@ public void testGremlinJobAndCancel() throws TimeoutException { scheduler.cancel(task); task = scheduler.task(task.id()); - System.out.println(scheduler.getClass()); - if (scheduler.getClass().equals(DistributedTaskScheduler.class)) { - Assert.assertEquals(TaskStatus.CANCELLING, task.status()); - } + // For DistributedTaskScheduler, local cancel may result in CANCELLED directly + // (task thread updates status after being interrupted) + // or CANCELLING (if task hasn't processed the interrupt yet) + Assert.assertTrue("Task status should be CANCELLING or CANCELLED, but was " + task.status(), + task.status() == TaskStatus.CANCELLING || + task.status() == TaskStatus.CANCELLED); task = scheduler.waitUntilTaskCompleted(task.id(), 10); Assert.assertEquals(TaskStatus.CANCELLED, task.status()); @@ -628,9 +635,9 @@ public void testGremlinJobAndRestore() throws Exception { scheduler.cancel(task); task = scheduler.task(task.id()); - if (scheduler.getClass().equals(DistributedTaskScheduler.class)) { - Assert.assertEquals(TaskStatus.CANCELLING, task.status()); - } + Assert.assertTrue("Task status should be CANCELLING or CANCELLED, but was " + task.status(), + task.status() == TaskStatus.CANCELLING || + task.status() == TaskStatus.CANCELLED); task = scheduler.waitUntilTaskCompleted(task.id(), 10); Assert.assertEquals(TaskStatus.CANCELLED, task.status()); @@ -640,36 +647,39 @@ public void testGremlinJobAndRestore() throws Exception { Assert.assertNull(task.result()); HugeTask finalTask = task; - Assert.assertThrows(IllegalArgumentException.class, () -> { - Whitebox.invoke(scheduler.getClass(), "restore", scheduler, - finalTask); - }, e -> { - Assert.assertContains("No need to restore completed task", - e.getMessage()); - }); - HugeTask task2 = scheduler.task(task.id()); - Assert.assertThrows(IllegalArgumentException.class, () -> { + // DistributedTaskScheduler does nothing in restore(), so only test StandardTaskScheduler here + if (scheduler.getClass().equals(StandardTaskScheduler.class)) { + Assert.assertThrows(IllegalArgumentException.class, () -> { + Whitebox.invoke(scheduler.getClass(), "restore", scheduler, + finalTask); + }, e -> { + Assert.assertContains("No need to restore completed task", + e.getMessage()); + }); + + HugeTask task2 = scheduler.task(task.id()); + Assert.assertThrows(IllegalArgumentException.class, () -> { + Whitebox.invoke(scheduler.getClass(), "restore", scheduler, task2); + }, e -> { + Assert.assertContains("No need to restore completed task", + e.getMessage()); + }); + + Whitebox.setInternalState(task2, "status", TaskStatus.RUNNING); + Whitebox.invoke(scheduler.getClass(), "restore", scheduler, task2); -
Assert.assertThrows(IllegalArgumentException.class, () -> { - Whitebox.invoke(scheduler.getClass(), "restore", scheduler, task2); - }, e -> { - Assert.assertContains("is already in the queue", e.getMessage()); - }); - - scheduler.waitUntilTaskCompleted(task2.id(), 10); - sleepAWhile(500); - Assert.assertEquals(10, task2.progress()); - Assert.assertEquals(1, task2.retries()); - Assert.assertEquals("100", task2.result()); + Assert.assertThrows(IllegalArgumentException.class, () -> { + Whitebox.invoke(scheduler.getClass(), "restore", scheduler, task2); + }, e -> { + Assert.assertContains("is already in the queue", e.getMessage()); + }); + scheduler.waitUntilTaskCompleted(task2.id(), 10); + sleepAWhile(500); + Assert.assertEquals(10, task2.progress()); + Assert.assertEquals(1, task2.retries()); + Assert.assertEquals("100", task2.result()); + } } private HugeTask runGremlinJob(String gremlin) { From 5ffd20b89daa550aa8f09e8f7c117fd5cd89448a Mon Sep 17 00:00:00 2001 From: Tsukilc <153273766+Tsukilc@users.noreply.github.com> Date: Thu, 15 Jan 2026 17:40:32 +0800 Subject: [PATCH 20/31] fix(server): fix some issues of the distributed scheduler --- .../hugegraph/task/DistributedTaskScheduler.java | 16 +++++++--------- .../hugegraph/task/TaskAndResultScheduler.java | 4 ++++ 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/DistributedTaskScheduler.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/DistributedTaskScheduler.java index cab0b9c14a..0975af33ac 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/DistributedTaskScheduler.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/DistributedTaskScheduler.java @@ -436,17 +436,15 @@ private HugeTask waitUntilTaskCompleted(Id id, long seconds, long passes = seconds * 1000 / intervalMs; HugeTask task = null; for (long pass = 0; ; pass++) { - HugeTask previousTask = task; - task = this.taskWithoutResult(id); - if (task == null) { - // Task not found in DB - if (previousTask != null && previousTask.completed()) { - // Task was completed and then deleted (ephemeral task case) - assert previousTask.id().asLong() < 0L : previousTask.id(); + try { + task = this.taskWithoutResult(id); + } catch (NotFoundException e) { + if (task != null && task.completed()) { + assert task.id().asLong() < 0L : task.id(); sleep(intervalMs); - return previousTask; + return task; } - throw new NotFoundException("Can't find task with id '%s'", id); + throw e; } if (task.completed()) { // Wait for task result being set after status is completed diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/TaskAndResultScheduler.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/TaskAndResultScheduler.java index 2ba3fd8a6d..7011609867 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/TaskAndResultScheduler.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/TaskAndResultScheduler.java @@ -219,6 +219,10 @@ protected HugeTask taskWithoutResult(Id id) { return HugeTask.fromVertex(vertex); }); + if (result == null) { + throw new NotFoundException("Can't find task with id '%s'", id); + } + return result; } From a110112561693f26147163f79538d72edf006971 Mon Sep 17 00:00:00 2001 From: Tsukilc <153273766+Tsukilc@users.noreply.github.com> Date: Thu, 15 Jan 2026 20:32:07 +0800 Subject: [PATCH 21/31] 
fix(server): fix some issues of the distributed scheduler --- .../org/apache/hugegraph/task/HugeTask.java | 25 +++++++------------ 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/HugeTask.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/HugeTask.java index 5defd42742..ac2cb747eb 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/HugeTask.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/HugeTask.java @@ -740,26 +740,19 @@ private void checkPropertySize(int propertyLength, String propertyName) { public void syncWait() { // This method is just called by tests - /* - * For ephemeral tasks (negative ID), directly wait on the Future. - * Ephemeral tasks are not saved to DB, so we can't query them by ID. - * Since HugeTask extends FutureTask, we can directly wait for completion. - */ - if (this.id().asLong() < 0) { - try { - this.get(); - } catch (Exception e) { - throw new HugeException("Failed to wait for task '%s' completed", - e, this.id); - } - return; - } - - // For normal tasks, wait through scheduler HugeTask task = null; try { task = this.scheduler().waitUntilTaskCompleted(this.id()); } catch (Throwable e) { + if (this.callable() instanceof EphemeralJob && + e.getClass() == NotFoundException.class && + e.getMessage().contains("Can't find task with id")) { + /* + * The task with EphemeralJob won't saved in backends and + * will be removed from memory when completed + */ + return; + } throw new HugeException("Failed to wait for task '%s' completed", e, this.id); } From a31e937ff328d04ecb9f4bd375c6d38a781aebe5 Mon Sep 17 00:00:00 2001 From: Tsukilc <153273766+Tsukilc@users.noreply.github.com> Date: Thu, 15 Jan 2026 21:15:03 +0800 Subject: [PATCH 22/31] fix(server): fix some issues of the distributed scheduler --- .../hugegraph/task/DistributedTaskScheduler.java | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/DistributedTaskScheduler.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/DistributedTaskScheduler.java index 0975af33ac..1859d3d56b 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/DistributedTaskScheduler.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/DistributedTaskScheduler.java @@ -315,7 +315,11 @@ public void cancel(HugeTask task) { // Task not running locally, update status to CANCELLING // for cronSchedule() or other nodes to handle - this.updateStatus(task.id(), null, TaskStatus.CANCELLING); + TaskStatus currentStatus = task.status(); + if (!this.updateStatus(task.id(), currentStatus, TaskStatus.CANCELLING)) { + LOG.info("Failed to cancel task '{}', status may have changed from {}", + task.id(), currentStatus); + } } @Override @@ -340,9 +344,6 @@ protected HugeTask deleteFromDB(Id id) { @Override public HugeTask delete(Id id, boolean force) { HugeTask task = this.taskWithoutResult(id); - if (task == null) { - return null; - } if (!force) { // Check task status: can't delete running tasks without force @@ -515,11 +516,6 @@ private V call(Callable callable, ExecutorService executor) { protected boolean updateStatus(Id id, TaskStatus prestatus, TaskStatus status) { HugeTask task = this.taskWithoutResult(id); - if (task == null) { - // Task was already deleted by cronSchedule or another thread - 
LOG.info("Task '{}' not found, may have been deleted", id); - return false; - } initTaskParams(task); if (prestatus == null || task.status() == prestatus) { task.overwriteStatus(status); From d89b9bdc5b86f371c8747c58edd9bc032011e064 Mon Sep 17 00:00:00 2001 From: Tsukilc <153273766+Tsukilc@users.noreply.github.com> Date: Thu, 15 Jan 2026 22:10:26 +0800 Subject: [PATCH 23/31] fix(server): fix some issues of the distributed scheduler --- .../src/main/java/org/apache/hugegraph/task/HugeTask.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/HugeTask.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/HugeTask.java index ac2cb747eb..f9e4f120f4 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/HugeTask.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/HugeTask.java @@ -371,7 +371,9 @@ protected void done() { protected void set(V v) { String result = JsonUtil.toJson(v); checkPropertySize(result, P.RESULT); - assert this.result(TaskStatus.SUCCESS, result) || this.completed(); + if (!this.result(TaskStatus.SUCCESS, result)) { + assert this.completed(); + } // Will call done() and may cause to save to store super.set(v); } From 5807fb724d960dcc793bc41a94b19a6ade3b8063 Mon Sep 17 00:00:00 2001 From: Tsukilc <153273766+Tsukilc@users.noreply.github.com> Date: Fri, 16 Jan 2026 00:37:01 +0800 Subject: [PATCH 24/31] fix(server): fix some issues of the distributed scheduler --- .../hugegraph/task/DistributedTaskScheduler.java | 11 +++++++++++ .../java/org/apache/hugegraph/core/TaskCoreTest.java | 6 +++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/DistributedTaskScheduler.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/DistributedTaskScheduler.java index 1859d3d56b..d71a012d2b 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/DistributedTaskScheduler.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/DistributedTaskScheduler.java @@ -295,6 +295,12 @@ protected void initTaskParams(HugeTask task) { } } + /** + * Note: This method will update the status of the input task. 
+ * + * @param task + * @param + */ @Override public void cancel(HugeTask task) { E.checkArgumentNotNull(task, "Task can't be null"); @@ -309,6 +315,9 @@ public void cancel(HugeTask task) { HugeTask runningTask = this.runningTasks.get(task.id()); if (runningTask != null) { boolean cancelled = runningTask.cancel(true); + if (cancelled) { + task.overwriteStatus(TaskStatus.CANCELLED); + } LOG.info("Cancel local running task '{}' result: {}", task.id(), cancelled); return; } @@ -319,6 +328,8 @@ public void cancel(HugeTask task) { if (!this.updateStatus(task.id(), currentStatus, TaskStatus.CANCELLING)) { LOG.info("Failed to cancel task '{}', status may have changed from {}", task.id(), currentStatus); + } else { + task.overwriteStatus(TaskStatus.CANCELLING); } } diff --git a/hugegraph-server/hugegraph-test/src/main/java/org/apache/hugegraph/core/TaskCoreTest.java b/hugegraph-server/hugegraph-test/src/main/java/org/apache/hugegraph/core/TaskCoreTest.java index cc5c421c0a..78d84fe503 100644 --- a/hugegraph-server/hugegraph-test/src/main/java/org/apache/hugegraph/core/TaskCoreTest.java +++ b/hugegraph-server/hugegraph-test/src/main/java/org/apache/hugegraph/core/TaskCoreTest.java @@ -29,7 +29,11 @@ import org.apache.hugegraph.job.EphemeralJobBuilder; import org.apache.hugegraph.job.GremlinJob; import org.apache.hugegraph.job.JobBuilder; -import org.apache.hugegraph.task.*; +import org.apache.hugegraph.task.StandardTaskScheduler; +import org.apache.hugegraph.task.HugeTask; +import org.apache.hugegraph.task.TaskCallable; +import org.apache.hugegraph.task.TaskScheduler; +import org.apache.hugegraph.task.TaskStatus; import org.apache.hugegraph.testutil.Assert; import org.apache.hugegraph.testutil.Whitebox; import org.junit.Before; From f8fc58accedcc7a3b4bbbd4fe082b1991d26acb0 Mon Sep 17 00:00:00 2001 From: Tsukilc <153273766+Tsukilc@users.noreply.github.com> Date: Fri, 16 Jan 2026 10:15:34 +0800 Subject: [PATCH 25/31] fix(server): fix some issues of the distributed scheduler --- .../apache/hugegraph/task/HugeServerInfo.java | 8 ++++++-- .../hugegraph/task/StandardTaskScheduler.java | 17 +++++++++++------ .../org/apache/hugegraph/task/TaskManager.java | 11 ++++++++--- 3 files changed, 25 insertions(+), 11 deletions(-) diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/HugeServerInfo.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/HugeServerInfo.java index 6bc789f873..f0485f6656 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/HugeServerInfo.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/HugeServerInfo.java @@ -17,6 +17,12 @@ package org.apache.hugegraph.task; +import java.util.ArrayList; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + import org.apache.hugegraph.HugeGraph; import org.apache.hugegraph.HugeGraphParams; import org.apache.hugegraph.backend.id.Id; @@ -37,8 +43,6 @@ import org.apache.tinkerpop.gremlin.structure.Vertex; import org.apache.tinkerpop.gremlin.structure.VertexProperty; -import java.util.*; - public class HugeServerInfo { // Unit millisecond diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/StandardTaskScheduler.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/StandardTaskScheduler.java index 36b9c871ea..1228828629 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/StandardTaskScheduler.java +++ 
b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/StandardTaskScheduler.java @@ -17,6 +17,17 @@ package org.apache.hugegraph.task; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.concurrent.Callable; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeoutException; + import com.google.common.collect.ImmutableMap; import org.apache.hugegraph.HugeException; import org.apache.hugegraph.HugeGraph; @@ -46,12 +57,6 @@ import org.apache.tinkerpop.gremlin.structure.Vertex; import org.slf4j.Logger; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.concurrent.*; - public class StandardTaskScheduler implements TaskScheduler { private static final Logger LOG = Log.logger(StandardTaskScheduler.class); diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/TaskManager.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/TaskManager.java index 07ec28e55b..9ce9762743 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/TaskManager.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/TaskManager.java @@ -17,6 +17,14 @@ package org.apache.hugegraph.task; +import java.util.Map; +import java.util.concurrent.Callable; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; + import org.apache.hugegraph.HugeException; import org.apache.hugegraph.HugeGraphParams; import org.apache.hugegraph.concurrent.PausableScheduledThreadPool; @@ -27,9 +35,6 @@ import org.apache.hugegraph.util.Log; import org.slf4j.Logger; -import java.util.Map; -import java.util.concurrent.*; - /** * Central task management system that coordinates task scheduling and execution. * Manages task schedulers for different graphs and handles role-based execution. 
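The javadoc above describes the contract the rest of this series relies on: TaskManager keeps one TaskScheduler per graph, and callers reach that scheduler through the graph to submit jobs and wait for them. A minimal sketch of that flow, following the EphemeralJobBuilder and HugeTask usage shown in TaskCoreTest (the class name, job name and return value below are illustrative only, not taken from the patches):

import java.util.concurrent.TimeUnit;

import org.apache.hugegraph.HugeGraph;
import org.apache.hugegraph.job.EphemeralJob;
import org.apache.hugegraph.job.EphemeralJobBuilder;
import org.apache.hugegraph.task.HugeTask;

public class TaskFlowSketch {

    public static void runSampleEphemeralJob(HugeGraph graph) throws Exception {
        EphemeralJobBuilder<Object> builder = EphemeralJobBuilder.of(graph);
        builder.name("sample-ephemeral-job")
               .job(new EphemeralJob<Object>() {
                   @Override
                   public String type() {
                       return "sample";
                   }

                   @Override
                   public Object execute() throws Exception {
                       return "done";
                   }
               });
        // schedule() hands the job to the scheduler that TaskManager registered for this graph
        HugeTask<Object> task = builder.schedule();

        // Ephemeral tasks are not persisted, so wait on the returned task (a FutureTask) itself
        task.get(10, TimeUnit.SECONDS);
    }
}

For jobs that are persisted to the backend, the same submission flow applies, but the wait goes through graph.taskScheduler().waitUntilTaskCompleted(task.id(), seconds) rather than the Future itself.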
From 6dd52e43f857ebfbdc71583faa6c6ea95ecd7b46 Mon Sep 17 00:00:00 2001 From: Tsukilc <153273766+Tsukilc@users.noreply.github.com> Date: Fri, 16 Jan 2026 10:45:40 +0800 Subject: [PATCH 26/31] fix(server): fix some issues of the distributed scheduler --- .../hugegraph/task/ServerInfoManager.java | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/ServerInfoManager.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/ServerInfoManager.java index 6fc8c52802..d4b0f27ad2 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/ServerInfoManager.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/ServerInfoManager.java @@ -17,7 +17,13 @@ package org.apache.hugegraph.task; -import com.google.common.collect.ImmutableMap; +import static org.apache.hugegraph.backend.query.Query.NO_LIMIT; + +import java.util.Iterator; +import java.util.Map; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutorService; + import org.apache.hugegraph.HugeException; import org.apache.hugegraph.HugeGraph; import org.apache.hugegraph.HugeGraphParams; @@ -42,12 +48,7 @@ import org.apache.tinkerpop.gremlin.structure.Vertex; import org.slf4j.Logger; -import java.util.Iterator; -import java.util.Map; -import java.util.concurrent.Callable; -import java.util.concurrent.ExecutorService; - -import static org.apache.hugegraph.backend.query.Query.NO_LIMIT; +import com.google.common.collect.ImmutableMap; public class ServerInfoManager { @@ -114,7 +115,7 @@ public synchronized void initServerInfo(GlobalMasterInfo nodeInfo) { } } E.checkArgument(existed == null || !existed.alive(), - "The server with name '%s' already in cluster", serverId); + "The server with name '%s' already in cluster", serverId); if (nodeInfo.nodeRole().master()) { String page = this.supportsPaging() ? 
PageInfo.PAGE_NONE : null; @@ -123,8 +124,8 @@ public synchronized void initServerInfo(GlobalMasterInfo nodeInfo) { while (servers.hasNext()) { existed = servers.next(); E.checkArgument(!existed.role().master() || !existed.alive(), - "Already existed master '%s' in current cluster", - existed.id()); + "Already existed master '%s' in current cluster", + existed.id()); } if (page != null) { page = PageInfo.pageInfo(servers); @@ -231,7 +232,7 @@ private Id save(HugeServerInfo serverInfo) { HugeServerInfo.Schema schema = HugeServerInfo.schema(this.graph); if (!schema.existVertexLabel(HugeServerInfo.P.SERVER)) { throw new HugeException("Schema is missing for %s '%s'", - HugeServerInfo.P.SERVER, serverInfo); + HugeServerInfo.P.SERVER, serverInfo); } HugeVertex vertex = this.tx().constructVertex(false, serverInfo.asArray()); // Add or update server info in backend store @@ -250,7 +251,7 @@ private V call(Callable callable) { return this.dbExecutor.submit(callable).get(); } catch (Throwable e) { throw new HugeException("Failed to update/query server info: %s", - e, e.toString()); + e, e.toString()); } } From af85bef638a340bdccf85c05f9d4ca687168e813 Mon Sep 17 00:00:00 2001 From: Tsukilc <153273766+Tsukilc@users.noreply.github.com> Date: Fri, 16 Jan 2026 10:48:53 +0800 Subject: [PATCH 27/31] fix(server): fix some issues of the distributed scheduler --- .../task/DistributedTaskScheduler.java | 1 + .../hugegraph/task/StandardTaskScheduler.java | 5 ++- .../task/TaskAndResultScheduler.java | 45 ++++++++++--------- 3 files changed, 27 insertions(+), 24 deletions(-) diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/DistributedTaskScheduler.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/DistributedTaskScheduler.java index d71a012d2b..e5ff6d9521 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/DistributedTaskScheduler.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/DistributedTaskScheduler.java @@ -50,6 +50,7 @@ import org.slf4j.Logger; public class DistributedTaskScheduler extends TaskAndResultScheduler { + private static final Logger LOG = Log.logger(DistributedTaskScheduler.class); private final long schedulePeriod; private final ExecutorService taskDbExecutor; diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/StandardTaskScheduler.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/StandardTaskScheduler.java index 1228828629..315cdbca47 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/StandardTaskScheduler.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/StandardTaskScheduler.java @@ -28,7 +28,6 @@ import java.util.concurrent.Future; import java.util.concurrent.TimeoutException; -import com.google.common.collect.ImmutableMap; import org.apache.hugegraph.HugeException; import org.apache.hugegraph.HugeGraph; import org.apache.hugegraph.HugeGraphParams; @@ -57,6 +56,8 @@ import org.apache.tinkerpop.gremlin.structure.Vertex; import org.slf4j.Logger; +import com.google.common.collect.ImmutableMap; + public class StandardTaskScheduler implements TaskScheduler { private static final Logger LOG = Log.logger(StandardTaskScheduler.class); @@ -266,7 +267,7 @@ public synchronized void cancel(HugeTask task) { } throw new HugeException("Can't cancel task '%s' in status %s", - task.id(), task.status()); + task.id(), task.status()); } @Override diff 
--git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/TaskAndResultScheduler.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/TaskAndResultScheduler.java index 7011609867..6c99ef156d 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/TaskAndResultScheduler.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/TaskAndResultScheduler.java @@ -46,6 +46,7 @@ * Base class of task & result scheduler */ public abstract class TaskAndResultScheduler implements TaskScheduler { + /** * Which graph the scheduler belongs to */ @@ -61,8 +62,8 @@ public abstract class TaskAndResultScheduler implements TaskScheduler { private final ServerInfoManager serverManager; public TaskAndResultScheduler( - HugeGraphParams graph, - ExecutorService serverInfoDbExecutor) { + HugeGraphParams graph, + ExecutorService serverInfoDbExecutor) { E.checkNotNull(graph, "graph"); this.graph = graph; @@ -90,7 +91,7 @@ public void save(HugeTask task) { // Save result outcome if (rawResult != null) { HugeTaskResult result = - new HugeTaskResult(HugeTaskResult.genId(task.id())); + new HugeTaskResult(HugeTaskResult.genId(task.id())); result.result(rawResult); this.call(() -> { @@ -164,7 +165,7 @@ protected Iterator> queryTask(Map conditions, } Iterator vertices = this.tx().queryTaskInfos(query); Iterator> tasks = - new MapperIterator<>(vertices, HugeTask::fromVertex); + new MapperIterator<>(vertices, HugeTask::fromVertex); // Convert iterator to list to avoid across thread tx accessed return QueryResults.toList(tasks); }); @@ -180,16 +181,16 @@ protected Iterator> queryTask(Map conditions, protected Iterator> queryTask(List ids) { ListIterator> ts = this.call( - () -> { - Object[] idArray = ids.toArray(new Id[ids.size()]); - Iterator vertices = this.tx() - .queryTaskInfos(idArray); - Iterator> tasks = - new MapperIterator<>(vertices, - HugeTask::fromVertex); - // Convert iterator to list to avoid across thread tx accessed - return QueryResults.toList(tasks); - }); + () -> { + Object[] idArray = ids.toArray(new Id[ids.size()]); + Iterator vertices = this.tx() + .queryTaskInfos(idArray); + Iterator> tasks = + new MapperIterator<>(vertices, + HugeTask::fromVertex); + // Convert iterator to list to avoid across thread tx accessed + return QueryResults.toList(tasks); + }); Iterator results = queryTaskResult(ids); @@ -201,7 +202,7 @@ protected Iterator> queryTask(List ids) { return new MapperIterator<>(ts, (task) -> { HugeTaskResult taskResult = - resultCaches.get(HugeTaskResult.genId(task.id())); + resultCaches.get(HugeTaskResult.genId(task.id())); if (taskResult != null) { task.result(taskResult); } @@ -231,7 +232,7 @@ protected Iterator> tasksWithoutResult(List ids) { Object[] idArray = ids.toArray(new Id[ids.size()]); Iterator vertices = this.tx().queryTaskInfos(idArray); Iterator> tasks = - new MapperIterator<>(vertices, HugeTask::fromVertex); + new MapperIterator<>(vertices, HugeTask::fromVertex); // Convert iterator to list to avoid across thread tx accessed return QueryResults.toList(tasks); }); @@ -254,7 +255,7 @@ protected Iterator> queryTaskWithoutResult(String key, } protected Iterator> queryTaskWithoutResult(Map conditions, long limit, String page) { + Object> conditions, long limit, String page) { return this.call(() -> { ConditionQuery query = new ConditionQuery(HugeType.TASK); if (page != null) { @@ -272,7 +273,7 @@ protected Iterator> queryTaskWithoutResult(Map vertices = this.tx().queryTaskInfos(query); 
Iterator> tasks = - new MapperIterator<>(vertices, HugeTask::fromVertex); + new MapperIterator<>(vertices, HugeTask::fromVertex); // Convert iterator to list to avoid across thread tx accessed return QueryResults.toList(tasks); }); @@ -281,7 +282,7 @@ protected Iterator> queryTaskWithoutResult(Map { Iterator vertices = - this.tx().queryTaskInfos(HugeTaskResult.genId(taskid)); + this.tx().queryTaskInfos(HugeTaskResult.genId(taskid)); Vertex vertex = QueryResults.one(vertices); if (vertex == null) { return null; @@ -296,12 +297,12 @@ protected HugeTaskResult queryTaskResult(Id taskid) { protected Iterator queryTaskResult(List taskIds) { return this.call(() -> { Object[] idArray = - taskIds.stream().map(HugeTaskResult::genId).toArray(); + taskIds.stream().map(HugeTaskResult::genId).toArray(); Iterator vertices = this.tx() .queryTaskInfos(idArray); Iterator tasks = - new MapperIterator<>(vertices, - HugeTaskResult::fromVertex); + new MapperIterator<>(vertices, + HugeTaskResult::fromVertex); // Convert iterator to list to avoid across thread tx accessed return QueryResults.toList(tasks); }); From b332674a2f1aa132874e0f832be13253dab1bd77 Mon Sep 17 00:00:00 2001 From: Tsukilc <153273766+Tsukilc@users.noreply.github.com> Date: Fri, 16 Jan 2026 18:05:32 +0800 Subject: [PATCH 28/31] fix(server): fix some issues of the distributed scheduler --- .../task/DistributedTaskScheduler.java | 10 +- .../hugegraph/task/StandardTaskScheduler.java | 1 + .../apache/hugegraph/core/TaskCoreTest.java | 315 +++++++++--------- 3 files changed, 163 insertions(+), 163 deletions(-) diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/DistributedTaskScheduler.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/DistributedTaskScheduler.java index e5ff6d9521..083c38a9d4 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/DistributedTaskScheduler.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/DistributedTaskScheduler.java @@ -357,14 +357,10 @@ protected HugeTask deleteFromDB(Id id) { public HugeTask delete(Id id, boolean force) { HugeTask task = this.taskWithoutResult(id); - if (!force) { + if (!force && !task.completed() && task.status() != TaskStatus.DELETING) { // Check task status: can't delete running tasks without force - if (!task.completed() && task.status() != TaskStatus.DELETING) { - throw new IllegalArgumentException( - String.format("Can't delete incomplete task '%s' in status %s, " + - "Please try to cancel the task first", - id, task.status())); - } + this.updateStatus(id, null, TaskStatus.DELETING); + return null; // Already in DELETING status, delete directly from DB // Completed tasks can also be deleted directly } diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/StandardTaskScheduler.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/StandardTaskScheduler.java index 315cdbca47..79dd98c0f4 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/StandardTaskScheduler.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/StandardTaskScheduler.java @@ -578,6 +578,7 @@ public V call(Callable callable) { } } + @Deprecated private void checkOnMasterNode(String op) { // Single-node mode: all operations are allowed, no role check needed } diff --git a/hugegraph-server/hugegraph-test/src/main/java/org/apache/hugegraph/core/TaskCoreTest.java 
b/hugegraph-server/hugegraph-test/src/main/java/org/apache/hugegraph/core/TaskCoreTest.java index 78d84fe503..3811a46f02 100644 --- a/hugegraph-server/hugegraph-test/src/main/java/org/apache/hugegraph/core/TaskCoreTest.java +++ b/hugegraph-server/hugegraph-test/src/main/java/org/apache/hugegraph/core/TaskCoreTest.java @@ -17,8 +17,11 @@ package org.apache.hugegraph.core; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; +import java.util.Iterator; +import java.util.List; +import java.util.Random; +import java.util.concurrent.TimeoutException; + import org.apache.hugegraph.HugeException; import org.apache.hugegraph.HugeGraph; import org.apache.hugegraph.api.job.GremlinAPI.GremlinRequest; @@ -29,8 +32,8 @@ import org.apache.hugegraph.job.EphemeralJobBuilder; import org.apache.hugegraph.job.GremlinJob; import org.apache.hugegraph.job.JobBuilder; -import org.apache.hugegraph.task.StandardTaskScheduler; import org.apache.hugegraph.task.HugeTask; +import org.apache.hugegraph.task.StandardTaskScheduler; import org.apache.hugegraph.task.TaskCallable; import org.apache.hugegraph.task.TaskScheduler; import org.apache.hugegraph.task.TaskStatus; @@ -39,10 +42,8 @@ import org.junit.Before; import org.junit.Test; -import java.util.Iterator; -import java.util.List; -import java.util.Random; -import java.util.concurrent.TimeoutException; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; public class TaskCoreTest extends BaseCoreTest { @@ -76,12 +77,14 @@ public void testTask() throws TimeoutException { Assert.assertEquals(id, task.id()); Assert.assertFalse(task.completed()); - Assert.assertThrows(IllegalArgumentException.class, () -> { - scheduler.delete(id, false); - }, e -> { - Assert.assertContains("Can't delete incomplete task '88888'", - e.getMessage()); - }); + if (scheduler.getClass().equals(StandardTaskScheduler.class)) { + Assert.assertThrows(IllegalArgumentException.class, () -> { + scheduler.delete(id, false); + }, e -> { + Assert.assertContains("Can't delete incomplete task '88888'", + e.getMessage()); + }); + } task = scheduler.waitUntilTaskCompleted(task.id(), 10); Assert.assertEquals(id, task.id()); @@ -90,7 +93,7 @@ public void testTask() throws TimeoutException { Assert.assertEquals("test-task", scheduler.task(id).name()); Assert.assertEquals("test-task", scheduler.tasks(List.of(id)) - .next().name()); + .next().name()); Iterator> iter = scheduler.tasks(ImmutableList.of(id)); Assert.assertTrue(iter.hasNext()); @@ -144,7 +147,7 @@ protected void done() { new HugeTask<>(id, null, callable); }, e -> { Assert.assertContains("Invalid task id type, it must be number", - e.getMessage()); + e.getMessage()); }); Assert.assertThrows(NullPointerException.class, () -> { @@ -178,18 +181,18 @@ public void testEphemeralJob() throws TimeoutException { EphemeralJobBuilder builder = EphemeralJobBuilder.of(graph); builder.name("test-job-ephemeral") - .job(new EphemeralJob() { - @Override - public String type() { - return "test"; - } - - @Override - public Object execute() throws Exception { - sleepAWhile(); - return ImmutableMap.of("k1", 13579, "k2", "24680"); - } - }); + .job(new EphemeralJob() { + @Override + public String type() { + return "test"; + } + + @Override + public Object execute() throws Exception { + sleepAWhile(); + return ImmutableMap.of("k1", 13579, "k2", "24680"); + } + }); HugeTask task = builder.schedule(); Assert.assertEquals("test-job-ephemeral", task.name()); @@ -226,8 +229,8 @@ public void 
testGremlinJob() throws TimeoutException { JobBuilder builder = JobBuilder.of(graph); builder.name("test-job-gremlin") - .input(request.toJson()) - .job(new GremlinJob()); + .input(request.toJson()) + .job(new GremlinJob()); HugeTask task = builder.schedule(); Assert.assertEquals("test-job-gremlin", task.name()); @@ -254,22 +257,22 @@ public void testGremlinJobWithScript() throws TimeoutException { TaskScheduler scheduler = graph.taskScheduler(); String script = "schema=graph.schema();" + - "schema.propertyKey('name').asText().ifNotExist().create();" + - "schema.propertyKey('age').asInt().ifNotExist().create();" + - "schema.propertyKey('lang').asText().ifNotExist().create();" + - "schema.propertyKey('date').asDate().ifNotExist().create();" + - "schema.propertyKey('price').asInt().ifNotExist().create();" + - "schema.vertexLabel('person1').properties('name','age').ifNotExist()" + - ".create();" + - "schema.vertexLabel('person2').properties('name','age').ifNotExist()" + - ".create();" + - "schema.edgeLabel('knows').sourceLabel('person1').targetLabel('person2')." + - "properties('date').ifNotExist().create();" + - "for(int i = 0; i < 1000; i++) {" + - " p1=graph.addVertex(T.label,'person1','name','p1-'+i,'age',29);" + - " p2=graph.addVertex(T.label,'person2','name','p2-'+i,'age',27);" + - " p1.addEdge('knows',p2,'date','2016-01-10');" + - "}"; + "schema.propertyKey('name').asText().ifNotExist().create();" + + "schema.propertyKey('age').asInt().ifNotExist().create();" + + "schema.propertyKey('lang').asText().ifNotExist().create();" + + "schema.propertyKey('date').asDate().ifNotExist().create();" + + "schema.propertyKey('price').asInt().ifNotExist().create();" + + "schema.vertexLabel('person1').properties('name','age').ifNotExist()" + + ".create();" + + "schema.vertexLabel('person2').properties('name','age').ifNotExist()" + + ".create();" + + "schema.edgeLabel('knows').sourceLabel('person1').targetLabel('person2')." 
+ + "properties('date').ifNotExist().create();" + + "for(int i = 0; i < 1000; i++) {" + + " p1=graph.addVertex(T.label,'person1','name','p1-'+i,'age',29);" + + " p2=graph.addVertex(T.label,'person2','name','p2-'+i,'age',27);" + + " p1.addEdge('knows',p2,'date','2016-01-10');" + + "}"; HugeTask task = runGremlinJob(script); task = scheduler.waitUntilTaskCompleted(task.id(), 10); @@ -315,27 +318,27 @@ public void testGremlinJobWithSerializedResults() throws TimeoutException { TaskScheduler scheduler = graph.taskScheduler(); String script = "schema=graph.schema();" + - "schema.propertyKey('name').asText().ifNotExist().create();" + - "schema.vertexLabel('char').useCustomizeNumberId()" + - " .properties('name').ifNotExist().create();" + - "schema.edgeLabel('next').sourceLabel('char').targetLabel('char')" + - " .properties('name').ifNotExist().create();" + - "g.addV('char').property(id,1).property('name','A').as('a')" + - " .addV('char').property(id,2).property('name','B').as('b')" + - " .addV('char').property(id,3).property('name','C').as('c')" + - " .addV('char').property(id,4).property('name','D').as('d')" + - " .addV('char').property(id,5).property('name','E').as('e')" + - " .addV('char').property(id,6).property('name','F').as('f')" + - " .addE('next').from('a').to('b').property('name','ab')" + - " .addE('next').from('b').to('c').property('name','bc')" + - " .addE('next').from('b').to('d').property('name','bd')" + - " .addE('next').from('c').to('d').property('name','cd')" + - " .addE('next').from('c').to('e').property('name','ce')" + - " .addE('next').from('d').to('e').property('name','de')" + - " .addE('next').from('e').to('f').property('name','ef')" + - " .addE('next').from('f').to('d').property('name','fd')" + - " .iterate();" + - "g.tx().commit(); g.E().count();"; + "schema.propertyKey('name').asText().ifNotExist().create();" + + "schema.vertexLabel('char').useCustomizeNumberId()" + + " .properties('name').ifNotExist().create();" + + "schema.edgeLabel('next').sourceLabel('char').targetLabel('char')" + + " .properties('name').ifNotExist().create();" + + "g.addV('char').property(id,1).property('name','A').as('a')" + + " .addV('char').property(id,2).property('name','B').as('b')" + + " .addV('char').property(id,3).property('name','C').as('c')" + + " .addV('char').property(id,4).property('name','D').as('d')" + + " .addV('char').property(id,5).property('name','E').as('e')" + + " .addV('char').property(id,6).property('name','F').as('f')" + + " .addE('next').from('a').to('b').property('name','ab')" + + " .addE('next').from('b').to('c').property('name','bc')" + + " .addE('next').from('b').to('d').property('name','bd')" + + " .addE('next').from('c').to('d').property('name','cd')" + + " .addE('next').from('c').to('e').property('name','ce')" + + " .addE('next').from('d').to('e').property('name','de')" + + " .addE('next').from('e').to('f').property('name','ef')" + + " .addE('next').from('f').to('d').property('name','fd')" + + " .iterate();" + + "g.tx().commit(); g.E().count();"; HugeTask task = runGremlinJob(script); task = scheduler.waitUntilTaskCompleted(task.id(), 10); @@ -351,15 +354,15 @@ public void testGremlinJobWithSerializedResults() throws TimeoutException { task = scheduler.waitUntilTaskCompleted(task.id(), 10); Assert.assertEquals(TaskStatus.SUCCESS, task.status()); String expected = String.format("[{\"labels\":[[],[],[]],\"objects\":[" + - "{\"id\":1,\"label\":\"char\",\"type\":\"vertex\"," + - "\"properties\":{\"name\":\"A\"}}," + - "{\"id\":\"L1>%s>%s>>L2\",\"label\":\"next\"," + - 
"\"type\":\"edge\",\"outV\":1," + - "\"outVLabel\":\"char\",\"inV\":2,\"" + - "inVLabel\":\"char\",\"properties\":{\"name\":\"ab\"}}," + - "{\"id\":2,\"label\":\"char\",\"type\":\"vertex\"," + - "\"properties\":{\"name\":\"B\"}}" + - "]}]", edgeLabelId, edgeLabelId); + "{\"id\":1,\"label\":\"char\",\"type\":\"vertex\"," + + "\"properties\":{\"name\":\"A\"}}," + + "{\"id\":\"L1>%s>%s>>L2\",\"label\":\"next\"," + + "\"type\":\"edge\",\"outV\":1," + + "\"outVLabel\":\"char\",\"inV\":2,\"" + + "inVLabel\":\"char\",\"properties\":{\"name\":\"ab\"}}," + + "{\"id\":2,\"label\":\"char\",\"type\":\"vertex\"," + + "\"properties\":{\"name\":\"B\"}}" + + "]}]", edgeLabelId, edgeLabelId); Assert.assertEquals(expected, task.result()); script = "g.V(1).out().out().path()"; @@ -367,19 +370,19 @@ public void testGremlinJobWithSerializedResults() throws TimeoutException { task = scheduler.waitUntilTaskCompleted(task.id(), 10); Assert.assertEquals(TaskStatus.SUCCESS, task.status()); expected = "[{\"labels\":[[],[],[]],\"objects\":[" + - "{\"id\":1,\"label\":\"char\",\"type\":\"vertex\"," + - "\"properties\":{\"name\":\"A\"}}," + - "{\"id\":2,\"label\":\"char\",\"type\":\"vertex\"," + - "\"properties\":{\"name\":\"B\"}}," + - "{\"id\":3,\"label\":\"char\",\"type\":\"vertex\"," + - "\"properties\":{\"name\":\"C\"}}]}," + - "{\"labels\":[[],[],[]],\"objects\":[" + - "{\"id\":1,\"label\":\"char\",\"type\":\"vertex\"," + - "\"properties\":{\"name\":\"A\"}}," + - "{\"id\":2,\"label\":\"char\",\"type\":\"vertex\"," + - "\"properties\":{\"name\":\"B\"}}," + - "{\"id\":4,\"label\":\"char\",\"type\":\"vertex\"," + - "\"properties\":{\"name\":\"D\"}}]}]"; + "{\"id\":1,\"label\":\"char\",\"type\":\"vertex\"," + + "\"properties\":{\"name\":\"A\"}}," + + "{\"id\":2,\"label\":\"char\",\"type\":\"vertex\"," + + "\"properties\":{\"name\":\"B\"}}," + + "{\"id\":3,\"label\":\"char\",\"type\":\"vertex\"," + + "\"properties\":{\"name\":\"C\"}}]}," + + "{\"labels\":[[],[],[]],\"objects\":[" + + "{\"id\":1,\"label\":\"char\",\"type\":\"vertex\"," + + "\"properties\":{\"name\":\"A\"}}," + + "{\"id\":2,\"label\":\"char\",\"type\":\"vertex\"," + + "\"properties\":{\"name\":\"B\"}}," + + "{\"id\":4,\"label\":\"char\",\"type\":\"vertex\"," + + "\"properties\":{\"name\":\"D\"}}]}]"; Assert.assertEquals(expected, task.result()); script = "g.V(1).outE().inV().tree()"; @@ -387,16 +390,16 @@ public void testGremlinJobWithSerializedResults() throws TimeoutException { task = scheduler.waitUntilTaskCompleted(task.id(), 10); Assert.assertEquals(TaskStatus.SUCCESS, task.status()); expected = String.format("[[{\"key\":{\"id\":1,\"label\":\"char\",\"type\":\"vertex\"," + - "\"properties\":{\"name\":\"A\"}}," + - "\"value\":[" + - "{\"key\":{\"id\":\"L1>%s>%s>>L2\",\"label\":\"next\"," + - "\"type\":\"edge\",\"outV\":1," + - "\"outVLabel\":\"char\",\"inV\":2,\"inVLabel\":\"char\"," + - "\"properties\":{\"name\":\"ab\"}}," + - "\"value\":[{\"key\":{\"id\":2,\"label\":\"char\"," + - "\"type\":\"vertex\"," + - "\"properties\":{\"name\":\"B\"}},\"value\":[]}]}]}]]", - edgeLabelId, edgeLabelId); + "\"properties\":{\"name\":\"A\"}}," + + "\"value\":[" + + "{\"key\":{\"id\":\"L1>%s>%s>>L2\",\"label\":\"next\"," + + "\"type\":\"edge\",\"outV\":1," + + "\"outVLabel\":\"char\",\"inV\":2,\"inVLabel\":\"char\"," + + "\"properties\":{\"name\":\"ab\"}}," + + "\"value\":[{\"key\":{\"id\":2,\"label\":\"char\"," + + "\"type\":\"vertex\"," + + "\"properties\":{\"name\":\"B\"}},\"value\":[]}]}]}]]", + edgeLabelId, edgeLabelId); Assert.assertEquals(expected, 
task.result()); script = "g.V(1).out().out().tree()"; @@ -404,14 +407,14 @@ public void testGremlinJobWithSerializedResults() throws TimeoutException { task = scheduler.waitUntilTaskCompleted(task.id(), 10); Assert.assertEquals(TaskStatus.SUCCESS, task.status()); expected = "[[{\"key\":{\"id\":1,\"label\":\"char\",\"type\":\"vertex\"," + - "\"properties\":{\"name\":\"A\"}}," + - "\"value\":[{\"key\":{\"id\":2,\"label\":\"char\",\"type\":\"vertex\"," + - "\"properties\":{\"name\":\"B\"}}," + - "\"value\":[" + - "{\"key\":{\"id\":3,\"label\":\"char\",\"type\":\"vertex\",\"properties\":" + - "{\"name\":\"C\"}},\"value\":[]}," + - "{\"key\":{\"id\":4,\"label\":\"char\",\"type\":\"vertex\",\"properties\":" + - "{\"name\":\"D\"}},\"value\":[]}]}]}]]"; + "\"properties\":{\"name\":\"A\"}}," + + "\"value\":[{\"key\":{\"id\":2,\"label\":\"char\",\"type\":\"vertex\"," + + "\"properties\":{\"name\":\"B\"}}," + + "\"value\":[" + + "{\"key\":{\"id\":3,\"label\":\"char\",\"type\":\"vertex\",\"properties\":" + + "{\"name\":\"C\"}},\"value\":[]}," + + "{\"key\":{\"id\":4,\"label\":\"char\",\"type\":\"vertex\",\"properties\":" + + "{\"name\":\"D\"}},\"value\":[]}]}]}]]"; Assert.assertEquals(expected, task.result()); } @@ -422,8 +425,8 @@ public void testGremlinJobWithFailure() throws TimeoutException { JobBuilder builder = JobBuilder.of(graph); builder.name("test-job-gremlin") - .input("") - .job(new GremlinJob()); + .input("") + .job(new GremlinJob()); HugeTask task = builder.schedule(); task = scheduler.waitUntilTaskCompleted(task.id(), 10); Assert.assertEquals(TaskStatus.FAILED, task.status()); @@ -431,7 +434,7 @@ public void testGremlinJobWithFailure() throws TimeoutException { builder = JobBuilder.of(graph); builder.name("test-job-gremlin") - .job(new GremlinJob()); + .job(new GremlinJob()); task = builder.schedule(); scheduler.waitUntilTaskCompleted(task.id(), 10); task = scheduler.task(task.id()); @@ -440,8 +443,8 @@ public void testGremlinJobWithFailure() throws TimeoutException { builder = JobBuilder.of(graph); builder.name("test-job-gremlin") - .input("{}") - .job(new GremlinJob()); + .input("{}") + .job(new GremlinJob()); task = builder.schedule(); task = scheduler.waitUntilTaskCompleted(task.id(), 10); Assert.assertEquals(TaskStatus.FAILED, task.status()); @@ -449,8 +452,8 @@ public void testGremlinJobWithFailure() throws TimeoutException { builder = JobBuilder.of(graph); builder.name("test-job-gremlin") - .input("{\"gremlin\":8}") - .job(new GremlinJob()); + .input("{\"gremlin\":8}") + .job(new GremlinJob()); task = builder.schedule(); task = scheduler.waitUntilTaskCompleted(task.id(), 10); Assert.assertEquals(TaskStatus.FAILED, task.status()); @@ -458,8 +461,8 @@ public void testGremlinJobWithFailure() throws TimeoutException { builder = JobBuilder.of(graph); builder.name("test-job-gremlin") - .input("{\"gremlin\":\"\"}") - .job(new GremlinJob()); + .input("{\"gremlin\":\"\"}") + .job(new GremlinJob()); task = builder.schedule(); task = scheduler.waitUntilTaskCompleted(task.id(), 10); Assert.assertEquals(TaskStatus.FAILED, task.status()); @@ -467,8 +470,8 @@ public void testGremlinJobWithFailure() throws TimeoutException { builder = JobBuilder.of(graph); builder.name("test-job-gremlin") - .input("{\"gremlin\":\"\", \"bindings\":\"\"}") - .job(new GremlinJob()); + .input("{\"gremlin\":\"\", \"bindings\":\"\"}") + .job(new GremlinJob()); task = builder.schedule(); task = scheduler.waitUntilTaskCompleted(task.id(), 10); Assert.assertEquals(TaskStatus.FAILED, task.status()); @@ -476,8 +479,8 @@ 
public void testGremlinJobWithFailure() throws TimeoutException { builder = JobBuilder.of(graph); builder.name("test-job-gremlin") - .input("{\"gremlin\":\"\", \"bindings\":{}}") - .job(new GremlinJob()); + .input("{\"gremlin\":\"\", \"bindings\":{}}") + .job(new GremlinJob()); task = builder.schedule(); task = scheduler.waitUntilTaskCompleted(task.id(), 10); Assert.assertEquals(TaskStatus.FAILED, task.status()); @@ -485,8 +488,8 @@ public void testGremlinJobWithFailure() throws TimeoutException { builder = JobBuilder.of(graph); builder.name("test-job-gremlin") - .input("{\"gremlin\":\"\", \"bindings\":{}, \"language\":{}}") - .job(new GremlinJob()); + .input("{\"gremlin\":\"\", \"bindings\":{}, \"language\":{}}") + .job(new GremlinJob()); task = builder.schedule(); task = scheduler.waitUntilTaskCompleted(task.id(), 10); Assert.assertEquals(TaskStatus.FAILED, task.status()); @@ -494,8 +497,8 @@ public void testGremlinJobWithFailure() throws TimeoutException { builder = JobBuilder.of(graph); builder.name("test-job-gremlin") - .input("{\"gremlin\":\"\", \"bindings\":{}, \"language\":\"\"}") - .job(new GremlinJob()); + .input("{\"gremlin\":\"\", \"bindings\":{}, \"language\":\"\"}") + .job(new GremlinJob()); task = builder.schedule(); task = scheduler.waitUntilTaskCompleted(task.id(), 10); Assert.assertEquals(TaskStatus.FAILED, task.status()); @@ -503,14 +506,14 @@ public void testGremlinJobWithFailure() throws TimeoutException { builder = JobBuilder.of(graph); builder.name("test-job-gremlin") - .input("{\"gremlin\":\"\", \"bindings\":{}, " + - "\"language\":\"test\", \"aliases\":{}}") - .job(new GremlinJob()); + .input("{\"gremlin\":\"\", \"bindings\":{}, " + + "\"language\":\"test\", \"aliases\":{}}") + .job(new GremlinJob()); task = builder.schedule(); task = scheduler.waitUntilTaskCompleted(task.id(), 10); Assert.assertEquals(TaskStatus.FAILED, task.status()); Assert.assertContains("test is not an available GremlinScriptEngine", - task.result()); + task.result()); } @Test @@ -519,16 +522,16 @@ public void testGremlinJobWithError() throws TimeoutException { Assert.assertThrows(IllegalArgumentException.class, () -> { JobBuilder.of(graph) - .job(new GremlinJob()) - .schedule(); + .job(new GremlinJob()) + .schedule(); }, e -> { Assert.assertContains("Job name can't be null", e.getMessage()); }); Assert.assertThrows(IllegalArgumentException.class, () -> { JobBuilder.of(graph) - .name("test-job-gremlin") - .schedule(); + .name("test-job-gremlin") + .schedule(); }, e -> { Assert.assertContains("Job callable can't be null", e.getMessage()); }); @@ -546,7 +549,7 @@ public void testGremlinJobWithError() throws TimeoutException { }, e -> { Assert.assertContains("Task input size", e.getMessage()); Assert.assertContains("exceeded limit 16777216 bytes", - e.getMessage()); + e.getMessage()); }); } @@ -573,7 +576,7 @@ public void testGremlinJobAndCancel() throws TimeoutException { Assert.assertEquals(TaskStatus.CANCELLED, task.status()); Assert.assertEquals("test-gremlin-job", task.name()); Assert.assertTrue(task.result(), task.result() == null || - task.result().endsWith("InterruptedException")); + task.result().endsWith("InterruptedException")); // Cancel success task HugeTask task2 = runGremlinJob("1+2"); @@ -593,22 +596,22 @@ public void testGremlinJobAndCancel() throws TimeoutException { task3 = scheduler.task(task3.id()); Assert.assertEquals(TaskStatus.FAILED, task3.status()); Assert.assertContains("LimitExceedException: Job results size 800001 " + - "has exceeded the max limit 800000", - 
task3.result()); + "has exceeded the max limit 800000", + task3.result()); // Cancel failure task with big results (task exceeded limit 16M) String bigResults = "def random = new Random(); def rs=[];" + - "for (i in 0..4) {" + - " def len = 1024 * 1024;" + - " def item = new StringBuilder(len);" + - " for (j in 0..len) { " + - " item.append(\"node:\"); " + - " item.append((char) random.nextInt(256)); " + - " item.append(\",\"); " + - " };" + - " rs.add(item);" + - "};" + - "rs;"; + "for (i in 0..4) {" + + " def len = 1024 * 1024;" + + " def item = new StringBuilder(len);" + + " for (j in 0..len) { " + + " item.append(\"node:\"); " + + " item.append((char) random.nextInt(256)); " + + " item.append(\",\"); " + + " };" + + " rs.add(item);" + + "};" + + "rs;"; HugeTask task4 = runGremlinJob(bigResults); task4 = scheduler.waitUntilTaskCompleted(task4.id(), 10); Assert.assertEquals(TaskStatus.FAILED, task4.status()); @@ -616,9 +619,9 @@ public void testGremlinJobAndCancel() throws TimeoutException { task4 = scheduler.task(task4.id()); Assert.assertEquals(TaskStatus.FAILED, task4.status()); Assert.assertContains("LimitExceedException: Task result size", - task4.result()); + task4.result()); Assert.assertContains("exceeded limit 16777216 bytes", - task4.result()); + task4.result()); } @Test @@ -627,11 +630,11 @@ public void testGremlinJobAndRestore() throws Exception { TaskScheduler scheduler = graph.taskScheduler(); String gremlin = "println('task start');" + - "for(int i=gremlinJob.progress(); i<=10; i++) {" + - " gremlinJob.updateProgress(i);" + - " Thread.sleep(200); " + - " println('sleep=>'+i);" + - "}; 100;"; + "for(int i=gremlinJob.progress(); i<=10; i++) {" + + " gremlinJob.updateProgress(i);" + + " Thread.sleep(200); " + + " println('sleep=>'+i);" + + "}; 100;"; HugeTask task = runGremlinJob(gremlin); sleepAWhile(200 * 6); @@ -646,7 +649,7 @@ public void testGremlinJobAndRestore() throws Exception { task = scheduler.waitUntilTaskCompleted(task.id(), 10); Assert.assertEquals(TaskStatus.CANCELLED, task.status()); Assert.assertTrue("progress=" + task.progress(), - 0 < task.progress() && task.progress() < 10); + 0 < task.progress() && task.progress() < 10); Assert.assertEquals(0, task.retries()); Assert.assertNull(task.result()); @@ -694,8 +697,8 @@ private HugeTask runGremlinJob(String gremlin) { JobBuilder builder = JobBuilder.of(graph); builder.name("test-gremlin-job") - .input(request.toJson()) - .job(new GremlinJob()); + .input(request.toJson()) + .job(new GremlinJob()); return builder.schedule(); } From 28e0390a28f421090e2458a01725e653c8572961 Mon Sep 17 00:00:00 2001 From: Tsukilc <153273766+Tsukilc@users.noreply.github.com> Date: Tue, 10 Feb 2026 18:21:00 +0800 Subject: [PATCH 29/31] fix(server): fix some issues of the distributed scheduler --- .../apache/hugegraph/core/GraphManager.java | 20 +++++++++++++++---- .../apache/hugegraph/StandardHugeGraph.java | 8 +------- .../apache/hugegraph/config/CoreOptions.java | 17 ++++++++-------- .../task/DistributedTaskScheduler.java | 2 +- .../store/hstore/HstoreSessionsImpl.java | 6 ++++-- 5 files changed, 30 insertions(+), 23 deletions(-) diff --git a/hugegraph-server/hugegraph-api/src/main/java/org/apache/hugegraph/core/GraphManager.java b/hugegraph-server/hugegraph-api/src/main/java/org/apache/hugegraph/core/GraphManager.java index 991076f742..36d0980698 100644 --- a/hugegraph-server/hugegraph-api/src/main/java/org/apache/hugegraph/core/GraphManager.java +++ 
b/hugegraph-server/hugegraph-api/src/main/java/org/apache/hugegraph/core/GraphManager.java @@ -33,11 +33,11 @@ import java.util.Map; import java.util.Objects; import java.util.Set; +import java.util.UUID; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; -import java.util.UUID; import java.util.function.Consumer; import java.util.stream.Collectors; @@ -69,6 +69,7 @@ import org.apache.hugegraph.config.TypedOption; import org.apache.hugegraph.event.EventHub; import org.apache.hugegraph.exception.ExistedException; +import org.apache.hugegraph.exception.NotFoundException; import org.apache.hugegraph.exception.NotSupportException; import org.apache.hugegraph.io.HugeGraphSONModule; import org.apache.hugegraph.k8s.K8sDriver; @@ -197,7 +198,10 @@ public GraphManager(HugeConfig conf, EventHub hub) { LOG.info("Init graph manager"); E.checkArgumentNotNull(conf, "The config can't be null"); - // Auto-generate server.id if not configured + // Auto-generate server.id if not configured. + // Random generation is to prevent duplicate id error reports.This id is currently + // meaningless and needs to be completely removed serverInfoManager in + // the future String server = conf.get(ServerOptions.SERVER_ID); if (StringUtils.isEmpty(server)) { server = "server-" + UUID.randomUUID().toString().substring(0, 8); @@ -280,7 +284,7 @@ private static String serviceId(String graphSpace, Service.ServiceType type, .replace("_", "-").toLowerCase(); } - private boolean usePD() { + public boolean usePD() { return this.PDExist; } @@ -1561,6 +1565,14 @@ private void loadGraph(String name, String graphConfPath) { String raftGroupPeers = this.conf.get(ServerOptions.RAFT_GROUP_PEERS); config.addProperty(ServerOptions.RAFT_GROUP_PEERS.name(), raftGroupPeers); + + // Transfer `pd.peers` from server config to graph config + // Only inject if not already configured in graph config + if (!config.containsKey("pd.peers")) { + String pdPeers = this.conf.get(ServerOptions.PD_PEERS); + config.addProperty("pd.peers", pdPeers); + } + this.transferRoleWorkerConfig(config); Graph graph = GraphFactory.open(config); @@ -1960,7 +1972,7 @@ public HugeGraph graph(String graphSpace, String name) { } else if (graph instanceof HugeGraph) { return (HugeGraph) graph; } - throw new NotSupportException("graph instance of %s", graph.getClass()); + throw new NotFoundException(String.format("Graph '%s' does not exist", name)); } public void dropGraphLocal(String name) { diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/StandardHugeGraph.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/StandardHugeGraph.java index cb085ae310..fec8c9f9ce 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/StandardHugeGraph.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/StandardHugeGraph.java @@ -247,13 +247,6 @@ public StandardHugeGraph(HugeConfig config) { throw new HugeException(message); } - if (isHstore()) { - // TODO: parameterize the remaining configurations - MetaManager.instance().connect("hg", MetaManager.MetaDriverType.PD, - "ca", "ca", "ca", - config.get(CoreOptions.PD_PEERS)); - } - try { this.tx = new TinkerPopTransaction(this); boolean supportsPersistence = this.backendStoreFeatures().supportsPersistence(); @@ -1630,6 +1623,7 @@ public void submitEphemeralJob(EphemeralJob job) { @Override public String schedulerType() { // Use 
distributed scheduler for hstore backend, otherwise use local + // After the merger of rocksdb and hstore, consider whether to change this logic return StandardHugeGraph.this.isHstore() ? "distributed" : "local"; } } diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/config/CoreOptions.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/config/CoreOptions.java index 72a2da9324..d2f244dfab 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/config/CoreOptions.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/config/CoreOptions.java @@ -17,14 +17,19 @@ package org.apache.hugegraph.config; +import static org.apache.hugegraph.backend.query.Query.COMMIT_BATCH; +import static org.apache.hugegraph.config.OptionChecker.allowValues; +import static org.apache.hugegraph.config.OptionChecker.disallowEmpty; +import static org.apache.hugegraph.config.OptionChecker.nonNegativeInt; +import static org.apache.hugegraph.config.OptionChecker.positiveInt; +import static org.apache.hugegraph.config.OptionChecker.rangeDouble; +import static org.apache.hugegraph.config.OptionChecker.rangeInt; + import org.apache.hugegraph.backend.query.Query; import org.apache.hugegraph.backend.tx.GraphTransaction; import org.apache.hugegraph.type.define.CollectionType; import org.apache.hugegraph.util.Bytes; -import static org.apache.hugegraph.backend.query.Query.COMMIT_BATCH; -import static org.apache.hugegraph.config.OptionChecker.*; - public class CoreOptions extends OptionHolder { public static final int CPUS = Runtime.getRuntime().availableProcessors(); @@ -636,12 +641,6 @@ public class CoreOptions extends OptionHolder { disallowEmpty(), "./conf/resource-quota-template.yaml" ); - public static final ConfigOption PD_PEERS = new ConfigOption<>( - "pd.peers", - "The addresses of pd nodes, separated with commas.", - disallowEmpty(), - "127.0.0.1:8686" - ); public static final ConfigOption MEMORY_MODE = new ConfigOption<>( "memory.mode", "The memory mode used for query in HugeGraph.", diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/DistributedTaskScheduler.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/DistributedTaskScheduler.java index 083c38a9d4..7c143fb33d 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/DistributedTaskScheduler.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/DistributedTaskScheduler.java @@ -357,7 +357,7 @@ protected HugeTask deleteFromDB(Id id) { public HugeTask delete(Id id, boolean force) { HugeTask task = this.taskWithoutResult(id); - if (!force && !task.completed() && task.status() != TaskStatus.DELETING) { + if (!force && !task.completed()) { // Check task status: can't delete running tasks without force this.updateStatus(id, null, TaskStatus.DELETING); return null; diff --git a/hugegraph-server/hugegraph-hstore/src/main/java/org/apache/hugegraph/backend/store/hstore/HstoreSessionsImpl.java b/hugegraph-server/hugegraph-hstore/src/main/java/org/apache/hugegraph/backend/store/hstore/HstoreSessionsImpl.java index 2f98d03745..89d40d8eb4 100755 --- a/hugegraph-server/hugegraph-hstore/src/main/java/org/apache/hugegraph/backend/store/hstore/HstoreSessionsImpl.java +++ b/hugegraph-server/hugegraph-hstore/src/main/java/org/apache/hugegraph/backend/store/hstore/HstoreSessionsImpl.java @@ -37,7 +37,6 @@ import 
org.apache.hugegraph.backend.store.BackendEntry.BackendColumn; import org.apache.hugegraph.backend.store.BackendEntry.BackendColumnIterator; import org.apache.hugegraph.backend.store.BackendEntryIterator; -import org.apache.hugegraph.config.CoreOptions; import org.apache.hugegraph.config.HugeConfig; import org.apache.hugegraph.meta.PdMetaDriver.PDAuthConfig; import org.apache.hugegraph.pd.client.PDClient; @@ -111,7 +110,10 @@ private void initStoreNode(HugeConfig config) { if (!initializedNode) { synchronized (this) { if (!initializedNode) { - PDConfig pdConfig = PDConfig.of(config.get(CoreOptions.PD_PEERS)) + // Use hardcoded string to avoid dependency on ServerOptions + // The value is injected from ServerOptions.PD_PEERS by GraphManager + String pdPeers = config.getString("pd.peers"); + PDConfig pdConfig = PDConfig.of(pdPeers) .setAuthority(PDAuthConfig.service(), PDAuthConfig.token()) .setEnableCache(true); defaultPdClient = PDClient.create(pdConfig); From b70788fb0fefb8fd491cb9d631ece91acba53dd7 Mon Sep 17 00:00:00 2001 From: Tsukilc <153273766+Tsukilc@users.noreply.github.com> Date: Wed, 11 Feb 2026 21:09:43 +0800 Subject: [PATCH 30/31] Revert "fix(server): fix some issues of the distributed scheduler" This reverts commit 28e0390a28f421090e2458a01725e653c8572961. --- .../apache/hugegraph/core/GraphManager.java | 20 ++++--------------- .../apache/hugegraph/StandardHugeGraph.java | 8 +++++++- .../apache/hugegraph/config/CoreOptions.java | 17 ++++++++-------- .../task/DistributedTaskScheduler.java | 2 +- .../store/hstore/HstoreSessionsImpl.java | 6 ++---- 5 files changed, 23 insertions(+), 30 deletions(-) diff --git a/hugegraph-server/hugegraph-api/src/main/java/org/apache/hugegraph/core/GraphManager.java b/hugegraph-server/hugegraph-api/src/main/java/org/apache/hugegraph/core/GraphManager.java index 36d0980698..991076f742 100644 --- a/hugegraph-server/hugegraph-api/src/main/java/org/apache/hugegraph/core/GraphManager.java +++ b/hugegraph-server/hugegraph-api/src/main/java/org/apache/hugegraph/core/GraphManager.java @@ -33,11 +33,11 @@ import java.util.Map; import java.util.Objects; import java.util.Set; -import java.util.UUID; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; +import java.util.UUID; import java.util.function.Consumer; import java.util.stream.Collectors; @@ -69,7 +69,6 @@ import org.apache.hugegraph.config.TypedOption; import org.apache.hugegraph.event.EventHub; import org.apache.hugegraph.exception.ExistedException; -import org.apache.hugegraph.exception.NotFoundException; import org.apache.hugegraph.exception.NotSupportException; import org.apache.hugegraph.io.HugeGraphSONModule; import org.apache.hugegraph.k8s.K8sDriver; @@ -198,10 +197,7 @@ public GraphManager(HugeConfig conf, EventHub hub) { LOG.info("Init graph manager"); E.checkArgumentNotNull(conf, "The config can't be null"); - // Auto-generate server.id if not configured. 
- // Random generation is to prevent duplicate id error reports.This id is currently - // meaningless and needs to be completely removed serverInfoManager in - // the future + // Auto-generate server.id if not configured String server = conf.get(ServerOptions.SERVER_ID); if (StringUtils.isEmpty(server)) { server = "server-" + UUID.randomUUID().toString().substring(0, 8); @@ -284,7 +280,7 @@ private static String serviceId(String graphSpace, Service.ServiceType type, .replace("_", "-").toLowerCase(); } - public boolean usePD() { + private boolean usePD() { return this.PDExist; } @@ -1565,14 +1561,6 @@ private void loadGraph(String name, String graphConfPath) { String raftGroupPeers = this.conf.get(ServerOptions.RAFT_GROUP_PEERS); config.addProperty(ServerOptions.RAFT_GROUP_PEERS.name(), raftGroupPeers); - - // Transfer `pd.peers` from server config to graph config - // Only inject if not already configured in graph config - if (!config.containsKey("pd.peers")) { - String pdPeers = this.conf.get(ServerOptions.PD_PEERS); - config.addProperty("pd.peers", pdPeers); - } - this.transferRoleWorkerConfig(config); Graph graph = GraphFactory.open(config); @@ -1972,7 +1960,7 @@ public HugeGraph graph(String graphSpace, String name) { } else if (graph instanceof HugeGraph) { return (HugeGraph) graph; } - throw new NotFoundException(String.format("Graph '%s' does not exist", name)); + throw new NotSupportException("graph instance of %s", graph.getClass()); } public void dropGraphLocal(String name) { diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/StandardHugeGraph.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/StandardHugeGraph.java index fec8c9f9ce..cb085ae310 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/StandardHugeGraph.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/StandardHugeGraph.java @@ -247,6 +247,13 @@ public StandardHugeGraph(HugeConfig config) { throw new HugeException(message); } + if (isHstore()) { + // TODO: parameterize the remaining configurations + MetaManager.instance().connect("hg", MetaManager.MetaDriverType.PD, + "ca", "ca", "ca", + config.get(CoreOptions.PD_PEERS)); + } + try { this.tx = new TinkerPopTransaction(this); boolean supportsPersistence = this.backendStoreFeatures().supportsPersistence(); @@ -1623,7 +1630,6 @@ public void submitEphemeralJob(EphemeralJob job) { @Override public String schedulerType() { // Use distributed scheduler for hstore backend, otherwise use local - // After the merger of rocksdb and hstore, consider whether to change this logic return StandardHugeGraph.this.isHstore() ? 
"distributed" : "local"; } } diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/config/CoreOptions.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/config/CoreOptions.java index d2f244dfab..72a2da9324 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/config/CoreOptions.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/config/CoreOptions.java @@ -17,19 +17,14 @@ package org.apache.hugegraph.config; -import static org.apache.hugegraph.backend.query.Query.COMMIT_BATCH; -import static org.apache.hugegraph.config.OptionChecker.allowValues; -import static org.apache.hugegraph.config.OptionChecker.disallowEmpty; -import static org.apache.hugegraph.config.OptionChecker.nonNegativeInt; -import static org.apache.hugegraph.config.OptionChecker.positiveInt; -import static org.apache.hugegraph.config.OptionChecker.rangeDouble; -import static org.apache.hugegraph.config.OptionChecker.rangeInt; - import org.apache.hugegraph.backend.query.Query; import org.apache.hugegraph.backend.tx.GraphTransaction; import org.apache.hugegraph.type.define.CollectionType; import org.apache.hugegraph.util.Bytes; +import static org.apache.hugegraph.backend.query.Query.COMMIT_BATCH; +import static org.apache.hugegraph.config.OptionChecker.*; + public class CoreOptions extends OptionHolder { public static final int CPUS = Runtime.getRuntime().availableProcessors(); @@ -641,6 +636,12 @@ public class CoreOptions extends OptionHolder { disallowEmpty(), "./conf/resource-quota-template.yaml" ); + public static final ConfigOption PD_PEERS = new ConfigOption<>( + "pd.peers", + "The addresses of pd nodes, separated with commas.", + disallowEmpty(), + "127.0.0.1:8686" + ); public static final ConfigOption MEMORY_MODE = new ConfigOption<>( "memory.mode", "The memory mode used for query in HugeGraph.", diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/DistributedTaskScheduler.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/DistributedTaskScheduler.java index 7c143fb33d..083c38a9d4 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/DistributedTaskScheduler.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/DistributedTaskScheduler.java @@ -357,7 +357,7 @@ protected HugeTask deleteFromDB(Id id) { public HugeTask delete(Id id, boolean force) { HugeTask task = this.taskWithoutResult(id); - if (!force && !task.completed()) { + if (!force && !task.completed() && task.status() != TaskStatus.DELETING) { // Check task status: can't delete running tasks without force this.updateStatus(id, null, TaskStatus.DELETING); return null; diff --git a/hugegraph-server/hugegraph-hstore/src/main/java/org/apache/hugegraph/backend/store/hstore/HstoreSessionsImpl.java b/hugegraph-server/hugegraph-hstore/src/main/java/org/apache/hugegraph/backend/store/hstore/HstoreSessionsImpl.java index 89d40d8eb4..2f98d03745 100755 --- a/hugegraph-server/hugegraph-hstore/src/main/java/org/apache/hugegraph/backend/store/hstore/HstoreSessionsImpl.java +++ b/hugegraph-server/hugegraph-hstore/src/main/java/org/apache/hugegraph/backend/store/hstore/HstoreSessionsImpl.java @@ -37,6 +37,7 @@ import org.apache.hugegraph.backend.store.BackendEntry.BackendColumn; import org.apache.hugegraph.backend.store.BackendEntry.BackendColumnIterator; import org.apache.hugegraph.backend.store.BackendEntryIterator; +import 
org.apache.hugegraph.config.CoreOptions; import org.apache.hugegraph.config.HugeConfig; import org.apache.hugegraph.meta.PdMetaDriver.PDAuthConfig; import org.apache.hugegraph.pd.client.PDClient; @@ -110,10 +111,7 @@ private void initStoreNode(HugeConfig config) { if (!initializedNode) { synchronized (this) { if (!initializedNode) { - // Use hardcoded string to avoid dependency on ServerOptions - // The value is injected from ServerOptions.PD_PEERS by GraphManager - String pdPeers = config.getString("pd.peers"); - PDConfig pdConfig = PDConfig.of(pdPeers) + PDConfig pdConfig = PDConfig.of(config.get(CoreOptions.PD_PEERS)) .setAuthority(PDAuthConfig.service(), PDAuthConfig.token()) .setEnableCache(true); defaultPdClient = PDClient.create(pdConfig); From 7ba40bd02ce923fc69de8bdca7086b8f39d0514a Mon Sep 17 00:00:00 2001 From: Tsukilc <153273766+Tsukilc@users.noreply.github.com> Date: Wed, 11 Feb 2026 21:13:30 +0800 Subject: [PATCH 31/31] fix(server): fix some issues of the distributed scheduler --- .../apache/hugegraph/core/GraphManager.java | 20 +++++++++++++++---- .../apache/hugegraph/StandardHugeGraph.java | 1 + .../task/DistributedTaskScheduler.java | 2 +- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/hugegraph-server/hugegraph-api/src/main/java/org/apache/hugegraph/core/GraphManager.java b/hugegraph-server/hugegraph-api/src/main/java/org/apache/hugegraph/core/GraphManager.java index 991076f742..b92d5c536f 100644 --- a/hugegraph-server/hugegraph-api/src/main/java/org/apache/hugegraph/core/GraphManager.java +++ b/hugegraph-server/hugegraph-api/src/main/java/org/apache/hugegraph/core/GraphManager.java @@ -33,11 +33,11 @@ import java.util.Map; import java.util.Objects; import java.util.Set; +import java.util.UUID; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; -import java.util.UUID; import java.util.function.Consumer; import java.util.stream.Collectors; @@ -69,6 +69,7 @@ import org.apache.hugegraph.config.TypedOption; import org.apache.hugegraph.event.EventHub; import org.apache.hugegraph.exception.ExistedException; +import org.apache.hugegraph.exception.NotFoundException; import org.apache.hugegraph.exception.NotSupportException; import org.apache.hugegraph.io.HugeGraphSONModule; import org.apache.hugegraph.k8s.K8sDriver; @@ -197,7 +198,10 @@ public GraphManager(HugeConfig conf, EventHub hub) { LOG.info("Init graph manager"); E.checkArgumentNotNull(conf, "The config can't be null"); - // Auto-generate server.id if not configured + // Auto-generate server.id if not configured. 
+ // Random generation is to prevent duplicate id error reports. This id is currently + // meaningless and needs to be completely removed from ServerInfoManager in + // the future String server = conf.get(ServerOptions.SERVER_ID); if (StringUtils.isEmpty(server)) { server = "server-" + UUID.randomUUID().toString().substring(0, 8); @@ -280,7 +284,7 @@ private static String serviceId(String graphSpace, Service.ServiceType type, .replace("_", "-").toLowerCase(); } - private boolean usePD() { + public boolean usePD() { return this.PDExist; } @@ -1561,6 +1565,14 @@ private void loadGraph(String name, String graphConfPath) { String raftGroupPeers = this.conf.get(ServerOptions.RAFT_GROUP_PEERS); config.addProperty(ServerOptions.RAFT_GROUP_PEERS.name(), raftGroupPeers); + + // Transfer `pd.peers` from server config to graph config + // Only inject if not already configured in graph config + if (!config.containsKey("pd.peers")) { + String pdPeers = this.conf.get(ServerOptions.PD_PEERS); + config.addProperty("pd.peers", pdPeers); + } + this.transferRoleWorkerConfig(config); Graph graph = GraphFactory.open(config); @@ -1960,7 +1972,7 @@ public HugeGraph graph(String graphSpace, String name) { } else if (graph instanceof HugeGraph) { return (HugeGraph) graph; } - throw new NotSupportException("graph instance of %s", graph.getClass()); + throw new NotFoundException(String.format("Graph '%s' does not exist", name)); } public void dropGraphLocal(String name) { diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/StandardHugeGraph.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/StandardHugeGraph.java index cb085ae310..5864e2a615 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/StandardHugeGraph.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/StandardHugeGraph.java @@ -1630,6 +1630,7 @@ public void submitEphemeralJob(EphemeralJob job) { @Override public String schedulerType() { // Use distributed scheduler for hstore backend, otherwise use local + // After the merger of rocksdb and hstore, consider whether to change this logic return StandardHugeGraph.this.isHstore() ? "distributed" : "local"; } } diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/DistributedTaskScheduler.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/DistributedTaskScheduler.java index 083c38a9d4..7c143fb33d 100644 --- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/DistributedTaskScheduler.java +++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/task/DistributedTaskScheduler.java @@ -357,7 +357,7 @@ protected HugeTask deleteFromDB(Id id) { public HugeTask delete(Id id, boolean force) { HugeTask task = this.taskWithoutResult(id); - if (!force && !task.completed() && task.status() != TaskStatus.DELETING) { + if (!force && !task.completed()) { // Check task status: can't delete running tasks without force this.updateStatus(id, null, TaskStatus.DELETING); return null;