diff --git a/challenge/onsite_competition/onsite_competition_rules_en-US.md b/challenge/onsite_competition/onsite_competition_rules_en-US.md index d9c241c9..ff4e3568 100644 --- a/challenge/onsite_competition/onsite_competition_rules_en-US.md +++ b/challenge/onsite_competition/onsite_competition_rules_en-US.md @@ -38,13 +38,12 @@ The challenge centers on vision-language fusion for cross-room end-to-end naviga - A standardized debugging time and environment will be provided onsite. Teams may use model weights different from the online stage and make environment-specific configuration adjustments, but modifications to the core algorithm logic are strictly prohibited. ### 4.2 Procedure -Each team will receive 10 fixed-sequence instructions. Start from the first one. +Each team will receive 10 instructions. Start from any instruction. For each instruction: - Move the robot to the given starting position. - Provide the instruction to the robot and raise your hand to signal the start. -- If execution fails, teams may retry (up to 3 attempts) or skip the instruction. -- After 3 failed attempts (timeout, collision, human intervention, etc.), the instruction must be skipped. -- Skipped instructions cannot be retried later. +- If execution fails, teams may retry or skip the instruction. +- Instruction can be repeated if failed (timeout, collision, human intervention, etc.). - Before each attempt, the algorithm state must be reset and previous observations cleared. ### 4.3 Time Limit @@ -82,7 +81,7 @@ Successfully completing one instruction will be one success, and the completion | Action | Score Impact | |:--|:--| | Successfully reach goal | success | -| Minor scrape more than 5 times or Collision with obstacle | fail | +| Minor scrape or Collision with obstacle | fail | If there is a trend of continuous collisions, the referee has the right to terminate the robot’s current action, with the severity of the impact determined by the on-site referee. diff --git a/challenge/onsite_competition/onsite_competition_rules_zh-CN.md b/challenge/onsite_competition/onsite_competition_rules_zh-CN.md index 9edabee0..6f3ed366 100644 --- a/challenge/onsite_competition/onsite_competition_rules_zh-CN.md +++ b/challenge/onsite_competition/onsite_competition_rules_zh-CN.md @@ -32,11 +32,10 @@ - 比赛现场将提供统一的调试时间和环境,允许使用与线上赛不同的模型权重,允许进行环境适配性配置修改,但禁止修改核心算法逻辑。 ### 4.2 比赛过程 -每支队伍开始挑战时将会提供十条固定顺序的指令,从第一条指令开始。 +每支队伍开始挑战时将会提供十条导航指令,选手可选择指令进行。 - 移动机器人到达该指令的起始位置后,将指令输入给机器人后举手示意作为开始挑战的信号。 - - 指令执行过程中若中途失败,可选择重试,也可以选择跳过该指令,顺序执行下一条未执行指令。 - - 每条指令可重试3次,若3次失败,则必须跳过该指令。(失败指选手选择重新开始,或其他条件,如超时,冲撞障碍物,人为干预等) - - 已经跳过的指令不能重复执行。 + - 指令执行过程中若中途失败,可选择重试,也可以选择跳过该指令。 + - 失败时可选择重新开始(如超时,冲撞障碍物,人为干预等) - 每次测试前需重置算法状态并清除所有前序执行过程中获取的观测缓存。 ### 4.3 时间限制 @@ -60,7 +59,7 @@ - 成功完成1条指令,算成功一次,并记录该指令的完成时间。(成功条件:在比赛中,每条指令均对应一个目标位置。目标位置定义为半径 2 米的圆形区域(不穿墙),机器人最终停止在该范围即可判定为该条指令导航成功。) **扣分规则**: -- 若机器人频繁刮蹭(5次)或直接撞击障碍物,本次导航将被强制终止,该条指令失败。 +- 若机器人连续刮蹭或直接撞击障碍物,本次导航将被强制终止,该条指令失败。 | Action | Score | |:--|:--:| diff --git a/challenge/onsite_competition/sdk/cam.py b/challenge/onsite_competition/sdk/cam.py index 9526c23d..b76caa68 100644 --- a/challenge/onsite_competition/sdk/cam.py +++ b/challenge/onsite_competition/sdk/cam.py @@ -99,7 +99,8 @@ def get_observation(self, timeout_ms: int = 1000) -> Dict: if not color or not depth: raise RuntimeError("can not align color/depth frame") - rgb = np.asanyarray(color.get_data()) # HxWx3, uint8 (BGR) + bgr = np.asanyarray(color.get_data()) # HxWx3, uint8 (BGR) + rgb = bgr[..., ::-1] # HxWx3, uint8 (convert to RGB) depth_raw = np.asanyarray(depth.get_data()) # HxW, uint16 if depth_raw.shape != rgb.shape[:2]: # Extreme fallback (theoretically should be consistent after alignment). diff --git a/challenge/onsite_competition/sdk/control.py b/challenge/onsite_competition/sdk/control.py index 5040946d..062a13c4 100644 --- a/challenge/onsite_competition/sdk/control.py +++ b/challenge/onsite_competition/sdk/control.py @@ -35,6 +35,10 @@ def odom_callback(self, msg): self.start_yaw = yaw rospy.loginfo(f"start yaw: {math.degrees(self.start_yaw):.2f}") + # --- position (new) + p = msg.pose.pose.position + self.current_xy = (p.x, p.y) + def execute_turn(self): if self.start_yaw is None: rospy.loginfo("wait for init yaw state...") @@ -87,7 +91,7 @@ class DiscreteRobotController(Turn90Degrees): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) # initialize parent class - def stand_still(self, duration=0.5): + def stand_still(self, duration=0.2): twist = Twist() twist.linear.x = 0.0 twist.angular.z = 0.0 @@ -95,9 +99,9 @@ def stand_still(self, duration=0.5): rospy.sleep(duration) # Maintain stand still for a short duration rospy.loginfo("Stand still command executed.") - def move_forward(self, distance=0.25): + def move_forward(self, distance=0.5, speed=0.2): twist = Twist() - twist.linear.x = 0.2 # Forward speed + twist.linear.x = speed # Forward speed twist.angular.z = 0.0 duration = distance / twist.linear.x # Time to move forward the specified distance @@ -108,34 +112,103 @@ def move_forward(self, distance=0.25): self.cmd_vel_pub.publish(twist) self.rate.sleep() - self.stand_still() # Stop after moving forward + # self.stand_still() # Stop after moving forward rospy.loginfo("Move forward command executed.") - def turn_left(self, angle=15, speed=0.2): - self.turn_angle = math.radians(angle) # update angle - self.angular_speed = speed # Set positive angular speed for left turn - self.start_yaw = None # Reset start yaw to current position - self.turning = False # Reset turning flag - self.run() - self.stand_still() # Stop after moving forward - rospy.loginfo("Turn left command executed.") + # ---- NEW: feedback-controlled forward motion + + +def move_feedback(self, distance=0.25, speed=0.5, tol=0.02, timeout=None): + """ + 里程计闭环直线移动:支持正/负 distance。 + - distance: 目标路程(米)。>0 前进,<0 倒车。 + - speed: 名义线速度(m/s),可给正/负,最终会按正数取绝对值。 + - tol: 终止距离容差(米) + - timeout: 超时(秒);默认 max(3*|distance|/speed, 3.0) + """ + # 等待位姿 + while self.current_xy is None and not rospy.is_shutdown(): + rospy.loginfo_throttle(2.0, "Waiting for /odom...") + self.rate.sleep() + if rospy.is_shutdown(): + return + + # 方向与目标 + direction = 1.0 if distance >= 0.0 else -1.0 + target = abs(distance) + speed = abs(speed) if speed is not None else 0.5 + + start_xy = self.current_xy + start_time = rospy.Time.now() + + if timeout is None: + timeout = max(3.0, 3.0 * (target / max(speed, 0.05))) - def turn_right(self, angle=15, speed=-0.2): + twist = Twist() + + # 简单 P 控制,越靠近越慢 + Kp = 1.5 + min_speed = 0.06 # 防止轮子停转 + + rospy.loginfo( + f"move_linear: target={target:.3f} m, dir={'forward' if direction>0 else 'backward'}, " + f"speed≈{speed:.2f} m/s, tol={tol:.3f} m" + ) + + try: + while not rospy.is_shutdown(): + # 超时 + if (rospy.Time.now() - start_time).to_sec() > timeout: + rospy.logwarn("move_linear timeout reached; stopping.") + break + + # 走过的距离(欧式距离,不区分前后,目标用 abs) + cx, cy = self.current_xy + sx, sy = start_xy + traveled = math.hypot(cx - sx, cy - sy) + remaining = target - traveled + + # 达标退出 + if remaining <= tol: + rospy.loginfo(f"Reached distance: traveled={traveled:.3f} m (tol {tol} m)") + break + + # 速度控制(带方向) + v = Kp * remaining + v = max(min(v, speed), min_speed) # [min_speed, speed] + twist.linear.x = direction * v # 关键:按方向加符号 + twist.angular.z = 0.0 + self.cmd_vel_pub.publish(twist) + + rospy.loginfo_throttle( + 1.0, f"traveled={traveled:.3f} m, remaining={remaining:.3f} m, v={twist.linear.x:.2f} m/s" + ) + self.rate.sleep() + + finally: + # 停车 + twist.linear.x = 0.0 + twist.angular.z = 0.0 + self.cmd_vel_pub.publish(twist) + + rospy.loginfo("Move linear command executed.") + + def turn(self, angle=15, speed=0.5): self.turn_angle = math.radians(angle) # update angle self.angular_speed = speed # Set positive angular speed for left turn self.start_yaw = None # Reset start yaw to current position self.turning = False # Reset turning flag self.run() - self.stand_still() # Stop after moving forward - rospy.loginfo("Turn right command executed.") + rospy.loginfo("Turn left command executed.") if __name__ == '__main__': try: control = DiscreteRobotController() - control.turn_left(10) - control.move_forward(0.1) - control.turn_right(10) + control.turn(15, 0.5) # left + control.turn(15, -0.5) # right + control.move_feedback(0.25, 0.5) + control.move_feedback(-0.25, 0.5) except rospy.ROSInterruptException: pass diff --git a/challenge/onsite_competition/sdk/main.py b/challenge/onsite_competition/sdk/main.py index 705329cd..f93b908b 100644 --- a/challenge/onsite_competition/sdk/main.py +++ b/challenge/onsite_competition/sdk/main.py @@ -3,10 +3,9 @@ import sys from real_world_env import RealWorldEnv -from stream import app, start_env +from stream import run, set_instruction -from internnav.agent.utils.client import AgentClient -from internnav.configs.evaluator.default_config import get_config +from internnav.utils.comm_utils.client import AgentClient def parse_args(): @@ -14,15 +13,16 @@ def parse_args(): parser.add_argument( "--config", type=str, - default='scripts/eval/configs/h1_cma_cfg.py', + default='challenge/onsite_competition/configs/h1_internvla_n1_cfg.py', help='eval config file path, e.g. scripts/eval/configs/h1_cma_cfg.py', ) parser.add_argument( "--instruction", type=str, + default='Go straight and pass the sofa and turn right into the hallway. Keep walking down, pass the kitchen and the bathroom, then enter the study room at the far end on the right with a desk, stop next to the white shelf.', help='current instruction to follow', ) - parser.add_argument("--tag", type=str, help="tag for the run, saved by the tag name which is team-task-trail") + parser.add_argument("--uninteractive_mode", action='store_true', help="whether to confirm each step") return parser.parse_args() @@ -34,46 +34,87 @@ def load_eval_cfg(config_path, attr_name='eval_cfg'): return getattr(config_module, attr_name) -# TODO add logging for each step, saved by the tag name which is team-task-trail +def confirm(msg: str) -> bool: + """ + Ask user to confirm. Return True if user types 'y' (case-insensitive), + False for anything else (including empty input). + """ + try: + answer = input(f"{msg} [y/N]: ").strip().lower() + except (EOFError, KeyboardInterrupt): + print("\nCancelled.") + return False + return answer in ("", "y") + + +def get_instruction() -> int: + try: + import json + + instruction_lst = json.load(open("challenge/onsite_competition/instructions.json")) + print("Available instructions:") + for i, item in enumerate(instruction_lst): + print(f"{i}: {item['instruction_title']}") + answer = input("input instruction id: ").strip().lower() + answer = int(answer) + except (EOFError, KeyboardInterrupt): + print("\nCancelled.") + sys.exit() + return instruction_lst[answer]['instruction'][0] + + +def action_to_word(action: int) -> str: + action = max(0, min(3, action)) + wl = ["stand still", "move forward", "turn left", "turn right"] + return wl[action] + + def main(): args = parse_args() print("--- Loading config from:", args.config, "---") - evaluator_cfg = load_eval_cfg(args.config, attr_name='eval_cfg') - cfg = get_config(evaluator_cfg) - print(cfg) + cfg = load_eval_cfg(args.config, attr_name='eval_cfg') + agent_cfg = cfg.agent - # initialize user agent - agent = AgentClient(cfg.agent) + # initialize user's agent + agent = AgentClient(agent_cfg) # initialize real world env - env = RealWorldEnv() + env = RealWorldEnv(fps=30, duration=0.1, distance=0.3, angle=15, move_speed=0.5, turn_speed=0.5) + env.reverse() # reverse move direction if using a rear camera + env.step(0) + obs = env.get_observation() # start stream - start_env(env) - app.run(host="0.0.0.0", port=8080, threaded=True) + print("--- start running steam app ---") + run(env=env) + + while True: + instruction = get_instruction() + print("\nNew instruction:", instruction) + set_instruction(instruction) - try: while True: # print("get observation...") # obs contains {rgb, depth, instruction} obs = env.get_observation() - obs["instruction"] = args.instruction + # print(obs) + obs["instruction"] = instruction - # print("agent step...") + print("agent step...") # action is a integer in [0, 3], agent return [{'action': [int], 'ideal_flag': bool}] (same to internvla_n1 agent) - try: - action = agent.step(obs)[0]['action'][0] - print(f"agent step success, action is {action}") - except Exception as e: - print(f"agent step error {e}") - continue - - # print("env step...") - try: + action = agent.step([obs])[0]['action'][0] + print("agent step success, action:", action) + + if args.uninteractive_mode or confirm(f"Execute this action [{action_to_word(action)}]?"): + print("env step...") env.step(action) print("env step success") - except Exception as e: - print(f"env step error {e}") - continue - finally: - env.close() + else: + print("Stop requested. Exiting loop.") + print("agent reset...") + agent.reset() + break + + +if __name__ == "__main__": + main() diff --git a/challenge/onsite_competition/sdk/real_world_env.py b/challenge/onsite_competition/sdk/real_world_env.py index e004bd64..ef176a58 100644 --- a/challenge/onsite_competition/sdk/real_world_env.py +++ b/challenge/onsite_competition/sdk/real_world_env.py @@ -4,11 +4,20 @@ from cam import AlignedRealSense from control import DiscreteRobotController -from internnav.env import Env +# from internnav.env import Env -class RealWorldEnv(Env): - def __init__(self, fps: int = 30): +class RealWorldEnv: + def __init__( + self, + fps: int = 30, + duration: float = 0.5, + distance: float = 0.25, + angle: int = 15, + turn_speed: float = 0.5, + move_speed: float = 0.3, + ): + self.node = DiscreteRobotController() self.cam = AlignedRealSense() self.latest_obs = None @@ -22,6 +31,16 @@ def __init__(self, fps: int = 30): self.thread = threading.Thread(target=self._capture_loop, daemon=True) self.thread.start() + # control setting + self.duration = duration + self.distance = distance + self.angle = angle + self.turn_speed = turn_speed # rad/s + self.move_speed = move_speed # m/s + + def reverse(self): + self.distance = -self.distance + def _capture_loop(self): """keep capturing frames""" interval = 1.0 / self.fps @@ -52,13 +71,13 @@ def step(self, action: int): 3: turn right """ if action == 0: - self.node.stand_still() + self.node.stand_still(self.duration) elif action == 1: - self.node.move_forward() + self.node.move_feedback(self.distance, self.move_speed) elif action == 2: - self.node.turn_left() + self.node.turn(self.angle, self.turn_speed) elif action == 3: - self.node.turn_right() + self.node.turn(self.angle, -self.turn_speed) def close(self): self.stop_flag.set() diff --git a/challenge/onsite_competition/sdk/stream.py b/challenge/onsite_competition/sdk/stream.py index b045cf12..cfbad940 100644 --- a/challenge/onsite_competition/sdk/stream.py +++ b/challenge/onsite_competition/sdk/stream.py @@ -1,13 +1,16 @@ # stream_server.py +import threading import time +from typing import Optional import cv2 -from flask import Flask, Response +import numpy as np +from flask import Flask, Response, make_response, stream_with_context app = Flask(__name__) -# 由主程序注入 _env = None +_stop = threading.Event() # graceful stop for Ctrl+C def set_env(env): @@ -16,23 +19,173 @@ def set_env(env): _env = env -def _mjpeg_generator(jpeg_quality: int = 80): +_instruction = "" +_instruction_lock = threading.Lock() + + +def set_instruction(text: str) -> None: + """ + Set the instruction text that will be displayed on the viewer page. + Thread-safe; can be called from your main/control loop at any time. + """ + global _instruction + with _instruction_lock: + _instruction = str(text) if text is not None else "" + + +def get_instruction() -> str: + """ + Get the current instruction text (thread-safe). + """ + with _instruction_lock: + return _instruction + + +def _encode_jpeg(frame_bgr: np.ndarray, quality: int = 80) -> Optional[bytes]: + ok, jpg = cv2.imencode(".jpg", frame_bgr, [cv2.IMWRITE_JPEG_QUALITY, quality]) + return jpg.tobytes() if ok else None + + +def _mjpeg_generator(jpeg_quality: int = 80, fps_limit: float = 30.0): boundary = b"--frame" - while True: + min_interval = 1.0 / fps_limit if fps_limit > 0 else 0.0 + + last = 0.0 + while not _stop.is_set(): if _env is None: - time.sleep(0.1) + time.sleep(0.05) continue + obs = _env.get_observation() - if obs is None: + if not obs or "rgb" not in obs: time.sleep(0.01) continue - frame_bgr = obs["rgb"] - ok, jpg = cv2.imencode(".jpg", frame_bgr, [cv2.IMWRITE_JPEG_QUALITY, jpeg_quality]) - if not ok: + + frame = obs["rgb"] # Expect BGR for OpenCV; convert if your source is RGB + if frame is None: + time.sleep(0.01) + continue + + # If your env returns RGB, uncomment the next line: + frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) + + jpg = _encode_jpeg(frame, quality=jpeg_quality) + if jpg is None: continue - yield (boundary + b"\r\n" b"Content-Type: image/jpeg\r\n\r\n" + jpg.tobytes() + b"\r\n") + + # multipart chunk + yield ( + boundary + b"\r\n" + b"Content-Type: image/jpeg\r\n" + b"Cache-Control: no-cache\r\n" + b"Pragma: no-cache\r\n\r\n" + jpg + b"\r\n" + ) + + # simple pacing (prevents 100% CPU + helps Ctrl+C responsiveness) + now = time.time() + sleep_t = min_interval - (now - last) + if sleep_t > 0: + time.sleep(sleep_t) + last = now @app.route("/stream") def stream(): - return Response(_mjpeg_generator(), mimetype="multipart/x-mixed-replace; boundary=frame") + # stream_with_context ensures Flask doesn’t buffer the generator + gen = stream_with_context(_mjpeg_generator()) + resp = Response(gen, mimetype="multipart/x-mixed-replace; boundary=frame", direct_passthrough=True) + # extra headers to avoid caching + resp.headers["Cache-Control"] = "no-cache, no-store, must-revalidate" + resp.headers["Pragma"] = "no-cache" + resp.headers["Connection"] = "keep-alive" + return resp + + +@app.route("/instruction") +def instruction(): + text = get_instruction() + r = make_response(text) + r.headers["Content-Type"] = "text/plain; charset=utf-8" + r.headers["Cache-Control"] = "no-cache, no-store, must-revalidate" + r.headers["Pragma"] = "no-cache" + return r + + +@app.route("/") +def index(): + # simple viewer page with instruction overlay + html = """ + +