diff --git a/CHANGELOG.md b/CHANGELOG.md index 856b7393..7289ea7a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,17 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). +## [0.11.1] - 2026-02-10 + +### Fixed + +- AI Judge がプロバイダーシステムを経由するよう修正 — `callAiJudge` を Claude 固定実装からプロバイダー経由(`runAgent`)に変更し、Codex プロバイダーでも AI 判定が正しく動作するように +- 実行指示が長大化する問題を緩和 — implement/fix 系ムーブメントで `pass_previous_response: false` を設定し、Report Directory 内のレポートを一次情報として優先する指示に変更(en/ja 両対応) + +### Internal + +- stable release 時に npm の `next` dist-tag を `latest` と自動同期するよう CI ワークフローを改善(リトライ付き) + ## [0.11.0] - 2026-02-10 ### Added diff --git a/package.json b/package.json index f414b82c..a9b37634 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "takt", - "version": "0.11.0", + "version": "0.11.1", "description": "TAKT: Task Agent Koordination Tool - AI Agent Piece Orchestration", "main": "dist/index.js", "types": "dist/index.d.ts", diff --git a/src/__tests__/ai-judge.test.ts b/src/__tests__/ai-judge.test.ts index 19ed04bc..1ae7cf18 100644 --- a/src/__tests__/ai-judge.test.ts +++ b/src/__tests__/ai-judge.test.ts @@ -3,7 +3,7 @@ */ import { describe, it, expect } from 'vitest'; -import { detectJudgeIndex, buildJudgePrompt } from '../infra/claude/client.js'; +import { detectJudgeIndex, buildJudgePrompt } from '../agents/ai-judge.js'; describe('detectJudgeIndex', () => { it('should detect [JUDGE:1] as index 0', () => { diff --git a/src/__tests__/it-error-recovery.test.ts b/src/__tests__/it-error-recovery.test.ts index 1ab9fc18..5b49af33 100644 --- a/src/__tests__/it-error-recovery.test.ts +++ b/src/__tests__/it-error-recovery.test.ts @@ -14,12 +14,13 @@ import { join } from 'node:path'; import { tmpdir } from 'node:os'; import { setMockScenario, resetScenario } from '../infra/mock/index.js'; import type { PieceConfig, PieceMovement, PieceRule } from '../core/models/index.js'; 
-import { callAiJudge, detectRuleIndex } from '../infra/claude/index.js'; +import { detectRuleIndex } from '../infra/claude/index.js'; +import { callAiJudge } from '../agents/ai-judge.js'; // --- Mocks --- -vi.mock('../infra/claude/client.js', async (importOriginal) => { - const original = await importOriginal(); +vi.mock('../agents/ai-judge.js', async (importOriginal) => { + const original = await importOriginal(); return { ...original, callAiJudge: vi.fn().mockResolvedValue(-1), diff --git a/src/__tests__/it-notification-sound.test.ts b/src/__tests__/it-notification-sound.test.ts index 6de13dea..497125f0 100644 --- a/src/__tests__/it-notification-sound.test.ts +++ b/src/__tests__/it-notification-sound.test.ts @@ -105,11 +105,14 @@ vi.mock('../core/piece/index.js', () => ({ })); vi.mock('../infra/claude/index.js', () => ({ - callAiJudge: vi.fn(), detectRuleIndex: vi.fn(), interruptAllQueries: mockInterruptAllQueries, })); +vi.mock('../agents/ai-judge.js', () => ({ + callAiJudge: vi.fn(), +})); + vi.mock('../infra/config/index.js', () => ({ loadPersonaSessions: vi.fn().mockReturnValue({}), updatePersonaSession: vi.fn(), diff --git a/src/__tests__/it-piece-execution.test.ts b/src/__tests__/it-piece-execution.test.ts index cf1b3021..a836d505 100644 --- a/src/__tests__/it-piece-execution.test.ts +++ b/src/__tests__/it-piece-execution.test.ts @@ -15,15 +15,16 @@ import { join } from 'node:path'; import { tmpdir } from 'node:os'; import { setMockScenario, resetScenario } from '../infra/mock/index.js'; import type { PieceConfig, PieceMovement, PieceRule } from '../core/models/index.js'; -import { callAiJudge, detectRuleIndex } from '../infra/claude/index.js'; +import { detectRuleIndex } from '../infra/claude/index.js'; +import { callAiJudge } from '../agents/ai-judge.js'; // --- Mocks (minimal — only infrastructure, not core logic) --- -// Safety net: prevent callAiJudge from calling real Claude CLI. +// Safety net: prevent callAiJudge from calling real agent. 
// Tag-based detection should always match in these tests; if it doesn't, // this mock surfaces the failure immediately instead of timing out. -vi.mock('../infra/claude/client.js', async (importOriginal) => { - const original = await importOriginal(); +vi.mock('../agents/ai-judge.js', async (importOriginal) => { + const original = await importOriginal(); return { ...original, callAiJudge: vi.fn().mockResolvedValue(-1), diff --git a/src/__tests__/it-piece-patterns.test.ts b/src/__tests__/it-piece-patterns.test.ts index 15d0cf12..4ea6d597 100644 --- a/src/__tests__/it-piece-patterns.test.ts +++ b/src/__tests__/it-piece-patterns.test.ts @@ -13,12 +13,13 @@ import { mkdtempSync, mkdirSync, rmSync } from 'node:fs'; import { join } from 'node:path'; import { tmpdir } from 'node:os'; import { setMockScenario, resetScenario } from '../infra/mock/index.js'; -import { callAiJudge, detectRuleIndex } from '../infra/claude/index.js'; +import { detectRuleIndex } from '../infra/claude/index.js'; +import { callAiJudge } from '../agents/ai-judge.js'; // --- Mocks --- -vi.mock('../infra/claude/client.js', async (importOriginal) => { - const original = await importOriginal(); +vi.mock('../agents/ai-judge.js', async (importOriginal) => { + const original = await importOriginal(); return { ...original, callAiJudge: vi.fn().mockImplementation(async (content: string, conditions: { index: number; text: string }[]) => { diff --git a/src/__tests__/it-pipeline-modes.test.ts b/src/__tests__/it-pipeline-modes.test.ts index 235d3307..241cac1e 100644 --- a/src/__tests__/it-pipeline-modes.test.ts +++ b/src/__tests__/it-pipeline-modes.test.ts @@ -31,8 +31,8 @@ const { mockPushBranch: vi.fn(), })); -vi.mock('../infra/claude/client.js', async (importOriginal) => { - const original = await importOriginal(); +vi.mock('../agents/ai-judge.js', async (importOriginal) => { + const original = await importOriginal(); return { ...original, callAiJudge: vi.fn().mockImplementation(async (content: string, 
conditions: { index: number; text: string }[]) => { diff --git a/src/__tests__/it-pipeline.test.ts b/src/__tests__/it-pipeline.test.ts index c118fd4f..a796e594 100644 --- a/src/__tests__/it-pipeline.test.ts +++ b/src/__tests__/it-pipeline.test.ts @@ -16,9 +16,9 @@ import { setMockScenario, resetScenario } from '../infra/mock/index.js'; // --- Mocks --- -// Safety net: prevent callAiJudge from calling real Claude CLI. -vi.mock('../infra/claude/client.js', async (importOriginal) => { - const original = await importOriginal(); +// Safety net: prevent callAiJudge from calling real agent. +vi.mock('../agents/ai-judge.js', async (importOriginal) => { + const original = await importOriginal(); return { ...original, callAiJudge: vi.fn().mockImplementation(async (content: string, conditions: { index: number; text: string }[]) => { diff --git a/src/__tests__/it-sigint-interrupt.test.ts b/src/__tests__/it-sigint-interrupt.test.ts index ab3d86be..f1e0e52d 100644 --- a/src/__tests__/it-sigint-interrupt.test.ts +++ b/src/__tests__/it-sigint-interrupt.test.ts @@ -71,11 +71,14 @@ vi.mock('../core/piece/index.js', () => ({ })); vi.mock('../infra/claude/index.js', () => ({ - callAiJudge: vi.fn(), detectRuleIndex: vi.fn(), interruptAllQueries: mockInterruptAllQueries, })); +vi.mock('../agents/ai-judge.js', () => ({ + callAiJudge: vi.fn(), +})); + vi.mock('../infra/config/index.js', () => ({ loadPersonaSessions: vi.fn().mockReturnValue({}), updatePersonaSession: vi.fn(), diff --git a/src/__tests__/it-three-phase-execution.test.ts b/src/__tests__/it-three-phase-execution.test.ts index 40db6c62..c87380b3 100644 --- a/src/__tests__/it-three-phase-execution.test.ts +++ b/src/__tests__/it-three-phase-execution.test.ts @@ -15,12 +15,13 @@ import { join } from 'node:path'; import { tmpdir } from 'node:os'; import { setMockScenario, resetScenario } from '../infra/mock/index.js'; import type { PieceConfig, PieceMovement, PieceRule } from '../core/models/index.js'; -import { callAiJudge, 
detectRuleIndex } from '../infra/claude/index.js'; +import { detectRuleIndex } from '../infra/claude/index.js'; +import { callAiJudge } from '../agents/ai-judge.js'; // --- Mocks --- -vi.mock('../infra/claude/client.js', async (importOriginal) => { - const original = await importOriginal(); +vi.mock('../agents/ai-judge.js', async (importOriginal) => { + const original = await importOriginal(); return { ...original, callAiJudge: vi.fn().mockResolvedValue(-1), diff --git a/src/__tests__/pieceExecution-debug-prompts.test.ts b/src/__tests__/pieceExecution-debug-prompts.test.ts index c99bc19e..073ec9b8 100644 --- a/src/__tests__/pieceExecution-debug-prompts.test.ts +++ b/src/__tests__/pieceExecution-debug-prompts.test.ts @@ -71,11 +71,14 @@ vi.mock('../core/piece/index.js', () => ({ })); vi.mock('../infra/claude/index.js', () => ({ - callAiJudge: vi.fn(), detectRuleIndex: vi.fn(), interruptAllQueries: vi.fn(), })); +vi.mock('../agents/ai-judge.js', () => ({ + callAiJudge: vi.fn(), +})); + vi.mock('../infra/config/index.js', () => ({ loadPersonaSessions: vi.fn().mockReturnValue({}), updatePersonaSession: vi.fn(), diff --git a/src/__tests__/runAllTasks-concurrency.test.ts b/src/__tests__/runAllTasks-concurrency.test.ts index 677e7866..e7ec26c9 100644 --- a/src/__tests__/runAllTasks-concurrency.test.ts +++ b/src/__tests__/runAllTasks-concurrency.test.ts @@ -98,10 +98,13 @@ vi.mock('../infra/github/index.js', () => ({ vi.mock('../infra/claude/index.js', () => ({ interruptAllQueries: vi.fn(), - callAiJudge: vi.fn(), detectRuleIndex: vi.fn(), })); +vi.mock('../agents/ai-judge.js', () => ({ + callAiJudge: vi.fn(), +})); + vi.mock('../shared/exitCodes.js', () => ({ EXIT_SIGINT: 130, })); diff --git a/src/agents/ai-judge.ts b/src/agents/ai-judge.ts new file mode 100644 index 00000000..1adc4398 --- /dev/null +++ b/src/agents/ai-judge.ts @@ -0,0 +1,67 @@ +/** + * AI judge - provider-aware rule condition evaluator + * + * Evaluates agent output against ai() conditions using the 
configured provider. + * Uses runAgent (which resolves provider from config) instead of hardcoded Claude. + */ + +import type { AiJudgeCaller, AiJudgeCondition } from '../core/piece/types.js'; +import { loadTemplate } from '../shared/prompts/index.js'; +import { createLogger } from '../shared/utils/index.js'; +import { runAgent } from './runner.js'; + +const log = createLogger('ai-judge'); + +/** + * Detect the first [JUDGE:N] tag in content (case-insensitive; N is 1-based). + * Returns the 0-based rule index (N - 1), or -1 when no tag is found or N is 0. + */ +export function detectJudgeIndex(content: string): number { + const regex = /\[JUDGE:(\d+)\]/i; + const match = content.match(regex); + if (match?.[1]) { + const index = Number.parseInt(match[1], 10) - 1; + return index >= 0 ? index : -1; + } + return -1; +} + +/** + * Build the AI judge prompt: each ai() condition becomes a "| N | text |" row (N = index + 1), passed with the agent output to the 'perform_judge_message' template ('en'). + */ +export function buildJudgePrompt( + agentOutput: string, + aiConditions: AiJudgeCondition[], +): string { + const conditionList = aiConditions + .map((c) => `| ${c.index + 1} | ${c.text} |`) + .join('\n'); + + return loadTemplate('perform_judge_message', 'en', { agentOutput, conditionList }); +} + +/** + * Call AI judge to evaluate agent output against ai() conditions. + * Uses the provider system (via runAgent) for correct provider resolution. + * Returns 0-based index of the matched ai() condition, or -1 if no match. 
+ */ +export const callAiJudge: AiJudgeCaller = async ( + agentOutput: string, + conditions: AiJudgeCondition[], + options: { cwd: string }, +): Promise<number> => { + const prompt = buildJudgePrompt(agentOutput, conditions); + + const response = await runAgent(undefined, prompt, { + cwd: options.cwd, + maxTurns: 1, + allowedTools: [], /* the judge call is given an empty tool list */ + }); + + if (response.status !== 'done') { + log.error('AI judge call failed', { error: response.error }); + return -1; /* a failed judge call is reported as "no condition matched" */ + } + + return detectJudgeIndex(response.content); +}; diff --git a/src/agents/index.ts b/src/agents/index.ts index ebbf3920..6adc5d91 100644 --- a/src/agents/index.ts +++ b/src/agents/index.ts @@ -3,4 +3,5 @@ */ export { AgentRunner, runAgent } from './runner.js'; +export { callAiJudge, detectJudgeIndex, buildJudgePrompt } from './ai-judge.js'; export type { RunAgentOptions, StreamCallback } from './types.js'; diff --git a/src/features/tasks/execute/pieceExecution.ts b/src/features/tasks/execute/pieceExecution.ts index 99ece9fa..0a10d7f4 100644 --- a/src/features/tasks/execute/pieceExecution.ts +++ b/src/features/tasks/execute/pieceExecution.ts @@ -6,7 +6,8 @@ import { readFileSync } from 'node:fs'; import { PieceEngine, type IterationLimitRequest, type UserInputRequest } from '../../../core/piece/index.js'; import type { PieceConfig } from '../../../core/models/index.js'; import type { PieceExecutionResult, PieceExecutionOptions } from './types.js'; -import { callAiJudge, detectRuleIndex, interruptAllQueries } from '../../../infra/claude/index.js'; +import { detectRuleIndex, interruptAllQueries } from '../../../infra/claude/index.js'; +import { callAiJudge } from '../../../agents/ai-judge.js'; export type { PieceExecutionResult, PieceExecutionOptions }; diff --git a/src/index.ts b/src/index.ts index 3cbd6e3b..4731f7d5 100644 --- a/src/index.ts +++ b/src/index.ts @@ -65,10 +65,7 @@ export { callClaudeCustom, callClaudeAgent, callClaudeSkill, - callAiJudge, detectRuleIndex, - detectJudgeIndex, - buildJudgePrompt, 
isRegexSafe, } from './infra/claude/index.js'; export type { diff --git a/src/infra/claude/client.ts b/src/infra/claude/client.ts index 85fc418c..be473d15 100644 --- a/src/infra/claude/client.ts +++ b/src/infra/claude/client.ts @@ -154,60 +154,6 @@ export class ClaudeClient { }; } - /** - * Detect judge rule index from [JUDGE:N] tag pattern. - * Returns 0-based rule index, or -1 if no match. - */ - static detectJudgeIndex(content: string): number { - const regex = /\[JUDGE:(\d+)\]/i; - const match = content.match(regex); - if (match?.[1]) { - const index = Number.parseInt(match[1], 10) - 1; - return index >= 0 ? index : -1; - } - return -1; - } - - /** - * Build the prompt for the AI judge that evaluates agent output against ai() conditions. - */ - static buildJudgePrompt( - agentOutput: string, - aiConditions: { index: number; text: string }[], - ): string { - const conditionList = aiConditions - .map((c) => `| ${c.index + 1} | ${c.text} |`) - .join('\n'); - - return loadTemplate('perform_judge_message', 'en', { agentOutput, conditionList }); - } - - /** - * Call AI judge to evaluate agent output against ai() conditions. - * Uses a lightweight model (haiku) for cost efficiency. - * Returns 0-based index of the matched ai() condition, or -1 if no match. 
- */ - async callAiJudge( - agentOutput: string, - aiConditions: { index: number; text: string }[], - options: { cwd: string }, - ): Promise { - const prompt = ClaudeClient.buildJudgePrompt(agentOutput, aiConditions); - - const spawnOptions: ClaudeSpawnOptions = { - cwd: options.cwd, - model: 'haiku', - maxTurns: 1, - }; - - const result = await executeClaudeCli(prompt, spawnOptions); - if (!result.success) { - log.error('AI judge call failed', { error: result.error }); - return -1; - } - - return ClaudeClient.detectJudgeIndex(result.content); - } } // ---- Module-level functions ---- @@ -247,21 +193,3 @@ export async function callClaudeSkill( return defaultClient.callSkill(skillName, prompt, options); } -export function detectJudgeIndex(content: string): number { - return ClaudeClient.detectJudgeIndex(content); -} - -export function buildJudgePrompt( - agentOutput: string, - aiConditions: { index: number; text: string }[], -): string { - return ClaudeClient.buildJudgePrompt(agentOutput, aiConditions); -} - -export async function callAiJudge( - agentOutput: string, - aiConditions: { index: number; text: string }[], - options: { cwd: string }, -): Promise { - return defaultClient.callAiJudge(agentOutput, aiConditions, options); -} diff --git a/src/infra/claude/index.ts b/src/infra/claude/index.ts index 4c21f589..8e8060db 100644 --- a/src/infra/claude/index.ts +++ b/src/infra/claude/index.ts @@ -67,9 +67,6 @@ export { callClaudeCustom, callClaudeAgent, callClaudeSkill, - callAiJudge, detectRuleIndex, - detectJudgeIndex, - buildJudgePrompt, isRegexSafe, } from './client.js';