Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,17 @@ All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).

## [0.11.1] - 2026-02-10

### Fixed

- AI Judge がプロバイダーシステムを経由するよう修正 — `callAiJudge` を Claude 固定実装からプロバイダー経由(`runAgent`)に変更し、Codex プロバイダーでも AI 判定が正しく動作するように
- 実行指示が長大化する問題を緩和 — implement/fix 系ムーブメントで `pass_previous_response: false` を設定し、Report Directory 内のレポートを一次情報として優先する指示に変更(en/ja 両対応)

### Internal

- stable release 時に npm の `next` dist-tag を `latest` と自動同期するよう CI ワークフローを改善(リトライ付き)

## [0.11.0] - 2026-02-10

### Added
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "takt",
"version": "0.11.0",
"version": "0.11.1",
"description": "TAKT: Task Agent Koordination Tool - AI Agent Piece Orchestration",
"main": "dist/index.js",
"types": "dist/index.d.ts",
Expand Down
2 changes: 1 addition & 1 deletion src/__tests__/ai-judge.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
*/

import { describe, it, expect } from 'vitest';
import { detectJudgeIndex, buildJudgePrompt } from '../infra/claude/client.js';
import { detectJudgeIndex, buildJudgePrompt } from '../agents/ai-judge.js';

describe('detectJudgeIndex', () => {
it('should detect [JUDGE:1] as index 0', () => {
Expand Down
7 changes: 4 additions & 3 deletions src/__tests__/it-error-recovery.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,13 @@ import { join } from 'node:path';
import { tmpdir } from 'node:os';
import { setMockScenario, resetScenario } from '../infra/mock/index.js';
import type { PieceConfig, PieceMovement, PieceRule } from '../core/models/index.js';
import { callAiJudge, detectRuleIndex } from '../infra/claude/index.js';
import { detectRuleIndex } from '../infra/claude/index.js';
import { callAiJudge } from '../agents/ai-judge.js';

// --- Mocks ---

vi.mock('../infra/claude/client.js', async (importOriginal) => {
const original = await importOriginal<typeof import('../infra/claude/client.js')>();
vi.mock('../agents/ai-judge.js', async (importOriginal) => {
const original = await importOriginal<typeof import('../agents/ai-judge.js')>();
return {
...original,
callAiJudge: vi.fn().mockResolvedValue(-1),
Expand Down
5 changes: 4 additions & 1 deletion src/__tests__/it-notification-sound.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -105,11 +105,14 @@ vi.mock('../core/piece/index.js', () => ({
}));

vi.mock('../infra/claude/index.js', () => ({
callAiJudge: vi.fn(),
detectRuleIndex: vi.fn(),
interruptAllQueries: mockInterruptAllQueries,
}));

vi.mock('../agents/ai-judge.js', () => ({
callAiJudge: vi.fn(),
}));

vi.mock('../infra/config/index.js', () => ({
loadPersonaSessions: vi.fn().mockReturnValue({}),
updatePersonaSession: vi.fn(),
Expand Down
9 changes: 5 additions & 4 deletions src/__tests__/it-piece-execution.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,16 @@ import { join } from 'node:path';
import { tmpdir } from 'node:os';
import { setMockScenario, resetScenario } from '../infra/mock/index.js';
import type { PieceConfig, PieceMovement, PieceRule } from '../core/models/index.js';
import { callAiJudge, detectRuleIndex } from '../infra/claude/index.js';
import { detectRuleIndex } from '../infra/claude/index.js';
import { callAiJudge } from '../agents/ai-judge.js';

// --- Mocks (minimal — only infrastructure, not core logic) ---

// Safety net: prevent callAiJudge from calling real Claude CLI.
// Safety net: prevent callAiJudge from calling real agent.
// Tag-based detection should always match in these tests; if it doesn't,
// this mock surfaces the failure immediately instead of timing out.
vi.mock('../infra/claude/client.js', async (importOriginal) => {
const original = await importOriginal<typeof import('../infra/claude/client.js')>();
vi.mock('../agents/ai-judge.js', async (importOriginal) => {
const original = await importOriginal<typeof import('../agents/ai-judge.js')>();
return {
...original,
callAiJudge: vi.fn().mockResolvedValue(-1),
Expand Down
7 changes: 4 additions & 3 deletions src/__tests__/it-piece-patterns.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,13 @@ import { mkdtempSync, mkdirSync, rmSync } from 'node:fs';
import { join } from 'node:path';
import { tmpdir } from 'node:os';
import { setMockScenario, resetScenario } from '../infra/mock/index.js';
import { callAiJudge, detectRuleIndex } from '../infra/claude/index.js';
import { detectRuleIndex } from '../infra/claude/index.js';
import { callAiJudge } from '../agents/ai-judge.js';

// --- Mocks ---

vi.mock('../infra/claude/client.js', async (importOriginal) => {
const original = await importOriginal<typeof import('../infra/claude/client.js')>();
vi.mock('../agents/ai-judge.js', async (importOriginal) => {
const original = await importOriginal<typeof import('../agents/ai-judge.js')>();
return {
...original,
callAiJudge: vi.fn().mockImplementation(async (content: string, conditions: { index: number; text: string }[]) => {
Expand Down
4 changes: 2 additions & 2 deletions src/__tests__/it-pipeline-modes.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ const {
mockPushBranch: vi.fn(),
}));

vi.mock('../infra/claude/client.js', async (importOriginal) => {
const original = await importOriginal<typeof import('../infra/claude/client.js')>();
vi.mock('../agents/ai-judge.js', async (importOriginal) => {
const original = await importOriginal<typeof import('../agents/ai-judge.js')>();
return {
...original,
callAiJudge: vi.fn().mockImplementation(async (content: string, conditions: { index: number; text: string }[]) => {
Expand Down
6 changes: 3 additions & 3 deletions src/__tests__/it-pipeline.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ import { setMockScenario, resetScenario } from '../infra/mock/index.js';

// --- Mocks ---

// Safety net: prevent callAiJudge from calling real Claude CLI.
vi.mock('../infra/claude/client.js', async (importOriginal) => {
const original = await importOriginal<typeof import('../infra/claude/client.js')>();
// Safety net: prevent callAiJudge from calling real agent.
vi.mock('../agents/ai-judge.js', async (importOriginal) => {
const original = await importOriginal<typeof import('../agents/ai-judge.js')>();
return {
...original,
callAiJudge: vi.fn().mockImplementation(async (content: string, conditions: { index: number; text: string }[]) => {
Expand Down
5 changes: 4 additions & 1 deletion src/__tests__/it-sigint-interrupt.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -71,11 +71,14 @@ vi.mock('../core/piece/index.js', () => ({
}));

vi.mock('../infra/claude/index.js', () => ({
callAiJudge: vi.fn(),
detectRuleIndex: vi.fn(),
interruptAllQueries: mockInterruptAllQueries,
}));

vi.mock('../agents/ai-judge.js', () => ({
callAiJudge: vi.fn(),
}));

vi.mock('../infra/config/index.js', () => ({
loadPersonaSessions: vi.fn().mockReturnValue({}),
updatePersonaSession: vi.fn(),
Expand Down
7 changes: 4 additions & 3 deletions src/__tests__/it-three-phase-execution.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,13 @@ import { join } from 'node:path';
import { tmpdir } from 'node:os';
import { setMockScenario, resetScenario } from '../infra/mock/index.js';
import type { PieceConfig, PieceMovement, PieceRule } from '../core/models/index.js';
import { callAiJudge, detectRuleIndex } from '../infra/claude/index.js';
import { detectRuleIndex } from '../infra/claude/index.js';
import { callAiJudge } from '../agents/ai-judge.js';

// --- Mocks ---

vi.mock('../infra/claude/client.js', async (importOriginal) => {
const original = await importOriginal<typeof import('../infra/claude/client.js')>();
vi.mock('../agents/ai-judge.js', async (importOriginal) => {
const original = await importOriginal<typeof import('../agents/ai-judge.js')>();
return {
...original,
callAiJudge: vi.fn().mockResolvedValue(-1),
Expand Down
5 changes: 4 additions & 1 deletion src/__tests__/pieceExecution-debug-prompts.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -71,11 +71,14 @@ vi.mock('../core/piece/index.js', () => ({
}));

vi.mock('../infra/claude/index.js', () => ({
callAiJudge: vi.fn(),
detectRuleIndex: vi.fn(),
interruptAllQueries: vi.fn(),
}));

vi.mock('../agents/ai-judge.js', () => ({
callAiJudge: vi.fn(),
}));

vi.mock('../infra/config/index.js', () => ({
loadPersonaSessions: vi.fn().mockReturnValue({}),
updatePersonaSession: vi.fn(),
Expand Down
5 changes: 4 additions & 1 deletion src/__tests__/runAllTasks-concurrency.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -98,10 +98,13 @@ vi.mock('../infra/github/index.js', () => ({

vi.mock('../infra/claude/index.js', () => ({
interruptAllQueries: vi.fn(),
callAiJudge: vi.fn(),
detectRuleIndex: vi.fn(),
}));

vi.mock('../agents/ai-judge.js', () => ({
callAiJudge: vi.fn(),
}));

vi.mock('../shared/exitCodes.js', () => ({
EXIT_SIGINT: 130,
}));
Expand Down
67 changes: 67 additions & 0 deletions src/agents/ai-judge.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/**
* AI judge - provider-aware rule condition evaluator
*
* Evaluates agent output against ai() conditions using the configured provider.
* Uses runAgent (which resolves provider from config) instead of hardcoded Claude.
*/

import type { AiJudgeCaller, AiJudgeCondition } from '../core/piece/types.js';
import { loadTemplate } from '../shared/prompts/index.js';
import { createLogger } from '../shared/utils/index.js';
import { runAgent } from './runner.js';

const log = createLogger('ai-judge');

/**
 * Locate the first `[JUDGE:N]` tag in the given text (matched case-insensitively)
 * and convert its 1-based number N into a 0-based rule index.
 *
 * @param content - Text to scan for the tag.
 * @returns N - 1 for the first tag found; -1 when there is no tag or when N is 0.
 */
export function detectJudgeIndex(content: string): number {
  const captured = /\[JUDGE:(\d+)\]/i.exec(content)?.[1];
  if (!captured) {
    return -1;
  }

  // Tags are numbered from 1, so `[JUDGE:0]` maps below zero and is rejected.
  const zeroBased = Number.parseInt(captured, 10) - 1;
  return zeroBased < 0 ? -1 : zeroBased;
}

/**
 * Assemble the judge prompt used to evaluate agent output against ai() conditions.
 *
 * Each condition becomes one `| N | text |` row, where N is the condition's
 * index plus one. Rows are joined with newlines and handed, together with the
 * agent output, to the `perform_judge_message` template (loaded with `'en'`).
 *
 * @param agentOutput - Agent output to be judged.
 * @param aiConditions - Conditions to list in the prompt.
 * @returns The string produced by the template loader.
 */
export function buildJudgePrompt(
  agentOutput: string,
  aiConditions: AiJudgeCondition[],
): string {
  const rows: string[] = [];
  for (const condition of aiConditions) {
    // Shown 1-based so the number lines up with the `[JUDGE:N]` tag convention.
    rows.push(`| ${condition.index + 1} | ${condition.text} |`);
  }
  const conditionList = rows.join('\n');

  return loadTemplate('perform_judge_message', 'en', { agentOutput, conditionList });
}

/**
 * Ask the AI judge which ai() condition the agent output satisfies.
 *
 * The judge prompt is sent through `runAgent` rather than a hardcoded Claude
 * call. The first argument to `runAgent` is `undefined`, and the options
 * request `maxTurns: 1` with an empty `allowedTools` list.
 *
 * @param agentOutput - Agent output to be judged.
 * @param conditions - The ai() conditions to evaluate against.
 * @param options - Carries `cwd`, which is forwarded to `runAgent`.
 * @returns The 0-based index parsed from the `[JUDGE:N]` tag in the response,
 *          or -1 when the response status is not `'done'` or no tag is found.
 */
export const callAiJudge: AiJudgeCaller = async (
  agentOutput: string,
  conditions: AiJudgeCondition[],
  options: { cwd: string },
): Promise<number> => {
  const judgePrompt = buildJudgePrompt(agentOutput, conditions);
  const verdict = await runAgent(undefined, judgePrompt, {
    cwd: options.cwd,
    maxTurns: 1,
    allowedTools: [],
  });

  if (verdict.status === 'done') {
    return detectJudgeIndex(verdict.content);
  }

  // Any non-'done' status is logged and reported to the caller as "no match".
  log.error('AI judge call failed', { error: verdict.error });
  return -1;
};
1 change: 1 addition & 0 deletions src/agents/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@
*/

export { AgentRunner, runAgent } from './runner.js';
export { callAiJudge, detectJudgeIndex, buildJudgePrompt } from './ai-judge.js';
export type { RunAgentOptions, StreamCallback } from './types.js';
3 changes: 2 additions & 1 deletion src/features/tasks/execute/pieceExecution.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ import { readFileSync } from 'node:fs';
import { PieceEngine, type IterationLimitRequest, type UserInputRequest } from '../../../core/piece/index.js';
import type { PieceConfig } from '../../../core/models/index.js';
import type { PieceExecutionResult, PieceExecutionOptions } from './types.js';
import { callAiJudge, detectRuleIndex, interruptAllQueries } from '../../../infra/claude/index.js';
import { detectRuleIndex, interruptAllQueries } from '../../../infra/claude/index.js';
import { callAiJudge } from '../../../agents/ai-judge.js';

export type { PieceExecutionResult, PieceExecutionOptions };

Expand Down
3 changes: 0 additions & 3 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,7 @@ export {
callClaudeCustom,
callClaudeAgent,
callClaudeSkill,
callAiJudge,
detectRuleIndex,
detectJudgeIndex,
buildJudgePrompt,
isRegexSafe,
} from './infra/claude/index.js';
export type {
Expand Down
72 changes: 0 additions & 72 deletions src/infra/claude/client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -154,60 +154,6 @@ export class ClaudeClient {
};
}

/**
* Detect judge rule index from [JUDGE:N] tag pattern.
* Returns 0-based rule index, or -1 if no match.
*/
static detectJudgeIndex(content: string): number {
const regex = /\[JUDGE:(\d+)\]/i;
const match = content.match(regex);
if (match?.[1]) {
const index = Number.parseInt(match[1], 10) - 1;
return index >= 0 ? index : -1;
}
return -1;
}

/**
* Build the prompt for the AI judge that evaluates agent output against ai() conditions.
*/
static buildJudgePrompt(
agentOutput: string,
aiConditions: { index: number; text: string }[],
): string {
const conditionList = aiConditions
.map((c) => `| ${c.index + 1} | ${c.text} |`)
.join('\n');

return loadTemplate('perform_judge_message', 'en', { agentOutput, conditionList });
}

/**
* Call AI judge to evaluate agent output against ai() conditions.
* Uses a lightweight model (haiku) for cost efficiency.
* Returns 0-based index of the matched ai() condition, or -1 if no match.
*/
async callAiJudge(
agentOutput: string,
aiConditions: { index: number; text: string }[],
options: { cwd: string },
): Promise<number> {
const prompt = ClaudeClient.buildJudgePrompt(agentOutput, aiConditions);

const spawnOptions: ClaudeSpawnOptions = {
cwd: options.cwd,
model: 'haiku',
maxTurns: 1,
};

const result = await executeClaudeCli(prompt, spawnOptions);
if (!result.success) {
log.error('AI judge call failed', { error: result.error });
return -1;
}

return ClaudeClient.detectJudgeIndex(result.content);
}
}

// ---- Module-level functions ----
Expand Down Expand Up @@ -247,21 +193,3 @@ export async function callClaudeSkill(
return defaultClient.callSkill(skillName, prompt, options);
}

export function detectJudgeIndex(content: string): number {
return ClaudeClient.detectJudgeIndex(content);
}

export function buildJudgePrompt(
agentOutput: string,
aiConditions: { index: number; text: string }[],
): string {
return ClaudeClient.buildJudgePrompt(agentOutput, aiConditions);
}

export async function callAiJudge(
agentOutput: string,
aiConditions: { index: number; text: string }[],
options: { cwd: string },
): Promise<number> {
return defaultClient.callAiJudge(agentOutput, aiConditions, options);
}
3 changes: 0 additions & 3 deletions src/infra/claude/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,6 @@ export {
callClaudeCustom,
callClaudeAgent,
callClaudeSkill,
callAiJudge,
detectRuleIndex,
detectJudgeIndex,
buildJudgePrompt,
isRegexSafe,
} from './client.js';
Loading