Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/external/guides/authoring.md
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,11 @@ deno run -A packages/gambit/scripts/migrate-schema-terms.ts <repo-root>
invalid JSON or schema-violating output blocks the run with a clear error.
- `graderDecks` describe calibration decks that score transcripts/artifacts. The
simulator Calibrate page will run these decks against stored runs.
- For turn-level graders that inspect assistant tool usage, set
`contextSchema = "gambit://schemas/graders/contexts/turn_tools.zod.ts"` so
`tool_calls` is available on both `session.messages[*]` and `messageToGrade`
in the grader input.
- For conversation-level tool-call grading (single score for the whole run), use
`contextSchema = "gambit://schemas/graders/contexts/conversation_tools.zod.ts"`.
- Configure `acceptsUserTurns` alongside these references:
- Markdown roots default to `true`; TypeScript decks default to `false`
everywhere. Set it to `false` for any workflow deck that should never accept
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import { z } from "zod";

/**
 * Shape of one entry in a message's `tool_calls` array.
 *
 * Only `function.name` is required; `id`, `type`, and `function.arguments`
 * may be omitted.
 */
const graderToolCallSchema = z.object({
id: z.string().optional(),
type: z.string().optional(),
function: z.object({
name: z.string(),
// Validated only as a string here; this schema does not parse its contents.
arguments: z.string().optional(),
}),
});

/**
 * A single conversation message that may carry tool calls.
 *
 * `role` is the only required field; `content` accepts any value.
 */
export const graderConversationMessageWithToolsSchema = z.object({
role: z.string(),
content: z.any().optional(),
name: z.string().optional(),
tool_calls: z.array(graderToolCallSchema).optional(),
});

/**
 * A conversation whose messages may include tool calls.
 *
 * Every field is optional: `messages`, a free-form `meta` record, and a
 * `notes` object with an optional `text` string.
 */
export const graderConversationWithToolsSchema = z.object({
messages: z.array(graderConversationMessageWithToolsSchema).optional(),
meta: z.record(z.any()).optional(),
notes: z.object({ text: z.string().optional() }).optional(),
});

/**
 * Conversation-level grader context: a required `session` and no
 * per-message field.
 */
export default z.object({
session: graderConversationWithToolsSchema,
});
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
// Forwards the default export of ./conversation_tools.ts unchanged.
export { default } from "./conversation_tools.ts";
5 changes: 5 additions & 0 deletions packages/gambit-core/schemas/graders/contexts/tools.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Forwards the default schema and the two named schemas of ./turn_tools.ts.
export {
  default,
  graderConversationWithToolsSchema,
  graderMessageWithToolsSchema,
} from "./turn_tools.ts";
1 change: 1 addition & 0 deletions packages/gambit-core/schemas/graders/contexts/tools.zod.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
// Forwards the default export of ./turn_tools.ts unchanged.
export { default } from "./turn_tools.ts";
28 changes: 28 additions & 0 deletions packages/gambit-core/schemas/graders/contexts/turn_tools.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import { z } from "zod";

/**
 * Shape of one entry in a message's `tool_calls` array.
 *
 * Only `function.name` is required; `id`, `type`, and `function.arguments`
 * may be omitted.
 */
const graderToolCallSchema = z.object({
id: z.string().optional(),
type: z.string().optional(),
function: z.object({
name: z.string(),
// Validated only as a string here; this schema does not parse its contents.
arguments: z.string().optional(),
}),
});

/**
 * A single message that may carry tool calls.
 *
 * `role` is the only required field; `content` accepts any value.
 */
export const graderMessageWithToolsSchema = z.object({
role: z.string(),
content: z.any().optional(),
name: z.string().optional(),
tool_calls: z.array(graderToolCallSchema).optional(),
});

/**
 * A conversation whose messages may include tool calls.
 *
 * Every field is optional: `messages`, a free-form `meta` record, and a
 * `notes` object with an optional `text` string.
 */
export const graderConversationWithToolsSchema = z.object({
messages: z.array(graderMessageWithToolsSchema).optional(),
meta: z.record(z.any()).optional(),
notes: z.object({ text: z.string().optional() }).optional(),
});

/**
 * Turn-level grader context: the `session` plus the single
 * `messageToGrade`. Both fields are required.
 */
export default z.object({
session: graderConversationWithToolsSchema,
messageToGrade: graderMessageWithToolsSchema,
});
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
// Forwards the default export of ./turn_tools.ts unchanged.
export { default } from "./turn_tools.ts";
95 changes: 95 additions & 0 deletions packages/gambit-core/src/markdown.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,101 @@ Schema deck.
assertEquals(parsed, { status: 200 });
});

// Loads a markdown deck whose front matter points `contextSchema` at the
// turn_tools gambit:// URI, then checks that the resolved schema accepts a
// payload with tool calls in both `session.messages` and `messageToGrade`.
Deno.test("markdown deck resolves tool-call-aware grader context schema", async () => {
const dir = await Deno.makeTempDir();

const deckPath = await writeTempDeck(
dir,
"turn-tools-schema.deck.md",
`+++
label = "turn-tools-schema"
contextSchema = "gambit://schemas/graders/contexts/turn_tools.zod.ts"
+++

Schema deck.
`,
);

const deck = await loadMarkdownDeck(deckPath);

assert(deck.contextSchema, "expected context schema to resolve");
// The session tool call supplies `arguments`; the `messageToGrade` tool call
// omits it, so the optional field is exercised both ways.
const parsed = deck.contextSchema.parse({
session: {
messages: [
{
role: "assistant",
tool_calls: [
{
function: {
name: "bot_write",
arguments: '{"path":"PROMPT.md"}',
},
},
],
},
],
},
messageToGrade: {
role: "assistant",
tool_calls: [
{
function: {
name: "bot_write",
},
},
],
},
});

// The parsed output must keep both the graded message and the nested tool call.
assertEquals(parsed.messageToGrade.role, "assistant");
assertEquals(
parsed.session.messages?.[0].tool_calls?.[0].function.name,
"bot_write",
);
});

// Loads a markdown deck whose front matter points `contextSchema` at the
// conversation_tools gambit:// URI, then checks that the resolved schema
// accepts a session-only payload (no `messageToGrade`) containing a tool call.
Deno.test("markdown deck resolves conversation-level tool-call grader context schema", async () => {
const dir = await Deno.makeTempDir();

const deckPath = await writeTempDeck(
dir,
"conversation-tools-schema.deck.md",
`+++
label = "conversation-tools-schema"
contextSchema = "gambit://schemas/graders/contexts/conversation_tools.zod.ts"
+++

Schema deck.
`,
);

const deck = await loadMarkdownDeck(deckPath);

assert(deck.contextSchema, "expected context schema to resolve");
const parsed = deck.contextSchema.parse({
session: {
messages: [
{
role: "assistant",
tool_calls: [
{
function: {
name: "bot_write",
arguments: '{"path":"faq-bot/PROMPT.md"}',
},
},
],
},
],
},
});

// The nested tool call name must survive parsing.
assertEquals(
parsed.session.messages?.[0].tool_calls?.[0].function.name,
"bot_write",
);
});

Deno.test("markdown deck warns on legacy schema URIs", async () => {
const dir = await Deno.makeTempDir();
const deckPath = await writeTempDeck(
Expand Down
35 changes: 15 additions & 20 deletions src/decks/gambit-bot/PROMPT.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,30 +55,25 @@ label = "Deck format policy guard (turn) LLM"
path = "./graders/deck_format_policy_llm/PROMPT.md"
description = "LLM guard for policy-compliant deck editing behavior."

[[scenarios]]
label = "Recipe selection on-ramp tester"
path = "./scenarios/recipe_selection/PROMPT.md"
description = "Synthetic user that asks Gambit Bot to build a recipe selection chatbot."

[[scenarios]]
label = "Recipe selection (no skip)"
path = "./scenarios/recipe_selection_no_skip/PROMPT.md"
description = "Synthetic user that completes the question flow without skipping to building."
[[graders]]
label = "First deck location guard (turn)"
path = "./graders/first_deck_root_prompt_guard/PROMPT.md"
description = "Checks that the first created deck is root PROMPT.md (not a subfolder PROMPT.md)."

[[scenarios]]
label = "Build tab demo prompt"
path = "./scenarios/build_tab_demo/PROMPT.md"
description = "Synthetic user prompt for the build tab demo."
[[graders]]
label = "First deck location guard (tools)"
path = "./graders/first_deck_root_prompt_guard_tools/PROMPT.md"
description = "Checks first created deck location using tool-call-aware grading context."

[[scenarios]]
label = "NUX from scratch demo prompt"
path = "./scenarios/nux_from_scratch_demo/PROMPT.md"
description = "Synthetic user prompt for the NUX from-scratch build demo."
[[graders]]
label = "First deck location guard (tools, conversation)"
path = "./graders/first_deck_root_prompt_guard_tools_conversation/PROMPT.md"
description = "Conversation-level check of first created deck location with tool-call-aware context."

[[scenarios]]
label = "Investor FAQ regression"
path = "./scenarios/investor_faq_regression/PROMPT.md"
description = "Replays the investor FAQ build flow that previously produced a non-v1.0 deck format."
label = "FAQ bot build flow"
path = "./scenarios/faq_bot_build_flow/PROMPT.md"
description = "Synthetic user flow that builds an FAQ bot, checks policy alignment, and requests a root-level deck move."
+++

You are GambitBot, an AI assistant designed to help people build other AI
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
+++
label = "First deck location guard (turn)"
description = "Deterministic guard that checks whether the first created deck is root PROMPT.md."
contextSchema = "gambit://schemas/graders/contexts/turn_tools.zod.ts"
responseSchema = "gambit://schemas/graders/grader_output.zod.ts"
execute = "./first_deck_root_prompt_guard.deck.ts"
+++

Compute grader that enforces first deck location policy.
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
import { defineDeck } from "jsr:@bolt-foundry/gambit";
import { z } from "npm:zod";
import contextSchema, {
type graderMessageWithToolsSchema as messageSchema,
} from "../../../../../../gambit-core/schemas/graders/contexts/turn_tools.ts";

/**
 * Output of this grader: an integer `score` between -3 and 3 inclusive, a
 * required `reason`, and an optional list of `evidence` strings.
 */
const responseSchema = z.object({
score: z.number().int().min(-3).max(3),
reason: z.string(),
evidence: z.array(z.string()).optional(),
});

/** One `bot_write` tool call whose target path is a `PROMPT.md` file. */
type DeckWrite = {
// Target path after normalization (forward slashes, leading "./" removed).
path: string;
// Index of the message within the scanned message array that issued the call.
messageIndex: number;
};

/**
 * Compute grader for the first-deck-location policy.
 *
 * Scans every message in `session.messages` for `bot_write` tool calls that
 * target a `PROMPT.md` file and scores only the earliest one:
 *   - no such write: score 0
 *   - earliest write is exactly `PROMPT.md`: score 3
 *   - earliest write is any other `PROMPT.md` path: score -3
 */
export default defineDeck({
  label: "first_deck_root_prompt_guard",
  contextSchema,
  responseSchema,
  run(ctx) {
    // Only the earliest qualifying write matters; later writes are ignored.
    const [earliestWrite] = collectDeckPromptWrites(
      ctx.input.session.messages ?? [],
    );

    if (earliestWrite === undefined) {
      return {
        score: 0,
        reason:
          "No deck creation write found (no bot_write call targeting PROMPT.md).",
      };
    }

    const { path: writtenPath, messageIndex } = earliestWrite;
    const pathEvidence = `first deck write path: ${writtenPath}`;

    if (writtenPath !== "PROMPT.md") {
      return {
        score: -3,
        reason:
          "First created deck is not root PROMPT.md; it was created in a subfolder.",
        evidence: [pathEvidence, `message index: ${messageIndex}`],
      };
    }

    return {
      score: 3,
      reason: "First created deck is root PROMPT.md.",
      evidence: [pathEvidence],
    };
  },
});

/**
 * Collects, in message order, every `bot_write` tool call made by an
 * assistant message whose normalized `path` argument names a `PROMPT.md` file.
 *
 * @param messages Messages to scan; non-assistant messages and messages
 *   without tool calls are skipped.
 * @returns One entry per matching tool call, with the normalized path and the
 *   index of the message that issued it.
 */
function collectDeckPromptWrites(
  messages: Array<z.infer<typeof messageSchema>>,
): Array<DeckWrite> {
  const found: Array<DeckWrite> = [];

  for (const [messageIndex, message] of messages.entries()) {
    if (message.role !== "assistant") continue;

    for (const call of message.tool_calls ?? []) {
      const { name: toolName, arguments: rawArgs } = call.function;
      if (toolName !== "bot_write" || !rawArgs) continue;

      try {
        const { path: rawPath } = JSON.parse(rawArgs) as { path?: unknown };
        if (typeof rawPath !== "string") continue;

        const candidate = normalizePath(rawPath);
        if (isDeckPromptPath(candidate)) {
          found.push({ path: candidate, messageIndex });
        }
      } catch {
        // Best effort: arguments that are not usable JSON are skipped so the
        // scan can continue with the remaining tool calls.
      }
    }
  }

  return found;
}

/**
 * Normalizes a tool-call path for comparison against `PROMPT.md`.
 *
 * Backslashes become forward slashes, then every leading `./` segment is
 * removed (including repeats such as `././` and doubled slashes such as
 * `.//`). Stripping only one `./` would leave `././PROMPT.md` looking like a
 * non-root path. `../` prefixes and dot-prefixed names such as `.hidden/` are
 * left untouched.
 *
 * @param path Raw path string taken from the tool-call arguments.
 * @returns The path with forward slashes and no leading current-directory
 *   segments.
 */
function normalizePath(path: string): string {
  const withForwardSlashes = path.replace(/\\/g, "/");
  return withForwardSlashes.replace(/^(?:\.\/+)+/, "");
}

/**
 * Reports whether the final `/`-separated segment of `path` is exactly
 * `PROMPT.md` (case-sensitive).
 *
 * @param path Path that already uses forward slashes.
 * @returns `true` for `PROMPT.md` itself or any path ending in `/PROMPT.md`.
 */
function isDeckPromptPath(path: string): boolean {
  // lastIndexOf yields -1 when there is no slash, so the slice starts at 0.
  const lastSegment = path.slice(path.lastIndexOf("/") + 1);
  return lastSegment === "PROMPT.md";
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
+++
label = "First deck location guard (tools)"
description = "Deterministic guard that checks whether the first created deck is root PROMPT.md, with tool-call-aware context."
contextSchema = "gambit://schemas/graders/contexts/turn_tools.zod.ts"
responseSchema = "gambit://schemas/graders/grader_output.zod.ts"
execute = "./first_deck_root_prompt_guard_tools.deck.ts"
+++

Compute grader that enforces first deck location policy using tool-call-aware
context.
Loading