/**
 * Tests for the ingest pipeline stages, one `describe` group per stage:
 * normalize, segment, extract, toSoustack, emit, and validate.
 *
 * Runs on the built-in `node:test` runner with `node:assert/strict`.
 */
import { afterEach, beforeEach, describe, it } from "node:test";
import assert from "node:assert/strict";
import { promises as fs } from "fs";
import os from "os";
import path from "path";
import { normalize } from "../src/pipeline/normalize";
import { segment } from "../src/pipeline/segment";
import { extract } from "../src/pipeline/extract";
import { toSoustack } from "../src/pipeline/toSoustack";
import { emit } from "../src/pipeline/emit";
import { initValidator, validate } from "../src/pipeline/validate";
import { IntermediateRecipe, SoustackRecipe } from "../src/pipeline/types";

describe("pipeline", () => {
  describe("normalize", () => {
    it("produces stable lines", () => {
      // CRLF input: the expected output uses "\n" only and numbers lines from 1.
      const input = "First line\r\nSecond line\r\n\r\nThird line";
      const result = normalize(input);

      assert.equal(result.fullText, "First line\nSecond line\n\nThird line");
      assert.deepEqual(result.lines, [
        { n: 1, text: "First line" },
        { n: 2, text: "Second line" },
        { n: 3, text: "" },
        { n: 4, text: "Third line" },
      ]);
    });

    it("treats form feeds as newlines", () => {
      // Same expectation as the CRLF case, but with "\f" as the separator.
      const input = "First line\fSecond line\f\fThird line";
      const result = normalize(input);

      assert.equal(result.fullText, "First line\nSecond line\n\nThird line");
      assert.deepEqual(result.lines, [
        { n: 1, text: "First line" },
        { n: 2, text: "Second line" },
        { n: 3, text: "" },
        { n: 4, text: "Third line" },
      ]);
    });
  });

  describe("segment", () => {
    it("returns non-overlapping chunks", () => {
      const lines = normalize("Title\nBody line\nAnother line").lines;
      const { chunks } = segment(lines);

      assert.ok(chunks.length > 0);

      // Sort a copy by start line so neighbouring chunks can be compared.
      const sorted = [...chunks].sort((a, b) => a.startLine - b.startLine);
      sorted.forEach((chunk) => {
        assert.ok(chunk.startLine <= chunk.endLine);
      });
      // Each chunk must begin strictly after the previous one ends.
      for (let index = 1; index < sorted.length; index += 1) {
        assert.ok(sorted[index].startLine > sorted[index - 1].endLine);
      }
    });

    it("segments a mini cookbook with inferred titles", () => {
      const cookbook = [
        "SUMMER SALAD",
        "",
        "2 cups mixed greens",
        "1/2 cup cherry tomatoes",
        "Pinch of salt",
        "",
        "Toss together and serve.",
        "",
        "COZY SOUP",
        "",
        "- 1 tbsp olive oil",
        "- 1 cup broth",
        "",
        "Directions",
        "Simmer for 10 minutes.",
        "",
        "PANCAKE BITES",
        "",
        "1 cup flour",
        "1/2 cup milk",
        "1 egg",
        "",
        "Cook on a griddle until golden.",
      ].join("\n");

      const { chunks } = segment(normalize(cookbook).lines);

      assert.equal(chunks.length, 3);
      // NOTE(review): the third expected title is "Directions", not
      // "PANCAKE BITES" — confirm this pins the intended segmenter behavior.
      assert.deepEqual(
        chunks.map((chunk) => chunk.titleGuess),
        ["SUMMER SALAD", "COZY SOUP", "Directions"]
      );
      chunks.forEach((chunk) => {
        assert.ok(chunk.confidence > 0.6);
      });
    });

    it("handles ingredient lines with unit tokens but no leading numbers", () => {
      const cookbook = [
        "MORNING OATS",
        "",
        "cup rolled oats",
        "tbsp chia seeds",
        "pinch salt",
        "",
        "Stir together and soak overnight.",
        "",
        "HERB TEA",
        "",
        "tsp dried chamomile",
        "cup hot water",
        "",
        "Steep for five minutes.",
      ].join("\n");

      const { chunks } = segment(normalize(cookbook).lines);

      assert.equal(chunks.length, 2);
      assert.deepEqual(chunks.map((chunk) => chunk.titleGuess), [
        "MORNING OATS",
        "HERB TEA",
      ]);
      chunks.forEach((chunk) => {
        assert.ok(chunk.confidence > 0.6);
      });
    });

    it("avoids creating chunks on ingredient-like title candidates", () => {
      // "Cup Rolled Oats" is title-cased but must not start a new chunk.
      const cookbook = [
        "COZY SOUP",
        "",
        "Cup Rolled Oats",
        "",
        "Mix well.",
      ].join("\n");

      const { chunks } = segment(normalize(cookbook).lines);

      assert.equal(chunks.length, 1);
      assert.equal(chunks[0]?.titleGuess, "COZY SOUP");
      assert.ok(!chunks.some((chunk) => chunk.titleGuess === "Cup Rolled Oats"));
    });

    it("keeps all-caps titles above imperative-only instructions", async () => {
      // The fixture path is resolved from the current working directory.
      const fixturePath = path.join(process.cwd(), "test", "fixtures", "cinnamon-toast.txt");
      const fixture = await fs.readFile(fixturePath, "utf-8");
      const lines = normalize(fixture).lines;
      const { chunks } = segment(lines);

      assert.equal(chunks.length, 1);
      assert.equal(chunks[0]?.titleGuess, "CINNAMON TOAST AND BUTTER");
      assert.equal(chunks[0]?.startLine, 1);

      // Collect the trimmed text of every line inside the first chunk's range.
      const chunkLines = lines
        .filter(
          (line) =>
            line.n >= (chunks[0]?.startLine ?? 0) &&
            line.n <= (chunks[0]?.endLine ?? 0),
        )
        .map((line) => line.text.trim());

      assert.ok(chunkLines.includes("Toast the white bread"));
      assert.ok(chunkLines.includes("Add soft butter onto toast"));
      assert.ok(chunkLines.includes("Sprinkle the cinnamon/sugar mix"));
      assert.ok(!chunks.some((chunk) => chunk.titleGuess === "Toast the white bread"));
    });

    it("prefers structured cookbook titles over ingredient lines", async () => {
      const fixturePath = path.join(process.cwd(), "test", "fixtures", "structured-cookbook.txt");
      const fixture = await fs.readFile(fixturePath, "utf-8");
      const lines = normalize(fixture).lines;
      const { chunks } = segment(lines);

      assert.equal(chunks[0]?.titleGuess, "CHICKEN PICCATTA");
      assert.ok(!chunks.some((chunk) => chunk.titleGuess === "Chicken breasts"));
    });
  });

  describe("extract", () => {
    it("returns ingredients and instructions when headings are missing", () => {
      const text = [
        "SIMPLE SALAD",
        "2 cups mixed greens",
        "1 tbsp olive oil",
        "Pinch of salt",
        "Toss everything together and serve.",
      ].join("\n");
      const lines = normalize(text).lines;
      const [chunk] = segment(lines).chunks;

      const recipe = extract(chunk, lines);

      assert.ok(recipe.ingredients.includes("2 cups mixed greens"));
      assert.ok(recipe.ingredients.includes("1 tbsp olive oil"));
      assert.ok(recipe.ingredients.includes("Pinch of salt"));
      assert.ok(!recipe.ingredients.includes("SIMPLE SALAD"));
      assert.ok(recipe.instructions.join(" ").includes("Toss"));
    });

    it("handles ingredients before a preparation heading", () => {
      const text = [
        "QUICK TOAST",
        "2 slices bread",
        "1 tbsp butter",
        "Preparation:",
        "Toast the bread until golden.",
        "Spread with butter.",
      ].join("\n");
      const lines = normalize(text).lines;
      // Hand-built chunk covering the whole input, so segment() is not involved.
      const chunk = {
        startLine: 1,
        endLine: lines.length,
        titleGuess: "QUICK TOAST",
        confidence: 1,
        evidence: "test",
      };

      const recipe = extract(chunk, lines);

      // NOTE(review): these expectations put the quantity lines in
      // `instructions` and list single words in `ingredients` — confirm this
      // is the intended extract() contract.
      assert.deepEqual(recipe.ingredients, ["bread", "golden", "butter"]);
      assert.deepEqual(recipe.prepSection, [
        "Toast the bread until golden.",
        "Spread with butter.",
      ]);
      assert.ok(recipe.instructions.includes("2 slices bread"));
      assert.ok(recipe.instructions.includes("1 tbsp butter"));
      assert.ok(recipe.instructions.includes("Toast the bread until golden."));
    });

    it("keeps simple ingredient phrases without quantities", () => {
      const text = ["SPICE BLEND", "Salt and pepper", "Mix well."].join("\n");
      const lines = normalize(text).lines;
      const [chunk] = segment(lines).chunks;

      const recipe = extract(chunk, lines);

      assert.ok(recipe.ingredients.includes("Salt and pepper"));
      assert.ok(recipe.instructions.join(" ").includes("Mix"));
    });

    it("treats unicode bullets as ingredients", () => {
      const text = ["BULLET LIST", "• 1 cup flour", "• 2 tbsp sugar"].join("\n");
      const lines = normalize(text).lines;
      const [chunk] = segment(lines).chunks;

      const recipe = extract(chunk, lines);

      // The expected ingredients have the leading bullet removed.
      assert.equal(recipe.ingredients.length, 2);
      assert.deepEqual(recipe.ingredients, ["1 cup flour", "2 tbsp sugar"]);
    });

    it("treats unicode fraction ingredient lines as ingredients without headings", () => {
      const text = [
        "FANCY VINAIGRETTE",
        "½ cup olive oil",
        "¼ cup white wine",
        "¾ tsp salt",
        "Whisk until emulsified.",
      ].join("\n");
      const lines = normalize(text).lines;
      const [chunk] = segment(lines).chunks;

      const recipe = extract(chunk, lines);

      assert.ok(recipe.ingredients.includes("½ cup olive oil"));
      assert.ok(recipe.ingredients.includes("¼ cup white wine"));
      assert.ok(recipe.ingredients.includes("¾ tsp salt"));
      // The fraction lines must not also appear among the instructions.
      assert.ok(!recipe.instructions.some((line) => line.includes("½ cup olive oil")));
      assert.ok(!recipe.instructions.some((line) => line.includes("¼ cup white wine")));
      assert.ok(!recipe.instructions.some((line) => line.includes("¾ tsp salt")));
      assert.ok(recipe.instructions.join(" ").includes("Whisk"));
    });

    it("strips a multi-line By block from instructions", () => {
      // "By:" on one line with the author's name on the following line.
      const text = [
        "BOWMAN SALSA",
        "By:",
        "Jessie Bowman",
        "",
        "Ingredients",
        "1 cup tomatoes",
        "Directions",
        "Mix together and serve.",
      ].join("\n");
      const lines = normalize(text).lines;
      const chunk = {
        startLine: 1,
        endLine: lines.length,
        titleGuess: "BOWMAN SALSA",
        confidence: 1,
        evidence: "test",
      };

      const recipe = extract(chunk, lines);

      assert.equal(recipe.source.author, "Jessie Bowman");
      const instructionsText = recipe.instructions.join(" ");
      assert.ok(!instructionsText.includes("By:"));
      assert.ok(!instructionsText.includes("Jessie Bowman"));
    });

    it("strips single-line By attribution from instructions", () => {
      // "By: <name>" on a single line.
      const text = [
        "BOWMAN STEW",
        "By: Jamie Bowman",
        "",
        "Ingredients",
        "1 cup broth",
        "Instructions",
        "Simmer gently.",
      ].join("\n");
      const lines = normalize(text).lines;
      const chunk = {
        startLine: 1,
        endLine: lines.length,
        titleGuess: "BOWMAN STEW",
        confidence: 1,
        evidence: "test",
      };

      const recipe = extract(chunk, lines);

      assert.equal(recipe.source.author, "Jamie Bowman");
      const instructionsText = recipe.instructions.join(" ");
      assert.ok(!instructionsText.includes("By:"));
      assert.ok(!instructionsText.includes("Jamie Bowman"));
    });

    it("captures prep sections and ingredient-level prep metadata", () => {
      const text = [
        "Title line",
        "Prep:",
        "- Dice the onion",
        "- Mince garlic",
        "Ingredients:",
        "1 onion, diced",
        "2 cloves garlic, minced",
        "1 can diced tomatoes",
        "Instructions:",
        "1. Cook stuff.",
      ].join("\n");
      const lines = normalize(text).lines;
      const chunk = {
        startLine: 1,
        endLine: lines.length,
        titleGuess: "Title line",
        confidence: 1,
        evidence: "test",
      };

      const recipe = extract(chunk, lines);

      assert.deepEqual(recipe.prepSection, ["Dice the onion", "Mince garlic"]);
      assert.deepEqual(recipe.ingredients, [
        "1 onion, diced",
        "2 cloves garlic, minced",
        "1 can diced tomatoes",
      ]);
      // Expected instructions keep the prep bullets verbatim, while the
      // numbered step appears without its "1. " prefix.
      assert.deepEqual(recipe.instructions, [
        "- Dice the onion",
        "- Mince garlic",
        "Cook stuff.",
      ]);
      // Only the two ingredients with a ", <prep>" suffix are expected here;
      // "1 can diced tomatoes" has no entry.
      assert.deepEqual(recipe.ingredientPrep, [
        {
          index: 0,
          raw: "1 onion, diced",
          base: "1 onion",
          prep: ["diced"],
        },
        {
          index: 1,
          raw: "2 cloves garlic, minced",
          base: "2 cloves garlic",
          prep: ["minced"],
        },
      ]);
    });
  });

  describe("toSoustack", () => {
    it("includes required fields", () => {
      const intermediate: IntermediateRecipe = {
        title: "CINNAMON TOAST AND BUTTER",
        ingredients: ["1 cup rice"],
        instructions: ["Cook the rice."],
        source: {
          startLine: 1,
          endLine: 4,
          evidence: "Lines 1-4",
        },
      };

      const recipe = toSoustack(intermediate, { sourcePath: "recipes.md" });

      assert.equal(recipe.$schema, "https://spec.soustack.org/soustack.schema.json");
      assert.equal(recipe.profile, "lite");
      // The all-caps title is expected to come back title-cased as `name`,
      // with the original kept under metadata.
      assert.equal(recipe.name, "Cinnamon Toast and Butter");
      assert.deepEqual(recipe.ingredients, intermediate.ingredients);
      assert.deepEqual(recipe.instructions, intermediate.instructions);
      assert.deepEqual(recipe.stacks, {});
      assert.equal(recipe.metadata?.originalTitle, intermediate.title);
      assert.deepEqual(recipe.metadata?.ingest, {
        pipelineVersion: "0.1.1",
        sourcePath: "recipes.md",
        sourceLines: {
          start: 1,
          end: 4,
        },
      });
    });

    it("emits prep metadata in the x-prep extension lane", () => {
      const intermediate: IntermediateRecipe = {
        title: "Prep Test",
        ingredients: ["1 onion, diced"],
        instructions: ["Cook the onion."],
        prepSection: ["Dice the onion"],
        ingredientPrep: [
          {
            index: 0,
            raw: "1 onion, diced",
            base: "1 onion",
            prep: ["diced"],
          },
        ],
        source: {
          startLine: 1,
          endLine: 4,
          evidence: "Lines 1-4",
        },
      };

      const recipe = toSoustack(intermediate);

      assert.deepEqual(recipe["x-prep"]?.section, ["Dice the onion"]);
      assert.deepEqual(recipe["x-prep"]?.ingredients, intermediate.ingredientPrep);
      // Only presence is checked; the value of generatedAt is not pinned.
      assert.ok(recipe["x-prep"]?.generatedAt);
    });

    it("defaults to empty ingredient and instruction arrays when missing", () => {
      // Deliberately malformed input: the cast bypasses the type checker so the
      // runtime defaults for missing arrays can be exercised.
      const intermediate = {
        title: "Empty Fields",
        source: {
          startLine: 1,
          endLine: 1,
          evidence: "Lines 1-1",
        },
      } as unknown as IntermediateRecipe;

      const recipe = toSoustack(intermediate);

      assert.deepEqual(recipe.ingredients, []);
      assert.deepEqual(recipe.instructions, []);
      // Passes only when `stacks` is truthy and of type "object".
      assert.equal(recipe.stacks && typeof recipe.stacks, "object");
    });

    it("emits only spec-approved top-level fields", () => {
      const intermediate: IntermediateRecipe = {
        title: "Veggie Bowl",
        ingredients: ["1 cup rice"],
        instructions: ["Cook the rice."],
        source: {
          startLine: 1,
          endLine: 4,
          evidence: "Lines 1-4",
        },
      };

      const recipe = toSoustack(intermediate);

      const allowedKeys = new Set([
        "$schema",
        "profile",
        "stacks",
        "name",
        "ingredients",
        "instructions",
        "x-prep",
        "metadata",
      ]);
      assert.equal(recipe.$schema, "https://spec.soustack.org/soustack.schema.json");
      Object.keys(recipe).forEach((key) => {
        assert.ok(allowedKeys.has(key), `Unexpected top-level key: ${key}`);
      });
    });
  });

  describe("emit", () => {
    let tempDir: string;

    // Each test writes into its own fresh temp directory, removed afterwards.
    beforeEach(async () => {
      tempDir = await fs.mkdtemp(path.join(os.tmpdir(), "soustack-"));
    });

    afterEach(async () => {
      await fs.rm(tempDir, { recursive: true, force: true });
    });

    it("writes index and recipe files", async () => {
      const recipes: SoustackRecipe[] = [
        {
          $schema: "https://spec.soustack.org/soustack.schema.json",
          profile: "lite",
          name: "Test Recipe",
          stacks: {},
          ingredients: ["1 cup sugar"],
          instructions: ["Mix."],
          metadata: {
            ingest: {
              pipelineVersion: "0.1.0",
            },
          },
        },
      ];

      await emit(recipes, tempDir);

      const indexPath = path.join(tempDir, "index.json");
      const recipePath = path.join(tempDir, "recipes", "test-recipe.soustack.json");

      const indexRaw = await fs.readFile(indexPath, "utf-8");
      const indexPayload = JSON.parse(indexRaw) as Array<{ name: string; slug: string; path: string }>;
      assert.deepEqual(indexPayload, [
        {
          name: "Test Recipe",
          slug: "test-recipe",
          path: "recipes/test-recipe.soustack.json",
        },
      ]);

      const recipeRaw = await fs.readFile(recipePath, "utf-8");
      const recipePayload = JSON.parse(recipeRaw) as SoustackRecipe;
      assert.equal(recipePayload.name, "Test Recipe");
      assert.deepEqual(recipePayload.ingredients, ["1 cup sugar"]);
    });

    it("writes unique paths for duplicate recipe names", async () => {
      // Two recipes share a name; their index entries must still differ by path.
      const recipes: SoustackRecipe[] = [
        {
          $schema: "https://spec.soustack.org/soustack.schema.json",
          profile: "lite",
          name: "Dup Recipe",
          stacks: {},
          ingredients: ["1 cup sugar"],
          instructions: ["Mix."],
          metadata: {
            ingest: {
              pipelineVersion: "0.1.0",
            },
          },
        },
        {
          $schema: "https://spec.soustack.org/soustack.schema.json",
          profile: "lite",
          name: "Dup Recipe",
          stacks: {},
          ingredients: ["2 cups flour"],
          instructions: ["Bake."],
          metadata: {
            ingest: {
              pipelineVersion: "0.1.0",
            },
          },
        },
      ];

      await emit(recipes, tempDir);

      const indexPath = path.join(tempDir, "index.json");
      const indexRaw = await fs.readFile(indexPath, "utf-8");
      const indexPayload = JSON.parse(indexRaw) as Array<{ name: string; slug: string; path: string }>;
      const paths = indexPayload.map((entry) => entry.path);

      assert.equal(indexPayload.length, 2);
      assert.equal(new Set(paths).size, 2);

      // Every indexed path must point at a readable recipe file with a name.
      for (const entry of indexPayload) {
        const filePath = path.join(tempDir, entry.path);
        const recipeRaw = await fs.readFile(filePath, "utf-8");
        const recipePayload = JSON.parse(recipeRaw) as SoustackRecipe;
        assert.ok(recipePayload.name);
      }
    });
  });

  describe("validate", () => {
    // initValidator() is awaited before every test in this group.
    beforeEach(async () => {
      await initValidator();
    });

    it("accepts a minimal valid recipe", () => {
      const recipe: SoustackRecipe = {
        $schema: "https://spec.soustack.org/soustack.schema.json",
        profile: "lite",
        name: "Test Recipe",
        stacks: {},
        ingredients: ["1 cup sugar"],
        instructions: ["Mix."],
        metadata: {
          ingest: {
            pipelineVersion: "0.1.0",
          },
        },
      };

      const result = validate(recipe);

      assert.equal(result.ok, true);
      assert.equal(result.errors.length, 0);
    });

    it("rejects a recipe missing a name", () => {
      // The cast lets an object without `name` through the type checker so the
      // runtime validator can be exercised.
      const recipe = {
        $schema: "https://spec.soustack.org/soustack.schema.json",
        profile: "lite",
        stacks: {},
        ingredients: ["1 cup sugar"],
        instructions: ["Mix."],
        metadata: {
          ingest: {
            pipelineVersion: "0.1.0",
          },
        },
      } as unknown as SoustackRecipe;

      const result = validate(recipe);

      assert.equal(result.ok, false);
      assert.ok(result.errors.some((error) => error.includes("name")));
    });
  });
});