import { describe, it, expect } from "bun:test";
import {
  countTokens,
  getContextLimit,
  exceedsTokenLimit,
  getTokenUsage,
  MODEL_CONTEXT_LIMITS,
} from "./tokenizer";

/**
 * Test suite for the tokenizer module.
 *
 * Covers the five exports imported above:
 *  - countTokens: token counts for empty, plain, code and unicode text.
 *  - getContextLimit: per-model limits, name matching and config overrides.
 *  - exceedsTokenLimit: boolean limit checks, with and without overrides.
 *  - getTokenUsage: the { tokens, limit, percentage, exceeds } summary.
 *  - MODEL_CONTEXT_LIMITS: presence of the default and major model families.
 *
 * Exact token counts are deliberately asserted as ranges, not fixed values.
 */
describe("tokenizer", () => {
  describe("countTokens", () => {
    it("returns 0 for empty string", () => {
      expect(countTokens("")).toBe(0);
    });

    it("returns 0 for undefined/null coerced to empty", () => {
      // Per the test title, missing input is coerced to "" before counting.
      // Exercise that coercion instead of repeating the empty-string literal.
      for (const missing of [undefined, null]) {
        expect(countTokens(missing ?? "")).toBe(0);
      }
    });

    it("counts tokens for simple text", () => {
      const tokens = countTokens("Hello, world!");
      // "Hello, world!" is typically 4 tokens: "Hello", ",", " world", "!"
      expect(tokens).toBeGreaterThan(0);
      expect(tokens).toBeLessThan(10);
    });

    it("counts tokens more accurately than length/4", () => {
      // This demonstrates the difference from the old heuristic.
      const text = "The quick brown fox jumps over the lazy dog.";
      const tokens = countTokens(text);
      const oldHeuristic = Math.ceil(text.length / 4);

      // The actual token count for this sentence is around 10.
      expect(tokens).toBeGreaterThan(5);
      expect(tokens).toBeLessThan(20);

      // The old heuristic would give 44/4 = 11 (ceil).
      expect(oldHeuristic).toBe(11);
    });

    it("handles code snippets", () => {
      const code = `function hello() { console.log("Hello, world!"); }`;
      const tokens = countTokens(code);
      expect(tokens).toBeGreaterThan(10);
      expect(tokens).toBeLessThan(50);
    });

    it("handles unicode and special characters", () => {
      // Two CJK characters (U+4E16, U+754C) and one astral-plane emoji
      // (U+1F389), written as escapes so the fixture survives re-encoding.
      const unicode = "Hello \u4e16\u754c! Emoji: \u{1F389}";
      const tokens = countTokens(unicode);
      expect(tokens).toBeGreaterThan(0);
    });

    it("scales linearly with content size", () => {
      const base = "This is a test sentence. ";
      const small = base.repeat(10);
      const large = base.repeat(100);

      const smallTokens = countTokens(small);
      const largeTokens = countTokens(large);

      // Large should be roughly 10x small (within reasonable margin).
      const ratio = largeTokens / smallTokens;
      expect(ratio).toBeGreaterThan(8);
      expect(ratio).toBeLessThan(12);
    });
  });

  describe("getContextLimit", () => {
    it("returns default limit when no model specified", () => {
      expect(getContextLimit()).toBe(MODEL_CONTEXT_LIMITS.default);
    });

    it("returns correct limit for Claude models", () => {
      expect(getContextLimit("opus")).toBe(200000);
      expect(getContextLimit("sonnet")).toBe(200000);
      expect(getContextLimit("haiku")).toBe(200000);
      expect(getContextLimit("claude-3-opus")).toBe(200000);
      expect(getContextLimit("claude-3.5-sonnet")).toBe(200000);
    });

    it("returns correct limit for OpenAI models", () => {
      expect(getContextLimit("gpt-4")).toBe(128000);
      expect(getContextLimit("gpt-4-turbo")).toBe(128000);
      expect(getContextLimit("gpt-4o")).toBe(128000);
      expect(getContextLimit("gpt-3.5-turbo")).toBe(16385);
    });

    it("returns correct limit for Gemini models", () => {
      expect(getContextLimit("gemini")).toBe(1000000);
      expect(getContextLimit("gemini-pro")).toBe(1000000);
      expect(getContextLimit("gemini-1.5-pro")).toBe(1000000);
    });

    it("handles case-insensitive model names", () => {
      expect(getContextLimit("OPUS")).toBe(200000);
      expect(getContextLimit("GPT-4")).toBe(128000);
      expect(getContextLimit("Gemini")).toBe(1000000);
    });

    it("matches partial model names", () => {
      // "claude-opus-4-20241022" should match "opus"
      expect(getContextLimit("claude-opus-4-20241022")).toBe(200000);
      // "gpt-4-1106-preview" should match "gpt-4"
      expect(getContextLimit("gpt-4-1106-preview")).toBe(128000);
    });

    it("returns default for unknown models", () => {
      expect(getContextLimit("unknown-model")).toBe(
        MODEL_CONTEXT_LIMITS.default,
      );
      expect(getContextLimit("my-custom-llm")).toBe(
        MODEL_CONTEXT_LIMITS.default,
      );
    });

    it("uses config override when provided", () => {
      // Config override should take priority over model-based limit.
      expect(getContextLimit("opus", 50000)).toBe(50000);
      expect(getContextLimit("gpt-4", 256000)).toBe(256000);
      expect(getContextLimit(undefined, 75000)).toBe(75000);
    });

    it("ignores invalid config override values", () => {
      expect(getContextLimit("opus", 0)).toBe(200000);
      expect(getContextLimit("opus", -1)).toBe(200000);
    });
  });

  describe("exceedsTokenLimit", () => {
    it("returns false for small text", () => {
      expect(exceedsTokenLimit("Hello, world!", "opus")).toBe(false);
    });

    it("returns false for empty text", () => {
      expect(exceedsTokenLimit("", "gpt-4")).toBe(false);
    });

    it("respects model-specific limits", () => {
      // Create text that would exceed a small limit but not a large one.
      const mediumText = "test ".repeat(5000); // ~5000 tokens

      // Should not exceed opus limit (200k).
      expect(exceedsTokenLimit(mediumText, "opus")).toBe(false);

      // Test with config override for small limit.
      expect(exceedsTokenLimit(mediumText, "opus", 1000)).toBe(true);
    });

    it("uses config override for limit check", () => {
      const text = "word ".repeat(100); // ~100 tokens
      expect(exceedsTokenLimit(text, "opus", 50)).toBe(true);
      expect(exceedsTokenLimit(text, "opus", 500)).toBe(false);
    });
  });

  describe("getTokenUsage", () => {
    it("returns correct usage information", () => {
      const text = "Hello, world!";
      const usage = getTokenUsage(text, "opus");

      expect(usage.tokens).toBeGreaterThan(0);
      expect(usage.limit).toBe(200000);
      // Percentage is tiny (4 tokens / 200k limit = 0.002%).
      expect(usage.percentage).toBeGreaterThanOrEqual(0);
      expect(usage.percentage).toBeLessThan(1);
      expect(usage.exceeds).toBe(false);
    });

    it("calculates percentage correctly", () => {
      const text = "test ".repeat(1000); // ~1000 tokens
      const usage = getTokenUsage(text, undefined, 10000);

      // Should be around 10% usage.
      expect(usage.percentage).toBeGreaterThan(5);
      expect(usage.percentage).toBeLessThan(20);
    });

    it("detects when limit is exceeded", () => {
      const text = "word ".repeat(200); // ~200 tokens
      const usage = getTokenUsage(text, undefined, 50);

      expect(usage.exceeds).toBe(true);
      expect(usage.percentage).toBeGreaterThan(100);
    });

    it("rounds percentage to 2 decimal places", () => {
      // A value already rounded to 2 decimals is unchanged by rounding it
      // again. Asserting unconditionally avoids a vacuous pass when the
      // string form has no "." (e.g. an integer or exponent notation).
      const tiny = getTokenUsage("test", "opus");
      expect(tiny.percentage).toBe(Number(tiny.percentage.toFixed(2)));

      // Small limit so the percentage is not expected to round to 0.
      const fractional = getTokenUsage("word ".repeat(100), undefined, 3000);
      expect(fractional.percentage).toBe(
        Number(fractional.percentage.toFixed(2)),
      );
    });
  });

  describe("MODEL_CONTEXT_LIMITS", () => {
    it("has a default fallback", () => {
      expect(MODEL_CONTEXT_LIMITS.default).toBeDefined();
      expect(MODEL_CONTEXT_LIMITS.default).toBe(100000);
    });

    it("includes major model families", () => {
      // Claude
      expect(MODEL_CONTEXT_LIMITS.opus).toBeDefined();
      expect(MODEL_CONTEXT_LIMITS.sonnet).toBeDefined();
      expect(MODEL_CONTEXT_LIMITS.haiku).toBeDefined();

      // OpenAI
      expect(MODEL_CONTEXT_LIMITS["gpt-4"]).toBeDefined();
      expect(MODEL_CONTEXT_LIMITS["gpt-4o"]).toBeDefined();

      // Gemini
      expect(MODEL_CONTEXT_LIMITS.gemini).toBeDefined();
    });
  });
});