import { tool } from "ai";
import z from "zod";
import { readFile as fsReadFile, stat } from "node:fs/promises";
import { resolve, relative, isAbsolute } from "node:path";
import { cwd } from "node:process";
// Tunables for the large-file outline fallback.

/**
 * Size threshold above which a whole-file read returns an outline instead of
 * the content. It is compared against the decoded string's `length`, so it
 * counts UTF-16 code units rather than bytes on disk.
 */
const AUTO_OUTLINE_SIZE = 50_000;

/** Upper bound on the number of symbols collected into a file outline. */
const MAX_OUTLINE_SYMBOLS = 200;
/**
 * One symbol entry in a generated file outline.
 */
interface FileOutlineItem {
// Text captured from the matching line (identifier or header text).
name: string;
// Category label of the pattern that matched, e.g. "function" or "header".
type: string;
// 1-based line number on which the symbol was found.
line: number;
// Optional end line of the symbol; the outline generator in this file does not set it.
endLine?: number;
}
/** Failure result: carries a human-readable message describing why the read failed. */
interface ErrorResult {
error: string;
}
/** Success result: carries the text returned to the caller (file content, a line range, or an outline). */
interface SuccessResult {
content: string;
}
/** Result shape produced by the read-file tool: exactly one of `error` or `content` is present. */
type ReadFileResult = ErrorResult | SuccessResult;
// Security settings - these would ideally come from configuration

/**
 * Glob patterns for directories whose contents the tool refuses to read
 * (dependency, VCS, and build-output folders). Each directory name is
 * expanded to the form `**\/<name>/**`.
 */
const EXCLUDED_PATTERNS = [
  "node_modules",
  ".git",
  "dist",
  "build",
  ".next",
  ".turbo",
  "coverage",
].map((directory) => `**/${directory}/**`);
/**
 * Glob patterns for files that may hold credentials or key material and must
 * never be returned by the tool. Every entry is prefixed with `**\/` so it
 * applies at any directory depth.
 */
const PRIVATE_PATTERNS = [
  // Environment files
  ".env",
  ".env.*",
  // Conventional secrets directory
  "secrets/**",
  // Key and certificate containers
  "*.key",
  "*.pem",
  "*.p12",
  "*.pfx",
  // SSH identities and configuration
  "id_rsa",
  "id_ed25519",
  ".ssh/**",
].map((suffix) => `**/${suffix}`);
/**
 * Description attached to the `path` field of the tool input. Written as one
 * array element per output line (joined with "\n") so the layout of the final
 * text is visible at a glance.
 */
const PATH_FIELD_DESCRIPTION = [
  "The relative path of the file to read.",
  "",
  "This path should never be absolute, and the first component of the path should always be a root directory in a project.",
  "",
  "",
  "If the project has the following root directories:",
  "",
  "- directory1",
  "- directory2",
  "",
  "If you want to access `file.txt` in `directory1`, you should use the path `directory1/file.txt`.",
  "If you want to access `file.txt` in `directory2`, you should use the path `directory2/file.txt`.",
  "",
].join("\n");

/** Builds the schema shared by both line-number fields: an optional integer >= 1. */
const optionalLineNumber = (description: string) =>
  z.number().int().min(1).optional().describe(description);

/**
 * Input schema for the read-file tool: a required `path` plus an optional
 * 1-based, inclusive `start_line` / `end_line` range.
 */
const ReadFileInput = z.object({
  path: z.string().describe(PATH_FIELD_DESCRIPTION),
  start_line: optionalLineNumber(
    "Optional line number to start reading on (1-based index)"
  ),
  end_line: optionalLineNumber(
    "Optional line number to end reading on (1-based index, inclusive)"
  ),
});
/**
 * Translates one glob pattern into an anchored regular expression.
 *
 * Supported syntax:
 * - `**\/` matches zero or more leading directories (so `**\/.env` also
 *   matches a top-level `.env`),
 * - `**` elsewhere matches any run of characters, including `/`,
 * - `*` matches any run of characters within a single path segment,
 * - `?` matches exactly one character within a path segment.
 *
 * Every other character is matched literally; regex metacharacters such as
 * `.` are escaped so `*.key` does not match `monkey`.
 */
function globToRegExp(pattern: string): RegExp {
  let source = "";
  let index = 0;
  while (index < pattern.length) {
    if (pattern.startsWith("**/", index)) {
      // Optional group: the directory prefix may be absent entirely.
      source += "(?:.*/)?";
      index += 3;
    } else if (pattern.startsWith("**", index)) {
      source += ".*";
      index += 2;
    } else {
      const char = pattern.charAt(index);
      if (char === "*") {
        source += "[^/]*";
      } else if (char === "?") {
        source += "[^/]";
      } else {
        // Escape anything that would otherwise be regex syntax.
        source += char.replace(/[\\^$.+()|[\]{}]/g, "\\$&");
      }
      index += 1;
    }
  }
  return new RegExp(`^${source}$`);
}

/**
 * Reports whether a path matches at least one glob pattern.
 *
 * @param filePath Path to test; backslashes are treated as path separators.
 * @param patterns Glob patterns (see {@link globToRegExp} for the syntax).
 * @returns `true` if any pattern matches the whole path, `false` otherwise
 *   (including when `patterns` is empty).
 */
function matchesPattern(filePath: string, patterns: string[]): boolean {
  // Normalize path separators to forward slashes so one pattern set works
  // for both POSIX and Windows style input.
  const normalizedPath = filePath.replace(/\\/g, "/");
  return patterns.some((pattern) => globToRegExp(pattern).test(normalizedPath));
}
/**
 * Validates a caller-supplied path and resolves it to an absolute path.
 *
 * Checks, in order:
 * 1. The normalized path must lie inside the current working directory.
 *    Absolute paths are additionally allowed when they point into a temp
 *    directory (kept for tests).
 * 2. The path must not match `EXCLUDED_PATTERNS`.
 * 3. The path must not match `PRIVATE_PATTERNS`.
 *
 * @param inputPath Relative or absolute path as received from the tool call.
 * @returns `{ isValid: true, resolvedPath }` on success, otherwise
 *   `{ isValid: false, error }`. Never throws.
 */
function validatePath(inputPath: string): {
  isValid: boolean;
  error?: string;
  resolvedPath?: string;
} {
  try {
    const projectRoot = cwd();
    const inputIsAbsolute = isAbsolute(inputPath);

    // resolve() collapses `.` and `..` segments. All later checks run on this
    // normalized form, so inputs such as `/tmp/../etc/passwd` are judged by
    // where they actually point, not by how they are spelled.
    const resolvedPath = resolve(projectRoot, inputPath);

    const relativeToRoot = relative(projectRoot, resolvedPath);
    // Only a leading `..` *segment* means "outside"; a name like `..foo` is a
    // regular entry inside the project.
    const isOutsideProject =
      relativeToRoot === ".." ||
      relativeToRoot.startsWith("../") ||
      relativeToRoot.startsWith("..\\") ||
      isAbsolute(relativeToRoot);

    if (isOutsideProject) {
      if (!inputIsAbsolute) {
        return {
          isValid: false,
          error: `Path ${inputPath} not found in project - path traversal not allowed`,
        };
      }
      // Temp-directory allowance (for tests).
      // NOTE(review): this is a substring heuristic, so any directory named
      // `tmp` anywhere on disk qualifies - consider restricting it to the
      // actual temp directory or making it test-only.
      const isInTempDir =
        resolvedPath.includes("/tmp/") ||
        resolvedPath.includes("\\tmp\\") ||
        resolvedPath.includes("/var/folders/");
      if (!isInTempDir) {
        return {
          isValid: false,
          error: `Path ${inputPath} not found in project - absolute paths are not allowed`,
        };
      }
    }

    // Inside the project, match patterns against the project-relative path so
    // directories above the project root cannot trigger an exclusion. The
    // leading slash gives top-level entries (e.g. `.env`, `node_modules/...`)
    // a separator for the patterns' `**/` prefix to match against.
    const pathForPatternCheck = isOutsideProject
      ? resolvedPath
      : `/${relativeToRoot}`;

    // Check against excluded patterns
    if (matchesPattern(pathForPatternCheck, EXCLUDED_PATTERNS)) {
      return {
        isValid: false,
        error: `Cannot read file because its path matches the global \`file_scan_exclusions\` setting: ${inputPath}`,
      };
    }

    // Check against private file patterns
    if (matchesPattern(pathForPatternCheck, PRIVATE_PATTERNS)) {
      return {
        isValid: false,
        error: `Cannot read file because its path matches the global \`private_files\` setting: ${inputPath}`,
      };
    }

    // NOTE(review): symbolic links are not resolved here, so a link inside the
    // project may still point outside it - confirm whether that matters.
    return {
      isValid: true,
      resolvedPath,
    };
  } catch {
    // Any unexpected failure while resolving is reported as an invalid path.
    return {
      isValid: false,
      error: `Path ${inputPath} not found in project`,
    };
  }
}
/**
 * Line-start patterns used to recognize top-level symbols. For every pattern
 * the LAST capture group holds the symbol name. Order matters: the first
 * pattern that matches a line wins.
 */
const OUTLINE_PATTERNS: ReadonlyArray<{ regex: RegExp; type: string }> = [
  // JavaScript/TypeScript functions and classes
  {
    regex: /^(export\s+)?(default\s+)?(async\s+)?function\s+(\w+)/i,
    type: "function",
  },
  { regex: /^(export\s+)?(default\s+)?class\s+(\w+)/i, type: "class" },
  { regex: /^(export\s+)?const\s+(\w+)\s*=/i, type: "const" },
  { regex: /^(export\s+)?let\s+(\w+)\s*=/i, type: "variable" },
  { regex: /^(export\s+)?var\s+(\w+)\s*=/i, type: "variable" },
  { regex: /^(export\s+)?interface\s+(\w+)/i, type: "interface" },
  { regex: /^(export\s+)?type\s+(\w+)/i, type: "type" },
  { regex: /^(export\s+)?enum\s+(\w+)/i, type: "enum" },
  // Python
  { regex: /^def\s+(\w+)/i, type: "function" },
  { regex: /^class\s+(\w+)/i, type: "class" },
  // Rust
  { regex: /^(pub\s+)?fn\s+(\w+)/i, type: "function" },
  { regex: /^(pub\s+)?struct\s+(\w+)/i, type: "struct" },
  { regex: /^(pub\s+)?enum\s+(\w+)/i, type: "enum" },
  { regex: /^(pub\s+)?mod\s+(\w+)/i, type: "module" },
  // Generic headers (Markdown, etc.)
  { regex: /^#{1,6}\s+(.+)$/i, type: "header" },
];

/**
 * Builds a plain-text outline of the symbols found in a file.
 *
 * The scan is a line-based heuristic: only definitions that start in the
 * first column are recognized, and collection stops once
 * `MAX_OUTLINE_SYMBOLS` entries have been found.
 *
 * @param content Full text of the file.
 * @returns A block starting with `File outline:` followed by one
 *   `name [L<line>]` entry per symbol (1-based line numbers), or a sentence
 *   stating the line count when nothing was recognized.
 */
function generateFileOutline(content: string): string {
  // Split on both LF and CRLF. A trailing "\r" would otherwise stop the
  // header pattern's `(.+)$` from matching, because `.` excludes "\r".
  const lines = content.split(/\r?\n/);
  const outline: FileOutlineItem[] = [];

  for (
    let index = 0;
    index < lines.length && outline.length < MAX_OUTLINE_SYMBOLS;
    index++
  ) {
    const line = lines[index];
    if (!line || !line.trim()) continue;

    for (const { regex, type } of OUTLINE_PATTERNS) {
      const match = regex.exec(line);
      if (!match) continue;
      // The name is the last capture group; earlier groups are optional
      // modifiers (export/default/async/pub).
      const name = match[match.length - 1] || match[1] || "unnamed";
      outline.push({ name: name.trim(), type, line: index + 1 });
      break;
    }
  }

  if (outline.length === 0) {
    return `File contains ${lines.length} lines but no recognizable symbols were found.`;
  }

  const outlineText = outline
    .map((item) => `${item.name} [L${item.line}]`)
    .join("\n");
  return `File outline:\n\n${outlineText}`;
}
/**
 * Tool that reads a file from the project.
 *
 * Behavior of `execute`:
 * - validates the arguments and the path (via `validatePath`);
 * - with `start_line` and/or `end_line`, returns that 1-based inclusive line
 *   range (always at least one line's worth of the slice);
 * - otherwise returns the whole file, or an outline when the content is
 *   longer than `AUTO_OUTLINE_SIZE`.
 *
 * Failures are reported as `{ error }` results rather than thrown.
 */
export const readFile = tool({
  description: `Reads the content of the given file in the project.
- Never attempt to read a path that hasn't been previously mentioned.
- If the file is too large, returns an outline instead with instructions to read specific line ranges.
- Supports reading specific line ranges using start_line and end_line parameters.
- Prevents access to files outside the project boundaries for security.`,
  inputSchema: ReadFileInput,
  execute: async ({ path, start_line, end_line }): Promise<ReadFileResult> => {
    // Manual input validation (defensive; mirrors the schema constraints).
    if (!path || typeof path !== "string") {
      return { error: "Path is required and must be a string" };
    }
    if (
      start_line !== undefined &&
      (typeof start_line !== "number" ||
        start_line < 1 ||
        !Number.isInteger(start_line))
    ) {
      return { error: "start_line must be a positive integer" };
    }
    if (
      end_line !== undefined &&
      (typeof end_line !== "number" ||
        end_line < 1 ||
        !Number.isInteger(end_line))
    ) {
      return { error: "end_line must be a positive integer" };
    }

    // Validate and resolve path
    const pathValidation = validatePath(path);
    if (!pathValidation.isValid) {
      return { error: pathValidation.error || "Invalid path" };
    }
    const resolvedPath = pathValidation.resolvedPath;
    if (!resolvedPath) {
      return { error: "Could not resolve path" };
    }

    try {
      // Check if file exists and get stats
      const stats = await stat(resolvedPath);
      if (!stats.isFile()) {
        return { error: `${path} is not a file` };
      }

      // Read file content
      const content = await fsReadFile(resolvedPath, "utf-8");

      // Handle line range requests
      if (start_line !== undefined || end_line !== undefined) {
        const lines = content.split("\n");
        const startIdx = Math.max(0, (start_line ?? 1) - 1);
        const endIdx =
          end_line !== undefined
            ? Math.min(lines.length, end_line)
            : lines.length;
        // Ensure the slice spans at least one line, even when end_line is
        // smaller than start_line.
        const actualEndIdx = Math.max(startIdx + 1, endIdx);
        return { content: lines.slice(startIdx, actualEndIdx).join("\n") };
      }

      // Check file size and decide whether to return content or outline.
      // `content.length` counts UTF-16 code units, not bytes on disk.
      if (content.length > AUTO_OUTLINE_SIZE) {
        const outline = generateFileOutline(content);
        const outlineContent = `This file was too big to read all at once.
Here is an outline of its symbols:
${outline}
Using the line numbers in this outline, you can call this tool again
while specifying the start_line and end_line fields to see the
implementations of symbols in the outline.`;
        return { content: outlineContent };
      }

      return { content };
    } catch (error: unknown) {
      // Map the common filesystem error codes to short, path-relative
      // messages; path-validation failures never reach this handler because
      // validatePath returns them instead of throwing.
      if (error instanceof Error) {
        switch ((error as NodeJS.ErrnoException).code) {
          case "ENOENT":
            return { error: `${path} not found` };
          case "EACCES":
            return { error: `Permission denied reading ${path}` };
          case "EISDIR":
            return { error: `${path} is a directory, not a file` };
          default:
            break;
        }
      }
      return {
        error: `Failed to read ${path}: ${
          error instanceof Error ? error.message : String(error)
        }`,
      };
    }
  },
});