import type { PluginInput } from "@opencode-ai/plugin"

// Anthropic's hard context window limit and the heuristics used to stay under it.
const ANTHROPIC_ACTUAL_LIMIT = 200_000
// Rough heuristic: ~4 characters per token for typical English/code text.
const CHARS_PER_TOKEN_ESTIMATE = 4
// Never let a single grep result exceed this many (estimated) tokens.
const TARGET_MAX_TOKENS = 50_000

interface AssistantMessageInfo {
  role: "assistant"
  tokens: {
    input: number
    output: number
    reasoning: number
    cache: { read: number; write: number }
  }
}

interface MessageWrapper {
  info: { role: string } & Partial<AssistantMessageInfo>
}

// Estimate token count from character length using the chars-per-token heuristic.
function estimateTokens(text: string): number {
  return Math.ceil(text.length / CHARS_PER_TOKEN_ESTIMATE)
}

// Trim `output` so its estimated token count fits within `maxTokens`,
// preserving the first three lines as a header and appending a truncation notice.
function truncateToTokenLimit(
  output: string,
  maxTokens: number
): { result: string; truncated: boolean } {
  const currentTokens = estimateTokens(output)
  if (currentTokens <= maxTokens) {
    return { result: output, truncated: false }
  }

  const lines = output.split("\n")
  // Too short to split into header + content: hard-truncate by characters.
  if (lines.length <= 3) {
    const maxChars = maxTokens * CHARS_PER_TOKEN_ESTIMATE
    return {
      result: output.slice(0, maxChars) + "\n\n[Output truncated due to context window limit]",
      truncated: true,
    }
  }

  const headerLines = lines.slice(0, 3)
  const contentLines = lines.slice(3)
  const headerText = headerLines.join("\n")
  const headerTokens = estimateTokens(headerText)
  // Reserve 50 tokens of headroom for the truncation notice itself.
  const availableTokens = maxTokens - headerTokens - 50
  if (availableTokens <= 0) {
    return {
      result: headerText + "\n\n[Content truncated due to context window limit]",
      truncated: true,
    }
  }

  // Keep whole content lines until the token budget is exhausted.
  const resultLines: string[] = []
  let currentTokenCount = 0
  for (const line of contentLines) {
    const lineTokens = estimateTokens(line + "\n")
    if (currentTokenCount + lineTokens > availableTokens) break
    resultLines.push(line)
    currentTokenCount += lineTokens
  }

  const truncatedContent = [...headerLines, ...resultLines].join("\n")
  const removedCount = contentLines.length - resultLines.length
  return {
    result: truncatedContent + `\n\n[${removedCount} more lines truncated due to context window limit]`,
    truncated: true,
  }
}

export function createGrepOutputTruncatorHook(ctx: PluginInput) {
  const GREP_TOOLS = ["grep", "Grep", "safe_grep"]

  const toolExecuteAfter = async (
    input: { tool: string; sessionID: string; callID: string },
    output: { title: string; output: string; metadata: unknown }
  ) => {
    if (!GREP_TOOLS.includes(input.tool)) return

    const { sessionID } = input
    try {
      const response = await ctx.client.session.messages({
        path: { id: sessionID },
      })
      const messages = (response.data ?? response) as MessageWrapper[]
      const assistantMessages = messages
        .filter((m) => m.info.role === "assistant")
        .map((m) => m.info as AssistantMessageInfo)
      if (assistantMessages.length === 0) return

      // Use only the last assistant message's input tokens: this reflects the
      // ACTUAL current context window usage (post-compaction), not the
      // cumulative total across the whole session.
      const lastAssistant = assistantMessages[assistantMessages.length - 1]
      const lastTokens = lastAssistant.tokens
      const totalInputTokens = (lastTokens?.input ?? 0) + (lastTokens?.cache?.read ?? 0)
      const remainingTokens = ANTHROPIC_ACTUAL_LIMIT - totalInputTokens

      // Allow the grep output at most half of the remaining window,
      // capped at the absolute target.
      const maxOutputTokens = Math.min(remainingTokens * 0.5, TARGET_MAX_TOKENS)
      if (maxOutputTokens <= 0) {
        output.output = "[Output suppressed - context window exhausted]"
        return
      }

      const { result, truncated } = truncateToTokenLimit(output.output, maxOutputTokens)
      if (truncated) {
        output.output = result
      }
    } catch {
      // Graceful degradation: if the session lookup fails, pass the output through untouched.
    }
  }

  return {
    "tool.execute.after": toolExecuteAfter,
  }
}
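
// Usage sketch (assumption, not confirmed by this file: opencode plugins are
// async functions that receive PluginInput and return a map of hooks, exposed
// via the `Plugin` type; the module path "./grep-output-truncator" is
// hypothetical):
//
//   import type { Plugin } from "@opencode-ai/plugin"
//   import { createGrepOutputTruncatorHook } from "./grep-output-truncator"
//
//   export const GrepOutputTruncator: Plugin = async (input) => {
//     return createGrepOutputTruncatorHook(input)
//   }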