import fs from "node:fs/promises";
import path from "node:path";

import { ApiError, badRequest } from "@/lib/api/errors";
import { decodeCursor, encodeCursor } from "@/lib/search/cursor";

/**
 * FS provider (local/test fallback).
 *
 * Purpose:
 * - Provide a deterministic fallback without Qsirch for local development/tests.
 *
 * Behavior:
 * - Traverses the NAS-like folder structure under NAS_ROOT_PATH:
 *   NLxx/YYYY/MM/DD/*.pdf
 * - Filters by:
 *   - branch scope (branch/multi/all)
 *   - date range (inclusive)
 *   - q (best-effort: filename substring; optional small-file content substring)
 * - Branch names supplied by the caller are only honored when they match the
 *   NLxx pattern, so a crafted value cannot point the traversal outside
 *   NAS_ROOT_PATH.
 *
 * NOTE:
 * - This is not intended to be fast on real NAS-scale.
 * - Production should use SEARCH_PROVIDER=qsirch.
 */

const BRANCH_RE = /^NL\d+$/;
const YEAR_RE = /^\d{4}$/;
const MONTH_RE = /^(0[1-9]|1[0-2])$/;
const DAY_RE = /^(0[1-9]|[12]\d|3[01])$/;

/**
 * Reads the NAS root directory from the NAS_ROOT_PATH environment variable.
 *
 * @returns {string} The configured root path.
 * @throws {ApiError} Status 500 / code "FS_STORAGE_ERROR" when the variable is
 *   unset or empty.
 */
function getNasRootOrThrow() {
  const root = process.env.NAS_ROOT_PATH;
  if (!root) {
    throw new ApiError({
      status: 500,
      code: "FS_STORAGE_ERROR",
      message: "Internal server error",
    });
  }
  return root;
}

/**
 * Inclusive date-range check on ISO "YYYY-MM-DD" strings.
 *
 * @param {string} date - Date to test.
 * @param {string|null} from - Lower bound; a falsy value means unbounded.
 * @param {string|null} to - Upper bound; a falsy value means unbounded.
 * @returns {boolean} True when `date` lies within [from, to].
 */
function isWithinRange(date, from, to) {
  // date/from/to are ISO "YYYY-MM-DD"; lexical compare works.
  if (from && date < from) return false;
  if (to && date > to) return false;
  return true;
}

/**
 * Lists a directory, returning `null` instead of throwing on any error.
 *
 * The swallow is deliberate: a missing or unreadable folder is treated as
 * "no entries" so one bad directory does not abort the whole search.
 *
 * @param {string} dir - Directory to list.
 * @returns {Promise<import("node:fs").Dirent[]|null>} Entries, or null on failure.
 */
async function safeReaddir(dir) {
  try {
    return await fs.readdir(dir, { withFileTypes: true });
  } catch {
    return null;
  }
}

/**
 * Normalizes a free-text query to a trimmed, lower-cased needle.
 *
 * @param {unknown} q - Raw query value.
 * @returns {string|null} The needle, or null for non-strings and blank input.
 */
function normalizeQuery(q) {
  if (typeof q !== "string") return null;
  const s = q.trim().toLowerCase();
  return s ? s : null;
}

/**
 * Reads a file as UTF-8 text when it is a regular file no larger than
 * `maxBytes`. Any failure yields `null` (best-effort content search).
 *
 * @param {string} filePath - Absolute path of the file.
 * @param {number} [maxBytes=1_000_000] - Size cap in bytes.
 * @returns {Promise<string|null>} File content, or null when skipped/unreadable.
 */
async function maybeReadSmallFileText(filePath, maxBytes = 1_000_000) {
  try {
    const stat = await fs.stat(filePath);
    if (!stat.isFile()) return null;
    if (stat.size > maxBytes) return null;
    const buf = await fs.readFile(filePath);
    return buf.toString("utf8");
  } catch {
    return null;
  }
}

/**
 * Builds a short excerpt (at most 240 characters before trimming) around the
 * first occurrence of `needle` in the whitespace-collapsed text.
 *
 * @param {string} text - Source text.
 * @param {string} needle - Lower-cased search term.
 * @returns {string|undefined} Excerpt with "…" on truncated sides, or
 *   undefined when there is nothing to show.
 */
function buildSnippet(text, needle) {
  if (!text || !needle) return undefined;

  const normalized = String(text).replace(/\s+/g, " ").trim();
  if (!normalized) return undefined;

  const hay = normalized.toLowerCase();
  const idx = hay.indexOf(needle);
  if (idx === -1) return undefined;

  const MAX = 240;
  // Keep up to 80 characters of leading context before the hit.
  const start = Math.max(0, idx - 80);
  const end = Math.min(normalized.length, start + MAX);
  const chunk = normalized.slice(start, end).trim();

  return (start > 0 ? "…" : "") + chunk + (end < normalized.length ? "…" : "");
}

/**
 * Reduces caller-supplied branch names to the ones that are safe to join onto
 * the NAS root: strings matching BRANCH_RE, de-duplicated in first-seen order.
 *
 * Without this, a value such as "../x" would make path.join() resolve outside
 * the root, a non-string entry would make path.join() throw, and a repeated
 * branch would produce duplicate result items.
 *
 * @param {unknown} branches - Raw `branches` input.
 * @returns {string[]} Valid, unique branch names (possibly empty).
 */
function sanitizeBranches(branches) {
  if (!Array.isArray(branches)) return [];
  const valid = branches.filter(
    (b) => typeof b === "string" && BRANCH_RE.test(b),
  );
  return [...new Set(valid)];
}

/**
 * Lists the names of sub-directories of `dir` whose name matches `pattern`.
 *
 * @param {string} dir - Directory to list.
 * @param {RegExp} pattern - Non-global pattern applied to each entry name.
 * @returns {Promise<string[]>} Matching names in readdir order; empty when the
 *   directory cannot be read.
 */
async function listSubdirs(dir, pattern) {
  const entries = await safeReaddir(dir);
  if (!entries) return [];
  return entries
    .filter((e) => e.isDirectory() && pattern.test(e.name))
    .map((e) => e.name);
}

/**
 * Tests one file against the query needle: filename substring first, then
 * (only when the name does not match) content substring for small files.
 *
 * @param {string} absPath - Absolute path of the file.
 * @param {string} filename - Base name of the file.
 * @param {string} needle - Lower-cased search term.
 * @returns {Promise<{ snippet: string|undefined }|null>} Match info, or null
 *   when the file does not match. `snippet` is only set for content hits.
 */
async function matchQuery(absPath, filename, needle) {
  if (filename.toLowerCase().includes(needle)) {
    return { snippet: undefined };
  }

  const text = await maybeReadSmallFileText(absPath);
  if (text && text.toLowerCase().includes(needle)) {
    return { snippet: buildSnippet(text, needle) };
  }

  return null;
}

/**
 * Creates the filesystem-backed search provider.
 *
 * @returns {{ search: (input: object) => Promise<object> }} Provider object.
 */
export function createFsProvider() {
  return {
    /**
     * Searches PDF files below NAS_ROOT_PATH.
     *
     * @param {{
     *   mode: "branch"|"multi"|"all",
     *   branches: string[]|null,
     *   q: string|null,
     *   from: string|null,
     *   to: string|null,
     *   limit: number,
     *   cursor: string|null
     * }} input
     * @returns {Promise<{ items: object[], nextCursor: string|null, total?: number }>}
     *   One page of results, newest date first. `total` is the number of
     *   matches before paging; it is omitted when the branch scope is empty.
     * @throws A bad-request error ("VALIDATION_SEARCH_LIMIT") when `limit` is
     *   not a positive integer, or ("VALIDATION_SEARCH_CURSOR") when the
     *   decoded cursor mode is not "sync".
     * @throws {ApiError} When NAS_ROOT_PATH is not configured.
     */
    async search(input) {
      const { mode, branches, q, from, to, limit, cursor } = input || {};

      if (!Number.isInteger(limit) || limit < 1) {
        throw badRequest("VALIDATION_SEARCH_LIMIT", "Invalid limit parameter");
      }

      // NOTE(review): relies on decodeCursor(null) yielding a "sync" cursor for
      // the first page — confirm against the cursor module.
      const decoded = decodeCursor(cursor || null);
      if (decoded.mode !== "sync") {
        throw badRequest("VALIDATION_SEARCH_CURSOR", "Invalid cursor");
      }
      const offset = decoded.offset;

      const root = getNasRootOrThrow();

      // mode === "all" => list all branches from filesystem.
      // Otherwise only trust caller-supplied names that look like a branch.
      const scopeBranches =
        mode === "all"
          ? (await listSubdirs(root, BRANCH_RE)).sort()
          : sanitizeBranches(branches);

      if (scopeBranches.length === 0) {
        return { items: [], nextCursor: null };
      }

      const needle = normalizeQuery(q);
      const results = [];

      // Traverse NLxx/YYYY/MM/DD/*.pdf
      for (const branch of scopeBranches) {
        const branchDir = path.join(root, branch);

        for (const year of await listSubdirs(branchDir, YEAR_RE)) {
          const yearDir = path.join(branchDir, year);

          for (const month of await listSubdirs(yearDir, MONTH_RE)) {
            const monthDir = path.join(yearDir, month);

            for (const day of await listSubdirs(monthDir, DAY_RE)) {
              const date = `${year}-${month}-${day}`;
              if (!isWithinRange(date, from, to)) continue;

              const dayDir = path.join(monthDir, day);
              const files = await safeReaddir(dayDir);
              if (!files) continue;

              for (const f of files) {
                if (!f.isFile()) continue;
                if (!f.name.toLowerCase().endsWith(".pdf")) continue;

                const filename = f.name;
                const item = {
                  branch,
                  date,
                  year,
                  month,
                  day,
                  filename,
                  relativePath: `${branch}/${year}/${month}/${day}/${filename}`,
                };

                // Date-only search: every PDF in range is a hit.
                if (!needle) {
                  results.push(item);
                  continue;
                }

                // q filter: filename substring, optional content substring
                // for small files.
                const match = await matchQuery(
                  path.join(dayDir, filename),
                  filename,
                  needle,
                );
                if (!match) continue;

                results.push({ ...item, snippet: match.snippet });
              }
            }
          }
        }
      }

      // Stable ordering for FS fallback:
      // - newest dates first, then filename asc
      results.sort((a, b) => {
        if (a.date !== b.date) return a.date < b.date ? 1 : -1;
        return String(a.filename).localeCompare(String(b.filename), "de");
      });

      const page = results.slice(offset, offset + limit);
      const nextOffset = offset + page.length;
      const nextCursor =
        nextOffset < results.length
          ? encodeCursor({ v: 1, mode: "sync", offset: nextOffset })
          : null;

      return { items: page, nextCursor, total: results.length };
    },
  };
}