| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246 |
- import fs from "node:fs/promises";
- import path from "node:path";
- import { ApiError, badRequest } from "@/lib/api/errors";
- import { decodeCursor, encodeCursor } from "@/lib/search/cursor";
- /**
- * FS provider (local/test fallback).
- *
- * Purpose:
- * - Provide a deterministic fallback without Qsirch for local development/tests.
- *
- * Behavior:
- * - Traverses the NAS-like folder structure under NAS_ROOT_PATH:
- * NLxx/YYYY/MM/DD/*.pdf
- * - Filters by:
- * - branch scope (branch/multi/all)
- * - date range (inclusive)
- * - q (best-effort: filename substring; optional small-file content substring)
- *
- * NOTE:
- * - This is not intended to be fast at real NAS scale.
- * - Production should use SEARCH_PROVIDER=qsirch.
- */
- const BRANCH_RE = /^NL\d+$/; // branch folder name: "NL" followed by one or more digits
- const YEAR_RE = /^\d{4}$/; // year folder name: exactly four digits
- const MONTH_RE = /^(0[1-9]|1[0-2])$/; // month folder name: zero-padded 01-12
- const DAY_RE = /^(0[1-9]|[12]\d|3[01])$/; // day folder name: zero-padded 01-31 (not checked against the month's length)
/**
 * Read the NAS root directory from the NAS_ROOT_PATH environment variable.
 *
 * @returns {string} The configured root path.
 * @throws {ApiError} Status 500 / code "FS_STORAGE_ERROR" when the variable
 *   is unset or empty. The message is deliberately generic.
 */
function getNasRootOrThrow() {
  const { NAS_ROOT_PATH: nasRoot } = process.env;
  if (nasRoot) {
    return nasRoot;
  }

  throw new ApiError({
    status: 500,
    code: "FS_STORAGE_ERROR",
    message: "Internal server error",
  });
}
/**
 * Check whether `date` lies inside the inclusive range [from, to].
 *
 * All three values are expected to be ISO "YYYY-MM-DD" strings, so plain
 * string comparison orders them chronologically. A falsy bound is treated as
 * "unbounded" on that side.
 *
 * @param {string} date
 * @param {string|null} from - Inclusive lower bound, or falsy for none.
 * @param {string|null} to - Inclusive upper bound, or falsy for none.
 * @returns {boolean}
 */
function isWithinRange(date, from, to) {
  const isBeforeStart = Boolean(from) && date < from;
  const isAfterEnd = Boolean(to) && date > to;
  return !(isBeforeStart || isAfterEnd);
}
/**
 * Read a directory as Dirent objects without ever rejecting.
 *
 * @param {string} dir - Directory to list.
 * @returns {Promise<import("node:fs").Dirent[]|null>} The entries, or null
 *   when the directory cannot be read for any reason (best-effort traversal).
 */
async function safeReaddir(dir) {
  let entries = null;
  try {
    entries = await fs.readdir(dir, { withFileTypes: true });
  } catch {
    // Unreadable or missing directory: report "nothing here" instead of failing.
  }
  return entries;
}
/**
 * Normalize a free-text query for case-insensitive substring matching.
 *
 * @param {unknown} q - Raw query value.
 * @returns {string|null} The trimmed, lower-cased query, or null when `q` is
 *   not a string or is blank.
 */
function normalizeQuery(q) {
  if (typeof q === "string") {
    const lowered = q.trim().toLowerCase();
    if (lowered.length > 0) {
      return lowered;
    }
  }
  return null;
}
/**
 * Best-effort read of a small file as UTF-8 text.
 *
 * @param {string} filePath - File to read.
 * @param {number} [maxBytes=1_000_000] - Files larger than this are skipped.
 * @returns {Promise<string|null>} The decoded content, or null when the path
 *   is not a regular file, exceeds `maxBytes`, or any filesystem call fails.
 */
async function maybeReadSmallFileText(filePath, maxBytes = 1_000_000) {
  let text = null;
  try {
    const info = await fs.stat(filePath);
    if (info.isFile() && !(info.size > maxBytes)) {
      const bytes = await fs.readFile(filePath);
      text = bytes.toString("utf8");
    }
  } catch {
    // Any stat/read failure simply means "no text available".
    text = null;
  }
  return text;
}
/**
 * Build a short excerpt of `text` around the first occurrence of `needle`.
 *
 * Whitespace runs in `text` are collapsed to single spaces first. The excerpt
 * starts up to 80 characters before the hit and spans at most 240 characters;
 * an ellipsis marks each side that was cut.
 *
 * @param {string} text - Source text.
 * @param {string} needle - Lower-cased search term (compared against the
 *   lower-cased text).
 * @returns {string|undefined} The excerpt, or undefined when either argument
 *   is empty or the needle does not occur.
 */
function buildSnippet(text, needle) {
  const LEAD_CHARS = 80;
  const WINDOW_CHARS = 240;

  if (!text || !needle) return undefined;

  const flat = String(text).replace(/\s+/g, " ").trim();
  if (!flat) return undefined;

  const hitAt = flat.toLowerCase().indexOf(needle);
  if (hitAt === -1) return undefined;

  const windowStart = Math.max(0, hitAt - LEAD_CHARS);
  const windowEnd = Math.min(flat.length, windowStart + WINDOW_CHARS);

  const leading = windowStart > 0 ? "…" : "";
  const trailing = windowEnd < flat.length ? "…" : "";
  const excerpt = flat.slice(windowStart, windowEnd).trim();

  return leading + excerpt + trailing;
}
/**
 * List the names of the direct sub-directories of `dir` that match `pattern`.
 *
 * @param {string} dir - Directory to read.
 * @param {RegExp} pattern - Non-global pattern each directory name must match.
 * @returns {Promise<string[]>} Matching names in readdir order; empty when
 *   `dir` cannot be read.
 */
async function listMatchingDirs(dir, pattern) {
  const entries = await safeReaddir(dir);
  if (!entries) return [];
  return entries
    .filter((entry) => entry.isDirectory() && pattern.test(entry.name))
    .map((entry) => entry.name);
}

/**
 * Resolve the branch folders a search should scan.
 *
 * - mode "all": every BRANCH_RE-shaped directory under `root`, sorted.
 * - any other mode: the caller-supplied list, in its original order, with
 *   duplicates removed and anything not shaped like a branch folder dropped.
 *   Dropping such names keeps values like "../x" from escaping `root` and
 *   mirrors the filter already applied in "all" mode.
 *
 * @param {string} root
 * @param {string} mode
 * @param {string[]|null} branches
 * @returns {Promise<string[]>}
 */
async function resolveBranchScope(root, mode, branches) {
  if (mode === "all") {
    return (await listMatchingDirs(root, BRANCH_RE)).sort();
  }
  if (!Array.isArray(branches)) return [];
  const valid = branches.filter(
    (name) => typeof name === "string" && BRANCH_RE.test(name),
  );
  return [...new Set(valid)];
}

/**
 * Walk root/NLxx/YYYY/MM/DD and yield one record per day folder whose date
 * falls inside the inclusive [from, to] range.
 */
async function* walkDayDirs(root, branches, from, to) {
  for (const branch of branches) {
    const branchDir = path.join(root, branch);
    for (const year of await listMatchingDirs(branchDir, YEAR_RE)) {
      const yearDir = path.join(branchDir, year);
      for (const month of await listMatchingDirs(yearDir, MONTH_RE)) {
        const monthDir = path.join(yearDir, month);
        for (const day of await listMatchingDirs(monthDir, DAY_RE)) {
          const date = `${year}-${month}-${day}`;
          if (!isWithinRange(date, from, to)) continue;
          yield { branch, year, month, day, date, dir: path.join(monthDir, day) };
        }
      }
    }
  }
}

/**
 * Test one PDF against the normalized query: filename substring first, then
 * (only if the name misses) a content substring on small files.
 *
 * @returns {Promise<{ matched: boolean, snippet: string|undefined }>}
 *   `snippet` is only set for content hits.
 */
async function matchPdfAgainstQuery(absPath, filename, needle) {
  if (filename.toLowerCase().includes(needle)) {
    return { matched: true, snippet: undefined };
  }
  const text = await maybeReadSmallFileText(absPath);
  if (text && text.toLowerCase().includes(needle)) {
    return { matched: true, snippet: buildSnippet(text, needle) };
  }
  return { matched: false, snippet: undefined };
}

/**
 * Create the filesystem-backed search provider (local/test fallback).
 *
 * @returns {{ search: (input: object) => Promise<{ items: object[], nextCursor: string|null }> }}
 */
export function createFsProvider() {
  return {
    /**
     * Search PDFs under NAS_ROOT_PATH laid out as NLxx/YYYY/MM/DD/*.pdf.
     *
     * Results are ordered newest date first, then filename ascending, and
     * paginated by offset. Every item has branch, date, year, month, day,
     * filename and relativePath; when a query is given, items also carry a
     * `snippet` key (undefined unless the hit came from file content).
     *
     * Branch names supplied for "branch"/"multi" mode are de-duplicated and
     * must look like branch folders (BRANCH_RE); other values are ignored.
     *
     * @param {{
     *   mode: "branch"|"multi"|"all",
     *   branches: string[]|null,
     *   q: string|null,
     *   from: string|null,
     *   to: string|null,
     *   limit: number,
     *   cursor: string|null
     * }} input
     * @returns {Promise<{ items: object[], nextCursor: string|null }>}
     * @throws badRequest("VALIDATION_SEARCH_LIMIT") when `limit` is not a
     *   positive integer.
     * @throws badRequest("VALIDATION_SEARCH_CURSOR") when the decoded cursor
     *   is not in "sync" mode.
     * @throws {ApiError} via getNasRootOrThrow when NAS_ROOT_PATH is not set.
     */
    async search(input) {
      const { mode, branches, q, from, to, limit, cursor } = input || {};

      if (!Number.isInteger(limit) || limit < 1) {
        throw badRequest("VALIDATION_SEARCH_LIMIT", "Invalid limit parameter");
      }

      // NOTE(review): decodeCursor(null) is assumed to yield a "sync" cursor
      // for the first page — confirm against @/lib/search/cursor.
      const decoded = decodeCursor(cursor || null);
      if (decoded.mode !== "sync") {
        throw badRequest("VALIDATION_SEARCH_CURSOR", "Invalid cursor");
      }
      const offset = decoded.offset;

      const root = getNasRootOrThrow();

      const scopeBranches = await resolveBranchScope(root, mode, branches);
      if (scopeBranches.length === 0) {
        return { items: [], nextCursor: null };
      }

      const needle = normalizeQuery(q);
      const results = [];

      for await (const slot of walkDayDirs(root, scopeBranches, from, to)) {
        const files = await safeReaddir(slot.dir);
        if (!files) continue;

        for (const file of files) {
          if (!file.isFile()) continue;
          const filename = file.name;
          if (!filename.toLowerCase().endsWith(".pdf")) continue;

          const { branch, date, year, month, day } = slot;
          const item = {
            branch,
            date,
            year,
            month,
            day,
            filename,
            relativePath: `${branch}/${year}/${month}/${day}/${filename}`,
          };

          // Date-only search: every PDF in range is a hit.
          if (!needle) {
            results.push(item);
            continue;
          }

          const { matched, snippet } = await matchPdfAgainstQuery(
            path.join(slot.dir, filename),
            filename,
            needle,
          );
          if (matched) {
            results.push({ ...item, snippet });
          }
        }
      }

      // Stable ordering for the FS fallback: newest dates first, then
      // filename ascending (German collation).
      results.sort((a, b) => {
        if (a.date !== b.date) return a.date < b.date ? 1 : -1;
        return String(a.filename).localeCompare(String(b.filename), "de");
      });

      const page = results.slice(offset, offset + limit);
      const nextOffset = offset + page.length;
      const nextCursor =
        nextOffset < results.length
          ? encodeCursor({ v: 1, mode: "sync", offset: nextOffset })
          : null;

      return { items: page, nextCursor };
    },
  };
}
|