|
|
@@ -0,0 +1,246 @@
|
|
|
+import fs from "node:fs/promises";
|
|
|
+import path from "node:path";
|
|
|
+
|
|
|
+import { ApiError, badRequest } from "@/lib/api/errors";
|
|
|
+import { decodeCursor, encodeCursor } from "@/lib/search/cursor";
|
|
|
+
|
|
|
+/**
|
|
|
+ * FS provider (local/test fallback).
|
|
|
+ *
|
|
|
+ * Purpose:
|
|
|
+ * - Provide a deterministic fallback without Qsirch for local development/tests.
|
|
|
+ *
|
|
|
+ * Behavior:
|
|
|
+ * - Traverses the NAS-like folder structure under NAS_ROOT_PATH:
|
|
|
+ * NLxx/YYYY/MM/DD/*.pdf
|
|
|
+ * - Filters by:
|
|
|
+ * - branch scope (branch/multi/all)
|
|
|
+ * - date range (inclusive)
|
|
|
+ * - q (best-effort: filename substring; optional small-file content substring)
|
|
|
+ *
|
|
|
+ * NOTE:
|
|
|
+ * - This is not intended to be fast on real NAS-scale.
|
|
|
+ * - Production should use SEARCH_PROVIDER=qsirch.
|
|
|
+ */
|
|
|
+
|
|
|
// Directory-name patterns for each level of the NLxx/YYYY/MM/DD layout.
// Entries whose names do not match are skipped during traversal.

// Branch folder: "NL" followed by one or more digits.
const BRANCH_RE = /^NL\d+$/;

// Year folder: exactly four digits.
const YEAR_RE = /^\d{4}$/;

// Month folder: zero-padded 01..12.
const MONTH_RE = /^(0[1-9]|1[0-2])$/;

// Day folder: zero-padded 01..31 (no per-month validation).
const DAY_RE = /^(0[1-9]|[12]\d|3[01])$/;
|
|
|
+
|
|
|
/**
 * Reads the NAS root directory from the NAS_ROOT_PATH environment variable.
 *
 * @returns {string} The configured root path.
 * @throws {ApiError} status 500 / code "FS_STORAGE_ERROR" when the variable is
 *   unset or empty. The message is intentionally generic.
 */
function getNasRootOrThrow() {
  const { NAS_ROOT_PATH: nasRoot } = process.env;
  if (nasRoot) return nasRoot;

  throw new ApiError({
    status: 500,
    code: "FS_STORAGE_ERROR",
    message: "Internal server error",
  });
}
|
|
|
+
|
|
|
/**
 * Checks whether `date` lies inside the inclusive [from, to] range.
 *
 * All three values are ISO "YYYY-MM-DD" strings, so plain string comparison
 * orders them chronologically. A falsy `from` or `to` leaves that side open.
 *
 * @param {string} date
 * @param {string|null|undefined} from
 * @param {string|null|undefined} to
 * @returns {boolean}
 */
function isWithinRange(date, from, to) {
  const beforeStart = Boolean(from) && date < from;
  const afterEnd = Boolean(to) && date > to;
  return !(beforeStart || afterEnd);
}
|
|
|
+
|
|
|
/**
 * Lists a directory as Dirent objects without ever rejecting.
 *
 * @param {string} dir Directory to list.
 * @returns {Promise<import("node:fs").Dirent[]|null>} The entries, or null
 *   when the directory cannot be read for any reason.
 */
async function safeReaddir(dir) {
  let entries = null;
  try {
    entries = await fs.readdir(dir, { withFileTypes: true });
  } catch {
    // Best-effort traversal: an unreadable directory is treated as absent.
  }
  return entries;
}
|
|
|
+
|
|
|
/**
 * Turns the raw `q` parameter into a search needle.
 *
 * @param {unknown} q Raw query value.
 * @returns {string|null} The trimmed, lower-cased query, or null when `q` is
 *   not a string or is blank.
 */
function normalizeQuery(q) {
  const cleaned = typeof q === "string" ? q.trim().toLowerCase() : "";
  return cleaned === "" ? null : cleaned;
}
|
|
|
+
|
|
|
/**
 * Reads a file as UTF-8 text, but only when it is a regular file no larger
 * than `maxBytes`.
 *
 * @param {string} filePath Path of the file to read.
 * @param {number} [maxBytes=1_000_000] Size limit in bytes (inclusive).
 * @returns {Promise<string|null>} The decoded contents, or null when the path
 *   is not a regular file, exceeds the limit, or any filesystem call fails.
 */
async function maybeReadSmallFileText(filePath, maxBytes = 1_000_000) {
  try {
    const info = await fs.stat(filePath);
    const tooLarge = info.size > maxBytes;
    if (!info.isFile() || tooLarge) return null;

    const contents = await fs.readFile(filePath);
    return contents.toString("utf8");
  } catch {
    // Best-effort: unreadable files simply do not take part in content search.
    return null;
  }
}
|
|
|
+
|
|
|
/**
 * Builds a short excerpt of `text` around the first occurrence of `needle`.
 *
 * Both the text and the needle are whitespace-collapsed (runs of whitespace
 * become a single space) and compared case-insensitively. Normalizing both
 * sides the same way means a needle with irregular spacing or upper-case
 * letters still yields a snippet.
 *
 * The excerpt starts up to 80 characters before the match and is at most 240
 * characters long. An ellipsis is added on each side that was cut off.
 *
 * @param {string|null|undefined} text Source text to excerpt.
 * @param {string|null|undefined} needle Search term.
 * @returns {string|undefined} The excerpt, or undefined when either input is
 *   empty or the needle does not occur in the text.
 */
function buildSnippet(text, needle) {
  if (!text || !needle) return undefined;

  const collapseWhitespace = (value) => String(value).replace(/\s+/g, " ").trim();

  const normalized = collapseWhitespace(text);
  if (!normalized) return undefined;

  // Apply the same normalization to the needle as to the haystack.
  const target = collapseWhitespace(needle).toLowerCase();
  if (!target) return undefined;

  const idx = normalized.toLowerCase().indexOf(target);
  if (idx === -1) return undefined;

  const MAX_LENGTH = 240;
  const LEAD = 80;
  const start = Math.max(0, idx - LEAD);
  const end = Math.min(normalized.length, start + MAX_LENGTH);

  const chunk = normalized.slice(start, end).trim();
  const prefix = start > 0 ? "…" : "";
  const suffix = end < normalized.length ? "…" : "";
  return `${prefix}${chunk}${suffix}`;
}
|
|
|
+
|
|
|
/**
 * Lists the names of the sub-directories of `dir` whose name matches `pattern`.
 * An unreadable directory yields an empty list.
 *
 * @param {string} dir
 * @param {RegExp} pattern
 * @returns {Promise<string[]>}
 */
async function listSubdirNames(dir, pattern) {
  const entries = await safeReaddir(dir);
  if (!entries) return [];
  return entries
    .filter((entry) => entry.isDirectory() && pattern.test(entry.name))
    .map((entry) => entry.name);
}

/**
 * Applies the q filter to one file: filename substring first, then (only if
 * the name did not match) a content substring check on small files.
 *
 * @param {string} dayDir Absolute directory containing the file.
 * @param {string} filename
 * @param {string} needle Normalized (trimmed, lower-cased) query.
 * @returns {Promise<{ snippet: string|undefined }|null>} Match info, or null
 *   when the file does not match. `snippet` is only set for content hits.
 */
async function matchFileAgainstQuery(dayDir, filename, needle) {
  if (filename.toLowerCase().includes(needle)) return { snippet: undefined };

  const text = await maybeReadSmallFileText(path.join(dayDir, filename));
  if (text && text.toLowerCase().includes(needle)) {
    return { snippet: buildSnippet(text, needle) };
  }
  return null;
}

/**
 * Creates the filesystem-backed search provider (local/test fallback).
 *
 * @returns {{ search: (input: object) => Promise<{ items: object[], nextCursor: string|null }> }}
 */
export function createFsProvider() {
  return {
    /**
     * Walks NLxx/YYYY/MM/DD/*.pdf under NAS_ROOT_PATH, filters by branch scope,
     * inclusive date range and optional query, then returns one page of the
     * results ordered newest date first, filename ascending.
     *
     * @param {{
     *   mode: "branch"|"multi"|"all",
     *   branches: string[]|null,
     *   q: string|null,
     *   from: string|null,
     *   to: string|null,
     *   limit: number,
     *   cursor: string|null
     * }} input
     * @returns {Promise<{ items: object[], nextCursor: string|null }>}
     * @throws badRequest("VALIDATION_SEARCH_LIMIT") when `limit` is not a
     *   positive integer; badRequest("VALIDATION_SEARCH_CURSOR") when the
     *   cursor is not a "sync" cursor; ApiError 500 when NAS_ROOT_PATH is unset.
     */
    async search(input) {
      const { mode, branches, q, from, to, limit, cursor } = input || {};

      if (!Number.isInteger(limit) || limit < 1) {
        throw badRequest("VALIDATION_SEARCH_LIMIT", "Invalid limit parameter");
      }

      const decoded = decodeCursor(cursor || null);
      if (decoded.mode !== "sync") {
        throw badRequest("VALIDATION_SEARCH_CURSOR", "Invalid cursor");
      }
      const offset = decoded.offset;

      const root = getNasRootOrThrow();

      let scopeBranches;
      if (mode === "all") {
        // mode === "all" => list all branches from the filesystem.
        scopeBranches = (await listSubdirNames(root, BRANCH_RE)).sort();
      } else if (Array.isArray(branches)) {
        // Caller-supplied names are joined into a filesystem path below, so
        // only accept well-formed branch names (prevents "../" traversal out
        // of NAS_ROOT_PATH). Duplicates are dropped to avoid duplicate items.
        scopeBranches = [
          ...new Set(
            branches.filter((b) => typeof b === "string" && BRANCH_RE.test(b)),
          ),
        ];
      } else {
        scopeBranches = [];
      }

      if (scopeBranches.length === 0) {
        return { items: [], nextCursor: null };
      }

      const needle = normalizeQuery(q);
      const results = [];

      // Traverse NLxx/YYYY/MM/DD/*.pdf
      for (const branch of scopeBranches) {
        const branchDir = path.join(root, branch);

        for (const year of await listSubdirNames(branchDir, YEAR_RE)) {
          const yearDir = path.join(branchDir, year);

          for (const month of await listSubdirNames(yearDir, MONTH_RE)) {
            const monthDir = path.join(yearDir, month);

            for (const day of await listSubdirNames(monthDir, DAY_RE)) {
              const date = `${year}-${month}-${day}`;
              if (!isWithinRange(date, from, to)) continue;

              const dayDir = path.join(monthDir, day);
              const files = await safeReaddir(dayDir);
              if (!files) continue;

              for (const file of files) {
                if (!file.isFile()) continue;
                if (!file.name.toLowerCase().endsWith(".pdf")) continue;

                const filename = file.name;
                const item = {
                  branch,
                  date,
                  year,
                  month,
                  day,
                  filename,
                  relativePath: `${branch}/${year}/${month}/${day}/${filename}`,
                };

                if (needle) {
                  // q filter: filename substring, optional content substring
                  // for small files. Date-only searches skip this entirely.
                  const match = await matchFileAgainstQuery(dayDir, filename, needle);
                  if (!match) continue;
                  item.snippet = match.snippet;
                }

                results.push(item);
              }
            }
          }
        }
      }

      // Stable ordering for FS fallback:
      // - newest dates first, then filename asc
      results.sort((a, b) => {
        if (a.date !== b.date) return a.date < b.date ? 1 : -1;
        return String(a.filename).localeCompare(String(b.filename), "de");
      });

      const page = results.slice(offset, offset + limit);
      const nextOffset = offset + page.length;

      const nextCursor =
        nextOffset < results.length
          ? encodeCursor({ v: 1, mode: "sync", offset: nextOffset })
          : null;

      return { items: page, nextCursor };
    },
  };
}
|