Bladeren bron

RHL-016 feat(search): add FS provider for local fallback with directory traversal and filtering

Code_Uwe 4 weken geleden
bovenliggende
commit
69c3320137
1 gewijzigde bestanden met toevoegingen van 246 en 0 verwijderingen
  1. 246 0
      lib/search/providers/fs.js

+ 246 - 0
lib/search/providers/fs.js

@@ -0,0 +1,246 @@
+import fs from "node:fs/promises";
+import path from "node:path";
+
+import { ApiError, badRequest } from "@/lib/api/errors";
+import { decodeCursor, encodeCursor } from "@/lib/search/cursor";
+
+/**
+ * FS provider (local/test fallback).
+ *
+ * Purpose:
+ * - Provide a deterministic fallback without Qsirch for local development/tests.
+ *
+ * Behavior:
+ * - Traverses the NAS-like folder structure under NAS_ROOT_PATH:
+ *   NLxx/YYYY/MM/DD/*.pdf
+ * - Filters by:
+ *   - branch scope (branch/multi/all)
+ *   - date range (inclusive)
+ *   - q (best-effort: filename substring; optional small-file content substring)
+ *
+ * NOTE:
+ * - This is not intended to be fast at real NAS scale.
+ * - Production should use SEARCH_PROVIDER=qsirch.
+ */
+
+// Directory-name patterns for the NLxx/YYYY/MM/DD layout; entries whose names
+// do not match are skipped during traversal.
+// Branch folder: "NL" followed by one or more digits.
+const BRANCH_RE = /^NL\d+$/;
+// Year folder: exactly four digits.
+const YEAR_RE = /^\d{4}$/;
+// Month folder: two digits, 01-12.
+const MONTH_RE = /^(0[1-9]|1[0-2])$/;
+// Day folder: two digits, 01-31 (not checked against the month's real length).
+const DAY_RE = /^(0[1-9]|[12]\d|3[01])$/;
+
+function getNasRootOrThrow() {
+	const root = process.env.NAS_ROOT_PATH;
+	if (!root) {
+		throw new ApiError({
+			status: 500,
+			code: "FS_STORAGE_ERROR",
+			message: "Internal server error",
+		});
+	}
+	return root;
+}
+
+function isWithinRange(date, from, to) {
+	// date/from/to are ISO "YYYY-MM-DD"; lexical compare works.
+	if (from && date < from) return false;
+	if (to && date > to) return false;
+	return true;
+}
+
+async function safeReaddir(dir) {
+	try {
+		return await fs.readdir(dir, { withFileTypes: true });
+	} catch {
+		return null;
+	}
+}
+
+function normalizeQuery(q) {
+	if (typeof q !== "string") return null;
+	const s = q.trim().toLowerCase();
+	return s ? s : null;
+}
+
+async function maybeReadSmallFileText(filePath, maxBytes = 1_000_000) {
+	try {
+		const stat = await fs.stat(filePath);
+		if (!stat.isFile()) return null;
+		if (stat.size > maxBytes) return null;
+
+		const buf = await fs.readFile(filePath);
+		return buf.toString("utf8");
+	} catch {
+		return null;
+	}
+}
+
+function buildSnippet(text, needle) {
+	if (!text || !needle) return undefined;
+
+	const normalized = String(text).replace(/\s+/g, " ").trim();
+	if (!normalized) return undefined;
+
+	const hay = normalized.toLowerCase();
+	const idx = hay.indexOf(needle);
+	if (idx === -1) return undefined;
+
+	const MAX = 240;
+	const start = Math.max(0, idx - 80);
+	const end = Math.min(normalized.length, start + MAX);
+
+	const chunk = normalized.slice(start, end).trim();
+	return (start > 0 ? "…" : "") + chunk + (end < normalized.length ? "…" : "");
+}
+
+export function createFsProvider() {
+	return {
+		/**
+		 * @param {{
+		 *   mode: "branch"|"multi"|"all",
+		 *   branches: string[]|null,
+		 *   q: string|null,
+		 *   from: string|null,
+		 *   to: string|null,
+		 *   limit: number,
+		 *   cursor: string|null
+		 * }} input
+		 */
+		async search(input) {
+			const { mode, branches, q, from, to, limit, cursor } = input || {};
+
+			if (!Number.isInteger(limit) || limit < 1) {
+				throw badRequest("VALIDATION_SEARCH_LIMIT", "Invalid limit parameter");
+			}
+
+			const decoded = decodeCursor(cursor || null);
+			if (decoded.mode !== "sync") {
+				throw badRequest("VALIDATION_SEARCH_CURSOR", "Invalid cursor");
+			}
+			const offset = decoded.offset;
+
+			const root = getNasRootOrThrow();
+
+			let scopeBranches = branches;
+
+			// mode === "all" => list all branches from filesystem.
+			if (mode === "all") {
+				const entries = await safeReaddir(root);
+				scopeBranches = (entries || [])
+					.filter((e) => e.isDirectory() && BRANCH_RE.test(e.name))
+					.map((e) => e.name)
+					.sort();
+			}
+
+			if (!Array.isArray(scopeBranches) || scopeBranches.length === 0) {
+				return { items: [], nextCursor: null };
+			}
+
+			const needle = normalizeQuery(q);
+
+			const results = [];
+
+			// Traverse NLxx/YYYY/MM/DD/*.pdf
+			for (const branch of scopeBranches) {
+				const branchDir = path.join(root, branch);
+				const years = await safeReaddir(branchDir);
+				if (!years) continue;
+
+				for (const y of years) {
+					if (!y.isDirectory() || !YEAR_RE.test(y.name)) continue;
+					const year = y.name;
+
+					const yearDir = path.join(branchDir, year);
+					const months = await safeReaddir(yearDir);
+					if (!months) continue;
+
+					for (const m of months) {
+						if (!m.isDirectory() || !MONTH_RE.test(m.name)) continue;
+						const month = m.name;
+
+						const monthDir = path.join(yearDir, month);
+						const days = await safeReaddir(monthDir);
+						if (!days) continue;
+
+						for (const d of days) {
+							if (!d.isDirectory() || !DAY_RE.test(d.name)) continue;
+							const day = d.name;
+
+							const date = `${year}-${month}-${day}`;
+							if (!isWithinRange(date, from, to)) continue;
+
+							const dayDir = path.join(monthDir, day);
+							const files = await safeReaddir(dayDir);
+							if (!files) continue;
+
+							for (const f of files) {
+								if (!f.isFile()) continue;
+								if (!f.name.toLowerCase().endsWith(".pdf")) continue;
+
+								const filename = f.name;
+								const rel = `${branch}/${year}/${month}/${day}/${filename}`;
+
+								// q filter: filename substring, optional content substring for small files
+								if (needle) {
+									const nameHit = filename.toLowerCase().includes(needle);
+
+									let contentHit = false;
+									let snippet;
+
+									if (!nameHit) {
+										const abs = path.join(dayDir, filename);
+										const text = await maybeReadSmallFileText(abs);
+										if (text && text.toLowerCase().includes(needle)) {
+											contentHit = true;
+											snippet = buildSnippet(text, needle);
+										}
+									}
+
+									if (!nameHit && !contentHit) continue;
+
+									results.push({
+										branch,
+										date,
+										year,
+										month,
+										day,
+										filename,
+										relativePath: rel,
+										snippet,
+									});
+								} else {
+									// Date-only search
+									results.push({
+										branch,
+										date,
+										year,
+										month,
+										day,
+										filename,
+										relativePath: rel,
+									});
+								}
+							}
+						}
+					}
+				}
+			}
+
+			// Stable ordering for FS fallback:
+			// - newest dates first, then filename asc
+			results.sort((a, b) => {
+				if (a.date !== b.date) return a.date < b.date ? 1 : -1;
+				return String(a.filename).localeCompare(String(b.filename), "de");
+			});
+
+			const page = results.slice(offset, offset + limit);
+			const nextOffset = offset + page.length;
+
+			const nextCursor =
+				nextOffset < results.length
+					? encodeCursor({ v: 1, mode: "sync", offset: nextOffset })
+					: null;
+
+			return { items: page, nextCursor };
+		},
+	};
+}