Quellcode durchsuchen

RHL-016 feat(search): implement Qsirch provider and cursor encoding/decoding functionality

Code_Uwe vor 4 Wochen
Ursprung
Commit
512406d41b
3 geänderte Dateien mit 555 neuen und 0 gelöschten Zeilen
  1. 115 0
      lib/search/cursor.js
  2. 72 0
      lib/search/index.js
  3. 368 0
      lib/search/providers/qsirch.js

+ 115 - 0
lib/search/cursor.js

@@ -0,0 +1,115 @@
+import { badRequest } from "@/lib/api/errors";
+
+/**
+ * Cursor format (opaque for clients):
+ * - base64url(JSON.stringify(payload))
+ *
+ * We keep the cursor extensible so we can switch provider internals later
+ * (e.g. async-search context_id) without changing the public API.
+ *
+ * Current v1 payload shape:
+ * {
+ *   v: 1,
+ *   mode: "sync",
+ *   offset: number,
+ *   contextId?: string
+ * }
+ */
+
/**
 * Report whether a value is a non-null object that is not an array.
 *
 * Note that this is a loose check: any non-array object (including class
 * instances such as Date) is accepted.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isPlainObject(value) {
	if (!value) return false;
	if (typeof value !== "object") return false;
	return !Array.isArray(value);
}
+
/**
 * Encode a string as unpadded base64url using its UTF-8 bytes.
 *
 * @param {string} jsonString text to encode (coerced with String())
 * @returns {string}
 */
function toBase64Url(jsonString) {
	// Node.js Buffers understand the "base64url" encoding natively.
	const bytes = Buffer.from(String(jsonString), "utf8");
	return bytes.toString("base64url");
}
+
/**
 * Decode a base64url string back into UTF-8 text.
 *
 * @param {string} b64 base64url input (coerced with String())
 * @returns {string}
 */
function fromBase64Url(b64) {
	const bytes = Buffer.from(String(b64), "base64url");
	return bytes.toString("utf8");
}
+
/**
 * Serialize a cursor payload into the opaque string handed to clients.
 *
 * The payload is normalized to `{ v, mode, offset }` (plus `contextId` when a
 * truthy one is supplied), JSON-stringified and base64url-encoded.
 * `v` defaults to 1 and `mode` defaults to "sync".
 *
 * @param {{ v?: number, mode?: string, offset: number, contextId?: string }} payload
 * @returns {string}
 * @throws the error built by badRequest("VALIDATION_SEARCH_CURSOR", ...) when
 *   the payload is not an object or `offset` is not a non-negative integer
 */
export function encodeCursor(payload) {
	if (!isPlainObject(payload)) {
		throw badRequest("VALIDATION_SEARCH_CURSOR", "Invalid cursor payload");
	}

	const { offset } = payload;
	const offsetIsValid = Number.isInteger(offset) && offset >= 0;

	if (!offsetIsValid) {
		throw badRequest("VALIDATION_SEARCH_CURSOR", "Invalid cursor payload", {
			offset,
		});
	}

	// Key order (v, mode, offset, contextId) determines the serialized form.
	const normalized = {
		v: payload.v ?? 1,
		mode: String(payload.mode ?? "sync"),
		offset,
	};

	if (payload.contextId) {
		normalized.contextId = String(payload.contextId);
	}

	const json = JSON.stringify(normalized);
	return toBase64Url(json);
}
+
/**
 * Decode an opaque cursor string into its normalized payload.
 *
 * A missing or empty cursor means "first page" and yields the default payload
 * (`v: 1`, `mode: "sync"`, `offset: 0`, `contextId: null`). Any other value
 * must be base64url-encoded JSON describing a v1 cursor.
 *
 * Cursors are client-supplied, so `v` and `offset` are validated strictly:
 * they must be real JSON numbers. Coercing with Number() would silently accept
 * values such as `null`, `""`, `[]` or `true` as a valid offset/version.
 *
 * @param {string|null|undefined} cursor
 * @returns {{ v: number, mode: string, offset: number, contextId: string|null }}
 * @throws the error built by badRequest("VALIDATION_SEARCH_CURSOR", "Invalid cursor")
 *   when the cursor cannot be decoded or fails validation
 */
export function decodeCursor(cursor) {
	const invalid = () => badRequest("VALIDATION_SEARCH_CURSOR", "Invalid cursor");

	if (!cursor) {
		return { v: 1, mode: "sync", offset: 0, contextId: null };
	}

	if (typeof cursor !== "string" || !cursor.trim()) {
		throw invalid();
	}

	let parsed;
	try {
		// Both decoding and parsing failures map to the same client error.
		parsed = JSON.parse(fromBase64Url(cursor.trim()));
	} catch (err) {
		throw invalid();
	}

	if (!isPlainObject(parsed)) {
		throw invalid();
	}

	// A missing version is treated as v1; anything else must be the number 1.
	const v = parsed.v ?? 1;
	if (v !== 1) {
		throw invalid();
	}

	// Number.isInteger() is false for non-numbers, so no coercion takes place.
	const offset = parsed.offset;
	if (!Number.isInteger(offset) || offset < 0) {
		throw invalid();
	}

	const mode = String(parsed.mode ?? "sync");

	const contextId =
		typeof parsed.contextId === "string" && parsed.contextId.trim()
			? parsed.contextId.trim()
			: null;

	return { v, mode, offset, contextId };
}

+ 72 - 0
lib/search/index.js

@@ -0,0 +1,72 @@
+import { ApiError } from "@/lib/api/errors";
+import { createQsirchProvider } from "@/lib/search/providers/qsirch";
+import { createFsProvider } from "@/lib/search/providers/fs";
+
+/**
+ * Provider selection.
+ *
+ * We keep provider selection in one place:
+ * - SEARCH_PROVIDER=qsirch | fs
+ *
+ * This allows:
+ * - local dev: fs fallback
+ * - production: qsirch (fast, indexed)
+ */
+
// Provider instance created on first use and reused afterwards.
let cachedProvider = null;

/**
 * Canonicalize a provider name: falsy input becomes "", otherwise the value is
 * stringified, trimmed and lower-cased.
 *
 * @param {unknown} value
 * @returns {string}
 */
function normalizeProviderName(value) {
	const text = String(value || "");
	const trimmed = text.trim();
	return trimmed.toLowerCase();
}
+
/**
 * Return the search provider selected by the SEARCH_PROVIDER environment
 * variable ("fs" when unset), creating and caching it on first use.
 *
 * For "qsirch" the provider is configured from the QSIRCH_* environment
 * variables, with defaults for the path prefix, date field and mode.
 *
 * @returns {object} the cached provider instance
 * @throws {ApiError} SEARCH_BACKEND_UNAVAILABLE (status 500) for an unknown provider name
 */
export function getSearchProvider() {
	if (cachedProvider) return cachedProvider;

	const providerName = normalizeProviderName(
		process.env.SEARCH_PROVIDER || "fs"
	);

	switch (providerName) {
		case "fs":
			cachedProvider = createFsProvider();
			break;

		case "qsirch": {
			const env = process.env;
			cachedProvider = createQsirchProvider({
				baseUrl: env.QSIRCH_BASE_URL,
				account: env.QSIRCH_ACCOUNT,
				password: env.QSIRCH_PASSWORD,
				pathPrefix: env.QSIRCH_PATH_PREFIX || "/Niederlassungen",
				dateField: (env.QSIRCH_DATE_FIELD || "modified").trim(),
				mode: (env.QSIRCH_MODE || "sync").trim(),
			});
			break;
		}

		default:
			throw new ApiError({
				status: 500,
				code: "SEARCH_BACKEND_UNAVAILABLE",
				message: "Internal server error",
			});
	}

	return cachedProvider;
}
+
/**
 * Single search entrypoint for route handlers: forwards the input to the
 * currently selected provider and returns whatever that provider returns.
 *
 * @param {{
 *   mode: "branch"|"multi"|"all",
 *   branches: string[]|null,
 *   q: string|null,
 *   from: string|null,
 *   to: string|null,
 *   limit: number,
 *   cursor: string|null
 * }} input
 */
export async function search(input) {
	return getSearchProvider().search(input);
}

+ 368 - 0
lib/search/providers/qsirch.js

@@ -0,0 +1,368 @@
+import { ApiError, badRequest } from "@/lib/api/errors";
+import { decodeCursor, encodeCursor } from "@/lib/search/cursor";
+import { buildQsirchQuery } from "@/lib/search/queryBuilder";
+import { mapQsirchItemToSearchItem } from "@/lib/search/pathMapping";
+
+/**
+ * Qsirch provider (sync-first).
+ *
+ * Auth model:
+ * - Qsirch requests are authenticated using QTS session cookies (NAS_SID / NAS_USER).
+ * - We obtain NAS_SID via the QTS auth endpoint:
+ *     /cgi-bin/authLogin.cgi?user=...&pwd=...&serviceKey=1&...
+ *
+ * Notes:
+ * - We cache NAS_SID in-memory and refresh on 401 responses.
+ * - We do NOT expose Qsirch/QTS internals to API clients.
+ */
+
/**
 * True when the value is null/undefined or stringifies to only whitespace.
 *
 * @param {unknown} v
 * @returns {boolean}
 */
function isBlank(v) {
	if (v === undefined || v === null) return true;
	return String(v).trim().length === 0;
}
+
/**
 * Normalize a configured base URL so paths can be appended with a single "/".
 *
 * The value is stringified and trimmed, then every trailing slash is removed
 * (not just one), so "http://nas//" does not produce "//" in request URLs.
 * If nothing remains (blank input, or input consisting only of slashes) the
 * result is null, so callers can treat "no usable base URL" uniformly.
 *
 * @param {unknown} baseUrl
 * @returns {string|null}
 */
function normalizeBaseUrl(baseUrl) {
	const raw = String(baseUrl || "").trim();

	let end = raw.length;
	while (end > 0 && raw[end - 1] === "/") {
		end -= 1;
	}

	const normalized = raw.slice(0, end);
	return normalized || null;
}
+
/**
 * Encode a password the way it is sent to the QTS login endpoint: standard
 * (padded) base64 of the UTF-8 bytes of the stringified password.
 *
 * QNAP docs mention an "ezEncode" step; the examples show base64 encoding.
 *
 * @param {unknown} password
 * @returns {string}
 */
function encodePasswordForQts(password) {
	const utf8Bytes = Buffer.from(String(password), "utf8");
	return utf8Bytes.toString("base64");
}
+
/**
 * Extract the trimmed text content of the first `<tag>…</tag>` element in an
 * XML string (case-insensitive tag match, no attribute support).
 *
 * If the whole element content is a single CDATA section, the CDATA wrapper is
 * removed. An empty value (including an empty CDATA section) yields null, so
 * a literal "<![CDATA[]]>" is never returned as a value.
 *
 * The tag name is regex-escaped, and content may span multiple lines or be
 * surrounded by whitespace.
 *
 * @param {unknown} xml document text (coerced with String())
 * @param {string} tag element name to look for
 * @returns {string|null} trimmed value, or null when absent or empty
 */
function extractXmlTagValue(xml, tag) {
	// Escape regex metacharacters so the tag is matched literally.
	const name = String(tag).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
	const elementRe = new RegExp(
		`<${name}\\s*>([\\s\\S]*?)<\\/${name}\\s*>`,
		"i"
	);

	const element = String(xml).match(elementRe);
	if (!element) return null;

	let value = element[1].trim();

	// Unwrap a CDATA section that makes up the entire element content.
	const cdata = value.match(/^<!\[CDATA\[([\s\S]*?)\]\]>$/);
	if (cdata) value = cdata[1].trim();

	return value || null;
}
+
/**
 * Log in against the QTS auth endpoint (`/cgi-bin/authLogin.cgi`) and return
 * the session id found in the `<authSid>` element of the response body.
 *
 * @param {{ baseUrl: string, account: string, password: string, fetchImpl: Function }} options
 * @returns {Promise<string>} the session id
 * @throws {ApiError} SEARCH_BACKEND_UNAVAILABLE (status 500) when the request
 *   fails or the response contains no `<authSid>` value; in the latter case
 *   the `<errorValue>` content, if any, is attached as `details`
 */
async function qtsLogin({ baseUrl, account, password, fetchImpl }) {
	const loginUrl = new URL(`${baseUrl}/cgi-bin/authLogin.cgi`);

	const query = [
		["user", account],
		["pwd", encodePasswordForQts(password)],
		["serviceKey", "1"],
		["remme", "0"],
		// Random param often used by QNAP examples to avoid caching.
		["r", String(Math.random())],
	];
	for (const [key, value] of query) {
		loginUrl.searchParams.set(key, value);
	}

	let response;
	try {
		response = await fetchImpl(loginUrl.toString(), {
			method: "GET",
			headers: { Accept: "text/xml, application/xml, text/plain, */*" },
			cache: "no-store",
		});
	} catch (err) {
		throw new ApiError({
			status: 500,
			code: "SEARCH_BACKEND_UNAVAILABLE",
			message: "Internal server error",
			cause: err,
		});
	}

	// An unreadable body is treated like an empty one and fails the check below.
	const body = await response.text().catch(() => "");

	// QTS auth endpoint typically returns 200 even for failed logins
	// and indicates the outcome in the XML body.
	const authSid = extractXmlTagValue(body, "authSid");
	if (authSid) return authSid;

	const errorValue = extractXmlTagValue(body, "errorValue");
	throw new ApiError({
		status: 500,
		code: "SEARCH_BACKEND_UNAVAILABLE",
		message: "Internal server error",
		details: errorValue ? { errorValue } : undefined,
	});
}
+
/**
 * Build the Cookie header value carrying the QTS session: NAS_USER and
 * NAS_SID, in that order, separated by "; ". Values are inserted verbatim.
 *
 * Browser sessions may carry additional cookies, but NAS_USER + NAS_SID is
 * typically sufficient for server-to-server calls.
 *
 * @param {{ account: string, sid: string }} session
 * @returns {string}
 */
function buildCookieHeader({ account, sid }) {
	const cookies = [`NAS_USER=${account}`, `NAS_SID=${sid}`];
	return cookies.join("; ");
}
+
/**
 * Build a short display snippet (at most 240 characters plus ellipses) from
 * document content.
 *
 * Whitespace runs are collapsed to single spaces. When `q` is a non-blank
 * string found in the text (case-insensitive), the snippet starts up to 80
 * characters before the first match; otherwise it is the head of the text.
 * "…" marks truncation on either side.
 *
 * @param {unknown} content raw content; non-strings yield undefined
 * @param {unknown} q query text used to position the snippet
 * @returns {string|undefined} the snippet, or undefined when there is no text
 */
function buildSnippet(content, q) {
	const MAX = 240;
	const LEAD = 80;

	if (typeof content !== "string") return undefined;

	// Collapse whitespace for stable UI rendering.
	const text = content.replace(/\s+/g, " ").trim();
	if (text === "") return undefined;

	// Fallback used when there is no query term or no match.
	const headOfText = () =>
		text.length > MAX ? `${text.slice(0, MAX)}…` : text;

	const needle = typeof q === "string" ? q.trim().toLowerCase() : "";
	if (needle === "") return headOfText();

	const matchIndex = text.toLowerCase().indexOf(needle);
	if (matchIndex < 0) return headOfText();

	const start = matchIndex > LEAD ? matchIndex - LEAD : 0;
	const end = Math.min(text.length, start + MAX);

	const chunk = text.slice(start, end).trim();
	if (chunk === "") return undefined;

	const prefix = start > 0 ? "…" : "";
	const suffix = end < text.length ? "…" : "";
	return `${prefix}${chunk}${suffix}`;
}
+
/**
 * Create a Qsirch-backed search provider (sync mode only).
 *
 * The provider logs in to QTS lazily, keeps the session id (SID) in memory,
 * and refreshes it once when a search request is answered with HTTP 401.
 * All backend failures are reported as ApiError SEARCH_BACKEND_UNAVAILABLE
 * (status 500) so Qsirch/QTS internals are not exposed to API clients.
 *
 * @param {{
 *   baseUrl: string,
 *   account: string,
 *   password: string,
 *   pathPrefix: string,
 *   dateField?: string,
 *   mode?: string
 * }} config
 * @returns {{ search: (input: object) => Promise<{ items: object[], nextCursor: string|null }> }}
 * @throws {ApiError} SEARCH_BACKEND_UNAVAILABLE when baseUrl, account,
 *   password or pathPrefix is missing/blank
 */
export function createQsirchProvider({
	baseUrl,
	account,
	password,
	pathPrefix,
	dateField = "modified",
	mode = "sync",
}) {
	const base = normalizeBaseUrl(baseUrl);

	// Build the single generic backend error used throughout this provider.
	function backendUnavailable(cause) {
		const init = {
			status: 500,
			code: "SEARCH_BACKEND_UNAVAILABLE",
			message: "Internal server error",
		};
		if (cause !== undefined) init.cause = cause;
		return new ApiError(init);
	}

	if (!base || isBlank(account) || isBlank(password) || isBlank(pathPrefix)) {
		throw backendUnavailable();
	}

	let cachedSid = null;
	let sidPromise = null;

	// Return the cached SID, or log in; concurrent callers share one login.
	async function getSid(fetchImpl) {
		if (cachedSid) return cachedSid;
		if (sidPromise) return sidPromise;

		sidPromise = (async () => {
			const sid = await qtsLogin({
				baseUrl: base,
				account,
				password,
				fetchImpl,
			});
			cachedSid = sid;
			return sid;
		})();

		try {
			return await sidPromise;
		} finally {
			sidPromise = null;
		}
	}

	// Issue one search request with the given SID and return the raw response.
	async function qsirchSearchOnce({ fetchImpl, sid, q, limit, offset }) {
		const url = new URL(`${base}/qsirch/latest/api/search/`);

		// Required
		url.searchParams.set("q", q);

		// Pagination
		url.searchParams.set("limit", String(limit));
		url.searchParams.set("offset", String(offset));

		// Avoid heavy/irrelevant data
		url.searchParams.set("show_folder", "0"); // files only
		url.searchParams.set("show_hidden", "0");
		url.searchParams.set("show_absolute_path", "0"); // share path (not physical)
		url.searchParams.set("store_history", "0"); // do not store query history
		url.searchParams.set("tools", "0");
		url.searchParams.set("tools_resp", "1");
		url.searchParams.set("tools_limit_items", "50000");

		// Permission checks on QTS side (defense-in-depth)
		url.searchParams.set("file_status", "1");

		// We keep highlight params default-compatible but do not rely on them.
		url.searchParams.set("pre_highlight_tag", "<em>");
		url.searchParams.set("post_highlight_tag", "</em>");
		url.searchParams.set("highlight_limit", "250");

		const headers = {
			accept: "application/json",
			cookie: buildCookieHeader({ account, sid }),
		};

		return fetchImpl(url.toString(), {
			method: "GET",
			headers,
			cache: "no-store",
		});
	}

	// Same as qsirchSearchOnce, but transport failures become the generic
	// backend error. Used for BOTH the first attempt and the retry, so a
	// network failure during the retry cannot escape as a raw fetch error.
	async function searchOrUnavailable(args) {
		try {
			return await qsirchSearchOnce(args);
		} catch (err) {
			throw backendUnavailable(err);
		}
	}

	// Run a search, refreshing the SID once on 401, and return the parsed JSON.
	async function qsirchSearch({ fetchImpl, q, limit, offset }) {
		let sid = await getSid(fetchImpl);

		// Try once with the cached sid.
		let res = await searchOrUnavailable({ fetchImpl, sid, q, limit, offset });

		// If SID expired, retry once with a fresh SID.
		if (res.status === 401) {
			// Only drop the SID that was actually rejected: a concurrent request
			// may already have replaced it with a fresh one.
			if (cachedSid === sid) cachedSid = null;
			sid = await getSid(fetchImpl);

			res = await searchOrUnavailable({ fetchImpl, sid, q, limit, offset });
		}

		if (!res.ok) {
			throw backendUnavailable();
		}

		try {
			return await res.json();
		} catch (err) {
			throw backendUnavailable(err);
		}
	}

	return {
		/**
		 * Execute one page of a search.
		 *
		 * `input.fetchImpl` may optionally be supplied to replace the global
		 * `fetch`.
		 *
		 * @param {{
		 *   mode: "branch"|"multi"|"all",
		 *   branches: string[]|null,
		 *   q: string|null,
		 *   from: string|null,
		 *   to: string|null,
		 *   limit: number,
		 *   cursor: string|null
		 * }} input
		 * @returns {Promise<{ items: object[], nextCursor: string|null }>}
		 */
		async search(input) {
			const fetchImpl = input?.fetchImpl || fetch;

			// We support sync now; async will be added later without changing the public API.
			// Every configured mode other than "async" (e.g. "auto") behaves like "sync".
			if (mode === "async") {
				throw backendUnavailable();
			}

			const { mode: searchMode, branches, q, from, to, limit } = input || {};

			if (!Number.isInteger(limit) || limit < 1) {
				throw badRequest("VALIDATION_SEARCH_LIMIT", "Invalid limit parameter");
			}

			const decoded = decodeCursor(input?.cursor || null);
			if (decoded.mode !== "sync") {
				throw badRequest("VALIDATION_SEARCH_CURSOR", "Invalid cursor");
			}

			const offset = decoded.offset;

			const qsirchQ = buildQsirchQuery({
				mode: searchMode,
				branches: branches || null,
				q,
				from,
				to,
				dateField,
				pathPrefix,
			});

			const payload = await qsirchSearch({
				fetchImpl,
				q: qsirchQ,
				limit,
				offset,
			});

			const total =
				typeof payload?.total === "number"
					? payload.total
					: Number(payload?.total);

			const rawItems = Array.isArray(payload?.items) ? payload.items : [];

			const items = rawItems
				.map((it) => {
					const mapped = mapQsirchItemToSearchItem(it, { pathPrefix });

					// Items the mapper rejects are dropped from the page.
					if (!mapped) return null;

					const snippet = buildSnippet(it?.content, q);

					const result = {
						branch: mapped.branch,
						date: mapped.date,
						year: mapped.year,
						month: mapped.month,
						day: mapped.day,
						filename: mapped.filename,
						relativePath: mapped.relativePath,
					};

					if (typeof it?.score === "number") result.score = it.score;
					if (snippet) result.snippet = snippet;

					return result;
				})
				.filter(Boolean);

			// Pagination:
			// Use rawItems.length (not mapped length) to avoid repeating pages if we drop items.
			const rawCount = rawItems.length;
			const hasMore =
				Number.isFinite(total) && rawCount > 0 && offset + rawCount < total;

			const nextCursor = hasMore
				? encodeCursor({ v: 1, mode: "sync", offset: offset + rawCount })
				: null;

			return { items, nextCursor };
		},
	};
}