Просмотр исходного кода

RHL-016 feat(search): implement Qsirch item mapping and query building functionality with tests

Code_Uwe 4 недель назад
Родитель
Коммит
7cc3479d99
4 изменённых файла: 364 добавления и 0 удалений
  1. 111 0
      lib/search/pathMapping.js
  2. 52 0
      lib/search/pathMapping.test.js
  3. 141 0
      lib/search/queryBuilder.js
  4. 60 0
      lib/search/queryBuilder.test.js

+ 111 - 0
lib/search/pathMapping.js

@@ -0,0 +1,111 @@
+/**
+ * Map Qsirch items back into our NAS convention:
+ *
+ * Qsirch fields observed:
+ * - item.path = "Niederlassungen/NL20/2025/12/18"   (directory)
+ * - item.name = "Stapel_Seiten-4_Zeit-141039"      (basename without extension)
+ * - item.extension = "pdf"
+ *
+ * We strictly accept only:
+ *   <prefix>/<branch>/<year>/<month>/<day>
+ *
+ * And we build:
+ * - filename: "<name>.<extension>"
+ * - relativePath: "<branch>/<year>/<month>/<day>/<filename>"
+ *
+ * Any unexpected shape is rejected (returns null) as defense-in-depth.
+ */
+
+// Branch directory name: "NL" followed by one or more digits (e.g. "NL20").
+const BRANCH_RE = /^NL\d+$/;
+// Year directory: exactly four digits.
+const YEAR_RE = /^\d{4}$/;
+// Month directory: zero-padded 01-12.
+const MONTH_RE = /^(0[1-9]|1[0-2])$/;
+// Day directory: zero-padded 01-31 (the pattern itself does not know the month's length).
+const DAY_RE = /^(0[1-9]|[12]\d|3[01])$/;
+
+/**
+ * Coerce a value to a string and drop a single leading "/" if present.
+ * Falsy inputs (null, undefined, "", 0, ...) become the empty string.
+ *
+ * @param {unknown} p
+ * @returns {string}
+ */
+function stripLeadingSlash(p) {
+	const text = String(p || "");
+	if (!text.startsWith("/")) return text;
+	return text.slice(1);
+}
+
+/**
+ * Normalize a configured path prefix so it can be compared with Qsirch paths:
+ * surrounding whitespace, one leading "/" and all trailing "/" are removed.
+ *
+ * Whitespace is trimmed BEFORE the leading slash is stripped, so a padded
+ * value such as " /Niederlassungen/ " normalizes to "Niederlassungen"
+ * instead of keeping its leading slash (which would never match any path).
+ *
+ * @param {unknown} prefix
+ * @returns {string} normalized prefix; "" when there is no usable prefix
+ */
+function normalizePrefix(prefix) {
+	let p = String(prefix || "").trim();
+	if (p.startsWith("/")) p = p.slice(1);
+	// Second trim preserves the previous result for inputs like "/ foo".
+	p = p.trim();
+	return p.replace(/\/+$/, "");
+}
+
+/**
+ * Remove the configured prefix from a Qsirch directory path.
+ *
+ * @param {unknown} pathValue - directory path as reported by Qsirch
+ * @param {unknown} prefix - configured path prefix
+ * @returns {string|null} the remainder after the prefix ("" when the path
+ *   equals the prefix), the whole cleaned path when the prefix is empty,
+ *   or null when the path is not located under the prefix
+ */
+function stripPrefix(pathValue, prefix) {
+	const cleanPath = stripLeadingSlash(pathValue).trim();
+	const cleanPrefix = normalizePrefix(prefix);
+
+	// No prefix configured: nothing to strip.
+	if (cleanPrefix === "") return cleanPath;
+
+	if (cleanPath === cleanPrefix) return "";
+
+	// Require a "/" right after the prefix so "Foo" never matches "FooBar/...".
+	const head = `${cleanPrefix}/`;
+	return cleanPath.startsWith(head) ? cleanPath.slice(head.length) : null;
+}
+
+/**
+ * Build "<name>.<extension>" from the Qsirch `name` / `extension` fields.
+ *
+ * Returns null (reject) when:
+ * - name or extension is empty after trimming,
+ * - the name contains a path separator ("/" or "\") or a NUL character,
+ * - the extension is anything other than "pdf" (case-insensitive).
+ *
+ * @param {unknown} baseName - file name, usually without extension
+ * @param {unknown} ext - file extension without the dot
+ * @returns {string|null}
+ */
+function toFilename(baseName, ext) {
+	const safeBase = String(baseName || "").trim();
+	const safeExt = String(ext || "").trim();
+
+	if (!safeBase || !safeExt) return null;
+
+	// The filename is appended to a relative path, so it must be exactly one
+	// path segment. A separator inside the name (e.g. "../../x") would let the
+	// result escape the <branch>/<year>/<month>/<day> directory.
+	if (/[\\/\0]/.test(safeBase)) return null;
+
+	const lowerExt = safeExt.toLowerCase();
+
+	// We only support PDFs for the Lieferscheine workflow.
+	if (lowerExt !== "pdf") return null;
+
+	// Qsirch usually returns name without extension. If it already ends with ".pdf",
+	// keep it as-is to avoid double extensions.
+	if (safeBase.toLowerCase().endsWith(`.${lowerExt}`)) {
+		return safeBase;
+	}
+
+	// The extension keeps the casing reported by Qsirch.
+	return `${safeBase}.${safeExt}`;
+}
+
+/**
+ * Convert a Qsirch result item into our search item, or reject it.
+ *
+ * The item's directory must be exactly <prefix>/<branch>/<year>/<month>/<day>,
+ * the date must exist in the calendar, and the file must be a PDF. Anything
+ * else yields null.
+ *
+ * @param {any} item - Qsirch item; `path`, `name` and `extension` must be
+ *   non-empty strings
+ * @param {{ pathPrefix?: string }} [options] - prefix the directory must live
+ *   under; when omitted or empty, no prefix is stripped
+ * @returns {null | {
+ *   branch: string,
+ *   date: string,
+ *   year: string,
+ *   month: string,
+ *   day: string,
+ *   filename: string,
+ *   relativePath: string
+ * }}
+ */
+export function mapQsirchItemToSearchItem(item, { pathPrefix } = {}) {
+	const dirPath = typeof item?.path === "string" ? item.path : null;
+	const name = typeof item?.name === "string" ? item.name : null;
+	const ext = typeof item?.extension === "string" ? item.extension : null;
+
+	if (!dirPath || !name || !ext) return null;
+
+	const stripped = stripPrefix(dirPath, pathPrefix);
+	if (stripped === null) return null;
+
+	const parts = stripped.split("/").filter(Boolean);
+
+	// Must be exactly NLxx/YYYY/MM/DD
+	if (parts.length !== 4) return null;
+
+	const [branch, year, month, day] = parts;
+
+	if (!BRANCH_RE.test(branch)) return null;
+	if (!YEAR_RE.test(year)) return null;
+	if (!MONTH_RE.test(month)) return null;
+	if (!DAY_RE.test(day)) return null;
+
+	// The patterns above allow day 31 in every month; reject dates that do
+	// not exist (e.g. 2025/02/31) so `date` is always a real ISO date.
+	if (!isRealCalendarDate(year, month, day)) return null;
+
+	const filename = toFilename(name, ext);
+	if (!filename) return null;
+
+	const date = `${year}-${month}-${day}`;
+	const relativePath = `${branch}/${year}/${month}/${day}/${filename}`;
+
+	return { branch, date, year, month, day, filename, relativePath };
+}
+
+/**
+ * Check that day <= number of days in the given month (Gregorian leap rule).
+ * Expects segments that already passed YEAR_RE / MONTH_RE / DAY_RE.
+ *
+ * @param {string} year
+ * @param {string} month
+ * @param {string} day
+ * @returns {boolean}
+ */
+function isRealCalendarDate(year, month, day) {
+	const y = Number(year);
+	const m = Number(month);
+	const d = Number(day);
+
+	const isLeapYear = (y % 4 === 0 && y % 100 !== 0) || y % 400 === 0;
+	const daysInMonth = [31, isLeapYear ? 29 : 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
+
+	return d <= daysInMonth[m - 1];
+}

+ 52 - 0
lib/search/pathMapping.test.js

@@ -0,0 +1,52 @@
+/* @vitest-environment node */
+
+import { describe, it, expect } from "vitest";
+import { mapQsirchItemToSearchItem } from "./pathMapping.js";
+
+// Covers the happy path plus the two rejection paths (wrong prefix, wrong depth).
+describe("lib/search/pathMapping", () => {
+	const options = { pathPrefix: "/Niederlassungen" };
+	const mapWithPrefix = (item) => mapQsirchItemToSearchItem(item, options);
+
+	it("maps a valid Qsirch item into our search item format", () => {
+		const result = mapWithPrefix({
+			path: "Niederlassungen/NL20/2025/12/18",
+			name: "Test_File",
+			extension: "pdf",
+		});
+
+		expect(result).toEqual({
+			branch: "NL20",
+			date: "2025-12-18",
+			year: "2025",
+			month: "12",
+			day: "18",
+			filename: "Test_File.pdf",
+			relativePath: "NL20/2025/12/18/Test_File.pdf",
+		});
+	});
+
+	it("rejects unexpected paths outside prefix", () => {
+		const result = mapWithPrefix({
+			path: "Public/NL20/2025/12/18",
+			name: "Test",
+			extension: "pdf",
+		});
+
+		expect(result).toBeNull();
+	});
+
+	it("rejects paths not matching NLxx/YYYY/MM/DD", () => {
+		// The day segment is missing.
+		const result = mapWithPrefix({
+			path: "Niederlassungen/NL20/2025/12",
+			name: "Test",
+			extension: "pdf",
+		});
+
+		expect(result).toBeNull();
+	});
+});

+ 141 - 0
lib/search/queryBuilder.js

@@ -0,0 +1,141 @@
+/**
+ * Qsirch query builder.
+ *
+ * We build a Qsirch "q" string using documented operators like:
+ * - path:"/Public"
+ * - modified:"YYYY-MM-DD"
+ * - modified:"YYYY-MM-DD..YYYY-MM-DD"
+ * - comparison operators: modified:>=YYYY-MM-DD
+ * - extension:"pdf"
+ *
+ * Note:
+ * - Qsirch operator syntax must not include spaces between operator and value.
+ *   (Example: name:"QNAP" is correct, name: QNAP is incorrect.)
+ *
+ * Security:
+ * - We treat user input as plain search terms.
+ * - We strip characters that could turn user input into Qsirch operators.
+ */
+
+/**
+ * Reduce free-text user input to plain search terms that cannot act as
+ * Qsirch operators.
+ *
+ * - ":" and double quotes are replaced by spaces (operator syntax).
+ * - The standalone, upper-case token "OR" is removed so it cannot interfere
+ *   with our own OR chaining; words such as "order" or "oder" are untouched.
+ * - Whitespace runs are collapsed to a single space.
+ *
+ * @param {string|null} raw
+ * @returns {string|null} cleaned terms, or null when the input is not a
+ *   string or nothing is left after cleaning
+ */
+export function sanitizeUserQuery(raw) {
+	if (typeof raw !== "string") return null;
+
+	const trimmed = raw.trim();
+	if (trimmed === "") return null;
+
+	const cleaned = trimmed
+		.replace(/[:"]/g, " ")
+		.replace(/\bOR\b/g, " ")
+		.replace(/\s+/g, " ")
+		.trim();
+
+	return cleaned === "" ? null : cleaned;
+}
+
+/**
+ * Normalize the configured path prefix for use inside a path:"..." clause:
+ * exactly one leading slash, no trailing slash, and "/" for an empty value.
+ *
+ * A value made only of slashes (e.g. "//") also normalizes to "/" rather
+ * than collapsing to an empty string.
+ *
+ * @param {unknown} prefix
+ * @returns {string}
+ */
+function normalizePathPrefix(prefix) {
+	const trimmed = String(prefix || "").trim();
+
+	// Drop trailing slashes first; an empty or slash-only value becomes "".
+	const withoutTrailing = trimmed.replace(/\/+$/, "");
+	if (!withoutTrailing) return "/";
+
+	return withoutTrailing.startsWith("/") ? withoutTrailing : `/${withoutTrailing}`;
+}
+
+/**
+ * Build the date filter clause, or null when neither bound is given.
+ *
+ * - both bounds: <field>:"<from>..<to>" (range form documented by QNAP)
+ * - only from:   <field>:>=<from>
+ * - only to:     <field>:<=<to>
+ *
+ * @param {unknown} dateField - Qsirch date operator name; falls back to
+ *   "modified" when missing or blank
+ * @param {string|null} from - lower bound (callers pass ISO "YYYY-MM-DD")
+ * @param {string|null} to - upper bound (callers pass ISO "YYYY-MM-DD")
+ * @returns {string|null}
+ */
+function buildDateClause(dateField, from, to) {
+	// Apply the fallback after trimming so a whitespace-only field cannot
+	// produce a clause with an empty operator name (":<=...").
+	const field = String(dateField || "").trim() || "modified";
+
+	if (from && to) return `${field}:"${from}..${to}"`;
+	if (from) return `${field}:>=${from}`;
+	if (to) return `${field}:<=${to}`;
+
+	return null;
+}
+
+/**
+ * Build the path clause that scopes a search to one branch folder:
+ * path:"<prefix>/<branch>".
+ *
+ * When the prefix normalizes to the root "/", the branch is attached
+ * directly (path:"/NL20") instead of producing a double slash.
+ *
+ * @param {{ pathPrefix: unknown, branch: string }} input
+ * @returns {string}
+ */
+function buildBranchClause({ pathPrefix, branch }) {
+	const prefix = normalizePathPrefix(pathPrefix);
+	const base = prefix === "/" ? "" : prefix;
+	return `path:"${base}/${branch}"`;
+}
+
+/**
+ * Build the path clause that scopes a search to the whole prefix folder:
+ * path:"<prefix>".
+ *
+ * @param {{ pathPrefix: unknown }} input
+ * @returns {string}
+ */
+function buildGlobalClause({ pathPrefix }) {
+	return `path:"${normalizePathPrefix(pathPrefix)}"`;
+}
+
+/**
+ * Assemble the Qsirch "q" string from already-normalized search inputs.
+ *
+ * Every clause has the shape
+ *   [user terms] <path clause> extension:"pdf" [date clause]
+ * - mode "all": one clause scoped to the prefix folder
+ * - mode "branch": one clause scoped to branches[0]
+ * - any other mode (i.e. "multi"): one full clause per branch, joined by " OR "
+ *
+ * NOTE(review): branches are not checked here. A missing branch in "branch"
+ * mode ends up as the text "undefined" in the path, and an empty list in
+ * "multi" mode yields an empty string; callers presumably validate first.
+ *
+ * @param {{
+ *   mode: "branch"|"multi"|"all",
+ *   branches: string[]|null,
+ *   q: string|null,
+ *   from: string|null,
+ *   to: string|null,
+ *   dateField: "modified"|"created",
+ *   pathPrefix: string
+ * }} input
+ * @returns {string}
+ */
+export function buildQsirchQuery({
+	mode,
+	branches,
+	q,
+	from,
+	to,
+	dateField,
+	pathPrefix,
+}) {
+	const terms = sanitizeUserQuery(q);
+	const dateFilter = buildDateClause(dateField, from, to);
+
+	// Fixed ordering (terms, path, extension, date) keeps the output stable
+	// for debugging and tests; optional pieces are dropped when absent.
+	const withFilters = (pathClause) =>
+		[terms, pathClause, `extension:"pdf"`, dateFilter]
+			.filter(Boolean)
+			.join(" ");
+
+	const clauseForBranch = (branch) =>
+		withFilters(buildBranchClause({ pathPrefix, branch }));
+
+	switch (mode) {
+		case "all":
+			return withFilters(buildGlobalClause({ pathPrefix }));
+
+		case "branch":
+			return clauseForBranch(branches?.[0]);
+
+		default:
+			// "multi": repeat the complete clause for each branch so the
+			// extension/date filters cannot bind to only one OR segment.
+			return (branches || []).map((b) => clauseForBranch(b)).join(" OR ");
+	}
+}

+ 60 - 0
lib/search/queryBuilder.test.js

@@ -0,0 +1,60 @@
+/* @vitest-environment node */
+
+import { describe, it, expect } from "vitest";
+import { buildQsirchQuery, sanitizeUserQuery } from "./queryBuilder.js";
+
+// Covers sanitizing plus one query per mode (branch, multi, all).
+describe("lib/search/queryBuilder", () => {
+	// Inputs shared by every buildQsirchQuery call below.
+	const common = { dateField: "modified", pathPrefix: "/Niederlassungen" };
+
+	it("sanitizes user query (removes ':' and quotes)", () => {
+		const cleaned = sanitizeUserQuery('path:"/Public"');
+		expect(cleaned).toBe("path /Public");
+	});
+
+	it("builds branch-scoped query with date range", () => {
+		const query = buildQsirchQuery({
+			...common,
+			mode: "branch",
+			branches: ["NL20"],
+			q: "bridgestone",
+			from: "2025-12-01",
+			to: "2025-12-31",
+		});
+
+		for (const fragment of [
+			'path:"/Niederlassungen/NL20"',
+			'extension:"pdf"',
+			'modified:"2025-12-01..2025-12-31"',
+			"bridgestone",
+		]) {
+			expect(query).toContain(fragment);
+		}
+	});
+
+	it("builds multi-branch query by replicating filters per branch", () => {
+		const query = buildQsirchQuery({
+			...common,
+			mode: "multi",
+			branches: ["NL01", "NL02"],
+			q: "invoice",
+			from: "2025-01-01",
+			to: "2025-01-31",
+		});
+
+		// Each branch gets its own path clause, joined by OR.
+		for (const fragment of [
+			'path:"/Niederlassungen/NL01"',
+			'path:"/Niederlassungen/NL02"',
+			" OR ",
+			'extension:"pdf"',
+		]) {
+			expect(query).toContain(fragment);
+		}
+	});
+
+	it("builds global query for all", () => {
+		const query = buildQsirchQuery({
+			...common,
+			mode: "all",
+			branches: null,
+			q: null,
+			from: null,
+			to: "2025-12-31",
+		});
+
+		expect(query).toContain('path:"/Niederlassungen"');
+		expect(query).toContain("modified:<=2025-12-31");
+	});
+});