fs.js 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246
  1. import fs from "node:fs/promises";
  2. import path from "node:path";
  3. import { ApiError, badRequest } from "@/lib/api/errors";
  4. import { decodeCursor, encodeCursor } from "@/lib/search/cursor";
  5. /**
  6. * FS provider (local/test fallback).
  7. *
  8. * Purpose:
  9. * - Provide a deterministic fallback without Qsirch for local development/tests.
  10. *
  11. * Behavior:
  12. * - Traverses the NAS-like folder structure under NAS_ROOT_PATH:
  13. * NLxx/YYYY/MM/DD/*.pdf
  14. * - Filters by:
  15. * - branch scope (branch/multi/all)
  16. * - date range (inclusive)
  17. * - q (best-effort: filename substring; optional small-file content substring)
  18. *
  19. * NOTE:
  20. * - This is not intended to be fast on real NAS-scale.
  21. * - Production should use SEARCH_PROVIDER=qsirch.
  22. */
  // Directory-name patterns for the NLxx/YYYY/MM/DD layout.
  // Each pattern is anchored, so the whole entry name has to match.

  // Branch folder: "NL" followed by one or more digits.
  23. const BRANCH_RE = /^NL\d+$/;
  // Year folder: exactly four digits.
  24. const YEAR_RE = /^\d{4}$/;
  // Month folder: two digits, 01 through 12.
  25. const MONTH_RE = /^(0[1-9]|1[0-2])$/;
  // Day folder: two digits, 01 through 31 (not checked against the month's real length).
  26. const DAY_RE = /^(0[1-9]|[12]\d|3[01])$/;
  /**
   * Read the storage root from the NAS_ROOT_PATH environment variable.
   *
   * @returns {string} The configured root path.
   * @throws {ApiError} Status 500 with code "FS_STORAGE_ERROR" when the
   *   variable is unset or empty.
   */
  function getNasRootOrThrow() {
    const { NAS_ROOT_PATH: nasRoot } = process.env;
    if (nasRoot) return nasRoot;

    // Missing configuration surfaces as a 500 with a generic message.
    throw new ApiError({
      status: 500,
      code: "FS_STORAGE_ERROR",
      message: "Internal server error",
    });
  }
  /**
   * Inclusive date-range check.
   *
   * All three values are ISO "YYYY-MM-DD" strings, so plain string comparison
   * orders them chronologically. A falsy bound means "unbounded" on that side.
   *
   * @param {string} date
   * @param {string|null} from Lower bound (inclusive) or falsy for none.
   * @param {string|null} to Upper bound (inclusive) or falsy for none.
   * @returns {boolean} True when `date` lies inside the range.
   */
  function isWithinRange(date, from, to) {
    const isBeforeStart = Boolean(from) && date < from;
    const isAfterEnd = Boolean(to) && date > to;
    return !(isBeforeStart || isAfterEnd);
  }
  /**
   * List a directory's entries as Dirent objects, best-effort.
   *
   * @param {string} dir Directory to read.
   * @returns {Promise<import("node:fs").Dirent[]|null>} The entries, or null
   *   when the directory could not be read for any reason.
   */
  async function safeReaddir(dir) {
    let entries = null;
    try {
      entries = await fs.readdir(dir, { withFileTypes: true });
    } catch {
      // Any read failure is reported to the caller as null.
    }
    return entries;
  }
  /**
   * Normalise a free-text query for case-insensitive substring matching.
   *
   * @param {unknown} q Raw query value.
   * @returns {string|null} The trimmed, lower-cased query, or null when `q` is
   *   not a string or is blank after trimming.
   */
  function normalizeQuery(q) {
    const cleaned = typeof q === "string" ? q.trim().toLowerCase() : "";
    return cleaned === "" ? null : cleaned;
  }
  /**
   * Read a file as UTF-8 text, but only when it is a regular file no larger
   * than `maxBytes`.
   *
   * @param {string} filePath Path of the file to read.
   * @param {number} [maxBytes=1_000_000] Largest file size that will be read.
   * @returns {Promise<string|null>} The decoded contents, or null when the path
   *   is not a regular file, exceeds the size limit, or any I/O call fails.
   */
  async function maybeReadSmallFileText(filePath, maxBytes = 1_000_000) {
    try {
      const info = await fs.stat(filePath);
      const isTooLarge = info.size > maxBytes;
      if (!info.isFile() || isTooLarge) return null;

      const contents = await fs.readFile(filePath);
      return contents.toString("utf8");
    } catch {
      // Best-effort: an unreadable file is treated as having no text.
      return null;
    }
  }
  /**
   * Build a short single-line excerpt of `text` around the first occurrence of
   * `needle`.
   *
   * Whitespace runs in `text` are collapsed to single spaces before searching.
   * Matching is case-insensitive: both the text and the needle are lower-cased
   * for the comparison, while the excerpt keeps the text's original casing.
   *
   * The excerpt starts up to 80 characters before the match and is at most 240
   * characters long. An ellipsis ("…") is added on each side that was cut.
   *
   * @param {string} text Source text to excerpt from.
   * @param {string} needle Substring to locate.
   * @returns {string|undefined} The excerpt, or undefined when either argument
   *   is empty, the text is blank, or the needle does not occur.
   */
  function buildSnippet(text, needle) {
    if (!text || !needle) return undefined;

    const collapsed = String(text).replace(/\s+/g, " ").trim();
    if (!collapsed) return undefined;

    // Lower-case the needle here too; otherwise a mixed-case needle could never
    // match the lower-cased haystack.
    const target = String(needle).toLowerCase();
    const hitIndex = collapsed.toLowerCase().indexOf(target);
    if (hitIndex === -1) return undefined;

    const MAX_LENGTH = 240;
    const LEAD = 80;
    const start = Math.max(0, hitIndex - LEAD);
    const end = Math.min(collapsed.length, start + MAX_LENGTH);
    const excerpt = collapsed.slice(start, end).trim();

    const prefix = start > 0 ? "…" : "";
    const suffix = end < collapsed.length ? "…" : "";
    return `${prefix}${excerpt}${suffix}`;
  }
  /**
   * True when no date starting with `prefix` can fall inside [from, to].
   *
   * `prefix` is a leading part of an ISO date ("YYYY" or "YYYY-MM"). Comparing
   * it with the equally long leading part of each bound is enough to rule out
   * every day below it. Non-string bounds are ignored here and left to the
   * day-level check.
   */
  function isPrefixOutsideRange(prefix, from, to) {
    const width = prefix.length;
    if (typeof from === "string" && from && prefix < from.slice(0, width)) return true;
    if (typeof to === "string" && to && prefix > to.slice(0, width)) return true;
    return false;
  }

  /** Names of the sub-directories of `parentDir` that match `nameRe` (empty when unreadable). */
  async function listSubdirNames(parentDir, nameRe) {
    const entries = await safeReaddir(parentDir);
    if (!entries) return [];
    return entries
      .filter((entry) => entry.isDirectory() && nameRe.test(entry.name))
      .map((entry) => entry.name);
  }

  /** Yield every branch/YYYY/MM/DD directory under `root` whose date is within [from, to]. */
  async function* walkDayDirs(root, branches, from, to) {
    for (const branch of branches) {
      const branchDir = path.join(root, branch);

      for (const year of await listSubdirNames(branchDir, YEAR_RE)) {
        // Skip whole years/months that cannot contain an in-range day, so
        // their sub-directories are never read.
        if (isPrefixOutsideRange(year, from, to)) continue;
        const yearDir = path.join(branchDir, year);

        for (const month of await listSubdirNames(yearDir, MONTH_RE)) {
          if (isPrefixOutsideRange(`${year}-${month}`, from, to)) continue;
          const monthDir = path.join(yearDir, month);

          for (const day of await listSubdirNames(monthDir, DAY_RE)) {
            const date = `${year}-${month}-${day}`;
            if (!isWithinRange(date, from, to)) continue;
            yield { branch, year, month, day, date, dayDir: path.join(monthDir, day) };
          }
        }
      }
    }
  }

  /**
   * Test one PDF against the query: filename substring first, then (only on a
   * filename miss) the content of small files.
   * Returns `{ snippet }` on a hit (snippet is undefined for filename hits) or null on a miss.
   */
  async function matchPdf(dayDir, filename, needle) {
    if (filename.toLowerCase().includes(needle)) return { snippet: undefined };

    const text = await maybeReadSmallFileText(path.join(dayDir, filename));
    if (text && text.toLowerCase().includes(needle)) {
      return { snippet: buildSnippet(text, needle) };
    }
    return null;
  }

  /**
   * Create the filesystem-backed search provider (local/test fallback).
   *
   * @returns {{ search: (input: object) => Promise<{ items: object[], nextCursor: string|null }> }}
   */
  export function createFsProvider() {
    return {
      /**
       * Walk NLxx/YYYY/MM/DD/*.pdf under NAS_ROOT_PATH and return one page of
       * matches, newest date first, then filename ascending.
       *
       * @param {{
       *   mode: "branch"|"multi"|"all",
       *   branches: string[]|null,
       *   q: string|null,
       *   from: string|null,
       *   to: string|null,
       *   limit: number,
       *   cursor: string|null
       * }} input
       * @returns {Promise<{ items: object[], nextCursor: string|null }>}
       *   `nextCursor` is null when the page reaches the end of the results.
       * @throws badRequest("VALIDATION_SEARCH_LIMIT") when `limit` is not a positive integer.
       * @throws badRequest("VALIDATION_SEARCH_CURSOR") when the decoded cursor mode is not "sync".
       * @throws {ApiError} when NAS_ROOT_PATH is not configured.
       */
      async search(input) {
        const { mode, branches, q, from, to, limit, cursor } = input || {};

        if (!Number.isInteger(limit) || limit < 1) {
          throw badRequest("VALIDATION_SEARCH_LIMIT", "Invalid limit parameter");
        }

        const decoded = decodeCursor(cursor || null);
        if (decoded.mode !== "sync") {
          throw badRequest("VALIDATION_SEARCH_CURSOR", "Invalid cursor");
        }
        const offset = decoded.offset;

        const root = getNasRootOrThrow();

        let scopeBranches;
        if (mode === "all") {
          // List every branch folder that exists on disk.
          scopeBranches = (await listSubdirNames(root, BRANCH_RE)).sort();
        } else {
          // Caller-supplied names become path segments, so accept only names
          // with the branch shape (this also rejects ".." and separators).
          // Duplicates are dropped so a file is never reported twice.
          const requested = Array.isArray(branches) ? branches : [];
          scopeBranches = [
            ...new Set(
              requested.filter((name) => typeof name === "string" && BRANCH_RE.test(name)),
            ),
          ];
        }

        if (scopeBranches.length === 0) {
          return { items: [], nextCursor: null };
        }

        const needle = normalizeQuery(q);
        const results = [];

        for await (const dir of walkDayDirs(root, scopeBranches, from, to)) {
          const { branch, year, month, day, date, dayDir } = dir;

          const files = await safeReaddir(dayDir);
          if (!files) continue;

          for (const file of files) {
            if (!file.isFile()) continue;
            if (!file.name.toLowerCase().endsWith(".pdf")) continue;

            const filename = file.name;
            const item = {
              branch,
              date,
              year,
              month,
              day,
              filename,
              relativePath: `${branch}/${year}/${month}/${day}/${filename}`,
            };

            // With a query, keep only hits and attach the (possibly undefined)
            // snippet. A date-only search carries no snippet key at all.
            if (needle) {
              const hit = await matchPdf(dayDir, filename, needle);
              if (!hit) continue;
              item.snippet = hit.snippet;
            }

            results.push(item);
          }
        }

        // Stable ordering for the FS fallback: newest dates first, then
        // filename ascending (German collation).
        results.sort((a, b) => {
          if (a.date !== b.date) return a.date < b.date ? 1 : -1;
          return String(a.filename).localeCompare(String(b.filename), "de");
        });

        // Offset pagination over the fully sorted result list.
        const page = results.slice(offset, offset + limit);
        const nextOffset = offset + page.length;
        const nextCursor =
          nextOffset < results.length
            ? encodeCursor({ v: 1, mode: "sync", offset: nextOffset })
            : null;

        return { items: page, nextCursor };
      },
    };
  }