  // storage.js (11 KB)
  // ---------------------------------------------------------------------------
  // Folder: lib
  // File: storage.js
  // Relative path: lib/storage.js
  // ---------------------------------------------------------------------------
  // lib/storage.js
  // -----------------------------------------------------------------------------
  // Central abstraction layer for reading files and directories from the NAS
  // share mounted at `NAS_ROOT_PATH` (e.g. `/mnt/niederlassungen`).
  //
  // All access to the branch/year/month/day/PDF structure should go through
  // these functions instead of using `fs` directly in route handlers.
  //
  // - Read-only: no write/delete operations here.
  // - Async only: uses `fs/promises` + async/await to avoid blocking the event loop.
  //
  // RHL-006 (Caching / Freshness):
  // - We add a small process-local TTL micro-cache for directory listings.
  // - Goal: reduce filesystem load while keeping freshness predictable.
  // - Security note: RBAC is enforced in API routes BEFORE calling storage helpers,
  //   therefore caching here does not bypass auth/permissions.
  // -----------------------------------------------------------------------------
  23. import fs from "node:fs/promises"; // Promise-based filesystem API
  24. import path from "node:path"; // Safe path utilities (handles separators)
  /**
   * Resolve the root directory of the NAS share from the environment.
   *
   * The value is read from `process.env.NAS_ROOT_PATH` on every call instead of
   * being captured at module load time, so tests (and the runtime) can change
   * the variable between calls. On the Linux app server this is typically
   * `/mnt/niederlassungen`.
   *
   * @returns {string} The configured NAS root path.
   * @throws {Error} If `NAS_ROOT_PATH` is unset or empty.
   */
  function getRoot() {
    const { NAS_ROOT_PATH: nasRoot } = process.env;
    if (nasRoot) {
      return nasRoot;
    }
    throw new Error("NAS_ROOT_PATH environment variable is not set");
  }
  /**
   * Build a path below the NAS root from a list of segments.
   *
   * Every segment is converted with `String()` and joined onto the root with
   * `path.join`, which also normalizes `.` and `..` components. Because the
   * segments ultimately come from callers (branch/year/month/day values), the
   * normalized result is checked to still lie inside the root; a segment list
   * such as `("NL01", "..", "..")` is rejected instead of silently pointing
   * outside the share.
   *
   * @param {...(string|number)} segments Path components below the root.
   * @returns {string} `path.join(root, ...segments)`.
   * @throws {Error} If `NAS_ROOT_PATH` is not set (raised by `getRoot`).
   * @throws {Error} If the joined path would leave the NAS root.
   */
  function fullPath(...segments) {
    const root = getRoot();
    const target = path.join(root, ...segments.map(String));

    // `path.relative` yields "" for the root itself, "a/b" for descendants and
    // a value starting with ".." (or an absolute path on another drive) for
    // anything outside the root.
    const relative = path.relative(root, target);
    const escapesRoot =
      relative === ".." ||
      relative.startsWith(`..${path.sep}`) ||
      path.isAbsolute(relative);

    if (escapesRoot) {
      throw new Error("Resolved path escapes NAS_ROOT_PATH");
    }

    return target;
  }
  /**
   * Comparator for strings that represent numbers, e.g. folder names.
   *
   * Values are compared by their base-10 integer value so that "2" sorts before
   * "10". When either value does not parse, or both parse to the same number
   * (e.g. "1" and "01"), the comparison falls back to `localeCompare` in the
   * "en" locale, so the resulting order does not depend on the input order.
   *
   * Both arguments are coerced with `String()` first, so the fallback never
   * fails on a non-string value.
   *
   * @param {string} a
   * @param {string} b
   * @returns {number} Negative if `a` sorts first, positive if `b` sorts first,
   *   0 only when both compare as equal strings.
   */
  function sortNumericStrings(a, b) {
    const left = String(a);
    const right = String(b);
    const leftNumber = Number.parseInt(left, 10);
    const rightNumber = Number.parseInt(right, 10);

    const bothNumeric =
      !Number.isNaN(leftNumber) && !Number.isNaN(rightNumber);
    if (bothNumeric && leftNumber !== rightNumber) {
      return leftNumber - rightNumber;
    }

    // Non-numeric input or a numeric tie: compare the text itself.
    return left.localeCompare(right, "en");
  }
  // -----------------------------------------------------------------------------
  // RHL-006: Storage micro-cache (process-local TTL cache)
  // -----------------------------------------------------------------------------
  //
  // Why a cache here (and not Next route caching)?
  // - We want to avoid any risk of shared caching across users/sessions.
  // - Next route caching / ISR-style caching is powerful but easy to misuse with auth.
  // - A micro-cache AFTER RBAC checks is safe and predictable.
  //
  // Important constraints:
  // - Process-local only: if we ever run multiple instances, caches are not shared.
  // - Short TTL only: we accept a small window where newly scanned PDFs might not
  //   appear immediately, but they will appear after the TTL expires.
  // - Failure-safe: if a filesystem read throws, we do NOT keep a "poisoned" cache entry.
  // -----------------------------------------------------------------------------
  // Time-to-live per listing type, in milliseconds (values agreed in the design):
  // - Branch and year folders change rarely            -> 60 seconds
  // - Month/day folders and PDF files change frequently -> 15 seconds
  const TTL_BRANCHES_MS = 60 * 1000;
  const TTL_YEARS_MS = 60 * 1000;
  const TTL_MONTHS_MS = 15 * 1000;
  const TTL_DAYS_MS = 15 * 1000;
  const TTL_FILES_MS = 15 * 1000;

  // Backing store of the micro-cache, keyed by the cache key string.
  // Each entry has one of two shapes:
  //   { expiresAt, value }   - a resolved listing
  //   { expiresAt, promise } - a load that is still in flight, kept so that
  //                            concurrent reads of the same key share one load
  const __storageCache = new Map();
  /**
   * Build a stable, collision-free cache key for a given listing type.
   *
   * The current NAS root is part of the key, so changing `NAS_ROOT_PATH`
   * (e.g. between tests) never reuses listings from a previous root.
   *
   * The key is the JSON encoding of `[type, root, ...parts]`. A plain
   * delimiter join would be ambiguous when a part contains the delimiter
   * (`"a", "b|c"` vs. `"a|b", "c"`); JSON quoting keeps the tuple boundaries.
   *
   * @param {string} type Listing type, e.g. "branches" or "files".
   * @param {...(string|number)} parts Path components identifying the listing;
   *   each is converted with `String()`.
   * @returns {string}
   * @throws {Error} If `NAS_ROOT_PATH` is not set (raised by `getRoot`).
   */
  function buildCacheKey(type, ...parts) {
    const root = getRoot();
    return JSON.stringify([type, root, ...parts.map(String)]);
  }
  /**
   * Generic TTL-cache wrapper around an async loader.
   *
   * Behavior:
   * 1) If a load for `key` is already in flight, its promise is reused so that
   *    concurrent callers trigger only one loader run.
   * 2) If a resolved value exists and has not expired, it is returned.
   *    (A loader result of `undefined` is never treated as a cached value.)
   * 3) Otherwise `loader()` is started, its result is stored with a fresh
   *    expiry of `ttlMs` from completion time, and the result is returned.
   *
   * Failure policy:
   * - If the loader fails - by rejecting or by throwing synchronously - the
   *   entry is removed so later calls can retry.
   * - A load only writes to (or deletes from) the cache while its own in-flight
   *   entry is still the current one. If the cache was cleared or the entry was
   *   replaced while the load was running, the cache is left untouched, so a
   *   late result cannot resurrect stale data after a clear.
   *
   * Note: the cached value itself is returned (not a copy), so callers share
   * the same object.
   *
   * @template T
   * @param {string} key Cache key.
   * @param {number} ttlMs Lifetime of a resolved value in milliseconds.
   * @param {() => Promise<T>} loader Produces the value on a miss.
   * @returns {Promise<T>}
   */
  async function withTtlCache(key, ttlMs, loader) {
    const now = Date.now();
    const existing = __storageCache.get(key);

    // 1) Collapse concurrent calls onto the load that is already running.
    if (existing?.promise) {
      return existing.promise;
    }

    // 2) Serve the cached value while it is still fresh.
    if (existing && existing.value !== undefined && existing.expiresAt > now) {
      return existing.value;
    }

    // 3) Miss or expired. Register the in-flight entry BEFORE invoking the
    //    loader: a loader that throws synchronously then hits the cleanup below
    //    while the entry is present, instead of leaving a rejected promise
    //    cached forever.
    const entry = { promise: undefined, expiresAt: now + ttlMs };
    __storageCache.set(key, entry);
    const isCurrent = () => __storageCache.get(key) === entry;

    entry.promise = (async () => {
      try {
        const value = await loader();
        if (isCurrent()) {
          // Replace the in-flight entry with the resolved value.
          __storageCache.set(key, {
            value,
            expiresAt: Date.now() + ttlMs,
          });
        }
        return value;
      } catch (err) {
        // Never keep a failed load; only remove our own entry.
        if (isCurrent()) {
          __storageCache.delete(key);
        }
        throw err;
      }
    })();

    return entry.promise;
  }
  /**
   * TEST-ONLY helper: drop every entry from the storage micro-cache.
   *
   * Unit tests typically change the filesystem fixture after a first list*()
   * call; without resetting the cache they would keep seeing the earlier
   * listing until its TTL expires.
   *
   * The deliberately loud name is meant to discourage use in production code.
   *
   * @returns {void}
   */
  export function __clearStorageCacheForTests() {
    __storageCache.clear();
  }
  // -----------------------------------------------------------------------------
  // 1. Branches (NL01, NL02, ...)
  //    Path pattern: `${ROOT}/NLxx`
  // -----------------------------------------------------------------------------
  /**
   * List the branch folders directly below the NAS root.
   *
   * Only directories whose name is "NL" followed by digits are returned; the
   * match is case-insensitive. Everything else (including the QNAP
   * "@Recently-Snapshot" folder) fails that pattern and is skipped.
   *
   * The result is sorted by branch number (NL1, NL2, ..., NL10). The two-letter
   * prefix is removed by position, so lower-case names such as "nl02" are
   * ordered by their number as well.
   *
   * The listing is cached for `TTL_BRANCHES_MS` (RHL-006).
   *
   * @returns {Promise<string[]>} Branch folder names as found on disk.
   */
  export async function listBranches() {
    return withTtlCache(buildCacheKey("branches"), TTL_BRANCHES_MS, async () => {
      // `withFileTypes: true` yields Dirent objects, so `isDirectory()` needs
      // no extra stat() call per entry.
      const entries = await fs.readdir(fullPath(), { withFileTypes: true });
      const branchPattern = /^NL\d+$/i;
      const prefixLength = "NL".length;

      return entries
        .filter((entry) => entry.isDirectory() && branchPattern.test(entry.name))
        .map((entry) => entry.name)
        .sort((a, b) =>
          sortNumericStrings(a.slice(prefixLength), b.slice(prefixLength))
        );
    });
  }
  // -----------------------------------------------------------------------------
  // 2. Years (2023, 2024, ...)
  //    Path pattern: `${ROOT}/${branch}/${year}`
  // -----------------------------------------------------------------------------
  /**
   * List the year folders of a branch.
   *
   * Only directories whose name consists of exactly four digits (e.g. "2024")
   * are returned, sorted in ascending numeric order. The listing is cached for
   * `TTL_YEARS_MS` (RHL-006).
   *
   * @param {string} branch Branch folder name, e.g. "NL01".
   * @returns {Promise<string[]>} Year folder names.
   */
  export async function listYears(branch) {
    const cacheKey = buildCacheKey("years", branch);

    const loadYears = async () => {
      const dirents = await fs.readdir(fullPath(branch), {
        withFileTypes: true,
      });

      const years = [];
      for (const dirent of dirents) {
        if (dirent.isDirectory() && /^\d{4}$/.test(dirent.name)) {
          years.push(dirent.name);
        }
      }

      years.sort(sortNumericStrings);
      return years;
    };

    return withTtlCache(cacheKey, TTL_YEARS_MS, loadYears);
  }
  // -----------------------------------------------------------------------------
  // 3. Months (01–12)
  //    Path pattern: `${ROOT}/${branch}/${year}/${month}`
  // -----------------------------------------------------------------------------
  /**
   * List the month folders of a branch/year as two-digit strings.
   *
   * Directories named with one or two digits are accepted and normalized to
   * two digits ("1" -> "01") so the UI shows "01", "02", ..., "12". If both an
   * unpadded and a padded folder exist ("1" and "01"), the month is reported
   * once. Results are sorted in ascending numeric order and cached for
   * `TTL_MONTHS_MS` (RHL-006).
   *
   * Note: the returned value is the normalized form, which differs from the
   * on-disk name when the folder is unpadded.
   *
   * @param {string} branch Branch folder name, e.g. "NL01".
   * @param {string|number} year Year folder name, e.g. "2024".
   * @returns {Promise<string[]>} Two-digit month strings without duplicates.
   */
  export async function listMonths(branch, year) {
    return withTtlCache(
      buildCacheKey("months", branch, year),
      TTL_MONTHS_MS,
      async () => {
        const dir = fullPath(branch, year);
        const entries = await fs.readdir(dir, { withFileTypes: true });

        // The pattern admits digits only, so no trimming is needed before padding.
        const months = entries
          .filter((entry) => entry.isDirectory() && /^\d{1,2}$/.test(entry.name))
          .map((entry) => entry.name.padStart(2, "0"));

        // Padding can map two folders to the same month; keep each once.
        return [...new Set(months)].sort(sortNumericStrings);
      }
    );
  }
  // -----------------------------------------------------------------------------
  // 4. Days (01–31)
  //    Path pattern: `${ROOT}/${branch}/${year}/${month}/${day}`
  // -----------------------------------------------------------------------------
  /**
   * List the day folders of a branch/year/month as two-digit strings.
   *
   * Directories named with one or two digits are accepted and normalized to
   * two digits ("1" -> "01"). If both an unpadded and a padded folder exist
   * ("1" and "01"), the day is reported once. Results are sorted in ascending
   * numeric order and cached for `TTL_DAYS_MS` (RHL-006).
   *
   * Note: the returned value is the normalized form, which differs from the
   * on-disk name when the folder is unpadded.
   *
   * @param {string} branch Branch folder name, e.g. "NL01".
   * @param {string|number} year Year folder name, e.g. "2024".
   * @param {string|number} month Month folder name as it exists on disk.
   * @returns {Promise<string[]>} Two-digit day strings without duplicates.
   */
  export async function listDays(branch, year, month) {
    return withTtlCache(
      buildCacheKey("days", branch, year, month),
      TTL_DAYS_MS,
      async () => {
        const dir = fullPath(branch, year, month);
        const entries = await fs.readdir(dir, { withFileTypes: true });

        // The pattern admits digits only, so no trimming is needed before padding.
        const days = entries
          .filter((entry) => entry.isDirectory() && /^\d{1,2}$/.test(entry.name))
          .map((entry) => entry.name.padStart(2, "0"));

        // Padding can map two folders to the same day; keep each once.
        return [...new Set(days)].sort(sortNumericStrings);
      }
    );
  }
  // -----------------------------------------------------------------------------
  // 5. Files (PDFs) for a given day
  //    Path pattern: `${ROOT}/${branch}/${year}/${month}/${day}/<file>.pdf`
  // -----------------------------------------------------------------------------
  /**
   * List the PDF files stored for one day.
   *
   * Entries whose name ends in ".pdf" (case-insensitive) are returned, sorted
   * by name with `localeCompare` in the "en" locale. Directories are skipped
   * even if their name ends in ".pdf", so every returned item is something
   * other than a folder. The listing is cached for `TTL_FILES_MS` (RHL-006).
   *
   * `relativePath` is built from the arguments exactly as passed, joined with
   * "/", e.g. "NL01/2024/10/23/Stapel-1_Seiten-1_Zeit-1048.pdf".
   *
   * @param {string} branch Branch folder name, e.g. "NL01".
   * @param {string|number} year Year folder name.
   * @param {string|number} month Month folder name as it exists on disk.
   * @param {string|number} day Day folder name as it exists on disk.
   * @returns {Promise<Array<{name: string, relativePath: string}>>}
   */
  export async function listFiles(branch, year, month, day) {
    return withTtlCache(
      buildCacheKey("files", branch, year, month, day),
      TTL_FILES_MS,
      async () => {
        const dir = fullPath(branch, year, month, day);
        // Dirent objects let us exclude directories without extra stat() calls.
        const entries = await fs.readdir(dir, { withFileTypes: true });

        return entries
          .filter(
            (entry) =>
              !entry.isDirectory() && entry.name.toLowerCase().endsWith(".pdf")
          )
          .map((entry) => entry.name)
          .sort((a, b) => a.localeCompare(b, "en"))
          .map((name) => ({
            // Just the file name, e.g. "Stapel-1_Seiten-1_Zeit-1048.pdf"
            name,
            // Path relative to the NAS root, used for download URLs etc.
            relativePath: `${branch}/${year}/${month}/${day}/${name}`,
          }));
      }
    );
  }
  280. // ---------------------------------------------------------------------------