// ---------------------------------------------------------------------------
// Folder: lib
// File: storage.js
// Relative path: lib/storage.js
// ---------------------------------------------------------------------------
// lib/storage.js
// -----------------------------------------------------------------------------
// Central abstraction layer for reading files and directories from the NAS
// share mounted at `NAS_ROOT_PATH` (e.g. `/mnt/niederlassungen`).
//
// All access to the branch/year/month/day/PDF structure should go through
// these functions instead of using `fs` directly in route handlers.
//
// - Read-only: no write/delete operations here.
// - Async only: uses `fs/promises` + async/await to avoid blocking the event loop.
//
// RHL-006 (Caching / Freshness):
// - We add a small process-local TTL micro-cache for directory listings.
// - Goal: reduce filesystem load while keeping freshness predictable.
// - Security note: RBAC is enforced in API routes BEFORE calling storage helpers,
//   therefore caching here does not bypass auth/permissions.
// -----------------------------------------------------------------------------

import fs from "node:fs/promises"; // Promise-based filesystem API
import path from "node:path"; // Safe path utilities (handles separators)

/**
 * Resolve the root directory of the NAS share from the environment.
 *
 * The value is read on every call (never cached at module load time) so that
 * tests and the runtime can change `NAS_ROOT_PATH` between calls.
 *
 * @returns {string} The configured NAS root path.
 * @throws {Error} If `NAS_ROOT_PATH` is unset or empty.
 */
function getRoot() {
  const root = process.env.NAS_ROOT_PATH;
  if (!root) {
    throw new Error("NAS_ROOT_PATH environment variable is not set");
  }
  return root;
}

/**
 * Build a path below the NAS root from a list of segments.
 *
 * Segments are stringified and joined with `path.join`. Because the segments
 * usually originate from request parameters, the joined result is checked to
 * still be located inside the root: a segment such as `".."` must never let a
 * caller read directories outside the share.
 *
 * @param {...(string|number)} segments Path segments below the root.
 * @returns {string} The joined path (the root itself when no segment is given).
 * @throws {Error} If the root is not configured or the result leaves the root.
 */
function fullPath(...segments) {
  const root = getRoot();
  const target = path.join(root, ...segments.map(String));

  // `path.relative` yields "..", "../something" (or an absolute path on
  // Windows when drives differ) whenever `target` is not inside `root`.
  // Names that merely start with two dots (e.g. "..foo") stay allowed.
  const relative = path.relative(root, target);
  const escapesRoot =
    relative === ".." ||
    relative.startsWith(`..${path.sep}`) ||
    path.isAbsolute(relative);
  if (escapesRoot) {
    throw new Error("Resolved path escapes NAS_ROOT_PATH");
  }

  return target;
}

/**
 * Comparator for strings that represent numbers.
 *
 * Ensures "2" sorts before "10" (2 < 10). If either side cannot be parsed as
 * an integer, falls back to a locale-aware string comparison.
 *
 * @param {string} a
 * @param {string} b
 * @returns {number} Negative, zero, or positive as for `Array.prototype.sort`.
 */
function sortNumericStrings(a, b) {
  const na = Number.parseInt(a, 10);
  const nb = Number.parseInt(b, 10);

  if (!Number.isNaN(na) && !Number.isNaN(nb)) {
    return na - nb;
  }

  // Fallback to localeCompare if parsing fails.
  return a.localeCompare(b, "en");
}

// -----------------------------------------------------------------------------
// RHL-006: Storage micro-cache (process-local TTL cache)
// -----------------------------------------------------------------------------
//
// Why a cache here (and not Next route caching)?
// - We want to avoid any risk of shared caching across users/sessions.
// - Next route caching / ISR-style caching is powerful but easy to misuse with auth.
// - A micro-cache AFTER RBAC checks is safe and predictable.
//
// Important constraints:
// - Process-local only: if we ever run multiple instances, caches are not shared.
// - Short TTL only: we accept a small window where newly scanned PDFs might not
//   appear immediately, but they will appear after the TTL expires.
// - Failure-safe: if a filesystem read throws, we do NOT keep a "poisoned" cache entry.
// -----------------------------------------------------------------------------

// TTLs as agreed in the design:
// - Branches/Years change rarely -> 60 seconds
// - Months/Days/Files can change frequently -> 15 seconds
const TTL_BRANCHES_MS = 60_000;
const TTL_YEARS_MS = 60_000;
const TTL_MONTHS_MS = 15_000;
const TTL_DAYS_MS = 15_000;
const TTL_FILES_MS = 15_000;

// Internal cache store:
//   key -> { expiresAt, value } OR { expiresAt, promise }
// - value:   resolved cache value
// - promise: in-flight load promise to collapse concurrent reads
const __storageCache = new Map();

/**
 * Build a stable cache key for a given listing type.
 *
 * NAS_ROOT_PATH is part of the key so tests that change the env var do not
 * accidentally reuse data from a previous test run. The parts are serialized
 * as a JSON array so that a separator character inside a part can never make
 * two different part lists produce the same key.
 *
 * @param {string} type Listing type, e.g. "branches" or "files".
 * @param {...(string|number)} parts Path parts identifying the listing.
 * @returns {string}
 * @throws {Error} If `NAS_ROOT_PATH` is not configured.
 */
function buildCacheKey(type, ...parts) {
  const root = getRoot();
  return JSON.stringify([type, root, ...parts.map(String)]);
}

/**
 * Generic TTL-cache wrapper.
 *
 * Behavior:
 *  1) If a load is already in-flight (promise exists), reuse it.
 *  2) If a cached value exists and is not expired, return it.
 *  3) Otherwise run loader(), store the result, and return it.
 *
 * Failure policy:
 *  - If loader() throws or rejects, the cache entry is removed so later calls
 *    can retry.
 *
 * Ownership rule:
 *  - A load only writes to (or deletes from) the cache while its own in-flight
 *    entry is still the one registered under `key`. If the cache was cleared
 *    or the entry replaced in the meantime, the result is still returned to
 *    the waiting callers but is not stored.
 *
 * @template T
 * @param {string} key
 * @param {number} ttlMs
 * @param {() => Promise<T>} loader
 * @returns {Promise<T>}
 */
async function withTtlCache(key, ttlMs, loader) {
  const now = Date.now();
  const existing = __storageCache.get(key);

  // 1) Collapse concurrent calls: reuse the promise of a read that another
  //    request already triggered.
  if (existing?.promise) {
    return existing.promise;
  }

  // 2) Serve cached values while still fresh.
  if (existing && existing.value !== undefined && existing.expiresAt > now) {
    return existing.value;
  }

  // 3) Cache miss or expired: register the in-flight entry BEFORE starting the
  //    load, so that even a loader that throws synchronously is cleaned up by
  //    the catch branch below instead of leaving a rejected promise behind.
  const inFlight = { promise: undefined, expiresAt: now + ttlMs };
  __storageCache.set(key, inFlight);

  inFlight.promise = (async () => {
    try {
      const value = await loader();

      // Store the resolved value with a fresh expiry timestamp, but only if
      // this load still owns the slot (see "Ownership rule").
      if (__storageCache.get(key) === inFlight) {
        __storageCache.set(key, { value, expiresAt: Date.now() + ttlMs });
      }
      return value;
    } catch (err) {
      // Do not keep failed results in the cache; never remove a newer entry.
      if (__storageCache.get(key) === inFlight) {
        __storageCache.delete(key);
      }
      throw err;
    }
  })();

  return inFlight.promise;
}

/**
 * TEST-ONLY helper: clear the micro-cache.
 *
 * Why this exists:
 * - Unit tests often mutate the filesystem fixture after calling list*() once.
 * - Without a cache reset, tests could observe stale values.
 *
 * We intentionally export this with a loud name to discourage production usage.
 */
export function __clearStorageCacheForTests() {
  __storageCache.clear();
}

/**
 * Read `dir` and return its one- or two-digit directory names, normalized to
 * two digits ("1" -> "01"), de-duplicated and sorted numerically.
 *
 * De-duplication matters when both "1" and "01" exist on disk: both normalize
 * to "01" and must appear only once in the listing.
 *
 * @param {string} dir Directory to read.
 * @returns {Promise<string[]>}
 */
async function listTwoDigitDirectories(dir) {
  const entries = await fs.readdir(dir, { withFileTypes: true });
  const normalized = entries
    .filter((entry) => entry.isDirectory() && /^\d{1,2}$/.test(entry.name))
    .map((entry) => entry.name.padStart(2, "0"));

  return [...new Set(normalized)].sort(sortNumericStrings);
}

// -----------------------------------------------------------------------------
// 1. Branches (NL01, NL02, ...)
//    Path pattern: `${ROOT}/NLxx`
// -----------------------------------------------------------------------------

/**
 * List all branch directories directly below the NAS root.
 *
 * @returns {Promise<string[]>} Branch names sorted by their numeric part.
 */
export async function listBranches() {
  // RHL-006: cache directory listing for 60 seconds (branches change rarely).
  return withTtlCache(buildCacheKey("branches"), TTL_BRANCHES_MS, async () => {
    // `withFileTypes: true` returns `Dirent` objects so we can call
    // `isDirectory()` without extra stat() calls.
    const entries = await fs.readdir(fullPath(), { withFileTypes: true });

    return (
      entries
        .filter(
          (entry) =>
            entry.isDirectory() && // only directories
            entry.name !== "@Recently-Snapshot" && // ignore QNAP snapshot folder
            /^NL\d+$/i.test(entry.name) // keep only names like "NL01", "NL02", ...
        )
        .map((entry) => entry.name)
        // Sort by numeric branch number: NL1, NL2, ..., NL10.
        // The filter accepts the prefix in any letter case, so strip it by
        // position rather than by a case-sensitive text replace.
        .sort((a, b) => sortNumericStrings(a.slice(2), b.slice(2)))
    );
  });
}

// -----------------------------------------------------------------------------
// 2. Years (2023, 2024, ...)
//    Path pattern: `${ROOT}/${branch}/${year}`
// -----------------------------------------------------------------------------

/**
 * List the year directories of a branch.
 *
 * @param {string} branch Branch directory name, e.g. "NL01".
 * @returns {Promise<string[]>} Four-digit year names in ascending order.
 */
export async function listYears(branch) {
  // RHL-006: cache directory listing for 60 seconds (years change rarely).
  return withTtlCache(
    buildCacheKey("years", branch),
    TTL_YEARS_MS,
    async () => {
      const entries = await fs.readdir(fullPath(branch), {
        withFileTypes: true,
      });

      return entries
        .filter(
          // exactly 4 digits -> year folders like "2024"
          (entry) => entry.isDirectory() && /^\d{4}$/.test(entry.name)
        )
        .map((entry) => entry.name)
        .sort(sortNumericStrings);
    }
  );
}

// -----------------------------------------------------------------------------
// 3. Months (01–12)
//    Path pattern: `${ROOT}/${branch}/${year}/${month}`
// -----------------------------------------------------------------------------

/**
 * List the month directories of a branch/year.
 *
 * @param {string} branch
 * @param {string|number} year
 * @returns {Promise<string[]>} Two-digit month names ("01", "02", ...).
 */
export async function listMonths(branch, year) {
  // RHL-006: cache directory listing for 15 seconds (months can change occasionally).
  return withTtlCache(
    buildCacheKey("months", branch, year),
    TTL_MONTHS_MS,
    () => listTwoDigitDirectories(fullPath(branch, year))
  );
}

// -----------------------------------------------------------------------------
// 4. Days (01–31)
//    Path pattern: `${ROOT}/${branch}/${year}/${month}/${day}`
// -----------------------------------------------------------------------------

/**
 * List the day directories of a branch/year/month.
 *
 * @param {string} branch
 * @param {string|number} year
 * @param {string|number} month
 * @returns {Promise<string[]>} Two-digit day names ("01", "02", ...).
 */
export async function listDays(branch, year, month) {
  // RHL-006: cache directory listing for 15 seconds (days change frequently with new scans).
  return withTtlCache(
    buildCacheKey("days", branch, year, month),
    TTL_DAYS_MS,
    () => listTwoDigitDirectories(fullPath(branch, year, month))
  );
}

// -----------------------------------------------------------------------------
// 5. Files (PDFs) for a given day
//    Path pattern: `${ROOT}/${branch}/${year}/${month}/${day}/<name>.pdf`
// -----------------------------------------------------------------------------

/**
 * List the PDF files of a given day.
 *
 * @param {string} branch
 * @param {string|number} year
 * @param {string|number} month
 * @param {string|number} day
 * @returns {Promise<Array<{ name: string, relativePath: string }>>}
 *   Entries sorted by file name. `relativePath` is relative to the NAS root.
 */
export async function listFiles(branch, year, month, day) {
  // RHL-006: cache file listing for 15 seconds (new PDFs can appear at any time).
  return withTtlCache(
    buildCacheKey("files", branch, year, month, day),
    TTL_FILES_MS,
    async () => {
      const entries = await fs.readdir(fullPath(branch, year, month, day), {
        withFileTypes: true,
      });

      return (
        entries
          // We only care about PDF files at the moment. A sub-directory whose
          // name happens to end in ".pdf" is not a downloadable file.
          .filter(
            (entry) =>
              !entry.isDirectory() &&
              entry.name.toLowerCase().endsWith(".pdf")
          )
          .map((entry) => entry.name)
          .sort((a, b) => a.localeCompare(b, "en"))
          .map((name) => ({
            // Just the file name, e.g. "Stapel-1_Seiten-1_Zeit-1048.pdf"
            name,
            // Relative path from the NAS root, used for download URLs etc.
            // Example: "NL01/2024/10/23/Stapel-1_Seiten-1_Zeit-1048.pdf"
            relativePath: `${branch}/${year}/${month}/${day}/${name}`,
          }))
      );
    }
  );
}

// ---------------------------------------------------------------------------