Переглянути джерело

RHL-006(refactor): implement process-local TTL caching for directory and file listings

Code_Uwe 8 годин тому
батько
коміт
57a661b956
2 змінених файлів з 288 додано та 64 видалено
  1. 226 63
      lib/storage.js
  2. 62 1
      lib/storage.test.js

+ 226 - 63
lib/storage.js

@@ -1,3 +1,8 @@
+// ---------------------------------------------------------------------------
+// Ordner: lib
+// Datei: storage.js
+// Relativer Pfad: lib/storage.js
+// ---------------------------------------------------------------------------
 // lib/storage.js
 // -----------------------------------------------------------------------------
 // Central abstraction layer for reading files and directories from the NAS
@@ -8,6 +13,12 @@
 //
 // - Read-only: no write/delete operations here.
 // - Async only: uses `fs/promises` + async/await to avoid blocking the event loop.
+//
+// RHL-006 (Caching / Freshness):
+// - We add a small process-local TTL micro-cache for directory listings.
+// - Goal: reduce filesystem load while keeping freshness predictable.
+// - Security note: RBAC is enforced in API routes BEFORE calling storage helpers,
+//   therefore caching here does not bypass auth/permissions.
 // -----------------------------------------------------------------------------
 
 import fs from "node:fs/promises"; // Promise-based filesystem API
@@ -47,31 +58,154 @@ function sortNumericStrings(a, b) {
 	return a.localeCompare(b, "en");
 }
 
+// -----------------------------------------------------------------------------
+// RHL-006: Storage micro-cache (process-local TTL cache)
+// -----------------------------------------------------------------------------
+//
+// Why a cache here (and not Next route caching)?
+// - We want to avoid any risk of shared caching across users/sessions.
+// - Next route caching / ISR-style caching is powerful but easy to misuse with auth.
+// - A micro-cache AFTER RBAC checks is safe and predictable.
+//
+// Important constraints:
+// - Process-local only: if we ever run multiple instances, caches are not shared.
+// - Short TTL only: we accept a small window where newly scanned PDFs might not
+//   appear immediately, but they will appear after TTL expires.
+// - Failure-safe: if a filesystem read throws, we do NOT keep a "poisoned" cache entry.
+// -----------------------------------------------------------------------------
+
+// TTLs chosen in the design (accepted by you):
+// - Branches/Years change rarely -> 60 seconds
+// - Months/Days/Files can change frequently -> 15 seconds
+const TTL_BRANCHES_MS = 60_000;
+const TTL_YEARS_MS = 60_000;
+const TTL_MONTHS_MS = 15_000;
+const TTL_DAYS_MS = 15_000;
+const TTL_FILES_MS = 15_000;
+
+// Internal cache store:
+// key -> { expiresAt, value } OR { expiresAt, promise }
+// - value: resolved cache value
+// - promise: in-flight load promise to collapse concurrent reads
+const __storageCache = new Map();
+
+/**
+ * Build a stable cache key for a given listing type.
+ *
+ * We include NAS_ROOT_PATH in the key so tests that change the env var do not
+ * accidentally reuse data from a previous test run.
+ *
+ * @param {string} type
+ * @param  {...string} parts
+ * @returns {string}
+ */
+function buildCacheKey(type, ...parts) {
+	const root = getRoot();
+	return [type, root, ...parts.map(String)].join("|");
+}
+
+/**
+ * Generic TTL-cache wrapper.
+ *
+ * Behavior:
+ * 1) If a load is already in-flight (promise exists), reuse it.
+ * 2) If a cached value exists and is not expired, return it.
+ * 3) Otherwise run loader(), store the result, and return it.
+ *
+ * Failure policy:
+ * - If loader() throws, the cache entry is removed so later calls can retry.
+ *
+ * @template T
+ * @param {string} key
+ * @param {number} ttlMs
+ * @param {() => Promise<T>} loader
+ * @returns {Promise<T>}
+ */
+async function withTtlCache(key, ttlMs, loader) {
+	const now = Date.now();
+	const existing = __storageCache.get(key);
+
+	// 1) Collapsing concurrent calls:
+	// If another request already triggered the same filesystem read,
+	// we reuse the same promise to avoid redundant fs operations.
+	if (existing?.promise) {
+		return existing.promise;
+	}
+
+	// 2) Serve cached values while still fresh:
+	if (existing && existing.value !== undefined && existing.expiresAt > now) {
+		return existing.value;
+	}
+
+	// 3) Cache miss or expired: start a new load.
+	const promise = (async () => {
+		try {
+			const value = await loader();
+
+			// Store resolved value with a fresh expiry timestamp.
+			__storageCache.set(key, {
+				value,
+				expiresAt: Date.now() + ttlMs,
+			});
+
+			return value;
+		} catch (err) {
+			// Do not keep failed results in cache.
+			__storageCache.delete(key);
+			throw err;
+		}
+	})();
+
+	// Store in-flight promise immediately so concurrent calls reuse it.
+	__storageCache.set(key, {
+		promise,
+		expiresAt: now + ttlMs,
+	});
+
+	return promise;
+}
+
+/**
+ * TEST-ONLY helper: clear the micro-cache.
+ *
+ * Why this exists:
+ * - Unit tests often mutate the filesystem fixture after calling list*() once.
+ * - Without a cache reset, tests could observe stale values.
+ *
+ * We intentionally export this with a loud name to discourage production usage.
+ */
+export function __clearStorageCacheForTests() {
+	__storageCache.clear();
+}
+
 // -----------------------------------------------------------------------------
 // 1. Branches (NL01, NL02, ...)
 // Path pattern: `${ROOT}/NLxx`
 // -----------------------------------------------------------------------------
 
 export async function listBranches() {
-	// Read the root directory of the NAS share.
-	// `withFileTypes: true` returns `Dirent` objects so we can call `isDirectory()`
-	// without extra stat() calls, which is more efficient.
-	const entries = await fs.readdir(fullPath(), { withFileTypes: true });
-
-	return (
-		entries
-			.filter(
-				(entry) =>
-					entry.isDirectory() && // only directories
-					entry.name !== "@Recently-Snapshot" && // ignore QNAP snapshot folder
-					/^NL\d+$/i.test(entry.name) // keep only names like "NL01", "NL02", ...
-			)
-			.map((entry) => entry.name)
-			// Sort by numeric branch number: NL1, NL2, ..., NL10
-			.sort((a, b) =>
-				sortNumericStrings(a.replace("NL", ""), b.replace("NL", ""))
-			)
-	);
+	// RHL-006: cache directory listing for 60 seconds (branches change rarely).
+	return withTtlCache(buildCacheKey("branches"), TTL_BRANCHES_MS, async () => {
+		// Read the root directory of the NAS share.
+		// `withFileTypes: true` returns `Dirent` objects so we can call `isDirectory()`
+		// without extra stat() calls, which is more efficient.
+		const entries = await fs.readdir(fullPath(), { withFileTypes: true });
+
+		return (
+			entries
+				.filter(
+					(entry) =>
+						entry.isDirectory() && // only directories
+						entry.name !== "@Recently-Snapshot" && // ignore QNAP snapshot folder
+						/^NL\d+$/i.test(entry.name) // keep only names like "NL01", "NL02", ...
+				)
+				.map((entry) => entry.name)
+				// Sort by numeric branch number: NL1, NL2, ..., NL10
+				.sort((a, b) =>
+					sortNumericStrings(a.replace("NL", ""), b.replace("NL", ""))
+				)
+		);
+	});
 }
 
 // -----------------------------------------------------------------------------
@@ -80,15 +214,22 @@ export async function listBranches() {
 // -----------------------------------------------------------------------------
 
 export async function listYears(branch) {
-	const dir = fullPath(branch);
-	const entries = await fs.readdir(dir, { withFileTypes: true });
-
-	return entries
-		.filter(
-			(entry) => entry.isDirectory() && /^\d{4}$/.test(entry.name) // exactly 4 digits → year folders like "2024"
-		)
-		.map((entry) => entry.name)
-		.sort(sortNumericStrings);
+	// RHL-006: cache directory listing for 60 seconds (years change rarely).
+	return withTtlCache(
+		buildCacheKey("years", branch),
+		TTL_YEARS_MS,
+		async () => {
+			const dir = fullPath(branch);
+			const entries = await fs.readdir(dir, { withFileTypes: true });
+
+			return entries
+				.filter(
+					(entry) => entry.isDirectory() && /^\d{4}$/.test(entry.name) // exactly 4 digits → year folders like "2024"
+				)
+				.map((entry) => entry.name)
+				.sort(sortNumericStrings);
+		}
+	);
 }
 
 // -----------------------------------------------------------------------------
@@ -97,17 +238,24 @@ export async function listYears(branch) {
 // -----------------------------------------------------------------------------
 
 export async function listMonths(branch, year) {
-	const dir = fullPath(branch, year);
-	const entries = await fs.readdir(dir, { withFileTypes: true });
-
-	return (
-		entries
-			.filter(
-				(entry) => entry.isDirectory() && /^\d{1,2}$/.test(entry.name) // supports "1" or "10", we normalize below
-			)
-			// Normalize to two digits so the UI shows "01", "02", ..., "12"
-			.map((entry) => entry.name.trim().padStart(2, "0"))
-			.sort(sortNumericStrings)
+	// RHL-006: cache directory listing for 15 seconds (months can change occasionally).
+	return withTtlCache(
+		buildCacheKey("months", branch, year),
+		TTL_MONTHS_MS,
+		async () => {
+			const dir = fullPath(branch, year);
+			const entries = await fs.readdir(dir, { withFileTypes: true });
+
+			return (
+				entries
+					.filter(
+						(entry) => entry.isDirectory() && /^\d{1,2}$/.test(entry.name) // supports "1" or "10", we normalize below
+					)
+					// Normalize to two digits so the UI shows "01", "02", ..., "12"
+					.map((entry) => entry.name.trim().padStart(2, "0"))
+					.sort(sortNumericStrings)
+			);
+		}
 	);
 }
 
@@ -117,15 +265,22 @@ export async function listMonths(branch, year) {
 // -----------------------------------------------------------------------------
 
 export async function listDays(branch, year, month) {
-	const dir = fullPath(branch, year, month);
-	const entries = await fs.readdir(dir, { withFileTypes: true });
-
-	return entries
-		.filter(
-			(entry) => entry.isDirectory() && /^\d{1,2}$/.test(entry.name) // supports "1" or "23"
-		)
-		.map((entry) => entry.name.trim().padStart(2, "0"))
-		.sort(sortNumericStrings);
+	// RHL-006: cache directory listing for 15 seconds (days change frequently with new scans).
+	return withTtlCache(
+		buildCacheKey("days", branch, year, month),
+		TTL_DAYS_MS,
+		async () => {
+			const dir = fullPath(branch, year, month);
+			const entries = await fs.readdir(dir, { withFileTypes: true });
+
+			return entries
+				.filter(
+					(entry) => entry.isDirectory() && /^\d{1,2}$/.test(entry.name) // supports "1" or "23"
+				)
+				.map((entry) => entry.name.trim().padStart(2, "0"))
+				.sort(sortNumericStrings);
+		}
+	);
 }
 
 // -----------------------------------------------------------------------------
@@ -134,20 +289,28 @@ export async function listDays(branch, year, month) {
 // -----------------------------------------------------------------------------
 
 export async function listFiles(branch, year, month, day) {
-	const dir = fullPath(branch, year, month, day);
-	const entries = await fs.readdir(dir);
-
-	return (
-		entries
-			// We only care about PDF files at the moment
-			.filter((name) => name.toLowerCase().endsWith(".pdf"))
-			.sort((a, b) => a.localeCompare(b, "en"))
-			.map((name) => ({
-				// Just the file name, e.g. "Stapel-1_Seiten-1_Zeit-1048.pdf"
-				name,
-				// Relative path from the NAS root, used for download URLs etc.
-				// Example: "NL01/2024/10/23/Stapel-1_Seiten-1_Zeit-1048.pdf"
-				relativePath: `${branch}/${year}/${month}/${day}/${name}`,
-			}))
+	// RHL-006: cache file listing for 15 seconds (new PDFs can appear at any time).
+	return withTtlCache(
+		buildCacheKey("files", branch, year, month, day),
+		TTL_FILES_MS,
+		async () => {
+			const dir = fullPath(branch, year, month, day);
+			const entries = await fs.readdir(dir);
+
+			return (
+				entries
+					// We only care about PDF files at the moment
+					.filter((name) => name.toLowerCase().endsWith(".pdf"))
+					.sort((a, b) => a.localeCompare(b, "en"))
+					.map((name) => ({
+						// Just the file name, e.g. "Stapel-1_Seiten-1_Zeit-1048.pdf"
+						name,
+						// Relative path from the NAS root, used for download URLs etc.
+						// Example: "NL01/2024/10/23/Stapel-1_Seiten-1_Zeit-1048.pdf"
+						relativePath: `${branch}/${year}/${month}/${day}/${name}`,
+					}))
+			);
+		}
 	);
 }
+// ---------------------------------------------------------------------------

+ 62 - 1
lib/storage.test.js

@@ -1,5 +1,19 @@
+// ---------------------------------------------------------------------------
+// Ordner: lib
+// Datei: storage.test.js
+// Relativer Pfad: lib/storage.test.js
+// ---------------------------------------------------------------------------
 // tests/lib/storage.test.js
-import { describe, it, expect, beforeAll, afterAll } from "vitest";
+import {
+	describe,
+	it,
+	expect,
+	beforeAll,
+	afterAll,
+	beforeEach,
+	afterEach,
+	vi,
+} from "vitest";
 import fs from "node:fs/promises";
 import os from "node:os";
 import path from "node:path";
@@ -10,6 +24,7 @@ import {
 	listMonths,
 	listDays,
 	listFiles,
+	__clearStorageCacheForTests,
 } from "@/lib/storage";
 
 let tmpRoot;
@@ -41,6 +56,19 @@ afterAll(async () => {
 	await fs.rm(tmpRoot, { recursive: true, force: true });
 });
 
+beforeEach(() => {
+	// RHL-006:
+	// Storage functions now use a TTL cache. We clear it between tests to ensure
+	// each test sees filesystem changes deterministically.
+	__clearStorageCacheForTests();
+});
+
+afterEach(() => {
+	// Safety: if a test enables fake timers, always restore real timers afterwards
+	// to avoid affecting unrelated tests.
+	vi.useRealTimers();
+});
+
 describe("storage: listBranches", () => {
 	it("returns branch names and filters snapshots", async () => {
 		const branches = await listBranches();
@@ -76,3 +104,36 @@ describe("storage: listFiles", () => {
 		]);
 	});
 });
+
+describe("storage: micro-cache (RHL-006)", () => {
+	it("keeps results stable within TTL and refreshes after TTL expires", async () => {
+		// Use a dedicated folder so this test doesn't interfere with other fixtures.
+		const dayDir = path.join(tmpRoot, "NL01", "2024", "10", "24");
+		await fs.mkdir(dayDir, { recursive: true });
+
+		// Initial file
+		await fs.writeFile(path.join(dayDir, "a.pdf"), "a");
+
+		// Fake timers allow us to advance time without real waiting.
+		vi.useFakeTimers();
+		const t0 = new Date("2025-01-01T00:00:00.000Z");
+		vi.setSystemTime(t0);
+
+		// First call populates the cache (TTL for files: 15s)
+		const first = await listFiles("NL01", "2024", "10", "24");
+		expect(first.map((f) => f.name)).toEqual(["a.pdf"]);
+
+		// Add a new file while the cache is still valid.
+		await fs.writeFile(path.join(dayDir, "b.pdf"), "b");
+
+		// Second call should still return the cached result (b.pdf not visible yet).
+		const second = await listFiles("NL01", "2024", "10", "24");
+		expect(second.map((f) => f.name)).toEqual(["a.pdf"]);
+
+		// Advance time beyond the TTL (15s) so the next call re-reads the directory.
+		vi.setSystemTime(t0.getTime() + 16_000);
+
+		const third = await listFiles("NL01", "2024", "10", "24");
+		expect(third.map((f) => f.name)).toEqual(["a.pdf", "b.pdf"]);
+	});
+});