  1. """Image Cache Manager for pre-generating and managing image previews."""
  2. import os
  3. import json
  4. import asyncio
  5. import logging
  6. from pathlib import Path
  7. from modules.core.pattern_manager import list_theta_rho_files, THETA_RHO_DIR, parse_theta_rho_file
  8. logger = logging.getLogger(__name__)
  9. # Global cache progress state
  10. cache_progress = {
  11. "is_running": False,
  12. "total_files": 0,
  13. "processed_files": 0,
  14. "current_file": "",
  15. "stage": "idle", # idle, metadata, images, complete
  16. "error": None
  17. }
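
# Illustrative snapshot of this dict mid-run (filename and counts are hypothetical):
#     {"is_running": True, "total_files": 120, "processed_files": 45,
#      "current_file": "custom_patterns/spiral.thr", "stage": "images", "error": None}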

# Lock to prevent race conditions when writing to the metadata cache.
# Multiple concurrent tasks (from asyncio.gather) can try to read-modify-write simultaneously.
# Lazily initialized to avoid "attached to a different loop" errors.
_metadata_cache_lock: "asyncio.Lock | None" = None


def _get_metadata_cache_lock() -> asyncio.Lock:
    """Get or create the metadata cache lock in the current event loop."""
    global _metadata_cache_lock
    if _metadata_cache_lock is None:
        _metadata_cache_lock = asyncio.Lock()
    return _metadata_cache_lock

# Constants
CACHE_DIR = os.path.join(THETA_RHO_DIR, "cached_images")
METADATA_CACHE_FILE = "metadata_cache.json"  # Now in the root directory

# Cache schema version - increment when the structure changes
CACHE_SCHEMA_VERSION = 1

# Expected cache schema structure
EXPECTED_CACHE_SCHEMA = {
    'version': CACHE_SCHEMA_VERSION,
    'structure': {
        'mtime': 'number',
        'metadata': {
            'first_coordinate': {'x': 'number', 'y': 'number'},
            'last_coordinate': {'x': 'number', 'y': 'number'},
            'total_coordinates': 'number'
        }
    }
}
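
# Illustrative example of a valid metadata_cache.json on disk (all values hypothetical):
#     {
#         "version": 1,
#         "data": {
#             "custom_patterns/spiral.thr": {
#                 "mtime": 1700000000.0,
#                 "metadata": {
#                     "first_coordinate": {"x": 0.0, "y": 0.0},
#                     "last_coordinate": {"x": 3.14, "y": 1.0},
#                     "total_coordinates": 4096
#                 }
#             }
#         }
#     }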

def validate_cache_schema(cache_data):
    """Validate that cache data matches the expected schema structure."""
    try:
        if not isinstance(cache_data, dict):
            return False
        # Check for the version field - if missing, it's the old format
        cache_version = cache_data.get('version')
        if cache_version is None:
            logger.info("Cache file missing version info - treating as outdated schema")
            return False
        # Check if the version matches the current expected version
        if cache_version != CACHE_SCHEMA_VERSION:
            logger.info(f"Cache schema version mismatch: found {cache_version}, expected {CACHE_SCHEMA_VERSION}")
            return False
        # Check if the data section exists
        if 'data' not in cache_data:
            logger.warning("Cache file missing 'data' section")
            return False
        # Validate the structure of the first entry, if any exist
        data_section = cache_data.get('data', {})
        if data_section and isinstance(data_section, dict):
            for pattern_file, entry in list(data_section.items())[:1]:  # Just check the first entry
                if not isinstance(entry, dict):
                    return False
                if 'mtime' not in entry or 'metadata' not in entry:
                    return False
                metadata = entry.get('metadata', {})
                required_fields = ['first_coordinate', 'last_coordinate', 'total_coordinates']
                if not all(field in metadata for field in required_fields):
                    return False
                # Validate the coordinate structure
                for coord_field in ['first_coordinate', 'last_coordinate']:
                    coord = metadata.get(coord_field)
                    if not isinstance(coord, dict) or 'x' not in coord or 'y' not in coord:
                        return False
        return True
    except Exception as e:
        logger.warning(f"Error validating cache schema: {str(e)}")
        return False

def invalidate_cache():
    """Delete only the metadata cache file, preserving the image cache."""
    try:
        # Delete the metadata cache file only
        if os.path.exists(METADATA_CACHE_FILE):
            os.remove(METADATA_CACHE_FILE)
            logger.info("Deleted outdated metadata cache file")
        # Keep the image cache directory intact - images are still valid.
        # Just ensure the cache directory structure exists.
        ensure_cache_dir()
        return True
    except Exception as e:
        logger.error(f"Failed to invalidate metadata cache: {str(e)}")
        return False

async def invalidate_cache_async():
    """Async version: Delete only the metadata cache file, preserving the image cache."""
    try:
        # Delete the metadata cache file only
        if await asyncio.to_thread(os.path.exists, METADATA_CACHE_FILE):
            await asyncio.to_thread(os.remove, METADATA_CACHE_FILE)
            logger.info("Deleted outdated metadata cache file")
        # Keep the image cache directory intact - images are still valid.
        # Just ensure the cache directory structure exists.
        await ensure_cache_dir_async()
        return True
    except Exception as e:
        logger.error(f"Failed to invalidate metadata cache: {str(e)}")
        return False

def ensure_cache_dir():
    """Ensure the cache directory exists with proper permissions."""
    try:
        Path(CACHE_DIR).mkdir(parents=True, exist_ok=True)
        # Initialize the metadata cache if it doesn't exist
        if not os.path.exists(METADATA_CACHE_FILE):
            initial_cache = {
                'version': CACHE_SCHEMA_VERSION,
                'data': {}
            }
            with open(METADATA_CACHE_FILE, 'w') as f:
                json.dump(initial_cache, f)
            try:
                os.chmod(METADATA_CACHE_FILE, 0o644)  # Conservative permissions
            except (OSError, PermissionError) as e:
                logger.debug(f"Could not set metadata cache file permissions: {str(e)}")
        for root, dirs, files in os.walk(CACHE_DIR):
            try:
                os.chmod(root, 0o755)  # Conservative permissions
                for file in files:
                    file_path = os.path.join(root, file)
                    try:
                        os.chmod(file_path, 0o644)  # Conservative permissions
                    except (OSError, PermissionError) as e:
                        # Debug level - permission tweaks are not critical
                        logger.debug(f"Could not set permissions for file {file_path}: {str(e)}")
            except (OSError, PermissionError) as e:
                # Debug level - permission tweaks are not critical
                logger.debug(f"Could not set permissions for directory {root}: {str(e)}")
                continue
    except Exception as e:
        logger.error(f"Failed to create cache directory: {str(e)}")

async def ensure_cache_dir_async():
    """Async version: Ensure the cache directory exists with proper permissions."""
    try:
        await asyncio.to_thread(Path(CACHE_DIR).mkdir, parents=True, exist_ok=True)
        # Initialize the metadata cache if it doesn't exist
        if not await asyncio.to_thread(os.path.exists, METADATA_CACHE_FILE):
            initial_cache = {
                'version': CACHE_SCHEMA_VERSION,
                'data': {}
            }

            def _write_initial_cache():
                with open(METADATA_CACHE_FILE, 'w') as f:
                    json.dump(initial_cache, f)

            await asyncio.to_thread(_write_initial_cache)
            try:
                await asyncio.to_thread(os.chmod, METADATA_CACHE_FILE, 0o644)
            except (OSError, PermissionError) as e:
                logger.debug(f"Could not set metadata cache file permissions: {str(e)}")

        def _set_permissions():
            for root, dirs, files in os.walk(CACHE_DIR):
                try:
                    os.chmod(root, 0o755)
                    for file in files:
                        file_path = os.path.join(root, file)
                        try:
                            os.chmod(file_path, 0o644)
                        except (OSError, PermissionError) as e:
                            logger.debug(f"Could not set permissions for file {file_path}: {str(e)}")
                except (OSError, PermissionError) as e:
                    logger.debug(f"Could not set permissions for directory {root}: {str(e)}")
                    continue

        await asyncio.to_thread(_set_permissions)
    except Exception as e:
        logger.error(f"Failed to create cache directory: {str(e)}")

def get_cache_path(pattern_file):
    """Get the cache path for a pattern file."""
    # Normalize path separators to handle both forward slashes and backslashes
    pattern_file = pattern_file.replace('\\', '/')
    # Mirror the pattern file's subdirectory structure inside the cache
    cache_subpath = os.path.dirname(pattern_file)
    if cache_subpath:
        # Recreate the same subdirectory structure in the cache (including custom_patterns).
        # Convert forward slashes back to the platform-specific separator for os.path.join.
        cache_subpath = cache_subpath.replace('/', os.sep)
        cache_dir = os.path.join(CACHE_DIR, cache_subpath)
    else:
        # Files in the root pattern directory go directly into CACHE_DIR
        cache_dir = CACHE_DIR
    # Ensure the subdirectory exists
    os.makedirs(cache_dir, exist_ok=True)
    try:
        os.chmod(cache_dir, 0o755)  # Conservative permissions
    except (OSError, PermissionError) as e:
        # Debug level - permission tweaks are not critical
        logger.debug(f"Could not set permissions for cache subdirectory {cache_dir}: {str(e)}")
    # Use just the filename part for the cache file
    filename = os.path.basename(pattern_file)
    safe_name = filename.replace('\\', '_')
    return os.path.join(cache_dir, f"{safe_name}.webp")
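
# For example (illustrative filename; path derived from the logic above):
#     get_cache_path("custom_patterns/spiral.thr")
#     -> <THETA_RHO_DIR>/cached_images/custom_patterns/spiral.thr.webp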

def delete_pattern_cache(pattern_file):
    """Delete the cached preview image and metadata for a pattern file."""
    try:
        # Remove the cached image
        cache_path = get_cache_path(pattern_file)
        if os.path.exists(cache_path):
            os.remove(cache_path)
            logger.info(f"Deleted cached image: {cache_path}")
        # Remove from the metadata cache
        metadata_cache = load_metadata_cache()
        data_section = metadata_cache.get('data', {})
        if pattern_file in data_section:
            del data_section[pattern_file]
            metadata_cache['data'] = data_section
            save_metadata_cache(metadata_cache)
            logger.info(f"Removed {pattern_file} from metadata cache")
        return True
    except Exception as e:
        logger.error(f"Failed to delete cache for {pattern_file}: {str(e)}")
        return False

def load_metadata_cache():
    """Load the metadata cache from disk with schema validation."""
    try:
        if os.path.exists(METADATA_CACHE_FILE):
            with open(METADATA_CACHE_FILE, 'r') as f:
                cache_data = json.load(f)
            # Validate the schema
            if not validate_cache_schema(cache_data):
                logger.info("Cache schema validation failed - invalidating cache")
                invalidate_cache()
                # Return an empty cache structure after invalidation
                return {
                    'version': CACHE_SCHEMA_VERSION,
                    'data': {}
                }
            return cache_data
    except Exception as e:
        logger.warning(f"Failed to load metadata cache: {str(e)} - invalidating cache")
        try:
            invalidate_cache()
        except Exception as invalidate_error:
            logger.error(f"Failed to invalidate corrupted cache: {str(invalidate_error)}")
    # Return an empty cache structure
    return {
        'version': CACHE_SCHEMA_VERSION,
        'data': {}
    }

async def load_metadata_cache_async():
    """Async version: Load the metadata cache from disk with schema validation."""
    try:
        if await asyncio.to_thread(os.path.exists, METADATA_CACHE_FILE):
            def _load_json():
                with open(METADATA_CACHE_FILE, 'r') as f:
                    return json.load(f)

            cache_data = await asyncio.to_thread(_load_json)
            # Validate the schema
            if not validate_cache_schema(cache_data):
                logger.info("Cache schema validation failed - invalidating cache")
                await invalidate_cache_async()
                # Return an empty cache structure after invalidation
                return {
                    'version': CACHE_SCHEMA_VERSION,
                    'data': {}
                }
            return cache_data
    except Exception as e:
        logger.warning(f"Failed to load metadata cache: {str(e)} - invalidating cache")
        try:
            await invalidate_cache_async()
        except Exception as invalidate_error:
            logger.error(f"Failed to invalidate corrupted cache: {str(invalidate_error)}")
    # Return an empty cache structure
    return {
        'version': CACHE_SCHEMA_VERSION,
        'data': {}
    }

def save_metadata_cache(cache_data):
    """Save the metadata cache to disk with version info."""
    try:
        ensure_cache_dir()
        # Ensure the cache data has the proper structure
        if not isinstance(cache_data, dict) or 'version' not in cache_data:
            # Convert the old format or create a new structure
            if isinstance(cache_data, dict) and 'data' not in cache_data:
                # Old format - wrap the existing data
                structured_cache = {
                    'version': CACHE_SCHEMA_VERSION,
                    'data': cache_data
                }
            else:
                structured_cache = cache_data
        else:
            structured_cache = cache_data
        with open(METADATA_CACHE_FILE, 'w') as f:
            json.dump(structured_cache, f, indent=2)
    except Exception as e:
        logger.error(f"Failed to save metadata cache: {str(e)}")

def get_pattern_metadata(pattern_file):
    """Get cached metadata for a pattern file."""
    cache_data = load_metadata_cache()
    data_section = cache_data.get('data', {})
    # Use the cached metadata only if the file hasn't changed since it was cached
    if pattern_file in data_section:
        cached_entry = data_section[pattern_file]
        pattern_path = os.path.join(THETA_RHO_DIR, pattern_file)
        try:
            file_mtime = os.path.getmtime(pattern_path)
            if cached_entry.get('mtime') == file_mtime:
                return cached_entry.get('metadata')
        except OSError:
            pass
    return None

async def get_pattern_metadata_async(pattern_file):
    """Async version: Get cached metadata for a pattern file."""
    cache_data = await load_metadata_cache_async()
    data_section = cache_data.get('data', {})
    # Use the cached metadata only if the file hasn't changed since it was cached
    if pattern_file in data_section:
        cached_entry = data_section[pattern_file]
        pattern_path = os.path.join(THETA_RHO_DIR, pattern_file)
        try:
            file_mtime = await asyncio.to_thread(os.path.getmtime, pattern_path)
            if cached_entry.get('mtime') == file_mtime:
                return cached_entry.get('metadata')
        except OSError:
            pass
    return None

async def cache_pattern_metadata(pattern_file, first_coord, last_coord, total_coords):
    """Cache metadata for a pattern file.

    Uses an asyncio.Lock to prevent race conditions when multiple concurrent tasks
    (from asyncio.gather) try to read-modify-write the cache file simultaneously.
    """
    async with _get_metadata_cache_lock():
        try:
            cache_data = await asyncio.to_thread(load_metadata_cache)
            data_section = cache_data.get('data', {})
            pattern_path = os.path.join(THETA_RHO_DIR, pattern_file)
            file_mtime = await asyncio.to_thread(os.path.getmtime, pattern_path)
            data_section[pattern_file] = {
                'mtime': file_mtime,
                'metadata': {
                    'first_coordinate': first_coord,
                    'last_coordinate': last_coord,
                    'total_coordinates': total_coords
                }
            }
            cache_data['data'] = data_section
            await asyncio.to_thread(save_metadata_cache, cache_data)
            logger.debug(f"Cached metadata for {pattern_file}")
        except Exception as e:
            logger.warning(f"Failed to cache metadata for {pattern_file}: {str(e)}")

def needs_cache(pattern_file):
    """Check if a pattern file needs its cache generated."""
    # Check if the image preview exists
    cache_path = get_cache_path(pattern_file)
    if not os.path.exists(cache_path):
        return True
    # Check if the metadata cache exists and is valid
    metadata = get_pattern_metadata(pattern_file)
    if metadata is None:
        return True
    return False

def needs_image_cache_only(pattern_file):
    """Quick check if a pattern file needs its image cache generated.

    Only checks for the image file's existence, not metadata validity.
    Used during startup for faster checking.
    """
    cache_path = get_cache_path(pattern_file)
    return not os.path.exists(cache_path)

async def needs_cache_async(pattern_file):
    """Async version: Check if a pattern file needs its cache generated."""
    # Check if the image preview exists
    cache_path = get_cache_path(pattern_file)
    if not await asyncio.to_thread(os.path.exists, cache_path):
        return True
    # Check if the metadata cache exists and is valid
    metadata = await get_pattern_metadata_async(pattern_file)
    if metadata is None:
        return True
    return False

async def generate_image_preview(pattern_file):
    """Generate an image preview for a single pattern file."""
    from modules.core.preview import generate_preview_image
    try:
        logger.debug(f"Starting preview generation for {pattern_file}")
        # Check whether the metadata cache needs updating
        metadata = get_pattern_metadata(pattern_file)
        if metadata is None:
            # Parse the file to get metadata (this is the only time we need to parse)
            logger.debug(f"Parsing {pattern_file} for metadata cache")
            pattern_path = os.path.join(THETA_RHO_DIR, pattern_file)
            try:
                coordinates = await asyncio.to_thread(parse_theta_rho_file, pattern_path)
                if coordinates:
                    first_coord = {"x": coordinates[0][0], "y": coordinates[0][1]}
                    last_coord = {"x": coordinates[-1][0], "y": coordinates[-1][1]}
                    total_coords = len(coordinates)
                    # Cache the metadata for future use
                    await cache_pattern_metadata(pattern_file, first_coord, last_coord, total_coords)
                    logger.debug(f"Metadata cached for {pattern_file}: {total_coords} coordinates")
                else:
                    logger.warning(f"No coordinates found in {pattern_file}")
            except Exception as e:
                logger.error(f"Failed to parse {pattern_file} for metadata: {str(e)}")
                # Continue with image generation even if metadata caching fails
        # Check whether the image needs generating
        cache_path = get_cache_path(pattern_file)
        if os.path.exists(cache_path):
            logger.debug(f"Skipping image generation for {pattern_file} - already cached")
            return True
        # Generate the image
        logger.debug(f"Generating image preview for {pattern_file}")
        image_content = await generate_preview_image(pattern_file)
        if not image_content:
            logger.error(f"Generated image content is empty for {pattern_file}")
            return False
        # Ensure the cache directory exists
        ensure_cache_dir()
        with open(cache_path, 'wb') as f:
            f.write(image_content)
        try:
            os.chmod(cache_path, 0o644)  # Conservative permissions
        except (OSError, PermissionError) as e:
            # Debug level - permission tweaks are not critical
            logger.debug(f"Could not set cache file permissions for {pattern_file}: {str(e)}")
        logger.debug(f"Successfully generated preview for {pattern_file}")
        return True
    except Exception as e:
        logger.error(f"Failed to generate image for {pattern_file}: {str(e)}")
        return False

async def generate_all_image_previews():
    """Generate image previews for missing patterns using set difference."""
    global cache_progress
    try:
        await ensure_cache_dir_async()

        # Step 1: Get all pattern files
        pattern_files = await list_theta_rho_files_async()
        if not pattern_files:
            logger.info("No .thr pattern files found. Skipping image preview generation.")
            return

        # Step 2: Find patterns with an existing cache
        def _find_cached_patterns():
            cached = set()
            for pattern in pattern_files:
                cache_path = get_cache_path(pattern)
                if os.path.exists(cache_path):
                    cached.add(pattern)
            return cached

        cached_patterns = await asyncio.to_thread(_find_cached_patterns)

        # Step 3: Calculate the delta (patterns missing an image cache)
        pattern_set = set(pattern_files)
        patterns_to_cache = list(pattern_set - cached_patterns)
        total_files = len(patterns_to_cache)
        skipped_files = len(pattern_files) - total_files
        if total_files == 0:
            logger.info(f"All {skipped_files} pattern files already have image previews. Skipping image generation.")
            return

        # Update the progress state
        cache_progress.update({
            "stage": "images",
            "total_files": total_files,
            "processed_files": 0,
            "current_file": "",
            "error": None
        })
        logger.info(f"Generating image cache for {total_files} uncached .thr patterns ({skipped_files} already cached)...")

        batch_size = 5
        successful = 0
        for i in range(0, total_files, batch_size):
            batch = patterns_to_cache[i:i + batch_size]
            tasks = [generate_image_preview(file) for file in batch]
            results = await asyncio.gather(*tasks)
            successful += sum(1 for r in results if r)
            # Update progress
            cache_progress["processed_files"] = min(i + batch_size, total_files)
            if i < total_files:
                cache_progress["current_file"] = patterns_to_cache[min(i + batch_size - 1, total_files - 1)]
            # Log progress
            progress = min(i + batch_size, total_files)
            logger.info(f"Image cache generation progress: {progress}/{total_files} files processed")

        logger.info(f"Image cache generation completed: {successful}/{total_files} patterns cached successfully, {skipped_files} patterns skipped (already cached)")
    except Exception as e:
        logger.error(f"Error during image cache generation: {str(e)}")
        cache_progress["error"] = str(e)
        raise

async def generate_metadata_cache():
    """Generate the metadata cache for missing patterns using set difference."""
    global cache_progress
    try:
        logger.info("Starting metadata cache generation...")

        # Step 1: Get all pattern files
        pattern_files = await list_theta_rho_files_async()
        if not pattern_files:
            logger.info("No pattern files found. Skipping metadata cache generation.")
            return

        # Step 2: Get the existing metadata keys
        metadata_cache = await load_metadata_cache_async()
        existing_keys = set(metadata_cache.get('data', {}).keys())

        # Step 3: Calculate the delta (patterns missing from metadata)
        pattern_set = set(pattern_files)
        files_to_process = list(pattern_set - existing_keys)
        total_files = len(files_to_process)
        skipped_files = len(pattern_files) - total_files
        if total_files == 0:
            logger.info(f"All {skipped_files} files already have metadata cache. Skipping metadata generation.")
            return

        # Update the progress state
        cache_progress.update({
            "stage": "metadata",
            "total_files": total_files,
            "processed_files": 0,
            "current_file": "",
            "error": None
        })
        logger.info(f"Generating metadata cache for {total_files} new files ({skipped_files} files already cached)...")

        # Process in smaller batches for the Pi Zero 2 W
        batch_size = 3  # Reduced from 5
        successful = 0
        for i in range(0, total_files, batch_size):
            batch = files_to_process[i:i + batch_size]
            # Process files sequentially within the batch (no parallel tasks)
            for file_name in batch:
                pattern_path = os.path.join(THETA_RHO_DIR, file_name)
                cache_progress["current_file"] = file_name
                try:
                    # Parse the file to get metadata
                    coordinates = await asyncio.to_thread(parse_theta_rho_file, pattern_path)
                    if coordinates:
                        first_coord = {"x": coordinates[0][0], "y": coordinates[0][1]}
                        last_coord = {"x": coordinates[-1][0], "y": coordinates[-1][1]}
                        total_coords = len(coordinates)
                        # Cache the metadata
                        await cache_pattern_metadata(file_name, first_coord, last_coord, total_coords)
                        successful += 1
                        logger.debug(f"Generated metadata for {file_name}")
                    # Small delay to reduce I/O pressure
                    await asyncio.sleep(0.05)
                except Exception as e:
                    logger.error(f"Failed to generate metadata for {file_name}: {str(e)}")
            # Update progress
            cache_progress["processed_files"] = min(i + batch_size, total_files)
            # Log progress
            progress = min(i + batch_size, total_files)
            logger.info(f"Metadata cache generation progress: {progress}/{total_files} files processed")
            # Delay between batches for system recovery
            if i + batch_size < total_files:
                await asyncio.sleep(0.3)

        logger.info(f"Metadata cache generation completed: {successful}/{total_files} patterns cached successfully, {skipped_files} patterns skipped (already cached)")
    except Exception as e:
        logger.error(f"Error during metadata cache generation: {str(e)}")
        cache_progress["error"] = str(e)
        raise

async def rebuild_cache():
    """Rebuild the entire cache for all pattern files."""
    logger.info("Starting cache rebuild...")
    # Ensure the cache directory exists
    ensure_cache_dir()
    # First generate the metadata cache for all files
    await generate_metadata_cache()
    # Then generate image previews
    pattern_files = [f for f in list_theta_rho_files() if f.endswith('.thr')]
    total_files = len(pattern_files)
    if total_files == 0:
        logger.info("No pattern files found to cache")
        return
    logger.info(f"Generating image previews for {total_files} pattern files...")
    # Process in batches
    batch_size = 5
    successful = 0
    for i in range(0, total_files, batch_size):
        batch = pattern_files[i:i + batch_size]
        tasks = [generate_image_preview(file) for file in batch]
        results = await asyncio.gather(*tasks)
        successful += sum(1 for r in results if r)
        # Log progress
        progress = min(i + batch_size, total_files)
        logger.info(f"Image preview generation progress: {progress}/{total_files} files processed")
    logger.info(f"Cache rebuild completed: {successful}/{total_files} patterns cached successfully")

async def generate_cache_background():
    """Run cache generation in the background with progress tracking."""
    global cache_progress
    try:
        cache_progress.update({
            "is_running": True,
            "stage": "starting",
            "total_files": 0,
            "processed_files": 0,
            "current_file": "",
            "error": None
        })
        # First generate the metadata cache
        await generate_metadata_cache()
        # Then generate image previews
        await generate_all_image_previews()
        # Mark as complete
        cache_progress.update({
            "is_running": False,
            "stage": "complete",
            "current_file": "",
            "error": None
        })
        logger.info("Background cache generation completed successfully")
    except Exception as e:
        logger.error(f"Background cache generation failed: {str(e)}")
        cache_progress.update({
            "is_running": False,
            "stage": "error",
            "error": str(e)
        })
        raise

def get_cache_progress():
    """Get the current cache generation progress.

    Returns a reference to the cache_progress dict for read-only access.
    The WebSocket handler should not modify this dict.
    """
    return cache_progress  # Return a reference instead of a copy for performance

def is_cache_generation_needed():
    """Check if cache generation is needed."""
    pattern_files = [f for f in list_theta_rho_files() if f.endswith('.thr')]
    if not pattern_files:
        return False
    # Check if any files need caching
    patterns_to_cache = [f for f in pattern_files if needs_cache(f)]
    # Check the metadata cache
    files_needing_metadata = []
    for file_name in pattern_files:
        if get_pattern_metadata(file_name) is None:
            files_needing_metadata.append(file_name)
    return len(patterns_to_cache) > 0 or len(files_needing_metadata) > 0

async def is_cache_generation_needed_async():
    """Check if cache generation is needed using a simple set difference.

    Returns True if any patterns are missing from either the metadata or the image cache.
    """
    try:
        # Step 1: List all patterns
        pattern_files = await list_theta_rho_files_async()
        if not pattern_files:
            return False
        pattern_set = set(pattern_files)

        # Step 2: Check the metadata cache
        metadata_cache = await load_metadata_cache_async()
        metadata_keys = set(metadata_cache.get('data', {}).keys())
        if pattern_set != metadata_keys:
            # The metadata cache is out of sync with the pattern set
            # (entries missing, or stale entries for deleted patterns)
            return True

        # Step 3: Check the image cache
        def _list_cached_images():
            """List all patterns that have cached images."""
            cached = set()
            if os.path.exists(CACHE_DIR):
                for pattern in pattern_files:
                    cache_path = get_cache_path(pattern)
                    if os.path.exists(cache_path):
                        cached.add(pattern)
            return cached

        cached_images = await asyncio.to_thread(_list_cached_images)
        if pattern_set != cached_images:
            # Some patterns are missing an image cache
            return True
        return False
    except Exception as e:
        logger.warning(f"Error checking cache status: {e}")
        return False  # Don't block startup on errors

async def list_theta_rho_files_async():
    """Async version: List all theta-rho files."""
    def _walk_files():
        files = []
        for root, _, filenames in os.walk(THETA_RHO_DIR):
            # Only process .thr files to reduce memory usage
            thr_files = [f for f in filenames if f.endswith('.thr')]
            for file in thr_files:
                relative_path = os.path.relpath(os.path.join(root, file), THETA_RHO_DIR)
                # Normalize separators to forward slashes for cross-platform consistency
                relative_path = relative_path.replace(os.sep, '/')
                files.append(relative_path)
        return files

    files = await asyncio.to_thread(_walk_files)
    logger.debug(f"Found {len(files)} theta-rho files")
    return files  # Already filtered for .thr
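

# Minimal usage sketch (illustrative; assumes this module is importable as
# modules.core.cache_manager and that its pattern_manager dependency resolves):
#
#     import asyncio
#     from modules.core.cache_manager import (
#         generate_cache_background,
#         get_cache_progress,
#         is_cache_generation_needed_async,
#     )
#
#     async def main():
#         if await is_cache_generation_needed_async():
#             await generate_cache_background()
#         print(get_cache_progress())
#
#     asyncio.run(main())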