# cache_manager.py
  1. """Image Cache Manager for pre-generating and managing image previews."""
  2. import os
  3. import json
  4. import asyncio
  5. import logging
  6. from pathlib import Path
  7. from modules.core.pattern_manager import list_theta_rho_files, THETA_RHO_DIR, parse_theta_rho_file
  8. logger = logging.getLogger(__name__)
# Global cache progress state, read via get_cache_progress() and mutated by
# the background generation coroutines below.
cache_progress = {
    "is_running": False,       # True while generate_cache_background() runs
    "total_files": 0,          # files in the current stage's work set
    "processed_files": 0,      # files completed in the current stage
    "current_file": "",        # file most recently worked on
    "stage": "idle",           # idle, metadata, images, complete
    "error": None              # last error message, or None
}

# Constants
CACHE_DIR = os.path.join(THETA_RHO_DIR, "cached_images")
METADATA_CACHE_FILE = "metadata_cache.json"  # Now in root directory

# Cache schema version - increment when structure changes
CACHE_SCHEMA_VERSION = 1

# Expected cache schema structure.
# NOTE(review): documents the shape of one 'data' entry; it does not appear to
# be consulted programmatically by the code in this module — confirm before
# relying on it.
EXPECTED_CACHE_SCHEMA = {
    'version': CACHE_SCHEMA_VERSION,
    'structure': {
        'mtime': 'number',
        'metadata': {
            'first_coordinate': {'x': 'number', 'y': 'number'},
            'last_coordinate': {'x': 'number', 'y': 'number'},
            'total_coordinates': 'number'
        }
    }
}
  35. def validate_cache_schema(cache_data):
  36. """Validate that cache data matches the expected schema structure."""
  37. try:
  38. # Check if version info exists
  39. if not isinstance(cache_data, dict):
  40. return False
  41. # Check for version field - if missing, it's old format
  42. cache_version = cache_data.get('version')
  43. if cache_version is None:
  44. logger.info("Cache file missing version info - treating as outdated schema")
  45. return False
  46. # Check if version matches current expected version
  47. if cache_version != CACHE_SCHEMA_VERSION:
  48. logger.info(f"Cache schema version mismatch: found {cache_version}, expected {CACHE_SCHEMA_VERSION}")
  49. return False
  50. # Check if data section exists
  51. if 'data' not in cache_data:
  52. logger.warning("Cache file missing 'data' section")
  53. return False
  54. # Validate structure of a few entries if they exist
  55. data_section = cache_data.get('data', {})
  56. if data_section and isinstance(data_section, dict):
  57. # Check first entry structure
  58. for pattern_file, entry in list(data_section.items())[:1]: # Just check first entry
  59. if not isinstance(entry, dict):
  60. return False
  61. if 'mtime' not in entry or 'metadata' not in entry:
  62. return False
  63. metadata = entry.get('metadata', {})
  64. required_fields = ['first_coordinate', 'last_coordinate', 'total_coordinates']
  65. if not all(field in metadata for field in required_fields):
  66. return False
  67. # Validate coordinate structure
  68. for coord_field in ['first_coordinate', 'last_coordinate']:
  69. coord = metadata.get(coord_field)
  70. if not isinstance(coord, dict) or 'x' not in coord or 'y' not in coord:
  71. return False
  72. return True
  73. except Exception as e:
  74. logger.warning(f"Error validating cache schema: {str(e)}")
  75. return False
  76. def invalidate_cache():
  77. """Delete only the metadata cache file, preserving image cache."""
  78. try:
  79. # Delete metadata cache file only
  80. if os.path.exists(METADATA_CACHE_FILE):
  81. os.remove(METADATA_CACHE_FILE)
  82. logger.info("Deleted outdated metadata cache file")
  83. # Keep image cache directory intact - images are still valid
  84. # Just ensure the cache directory structure exists
  85. ensure_cache_dir()
  86. return True
  87. except Exception as e:
  88. logger.error(f"Failed to invalidate metadata cache: {str(e)}")
  89. return False
  90. async def invalidate_cache_async():
  91. """Async version: Delete only the metadata cache file, preserving image cache."""
  92. try:
  93. # Delete metadata cache file only
  94. if await asyncio.to_thread(os.path.exists, METADATA_CACHE_FILE):
  95. await asyncio.to_thread(os.remove, METADATA_CACHE_FILE)
  96. logger.info("Deleted outdated metadata cache file")
  97. # Keep image cache directory intact - images are still valid
  98. # Just ensure the cache directory structure exists
  99. await ensure_cache_dir_async()
  100. return True
  101. except Exception as e:
  102. logger.error(f"Failed to invalidate metadata cache: {str(e)}")
  103. return False
  104. def ensure_cache_dir():
  105. """Ensure the cache directory exists with proper permissions."""
  106. try:
  107. Path(CACHE_DIR).mkdir(parents=True, exist_ok=True)
  108. # Initialize metadata cache if it doesn't exist
  109. if not os.path.exists(METADATA_CACHE_FILE):
  110. initial_cache = {
  111. 'version': CACHE_SCHEMA_VERSION,
  112. 'data': {}
  113. }
  114. with open(METADATA_CACHE_FILE, 'w') as f:
  115. json.dump(initial_cache, f)
  116. try:
  117. os.chmod(METADATA_CACHE_FILE, 0o644) # More conservative permissions
  118. except (OSError, PermissionError) as e:
  119. logger.debug(f"Could not set metadata cache file permissions: {str(e)}")
  120. for root, dirs, files in os.walk(CACHE_DIR):
  121. try:
  122. os.chmod(root, 0o755) # More conservative permissions
  123. for file in files:
  124. file_path = os.path.join(root, file)
  125. try:
  126. os.chmod(file_path, 0o644) # More conservative permissions
  127. except (OSError, PermissionError) as e:
  128. # Log as debug instead of error since this is not critical
  129. logger.debug(f"Could not set permissions for file {file_path}: {str(e)}")
  130. except (OSError, PermissionError) as e:
  131. # Log as debug instead of error since this is not critical
  132. logger.debug(f"Could not set permissions for directory {root}: {str(e)}")
  133. continue
  134. except Exception as e:
  135. logger.error(f"Failed to create cache directory: {str(e)}")
  136. async def ensure_cache_dir_async():
  137. """Async version: Ensure the cache directory exists with proper permissions."""
  138. try:
  139. await asyncio.to_thread(Path(CACHE_DIR).mkdir, parents=True, exist_ok=True)
  140. # Initialize metadata cache if it doesn't exist
  141. if not await asyncio.to_thread(os.path.exists, METADATA_CACHE_FILE):
  142. initial_cache = {
  143. 'version': CACHE_SCHEMA_VERSION,
  144. 'data': {}
  145. }
  146. def _write_initial_cache():
  147. with open(METADATA_CACHE_FILE, 'w') as f:
  148. json.dump(initial_cache, f)
  149. await asyncio.to_thread(_write_initial_cache)
  150. try:
  151. await asyncio.to_thread(os.chmod, METADATA_CACHE_FILE, 0o644)
  152. except (OSError, PermissionError) as e:
  153. logger.debug(f"Could not set metadata cache file permissions: {str(e)}")
  154. def _set_permissions():
  155. for root, dirs, files in os.walk(CACHE_DIR):
  156. try:
  157. os.chmod(root, 0o755)
  158. for file in files:
  159. file_path = os.path.join(root, file)
  160. try:
  161. os.chmod(file_path, 0o644)
  162. except (OSError, PermissionError) as e:
  163. logger.debug(f"Could not set permissions for file {file_path}: {str(e)}")
  164. except (OSError, PermissionError) as e:
  165. logger.debug(f"Could not set permissions for directory {root}: {str(e)}")
  166. continue
  167. await asyncio.to_thread(_set_permissions)
  168. except Exception as e:
  169. logger.error(f"Failed to create cache directory: {str(e)}")
  170. def get_cache_path(pattern_file):
  171. """Get the cache path for a pattern file."""
  172. # Normalize path separators to handle both forward slashes and backslashes
  173. pattern_file = pattern_file.replace('\\', '/')
  174. # Create subdirectories in cache to match the pattern file structure
  175. cache_subpath = os.path.dirname(pattern_file)
  176. if cache_subpath:
  177. # Create the same subdirectory structure in cache (including custom_patterns)
  178. # Convert forward slashes back to platform-specific separator for os.path.join
  179. cache_subpath = cache_subpath.replace('/', os.sep)
  180. cache_dir = os.path.join(CACHE_DIR, cache_subpath)
  181. else:
  182. # For files in root pattern directory
  183. cache_dir = CACHE_DIR
  184. # Ensure the subdirectory exists
  185. os.makedirs(cache_dir, exist_ok=True)
  186. try:
  187. os.chmod(cache_dir, 0o755) # More conservative permissions
  188. except (OSError, PermissionError) as e:
  189. # Log as debug instead of error since this is not critical
  190. logger.debug(f"Could not set permissions for cache subdirectory {cache_dir}: {str(e)}")
  191. # Use just the filename part for the cache file
  192. filename = os.path.basename(pattern_file)
  193. safe_name = filename.replace('\\', '_')
  194. return os.path.join(cache_dir, f"{safe_name}.webp")
  195. def delete_pattern_cache(pattern_file):
  196. """Delete cached preview image and metadata for a pattern file."""
  197. try:
  198. # Remove cached image
  199. cache_path = get_cache_path(pattern_file)
  200. if os.path.exists(cache_path):
  201. os.remove(cache_path)
  202. logger.info(f"Deleted cached image: {cache_path}")
  203. # Remove from metadata cache
  204. metadata_cache = load_metadata_cache()
  205. data_section = metadata_cache.get('data', {})
  206. if pattern_file in data_section:
  207. del data_section[pattern_file]
  208. metadata_cache['data'] = data_section
  209. save_metadata_cache(metadata_cache)
  210. logger.info(f"Removed {pattern_file} from metadata cache")
  211. return True
  212. except Exception as e:
  213. logger.error(f"Failed to delete cache for {pattern_file}: {str(e)}")
  214. return False
  215. def load_metadata_cache():
  216. """Load the metadata cache from disk with schema validation."""
  217. try:
  218. if os.path.exists(METADATA_CACHE_FILE):
  219. with open(METADATA_CACHE_FILE, 'r') as f:
  220. cache_data = json.load(f)
  221. # Validate schema
  222. if not validate_cache_schema(cache_data):
  223. logger.info("Cache schema validation failed - invalidating cache")
  224. invalidate_cache()
  225. # Return empty cache structure after invalidation
  226. return {
  227. 'version': CACHE_SCHEMA_VERSION,
  228. 'data': {}
  229. }
  230. return cache_data
  231. except Exception as e:
  232. logger.warning(f"Failed to load metadata cache: {str(e)} - invalidating cache")
  233. try:
  234. invalidate_cache()
  235. except Exception as invalidate_error:
  236. logger.error(f"Failed to invalidate corrupted cache: {str(invalidate_error)}")
  237. # Return empty cache structure
  238. return {
  239. 'version': CACHE_SCHEMA_VERSION,
  240. 'data': {}
  241. }
async def load_metadata_cache_async():
    """Async version: Load the metadata cache from disk with schema validation.

    Returns the parsed cache dict on success; on a missing file, failed
    validation, or any load error it returns an empty versioned structure
    (invalidating the on-disk cache where appropriate).
    """
    try:
        logger.debug(f"load_metadata_cache_async: Checking if metadata cache file exists at {METADATA_CACHE_FILE}")
        file_exists = await asyncio.to_thread(os.path.exists, METADATA_CACHE_FILE)
        logger.debug(f"load_metadata_cache_async: File exists: {file_exists}")
        if file_exists:
            def _load_json():
                with open(METADATA_CACHE_FILE, 'r') as f:
                    return json.load(f)
            logger.debug("load_metadata_cache_async: Loading JSON data from cache file...")
            cache_data = await asyncio.to_thread(_load_json)
            logger.debug(f"load_metadata_cache_async: Loaded cache with {len(cache_data.get('data', {}))} entries")
            # Validate schema
            logger.debug("load_metadata_cache_async: Validating cache schema...")
            if not validate_cache_schema(cache_data):
                logger.info("Cache schema validation failed - invalidating cache")
                await invalidate_cache_async()
                # Return empty cache structure after invalidation
                logger.debug("load_metadata_cache_async: Returning empty cache after invalidation")
                return {
                    'version': CACHE_SCHEMA_VERSION,
                    'data': {}
                }
            logger.debug("load_metadata_cache_async: Cache validated successfully")
            return cache_data
        else:
            # Missing file falls through to the empty-structure return below.
            logger.debug("load_metadata_cache_async: Cache file does not exist, returning empty cache")
    except Exception as e:
        logger.warning(f"Failed to load metadata cache: {str(e)} - invalidating cache")
        try:
            await invalidate_cache_async()
        except Exception as invalidate_error:
            logger.error(f"Failed to invalidate corrupted cache: {str(invalidate_error)}")
    # Return empty cache structure
    logger.debug("load_metadata_cache_async: Returning empty cache structure")
    return {
        'version': CACHE_SCHEMA_VERSION,
        'data': {}
    }
  282. def save_metadata_cache(cache_data):
  283. """Save the metadata cache to disk with version info."""
  284. try:
  285. ensure_cache_dir()
  286. # Ensure cache data has proper structure
  287. if not isinstance(cache_data, dict) or 'version' not in cache_data:
  288. # Convert old format or create new structure
  289. if isinstance(cache_data, dict) and 'data' not in cache_data:
  290. # Old format - wrap existing data
  291. structured_cache = {
  292. 'version': CACHE_SCHEMA_VERSION,
  293. 'data': cache_data
  294. }
  295. else:
  296. structured_cache = cache_data
  297. else:
  298. structured_cache = cache_data
  299. with open(METADATA_CACHE_FILE, 'w') as f:
  300. json.dump(structured_cache, f, indent=2)
  301. except Exception as e:
  302. logger.error(f"Failed to save metadata cache: {str(e)}")
  303. def get_pattern_metadata(pattern_file):
  304. """Get cached metadata for a pattern file."""
  305. cache_data = load_metadata_cache()
  306. data_section = cache_data.get('data', {})
  307. # Check if we have cached metadata and if the file hasn't changed
  308. if pattern_file in data_section:
  309. cached_entry = data_section[pattern_file]
  310. pattern_path = os.path.join(THETA_RHO_DIR, pattern_file)
  311. try:
  312. file_mtime = os.path.getmtime(pattern_path)
  313. if cached_entry.get('mtime') == file_mtime:
  314. return cached_entry.get('metadata')
  315. except OSError:
  316. pass
  317. return None
  318. async def get_pattern_metadata_async(pattern_file):
  319. """Async version: Get cached metadata for a pattern file."""
  320. cache_data = await load_metadata_cache_async()
  321. data_section = cache_data.get('data', {})
  322. # Check if we have cached metadata and if the file hasn't changed
  323. if pattern_file in data_section:
  324. cached_entry = data_section[pattern_file]
  325. pattern_path = os.path.join(THETA_RHO_DIR, pattern_file)
  326. try:
  327. file_mtime = await asyncio.to_thread(os.path.getmtime, pattern_path)
  328. if cached_entry.get('mtime') == file_mtime:
  329. return cached_entry.get('metadata')
  330. except OSError:
  331. pass
  332. return None
  333. def cache_pattern_metadata(pattern_file, first_coord, last_coord, total_coords):
  334. """Cache metadata for a pattern file."""
  335. try:
  336. cache_data = load_metadata_cache()
  337. data_section = cache_data.get('data', {})
  338. pattern_path = os.path.join(THETA_RHO_DIR, pattern_file)
  339. file_mtime = os.path.getmtime(pattern_path)
  340. data_section[pattern_file] = {
  341. 'mtime': file_mtime,
  342. 'metadata': {
  343. 'first_coordinate': first_coord,
  344. 'last_coordinate': last_coord,
  345. 'total_coordinates': total_coords
  346. }
  347. }
  348. cache_data['data'] = data_section
  349. save_metadata_cache(cache_data)
  350. logger.debug(f"Cached metadata for {pattern_file}")
  351. except Exception as e:
  352. logger.warning(f"Failed to cache metadata for {pattern_file}: {str(e)}")
  353. def needs_cache(pattern_file):
  354. """Check if a pattern file needs its cache generated."""
  355. # Check if image preview exists
  356. cache_path = get_cache_path(pattern_file)
  357. if not os.path.exists(cache_path):
  358. return True
  359. # Check if metadata cache exists and is valid
  360. metadata = get_pattern_metadata(pattern_file)
  361. if metadata is None:
  362. return True
  363. return False
  364. def needs_image_cache_only(pattern_file):
  365. """Quick check if a pattern file needs its image cache generated.
  366. Only checks for image file existence, not metadata validity.
  367. Used during startup for faster checking.
  368. """
  369. cache_path = get_cache_path(pattern_file)
  370. return not os.path.exists(cache_path)
  371. async def needs_cache_async(pattern_file):
  372. """Async version: Check if a pattern file needs its cache generated."""
  373. # Check if image preview exists
  374. cache_path = get_cache_path(pattern_file)
  375. if not await asyncio.to_thread(os.path.exists, cache_path):
  376. return True
  377. # Check if metadata cache exists and is valid
  378. metadata = await get_pattern_metadata_async(pattern_file)
  379. if metadata is None:
  380. return True
  381. return False
  382. async def generate_image_preview(pattern_file):
  383. """Generate image preview for a single pattern file."""
  384. from modules.core.preview import generate_preview_image
  385. from modules.core.pattern_manager import parse_theta_rho_file
  386. try:
  387. logger.debug(f"Starting preview generation for {pattern_file}")
  388. # Check if we need to update metadata cache
  389. metadata = get_pattern_metadata(pattern_file)
  390. if metadata is None:
  391. # Parse file to get metadata (this is the only time we need to parse)
  392. logger.debug(f"Parsing {pattern_file} for metadata cache")
  393. pattern_path = os.path.join(THETA_RHO_DIR, pattern_file)
  394. try:
  395. coordinates = await asyncio.to_thread(parse_theta_rho_file, pattern_path)
  396. if coordinates:
  397. first_coord = {"x": coordinates[0][0], "y": coordinates[0][1]}
  398. last_coord = {"x": coordinates[-1][0], "y": coordinates[-1][1]}
  399. total_coords = len(coordinates)
  400. # Cache the metadata for future use
  401. cache_pattern_metadata(pattern_file, first_coord, last_coord, total_coords)
  402. logger.debug(f"Metadata cached for {pattern_file}: {total_coords} coordinates")
  403. else:
  404. logger.warning(f"No coordinates found in {pattern_file}")
  405. except Exception as e:
  406. logger.error(f"Failed to parse {pattern_file} for metadata: {str(e)}")
  407. # Continue with image generation even if metadata fails
  408. # Check if we need to generate the image
  409. cache_path = get_cache_path(pattern_file)
  410. if os.path.exists(cache_path):
  411. logger.debug(f"Skipping image generation for {pattern_file} - already cached")
  412. return True
  413. # Generate the image
  414. logger.debug(f"Generating image preview for {pattern_file}")
  415. image_content = await generate_preview_image(pattern_file)
  416. if not image_content:
  417. logger.error(f"Generated image content is empty for {pattern_file}")
  418. return False
  419. # Ensure cache directory exists
  420. ensure_cache_dir()
  421. with open(cache_path, 'wb') as f:
  422. f.write(image_content)
  423. try:
  424. os.chmod(cache_path, 0o644) # More conservative permissions
  425. except (OSError, PermissionError) as e:
  426. # Log as debug instead of error since this is not critical
  427. logger.debug(f"Could not set cache file permissions for {pattern_file}: {str(e)}")
  428. logger.debug(f"Successfully generated preview for {pattern_file}")
  429. return True
  430. except Exception as e:
  431. logger.error(f"Failed to generate image for {pattern_file}: {str(e)}")
  432. return False
async def generate_all_image_previews():
    """Generate image previews for missing patterns using set difference.

    Computes (all patterns) - (patterns with an existing cached image) and
    renders only the missing previews in batches of 5, updating the
    module-level cache_progress dict as it goes. Re-raises on failure after
    recording the error.
    """
    global cache_progress
    try:
        await ensure_cache_dir_async()
        # Step 1: Get all pattern files
        pattern_files = await list_theta_rho_files_async()
        if not pattern_files:
            logger.info("No .thr pattern files found. Skipping image preview generation.")
            return
        # Step 2: Find patterns with existing cache
        def _find_cached_patterns():
            cached = set()
            for pattern in pattern_files:
                cache_path = get_cache_path(pattern)
                if os.path.exists(cache_path):
                    cached.add(pattern)
            return cached
        cached_patterns = await asyncio.to_thread(_find_cached_patterns)
        # Step 3: Calculate delta (patterns missing image cache)
        pattern_set = set(pattern_files)
        patterns_to_cache = list(pattern_set - cached_patterns)
        total_files = len(patterns_to_cache)
        skipped_files = len(pattern_files) - total_files
        if total_files == 0:
            logger.info(f"All {skipped_files} pattern files already have image previews. Skipping image generation.")
            return
        # Update progress state
        cache_progress.update({
            "stage": "images",
            "total_files": total_files,
            "processed_files": 0,
            "current_file": "",
            "error": None
        })
        logger.info(f"Generating image cache for {total_files} uncached .thr patterns ({skipped_files} already cached)...")
        batch_size = 5
        successful = 0
        for i in range(0, total_files, batch_size):
            # Render one batch concurrently; each task returns True on success.
            batch = patterns_to_cache[i:i + batch_size]
            tasks = [generate_image_preview(file) for file in batch]
            results = await asyncio.gather(*tasks)
            successful += sum(1 for r in results if r)
            # Update progress
            cache_progress["processed_files"] = min(i + batch_size, total_files)
            if i < total_files:
                # Report the last file of the batch as the "current" one.
                cache_progress["current_file"] = patterns_to_cache[min(i + batch_size - 1, total_files - 1)]
            # Log progress
            progress = min(i + batch_size, total_files)
            logger.info(f"Image cache generation progress: {progress}/{total_files} files processed")
        logger.info(f"Image cache generation completed: {successful}/{total_files} patterns cached successfully, {skipped_files} patterns skipped (already cached)")
    except Exception as e:
        logger.error(f"Error during image cache generation: {str(e)}")
        cache_progress["error"] = str(e)
        raise
async def generate_metadata_cache():
    """Generate metadata cache for missing patterns using set difference.

    Computes (all patterns) - (patterns already in the metadata cache), parses
    each missing file to extract first/last coordinates and count, and stores
    them via cache_pattern_metadata. Processes files sequentially in small
    batches with short sleeps to limit I/O pressure (sized for a Pi Zero 2 W).
    Re-raises on failure after recording the error.
    """
    global cache_progress
    try:
        logger.info("Starting metadata cache generation...")
        # Step 1: Get all pattern files
        pattern_files = await list_theta_rho_files_async()
        if not pattern_files:
            logger.info("No pattern files found. Skipping metadata cache generation.")
            return
        # Step 2: Get existing metadata keys
        metadata_cache = await load_metadata_cache_async()
        existing_keys = set(metadata_cache.get('data', {}).keys())
        # Step 3: Calculate delta (patterns missing from metadata)
        pattern_set = set(pattern_files)
        files_to_process = list(pattern_set - existing_keys)
        total_files = len(files_to_process)
        skipped_files = len(pattern_files) - total_files
        if total_files == 0:
            logger.info(f"All {skipped_files} files already have metadata cache. Skipping metadata generation.")
            return
        # Update progress state
        cache_progress.update({
            "stage": "metadata",
            "total_files": total_files,
            "processed_files": 0,
            "current_file": "",
            "error": None
        })
        logger.info(f"Generating metadata cache for {total_files} new files ({skipped_files} files already cached)...")
        # Process in smaller batches for Pi Zero 2 W
        batch_size = 3  # Reduced from 5
        successful = 0
        for i in range(0, total_files, batch_size):
            batch = files_to_process[i:i + batch_size]
            # Process files sequentially within batch (no parallel tasks)
            for file_name in batch:
                pattern_path = os.path.join(THETA_RHO_DIR, file_name)
                cache_progress["current_file"] = file_name
                try:
                    # Parse file to get metadata
                    coordinates = await asyncio.to_thread(parse_theta_rho_file, pattern_path)
                    if coordinates:
                        first_coord = {"x": coordinates[0][0], "y": coordinates[0][1]}
                        last_coord = {"x": coordinates[-1][0], "y": coordinates[-1][1]}
                        total_coords = len(coordinates)
                        # Cache the metadata
                        cache_pattern_metadata(file_name, first_coord, last_coord, total_coords)
                        successful += 1
                        logger.debug(f"Generated metadata for {file_name}")
                    # Small delay to reduce I/O pressure
                    await asyncio.sleep(0.05)
                except Exception as e:
                    logger.error(f"Failed to generate metadata for {file_name}: {str(e)}")
            # Update progress
            cache_progress["processed_files"] = min(i + batch_size, total_files)
            # Log progress
            progress = min(i + batch_size, total_files)
            logger.info(f"Metadata cache generation progress: {progress}/{total_files} files processed")
            # Delay between batches for system recovery
            if i + batch_size < total_files:
                await asyncio.sleep(0.3)
        logger.info(f"Metadata cache generation completed: {successful}/{total_files} patterns cached successfully, {skipped_files} patterns skipped (already cached)")
    except Exception as e:
        logger.error(f"Error during metadata cache generation: {str(e)}")
        cache_progress["error"] = str(e)
        raise
async def rebuild_cache():
    """Rebuild the entire cache for all pattern files.

    Runs metadata generation first, then regenerates image previews for every
    .thr pattern in batches of 5. Note: unlike generate_cache_background(),
    this does not update the cache_progress dict for the image phase.
    """
    logger.info("Starting cache rebuild...")
    # Ensure cache directory exists
    ensure_cache_dir()
    # First generate metadata cache for all files
    await generate_metadata_cache()
    # Then generate image previews
    pattern_files = [f for f in list_theta_rho_files() if f.endswith('.thr')]
    total_files = len(pattern_files)
    if total_files == 0:
        logger.info("No pattern files found to cache")
        return
    logger.info(f"Generating image previews for {total_files} pattern files...")
    # Process in batches
    batch_size = 5
    successful = 0
    for i in range(0, total_files, batch_size):
        # Render one batch concurrently; each task returns True on success.
        batch = pattern_files[i:i + batch_size]
        tasks = [generate_image_preview(file) for file in batch]
        results = await asyncio.gather(*tasks)
        successful += sum(1 for r in results if r)
        # Log progress
        progress = min(i + batch_size, total_files)
        logger.info(f"Image preview generation progress: {progress}/{total_files} files processed")
    logger.info(f"Cache rebuild completed: {successful}/{total_files} patterns cached successfully")
async def generate_cache_background():
    """Run cache generation in the background with progress tracking.

    Drives the two generation phases (metadata first, then image previews)
    and mirrors their status into the module-level cache_progress dict.
    Re-raises any failure after recording it so callers can react.
    """
    global cache_progress
    try:
        # Reset progress state before starting a new run.
        cache_progress.update({
            "is_running": True,
            "stage": "starting",
            "total_files": 0,
            "processed_files": 0,
            "current_file": "",
            "error": None
        })
        # First generate metadata cache
        await generate_metadata_cache()
        # Then generate image previews
        await generate_all_image_previews()
        # Mark as complete
        cache_progress.update({
            "is_running": False,
            "stage": "complete",
            "current_file": "",
            "error": None
        })
        logger.info("Background cache generation completed successfully")
    except Exception as e:
        logger.error(f"Background cache generation failed: {str(e)}")
        cache_progress.update({
            "is_running": False,
            "stage": "error",
            "error": str(e)
        })
        raise
  613. def get_cache_progress():
  614. """Get the current cache generation progress."""
  615. global cache_progress
  616. return cache_progress.copy()
  617. def is_cache_generation_needed():
  618. """Check if cache generation is needed."""
  619. pattern_files = [f for f in list_theta_rho_files() if f.endswith('.thr')]
  620. if not pattern_files:
  621. return False
  622. # Check if any files need caching
  623. patterns_to_cache = [f for f in pattern_files if needs_cache(f)]
  624. # Check metadata cache
  625. files_needing_metadata = []
  626. for file_name in pattern_files:
  627. if get_pattern_metadata(file_name) is None:
  628. files_needing_metadata.append(file_name)
  629. return len(patterns_to_cache) > 0 or len(files_needing_metadata) > 0
async def is_cache_generation_needed_async():
    """Check if cache generation is needed using simple set difference.

    Returns True if any patterns are missing from either metadata or image
    cache. On any error it returns False so startup is never blocked.
    """
    try:
        logger.debug("is_cache_generation_needed_async: Starting cache check")
        # Step 1: List all patterns
        logger.debug("is_cache_generation_needed_async: Listing pattern files...")
        pattern_files = await list_theta_rho_files_async()
        logger.debug(f"is_cache_generation_needed_async: Found {len(pattern_files) if pattern_files else 0} pattern files")
        if not pattern_files:
            logger.debug("is_cache_generation_needed_async: No pattern files found, returning False")
            return False
        pattern_set = set(pattern_files)
        logger.debug(f"is_cache_generation_needed_async: Pattern set contains {len(pattern_set)} unique patterns")
        # Step 2: Check metadata cache
        logger.debug("is_cache_generation_needed_async: Loading metadata cache...")
        metadata_cache = await load_metadata_cache_async()
        metadata_keys = set(metadata_cache.get('data', {}).keys())
        logger.debug(f"is_cache_generation_needed_async: Metadata cache has {len(metadata_keys)} entries")
        if pattern_set != metadata_keys:
            # Metadata is missing some patterns (or has stale extras).
            missing = pattern_set - metadata_keys
            extra = metadata_keys - pattern_set
            logger.debug(f"is_cache_generation_needed_async: Metadata mismatch - missing: {len(missing)}, extra: {len(extra)}")
            return True
        # Step 3: Check image cache
        logger.debug("is_cache_generation_needed_async: Checking image cache...")
        def _list_cached_images():
            """List all patterns that have cached images."""
            cached = set()
            if os.path.exists(CACHE_DIR):
                for pattern in pattern_files:
                    cache_path = get_cache_path(pattern)
                    if os.path.exists(cache_path):
                        cached.add(pattern)
            return cached
        cached_images = await asyncio.to_thread(_list_cached_images)
        logger.debug(f"is_cache_generation_needed_async: Found {len(cached_images)} cached images")
        if pattern_set != cached_images:
            # Some patterns missing image cache
            missing = pattern_set - cached_images
            logger.debug(f"is_cache_generation_needed_async: Image cache missing {len(missing)} patterns")
            return True
        logger.debug("is_cache_generation_needed_async: Cache is up to date, returning False")
        return False
    except Exception as e:
        logger.warning(f"Error checking cache status: {e}")
        return False  # Don't block startup on errors
  679. async def list_theta_rho_files_async():
  680. """Async version: List all theta-rho files."""
  681. def _walk_files():
  682. files = []
  683. for root, _, filenames in os.walk(THETA_RHO_DIR):
  684. # Only process .thr files to reduce memory usage
  685. thr_files = [f for f in filenames if f.endswith('.thr')]
  686. for file in thr_files:
  687. relative_path = os.path.relpath(os.path.join(root, file), THETA_RHO_DIR)
  688. # Normalize path separators to always use forward slashes for consistency across platforms
  689. relative_path = relative_path.replace(os.sep, '/')
  690. files.append(relative_path)
  691. return files
  692. files = await asyncio.to_thread(_walk_files)
  693. logger.debug(f"Found {len(files)} theta-rho files")
  694. return files # Already filtered for .thr