"""Image Cache Manager for pre-generating and managing image previews."""
import os
import json
import asyncio
import logging
from pathlib import Path
from modules.core.pattern_manager import list_theta_rho_files, THETA_RHO_DIR, parse_theta_rho_file
from modules.core.process_pool import get_pool as _get_process_pool

logger = logging.getLogger(__name__)

# Global cache progress state
cache_progress = {
    "is_running": False,
    "total_files": 0,
    "processed_files": 0,
    "current_file": "",
    "stage": "idle",  # idle, starting, metadata, images, complete, error
    "error": None
}

# Constants
CACHE_DIR = os.path.join(THETA_RHO_DIR, "cached_images")
METADATA_CACHE_FILE = "metadata_cache.json"  # Now in root directory

# Cache schema version - increment when the structure changes
CACHE_SCHEMA_VERSION = 1

# Expected cache schema structure
EXPECTED_CACHE_SCHEMA = {
    'version': CACHE_SCHEMA_VERSION,
    'structure': {
        'mtime': 'number',
        'metadata': {
            'first_coordinate': {'x': 'number', 'y': 'number'},
            'last_coordinate': {'x': 'number', 'y': 'number'},
            'total_coordinates': 'number'
        }
    }
}
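# Example of a cache file that satisfies this schema (illustrative values
# only; real entries are keyed by pattern paths relative to THETA_RHO_DIR):
#
#   {
#       "version": 1,
#       "data": {
#           "custom_patterns/spiral.thr": {
#               "mtime": 1700000000.0,
#               "metadata": {
#                   "first_coordinate": {"x": 0.0, "y": 0.0},
#                   "last_coordinate": {"x": 3.14, "y": 1.0},
#                   "total_coordinates": 4096
#               }
#           }
#       }
#   }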
def validate_cache_schema(cache_data):
    """Validate that cache data matches the expected schema structure."""
    try:
        # Check if version info exists
        if not isinstance(cache_data, dict):
            return False
        # Check for the version field - if missing, it's the old format
        cache_version = cache_data.get('version')
        if cache_version is None:
            logger.info("Cache file missing version info - treating as outdated schema")
            return False
        # Check if the version matches the current expected version
        if cache_version != CACHE_SCHEMA_VERSION:
            logger.info(f"Cache schema version mismatch: found {cache_version}, expected {CACHE_SCHEMA_VERSION}")
            return False
        # Check if the data section exists
        if 'data' not in cache_data:
            logger.warning("Cache file missing 'data' section")
            return False
        # Validate the structure of a few entries if they exist
        data_section = cache_data.get('data', {})
        if data_section and isinstance(data_section, dict):
            # Check the first entry's structure
            for pattern_file, entry in list(data_section.items())[:1]:  # Just check the first entry
                if not isinstance(entry, dict):
                    return False
                if 'mtime' not in entry or 'metadata' not in entry:
                    return False
                metadata = entry.get('metadata', {})
                required_fields = ['first_coordinate', 'last_coordinate', 'total_coordinates']
                if not all(field in metadata for field in required_fields):
                    return False
                # Validate the coordinate structure
                for coord_field in ['first_coordinate', 'last_coordinate']:
                    coord = metadata.get(coord_field)
                    if not isinstance(coord, dict) or 'x' not in coord or 'y' not in coord:
                        return False
        return True
    except Exception as e:
        logger.warning(f"Error validating cache schema: {str(e)}")
        return False
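# Usage sketch (hypothetical data, not from a real cache file): the legacy
# un-versioned layout fails validation, the current layout passes.
#
#   validate_cache_schema({'version': 1, 'data': {}})     # -> True
#   validate_cache_schema({'spiral.thr': {'mtime': 0}})   # -> False (no 'version')
#   validate_cache_schema({'version': 2, 'data': {}})     # -> False (wrong version)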
def invalidate_cache():
    """Delete only the metadata cache file, preserving the image cache."""
    try:
        # Delete the metadata cache file only
        if os.path.exists(METADATA_CACHE_FILE):
            os.remove(METADATA_CACHE_FILE)
            logger.info("Deleted outdated metadata cache file")
        # Keep the image cache directory intact - the images are still valid.
        # Just ensure the cache directory structure exists.
        ensure_cache_dir()
        return True
    except Exception as e:
        logger.error(f"Failed to invalidate metadata cache: {str(e)}")
        return False


async def invalidate_cache_async():
    """Async version: Delete only the metadata cache file, preserving the image cache."""
    try:
        # Delete the metadata cache file only
        if await asyncio.to_thread(os.path.exists, METADATA_CACHE_FILE):
            await asyncio.to_thread(os.remove, METADATA_CACHE_FILE)
            logger.info("Deleted outdated metadata cache file")
        # Keep the image cache directory intact - the images are still valid.
        # Just ensure the cache directory structure exists.
        await ensure_cache_dir_async()
        return True
    except Exception as e:
        logger.error(f"Failed to invalidate metadata cache: {str(e)}")
        return False
def ensure_cache_dir():
    """Ensure the cache directory exists with proper permissions."""
    try:
        Path(CACHE_DIR).mkdir(parents=True, exist_ok=True)
        # Initialize the metadata cache if it doesn't exist
        if not os.path.exists(METADATA_CACHE_FILE):
            initial_cache = {
                'version': CACHE_SCHEMA_VERSION,
                'data': {}
            }
            with open(METADATA_CACHE_FILE, 'w') as f:
                json.dump(initial_cache, f)
            try:
                os.chmod(METADATA_CACHE_FILE, 0o644)  # Conservative permissions
            except (OSError, PermissionError) as e:
                logger.debug(f"Could not set metadata cache file permissions: {str(e)}")
        for root, _dirs, files in os.walk(CACHE_DIR):
            try:
                os.chmod(root, 0o755)  # Conservative permissions
                for file in files:
                    file_path = os.path.join(root, file)
                    try:
                        os.chmod(file_path, 0o644)  # Conservative permissions
                    except (OSError, PermissionError) as e:
                        # Debug rather than error since this is not critical
                        logger.debug(f"Could not set permissions for file {file_path}: {str(e)}")
            except (OSError, PermissionError) as e:
                # Debug rather than error since this is not critical
                logger.debug(f"Could not set permissions for directory {root}: {str(e)}")
                continue
    except Exception as e:
        logger.error(f"Failed to create cache directory: {str(e)}")


async def ensure_cache_dir_async():
    """Async version: Ensure the cache directory exists with proper permissions."""
    try:
        await asyncio.to_thread(Path(CACHE_DIR).mkdir, parents=True, exist_ok=True)
        # Initialize the metadata cache if it doesn't exist
        if not await asyncio.to_thread(os.path.exists, METADATA_CACHE_FILE):
            initial_cache = {
                'version': CACHE_SCHEMA_VERSION,
                'data': {}
            }

            def _write_initial_cache():
                with open(METADATA_CACHE_FILE, 'w') as f:
                    json.dump(initial_cache, f)

            await asyncio.to_thread(_write_initial_cache)
            try:
                await asyncio.to_thread(os.chmod, METADATA_CACHE_FILE, 0o644)
            except (OSError, PermissionError) as e:
                logger.debug(f"Could not set metadata cache file permissions: {str(e)}")

        def _set_permissions():
            for root, _dirs, files in os.walk(CACHE_DIR):
                try:
                    os.chmod(root, 0o755)
                    for file in files:
                        file_path = os.path.join(root, file)
                        try:
                            os.chmod(file_path, 0o644)
                        except (OSError, PermissionError) as e:
                            logger.debug(f"Could not set permissions for file {file_path}: {str(e)}")
                except (OSError, PermissionError) as e:
                    logger.debug(f"Could not set permissions for directory {root}: {str(e)}")
                    continue

        await asyncio.to_thread(_set_permissions)
    except Exception as e:
        logger.error(f"Failed to create cache directory: {str(e)}")
def get_cache_path(pattern_file):
    """Get the cache path for a pattern file."""
    # Normalize path separators to handle both forward slashes and backslashes
    pattern_file = pattern_file.replace('\\', '/')
    # Mirror the pattern file's directory structure inside the cache
    cache_subpath = os.path.dirname(pattern_file)
    if cache_subpath:
        # Create the same subdirectory structure in the cache (including custom_patterns).
        # Convert forward slashes back to the platform-specific separator for os.path.join.
        cache_subpath = cache_subpath.replace('/', os.sep)
        cache_dir = os.path.join(CACHE_DIR, cache_subpath)
    else:
        # Files in the root pattern directory
        cache_dir = CACHE_DIR
    # Ensure the subdirectory exists
    os.makedirs(cache_dir, exist_ok=True)
    try:
        os.chmod(cache_dir, 0o755)  # Conservative permissions
    except (OSError, PermissionError) as e:
        # Debug rather than error since this is not critical
        logger.debug(f"Could not set permissions for cache subdirectory {cache_dir}: {str(e)}")
    # Use just the filename part for the cache file
    filename = os.path.basename(pattern_file)
    safe_name = filename.replace('\\', '_')
    return os.path.join(cache_dir, f"{safe_name}.webp")
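# Path-mapping sketch (hypothetical names): with THETA_RHO_DIR == "patterns",
# get_cache_path("custom_patterns/spiral.thr") creates
# patterns/cached_images/custom_patterns/ if needed and returns
# "patterns/cached_images/custom_patterns/spiral.thr.webp".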
def delete_pattern_cache(pattern_file):
    """Delete the cached preview image and metadata for a pattern file."""
    try:
        # Remove the cached image
        cache_path = get_cache_path(pattern_file)
        if os.path.exists(cache_path):
            os.remove(cache_path)
            logger.info(f"Deleted cached image: {cache_path}")
        # Remove the entry from the metadata cache
        metadata_cache = load_metadata_cache()
        data_section = metadata_cache.get('data', {})
        if pattern_file in data_section:
            del data_section[pattern_file]
            metadata_cache['data'] = data_section
            save_metadata_cache(metadata_cache)
            logger.info(f"Removed {pattern_file} from metadata cache")
        return True
    except Exception as e:
        logger.error(f"Failed to delete cache for {pattern_file}: {str(e)}")
        return False
def load_metadata_cache():
    """Load the metadata cache from disk with schema validation."""
    try:
        if os.path.exists(METADATA_CACHE_FILE):
            with open(METADATA_CACHE_FILE, 'r') as f:
                cache_data = json.load(f)
            # Validate the schema
            if not validate_cache_schema(cache_data):
                logger.info("Cache schema validation failed - invalidating cache")
                invalidate_cache()
                # Return an empty cache structure after invalidation
                return {
                    'version': CACHE_SCHEMA_VERSION,
                    'data': {}
                }
            return cache_data
    except Exception as e:
        logger.warning(f"Failed to load metadata cache: {str(e)} - invalidating cache")
        try:
            invalidate_cache()
        except Exception as invalidate_error:
            logger.error(f"Failed to invalidate corrupted cache: {str(invalidate_error)}")
    # Return an empty cache structure
    return {
        'version': CACHE_SCHEMA_VERSION,
        'data': {}
    }


async def load_metadata_cache_async():
    """Async version: Load the metadata cache from disk with schema validation."""
    try:
        if await asyncio.to_thread(os.path.exists, METADATA_CACHE_FILE):
            def _load_json():
                with open(METADATA_CACHE_FILE, 'r') as f:
                    return json.load(f)

            cache_data = await asyncio.to_thread(_load_json)
            # Validate the schema
            if not validate_cache_schema(cache_data):
                logger.info("Cache schema validation failed - invalidating cache")
                await invalidate_cache_async()
                # Return an empty cache structure after invalidation
                return {
                    'version': CACHE_SCHEMA_VERSION,
                    'data': {}
                }
            return cache_data
    except Exception as e:
        logger.warning(f"Failed to load metadata cache: {str(e)} - invalidating cache")
        try:
            await invalidate_cache_async()
        except Exception as invalidate_error:
            logger.error(f"Failed to invalidate corrupted cache: {str(invalidate_error)}")
    # Return an empty cache structure
    return {
        'version': CACHE_SCHEMA_VERSION,
        'data': {}
    }
def save_metadata_cache(cache_data):
    """Save the metadata cache to disk with version info."""
    try:
        ensure_cache_dir()
        # Make sure the cache data has the versioned structure
        if not isinstance(cache_data, dict) or 'version' not in cache_data:
            # Convert the old format or create a new structure
            if isinstance(cache_data, dict) and 'data' not in cache_data:
                # Old format - wrap the existing data
                structured_cache = {
                    'version': CACHE_SCHEMA_VERSION,
                    'data': cache_data
                }
            else:
                structured_cache = cache_data
        else:
            structured_cache = cache_data
        with open(METADATA_CACHE_FILE, 'w') as f:
            json.dump(structured_cache, f, indent=2)
    except Exception as e:
        logger.error(f"Failed to save metadata cache: {str(e)}")
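# Migration sketch (hypothetical legacy data): a pre-versioning cache shaped
# like {"spiral.thr": {"mtime": ..., "metadata": {...}}} is wrapped on save
# into {"version": CACHE_SCHEMA_VERSION, "data": {<legacy entries>}}; data
# that already carries 'version' is written through unchanged.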
def get_pattern_metadata(pattern_file):
    """Get cached metadata for a pattern file."""
    cache_data = load_metadata_cache()
    data_section = cache_data.get('data', {})
    # Use the cached metadata only if the file hasn't changed since caching
    if pattern_file in data_section:
        cached_entry = data_section[pattern_file]
        pattern_path = os.path.join(THETA_RHO_DIR, pattern_file)
        try:
            file_mtime = os.path.getmtime(pattern_path)
            if cached_entry.get('mtime') == file_mtime:
                return cached_entry.get('metadata')
        except OSError:
            pass
    return None


async def get_pattern_metadata_async(pattern_file):
    """Async version: Get cached metadata for a pattern file."""
    cache_data = await load_metadata_cache_async()
    data_section = cache_data.get('data', {})
    # Use the cached metadata only if the file hasn't changed since caching
    if pattern_file in data_section:
        cached_entry = data_section[pattern_file]
        pattern_path = os.path.join(THETA_RHO_DIR, pattern_file)
        try:
            file_mtime = await asyncio.to_thread(os.path.getmtime, pattern_path)
            if cached_entry.get('mtime') == file_mtime:
                return cached_entry.get('metadata')
        except OSError:
            pass
    return None
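# Invalidation sketch: entries are keyed to the file's mtime at caching time,
# so touching or rewriting a pattern file makes both getters return None and
# forces a re-parse on the next cache pass. For example (hypothetical file):
#
#   meta = get_pattern_metadata("spiral.thr")   # dict or None
#   if meta is not None:
#       print(meta["total_coordinates"])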
def cache_pattern_metadata(pattern_file, first_coord, last_coord, total_coords):
    """Cache metadata for a pattern file."""
    try:
        cache_data = load_metadata_cache()
        data_section = cache_data.get('data', {})
        pattern_path = os.path.join(THETA_RHO_DIR, pattern_file)
        file_mtime = os.path.getmtime(pattern_path)
        data_section[pattern_file] = {
            'mtime': file_mtime,
            'metadata': {
                'first_coordinate': first_coord,
                'last_coordinate': last_coord,
                'total_coordinates': total_coords
            }
        }
        cache_data['data'] = data_section
        save_metadata_cache(cache_data)
        logger.debug(f"Cached metadata for {pattern_file}")
    except Exception as e:
        logger.warning(f"Failed to cache metadata for {pattern_file}: {str(e)}")
def needs_cache(pattern_file):
    """Check if a pattern file needs its cache generated."""
    # Check if an image preview exists
    cache_path = get_cache_path(pattern_file)
    if not os.path.exists(cache_path):
        return True
    # Check if the metadata cache exists and is valid
    metadata = get_pattern_metadata(pattern_file)
    if metadata is None:
        return True
    return False


def needs_image_cache_only(pattern_file):
    """Quick check if a pattern file needs its image cache generated.

    Only checks for image file existence, not metadata validity.
    Used during startup for faster checking.
    """
    cache_path = get_cache_path(pattern_file)
    return not os.path.exists(cache_path)


async def needs_cache_async(pattern_file):
    """Async version: Check if a pattern file needs its cache generated."""
    # Check if an image preview exists
    cache_path = get_cache_path(pattern_file)
    if not await asyncio.to_thread(os.path.exists, cache_path):
        return True
    # Check if the metadata cache exists and is valid
    metadata = await get_pattern_metadata_async(pattern_file)
    if metadata is None:
        return True
    return False
async def generate_image_preview(pattern_file):
    """Generate an image preview for a single pattern file."""
    # Imported here to avoid a circular import; parse_theta_rho_file is
    # already available at module level.
    from modules.core.preview import generate_preview_image
    try:
        logger.debug(f"Starting preview generation for {pattern_file}")
        # Check whether the metadata cache needs updating
        metadata = get_pattern_metadata(pattern_file)
        if metadata is None:
            # Parse the file to get metadata (the only time we need to parse)
            logger.debug(f"Parsing {pattern_file} for metadata cache")
            pattern_path = os.path.join(THETA_RHO_DIR, pattern_file)
            try:
                loop = asyncio.get_running_loop()
                coordinates = await loop.run_in_executor(
                    _get_process_pool(),
                    parse_theta_rho_file,
                    pattern_path
                )
                if coordinates:
                    first_coord = {"x": coordinates[0][0], "y": coordinates[0][1]}
                    last_coord = {"x": coordinates[-1][0], "y": coordinates[-1][1]}
                    total_coords = len(coordinates)
                    # Cache the metadata for future use
                    cache_pattern_metadata(pattern_file, first_coord, last_coord, total_coords)
                    logger.debug(f"Metadata cached for {pattern_file}: {total_coords} coordinates")
                else:
                    logger.warning(f"No coordinates found in {pattern_file}")
            except Exception as e:
                logger.error(f"Failed to parse {pattern_file} for metadata: {str(e)}")
                # Continue with image generation even if metadata fails
        # Check whether the image needs generating
        cache_path = get_cache_path(pattern_file)
        if os.path.exists(cache_path):
            logger.debug(f"Skipping image generation for {pattern_file} - already cached")
            return True
        # Generate the image
        logger.debug(f"Generating image preview for {pattern_file}")
        image_content = await generate_preview_image(pattern_file)
        if not image_content:
            logger.error(f"Generated image content is empty for {pattern_file}")
            return False
        # Ensure the cache directory exists
        ensure_cache_dir()
        with open(cache_path, 'wb') as f:
            f.write(image_content)
        try:
            os.chmod(cache_path, 0o644)  # Conservative permissions
        except (OSError, PermissionError) as e:
            # Debug rather than error since this is not critical
            logger.debug(f"Could not set cache file permissions for {pattern_file}: {str(e)}")
        logger.debug(f"Successfully generated preview for {pattern_file}")
        return True
    except Exception as e:
        logger.error(f"Failed to generate image for {pattern_file}: {str(e)}")
        return False
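# Single-pattern sketch (hypothetical file name): safe to call repeatedly,
# since parsing happens only when metadata is missing and the image is
# rewritten only when the .webp is absent:
#
#   ok = asyncio.run(generate_image_preview("custom_patterns/spiral.thr"))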
async def generate_all_image_previews():
    """Generate image previews for missing patterns using set difference."""
    global cache_progress
    try:
        await ensure_cache_dir_async()
        # Step 1: Get all pattern files
        pattern_files = await list_theta_rho_files_async()
        if not pattern_files:
            logger.info("No .thr pattern files found. Skipping image preview generation.")
            return

        # Step 2: Find patterns with an existing cache
        def _find_cached_patterns():
            cached = set()
            for pattern in pattern_files:
                cache_path = get_cache_path(pattern)
                if os.path.exists(cache_path):
                    cached.add(pattern)
            return cached

        cached_patterns = await asyncio.to_thread(_find_cached_patterns)
        # Step 3: Compute the delta (patterns missing an image cache)
        pattern_set = set(pattern_files)
        patterns_to_cache = list(pattern_set - cached_patterns)
        total_files = len(patterns_to_cache)
        skipped_files = len(pattern_files) - total_files
        if total_files == 0:
            logger.info(f"All {skipped_files} pattern files already have image previews. Skipping image generation.")
            return
        # Update the progress state
        cache_progress.update({
            "stage": "images",
            "total_files": total_files,
            "processed_files": 0,
            "current_file": "",
            "error": None
        })
        logger.info(f"Generating image cache for {total_files} uncached .thr patterns ({skipped_files} already cached)...")
        batch_size = 5
        successful = 0
        for i in range(0, total_files, batch_size):
            batch = patterns_to_cache[i:i + batch_size]
            tasks = [generate_image_preview(file) for file in batch]
            results = await asyncio.gather(*tasks)
            successful += sum(1 for r in results if r)
            # Update progress
            cache_progress["processed_files"] = min(i + batch_size, total_files)
            cache_progress["current_file"] = patterns_to_cache[min(i + batch_size - 1, total_files - 1)]
            # Log progress
            progress = min(i + batch_size, total_files)
            logger.info(f"Image cache generation progress: {progress}/{total_files} files processed")
        logger.info(f"Image cache generation completed: {successful}/{total_files} patterns cached successfully, {skipped_files} patterns skipped (already cached)")
    except Exception as e:
        logger.error(f"Error during image cache generation: {str(e)}")
        cache_progress["error"] = str(e)
        raise
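# Batching note: asyncio.gather renders up to batch_size previews
# concurrently, so one slow pattern only delays its own batch and the
# progress counters advance in batch_size steps. Raising batch_size trades
# memory and CPU pressure for throughput; 5 is the default used here.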
async def generate_metadata_cache():
    """Generate the metadata cache for missing patterns using set difference."""
    global cache_progress
    try:
        logger.info("Starting metadata cache generation...")
        # Step 1: Get all pattern files
        pattern_files = await list_theta_rho_files_async()
        if not pattern_files:
            logger.info("No pattern files found. Skipping metadata cache generation.")
            return
        # Step 2: Get the existing metadata keys
        metadata_cache = await load_metadata_cache_async()
        existing_keys = set(metadata_cache.get('data', {}).keys())
        # Step 3: Compute the delta (patterns missing from metadata)
        pattern_set = set(pattern_files)
        files_to_process = list(pattern_set - existing_keys)
        total_files = len(files_to_process)
        skipped_files = len(pattern_files) - total_files
        if total_files == 0:
            logger.info(f"All {skipped_files} files already have metadata cache. Skipping metadata generation.")
            return
        # Update the progress state
        cache_progress.update({
            "stage": "metadata",
            "total_files": total_files,
            "processed_files": 0,
            "current_file": "",
            "error": None
        })
        logger.info(f"Generating metadata cache for {total_files} new files ({skipped_files} files already cached)...")
        # Process in smaller batches for the Pi Zero 2 W
        batch_size = 3  # Reduced from 5
        successful = 0
        for i in range(0, total_files, batch_size):
            batch = files_to_process[i:i + batch_size]
            # Process files sequentially within the batch (no parallel tasks)
            for file_name in batch:
                pattern_path = os.path.join(THETA_RHO_DIR, file_name)
                cache_progress["current_file"] = file_name
                try:
                    # Parse the file in a separate process to avoid GIL contention with the motion thread
                    loop = asyncio.get_running_loop()
                    coordinates = await loop.run_in_executor(
                        _get_process_pool(),
                        parse_theta_rho_file,
                        pattern_path
                    )
                    if coordinates:
                        first_coord = {"x": coordinates[0][0], "y": coordinates[0][1]}
                        last_coord = {"x": coordinates[-1][0], "y": coordinates[-1][1]}
                        total_coords = len(coordinates)
                        # Cache the metadata
                        cache_pattern_metadata(file_name, first_coord, last_coord, total_coords)
                        successful += 1
                        logger.debug(f"Generated metadata for {file_name}")
                    # Small delay to reduce I/O pressure
                    await asyncio.sleep(0.05)
                except Exception as e:
                    logger.error(f"Failed to generate metadata for {file_name}: {str(e)}")
            # Update progress
            cache_progress["processed_files"] = min(i + batch_size, total_files)
            # Log progress
            progress = min(i + batch_size, total_files)
            logger.info(f"Metadata cache generation progress: {progress}/{total_files} files processed")
            # Delay between batches for system recovery
            if i + batch_size < total_files:
                await asyncio.sleep(0.3)
        logger.info(f"Metadata cache generation completed: {successful}/{total_files} patterns cached successfully, {skipped_files} patterns skipped (already cached)")
    except Exception as e:
        logger.error(f"Error during metadata cache generation: {str(e)}")
        cache_progress["error"] = str(e)
        raise
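# Throttling note: metadata generation runs sequentially in batches of 3,
# with a 0.05 s pause per file and a 0.3 s pause between batches, so that
# parsing on a Pi Zero 2 W does not starve the motion thread. On faster
# hosts these delays could plausibly be reduced (untested assumption).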
async def rebuild_cache():
    """Rebuild the cache for all pattern files.

    Existing valid entries are reused rather than regenerated: metadata
    generation skips cached keys and generate_image_preview() returns early
    when the preview image already exists.
    """
    logger.info("Starting cache rebuild...")
    # Ensure the cache directory exists
    ensure_cache_dir()
    # First generate the metadata cache for all files
    await generate_metadata_cache()
    # Then generate the image previews
    pattern_files = [f for f in list_theta_rho_files() if f.endswith('.thr')]
    total_files = len(pattern_files)
    if total_files == 0:
        logger.info("No pattern files found to cache")
        return
    logger.info(f"Generating image previews for {total_files} pattern files...")
    # Process in batches
    batch_size = 5
    successful = 0
    for i in range(0, total_files, batch_size):
        batch = pattern_files[i:i + batch_size]
        tasks = [generate_image_preview(file) for file in batch]
        results = await asyncio.gather(*tasks)
        successful += sum(1 for r in results if r)
        # Log progress
        progress = min(i + batch_size, total_files)
        logger.info(f"Image preview generation progress: {progress}/{total_files} files processed")
    logger.info(f"Cache rebuild completed: {successful}/{total_files} patterns cached successfully")
async def generate_cache_background():
    """Run cache generation in the background with progress tracking."""
    global cache_progress
    try:
        cache_progress.update({
            "is_running": True,
            "stage": "starting",
            "total_files": 0,
            "processed_files": 0,
            "current_file": "",
            "error": None
        })
        # First generate the metadata cache
        await generate_metadata_cache()
        # Then generate the image previews
        await generate_all_image_previews()
        # Mark as complete
        cache_progress.update({
            "is_running": False,
            "stage": "complete",
            "current_file": "",
            "error": None
        })
        logger.info("Background cache generation completed successfully")
    except Exception as e:
        logger.error(f"Background cache generation failed: {str(e)}")
        cache_progress.update({
            "is_running": False,
            "stage": "error",
            "error": str(e)
        })
        raise
def get_cache_progress():
    """Get the current cache generation progress.

    Returns a reference to the cache_progress dict for read-only access.
    The WebSocket handler should not modify this dict.
    """
    return cache_progress  # Return a reference instead of a copy for performance
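# Consumer sketch (hypothetical WebSocket handler): the returned dict is
# mutated concurrently by the generation tasks, so snapshot it before
# serializing rather than holding the live reference:
#
#   snapshot = dict(get_cache_progress())
#   await websocket.send_json(snapshot)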
def is_cache_generation_needed():
    """Check if cache generation is needed."""
    pattern_files = [f for f in list_theta_rho_files() if f.endswith('.thr')]
    if not pattern_files:
        return False
    # Check if any files need caching
    patterns_to_cache = [f for f in pattern_files if needs_cache(f)]
    # Check the metadata cache
    files_needing_metadata = []
    for file_name in pattern_files:
        if get_pattern_metadata(file_name) is None:
            files_needing_metadata.append(file_name)
    return len(patterns_to_cache) > 0 or len(files_needing_metadata) > 0
async def is_cache_generation_needed_async():
    """Check if cache generation is needed using simple set difference.

    Returns True if any patterns are missing from either the metadata or
    the image cache.
    """
    try:
        # Step 1: List all patterns
        pattern_files = await list_theta_rho_files_async()
        if not pattern_files:
            return False
        pattern_set = set(pattern_files)
        # Step 2: Check the metadata cache
        metadata_cache = await load_metadata_cache_async()
        metadata_keys = set(metadata_cache.get('data', {}).keys())
        if pattern_set - metadata_keys:
            # Metadata is missing some patterns
            return True

        # Step 3: Check the image cache
        def _list_cached_images():
            """List all patterns that have cached images."""
            cached = set()
            if os.path.exists(CACHE_DIR):
                for pattern in pattern_files:
                    cache_path = get_cache_path(pattern)
                    if os.path.exists(cache_path):
                        cached.add(pattern)
            return cached

        cached_images = await asyncio.to_thread(_list_cached_images)
        if pattern_set - cached_images:
            # Some patterns are missing an image cache
            return True
        return False
    except Exception as e:
        logger.warning(f"Error checking cache status: {e}")
        return False  # Don't block startup on errors
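# Set-difference note: {a, b} - {a} == {b} flags exactly the uncached
# patterns. With the equality check (!=) used previously, a stale cache key
# left behind by a deleted pattern would report work to do even though the
# generators, which compute the same difference, would immediately skip it.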
async def list_theta_rho_files_async():
    """Async version: List all theta-rho files."""
    def _walk_files():
        files = []
        for root, _, filenames in os.walk(THETA_RHO_DIR):
            # Only process .thr files to reduce memory usage
            thr_files = [f for f in filenames if f.endswith('.thr')]
            for file in thr_files:
                relative_path = os.path.relpath(os.path.join(root, file), THETA_RHO_DIR)
                # Normalize separators to forward slashes for cross-platform consistency
                relative_path = relative_path.replace(os.sep, '/')
                files.append(relative_path)
        return files

    files = await asyncio.to_thread(_walk_files)
    logger.debug(f"Found {len(files)} theta-rho files")
    return files  # Already filtered for .thr
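# Startup flow sketch (hypothetical caller): check cheaply, then run the
# full generation as a background task so the event loop is never blocked:
#
#   if await is_cache_generation_needed_async():
#       asyncio.create_task(generate_cache_background())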