  1. """Image Cache Manager for pre-generating and managing image previews."""
  2. import os
  3. import json
  4. import asyncio
  5. import logging
  6. from pathlib import Path
  7. from modules.core.pattern_manager import list_theta_rho_files, THETA_RHO_DIR, parse_theta_rho_file
  8. from modules.core.process_pool import get_pool as _get_process_pool
  9. logger = logging.getLogger(__name__)
  10. # Global cache progress state
  11. cache_progress = {
  12. "is_running": False,
  13. "total_files": 0,
  14. "processed_files": 0,
  15. "current_file": "",
  16. "stage": "idle", # idle, metadata, images, complete
  17. "error": None
  18. }
  19. # Lock to prevent race conditions when writing to metadata cache
  20. # Multiple concurrent tasks (from asyncio.gather) can try to read-modify-write simultaneously
  21. # Lazily initialized to avoid "attached to a different loop" errors
  22. _metadata_cache_lock: "asyncio.Lock | None" = None
  23. def _get_metadata_cache_lock() -> asyncio.Lock:
  24. """Get or create the metadata cache lock in the current event loop."""
  25. global _metadata_cache_lock
  26. if _metadata_cache_lock is None:
  27. _metadata_cache_lock = asyncio.Lock()
  28. return _metadata_cache_lock
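# Note: before Python 3.10, asyncio.Lock() bound itself to whatever event loop
# was current at construction time, so a lock created at import could later
# raise "got Future attached to a different loop" when used from the running
# loop. Deferring construction to _get_metadata_cache_lock() avoids this.
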
# Constants
CACHE_DIR = os.path.join(THETA_RHO_DIR, "cached_images")
METADATA_CACHE_FILE = "metadata_cache.json"  # Now in root directory

# Cache schema version - increment when structure changes
CACHE_SCHEMA_VERSION = 1

# Expected cache schema structure
EXPECTED_CACHE_SCHEMA = {
    'version': CACHE_SCHEMA_VERSION,
    'structure': {
        'mtime': 'number',
        'metadata': {
            'first_coordinate': {'x': 'number', 'y': 'number'},
            'last_coordinate': {'x': 'number', 'y': 'number'},
            'total_coordinates': 'number'
        }
    }
}

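# Illustrative on-disk shape of metadata_cache.json (example values only):
# {
#   "version": 1,
#   "data": {
#     "rose.thr": {
#       "mtime": 1700000000.0,
#       "metadata": {
#         "first_coordinate": {"x": 0.0, "y": 0.0},
#         "last_coordinate": {"x": 0.0, "y": 1.0},
#         "total_coordinates": 4096
#       }
#     }
#   }
# }
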
def validate_cache_schema(cache_data):
    """Validate that cache data matches the expected schema structure."""
    try:
        # Check if version info exists
        if not isinstance(cache_data, dict):
            return False
        # Check for version field - if missing, it's the old format
        cache_version = cache_data.get('version')
        if cache_version is None:
            logger.info("Cache file missing version info - treating as outdated schema")
            return False
        # Check if version matches the current expected version
        if cache_version != CACHE_SCHEMA_VERSION:
            logger.info(f"Cache schema version mismatch: found {cache_version}, expected {CACHE_SCHEMA_VERSION}")
            return False
        # Check if the data section exists
        if 'data' not in cache_data:
            logger.warning("Cache file missing 'data' section")
            return False
        # Validate structure of a few entries if they exist
        data_section = cache_data.get('data', {})
        if data_section and isinstance(data_section, dict):
            # Check first entry structure
            for pattern_file, entry in list(data_section.items())[:1]:  # Just check first entry
                if not isinstance(entry, dict):
                    return False
                if 'mtime' not in entry or 'metadata' not in entry:
                    return False
                metadata = entry.get('metadata', {})
                required_fields = ['first_coordinate', 'last_coordinate', 'total_coordinates']
                if not all(field in metadata for field in required_fields):
                    return False
                # Validate coordinate structure
                for coord_field in ['first_coordinate', 'last_coordinate']:
                    coord = metadata.get(coord_field)
                    if not isinstance(coord, dict) or 'x' not in coord or 'y' not in coord:
                        return False
        return True
    except Exception as e:
        logger.warning(f"Error validating cache schema: {str(e)}")
        return False

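# Illustrative results (derived from the checks above):
#   validate_cache_schema({"version": 1, "data": {}})   -> True
#   validate_cache_schema({"data": {}})                 -> False (missing version)
#   validate_cache_schema({"version": 0, "data": {}})   -> False (version mismatch)
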
def invalidate_cache():
    """Delete only the metadata cache file, preserving the image cache."""
    try:
        # Delete metadata cache file only
        if os.path.exists(METADATA_CACHE_FILE):
            os.remove(METADATA_CACHE_FILE)
            logger.info("Deleted outdated metadata cache file")
        # Keep the image cache directory intact - images are still valid.
        # Just ensure the cache directory structure exists.
        ensure_cache_dir()
        return True
    except Exception as e:
        logger.error(f"Failed to invalidate metadata cache: {str(e)}")
        return False

async def invalidate_cache_async():
    """Async version: Delete only the metadata cache file, preserving the image cache."""
    try:
        # Delete metadata cache file only
        if await asyncio.to_thread(os.path.exists, METADATA_CACHE_FILE):
            await asyncio.to_thread(os.remove, METADATA_CACHE_FILE)
            logger.info("Deleted outdated metadata cache file")
        # Keep the image cache directory intact - images are still valid.
        # Just ensure the cache directory structure exists.
        await ensure_cache_dir_async()
        return True
    except Exception as e:
        logger.error(f"Failed to invalidate metadata cache: {str(e)}")
        return False

def ensure_cache_dir():
    """Ensure the cache directory exists with proper permissions."""
    try:
        Path(CACHE_DIR).mkdir(parents=True, exist_ok=True)
        # Initialize the metadata cache if it doesn't exist
        if not os.path.exists(METADATA_CACHE_FILE):
            initial_cache = {
                'version': CACHE_SCHEMA_VERSION,
                'data': {}
            }
            with open(METADATA_CACHE_FILE, 'w') as f:
                json.dump(initial_cache, f)
            try:
                os.chmod(METADATA_CACHE_FILE, 0o644)  # More conservative permissions
            except (OSError, PermissionError) as e:
                logger.debug(f"Could not set metadata cache file permissions: {str(e)}")
        for root, dirs, files in os.walk(CACHE_DIR):
            try:
                os.chmod(root, 0o755)  # More conservative permissions
                for file in files:
                    file_path = os.path.join(root, file)
                    try:
                        os.chmod(file_path, 0o644)  # More conservative permissions
                    except (OSError, PermissionError) as e:
                        # Log as debug instead of error since this is not critical
                        logger.debug(f"Could not set permissions for file {file_path}: {str(e)}")
            except (OSError, PermissionError) as e:
                # Log as debug instead of error since this is not critical
                logger.debug(f"Could not set permissions for directory {root}: {str(e)}")
                continue
    except Exception as e:
        logger.error(f"Failed to create cache directory: {str(e)}")

async def ensure_cache_dir_async():
    """Async version: Ensure the cache directory exists with proper permissions."""
    try:
        await asyncio.to_thread(Path(CACHE_DIR).mkdir, parents=True, exist_ok=True)
        # Initialize the metadata cache if it doesn't exist
        if not await asyncio.to_thread(os.path.exists, METADATA_CACHE_FILE):
            initial_cache = {
                'version': CACHE_SCHEMA_VERSION,
                'data': {}
            }

            def _write_initial_cache():
                with open(METADATA_CACHE_FILE, 'w') as f:
                    json.dump(initial_cache, f)

            await asyncio.to_thread(_write_initial_cache)
            try:
                await asyncio.to_thread(os.chmod, METADATA_CACHE_FILE, 0o644)
            except (OSError, PermissionError) as e:
                logger.debug(f"Could not set metadata cache file permissions: {str(e)}")

        def _set_permissions():
            for root, dirs, files in os.walk(CACHE_DIR):
                try:
                    os.chmod(root, 0o755)
                    for file in files:
                        file_path = os.path.join(root, file)
                        try:
                            os.chmod(file_path, 0o644)
                        except (OSError, PermissionError) as e:
                            logger.debug(f"Could not set permissions for file {file_path}: {str(e)}")
                except (OSError, PermissionError) as e:
                    logger.debug(f"Could not set permissions for directory {root}: {str(e)}")
                    continue

        await asyncio.to_thread(_set_permissions)
    except Exception as e:
        logger.error(f"Failed to create cache directory: {str(e)}")

def get_cache_path(pattern_file):
    """Get the cache path for a pattern file."""
    # Normalize path separators to handle both forward slashes and backslashes
    pattern_file = pattern_file.replace('\\', '/')
    # Create subdirectories in the cache to match the pattern file structure
    cache_subpath = os.path.dirname(pattern_file)
    if cache_subpath:
        # Create the same subdirectory structure in the cache (including custom_patterns).
        # Convert forward slashes back to the platform-specific separator for os.path.join.
        cache_subpath = cache_subpath.replace('/', os.sep)
        cache_dir = os.path.join(CACHE_DIR, cache_subpath)
    else:
        # For files in the root pattern directory
        cache_dir = CACHE_DIR
    # Ensure the subdirectory exists
    os.makedirs(cache_dir, exist_ok=True)
    try:
        os.chmod(cache_dir, 0o755)  # More conservative permissions
    except (OSError, PermissionError) as e:
        # Log as debug instead of error since this is not critical
        logger.debug(f"Could not set permissions for cache subdirectory {cache_dir}: {str(e)}")
    # Use just the filename part for the cache file
    filename = os.path.basename(pattern_file)
    safe_name = filename.replace('\\', '_')
    return os.path.join(cache_dir, f"{safe_name}.webp")

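# Illustrative mapping (assuming THETA_RHO_DIR == "patterns"; the call also
# creates the cache subdirectory as a side effect):
#   get_cache_path("custom_patterns/spiral.thr")
#       -> "patterns/cached_images/custom_patterns/spiral.thr.webp"
#   get_cache_path("rose.thr")
#       -> "patterns/cached_images/rose.thr.webp"
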
def delete_pattern_cache(pattern_file):
    """Delete the cached preview image and metadata for a pattern file."""
    try:
        # Remove the cached image
        cache_path = get_cache_path(pattern_file)
        if os.path.exists(cache_path):
            os.remove(cache_path)
            logger.info(f"Deleted cached image: {cache_path}")
        # Remove from the metadata cache
        metadata_cache = load_metadata_cache()
        data_section = metadata_cache.get('data', {})
        if pattern_file in data_section:
            del data_section[pattern_file]
            metadata_cache['data'] = data_section
            save_metadata_cache(metadata_cache)
            logger.info(f"Removed {pattern_file} from metadata cache")
        return True
    except Exception as e:
        logger.error(f"Failed to delete cache for {pattern_file}: {str(e)}")
        return False

def load_metadata_cache():
    """Load the metadata cache from disk with schema validation."""
    try:
        if os.path.exists(METADATA_CACHE_FILE):
            with open(METADATA_CACHE_FILE, 'r') as f:
                cache_data = json.load(f)
            # Validate schema
            if not validate_cache_schema(cache_data):
                logger.info("Cache schema validation failed - invalidating cache")
                invalidate_cache()
                # Return an empty cache structure after invalidation
                return {
                    'version': CACHE_SCHEMA_VERSION,
                    'data': {}
                }
            return cache_data
    except Exception as e:
        logger.warning(f"Failed to load metadata cache: {str(e)} - invalidating cache")
        try:
            invalidate_cache()
        except Exception as invalidate_error:
            logger.error(f"Failed to invalidate corrupted cache: {str(invalidate_error)}")
    # Return an empty cache structure
    return {
        'version': CACHE_SCHEMA_VERSION,
        'data': {}
    }

async def load_metadata_cache_async():
    """Async version: Load the metadata cache from disk with schema validation."""
    try:
        if await asyncio.to_thread(os.path.exists, METADATA_CACHE_FILE):
            def _load_json():
                with open(METADATA_CACHE_FILE, 'r') as f:
                    return json.load(f)

            cache_data = await asyncio.to_thread(_load_json)
            # Validate schema
            if not validate_cache_schema(cache_data):
                logger.info("Cache schema validation failed - invalidating cache")
                await invalidate_cache_async()
                # Return an empty cache structure after invalidation
                return {
                    'version': CACHE_SCHEMA_VERSION,
                    'data': {}
                }
            return cache_data
    except Exception as e:
        logger.warning(f"Failed to load metadata cache: {str(e)} - invalidating cache")
        try:
            await invalidate_cache_async()
        except Exception as invalidate_error:
            logger.error(f"Failed to invalidate corrupted cache: {str(invalidate_error)}")
    # Return an empty cache structure
    return {
        'version': CACHE_SCHEMA_VERSION,
        'data': {}
    }

def save_metadata_cache(cache_data):
    """Save the metadata cache to disk with version info."""
    try:
        ensure_cache_dir()
        # Ensure the cache data has the proper structure
        if not isinstance(cache_data, dict) or 'version' not in cache_data:
            # Convert the old format or create a new structure
            if isinstance(cache_data, dict) and 'data' not in cache_data:
                # Old format - wrap the existing data
                structured_cache = {
                    'version': CACHE_SCHEMA_VERSION,
                    'data': cache_data
                }
            else:
                structured_cache = cache_data
        else:
            structured_cache = cache_data
        with open(METADATA_CACHE_FILE, 'w') as f:
            json.dump(structured_cache, f, indent=2)
    except Exception as e:
        logger.error(f"Failed to save metadata cache: {str(e)}")

def get_pattern_metadata(pattern_file):
    """Get cached metadata for a pattern file."""
    cache_data = load_metadata_cache()
    data_section = cache_data.get('data', {})
    # Check if we have cached metadata and the file hasn't changed
    if pattern_file in data_section:
        cached_entry = data_section[pattern_file]
        pattern_path = os.path.join(THETA_RHO_DIR, pattern_file)
        try:
            file_mtime = os.path.getmtime(pattern_path)
            if cached_entry.get('mtime') == file_mtime:
                return cached_entry.get('metadata')
        except OSError:
            pass
    return None


async def get_pattern_metadata_async(pattern_file):
    """Async version: Get cached metadata for a pattern file."""
    cache_data = await load_metadata_cache_async()
    data_section = cache_data.get('data', {})
    # Check if we have cached metadata and the file hasn't changed
    if pattern_file in data_section:
        cached_entry = data_section[pattern_file]
        pattern_path = os.path.join(THETA_RHO_DIR, pattern_file)
        try:
            file_mtime = await asyncio.to_thread(os.path.getmtime, pattern_path)
            if cached_entry.get('mtime') == file_mtime:
                return cached_entry.get('metadata')
        except OSError:
            pass
    return None

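# Illustrative return shape for both variants (example values only):
#   get_pattern_metadata("rose.thr")
#       -> {"first_coordinate": {"x": 0.0, "y": 0.0},
#           "last_coordinate": {"x": 0.0, "y": 1.0},
#           "total_coordinates": 4096}
#   Returns None when the file is uncached or its mtime no longer matches.
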
async def cache_pattern_metadata(pattern_file, first_coord, last_coord, total_coords):
    """Cache metadata for a pattern file.

    Uses an asyncio.Lock to prevent race conditions when multiple concurrent tasks
    (from asyncio.gather) try to read-modify-write the cache file simultaneously.
    """
    async with _get_metadata_cache_lock():
        try:
            cache_data = await asyncio.to_thread(load_metadata_cache)
            data_section = cache_data.get('data', {})
            pattern_path = os.path.join(THETA_RHO_DIR, pattern_file)
            file_mtime = await asyncio.to_thread(os.path.getmtime, pattern_path)
            data_section[pattern_file] = {
                'mtime': file_mtime,
                'metadata': {
                    'first_coordinate': first_coord,
                    'last_coordinate': last_coord,
                    'total_coordinates': total_coords
                }
            }
            cache_data['data'] = data_section
            await asyncio.to_thread(save_metadata_cache, cache_data)
            logger.debug(f"Cached metadata for {pattern_file}")
        except Exception as e:
            logger.warning(f"Failed to cache metadata for {pattern_file}: {str(e)}")

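# Illustrative concurrent usage (hypothetical call site): many of these can run
# under asyncio.gather without corrupting the cache file, because each
# read-modify-write is serialized by _get_metadata_cache_lock():
#   await asyncio.gather(*[
#       cache_pattern_metadata(f, {"x": 0.0, "y": 0.0}, {"x": 0.0, "y": 1.0}, 2)
#       for f in ["a.thr", "b.thr"]
#   ])
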
def needs_cache(pattern_file):
    """Check if a pattern file needs its cache generated."""
    # Check if the image preview exists
    cache_path = get_cache_path(pattern_file)
    if not os.path.exists(cache_path):
        return True
    # Check if the metadata cache exists and is valid
    metadata = get_pattern_metadata(pattern_file)
    if metadata is None:
        return True
    return False


def needs_image_cache_only(pattern_file):
    """Quick check if a pattern file needs its image cache generated.

    Only checks for image file existence, not metadata validity.
    Used during startup for faster checking.
    """
    cache_path = get_cache_path(pattern_file)
    return not os.path.exists(cache_path)


async def needs_cache_async(pattern_file):
    """Async version: Check if a pattern file needs its cache generated."""
    # Check if the image preview exists
    cache_path = get_cache_path(pattern_file)
    if not await asyncio.to_thread(os.path.exists, cache_path):
        return True
    # Check if the metadata cache exists and is valid
    metadata = await get_pattern_metadata_async(pattern_file)
    if metadata is None:
        return True
    return False

async def generate_image_preview(pattern_file):
    """Generate an image preview for a single pattern file."""
    from modules.core.preview import generate_preview_image
    try:
        logger.debug(f"Starting preview generation for {pattern_file}")
        # Check if we need to update the metadata cache
        metadata = get_pattern_metadata(pattern_file)
        if metadata is None:
            # Parse the file to get metadata (this is the only time we need to parse)
            logger.debug(f"Parsing {pattern_file} for metadata cache")
            pattern_path = os.path.join(THETA_RHO_DIR, pattern_file)
            try:
                # Use the process pool to avoid GIL contention with the motion thread.
                # Add timeout protection to prevent hanging on problematic files.
                loop = asyncio.get_running_loop()
                coordinates = await asyncio.wait_for(
                    loop.run_in_executor(
                        _get_process_pool(),
                        parse_theta_rho_file,
                        pattern_path
                    ),
                    timeout=30.0  # 30-second timeout per file
                )
                if coordinates:
                    first_coord = {"x": coordinates[0][0], "y": coordinates[0][1]}
                    last_coord = {"x": coordinates[-1][0], "y": coordinates[-1][1]}
                    total_coords = len(coordinates)
                    # Cache the metadata for future use
                    await cache_pattern_metadata(pattern_file, first_coord, last_coord, total_coords)
                    logger.debug(f"Metadata cached for {pattern_file}: {total_coords} coordinates")
                else:
                    logger.warning(f"No coordinates found in {pattern_file}")
            except asyncio.TimeoutError:
                logger.error(f"Timeout parsing {pattern_file} for metadata - skipping")
            except Exception as e:
                logger.error(f"Failed to parse {pattern_file} for metadata: {str(e)}")
                # Continue with image generation even if metadata fails
        # Check if we need to generate the image
        cache_path = get_cache_path(pattern_file)
        if os.path.exists(cache_path):
            logger.debug(f"Skipping image generation for {pattern_file} - already cached")
            return True
        # Generate the image
        logger.debug(f"Generating image preview for {pattern_file}")
        image_content = await generate_preview_image(pattern_file)
        if not image_content:
            logger.error(f"Generated image content is empty for {pattern_file}")
            return False
        # Ensure the cache directory exists
        ensure_cache_dir()
        with open(cache_path, 'wb') as f:
            f.write(image_content)
        try:
            os.chmod(cache_path, 0o644)  # More conservative permissions
        except (OSError, PermissionError) as e:
            # Log as debug instead of error since this is not critical
            logger.debug(f"Could not set cache file permissions for {pattern_file}: {str(e)}")
        logger.debug(f"Successfully generated preview for {pattern_file}")
        return True
    except Exception as e:
        logger.error(f"Failed to generate image for {pattern_file}: {str(e)}")
        return False

async def generate_all_image_previews():
    """Generate image previews for missing patterns using set difference."""
    global cache_progress
    try:
        await ensure_cache_dir_async()

        # Step 1: Get all pattern files
        pattern_files = await list_theta_rho_files_async()
        if not pattern_files:
            logger.info("No .thr pattern files found. Skipping image preview generation.")
            return

        # Step 2: Find patterns with an existing cache
        def _find_cached_patterns():
            cached = set()
            for pattern in pattern_files:
                cache_path = get_cache_path(pattern)
                if os.path.exists(cache_path):
                    cached.add(pattern)
            return cached

        cached_patterns = await asyncio.to_thread(_find_cached_patterns)

        # Step 3: Calculate the delta (patterns missing image cache)
        pattern_set = set(pattern_files)
        patterns_to_cache = list(pattern_set - cached_patterns)
        total_files = len(patterns_to_cache)
        skipped_files = len(pattern_files) - total_files
        if total_files == 0:
            logger.info(f"All {skipped_files} pattern files already have image previews. Skipping image generation.")
            return

        # Update progress state
        cache_progress.update({
            "stage": "images",
            "total_files": total_files,
            "processed_files": 0,
            "current_file": "",
            "error": None
        })
        logger.info(f"Generating image cache for {total_files} uncached .thr patterns ({skipped_files} already cached)...")

        batch_size = 5
        successful = 0
        for i in range(0, total_files, batch_size):
            batch = patterns_to_cache[i:i + batch_size]
            tasks = [generate_image_preview(file) for file in batch]
            results = await asyncio.gather(*tasks)
            successful += sum(1 for r in results if r)
            # Update progress
            cache_progress["processed_files"] = min(i + batch_size, total_files)
            if i < total_files:
                cache_progress["current_file"] = patterns_to_cache[min(i + batch_size - 1, total_files - 1)]
            # Log progress
            progress = min(i + batch_size, total_files)
            logger.info(f"Image cache generation progress: {progress}/{total_files} files processed")
        logger.info(f"Image cache generation completed: {successful}/{total_files} patterns cached successfully, {skipped_files} patterns skipped (already cached)")
    except Exception as e:
        logger.error(f"Error during image cache generation: {str(e)}")
        cache_progress["error"] = str(e)
        raise

async def generate_metadata_cache():
    """Generate metadata cache for missing patterns using set difference."""
    global cache_progress
    try:
        logger.info("Starting metadata cache generation...")

        # Step 1: Get all pattern files
        pattern_files = await list_theta_rho_files_async()
        if not pattern_files:
            logger.info("No pattern files found. Skipping metadata cache generation.")
            return

        # Step 2: Get existing metadata keys
        metadata_cache = await load_metadata_cache_async()
        existing_keys = set(metadata_cache.get('data', {}).keys())

        # Step 3: Calculate the delta (patterns missing from metadata)
        pattern_set = set(pattern_files)
        files_to_process = list(pattern_set - existing_keys)
        total_files = len(files_to_process)
        skipped_files = len(pattern_files) - total_files
        if total_files == 0:
            logger.info(f"All {skipped_files} files already have metadata cache. Skipping metadata generation.")
            return

        # Update progress state
        cache_progress.update({
            "stage": "metadata",
            "total_files": total_files,
            "processed_files": 0,
            "current_file": "",
            "error": None
        })
        logger.info(f"Generating metadata cache for {total_files} new files ({skipped_files} files already cached)...")

        # Process in smaller batches for the Pi Zero 2 W
        batch_size = 3  # Reduced from 5
        successful = 0
        for i in range(0, total_files, batch_size):
            batch = files_to_process[i:i + batch_size]
            # Process files sequentially within the batch (no parallel tasks)
            for file_name in batch:
                pattern_path = os.path.join(THETA_RHO_DIR, file_name)
                cache_progress["current_file"] = file_name
                try:
                    # Parse the file in a separate process to avoid GIL contention with the motion thread.
                    # Add timeout protection to prevent hanging on problematic files.
                    try:
                        loop = asyncio.get_running_loop()
                        coordinates = await asyncio.wait_for(
                            loop.run_in_executor(
                                _get_process_pool(),
                                parse_theta_rho_file,
                                pattern_path
                            ),
                            timeout=30.0  # 30-second timeout per file
                        )
                    except asyncio.TimeoutError:
                        logger.error(f"Timeout parsing {file_name} - skipping (file may be too large or corrupted)")
                        continue
                    if coordinates:
                        first_coord = {"x": coordinates[0][0], "y": coordinates[0][1]}
                        last_coord = {"x": coordinates[-1][0], "y": coordinates[-1][1]}
                        total_coords = len(coordinates)
                        # Cache the metadata
                        await cache_pattern_metadata(file_name, first_coord, last_coord, total_coords)
                        successful += 1
                        logger.debug(f"Generated metadata for {file_name}")
                    # Small delay to reduce I/O pressure
                    await asyncio.sleep(0.05)
                except Exception as e:
                    logger.error(f"Failed to generate metadata for {file_name}: {str(e)}")
            # Update progress
            cache_progress["processed_files"] = min(i + batch_size, total_files)
            # Log progress
            progress = min(i + batch_size, total_files)
            logger.info(f"Metadata cache generation progress: {progress}/{total_files} files processed")
            # Delay between batches for system recovery
            if i + batch_size < total_files:
                await asyncio.sleep(0.3)
        logger.info(f"Metadata cache generation completed: {successful}/{total_files} patterns cached successfully, {skipped_files} patterns skipped (already cached)")
    except Exception as e:
        logger.error(f"Error during metadata cache generation: {str(e)}")
        cache_progress["error"] = str(e)
        raise

async def rebuild_cache():
    """Rebuild the cache for all pattern files (already-cached entries are skipped)."""
    logger.info("Starting cache rebuild...")
    # Ensure the cache directory exists
    ensure_cache_dir()
    # First generate the metadata cache for all files
    await generate_metadata_cache()
    # Then generate image previews
    pattern_files = [f for f in list_theta_rho_files() if f.endswith('.thr')]
    total_files = len(pattern_files)
    if total_files == 0:
        logger.info("No pattern files found to cache")
        return
    logger.info(f"Generating image previews for {total_files} pattern files...")
    # Process in batches
    batch_size = 5
    successful = 0
    for i in range(0, total_files, batch_size):
        batch = pattern_files[i:i + batch_size]
        tasks = [generate_image_preview(file) for file in batch]
        results = await asyncio.gather(*tasks)
        successful += sum(1 for r in results if r)
        # Log progress
        progress = min(i + batch_size, total_files)
        logger.info(f"Image preview generation progress: {progress}/{total_files} files processed")
    logger.info(f"Cache rebuild completed: {successful}/{total_files} patterns cached successfully")

async def generate_cache_background():
    """Run cache generation in the background with progress tracking."""
    global cache_progress
    try:
        cache_progress.update({
            "is_running": True,
            "stage": "starting",
            "total_files": 0,
            "processed_files": 0,
            "current_file": "",
            "error": None
        })
        # First generate the metadata cache
        await generate_metadata_cache()
        # Then generate image previews
        await generate_all_image_previews()
        # Mark as complete
        cache_progress.update({
            "is_running": False,
            "stage": "complete",
            "current_file": "",
            "error": None
        })
        logger.info("Background cache generation completed successfully")
    except Exception as e:
        logger.error(f"Background cache generation failed: {str(e)}")
        cache_progress.update({
            "is_running": False,
            "stage": "error",
            "error": str(e)
        })
        raise

def get_cache_progress():
    """Get the current cache generation progress.

    Returns a reference to the cache_progress dict for read-only access.
    The WebSocket handler should not modify this dict.
    """
    global cache_progress
    return cache_progress  # Return a reference instead of a copy for better performance

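# Illustrative polling (hypothetical caller): treat the returned dict as
# read-only, e.g.:
#   progress = get_cache_progress()
#   total = progress["total_files"]
#   pct = (progress["processed_files"] / total * 100) if total else 0.0
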
def is_cache_generation_needed():
    """Check if cache generation is needed."""
    pattern_files = [f for f in list_theta_rho_files() if f.endswith('.thr')]
    if not pattern_files:
        return False
    # Check if any files need caching
    patterns_to_cache = [f for f in pattern_files if needs_cache(f)]
    # Check the metadata cache
    files_needing_metadata = []
    for file_name in pattern_files:
        if get_pattern_metadata(file_name) is None:
            files_needing_metadata.append(file_name)
    return len(patterns_to_cache) > 0 or len(files_needing_metadata) > 0

async def is_cache_generation_needed_async():
    """Check if cache generation is needed using simple set difference.

    Returns True if any patterns are missing from either the metadata or the image cache.
    """
    try:
        # Step 1: List all patterns
        pattern_files = await list_theta_rho_files_async()
        if not pattern_files:
            return False
        pattern_set = set(pattern_files)

        # Step 2: Check the metadata cache
        metadata_cache = await load_metadata_cache_async()
        metadata_keys = set(metadata_cache.get('data', {}).keys())
        if pattern_set != metadata_keys:
            # Metadata cache is out of sync with the pattern set (missing or stale entries)
            return True

        # Step 3: Check the image cache
        def _list_cached_images():
            """List all patterns that have cached images."""
            cached = set()
            if os.path.exists(CACHE_DIR):
                for pattern in pattern_files:
                    cache_path = get_cache_path(pattern)
                    if os.path.exists(cache_path):
                        cached.add(pattern)
            return cached

        cached_images = await asyncio.to_thread(_list_cached_images)
        if pattern_set != cached_images:
            # Some patterns are missing from the image cache
            return True
        return False
    except Exception as e:
        logger.warning(f"Error checking cache status: {e}")
        return False  # Don't block startup on errors

async def list_theta_rho_files_async():
    """Async version: List all theta-rho files."""
    def _walk_files():
        files = []
        for root, _, filenames in os.walk(THETA_RHO_DIR):
            # Only process .thr files to reduce memory usage
            thr_files = [f for f in filenames if f.endswith('.thr')]
            for file in thr_files:
                relative_path = os.path.relpath(os.path.join(root, file), THETA_RHO_DIR)
                # Normalize path separators to always use forward slashes for consistency across platforms
                relative_path = relative_path.replace(os.sep, '/')
                files.append(relative_path)
        return files

    files = await asyncio.to_thread(_walk_files)
    logger.debug(f"Found {len(files)} theta-rho files")
    return files  # Already filtered for .thr
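
# Illustrative startup wiring (hypothetical; actual call sites live elsewhere
# in the application):
#   async def on_startup():
#       if await is_cache_generation_needed_async():
#           # Run in the background so startup isn't blocked; progress can be
#           # observed via get_cache_progress().
#           asyncio.create_task(generate_cache_background())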