cache_manager.py

  1. """Image Cache Manager for pre-generating and managing image previews."""
  2. import os
  3. import json
  4. import asyncio
  5. import logging
  6. from pathlib import Path
  7. from modules.core.pattern_manager import list_theta_rho_files, THETA_RHO_DIR, parse_theta_rho_file
  8. from modules.core.process_pool import get_pool as _get_process_pool
  9. logger = logging.getLogger(__name__)
  10. # Global cache progress state
  11. cache_progress = {
  12. "is_running": False,
  13. "total_files": 0,
  14. "processed_files": 0,
  15. "current_file": "",
  16. "stage": "idle", # idle, metadata, images, complete
  17. "error": None
  18. }
  19. # Constants
  20. CACHE_DIR = os.path.join(THETA_RHO_DIR, "cached_images")
  21. METADATA_CACHE_FILE = "metadata_cache.json" # Now in root directory
  22. # Cache schema version - increment when structure changes
  23. CACHE_SCHEMA_VERSION = 1
  24. # Expected cache schema structure
  25. EXPECTED_CACHE_SCHEMA = {
  26. 'version': CACHE_SCHEMA_VERSION,
  27. 'structure': {
  28. 'mtime': 'number',
  29. 'metadata': {
  30. 'first_coordinate': {'x': 'number', 'y': 'number'},
  31. 'last_coordinate': {'x': 'number', 'y': 'number'},
  32. 'total_coordinates': 'number'
  33. }
  34. }
  35. }
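
# EXPECTED_CACHE_SCHEMA documents the per-entry layout; validate_cache_schema()
# below checks the structure by hand rather than walking this dict. For
# orientation, a metadata_cache.json that passes validation might look like the
# following (the pattern name and all values are purely illustrative):
#
#   {
#     "version": 1,
#     "data": {
#       "custom_patterns/spiral.thr": {
#         "mtime": 1700000000.0,
#         "metadata": {
#           "first_coordinate": {"x": 0.0, "y": 0.0},
#           "last_coordinate": {"x": 3.14, "y": 1.0},
#           "total_coordinates": 4096
#         }
#       }
#     }
#   }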


def validate_cache_schema(cache_data):
    """Validate that cache data matches the expected schema structure."""
    try:
        # Check that the payload is a dict at all
        if not isinstance(cache_data, dict):
            return False
        # Check for the version field - if missing, it's the old format
        cache_version = cache_data.get('version')
        if cache_version is None:
            logger.info("Cache file missing version info - treating as outdated schema")
            return False
        # Check if the version matches the current expected version
        if cache_version != CACHE_SCHEMA_VERSION:
            logger.info(f"Cache schema version mismatch: found {cache_version}, expected {CACHE_SCHEMA_VERSION}")
            return False
        # Check if the data section exists
        if 'data' not in cache_data:
            logger.warning("Cache file missing 'data' section")
            return False
        # Spot-check the structure of the first entry, if any exist
        data_section = cache_data.get('data', {})
        if data_section and isinstance(data_section, dict):
            for pattern_file, entry in list(data_section.items())[:1]:  # Just check the first entry
                if not isinstance(entry, dict):
                    return False
                if 'mtime' not in entry or 'metadata' not in entry:
                    return False
                metadata = entry.get('metadata', {})
                required_fields = ['first_coordinate', 'last_coordinate', 'total_coordinates']
                if not all(field in metadata for field in required_fields):
                    return False
                # Validate the coordinate structure
                for coord_field in ['first_coordinate', 'last_coordinate']:
                    coord = metadata.get(coord_field)
                    if not isinstance(coord, dict) or 'x' not in coord or 'y' not in coord:
                        return False
        return True
    except Exception as e:
        logger.warning(f"Error validating cache schema: {str(e)}")
        return False


def invalidate_cache():
    """Delete only the metadata cache file, preserving the image cache."""
    try:
        # Delete the metadata cache file only
        if os.path.exists(METADATA_CACHE_FILE):
            os.remove(METADATA_CACHE_FILE)
            logger.info("Deleted outdated metadata cache file")
        # Keep the image cache directory intact - images are still valid.
        # Just ensure the cache directory structure exists.
        ensure_cache_dir()
        return True
    except Exception as e:
        logger.error(f"Failed to invalidate metadata cache: {str(e)}")
        return False


async def invalidate_cache_async():
    """Async version: Delete only the metadata cache file, preserving the image cache."""
    try:
        # Delete the metadata cache file only
        if await asyncio.to_thread(os.path.exists, METADATA_CACHE_FILE):
            await asyncio.to_thread(os.remove, METADATA_CACHE_FILE)
            logger.info("Deleted outdated metadata cache file")
        # Keep the image cache directory intact - images are still valid.
        # Just ensure the cache directory structure exists.
        await ensure_cache_dir_async()
        return True
    except Exception as e:
        logger.error(f"Failed to invalidate metadata cache: {str(e)}")
        return False


def ensure_cache_dir():
    """Ensure the cache directory exists with proper permissions."""
    try:
        Path(CACHE_DIR).mkdir(parents=True, exist_ok=True)
        # Initialize the metadata cache if it doesn't exist
        if not os.path.exists(METADATA_CACHE_FILE):
            initial_cache = {
                'version': CACHE_SCHEMA_VERSION,
                'data': {}
            }
            with open(METADATA_CACHE_FILE, 'w') as f:
                json.dump(initial_cache, f)
            try:
                os.chmod(METADATA_CACHE_FILE, 0o644)  # More conservative permissions
            except (OSError, PermissionError) as e:
                logger.debug(f"Could not set metadata cache file permissions: {str(e)}")
        for root, dirs, files in os.walk(CACHE_DIR):
            try:
                os.chmod(root, 0o755)  # More conservative permissions
                for file in files:
                    file_path = os.path.join(root, file)
                    try:
                        os.chmod(file_path, 0o644)  # More conservative permissions
                    except (OSError, PermissionError) as e:
                        # Log as debug instead of error since this is not critical
                        logger.debug(f"Could not set permissions for file {file_path}: {str(e)}")
            except (OSError, PermissionError) as e:
                # Log as debug instead of error since this is not critical
                logger.debug(f"Could not set permissions for directory {root}: {str(e)}")
                continue
    except Exception as e:
        logger.error(f"Failed to create cache directory: {str(e)}")


async def ensure_cache_dir_async():
    """Async version: Ensure the cache directory exists with proper permissions."""
    try:
        await asyncio.to_thread(Path(CACHE_DIR).mkdir, parents=True, exist_ok=True)
        # Initialize the metadata cache if it doesn't exist
        if not await asyncio.to_thread(os.path.exists, METADATA_CACHE_FILE):
            initial_cache = {
                'version': CACHE_SCHEMA_VERSION,
                'data': {}
            }

            def _write_initial_cache():
                with open(METADATA_CACHE_FILE, 'w') as f:
                    json.dump(initial_cache, f)

            await asyncio.to_thread(_write_initial_cache)
            try:
                await asyncio.to_thread(os.chmod, METADATA_CACHE_FILE, 0o644)
            except (OSError, PermissionError) as e:
                logger.debug(f"Could not set metadata cache file permissions: {str(e)}")

        def _set_permissions():
            for root, dirs, files in os.walk(CACHE_DIR):
                try:
                    os.chmod(root, 0o755)
                    for file in files:
                        file_path = os.path.join(root, file)
                        try:
                            os.chmod(file_path, 0o644)
                        except (OSError, PermissionError) as e:
                            logger.debug(f"Could not set permissions for file {file_path}: {str(e)}")
                except (OSError, PermissionError) as e:
                    logger.debug(f"Could not set permissions for directory {root}: {str(e)}")
                    continue

        await asyncio.to_thread(_set_permissions)
    except Exception as e:
        logger.error(f"Failed to create cache directory: {str(e)}")


def get_cache_path(pattern_file):
    """Get the cache path for a pattern file."""
    # Normalize path separators to handle both forward slashes and backslashes
    pattern_file = pattern_file.replace('\\', '/')
    # Create subdirectories in the cache to match the pattern file structure
    cache_subpath = os.path.dirname(pattern_file)
    if cache_subpath:
        # Mirror the same subdirectory structure in the cache (including custom_patterns).
        # Convert forward slashes back to the platform-specific separator for os.path.join.
        cache_subpath = cache_subpath.replace('/', os.sep)
        cache_dir = os.path.join(CACHE_DIR, cache_subpath)
    else:
        # For files in the root pattern directory
        cache_dir = CACHE_DIR
    # Ensure the subdirectory exists
    os.makedirs(cache_dir, exist_ok=True)
    try:
        os.chmod(cache_dir, 0o755)  # More conservative permissions
    except (OSError, PermissionError) as e:
        # Log as debug instead of error since this is not critical
        logger.debug(f"Could not set permissions for cache subdirectory {cache_dir}: {str(e)}")
    # Use just the filename part for the cache file
    filename = os.path.basename(pattern_file)
    safe_name = filename.replace('\\', '_')
    return os.path.join(cache_dir, f"{safe_name}.webp")
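
# Example mapping (hypothetical file names): a pattern stored as
# "custom_patterns/spiral.thr" gets its preview at
# CACHE_DIR/custom_patterns/spiral.thr.webp, while a root-level "circle.thr"
# maps to CACHE_DIR/circle.thr.webp. Note that calling get_cache_path() also
# creates the cache subdirectory as a side effect:
#
#   preview_path = get_cache_path("custom_patterns/spiral.thr")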


def delete_pattern_cache(pattern_file):
    """Delete the cached preview image and metadata for a pattern file."""
    try:
        # Remove the cached image
        cache_path = get_cache_path(pattern_file)
        if os.path.exists(cache_path):
            os.remove(cache_path)
            logger.info(f"Deleted cached image: {cache_path}")
        # Remove from the metadata cache
        metadata_cache = load_metadata_cache()
        data_section = metadata_cache.get('data', {})
        if pattern_file in data_section:
            del data_section[pattern_file]
            metadata_cache['data'] = data_section
            save_metadata_cache(metadata_cache)
            logger.info(f"Removed {pattern_file} from metadata cache")
        return True
    except Exception as e:
        logger.error(f"Failed to delete cache for {pattern_file}: {str(e)}")
        return False


def load_metadata_cache():
    """Load the metadata cache from disk with schema validation."""
    try:
        if os.path.exists(METADATA_CACHE_FILE):
            with open(METADATA_CACHE_FILE, 'r') as f:
                cache_data = json.load(f)
            # Validate the schema
            if not validate_cache_schema(cache_data):
                logger.info("Cache schema validation failed - invalidating cache")
                invalidate_cache()
                # Return an empty cache structure after invalidation
                return {
                    'version': CACHE_SCHEMA_VERSION,
                    'data': {}
                }
            return cache_data
    except Exception as e:
        logger.warning(f"Failed to load metadata cache: {str(e)} - invalidating cache")
        try:
            invalidate_cache()
        except Exception as invalidate_error:
            logger.error(f"Failed to invalidate corrupted cache: {str(invalidate_error)}")
    # Return an empty cache structure
    return {
        'version': CACHE_SCHEMA_VERSION,
        'data': {}
    }


async def load_metadata_cache_async():
    """Async version: Load the metadata cache from disk with schema validation."""
    try:
        if await asyncio.to_thread(os.path.exists, METADATA_CACHE_FILE):
            def _load_json():
                with open(METADATA_CACHE_FILE, 'r') as f:
                    return json.load(f)

            cache_data = await asyncio.to_thread(_load_json)
            # Validate the schema
            if not validate_cache_schema(cache_data):
                logger.info("Cache schema validation failed - invalidating cache")
                await invalidate_cache_async()
                # Return an empty cache structure after invalidation
                return {
                    'version': CACHE_SCHEMA_VERSION,
                    'data': {}
                }
            return cache_data
    except Exception as e:
        logger.warning(f"Failed to load metadata cache: {str(e)} - invalidating cache")
        try:
            await invalidate_cache_async()
        except Exception as invalidate_error:
            logger.error(f"Failed to invalidate corrupted cache: {str(invalidate_error)}")
    # Return an empty cache structure
    return {
        'version': CACHE_SCHEMA_VERSION,
        'data': {}
    }


def save_metadata_cache(cache_data):
    """Save the metadata cache to disk with version info."""
    try:
        ensure_cache_dir()
        # Ensure the cache data has the proper structure
        if not isinstance(cache_data, dict) or 'version' not in cache_data:
            # Convert the old format or create a new structure
            if isinstance(cache_data, dict) and 'data' not in cache_data:
                # Old format - wrap the existing data
                structured_cache = {
                    'version': CACHE_SCHEMA_VERSION,
                    'data': cache_data
                }
            else:
                structured_cache = cache_data
        else:
            structured_cache = cache_data
        with open(METADATA_CACHE_FILE, 'w') as f:
            json.dump(structured_cache, f, indent=2)
    except Exception as e:
        logger.error(f"Failed to save metadata cache: {str(e)}")


def get_pattern_metadata(pattern_file):
    """Get cached metadata for a pattern file."""
    cache_data = load_metadata_cache()
    data_section = cache_data.get('data', {})
    # Check if we have cached metadata and the file hasn't changed
    if pattern_file in data_section:
        cached_entry = data_section[pattern_file]
        pattern_path = os.path.join(THETA_RHO_DIR, pattern_file)
        try:
            file_mtime = os.path.getmtime(pattern_path)
            if cached_entry.get('mtime') == file_mtime:
                return cached_entry.get('metadata')
        except OSError:
            pass
    return None


async def get_pattern_metadata_async(pattern_file):
    """Async version: Get cached metadata for a pattern file."""
    cache_data = await load_metadata_cache_async()
    data_section = cache_data.get('data', {})
    # Check if we have cached metadata and the file hasn't changed
    if pattern_file in data_section:
        cached_entry = data_section[pattern_file]
        pattern_path = os.path.join(THETA_RHO_DIR, pattern_file)
        try:
            file_mtime = await asyncio.to_thread(os.path.getmtime, pattern_path)
            if cached_entry.get('mtime') == file_mtime:
                return cached_entry.get('metadata')
        except OSError:
            pass
    return None
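
# Illustrative lookup (hypothetical pattern name): entries are keyed by the
# path relative to THETA_RHO_DIR and are mtime-validated, so editing a pattern
# file on disk automatically invalidates its cached entry.
#
#   meta = get_pattern_metadata("custom_patterns/spiral.thr")
#   if meta is not None:
#       print(meta["total_coordinates"], meta["first_coordinate"])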


def cache_pattern_metadata(pattern_file, first_coord, last_coord, total_coords):
    """Cache metadata for a pattern file."""
    try:
        cache_data = load_metadata_cache()
        data_section = cache_data.get('data', {})
        pattern_path = os.path.join(THETA_RHO_DIR, pattern_file)
        file_mtime = os.path.getmtime(pattern_path)
        data_section[pattern_file] = {
            'mtime': file_mtime,
            'metadata': {
                'first_coordinate': first_coord,
                'last_coordinate': last_coord,
                'total_coordinates': total_coords
            }
        }
        cache_data['data'] = data_section
        save_metadata_cache(cache_data)
        logger.debug(f"Cached metadata for {pattern_file}")
    except Exception as e:
        logger.warning(f"Failed to cache metadata for {pattern_file}: {str(e)}")


def needs_cache(pattern_file):
    """Check if a pattern file needs its cache generated."""
    # Check if the image preview exists
    cache_path = get_cache_path(pattern_file)
    if not os.path.exists(cache_path):
        return True
    # Check if the metadata cache exists and is valid
    metadata = get_pattern_metadata(pattern_file)
    if metadata is None:
        return True
    return False


def needs_image_cache_only(pattern_file):
    """Quick check if a pattern file needs its image cache generated.

    Only checks for image file existence, not metadata validity.
    Used during startup for faster checking.
    """
    cache_path = get_cache_path(pattern_file)
    return not os.path.exists(cache_path)


async def needs_cache_async(pattern_file):
    """Async version: Check if a pattern file needs its cache generated."""
    # Check if the image preview exists
    cache_path = get_cache_path(pattern_file)
    if not await asyncio.to_thread(os.path.exists, cache_path):
        return True
    # Check if the metadata cache exists and is valid
    metadata = await get_pattern_metadata_async(pattern_file)
    if metadata is None:
        return True
    return False


async def generate_image_preview(pattern_file):
    """Generate an image preview for a single pattern file."""
    from modules.core.preview import generate_preview_image
    try:
        logger.debug(f"Starting preview generation for {pattern_file}")
        # Check if we need to update the metadata cache
        metadata = get_pattern_metadata(pattern_file)
        if metadata is None:
            # Parse the file to get metadata (this is the only time we need to parse)
            logger.debug(f"Parsing {pattern_file} for metadata cache")
            pattern_path = os.path.join(THETA_RHO_DIR, pattern_file)
            try:
                # Use the process pool to avoid GIL contention with the motion thread.
                # Add timeout protection to prevent hanging on problematic files.
                loop = asyncio.get_running_loop()
                coordinates = await asyncio.wait_for(
                    loop.run_in_executor(
                        _get_process_pool(),
                        parse_theta_rho_file,
                        pattern_path
                    ),
                    timeout=30.0  # 30 second timeout per file
                )
                if coordinates:
                    first_coord = {"x": coordinates[0][0], "y": coordinates[0][1]}
                    last_coord = {"x": coordinates[-1][0], "y": coordinates[-1][1]}
                    total_coords = len(coordinates)
                    # Cache the metadata for future use
                    cache_pattern_metadata(pattern_file, first_coord, last_coord, total_coords)
                    logger.debug(f"Metadata cached for {pattern_file}: {total_coords} coordinates")
                else:
                    logger.warning(f"No coordinates found in {pattern_file}")
            except asyncio.TimeoutError:
                logger.error(f"Timeout parsing {pattern_file} for metadata - skipping")
            except Exception as e:
                logger.error(f"Failed to parse {pattern_file} for metadata: {str(e)}")
                # Continue with image generation even if metadata fails
        # Check if we need to generate the image
        cache_path = get_cache_path(pattern_file)
        if os.path.exists(cache_path):
            logger.debug(f"Skipping image generation for {pattern_file} - already cached")
            return True
        # Generate the image
        logger.debug(f"Generating image preview for {pattern_file}")
        image_content = await generate_preview_image(pattern_file)
        if not image_content:
            logger.error(f"Generated image content is empty for {pattern_file}")
            return False
        # Ensure the cache directory exists
        ensure_cache_dir()
        with open(cache_path, 'wb') as f:
            f.write(image_content)
        try:
            os.chmod(cache_path, 0o644)  # More conservative permissions
        except (OSError, PermissionError) as e:
            # Log as debug instead of error since this is not critical
            logger.debug(f"Could not set cache file permissions for {pattern_file}: {str(e)}")
        logger.debug(f"Successfully generated preview for {pattern_file}")
        return True
    except Exception as e:
        logger.error(f"Failed to generate image for {pattern_file}: {str(e)}")
        return False


async def generate_all_image_previews():
    """Generate image previews for missing patterns using set difference."""
    global cache_progress
    try:
        await ensure_cache_dir_async()
        # Step 1: Get all pattern files
        pattern_files = await list_theta_rho_files_async()
        if not pattern_files:
            logger.info("No .thr pattern files found. Skipping image preview generation.")
            return

        # Step 2: Find patterns with an existing cache
        def _find_cached_patterns():
            cached = set()
            for pattern in pattern_files:
                cache_path = get_cache_path(pattern)
                if os.path.exists(cache_path):
                    cached.add(pattern)
            return cached

        cached_patterns = await asyncio.to_thread(_find_cached_patterns)
        # Step 3: Calculate the delta (patterns missing an image cache)
        pattern_set = set(pattern_files)
        patterns_to_cache = list(pattern_set - cached_patterns)
        total_files = len(patterns_to_cache)
        skipped_files = len(pattern_files) - total_files
        if total_files == 0:
            logger.info(f"All {skipped_files} pattern files already have image previews. Skipping image generation.")
            return
        # Update progress state
        cache_progress.update({
            "stage": "images",
            "total_files": total_files,
            "processed_files": 0,
            "current_file": "",
            "error": None
        })
        logger.info(f"Generating image cache for {total_files} uncached .thr patterns ({skipped_files} already cached)...")
        batch_size = 5
        successful = 0
        for i in range(0, total_files, batch_size):
            batch = patterns_to_cache[i:i + batch_size]
            tasks = [generate_image_preview(file) for file in batch]
            results = await asyncio.gather(*tasks)
            successful += sum(1 for r in results if r)
            # Update progress
            cache_progress["processed_files"] = min(i + batch_size, total_files)
            if i < total_files:
                cache_progress["current_file"] = patterns_to_cache[min(i + batch_size - 1, total_files - 1)]
            # Log progress
            progress = min(i + batch_size, total_files)
            logger.info(f"Image cache generation progress: {progress}/{total_files} files processed")
        logger.info(f"Image cache generation completed: {successful}/{total_files} patterns cached successfully, {skipped_files} patterns skipped (already cached)")
    except Exception as e:
        logger.error(f"Error during image cache generation: {str(e)}")
        cache_progress["error"] = str(e)
        raise


async def generate_metadata_cache():
    """Generate metadata cache for missing patterns using set difference."""
    global cache_progress
    try:
        logger.info("Starting metadata cache generation...")
        # Step 1: Get all pattern files
        pattern_files = await list_theta_rho_files_async()
        if not pattern_files:
            logger.info("No pattern files found. Skipping metadata cache generation.")
            return
        # Step 2: Get the existing metadata keys
        metadata_cache = await load_metadata_cache_async()
        existing_keys = set(metadata_cache.get('data', {}).keys())
        # Step 3: Calculate the delta (patterns missing from metadata)
        pattern_set = set(pattern_files)
        files_to_process = list(pattern_set - existing_keys)
        total_files = len(files_to_process)
        skipped_files = len(pattern_files) - total_files
        if total_files == 0:
            logger.info(f"All {skipped_files} files already have metadata cache. Skipping metadata generation.")
            return
        # Update progress state
        cache_progress.update({
            "stage": "metadata",
            "total_files": total_files,
            "processed_files": 0,
            "current_file": "",
            "error": None
        })
        logger.info(f"Generating metadata cache for {total_files} new files ({skipped_files} files already cached)...")
        # Process in smaller batches for the Pi Zero 2 W
        batch_size = 3  # Reduced from 5
        successful = 0
        for i in range(0, total_files, batch_size):
            batch = files_to_process[i:i + batch_size]
            # Process files sequentially within the batch (no parallel tasks)
            for file_name in batch:
                pattern_path = os.path.join(THETA_RHO_DIR, file_name)
                cache_progress["current_file"] = file_name
                try:
                    # Parse the file in a separate process to avoid GIL contention
                    # with the motion thread. Add timeout protection to prevent
                    # hanging on problematic files.
                    try:
                        loop = asyncio.get_running_loop()
                        coordinates = await asyncio.wait_for(
                            loop.run_in_executor(
                                _get_process_pool(),
                                parse_theta_rho_file,
                                pattern_path
                            ),
                            timeout=30.0  # 30 second timeout per file
                        )
                    except asyncio.TimeoutError:
                        logger.error(f"Timeout parsing {file_name} - skipping (file may be too large or corrupted)")
                        continue
                    if coordinates:
                        first_coord = {"x": coordinates[0][0], "y": coordinates[0][1]}
                        last_coord = {"x": coordinates[-1][0], "y": coordinates[-1][1]}
                        total_coords = len(coordinates)
                        # Cache the metadata
                        cache_pattern_metadata(file_name, first_coord, last_coord, total_coords)
                        successful += 1
                        logger.debug(f"Generated metadata for {file_name}")
                    # Small delay to reduce I/O pressure
                    await asyncio.sleep(0.05)
                except Exception as e:
                    logger.error(f"Failed to generate metadata for {file_name}: {str(e)}")
            # Update progress
            cache_progress["processed_files"] = min(i + batch_size, total_files)
            # Log progress
            progress = min(i + batch_size, total_files)
            logger.info(f"Metadata cache generation progress: {progress}/{total_files} files processed")
            # Delay between batches for system recovery
            if i + batch_size < total_files:
                await asyncio.sleep(0.3)
        logger.info(f"Metadata cache generation completed: {successful}/{total_files} patterns cached successfully, {skipped_files} patterns skipped (already cached)")
    except Exception as e:
        logger.error(f"Error during metadata cache generation: {str(e)}")
        cache_progress["error"] = str(e)
        raise


async def rebuild_cache():
    """Rebuild the cache for all pattern files.

    Existing entries are kept; only missing metadata and previews are generated.
    """
    logger.info("Starting cache rebuild...")
    # Ensure the cache directory exists
    ensure_cache_dir()
    # First generate the metadata cache for all files
    await generate_metadata_cache()
    # Then generate image previews
    pattern_files = [f for f in list_theta_rho_files() if f.endswith('.thr')]
    total_files = len(pattern_files)
    if total_files == 0:
        logger.info("No pattern files found to cache")
        return
    logger.info(f"Generating image previews for {total_files} pattern files...")
    # Process in batches
    batch_size = 5
    successful = 0
    for i in range(0, total_files, batch_size):
        batch = pattern_files[i:i + batch_size]
        tasks = [generate_image_preview(file) for file in batch]
        results = await asyncio.gather(*tasks)
        successful += sum(1 for r in results if r)
        # Log progress
        progress = min(i + batch_size, total_files)
        logger.info(f"Image preview generation progress: {progress}/{total_files} files processed")
    logger.info(f"Cache rebuild completed: {successful}/{total_files} patterns cached successfully")


async def generate_cache_background():
    """Run cache generation in the background with progress tracking."""
    global cache_progress
    try:
        cache_progress.update({
            "is_running": True,
            "stage": "starting",
            "total_files": 0,
            "processed_files": 0,
            "current_file": "",
            "error": None
        })
        # First generate the metadata cache
        await generate_metadata_cache()
        # Then generate image previews
        await generate_all_image_previews()
        # Mark as complete
        cache_progress.update({
            "is_running": False,
            "stage": "complete",
            "current_file": "",
            "error": None
        })
        logger.info("Background cache generation completed successfully")
    except Exception as e:
        logger.error(f"Background cache generation failed: {str(e)}")
        cache_progress.update({
            "is_running": False,
            "stage": "error",
            "error": str(e)
        })
        raise


def get_cache_progress():
    """Get the current cache generation progress.

    Returns a reference to the cache_progress dict for read-only access.
    The WebSocket handler should not modify this dict.
    """
    global cache_progress
    return cache_progress  # Return a reference instead of a copy for better performance
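
# Illustrative polling loop (the send_json() callable is a hypothetical
# stand-in for whatever transport the caller uses):
#
#   while get_cache_progress()["is_running"]:
#       await send_json(get_cache_progress())
#       await asyncio.sleep(1)
#
# Callers must treat the returned dict as read-only: it is the live module
# state, not a copy.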


def is_cache_generation_needed():
    """Check if cache generation is needed."""
    pattern_files = [f for f in list_theta_rho_files() if f.endswith('.thr')]
    if not pattern_files:
        return False
    # Check if any files need caching
    patterns_to_cache = [f for f in pattern_files if needs_cache(f)]
    # Check the metadata cache
    files_needing_metadata = []
    for file_name in pattern_files:
        if get_pattern_metadata(file_name) is None:
            files_needing_metadata.append(file_name)
    return len(patterns_to_cache) > 0 or len(files_needing_metadata) > 0


async def is_cache_generation_needed_async():
    """Check if cache generation is needed using simple set difference.

    Returns True if any patterns are missing from either the metadata or the image cache.
    """
    try:
        # Step 1: List all patterns
        pattern_files = await list_theta_rho_files_async()
        if not pattern_files:
            return False
        pattern_set = set(pattern_files)
        # Step 2: Check the metadata cache
        metadata_cache = await load_metadata_cache_async()
        metadata_keys = set(metadata_cache.get('data', {}).keys())
        if pattern_set - metadata_keys:
            # Metadata is missing some patterns (stale keys for deleted patterns
            # are ignored, since generation would not remove them anyway)
            return True

        # Step 3: Check the image cache
        def _list_cached_images():
            """List all patterns that have cached images."""
            cached = set()
            if os.path.exists(CACHE_DIR):
                for pattern in pattern_files:
                    cache_path = get_cache_path(pattern)
                    if os.path.exists(cache_path):
                        cached.add(pattern)
            return cached

        cached_images = await asyncio.to_thread(_list_cached_images)
        if pattern_set != cached_images:
            # Some patterns are missing an image cache
            return True
        return False
    except Exception as e:
        logger.warning(f"Error checking cache status: {e}")
        return False  # Don't block startup on errors


async def list_theta_rho_files_async():
    """Async version: List all theta-rho files."""
    def _walk_files():
        files = []
        for root, _, filenames in os.walk(THETA_RHO_DIR):
            # Only process .thr files to reduce memory usage
            thr_files = [f for f in filenames if f.endswith('.thr')]
            for file in thr_files:
                relative_path = os.path.relpath(os.path.join(root, file), THETA_RHO_DIR)
                # Normalize separators to forward slashes for cross-platform consistency
                relative_path = relative_path.replace(os.sep, '/')
                files.append(relative_path)
        return files

    files = await asyncio.to_thread(_walk_files)
    logger.debug(f"Found {len(files)} theta-rho files")
    return files  # Already filtered for .thr
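

# Minimal usage sketch (hypothetical entry point; the real application
# schedules this from its startup code): run the full background generation
# once, then inspect the final progress state.
#
#   if __name__ == "__main__":
#       asyncio.run(generate_cache_background())
#       print(get_cache_progress())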