# cache_manager.py
  1. """Image Cache Manager for pre-generating and managing image previews."""
  2. import os
  3. import json
  4. import asyncio
  5. import logging
  6. from pathlib import Path
  7. from modules.core.pattern_manager import list_theta_rho_files, THETA_RHO_DIR, parse_theta_rho_file
  8. logger = logging.getLogger(__name__)
# Global cache progress state, read via get_cache_progress() and mutated by
# the generation coroutines below.
cache_progress = {
    "is_running": False,   # True while generate_cache_background() is active
    "total_files": 0,      # size of the current stage's work list
    "processed_files": 0,  # files completed so far in the current stage
    "current_file": "",    # most recently processed pattern file
    "stage": "idle",  # idle, metadata, images, complete
    "error": None          # last error message, if any
}
# Constants
CACHE_DIR = os.path.join(THETA_RHO_DIR, "cached_images")
METADATA_CACHE_FILE = "metadata_cache.json"  # Now in root directory
# Cache schema version - increment when structure changes
CACHE_SCHEMA_VERSION = 1
# Expected cache schema structure (documentation of the per-entry shape that
# validate_cache_schema() spot-checks; not consumed programmatically here)
EXPECTED_CACHE_SCHEMA = {
    'version': CACHE_SCHEMA_VERSION,
    'structure': {
        'mtime': 'number',
        'metadata': {
            'first_coordinate': {'x': 'number', 'y': 'number'},
            'last_coordinate': {'x': 'number', 'y': 'number'},
            'total_coordinates': 'number'
        }
    }
}
  35. def validate_cache_schema(cache_data):
  36. """Validate that cache data matches the expected schema structure."""
  37. try:
  38. # Check if version info exists
  39. if not isinstance(cache_data, dict):
  40. return False
  41. # Check for version field - if missing, it's old format
  42. cache_version = cache_data.get('version')
  43. if cache_version is None:
  44. logger.info("Cache file missing version info - treating as outdated schema")
  45. return False
  46. # Check if version matches current expected version
  47. if cache_version != CACHE_SCHEMA_VERSION:
  48. logger.info(f"Cache schema version mismatch: found {cache_version}, expected {CACHE_SCHEMA_VERSION}")
  49. return False
  50. # Check if data section exists
  51. if 'data' not in cache_data:
  52. logger.warning("Cache file missing 'data' section")
  53. return False
  54. # Validate structure of a few entries if they exist
  55. data_section = cache_data.get('data', {})
  56. if data_section and isinstance(data_section, dict):
  57. # Check first entry structure
  58. for pattern_file, entry in list(data_section.items())[:1]: # Just check first entry
  59. if not isinstance(entry, dict):
  60. return False
  61. if 'mtime' not in entry or 'metadata' not in entry:
  62. return False
  63. metadata = entry.get('metadata', {})
  64. required_fields = ['first_coordinate', 'last_coordinate', 'total_coordinates']
  65. if not all(field in metadata for field in required_fields):
  66. return False
  67. # Validate coordinate structure
  68. for coord_field in ['first_coordinate', 'last_coordinate']:
  69. coord = metadata.get(coord_field)
  70. if not isinstance(coord, dict) or 'x' not in coord or 'y' not in coord:
  71. return False
  72. return True
  73. except Exception as e:
  74. logger.warning(f"Error validating cache schema: {str(e)}")
  75. return False
  76. def invalidate_cache():
  77. """Delete only the metadata cache file, preserving image cache."""
  78. try:
  79. # Delete metadata cache file only
  80. if os.path.exists(METADATA_CACHE_FILE):
  81. os.remove(METADATA_CACHE_FILE)
  82. logger.info("Deleted outdated metadata cache file")
  83. # Keep image cache directory intact - images are still valid
  84. # Just ensure the cache directory structure exists
  85. ensure_cache_dir()
  86. return True
  87. except Exception as e:
  88. logger.error(f"Failed to invalidate metadata cache: {str(e)}")
  89. return False
  90. async def invalidate_cache_async():
  91. """Async version: Delete only the metadata cache file, preserving image cache."""
  92. try:
  93. # Delete metadata cache file only
  94. if await asyncio.to_thread(os.path.exists, METADATA_CACHE_FILE):
  95. await asyncio.to_thread(os.remove, METADATA_CACHE_FILE)
  96. logger.info("Deleted outdated metadata cache file")
  97. # Keep image cache directory intact - images are still valid
  98. # Just ensure the cache directory structure exists
  99. await ensure_cache_dir_async()
  100. return True
  101. except Exception as e:
  102. logger.error(f"Failed to invalidate metadata cache: {str(e)}")
  103. return False
  104. def ensure_cache_dir():
  105. """Ensure the cache directory exists with proper permissions."""
  106. try:
  107. Path(CACHE_DIR).mkdir(parents=True, exist_ok=True)
  108. # Initialize metadata cache if it doesn't exist
  109. if not os.path.exists(METADATA_CACHE_FILE):
  110. initial_cache = {
  111. 'version': CACHE_SCHEMA_VERSION,
  112. 'data': {}
  113. }
  114. with open(METADATA_CACHE_FILE, 'w') as f:
  115. json.dump(initial_cache, f)
  116. try:
  117. os.chmod(METADATA_CACHE_FILE, 0o644) # More conservative permissions
  118. except (OSError, PermissionError) as e:
  119. logger.debug(f"Could not set metadata cache file permissions: {str(e)}")
  120. for root, dirs, files in os.walk(CACHE_DIR):
  121. try:
  122. os.chmod(root, 0o755) # More conservative permissions
  123. for file in files:
  124. file_path = os.path.join(root, file)
  125. try:
  126. os.chmod(file_path, 0o644) # More conservative permissions
  127. except (OSError, PermissionError) as e:
  128. # Log as debug instead of error since this is not critical
  129. logger.debug(f"Could not set permissions for file {file_path}: {str(e)}")
  130. except (OSError, PermissionError) as e:
  131. # Log as debug instead of error since this is not critical
  132. logger.debug(f"Could not set permissions for directory {root}: {str(e)}")
  133. continue
  134. except Exception as e:
  135. logger.error(f"Failed to create cache directory: {str(e)}")
async def ensure_cache_dir_async():
    """Async version: Ensure the cache directory exists with proper permissions.

    All blocking filesystem work is offloaded to worker threads so the
    event loop is never stalled.
    """
    try:
        await asyncio.to_thread(Path(CACHE_DIR).mkdir, parents=True, exist_ok=True)
        # Initialize metadata cache if it doesn't exist
        if not await asyncio.to_thread(os.path.exists, METADATA_CACHE_FILE):
            initial_cache = {
                'version': CACHE_SCHEMA_VERSION,
                'data': {}
            }
            def _write_initial_cache():
                # Runs in a worker thread; plain blocking file I/O is fine here.
                with open(METADATA_CACHE_FILE, 'w') as f:
                    json.dump(initial_cache, f)
            await asyncio.to_thread(_write_initial_cache)
            try:
                await asyncio.to_thread(os.chmod, METADATA_CACHE_FILE, 0o644)
            except (OSError, PermissionError) as e:
                logger.debug(f"Could not set metadata cache file permissions: {str(e)}")
        def _set_permissions():
            # Best-effort chmod sweep over the cache tree; failures are
            # non-fatal and logged quietly at debug level.
            for root, dirs, files in os.walk(CACHE_DIR):
                try:
                    os.chmod(root, 0o755)
                    for file in files:
                        file_path = os.path.join(root, file)
                        try:
                            os.chmod(file_path, 0o644)
                        except (OSError, PermissionError) as e:
                            logger.debug(f"Could not set permissions for file {file_path}: {str(e)}")
                except (OSError, PermissionError) as e:
                    logger.debug(f"Could not set permissions for directory {root}: {str(e)}")
                    continue
        await asyncio.to_thread(_set_permissions)
    except Exception as e:
        logger.error(f"Failed to create cache directory: {str(e)}")
  170. def get_cache_path(pattern_file):
  171. """Get the cache path for a pattern file."""
  172. # Normalize path separators to handle both forward slashes and backslashes
  173. pattern_file = pattern_file.replace('\\', '/')
  174. # Create subdirectories in cache to match the pattern file structure
  175. cache_subpath = os.path.dirname(pattern_file)
  176. if cache_subpath:
  177. # Create the same subdirectory structure in cache (including custom_patterns)
  178. # Convert forward slashes back to platform-specific separator for os.path.join
  179. cache_subpath = cache_subpath.replace('/', os.sep)
  180. cache_dir = os.path.join(CACHE_DIR, cache_subpath)
  181. else:
  182. # For files in root pattern directory
  183. cache_dir = CACHE_DIR
  184. # Ensure the subdirectory exists
  185. os.makedirs(cache_dir, exist_ok=True)
  186. try:
  187. os.chmod(cache_dir, 0o755) # More conservative permissions
  188. except (OSError, PermissionError) as e:
  189. # Log as debug instead of error since this is not critical
  190. logger.debug(f"Could not set permissions for cache subdirectory {cache_dir}: {str(e)}")
  191. # Use just the filename part for the cache file
  192. filename = os.path.basename(pattern_file)
  193. safe_name = filename.replace('\\', '_')
  194. return os.path.join(cache_dir, f"{safe_name}.webp")
  195. def delete_pattern_cache(pattern_file):
  196. """Delete cached preview image and metadata for a pattern file."""
  197. try:
  198. # Remove cached image
  199. cache_path = get_cache_path(pattern_file)
  200. if os.path.exists(cache_path):
  201. os.remove(cache_path)
  202. logger.info(f"Deleted cached image: {cache_path}")
  203. # Remove from metadata cache
  204. metadata_cache = load_metadata_cache()
  205. data_section = metadata_cache.get('data', {})
  206. if pattern_file in data_section:
  207. del data_section[pattern_file]
  208. metadata_cache['data'] = data_section
  209. save_metadata_cache(metadata_cache)
  210. logger.info(f"Removed {pattern_file} from metadata cache")
  211. return True
  212. except Exception as e:
  213. logger.error(f"Failed to delete cache for {pattern_file}: {str(e)}")
  214. return False
  215. def load_metadata_cache():
  216. """Load the metadata cache from disk with schema validation."""
  217. try:
  218. if os.path.exists(METADATA_CACHE_FILE):
  219. with open(METADATA_CACHE_FILE, 'r') as f:
  220. cache_data = json.load(f)
  221. # Validate schema
  222. if not validate_cache_schema(cache_data):
  223. logger.info("Cache schema validation failed - invalidating cache")
  224. invalidate_cache()
  225. # Return empty cache structure after invalidation
  226. return {
  227. 'version': CACHE_SCHEMA_VERSION,
  228. 'data': {}
  229. }
  230. return cache_data
  231. except Exception as e:
  232. logger.warning(f"Failed to load metadata cache: {str(e)} - invalidating cache")
  233. try:
  234. invalidate_cache()
  235. except Exception as invalidate_error:
  236. logger.error(f"Failed to invalidate corrupted cache: {str(invalidate_error)}")
  237. # Return empty cache structure
  238. return {
  239. 'version': CACHE_SCHEMA_VERSION,
  240. 'data': {}
  241. }
  242. async def load_metadata_cache_async():
  243. """Async version: Load the metadata cache from disk with schema validation."""
  244. try:
  245. if await asyncio.to_thread(os.path.exists, METADATA_CACHE_FILE):
  246. def _load_json():
  247. with open(METADATA_CACHE_FILE, 'r') as f:
  248. return json.load(f)
  249. cache_data = await asyncio.to_thread(_load_json)
  250. # Validate schema
  251. if not validate_cache_schema(cache_data):
  252. logger.info("Cache schema validation failed - invalidating cache")
  253. await invalidate_cache_async()
  254. # Return empty cache structure after invalidation
  255. return {
  256. 'version': CACHE_SCHEMA_VERSION,
  257. 'data': {}
  258. }
  259. return cache_data
  260. except Exception as e:
  261. logger.warning(f"Failed to load metadata cache: {str(e)} - invalidating cache")
  262. try:
  263. await invalidate_cache_async()
  264. except Exception as invalidate_error:
  265. logger.error(f"Failed to invalidate corrupted cache: {str(invalidate_error)}")
  266. # Return empty cache structure
  267. return {
  268. 'version': CACHE_SCHEMA_VERSION,
  269. 'data': {}
  270. }
  271. def save_metadata_cache(cache_data):
  272. """Save the metadata cache to disk with version info."""
  273. try:
  274. ensure_cache_dir()
  275. # Ensure cache data has proper structure
  276. if not isinstance(cache_data, dict) or 'version' not in cache_data:
  277. # Convert old format or create new structure
  278. if isinstance(cache_data, dict) and 'data' not in cache_data:
  279. # Old format - wrap existing data
  280. structured_cache = {
  281. 'version': CACHE_SCHEMA_VERSION,
  282. 'data': cache_data
  283. }
  284. else:
  285. structured_cache = cache_data
  286. else:
  287. structured_cache = cache_data
  288. with open(METADATA_CACHE_FILE, 'w') as f:
  289. json.dump(structured_cache, f, indent=2)
  290. except Exception as e:
  291. logger.error(f"Failed to save metadata cache: {str(e)}")
  292. def get_pattern_metadata(pattern_file):
  293. """Get cached metadata for a pattern file."""
  294. cache_data = load_metadata_cache()
  295. data_section = cache_data.get('data', {})
  296. # Check if we have cached metadata and if the file hasn't changed
  297. if pattern_file in data_section:
  298. cached_entry = data_section[pattern_file]
  299. pattern_path = os.path.join(THETA_RHO_DIR, pattern_file)
  300. try:
  301. file_mtime = os.path.getmtime(pattern_path)
  302. if cached_entry.get('mtime') == file_mtime:
  303. return cached_entry.get('metadata')
  304. except OSError:
  305. pass
  306. return None
  307. async def get_pattern_metadata_async(pattern_file):
  308. """Async version: Get cached metadata for a pattern file."""
  309. cache_data = await load_metadata_cache_async()
  310. data_section = cache_data.get('data', {})
  311. # Check if we have cached metadata and if the file hasn't changed
  312. if pattern_file in data_section:
  313. cached_entry = data_section[pattern_file]
  314. pattern_path = os.path.join(THETA_RHO_DIR, pattern_file)
  315. try:
  316. file_mtime = await asyncio.to_thread(os.path.getmtime, pattern_path)
  317. if cached_entry.get('mtime') == file_mtime:
  318. return cached_entry.get('metadata')
  319. except OSError:
  320. pass
  321. return None
  322. def cache_pattern_metadata(pattern_file, first_coord, last_coord, total_coords):
  323. """Cache metadata for a pattern file."""
  324. try:
  325. cache_data = load_metadata_cache()
  326. data_section = cache_data.get('data', {})
  327. pattern_path = os.path.join(THETA_RHO_DIR, pattern_file)
  328. file_mtime = os.path.getmtime(pattern_path)
  329. data_section[pattern_file] = {
  330. 'mtime': file_mtime,
  331. 'metadata': {
  332. 'first_coordinate': first_coord,
  333. 'last_coordinate': last_coord,
  334. 'total_coordinates': total_coords
  335. }
  336. }
  337. cache_data['data'] = data_section
  338. save_metadata_cache(cache_data)
  339. logger.debug(f"Cached metadata for {pattern_file}")
  340. except Exception as e:
  341. logger.warning(f"Failed to cache metadata for {pattern_file}: {str(e)}")
  342. def needs_cache(pattern_file):
  343. """Check if a pattern file needs its cache generated."""
  344. # Check if image preview exists
  345. cache_path = get_cache_path(pattern_file)
  346. if not os.path.exists(cache_path):
  347. return True
  348. # Check if metadata cache exists and is valid
  349. metadata = get_pattern_metadata(pattern_file)
  350. if metadata is None:
  351. return True
  352. return False
  353. def needs_image_cache_only(pattern_file):
  354. """Quick check if a pattern file needs its image cache generated.
  355. Only checks for image file existence, not metadata validity.
  356. Used during startup for faster checking.
  357. """
  358. cache_path = get_cache_path(pattern_file)
  359. return not os.path.exists(cache_path)
  360. async def needs_cache_async(pattern_file):
  361. """Async version: Check if a pattern file needs its cache generated."""
  362. # Check if image preview exists
  363. cache_path = get_cache_path(pattern_file)
  364. if not await asyncio.to_thread(os.path.exists, cache_path):
  365. return True
  366. # Check if metadata cache exists and is valid
  367. metadata = await get_pattern_metadata_async(pattern_file)
  368. if metadata is None:
  369. return True
  370. return False
async def generate_image_preview(pattern_file):
    """Generate image preview for a single pattern file.

    Also refreshes the metadata cache entry when it is missing or stale.
    Returns True on success (or when the image is already cached), False
    when image generation fails.
    """
    # Imported lazily to avoid a circular import at module load time.
    from modules.core.preview import generate_preview_image
    from modules.core.pattern_manager import parse_theta_rho_file
    try:
        logger.debug(f"Starting preview generation for {pattern_file}")
        # Check if we need to update metadata cache
        metadata = get_pattern_metadata(pattern_file)
        if metadata is None:
            # Parse file to get metadata (this is the only time we need to parse)
            logger.debug(f"Parsing {pattern_file} for metadata cache")
            pattern_path = os.path.join(THETA_RHO_DIR, pattern_file)
            try:
                # Add timeout protection to prevent hanging on problematic files
                coordinates = await asyncio.wait_for(
                    asyncio.to_thread(parse_theta_rho_file, pattern_path),
                    timeout=30.0  # 30 second timeout per file
                )
                if coordinates:
                    first_coord = {"x": coordinates[0][0], "y": coordinates[0][1]}
                    last_coord = {"x": coordinates[-1][0], "y": coordinates[-1][1]}
                    total_coords = len(coordinates)
                    # Cache the metadata for future use
                    cache_pattern_metadata(pattern_file, first_coord, last_coord, total_coords)
                    logger.debug(f"Metadata cached for {pattern_file}: {total_coords} coordinates")
                else:
                    logger.warning(f"No coordinates found in {pattern_file}")
            except asyncio.TimeoutError:
                logger.error(f"Timeout parsing {pattern_file} for metadata - skipping")
            except Exception as e:
                logger.error(f"Failed to parse {pattern_file} for metadata: {str(e)}")
                # Continue with image generation even if metadata fails
        # Check if we need to generate the image
        cache_path = get_cache_path(pattern_file)
        if os.path.exists(cache_path):
            logger.debug(f"Skipping image generation for {pattern_file} - already cached")
            return True
        # Generate the image
        logger.debug(f"Generating image preview for {pattern_file}")
        image_content = await generate_preview_image(pattern_file)
        if not image_content:
            logger.error(f"Generated image content is empty for {pattern_file}")
            return False
        # Ensure cache directory exists before writing the image bytes
        ensure_cache_dir()
        with open(cache_path, 'wb') as f:
            f.write(image_content)
        try:
            os.chmod(cache_path, 0o644)  # More conservative permissions
        except (OSError, PermissionError) as e:
            # Log as debug instead of error since this is not critical
            logger.debug(f"Could not set cache file permissions for {pattern_file}: {str(e)}")
        logger.debug(f"Successfully generated preview for {pattern_file}")
        return True
    except Exception as e:
        logger.error(f"Failed to generate image for {pattern_file}: {str(e)}")
        return False
async def generate_all_image_previews():
    """Generate image previews for missing patterns using set difference.

    Updates the module-level cache_progress dict as it goes; errors are
    recorded there and re-raised to the caller.
    """
    global cache_progress
    try:
        await ensure_cache_dir_async()
        # Step 1: Get all pattern files
        pattern_files = await list_theta_rho_files_async()
        if not pattern_files:
            logger.info("No .thr pattern files found. Skipping image preview generation.")
            return
        # Step 2: Find patterns with existing cache
        def _find_cached_patterns():
            # Runs in a worker thread: one existence check per pattern.
            cached = set()
            for pattern in pattern_files:
                cache_path = get_cache_path(pattern)
                if os.path.exists(cache_path):
                    cached.add(pattern)
            return cached
        cached_patterns = await asyncio.to_thread(_find_cached_patterns)
        # Step 3: Calculate delta (patterns missing image cache)
        pattern_set = set(pattern_files)
        patterns_to_cache = list(pattern_set - cached_patterns)
        total_files = len(patterns_to_cache)
        skipped_files = len(pattern_files) - total_files
        if total_files == 0:
            logger.info(f"All {skipped_files} pattern files already have image previews. Skipping image generation.")
            return
        # Update progress state
        cache_progress.update({
            "stage": "images",
            "total_files": total_files,
            "processed_files": 0,
            "current_file": "",
            "error": None
        })
        logger.info(f"Generating image cache for {total_files} uncached .thr patterns ({skipped_files} already cached)...")
        batch_size = 5  # bounded concurrency: at most 5 previews in flight
        successful = 0
        for i in range(0, total_files, batch_size):
            batch = patterns_to_cache[i:i + batch_size]
            tasks = [generate_image_preview(file) for file in batch]
            results = await asyncio.gather(*tasks)
            successful += sum(1 for r in results if r)
            # Update progress
            cache_progress["processed_files"] = min(i + batch_size, total_files)
            # NOTE(review): i < total_files always holds inside this loop,
            # so this guard is effectively a no-op kept for safety.
            if i < total_files:
                cache_progress["current_file"] = patterns_to_cache[min(i + batch_size - 1, total_files - 1)]
            # Log progress
            progress = min(i + batch_size, total_files)
            logger.info(f"Image cache generation progress: {progress}/{total_files} files processed")
        logger.info(f"Image cache generation completed: {successful}/{total_files} patterns cached successfully, {skipped_files} patterns skipped (already cached)")
    except Exception as e:
        logger.error(f"Error during image cache generation: {str(e)}")
        cache_progress["error"] = str(e)
        raise
async def generate_metadata_cache():
    """Generate metadata cache for missing patterns using set difference.

    Parses each uncached pattern file (with a per-file timeout) and stores
    its first/last coordinates and coordinate count. Progress is reported
    through the module-level cache_progress dict; errors are recorded there
    and re-raised.
    """
    global cache_progress
    try:
        logger.info("Starting metadata cache generation...")
        # Step 1: Get all pattern files
        pattern_files = await list_theta_rho_files_async()
        if not pattern_files:
            logger.info("No pattern files found. Skipping metadata cache generation.")
            return
        # Step 2: Get existing metadata keys
        metadata_cache = await load_metadata_cache_async()
        existing_keys = set(metadata_cache.get('data', {}).keys())
        # Step 3: Calculate delta (patterns missing from metadata)
        pattern_set = set(pattern_files)
        files_to_process = list(pattern_set - existing_keys)
        total_files = len(files_to_process)
        skipped_files = len(pattern_files) - total_files
        if total_files == 0:
            logger.info(f"All {skipped_files} files already have metadata cache. Skipping metadata generation.")
            return
        # Update progress state
        cache_progress.update({
            "stage": "metadata",
            "total_files": total_files,
            "processed_files": 0,
            "current_file": "",
            "error": None
        })
        logger.info(f"Generating metadata cache for {total_files} new files ({skipped_files} files already cached)...")
        # Process in smaller batches for Pi Zero 2 W
        batch_size = 3  # Reduced from 5
        successful = 0
        for i in range(0, total_files, batch_size):
            batch = files_to_process[i:i + batch_size]
            # Process files sequentially within batch (no parallel tasks)
            for file_name in batch:
                pattern_path = os.path.join(THETA_RHO_DIR, file_name)
                cache_progress["current_file"] = file_name
                try:
                    # Parse file to get metadata with timeout protection
                    try:
                        coordinates = await asyncio.wait_for(
                            asyncio.to_thread(parse_theta_rho_file, pattern_path),
                            timeout=30.0  # 30 second timeout per file
                        )
                    except asyncio.TimeoutError:
                        logger.error(f"Timeout parsing {file_name} - skipping (file may be too large or corrupted)")
                        continue
                    if coordinates:
                        first_coord = {"x": coordinates[0][0], "y": coordinates[0][1]}
                        last_coord = {"x": coordinates[-1][0], "y": coordinates[-1][1]}
                        total_coords = len(coordinates)
                        # Cache the metadata
                        cache_pattern_metadata(file_name, first_coord, last_coord, total_coords)
                        successful += 1
                        logger.debug(f"Generated metadata for {file_name}")
                    # Small delay to reduce I/O pressure
                    await asyncio.sleep(0.05)
                except Exception as e:
                    logger.error(f"Failed to generate metadata for {file_name}: {str(e)}")
            # Update progress
            cache_progress["processed_files"] = min(i + batch_size, total_files)
            # Log progress
            progress = min(i + batch_size, total_files)
            logger.info(f"Metadata cache generation progress: {progress}/{total_files} files processed")
            # Delay between batches for system recovery
            if i + batch_size < total_files:
                await asyncio.sleep(0.3)
        logger.info(f"Metadata cache generation completed: {successful}/{total_files} patterns cached successfully, {skipped_files} patterns skipped (already cached)")
    except Exception as e:
        logger.error(f"Error during metadata cache generation: {str(e)}")
        cache_progress["error"] = str(e)
        raise
  557. async def rebuild_cache():
  558. """Rebuild the entire cache for all pattern files."""
  559. logger.info("Starting cache rebuild...")
  560. # Ensure cache directory exists
  561. ensure_cache_dir()
  562. # First generate metadata cache for all files
  563. await generate_metadata_cache()
  564. # Then generate image previews
  565. pattern_files = [f for f in list_theta_rho_files() if f.endswith('.thr')]
  566. total_files = len(pattern_files)
  567. if total_files == 0:
  568. logger.info("No pattern files found to cache")
  569. return
  570. logger.info(f"Generating image previews for {total_files} pattern files...")
  571. # Process in batches
  572. batch_size = 5
  573. successful = 0
  574. for i in range(0, total_files, batch_size):
  575. batch = pattern_files[i:i + batch_size]
  576. tasks = [generate_image_preview(file) for file in batch]
  577. results = await asyncio.gather(*tasks)
  578. successful += sum(1 for r in results if r)
  579. # Log progress
  580. progress = min(i + batch_size, total_files)
  581. logger.info(f"Image preview generation progress: {progress}/{total_files} files processed")
  582. logger.info(f"Cache rebuild completed: {successful}/{total_files} patterns cached successfully")
  583. async def generate_cache_background():
  584. """Run cache generation in the background with progress tracking."""
  585. global cache_progress
  586. try:
  587. cache_progress.update({
  588. "is_running": True,
  589. "stage": "starting",
  590. "total_files": 0,
  591. "processed_files": 0,
  592. "current_file": "",
  593. "error": None
  594. })
  595. # First generate metadata cache
  596. await generate_metadata_cache()
  597. # Then generate image previews
  598. await generate_all_image_previews()
  599. # Mark as complete
  600. cache_progress.update({
  601. "is_running": False,
  602. "stage": "complete",
  603. "current_file": "",
  604. "error": None
  605. })
  606. logger.info("Background cache generation completed successfully")
  607. except Exception as e:
  608. logger.error(f"Background cache generation failed: {str(e)}")
  609. cache_progress.update({
  610. "is_running": False,
  611. "stage": "error",
  612. "error": str(e)
  613. })
  614. raise
  615. def get_cache_progress():
  616. """Get the current cache generation progress.
  617. Returns a reference to the cache_progress dict for read-only access.
  618. The WebSocket handler should not modify this dict.
  619. """
  620. global cache_progress
  621. return cache_progress # Return reference instead of copy for better performance
  622. def is_cache_generation_needed():
  623. """Check if cache generation is needed."""
  624. pattern_files = [f for f in list_theta_rho_files() if f.endswith('.thr')]
  625. if not pattern_files:
  626. return False
  627. # Check if any files need caching
  628. patterns_to_cache = [f for f in pattern_files if needs_cache(f)]
  629. # Check metadata cache
  630. files_needing_metadata = []
  631. for file_name in pattern_files:
  632. if get_pattern_metadata(file_name) is None:
  633. files_needing_metadata.append(file_name)
  634. return len(patterns_to_cache) > 0 or len(files_needing_metadata) > 0
  635. async def is_cache_generation_needed_async():
  636. """Check if cache generation is needed using simple set difference.
  637. Returns True if any patterns are missing from either metadata or image cache.
  638. """
  639. try:
  640. # Step 1: List all patterns
  641. pattern_files = await list_theta_rho_files_async()
  642. if not pattern_files:
  643. return False
  644. pattern_set = set(pattern_files)
  645. # Step 2: Check metadata cache
  646. metadata_cache = await load_metadata_cache_async()
  647. metadata_keys = set(metadata_cache.get('data', {}).keys())
  648. if pattern_set != metadata_keys:
  649. # Metadata is missing some patterns
  650. return True
  651. # Step 3: Check image cache
  652. def _list_cached_images():
  653. """List all patterns that have cached images."""
  654. cached = set()
  655. if os.path.exists(CACHE_DIR):
  656. for pattern in pattern_files:
  657. cache_path = get_cache_path(pattern)
  658. if os.path.exists(cache_path):
  659. cached.add(pattern)
  660. return cached
  661. cached_images = await asyncio.to_thread(_list_cached_images)
  662. if pattern_set != cached_images:
  663. # Some patterns missing image cache
  664. return True
  665. return False
  666. except Exception as e:
  667. logger.warning(f"Error checking cache status: {e}")
  668. return False # Don't block startup on errors
  669. async def list_theta_rho_files_async():
  670. """Async version: List all theta-rho files."""
  671. def _walk_files():
  672. files = []
  673. for root, _, filenames in os.walk(THETA_RHO_DIR):
  674. # Only process .thr files to reduce memory usage
  675. thr_files = [f for f in filenames if f.endswith('.thr')]
  676. for file in thr_files:
  677. relative_path = os.path.relpath(os.path.join(root, file), THETA_RHO_DIR)
  678. # Normalize path separators to always use forward slashes for consistency across platforms
  679. relative_path = relative_path.replace(os.sep, '/')
  680. files.append(relative_path)
  681. return files
  682. files = await asyncio.to_thread(_walk_files)
  683. logger.debug(f"Found {len(files)} theta-rho files")
  684. return files # Already filtered for .thr