  1. """Image Cache Manager for pre-generating and managing image previews."""
  2. import os
  3. import json
  4. import asyncio
  5. import logging
  6. from pathlib import Path
  7. from modules.core.pattern_manager import list_theta_rho_files, THETA_RHO_DIR, parse_theta_rho_file
  8. logger = logging.getLogger(__name__)

# Global cache progress state
cache_progress = {
    "is_running": False,
    "total_files": 0,
    "processed_files": 0,
    "current_file": "",
    "stage": "idle",  # idle, metadata, images, complete
    "error": None
}
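
# Illustrative snapshot of cache_progress midway through the image stage.
# The values below are hypothetical and shown only to document the shape
# consumers can expect from get_cache_progress():
#
#   {
#       "is_running": True,
#       "total_files": 120,
#       "processed_files": 45,
#       "current_file": "custom_patterns/spiral.thr",
#       "stage": "images",
#       "error": None
#   }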

# Constants
CACHE_DIR = os.path.join(THETA_RHO_DIR, "cached_images")
METADATA_CACHE_FILE = "metadata_cache.json"  # Now in root directory

# Cache schema version - increment when structure changes
CACHE_SCHEMA_VERSION = 1

# Expected cache schema structure
EXPECTED_CACHE_SCHEMA = {
    'version': CACHE_SCHEMA_VERSION,
    'structure': {
        'mtime': 'number',
        'metadata': {
            'first_coordinate': {'x': 'number', 'y': 'number'},
            'last_coordinate': {'x': 'number', 'y': 'number'},
            'total_coordinates': 'number'
        }
    }
}
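
# For reference, a metadata_cache.json that satisfies this schema looks like
# the following (entry values are illustrative, not taken from a real file):
#
#   {
#       "version": 1,
#       "data": {
#           "example.thr": {
#               "mtime": 1700000000.0,
#               "metadata": {
#                   "first_coordinate": {"x": 0.0, "y": 0.0},
#                   "last_coordinate": {"x": 3.14, "y": 1.0},
#                   "total_coordinates": 2048
#               }
#           }
#       }
#   }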

def validate_cache_schema(cache_data):
    """Validate that cache data matches the expected schema structure."""
    try:
        # Check if version info exists
        if not isinstance(cache_data, dict):
            return False

        # Check for the version field - if missing, it's the old format
        cache_version = cache_data.get('version')
        if cache_version is None:
            logger.info("Cache file missing version info - treating as outdated schema")
            return False

        # Check if the version matches the current expected version
        if cache_version != CACHE_SCHEMA_VERSION:
            logger.info(f"Cache schema version mismatch: found {cache_version}, expected {CACHE_SCHEMA_VERSION}")
            return False

        # Check if the data section exists
        if 'data' not in cache_data:
            logger.warning("Cache file missing 'data' section")
            return False

        # Validate the structure of the first entry, if any exist
        data_section = cache_data.get('data', {})
        if data_section and isinstance(data_section, dict):
            for pattern_file, entry in list(data_section.items())[:1]:  # Just check the first entry
                if not isinstance(entry, dict):
                    return False
                if 'mtime' not in entry or 'metadata' not in entry:
                    return False
                metadata = entry.get('metadata', {})
                required_fields = ['first_coordinate', 'last_coordinate', 'total_coordinates']
                if not all(field in metadata for field in required_fields):
                    return False
                # Validate coordinate structure
                for coord_field in ['first_coordinate', 'last_coordinate']:
                    coord = metadata.get(coord_field)
                    if not isinstance(coord, dict) or 'x' not in coord or 'y' not in coord:
                        return False
        return True
    except Exception as e:
        logger.warning(f"Error validating cache schema: {str(e)}")
        return False

def invalidate_cache():
    """Delete only the metadata cache file, preserving the image cache."""
    try:
        # Delete the metadata cache file only
        if os.path.exists(METADATA_CACHE_FILE):
            os.remove(METADATA_CACHE_FILE)
            logger.info("Deleted outdated metadata cache file")
        # Keep the image cache directory intact - images are still valid.
        # Just ensure the cache directory structure exists.
        ensure_cache_dir()
        return True
    except Exception as e:
        logger.error(f"Failed to invalidate metadata cache: {str(e)}")
        return False

def ensure_cache_dir():
    """Ensure the cache directory exists with proper permissions."""
    try:
        Path(CACHE_DIR).mkdir(parents=True, exist_ok=True)
        # Initialize the metadata cache if it doesn't exist
        if not os.path.exists(METADATA_CACHE_FILE):
            initial_cache = {
                'version': CACHE_SCHEMA_VERSION,
                'data': {}
            }
            with open(METADATA_CACHE_FILE, 'w') as f:
                json.dump(initial_cache, f)
            try:
                os.chmod(METADATA_CACHE_FILE, 0o644)  # More conservative permissions
            except (OSError, PermissionError) as e:
                logger.debug(f"Could not set metadata cache file permissions: {str(e)}")
        for root, dirs, files in os.walk(CACHE_DIR):
            try:
                os.chmod(root, 0o755)  # More conservative permissions
                for file in files:
                    file_path = os.path.join(root, file)
                    try:
                        os.chmod(file_path, 0o644)  # More conservative permissions
                    except (OSError, PermissionError) as e:
                        # Log as debug instead of error since this is not critical
                        logger.debug(f"Could not set permissions for file {file_path}: {str(e)}")
            except (OSError, PermissionError) as e:
                # Log as debug instead of error since this is not critical
                logger.debug(f"Could not set permissions for directory {root}: {str(e)}")
                continue
    except Exception as e:
        logger.error(f"Failed to create cache directory: {str(e)}")

def get_cache_path(pattern_file):
    """Get the cache path for a pattern file."""
    # Normalize path separators to handle both forward slashes and backslashes
    pattern_file = pattern_file.replace('\\', '/')
    # Create subdirectories in the cache to match the pattern file structure
    cache_subpath = os.path.dirname(pattern_file)
    if cache_subpath:
        # Create the same subdirectory structure in the cache (including custom_patterns).
        # Convert forward slashes back to the platform-specific separator for os.path.join.
        cache_subpath = cache_subpath.replace('/', os.sep)
        cache_dir = os.path.join(CACHE_DIR, cache_subpath)
    else:
        # For files in the root pattern directory
        cache_dir = CACHE_DIR
    # Ensure the subdirectory exists
    os.makedirs(cache_dir, exist_ok=True)
    try:
        os.chmod(cache_dir, 0o755)  # More conservative permissions
    except (OSError, PermissionError) as e:
        # Log as debug instead of error since this is not critical
        logger.debug(f"Could not set permissions for cache subdirectory {cache_dir}: {str(e)}")
    # Use just the filename part for the cache file
    filename = os.path.basename(pattern_file)
    safe_name = filename.replace('\\', '_')
    return os.path.join(cache_dir, f"{safe_name}.webp")
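
# As a worked example (paths are hypothetical), assuming THETA_RHO_DIR is
# "patterns" on a POSIX system:
#
#   get_cache_path("custom_patterns/spiral.thr")
#   -> "patterns/cached_images/custom_patterns/spiral.thr.webp"
#
#   get_cache_path("rose.thr")
#   -> "patterns/cached_images/rose.thr.webp"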

def delete_pattern_cache(pattern_file):
    """Delete the cached preview image and metadata for a pattern file."""
    try:
        # Remove the cached image
        cache_path = get_cache_path(pattern_file)
        if os.path.exists(cache_path):
            os.remove(cache_path)
            logger.info(f"Deleted cached image: {cache_path}")
        # Remove the entry from the metadata cache
        metadata_cache = load_metadata_cache()
        data_section = metadata_cache.get('data', {})
        if pattern_file in data_section:
            del data_section[pattern_file]
            metadata_cache['data'] = data_section
            save_metadata_cache(metadata_cache)
            logger.info(f"Removed {pattern_file} from metadata cache")
        return True
    except Exception as e:
        logger.error(f"Failed to delete cache for {pattern_file}: {str(e)}")
        return False

def load_metadata_cache():
    """Load the metadata cache from disk with schema validation."""
    try:
        if os.path.exists(METADATA_CACHE_FILE):
            with open(METADATA_CACHE_FILE, 'r') as f:
                cache_data = json.load(f)
            # Validate the schema
            if not validate_cache_schema(cache_data):
                logger.info("Cache schema validation failed - invalidating cache")
                invalidate_cache()
                # Return an empty cache structure after invalidation
                return {
                    'version': CACHE_SCHEMA_VERSION,
                    'data': {}
                }
            return cache_data
    except Exception as e:
        logger.warning(f"Failed to load metadata cache: {str(e)} - invalidating cache")
        try:
            invalidate_cache()
        except Exception as invalidate_error:
            logger.error(f"Failed to invalidate corrupted cache: {str(invalidate_error)}")
    # Return an empty cache structure
    return {
        'version': CACHE_SCHEMA_VERSION,
        'data': {}
    }

def save_metadata_cache(cache_data):
    """Save the metadata cache to disk with version info."""
    try:
        ensure_cache_dir()
        # Ensure the cache data has the versioned structure. An old-format
        # cache is a flat dict of entries with neither 'version' nor 'data'.
        if isinstance(cache_data, dict) and 'version' not in cache_data and 'data' not in cache_data:
            # Old format - wrap the existing entries
            structured_cache = {
                'version': CACHE_SCHEMA_VERSION,
                'data': cache_data
            }
        else:
            structured_cache = cache_data
        with open(METADATA_CACHE_FILE, 'w') as f:
            json.dump(structured_cache, f, indent=2)
    except Exception as e:
        logger.error(f"Failed to save metadata cache: {str(e)}")

def get_pattern_metadata(pattern_file):
    """Get cached metadata for a pattern file."""
    cache_data = load_metadata_cache()
    data_section = cache_data.get('data', {})
    # Check that we have cached metadata and the file hasn't changed
    if pattern_file in data_section:
        cached_entry = data_section[pattern_file]
        pattern_path = os.path.join(THETA_RHO_DIR, pattern_file)
        try:
            file_mtime = os.path.getmtime(pattern_path)
            if cached_entry.get('mtime') == file_mtime:
                return cached_entry.get('metadata')
        except OSError:
            pass
    return None

def cache_pattern_metadata(pattern_file, first_coord, last_coord, total_coords):
    """Cache metadata for a pattern file."""
    try:
        cache_data = load_metadata_cache()
        data_section = cache_data.get('data', {})
        pattern_path = os.path.join(THETA_RHO_DIR, pattern_file)
        file_mtime = os.path.getmtime(pattern_path)
        data_section[pattern_file] = {
            'mtime': file_mtime,
            'metadata': {
                'first_coordinate': first_coord,
                'last_coordinate': last_coord,
                'total_coordinates': total_coords
            }
        }
        cache_data['data'] = data_section
        save_metadata_cache(cache_data)
        logger.debug(f"Cached metadata for {pattern_file}")
    except Exception as e:
        logger.warning(f"Failed to cache metadata for {pattern_file}: {str(e)}")

def needs_cache(pattern_file):
    """Check whether a pattern file needs its cache generated."""
    # Check if the image preview exists
    cache_path = get_cache_path(pattern_file)
    if not os.path.exists(cache_path):
        return True
    # Check if the metadata cache exists and is valid
    metadata = get_pattern_metadata(pattern_file)
    if metadata is None:
        return True
    return False

async def generate_image_preview(pattern_file):
    """Generate an image preview for a single pattern file."""
    # generate_preview_image is imported lazily here; parse_theta_rho_file is
    # already available from the module-level import.
    from modules.core.preview import generate_preview_image
    try:
        logger.debug(f"Starting preview generation for {pattern_file}")
        # Check if we need to update the metadata cache
        metadata = get_pattern_metadata(pattern_file)
        if metadata is None:
            # Parse the file to get metadata (this is the only time we need to parse)
            logger.debug(f"Parsing {pattern_file} for metadata cache")
            pattern_path = os.path.join(THETA_RHO_DIR, pattern_file)
            try:
                coordinates = await asyncio.to_thread(parse_theta_rho_file, pattern_path)
                if coordinates:
                    first_coord = {"x": coordinates[0][0], "y": coordinates[0][1]}
                    last_coord = {"x": coordinates[-1][0], "y": coordinates[-1][1]}
                    total_coords = len(coordinates)
                    # Cache the metadata for future use
                    cache_pattern_metadata(pattern_file, first_coord, last_coord, total_coords)
                    logger.debug(f"Metadata cached for {pattern_file}: {total_coords} coordinates")
                else:
                    logger.warning(f"No coordinates found in {pattern_file}")
            except Exception as e:
                logger.error(f"Failed to parse {pattern_file} for metadata: {str(e)}")
                # Continue with image generation even if metadata fails
        # Check if we need to generate the image
        cache_path = get_cache_path(pattern_file)
        if os.path.exists(cache_path):
            logger.debug(f"Skipping image generation for {pattern_file} - already cached")
            return True
        # Generate the image
        logger.debug(f"Generating image preview for {pattern_file}")
        image_content = await generate_preview_image(pattern_file)
        if not image_content:
            logger.error(f"Generated image content is empty for {pattern_file}")
            return False
        # Ensure the cache directory exists
        ensure_cache_dir()
        with open(cache_path, 'wb') as f:
            f.write(image_content)
        try:
            os.chmod(cache_path, 0o644)  # More conservative permissions
        except (OSError, PermissionError) as e:
            # Log as debug instead of error since this is not critical
            logger.debug(f"Could not set cache file permissions for {pattern_file}: {str(e)}")
        logger.debug(f"Successfully generated preview for {pattern_file}")
        return True
    except Exception as e:
        logger.error(f"Failed to generate image for {pattern_file}: {str(e)}")
        return False

async def generate_all_image_previews():
    """Generate image previews for all pattern files with progress tracking."""
    global cache_progress
    try:
        ensure_cache_dir()
        pattern_files = [f for f in list_theta_rho_files() if f.endswith('.thr')]
        if not pattern_files:
            logger.info("No .thr pattern files found. Skipping image preview generation.")
            return
        patterns_to_cache = [f for f in pattern_files if needs_cache(f)]
        total_files = len(patterns_to_cache)
        skipped_files = len(pattern_files) - total_files
        if total_files == 0:
            logger.info(f"All {skipped_files} pattern files already have image previews. Skipping image generation.")
            return
        # Update progress state
        cache_progress.update({
            "stage": "images",
            "total_files": total_files,
            "processed_files": 0,
            "current_file": "",
            "error": None
        })
        logger.info(f"Generating image cache for {total_files} uncached .thr patterns ({skipped_files} already cached)...")
        batch_size = 5
        successful = 0
        for i in range(0, total_files, batch_size):
            batch = patterns_to_cache[i:i + batch_size]
            tasks = [generate_image_preview(file) for file in batch]
            results = await asyncio.gather(*tasks)
            successful += sum(1 for r in results if r)
            # Update and log progress
            progress = min(i + batch_size, total_files)
            cache_progress["processed_files"] = progress
            cache_progress["current_file"] = patterns_to_cache[progress - 1]
            logger.info(f"Image cache generation progress: {progress}/{total_files} files processed")
        logger.info(f"Image cache generation completed: {successful}/{total_files} patterns cached successfully, {skipped_files} patterns skipped (already cached)")
    except Exception as e:
        logger.error(f"Error during image cache generation: {str(e)}")
        cache_progress["error"] = str(e)
        raise

async def generate_metadata_cache():
    """Generate the metadata cache for all pattern files with progress tracking."""
    global cache_progress
    try:
        logger.info("Starting metadata cache generation...")
        # Get all pattern files using the same function as the rest of the codebase
        pattern_files = list_theta_rho_files()
        if not pattern_files:
            logger.info("No pattern files found. Skipping metadata cache generation.")
            return
        # Filter out files that already have a valid metadata cache
        files_to_process = []
        for file_name in pattern_files:
            if get_pattern_metadata(file_name) is None:
                files_to_process.append(file_name)
        total_files = len(files_to_process)
        skipped_files = len(pattern_files) - total_files
        if total_files == 0:
            logger.info(f"All {skipped_files} files already have metadata cache. Skipping metadata generation.")
            return
        # Update progress state
        cache_progress.update({
            "stage": "metadata",
            "total_files": total_files,
            "processed_files": 0,
            "current_file": "",
            "error": None
        })
        logger.info(f"Generating metadata cache for {total_files} new files ({skipped_files} files already cached)...")
        # Process in batches; each file is parsed off the event loop via a thread
        batch_size = 5
        successful = 0
        for i in range(0, total_files, batch_size):
            batch = files_to_process[i:i + batch_size]
            for file_name in batch:
                pattern_path = os.path.join(THETA_RHO_DIR, file_name)
                try:
                    # Parse the file to get metadata
                    coordinates = await asyncio.to_thread(parse_theta_rho_file, pattern_path)
                    if coordinates:
                        first_coord = {"x": coordinates[0][0], "y": coordinates[0][1]}
                        last_coord = {"x": coordinates[-1][0], "y": coordinates[-1][1]}
                        total_coords = len(coordinates)
                        # Cache the metadata
                        cache_pattern_metadata(file_name, first_coord, last_coord, total_coords)
                        successful += 1
                        logger.debug(f"Generated metadata for {file_name}")
                    # Update the current file being processed
                    cache_progress["current_file"] = file_name
                except Exception as e:
                    logger.error(f"Failed to generate metadata for {file_name}: {str(e)}")
            # Update and log progress
            progress = min(i + batch_size, total_files)
            cache_progress["processed_files"] = progress
            logger.info(f"Metadata cache generation progress: {progress}/{total_files} files processed")
        logger.info(f"Metadata cache generation completed: {successful}/{total_files} patterns cached successfully, {skipped_files} patterns skipped (already cached)")
    except Exception as e:
        logger.error(f"Error during metadata cache generation: {str(e)}")
        cache_progress["error"] = str(e)
        raise

async def rebuild_cache():
    """Rebuild the cache for all pattern files (existing previews are reused)."""
    logger.info("Starting cache rebuild...")
    # Ensure the cache directory exists
    ensure_cache_dir()
    # First generate the metadata cache for all files
    await generate_metadata_cache()
    # Then generate image previews
    pattern_files = [f for f in list_theta_rho_files() if f.endswith('.thr')]
    total_files = len(pattern_files)
    if total_files == 0:
        logger.info("No pattern files found to cache")
        return
    logger.info(f"Generating image previews for {total_files} pattern files...")
    # Process in batches
    batch_size = 5
    successful = 0
    for i in range(0, total_files, batch_size):
        batch = pattern_files[i:i + batch_size]
        tasks = [generate_image_preview(file) for file in batch]
        results = await asyncio.gather(*tasks)
        successful += sum(1 for r in results if r)
        # Log progress
        progress = min(i + batch_size, total_files)
        logger.info(f"Image preview generation progress: {progress}/{total_files} files processed")
    logger.info(f"Cache rebuild completed: {successful}/{total_files} patterns cached successfully")

async def generate_cache_background():
    """Run cache generation in the background with progress tracking."""
    global cache_progress
    try:
        cache_progress.update({
            "is_running": True,
            "stage": "starting",
            "total_files": 0,
            "processed_files": 0,
            "current_file": "",
            "error": None
        })
        # First generate the metadata cache
        await generate_metadata_cache()
        # Then generate image previews
        await generate_all_image_previews()
        # Mark as complete
        cache_progress.update({
            "is_running": False,
            "stage": "complete",
            "current_file": "",
            "error": None
        })
        logger.info("Background cache generation completed successfully")
    except Exception as e:
        logger.error(f"Background cache generation failed: {str(e)}")
        cache_progress.update({
            "is_running": False,
            "stage": "error",
            "error": str(e)
        })
        raise

def get_cache_progress():
    """Get the current cache generation progress."""
    return cache_progress.copy()
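
# A minimal usage sketch (hypothetical caller code, not part of this module):
# a web handler might kick off generation and poll progress like so, assuming
# it runs inside an existing asyncio event loop:
#
#   task = asyncio.create_task(generate_cache_background())
#   ...
#   progress = get_cache_progress()
#   if progress["is_running"]:
#       percent = 100 * progress["processed_files"] / max(progress["total_files"], 1)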

def is_cache_generation_needed():
    """Check whether cache generation is needed."""
    pattern_files = [f for f in list_theta_rho_files() if f.endswith('.thr')]
    if not pattern_files:
        return False
    # Check if any files need caching
    patterns_to_cache = [f for f in pattern_files if needs_cache(f)]
    # Check the metadata cache
    files_needing_metadata = []
    for file_name in pattern_files:
        if get_pattern_metadata(file_name) is None:
            files_needing_metadata.append(file_name)
    return len(patterns_to_cache) > 0 or len(files_needing_metadata) > 0
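
# Illustrative entry point (hypothetical; the real application is expected to
# drive these coroutines from its own startup code):
#
#   if __name__ == "__main__":
#       if is_cache_generation_needed():
#           asyncio.run(generate_cache_background())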