Просмотр исходного кода

Fix infinite loop when machine stays in Run state

Add a max_run_extensions limit (10) to prevent an infinite timeout loop when
the machine continuously reports 'Run' status. Previously, the code would
reset the timeout indefinitely as long as the machine was running.

Now, after 10 extensions (20+ minutes total at the 120-second max_wait_time),
the pattern will stop with a clear error message instead of looping forever.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
tuanchris 1 неделя назад
Родитель
Commit
a2692651e4
1 изменённый файл: 15 добавлено и 5 удалено
  1. 15 5
      modules/core/pattern_manager.py

+ 15 - 5
modules/core/pattern_manager.py

@@ -539,8 +539,10 @@ class MotionControlThread:
         max_wait_time = 120  # Maximum seconds to wait for 'ok' response
         max_corruption_retries = 10  # Max retries for corruption-type errors
         max_timeout_retries = 10  # Max retries for timeout (lost 'ok' response)
+        max_run_extensions = 10  # Max times to extend wait when machine is still running
         corruption_retry_count = 0
         timeout_retry_count = 0
+        run_extension_count = 0
 
         # GRBL error codes that indicate likely serial corruption (syntax errors)
         # These are recoverable by resending the command
@@ -589,7 +591,7 @@ class MotionControlThread:
                         # Attempt to recover by checking machine status
                         # The 'ok' might have been lost but command may have executed
                         logger.info("Motion thread: Attempting timeout recovery - checking machine status")
-                        logger.info(f"Motion thread: Current retry counts - timeout: {timeout_retry_count}/{max_timeout_retries}, corruption: {corruption_retry_count}/{max_corruption_retries}")
+                        logger.info(f"Motion thread: Current retry counts - timeout: {timeout_retry_count}/{max_timeout_retries}, corruption: {corruption_retry_count}/{max_corruption_retries}, run_ext: {run_extension_count}/{max_run_extensions}")
 
                         try:
                             # Check connection state first
@@ -651,10 +653,16 @@ class MotionControlThread:
                                     logger.info("Motion thread: Machine is Idle - assuming command completed (ok was lost) - SUCCESS")
                                     return True
                                 elif 'Run' in status_response:
-                                    # Machine still running - extend timeout
-                                    logger.info("Motion thread: Machine still running, extending wait time")
-                                    wait_start = time.time()  # Reset timeout
-                                    continue
+                                    # Machine still running - extend timeout but with a limit
+                                    run_extension_count += 1
+                                    if run_extension_count <= max_run_extensions:
+                                        logger.info(f"Motion thread: Machine still running, extending wait time ({run_extension_count}/{max_run_extensions})")
+                                        wait_start = time.time()  # Reset timeout
+                                        continue
+                                    else:
+                                        logger.error(f"Motion thread: Machine stuck in Run state for too long ({run_extension_count} extensions)")
+                                        logger.error("Motion thread: This may indicate the machine is running a command that will never complete")
+                                        # Fall through to retry/failure logic
                                 else:
                                     logger.warning(f"Motion thread: Status response didn't contain Idle or Run: '{status_response}'")
                             else:
@@ -680,10 +688,12 @@ class MotionControlThread:
                         logger.error(f"  Failed command: {gcode}")
                         logger.error(f"  Timeout retries used: {timeout_retry_count}/{max_timeout_retries}")
                         logger.error(f"  Corruption retries used: {corruption_retry_count}/{max_corruption_retries}")
+                        logger.error(f"  Run extensions used: {run_extension_count}/{max_run_extensions}")
                         logger.error("  Possible causes:")
                         logger.error("    - Serial connection lost or unstable")
                         logger.error("    - Hardware controller unresponsive")
                         logger.error("    - USB power issue (try powered hub)")
+                        logger.error("    - Machine stuck executing a very long move")
                         logger.error("=" * 60)
                         state.stop_requested = True
                         return False