Ver Fonte

Add timeout recovery logic for lost 'ok' responses on Pi 3B+

When a 120s timeout occurs waiting for 'ok', the code now attempts recovery:
- Sends status query '?' to check if machine is still responsive
- If machine is Idle: assumes the command completed and its 'ok' was lost, so the pattern continues
- If machine is Run: restarts the 120s wait window, as movement is still in progress
- If delayed 'ok' received during recovery: accepts it and continues
- If no response (or an unrecognized status): retries the command (up to 2 times) before stopping the pattern

This handles cases where the serial 'ok' response is completely lost
due to UART timing issues on Pi 3B+, without unnecessarily stopping patterns.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
tuanchris há 1 semana atrás
pai
commit
50e4927409
1 ficheiro alterado com 58 adições e 2 exclusões
  1. 58 2
      modules/core/pattern_manager.py

+ 58 - 2
modules/core/pattern_manager.py

@@ -542,7 +542,9 @@ class MotionControlThread:
         gcode = f"$J=G91 G21 Y{y} F{speed}" if home else f"G1 G53 X{x} Y{y} F{speed}"
         gcode = f"$J=G91 G21 Y{y} F{speed}" if home else f"G1 G53 X{x} Y{y} F{speed}"
         max_wait_time = 120  # Maximum seconds to wait for 'ok' response
         max_wait_time = 120  # Maximum seconds to wait for 'ok' response
         max_corruption_retries = 3  # Max retries for corruption-type errors
         max_corruption_retries = 3  # Max retries for corruption-type errors
+        max_timeout_retries = 2  # Max retries for timeout (lost 'ok' response)
         corruption_retry_count = 0
         corruption_retry_count = 0
+        timeout_retry_count = 0
 
 
         # GRBL error codes that indicate likely serial corruption (syntax errors)
         # GRBL error codes that indicate likely serial corruption (syntax errors)
         # These are recoverable by resending the command
         # These are recoverable by resending the command
@@ -585,8 +587,62 @@ class MotionControlThread:
                     # Check for timeout
                     # Check for timeout
                     elapsed = time.time() - wait_start
                     elapsed = time.time() - wait_start
                     if elapsed > max_wait_time:
                     if elapsed > max_wait_time:
-                        logger.error(f"Motion thread: Timeout ({max_wait_time}s) waiting for 'ok' response")
-                        logger.error("Possible serial communication issue - stopping pattern")
+                        logger.warning(f"Motion thread: Timeout ({max_wait_time}s) waiting for 'ok' response")
+
+                        # Attempt to recover by checking machine status
+                        # The 'ok' might have been lost but command may have executed
+                        logger.info("Motion thread: Attempting timeout recovery - checking machine status")
+
+                        try:
+                            # Clear buffer first
+                            if hasattr(state.conn, 'reset_input_buffer'):
+                                state.conn.reset_input_buffer()
+
+                            # Send status query
+                            state.conn.send("?\n")
+                            time.sleep(0.2)
+
+                            # Try to read status response
+                            status_response = None
+                            for _ in range(10):
+                                resp = state.conn.readline()
+                                if resp:
+                                    logger.debug(f"Motion thread: Status query response: {resp}")
+                                    if '<' in resp or 'Idle' in resp or 'Run' in resp:
+                                        status_response = resp
+                                        break
+                                    # Also check for 'ok' that might have been delayed
+                                    if resp.lower() == 'ok':
+                                        logger.info("Motion thread: Received delayed 'ok' during recovery")
+                                        return True
+                                time.sleep(0.05)
+
+                            if status_response:
+                                if 'Idle' in status_response:
+                                    # Machine is idle - command likely completed, 'ok' was lost
+                                    logger.info("Motion thread: Machine is Idle - assuming command completed (ok was lost)")
+                                    return True
+                                elif 'Run' in status_response:
+                                    # Machine still running - extend timeout
+                                    logger.info("Motion thread: Machine still running, extending wait time")
+                                    wait_start = time.time()  # Reset timeout
+                                    continue
+                                else:
+                                    logger.warning(f"Motion thread: Unknown status: {status_response}")
+
+                            # No valid status response - connection may be dead
+                            timeout_retry_count += 1
+                            if timeout_retry_count <= max_timeout_retries:
+                                logger.warning(f"Motion thread: No response, retrying command ({timeout_retry_count}/{max_timeout_retries})")
+                                time.sleep(0.1)
+                                break  # Break inner loop to resend command
+
+                        except Exception as e:
+                            logger.error(f"Motion thread: Error during timeout recovery: {e}")
+
+                        # Max retries exceeded or recovery failed
+                        logger.error("Motion thread: Timeout recovery failed - stopping pattern")
+                        logger.error("Possible serial communication issue")
                         state.stop_requested = True
                         state.stop_requested = True
                         return False
                         return False