Ver código fonte

Fix $Bye reset reliability and prevent position drift

- Add idle check in reset_theta() before sending $Bye to prevent
  error:25 when the controller is still processing commands
- Add retry logic (3 attempts with 5s/7s/9s timeouts) to soft reset
- Fail-fast: only set position to (0,0) when confirmation received,
  preventing position drift from accumulating over long operations
- Add CPU affinity: pin the Docker backend to cores 0-2 and the touch
  app to core 3 (with Nice=10) to prevent serial I/O timing issues

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
tuanchris 1 semana atrás
pai
commit
6893719141

+ 3 - 0
docker-compose.yml

@@ -21,6 +21,9 @@ services:
     build: .
     image: ghcr.io/tuanchris/dune-weaver:feature-react-ui
     restart: always
+    # Pin motion-critical backend to cores 0-2 (Raspberry Pi 4/5 has cores 0-3)
+    # This prevents CPU contention from touch app blocking I/O calls
+    cpuset: "0,1,2"
     ports:
       - "8080:8080"
     # Environment variables for testing (uncomment to enable):

+ 4 - 1
dune-weaver-touch/dune-weaver-touch.service

@@ -16,7 +16,10 @@ Environment=QT_QPA_EGLFS_ALWAYS_SET_MODE=1
 Environment=QT_QPA_EGLFS_HIDECURSOR=1
 Environment=QT_QPA_EGLFS_INTEGRATION=eglfs_kms
 Environment=QT_QPA_EGLFS_KMS_ATOMIC=1
-ExecStart=/home/pi/dune-weaver-touch/venv/bin/python /home/pi/dune-weaver-touch/main.py
+# CPU isolation: Pin touch app to core 3, lower priority to prevent starving motion backend
+# Backend runs in Docker pinned to cores 0-2 for serial I/O timing reliability
+Nice=10
+ExecStart=/usr/bin/taskset -c 3 /home/pi/dune-weaver-touch/venv/bin/python /home/pi/dune-weaver-touch/main.py
 Restart=always
 RestartSec=10
 StartLimitInterval=200

+ 88 - 66
modules/connection/connection_manager.py

@@ -1269,7 +1269,7 @@ async def update_machine_position():
             logger.error(f"Error updating machine position: {e}")
 
 
-def perform_soft_reset_sync():
+def perform_soft_reset_sync(max_retries: int = 3):
     """
     Synchronous version of soft reset for use during device initialization.
 
@@ -1277,6 +1277,15 @@ def perform_soft_reset_sync():
     Triggers a software reset which clears position counters to 0.
     This is more reliable than G92 which only sets a work coordinate offset
     without changing the actual machine position (MPos).
+
+    IMPORTANT: Position is only reset to (0,0) if confirmation is received.
+    This prevents position drift from accumulating over long operation periods.
+
+    Args:
+        max_retries: Maximum number of reset attempts (default 3)
+
+    Returns:
+        True if reset confirmed, False if all attempts failed
     """
     if not state.conn or not state.conn.is_connected():
         logger.warning("Cannot perform soft reset: no active connection")
@@ -1286,87 +1295,100 @@ def perform_soft_reset_sync():
         # Detect firmware type to use appropriate reset command
         firmware_type, version = _detect_firmware()
         logger.info(f"Detected firmware: {firmware_type} {version or ''}")
-
         logger.info(f"Performing soft reset (was: X={state.machine_x:.2f}, Y={state.machine_y:.2f})")
 
-        # Clear any pending data first
-        if isinstance(state.conn, SerialConnection) and state.conn.ser:
-            state.conn.ser.reset_input_buffer()
+        for attempt in range(max_retries):
+            # Increasing timeout: 5s, 7s, 9s
+            timeout = 5.0 + (attempt * 2.0)
+            logger.info(f"Reset attempt {attempt + 1}/{max_retries} (timeout: {timeout}s)")
 
-        # Send appropriate reset command based on firmware
-        if firmware_type == 'fluidnc':
-            # FluidNC uses $Bye for soft reset
-            if isinstance(state.conn, SerialConnection) and state.conn.ser:
-                state.conn.ser.write(b'$Bye\n')
-                state.conn.ser.flush()
-                logger.info(f"$Bye sent directly via serial to {state.port}")
-            else:
-                state.conn.send('$Bye\n')
-                logger.info("$Bye sent via connection abstraction")
-        else:
-            # GRBL uses Ctrl+X (0x18) for soft reset
+            # Clear any pending data first
             if isinstance(state.conn, SerialConnection) and state.conn.ser:
-                state.conn.ser.write(b'\x18')
-                state.conn.ser.flush()
-                logger.info(f"Ctrl+X (0x18) sent directly via serial to {state.port}")
+                state.conn.ser.reset_input_buffer()
+
+            # Send appropriate reset command based on firmware
+            if firmware_type == 'fluidnc':
+                # FluidNC uses $Bye for soft reset
+                if isinstance(state.conn, SerialConnection) and state.conn.ser:
+                    state.conn.ser.write(b'$Bye\n')
+                    state.conn.ser.flush()
+                    logger.info(f"$Bye sent directly via serial to {state.port}")
+                else:
+                    state.conn.send('$Bye\n')
+                    logger.info("$Bye sent via connection abstraction")
             else:
-                state.conn.send('\x18')
-                logger.info("Ctrl+X (0x18) sent via connection abstraction")
-
-        # Wait for controller to fully restart
-        # FluidNC sequence: [MSG:INFO: Restarting] -> ... -> "Grbl 3.9 [FluidNC...]"
-        # GRBL sequence: "Grbl 1.1h ['$' for help]"
-        start_time = time.time()
-        reset_confirmed = False
-        while time.time() - start_time < 5.0:  # 5 second timeout for full reboot
-            try:
-                response = state.conn.readline()
-                if response:
-                    logger.debug(f"Reset response: {response}")
-                    # Wait for the "Grbl" startup banner - this means fully ready
-                    if response.startswith("Grbl") or "fluidnc" in response.lower():
-                        reset_confirmed = True
-                        logger.info(f"Controller restart complete: {response}")
-                        break
-            except Exception:
-                pass
-            time.sleep(0.05)
-
-        # Small delay to let controller fully stabilize
-        time.sleep(0.2)
-
-        # Unlock controller in case it's in alarm state after reset
-        if reset_confirmed:
-            logger.info("Sending $X to unlock controller after reset")
-            state.conn.send("$X\n")
-            # Wait for ok response
-            unlock_start = time.time()
-            while time.time() - unlock_start < 1.0:
+                # GRBL uses Ctrl+X (0x18) for soft reset
+                if isinstance(state.conn, SerialConnection) and state.conn.ser:
+                    state.conn.ser.write(b'\x18')
+                    state.conn.ser.flush()
+                    logger.info(f"Ctrl+X (0x18) sent directly via serial to {state.port}")
+                else:
+                    state.conn.send('\x18')
+                    logger.info("Ctrl+X (0x18) sent via connection abstraction")
+
+            # Wait for controller to fully restart
+            # FluidNC sequence: [MSG:INFO: Restarting] -> ... -> "Grbl 3.9 [FluidNC...]"
+            # GRBL sequence: "Grbl 1.1h ['$' for help]"
+            start_time = time.time()
+            reset_confirmed = False
+            while time.time() - start_time < timeout:
                 try:
                     response = state.conn.readline()
                     if response:
-                        logger.debug(f"$X response: {response}")
-                        if response.lower() == "ok":
-                            logger.info("Controller unlocked")
+                        logger.debug(f"Reset response: {response}")
+                        # Wait for the "Grbl" startup banner - this means fully ready
+                        if response.startswith("Grbl") or "fluidnc" in response.lower():
+                            reset_confirmed = True
+                            logger.info(f"Controller restart complete: {response}")
                             break
                 except Exception:
                     pass
                 time.sleep(0.05)
 
-        # Reset state positions to 0 after soft reset
-        state.machine_x = 0.0
-        state.machine_y = 0.0
+            if reset_confirmed:
+                # Small delay to let controller fully stabilize
+                time.sleep(0.2)
 
-        if reset_confirmed:
-            logger.info(f"Machine position reset to 0 via {'$Bye' if firmware_type == 'fluidnc' else 'Ctrl+X'} soft reset")
-        else:
-            logger.warning("Soft reset sent but no confirmation received, position set to 0 anyway")
+                # Unlock controller in case it's in alarm state after reset
+                logger.info("Sending $X to unlock controller after reset")
+                state.conn.send("$X\n")
+                # Wait for ok response
+                unlock_start = time.time()
+                while time.time() - unlock_start < 1.0:
+                    try:
+                        response = state.conn.readline()
+                        if response:
+                            logger.debug(f"$X response: {response}")
+                            if response.lower() == "ok":
+                                logger.info("Controller unlocked")
+                                break
+                    except Exception:
+                        pass
+                    time.sleep(0.05)
+
+                # Only reset state positions when confirmation received
+                state.machine_x = 0.0
+                state.machine_y = 0.0
+                reset_cmd = '$Bye' if firmware_type == 'fluidnc' else 'Ctrl+X'
+                logger.info(f"Machine position reset to 0 via {reset_cmd} soft reset")
+
+                # Save the reset position
+                state.save()
+                logger.info(f"Machine position saved: {state.machine_x}, {state.machine_y}")
+                return True
 
-        # Save the reset position
-        state.save()
-        logger.info(f"Machine position saved: {state.machine_x}, {state.machine_y}")
+            # Retry after failed attempt
+            if attempt < max_retries - 1:
+                logger.warning(f"Reset attempt {attempt + 1}/{max_retries} failed, retrying...")
+                time.sleep(0.5)  # Brief pause before retry
 
-        return True
+        # All attempts failed - DO NOT reset position to prevent drift
+        logger.error(
+            f"All {max_retries} reset attempts failed - no confirmation received. "
+            f"Position NOT reset (still: X={state.machine_x:.2f}, Y={state.machine_y:.2f}). "
+            "This may indicate communication issues or controller not responding."
+        )
+        return False
 
     except Exception as e:
         logger.error(f"Error performing soft reset: {e}")

+ 15 - 1
modules/core/pattern_manager.py

@@ -1677,12 +1677,26 @@ async def reset_theta():
     $Bye sends a soft reset to FluidNC which resets the controller and clears
     all position counters to 0. This is more reliable than G92 which only sets
     a work coordinate offset without changing the actual machine position (MPos).
+
+    IMPORTANT: We wait for machine to be idle before sending $Bye to avoid
+    error:25 ("Feed rate not specified in block") which can occur if the
+    controller is still processing commands when reset is triggered.
     """
     logger.info('Resetting Theta')
+
+    # Wait for machine to be idle before reset to prevent error:25
+    if state.conn and state.conn.is_connected():
+        logger.info("Waiting for machine to be idle before reset...")
+        idle = await connection_manager.check_idle_async(timeout=30)
+        if not idle:
+            logger.warning("Machine not idle after 30s, proceeding with reset anyway")
+
     state.current_theta = state.current_theta % (2 * pi)
 
     # Hard reset machine position using $Bye via connection_manager
-    await connection_manager.perform_soft_reset()
+    success = await connection_manager.perform_soft_reset()
+    if not success:
+        logger.error("Soft reset failed - theta reset may be unreliable")
 
 def set_speed(new_speed):
     state.speed = new_speed