فهرست منبع

Fix $Bye reset reliability and prevent position drift

- Add idle check in reset_theta() before sending $Bye to prevent
  error:25 when controller is still processing commands
- Add retry logic (3 attempts with 5s/7s/9s timeouts) to soft reset
- Fail-fast: only set position to (0,0) when confirmation received,
  preventing position drift from accumulating over long operations
- Add CPU affinity: pin Docker backend to cores 0-2, touch app to
  core 3 with Nice=10 to prevent serial I/O timing issues

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
tuanchris 1 هفته پیش
والد
کامیت
6893719141
4 فایلهای تغییر یافته به همراه 110 افزوده شده و 68 حذف شده
  1. 3 0
      docker-compose.yml
  2. 4 1
      dune-weaver-touch/dune-weaver-touch.service
  3. 88 66
      modules/connection/connection_manager.py
  4. 15 1
      modules/core/pattern_manager.py

+ 3 - 0
docker-compose.yml

@@ -21,6 +21,9 @@ services:
     build: .
     build: .
     image: ghcr.io/tuanchris/dune-weaver:feature-react-ui
     image: ghcr.io/tuanchris/dune-weaver:feature-react-ui
     restart: always
     restart: always
+    # Pin motion-critical backend to cores 0-2 (Raspberry Pi 4/5 has cores 0-3)
+    # This prevents CPU contention from touch app blocking I/O calls
+    cpuset: "0,1,2"
     ports:
     ports:
       - "8080:8080"
       - "8080:8080"
     # Environment variables for testing (uncomment to enable):
     # Environment variables for testing (uncomment to enable):

+ 4 - 1
dune-weaver-touch/dune-weaver-touch.service

@@ -16,7 +16,10 @@ Environment=QT_QPA_EGLFS_ALWAYS_SET_MODE=1
 Environment=QT_QPA_EGLFS_HIDECURSOR=1
 Environment=QT_QPA_EGLFS_HIDECURSOR=1
 Environment=QT_QPA_EGLFS_INTEGRATION=eglfs_kms
 Environment=QT_QPA_EGLFS_INTEGRATION=eglfs_kms
 Environment=QT_QPA_EGLFS_KMS_ATOMIC=1
 Environment=QT_QPA_EGLFS_KMS_ATOMIC=1
-ExecStart=/home/pi/dune-weaver-touch/venv/bin/python /home/pi/dune-weaver-touch/main.py
+# CPU isolation: Pin touch app to core 3, lower priority to prevent starving motion backend
+# Backend runs in Docker pinned to cores 0-2 for serial I/O timing reliability
+Nice=10
+ExecStart=/usr/bin/taskset -c 3 /home/pi/dune-weaver-touch/venv/bin/python /home/pi/dune-weaver-touch/main.py
 Restart=always
 Restart=always
 RestartSec=10
 RestartSec=10
 StartLimitInterval=200
 StartLimitInterval=200

+ 88 - 66
modules/connection/connection_manager.py

@@ -1269,7 +1269,7 @@ async def update_machine_position():
             logger.error(f"Error updating machine position: {e}")
             logger.error(f"Error updating machine position: {e}")
 
 
 
 
-def perform_soft_reset_sync():
+def perform_soft_reset_sync(max_retries: int = 3):
     """
     """
     Synchronous version of soft reset for use during device initialization.
     Synchronous version of soft reset for use during device initialization.
 
 
@@ -1277,6 +1277,15 @@ def perform_soft_reset_sync():
     Triggers a software reset which clears position counters to 0.
     Triggers a software reset which clears position counters to 0.
     This is more reliable than G92 which only sets a work coordinate offset
     This is more reliable than G92 which only sets a work coordinate offset
     without changing the actual machine position (MPos).
     without changing the actual machine position (MPos).
+
+    IMPORTANT: Position is only reset to (0,0) if confirmation is received.
+    This prevents position drift from accumulating over long operation periods.
+
+    Args:
+        max_retries: Maximum number of reset attempts (default 3)
+
+    Returns:
+        True if reset confirmed, False if all attempts failed
     """
     """
     if not state.conn or not state.conn.is_connected():
     if not state.conn or not state.conn.is_connected():
         logger.warning("Cannot perform soft reset: no active connection")
         logger.warning("Cannot perform soft reset: no active connection")
@@ -1286,87 +1295,100 @@ def perform_soft_reset_sync():
         # Detect firmware type to use appropriate reset command
         # Detect firmware type to use appropriate reset command
         firmware_type, version = _detect_firmware()
         firmware_type, version = _detect_firmware()
         logger.info(f"Detected firmware: {firmware_type} {version or ''}")
         logger.info(f"Detected firmware: {firmware_type} {version or ''}")
-
         logger.info(f"Performing soft reset (was: X={state.machine_x:.2f}, Y={state.machine_y:.2f})")
         logger.info(f"Performing soft reset (was: X={state.machine_x:.2f}, Y={state.machine_y:.2f})")
 
 
-        # Clear any pending data first
-        if isinstance(state.conn, SerialConnection) and state.conn.ser:
-            state.conn.ser.reset_input_buffer()
+        for attempt in range(max_retries):
+            # Increasing timeout: 5s, 7s, 9s
+            timeout = 5.0 + (attempt * 2.0)
+            logger.info(f"Reset attempt {attempt + 1}/{max_retries} (timeout: {timeout}s)")
 
 
-        # Send appropriate reset command based on firmware
-        if firmware_type == 'fluidnc':
-            # FluidNC uses $Bye for soft reset
-            if isinstance(state.conn, SerialConnection) and state.conn.ser:
-                state.conn.ser.write(b'$Bye\n')
-                state.conn.ser.flush()
-                logger.info(f"$Bye sent directly via serial to {state.port}")
-            else:
-                state.conn.send('$Bye\n')
-                logger.info("$Bye sent via connection abstraction")
-        else:
-            # GRBL uses Ctrl+X (0x18) for soft reset
+            # Clear any pending data first
             if isinstance(state.conn, SerialConnection) and state.conn.ser:
             if isinstance(state.conn, SerialConnection) and state.conn.ser:
-                state.conn.ser.write(b'\x18')
-                state.conn.ser.flush()
-                logger.info(f"Ctrl+X (0x18) sent directly via serial to {state.port}")
+                state.conn.ser.reset_input_buffer()
+
+            # Send appropriate reset command based on firmware
+            if firmware_type == 'fluidnc':
+                # FluidNC uses $Bye for soft reset
+                if isinstance(state.conn, SerialConnection) and state.conn.ser:
+                    state.conn.ser.write(b'$Bye\n')
+                    state.conn.ser.flush()
+                    logger.info(f"$Bye sent directly via serial to {state.port}")
+                else:
+                    state.conn.send('$Bye\n')
+                    logger.info("$Bye sent via connection abstraction")
             else:
             else:
-                state.conn.send('\x18')
-                logger.info("Ctrl+X (0x18) sent via connection abstraction")
-
-        # Wait for controller to fully restart
-        # FluidNC sequence: [MSG:INFO: Restarting] -> ... -> "Grbl 3.9 [FluidNC...]"
-        # GRBL sequence: "Grbl 1.1h ['$' for help]"
-        start_time = time.time()
-        reset_confirmed = False
-        while time.time() - start_time < 5.0:  # 5 second timeout for full reboot
-            try:
-                response = state.conn.readline()
-                if response:
-                    logger.debug(f"Reset response: {response}")
-                    # Wait for the "Grbl" startup banner - this means fully ready
-                    if response.startswith("Grbl") or "fluidnc" in response.lower():
-                        reset_confirmed = True
-                        logger.info(f"Controller restart complete: {response}")
-                        break
-            except Exception:
-                pass
-            time.sleep(0.05)
-
-        # Small delay to let controller fully stabilize
-        time.sleep(0.2)
-
-        # Unlock controller in case it's in alarm state after reset
-        if reset_confirmed:
-            logger.info("Sending $X to unlock controller after reset")
-            state.conn.send("$X\n")
-            # Wait for ok response
-            unlock_start = time.time()
-            while time.time() - unlock_start < 1.0:
+                # GRBL uses Ctrl+X (0x18) for soft reset
+                if isinstance(state.conn, SerialConnection) and state.conn.ser:
+                    state.conn.ser.write(b'\x18')
+                    state.conn.ser.flush()
+                    logger.info(f"Ctrl+X (0x18) sent directly via serial to {state.port}")
+                else:
+                    state.conn.send('\x18')
+                    logger.info("Ctrl+X (0x18) sent via connection abstraction")
+
+            # Wait for controller to fully restart
+            # FluidNC sequence: [MSG:INFO: Restarting] -> ... -> "Grbl 3.9 [FluidNC...]"
+            # GRBL sequence: "Grbl 1.1h ['$' for help]"
+            start_time = time.time()
+            reset_confirmed = False
+            while time.time() - start_time < timeout:
                 try:
                 try:
                     response = state.conn.readline()
                     response = state.conn.readline()
                     if response:
                     if response:
-                        logger.debug(f"$X response: {response}")
-                        if response.lower() == "ok":
-                            logger.info("Controller unlocked")
+                        logger.debug(f"Reset response: {response}")
+                        # Wait for the "Grbl" startup banner - this means fully ready
+                        if response.startswith("Grbl") or "fluidnc" in response.lower():
+                            reset_confirmed = True
+                            logger.info(f"Controller restart complete: {response}")
                             break
                             break
                 except Exception:
                 except Exception:
                     pass
                     pass
                 time.sleep(0.05)
                 time.sleep(0.05)
 
 
-        # Reset state positions to 0 after soft reset
-        state.machine_x = 0.0
-        state.machine_y = 0.0
+            if reset_confirmed:
+                # Small delay to let controller fully stabilize
+                time.sleep(0.2)
 
 
-        if reset_confirmed:
-            logger.info(f"Machine position reset to 0 via {'$Bye' if firmware_type == 'fluidnc' else 'Ctrl+X'} soft reset")
-        else:
-            logger.warning("Soft reset sent but no confirmation received, position set to 0 anyway")
+                # Unlock controller in case it's in alarm state after reset
+                logger.info("Sending $X to unlock controller after reset")
+                state.conn.send("$X\n")
+                # Wait for ok response
+                unlock_start = time.time()
+                while time.time() - unlock_start < 1.0:
+                    try:
+                        response = state.conn.readline()
+                        if response:
+                            logger.debug(f"$X response: {response}")
+                            if response.lower() == "ok":
+                                logger.info("Controller unlocked")
+                                break
+                    except Exception:
+                        pass
+                    time.sleep(0.05)
+
+                # Only reset state positions when confirmation received
+                state.machine_x = 0.0
+                state.machine_y = 0.0
+                reset_cmd = '$Bye' if firmware_type == 'fluidnc' else 'Ctrl+X'
+                logger.info(f"Machine position reset to 0 via {reset_cmd} soft reset")
+
+                # Save the reset position
+                state.save()
+                logger.info(f"Machine position saved: {state.machine_x}, {state.machine_y}")
+                return True
 
 
-        # Save the reset position
-        state.save()
-        logger.info(f"Machine position saved: {state.machine_x}, {state.machine_y}")
+            # Retry after failed attempt
+            if attempt < max_retries - 1:
+                logger.warning(f"Reset attempt {attempt + 1}/{max_retries} failed, retrying...")
+                time.sleep(0.5)  # Brief pause before retry
 
 
-        return True
+        # All attempts failed - DO NOT reset position to prevent drift
+        logger.error(
+            f"All {max_retries} reset attempts failed - no confirmation received. "
+            f"Position NOT reset (still: X={state.machine_x:.2f}, Y={state.machine_y:.2f}). "
+            "This may indicate communication issues or controller not responding."
+        )
+        return False
 
 
     except Exception as e:
     except Exception as e:
         logger.error(f"Error performing soft reset: {e}")
         logger.error(f"Error performing soft reset: {e}")

+ 15 - 1
modules/core/pattern_manager.py

@@ -1677,12 +1677,26 @@ async def reset_theta():
     $Bye sends a soft reset to FluidNC which resets the controller and clears
     $Bye sends a soft reset to FluidNC which resets the controller and clears
     all position counters to 0. This is more reliable than G92 which only sets
     all position counters to 0. This is more reliable than G92 which only sets
     a work coordinate offset without changing the actual machine position (MPos).
     a work coordinate offset without changing the actual machine position (MPos).
+
+    IMPORTANT: We wait for machine to be idle before sending $Bye to avoid
+    error:25 ("G-code word repeated in block") which can occur if the
+    controller is still processing commands when reset is triggered.
     """
     """
     logger.info('Resetting Theta')
     logger.info('Resetting Theta')
+
+    # Wait for machine to be idle before reset to prevent error:25
+    if state.conn and state.conn.is_connected():
+        logger.info("Waiting for machine to be idle before reset...")
+        idle = await connection_manager.check_idle_async(timeout=30)
+        if not idle:
+            logger.warning("Machine not idle after 30s, proceeding with reset anyway")
+
     state.current_theta = state.current_theta % (2 * pi)
     state.current_theta = state.current_theta % (2 * pi)
 
 
     # Hard reset machine position using $Bye via connection_manager
     # Hard reset machine position using $Bye via connection_manager
-    await connection_manager.perform_soft_reset()
+    success = await connection_manager.perform_soft_reset()
+    if not success:
+        logger.error("Soft reset failed - theta reset may be unreliable")
 
 
 def set_speed(new_speed):
 def set_speed(new_speed):
     state.speed = new_speed
     state.speed = new_speed