13 Commit-ok 10be8dbd62 ... e1e70bc13a

Szerző SHA1 Üzenet Dátum
  tuanchris e1e70bc13a Revert termios modifications - may cause serial corruption 1 hete
  tuanchris 2ce38dd635 Disable SCHED_RR - causes serial buffer corruption on Pi 3B+ 1 hete
  tuanchris 867b90a22e Handle FluidNC command echo gracefully 1 hete
  tuanchris 382806095b Fix serial echo causing GRBL communication failure on Pi 3B+ 1 hete
  tuanchris eb619c0895 TEMP: Disable real-time scheduling to test Pi 3B+ serial timeout 1 hete
  tuanchris 7e8284961a Add debug logging for GRBL response timeout investigation 1 hete
  tuanchris 9c31bbffb8 Fix get_machine_position to also accept WPos format 1 hete
  tuanchris 0e96bdffa4 Fix homing timeout due to WPos vs MPos format mismatch 1 hete
  tuanchris 064762f0c1 Add NaN/Infinity validation before sending GRBL coordinates 1 hete
  tuanchris fba7aee412 Stop pattern on GRBL error/alarm instead of hanging 1 hete
  tuanchris 8c0b628309 Add validation for coordinate calculations to prevent GRBL error:2 1 hete
  tuanchris 27b52408ad Fix race condition with multiple playlist tasks running 1 hete
  tuanchris afb73dbad8 Add 120s timeout to motion thread readline loop 1 hete

+ 27 - 14
modules/connection/connection_manager.py

@@ -386,6 +386,8 @@ def check_and_unlock_alarm():
 def get_status_response() -> str:
     """
     Send a status query ('?') and return the response if available.
+    Accepts both MPos (machine position) and WPos (work position) formats
+    depending on GRBL's $10 setting.
     """
     if state.conn is None or not state.conn.is_connected():
         logger.warning("Cannot get status response: no active connection")
@@ -395,7 +397,8 @@ def get_status_response() -> str:
         try:
             state.conn.send('?')
             response = state.conn.readline()
-            if "MPos" in response:
+            # Accept either MPos or WPos format (depends on GRBL $10 setting)
+            if "MPos" in response or "WPos" in response:
                 logger.debug(f"Status response: {response}")
                 return response
         except Exception as e:
@@ -405,22 +408,30 @@ def get_status_response() -> str:
         
 def parse_machine_position(response: str):
     """
-    Parse the work position (MPos) from a status response.
-    Expected format: "<...|MPos:-994.869,-321.861,0.000|...>"
-    Returns a tuple (work_x, work_y) if found, else None.
+    Parse the position from a status response.
+    Supports both MPos (machine position) and WPos (work position) formats
+    depending on GRBL's $10 setting.
+    Expected formats:
+        "<...|MPos:-994.869,-321.861,0.000|...>"
+        "<...|WPos:0.000,19.000,0.000|...>"
+    Returns a tuple (x, y) if found, else None.
     """
-    if "MPos:" not in response:
+    if "MPos:" not in response and "WPos:" not in response:
         return None
     try:
-        wpos_section = next((part for part in response.split("|") if part.startswith("MPos:")), None)
-        if wpos_section:
-            wpos_str = wpos_section.split(":", 1)[1]
-            wpos_values = wpos_str.split(",")
-            work_x = float(wpos_values[0])
-            work_y = float(wpos_values[1])
-            return work_x, work_y
+        # Try MPos first, then WPos
+        pos_section = next((part for part in response.split("|") if part.startswith("MPos:")), None)
+        if pos_section is None:
+            pos_section = next((part for part in response.split("|") if part.startswith("WPos:")), None)
+
+        if pos_section:
+            pos_str = pos_section.split(":", 1)[1]
+            pos_values = pos_str.split(",")
+            pos_x = float(pos_values[0])
+            pos_y = float(pos_values[1])
+            return pos_x, pos_y
     except Exception as e:
-        logger.error(f"Error parsing work position: {e}")
+        logger.error(f"Error parsing position: {e}")
     return None
 
 
@@ -1210,6 +1221,7 @@ def is_machine_idle() -> bool:
 def get_machine_position(timeout=5):
     """
     Query the device for its position.
+    Supports both MPos and WPos formats (depends on GRBL $10 setting).
     """
     start_time = time.time()
     while time.time() - start_time < timeout:
@@ -1217,7 +1229,8 @@ def get_machine_position(timeout=5):
             state.conn.send('?')
             response = state.conn.readline()
             logger.debug(f"Raw status response: {response}")
-            if "MPos" in response:
+            # Accept either MPos or WPos format
+            if "MPos" in response or "WPos" in response:
                 pos = parse_machine_position(response)
                 if pos:
                     machine_x, machine_y = pos

+ 59 - 6
modules/core/pattern_manager.py

@@ -8,7 +8,7 @@ from datetime import datetime, time as datetime_time
 from tqdm import tqdm
 from modules.connection import connection_manager
 from modules.core.state import state
-from math import pi
+from math import pi, isnan, isinf
 import asyncio
 import json
 # Import for legacy support, but we'll use LED interface through state
@@ -477,6 +477,13 @@ class MotionControlThread:
 
     def _move_polar_sync(self, theta: float, rho: float, speed: Optional[float] = None):
         """Synchronous version of move_polar for use in motion thread."""
+        # Check for valid machine position (can be None if homing failed)
+        if state.machine_x is None or state.machine_y is None:
+            logger.error("Cannot execute move: machine position unknown (homing may have failed)")
+            logger.error("Please home the machine before running patterns")
+            state.stop_requested = True
+            return
+
         # This is the original sync logic but running in dedicated thread
         if state.table_type == 'dune_weaver_mini':
             x_scaling_factor = 2
@@ -506,6 +513,14 @@ class MotionControlThread:
         # Use provided speed or fall back to state.speed
         actual_speed = speed if speed is not None else state.speed
 
+        # Validate coordinates before sending to prevent GRBL error:2
+        if isnan(new_x_abs) or isnan(new_y_abs) or isinf(new_x_abs) or isinf(new_y_abs):
+            logger.error(f"Motion thread: Invalid coordinates detected - X:{new_x_abs}, Y:{new_y_abs}")
+            logger.error(f"  theta:{theta}, rho:{rho}, current_theta:{state.current_theta}, current_rho:{state.current_rho}")
+            logger.error(f"  x_steps_per_mm:{state.x_steps_per_mm}, y_steps_per_mm:{state.y_steps_per_mm}, gear_ratio:{state.gear_ratio}")
+            state.stop_requested = True
+            return
+
         # Call sync version of send_grbl_coordinates in this thread
         self._send_grbl_coordinates_sync(round(new_x_abs, 3), round(new_y_abs, 3), actual_speed)
 
@@ -518,10 +533,12 @@ class MotionControlThread:
     def _send_grbl_coordinates_sync(self, x: float, y: float, speed: int = 600, timeout: int = 2, home: bool = False):
         """Synchronous version of send_grbl_coordinates for motion thread.
 
-        Waits indefinitely for 'ok' because GRBL only responds after the move completes,
-        which can take many seconds at slow speeds.
+        Waits for 'ok' with a timeout. GRBL sends 'ok' after the move completes,
+        which can take many seconds at slow speeds. We use a generous timeout
+        (120 seconds) to handle slow movements, but prevent indefinite hangs.
         """
         gcode = f"$J=G91 G21 Y{y} F{speed}" if home else f"G1 G53 X{x} Y{y} F{speed}"
+        max_wait_time = 120  # Maximum seconds to wait for 'ok' response
 
         while True:
             # Check stop_requested at the start of each iteration
@@ -533,20 +550,52 @@ class MotionControlThread:
                 logger.debug(f"Motion thread sending G-code: {gcode}")
                 state.conn.send(gcode + "\n")
 
-                # Wait indefinitely for 'ok' - GRBL sends it after move completes
+                # Wait for 'ok' with timeout
+                wait_start = time.time()
                 while True:
                     # Check stop_requested while waiting
                     if state.stop_requested:
                         logger.debug("Motion thread: Stop requested while waiting for response")
                         return False
+
+                    # Check for timeout
+                    elapsed = time.time() - wait_start
+                    if elapsed > max_wait_time:
+                        logger.error(f"Motion thread: Timeout ({max_wait_time}s) waiting for 'ok' response")
+                        logger.error("Possible serial communication issue - stopping pattern")
+                        state.stop_requested = True
+                        return False
+
                     response = state.conn.readline()
                     if response:
                         logger.debug(f"Motion thread response: {response}")
                         if response.lower() == "ok":
                             logger.debug("Motion thread: Command execution confirmed.")
                             return True
-                    # Small sleep to prevent CPU spin when readline() times out
-                    time.sleep(0.01)
+                        # Handle GRBL errors - these mean command failed, stop pattern
+                        if response.lower().startswith("error"):
+                            logger.error(f"Motion thread: GRBL error received: {response}")
+                            logger.error(f"Failed command: {gcode}")
+                            logger.error("Stopping pattern due to GRBL error")
+                            state.stop_requested = True
+                            return False
+                        # Handle GRBL alarms - machine needs attention
+                        if "alarm" in response.lower():
+                            logger.error(f"Motion thread: GRBL ALARM: {response}")
+                            logger.error("Machine alarm triggered - stopping pattern")
+                            state.stop_requested = True
+                            return False
+                        # FluidNC may echo commands back before sending 'ok'
+                        # Silently ignore echoed G-code commands (G0, G1, $J, etc.)
+                        if response.startswith(('G0', 'G1', 'G2', 'G3', '$J', 'M')):
+                            logger.debug(f"Motion thread: Ignoring echoed command: {response}")
+                            continue  # Read next line to get 'ok'
+                        # Log truly unexpected responses
+                        logger.warning(f"Motion thread: Unexpected response: '{response}'")
+                    else:
+                        # Log periodically when waiting for response (every 30s)
+                        if int(elapsed) > 0 and int(elapsed) % 30 == 0 and elapsed - int(elapsed) < 0.1:
+                            logger.warning(f"Motion thread: Still waiting for 'ok' after {int(elapsed)}s for command: {gcode}")
 
             except Exception as e:
                 error_str = str(e)
@@ -1354,6 +1403,10 @@ async def stop_actions(clear_playlist = True, wait_for_lock = True):
                 if progress_update_task and not progress_update_task.done():
                     progress_update_task.cancel()
 
+                # Cancel the playlist task itself (late import to avoid circular dependency)
+                from modules.core import playlist_manager
+                await playlist_manager.cancel_current_playlist()
+
             state.pause_condition.notify_all()
 
         # Also set the pause event to wake up any paused patterns

+ 25 - 1
modules/core/playlist_manager.py

@@ -3,6 +3,7 @@ import os
 import threading
 import logging
 import asyncio
+from typing import Optional
 from modules.core import pattern_manager
 from modules.core.state import state
 from fastapi import HTTPException
@@ -10,6 +11,9 @@ from fastapi import HTTPException
 # Configure logging
 logger = logging.getLogger(__name__)
 
+# Track the current playlist task so we can cancel it properly
+_current_playlist_task: Optional[asyncio.Task] = None
+
 # Global state
 PLAYLISTS_FILE = os.path.join(os.getcwd(), "playlists.json")
 
@@ -113,8 +117,28 @@ def rename_playlist(old_name, new_name):
     logger.info(f"Renamed playlist '{old_name}' to '{new_name}'")
     return True, f"Playlist renamed to '{new_name}'"
 
+async def cancel_current_playlist():
+    """Cancel the current playlist task if one is running."""
+    global _current_playlist_task
+    if _current_playlist_task and not _current_playlist_task.done():
+        logger.info("Cancelling existing playlist task...")
+        _current_playlist_task.cancel()
+        try:
+            await _current_playlist_task
+        except asyncio.CancelledError:
+            logger.info("Playlist task cancelled successfully")
+        except Exception as e:
+            logger.warning(f"Error while cancelling playlist task: {e}")
+        _current_playlist_task = None
+
 async def run_playlist(playlist_name, pause_time=0, clear_pattern=None, run_mode="single", shuffle=False):
     """Run a playlist with the given options."""
+    global _current_playlist_task
+
+    # Cancel any existing playlist task first
+    await cancel_current_playlist()
+
+    # Also stop any running pattern
     if pattern_manager.get_pattern_lock().locked():
         logger.info("Another pattern is running, stopping it first...")
         await pattern_manager.stop_actions()
@@ -135,7 +159,7 @@ async def run_playlist(playlist_name, pause_time=0, clear_pattern=None, run_mode
         logger.info(f"Starting playlist '{playlist_name}' with mode={run_mode}, shuffle={shuffle}")
         state.current_playlist = file_paths
         state.current_playlist_name = playlist_name
-        asyncio.create_task(
+        _current_playlist_task = asyncio.create_task(
             pattern_manager.run_theta_rho_files(
                 file_paths,
                 pause_time=pause_time,

+ 12 - 4
modules/core/scheduling.py

@@ -164,19 +164,27 @@ def pin_to_cpus(cpu_ids: Set[int], tid: Optional[int] = None) -> bool:
 
 def setup_realtime_thread(tid: Optional[int] = None, priority: int = 50) -> None:
     """Setup for time-critical I/O threads (motion control, LED effects).
-    
+
     Elevates priority and pins to CPU 0.
-    
+
     Args:
         tid: Thread native_id. If None, uses current thread.
         priority: SCHED_RR priority (1-99). Higher = more important.
                   Motion should use higher than LED (e.g., 60 vs 40).
     """
+    # DISABLED: SCHED_RR + CPU pinning causes serial buffer corruption on Pi 3B+
+    # The real-time scheduling appears to interfere with serial I/O timing,
+    # causing commands to be merged/corrupted (e.g., "G1 G53" -> "G10G53").
+    # This needs further investigation - may need to pin to a different CPU
+    # or use a different scheduling policy.
+    logger.info(f"Real-time scheduling disabled (was priority {priority}) - causes serial issues on some Pi models")
+    return
+
     cpu_count = get_cpu_count()
-    
+
     # Elevate priority (logs internally on success)
     elevate_priority(tid, realtime_priority=priority)
-    
+
     # Pin to CPU 0 if multi-core
     if cpu_count > 1:
         if pin_to_cpu(0, tid):