# audoWin/autodemo/recorder.py

# MIT License
# Copyright (c) 2024
"""Multimodal recorder for Windows desktop sessions."""

from __future__ import annotations

import json
import threading
import time
import uuid
from pathlib import Path
from typing import List, Optional, Tuple

import cv2  # type: ignore
import numpy as np  # type: ignore
import psutil  # type: ignore
import uiautomation as auto  # type: ignore
from pynput import keyboard, mouse
import mss  # type: ignore

from .schema import (
    EventRecord,
    FramePaths,
    MouseInfo,
    Rect,
    SessionManifest,
    UISnapshot,
    UITreeNode,
    UISelector,
    WindowInfo,
)
from .screen_recorder import ScreenRecorder


class Recorder:
    """Capture UI events, UIA context, screenshots, and screen video."""

    def __init__(self, output_dir: Path, hotkey: str = "F9", fps: int = 12, screen: int = 0) -> None:
        """Prepare session paths and in-memory state; recording begins in ``start``.

        Args:
            output_dir: Parent directory; the session lives in a UUID-named
                subdirectory below it.
            hotkey: Name of the key whose press ends the recording.
            fps: Frame rate passed to the screen recorder.
            screen: Index into the ``mss`` monitor list.
        """
        # Caller-supplied configuration.
        self.output_dir = output_dir
        self.hotkey = hotkey
        self.fps = fps
        self.screen = screen

        # Session identity and on-disk layout (the directories themselves are
        # created by start()).
        self.session_id = str(uuid.uuid4())
        session_root = self.output_dir / self.session_id
        self.session_dir = session_root
        self.events_path = session_root / "events.jsonl"
        self.video_path = session_root / "video.mp4"
        self.frames_dir = session_root / "frames"
        self.frames_crops_dir = session_root / "frames_crops"
        self.ui_snapshots_dir = session_root / "ui_snapshots"

        # Recorded events plus the primitives shared between threads.
        self.events: List[EventRecord] = []
        self._stop_event = threading.Event()
        self._lock = threading.Lock()

        # Keystroke buffering: characters collect here until the debounce
        # timer flushes them as one text event.
        self._text_buffer: List[str] = []
        self._flush_timer: Optional[threading.Timer] = None

        # Timing anchors and bookkeeping, filled in once recording starts.
        self._start_perf = 0.0
        self._start_ts = 0.0
        self._last_hwnd: Optional[int] = None
        self._mouse_controller = mouse.Controller()

        # Components created by start().
        self._screen_recorder: Optional[ScreenRecorder] = None
        self._window_thread: Optional[threading.Thread] = None
        self._mouse_listener: Optional[mouse.Listener] = None
        self._keyboard_listener: Optional[keyboard.Listener] = None
        self._monitor: Optional[dict] = None
        self._event_index = 0

        # UI Automation initialiser tokens are kept per thread; set one up for
        # the constructing thread right away.
        self._uia_local = threading.local()
        self._ensure_uia_initialized()

    # Public API ---------------------------------------------------------
    def start(self) -> Path:
        """Record until the stop hotkey is pressed, then persist the session.

        Creates the session directories, starts the screen recorder, the
        foreground-window watcher and the mouse/keyboard listeners, and then
        blocks the calling thread until the stop event is set.

        Returns:
            The session directory holding the recorded artifacts.
        """
        for directory in (
            self.session_dir,
            self.frames_dir,
            self.frames_crops_dir,
            self.ui_snapshots_dir,
        ):
            directory.mkdir(parents=True, exist_ok=True)

        self._start_perf = time.perf_counter()
        self._start_ts = time.time()
        with mss.mss() as sct:
            monitors = sct.monitors
        # An out-of-range screen index falls back to the first monitor entry.
        if 0 <= self.screen < len(monitors):
            self._monitor = monitors[self.screen]
        else:
            self._monitor = monitors[0]

        self._screen_recorder = ScreenRecorder(self.video_path, fps=self.fps, screen=self.screen)
        self._screen_recorder.start()

        self._window_thread = threading.Thread(target=self._watch_window, daemon=True)
        self._window_thread.start()

        self._mouse_listener = mouse.Listener(on_click=self._on_click)
        self._keyboard_listener = keyboard.Listener(on_press=self._on_key_press)
        self._mouse_listener.start()
        self._keyboard_listener.start()

        try:
            self._stop_event.wait()
            self._flush_text_buffer()
        finally:
            # Always tear down: if the wait is interrupted or the final flush
            # raises, the screen recorder and listeners must still be stopped
            # and the events/manifest written. Setting the stop event first
            # lets the watcher loop and the callbacks see that we are done.
            self._stop_event.set()
            self._shutdown()
        return self.session_dir

    # Event handlers -----------------------------------------------------
    def _on_click(self, x: int, y: int, button: mouse.Button, pressed: bool) -> None:
        """Mouse-listener callback: record a ``mouse_click`` event for a button press.

        Button releases, and presses arriving after the stop event is set,
        are ignored.
        """
        if self._stop_event.is_set() or not pressed:
            return
        foreground = self._get_window_info()
        target = self._hit_test(x, y)
        # str(button) looks like "Button.left"; keep only the part after the dot.
        button_name = str(button).split(".")[-1]
        click = MouseInfo(x=int(x), y=int(y), button=button_name, action="down")
        self._record_event(
            event_type="mouse_click",
            mouse_info=click,
            text=None,
            uia_selector=target,
            window=foreground,
        )

    def _on_key_press(self, key: keyboard.Key | keyboard.KeyCode) -> Optional[bool]:
        """Keyboard-listener callback: buffer typed characters or stop on the hotkey.

        Returns:
            ``False`` when the hotkey was pressed or the stop event is already
            set; ``None`` otherwise.
        """
        if self._is_hotkey(key):
            self._stop_event.set()
        if self._stop_event.is_set():
            # NOTE: returning False is pynput's signal to stop the listener
            # (see the pynput listener documentation).
            return False
        typed = self._key_to_char(key)
        if typed is not None:
            self._text_buffer.append(typed)
            # Restart the debounce timer so a burst of keys becomes one event.
            self._schedule_flush()
        return None

    # Background watchers ------------------------------------------------
    def _watch_window(self, interval: float = 0.5) -> None:
        while not self._stop_event.is_set():
            info = self._get_window_info()
            hwnd = info.hwnd if info else None
            if hwnd and hwnd != self._last_hwnd:
                self._last_hwnd = hwnd
                selector = self._hit_test(*self._current_mouse_position())
                self._record_event(
                    event_type="window_change",
                    mouse_info=self._current_mouse_info(),
                    text=None,
                    uia_selector=selector,
                    window=info,
                )
            time.sleep(interval)

    # Recording helpers --------------------------------------------------
    def _shutdown(self) -> None:
        """Stop every recording component, then write events and manifest to disk."""
        pending_flush = self._flush_timer
        if pending_flush and pending_flush.is_alive():
            pending_flush.cancel()

        # Mouse listener first, then keyboard listener.
        for listener in (self._mouse_listener, self._keyboard_listener):
            if listener:
                listener.stop()

        watcher = self._window_thread
        if watcher and watcher.is_alive():
            # Bounded join: do not block shutdown for more than a second.
            watcher.join(timeout=1.0)

        screen_recorder = self._screen_recorder
        if screen_recorder:
            screen_recorder.stop()

        self._write_events()
        self._write_manifest()

    def _schedule_flush(self) -> None:
        if self._flush_timer and self._flush_timer.is_alive():
            self._flush_timer.cancel()
        self._flush_timer = threading.Timer(0.8, self._flush_text_buffer)
        self._flush_timer.daemon = True
        self._flush_timer.start()

    def _flush_text_buffer(self) -> None:
        if not self._text_buffer:
            return
        text = "".join(self._text_buffer)
        self._text_buffer = []
        mouse_info = self._current_mouse_info()
        selector = None
        if mouse_info:
            selector = self._hit_test(mouse_info.x, mouse_info.y)
        window_info = self._get_window_info()
        self._record_event(
            event_type="text_input",
            mouse_info=mouse_info,
            text=text,
            uia_selector=selector,
            window=window_info,
        )

    def _record_event(
        self,
        event_type: str,
        mouse_info: Optional[MouseInfo],
        text: Optional[str],
        uia_selector: Optional[UISelector],
        window: Optional[WindowInfo],
    ) -> None:
        """Capture a frame and UI snapshot for one event and append it to ``self.events``.

        Args:
            event_type: Event label; also embedded in the frame file name.
            mouse_info: Mouse state to store with the event, if any.
            text: Typed text for text events, otherwise ``None``.
            uia_selector: UI element associated with the event, if any.
            window: Foreground window at the time of the event, if any.
        """
        # This method is reached from the listener callbacks, the flush timer
        # and the window-watcher thread. Reserve the index under the lock and
        # keep it in a local so the frame and the snapshot of one event always
        # share the same number, and two events never share one.
        with self._lock:
            self._event_index += 1
            index = self._event_index
        ts = time.time()
        # Offset relative to the perf-counter reading taken in start().
        offset_ms = int((time.perf_counter() - self._start_perf) * 1000)
        frame_paths = self._capture_frame(event_type, index, mouse_info, uia_selector, window)
        ui_snapshot_path = self._save_ui_snapshot(index, uia_selector)

        record = EventRecord(
            ts=ts,
            event_type=event_type,
            window=window,
            mouse=mouse_info,
            text=text,
            uia=uia_selector,
            frame_paths=frame_paths,
            video_time_offset_ms=offset_ms,
            ui_snapshot=ui_snapshot_path,
        )
        with self._lock:
            self.events.append(record)

    def _capture_frame(
        self,
        tag: str,
        event_index: int,
        mouse_info: Optional[MouseInfo],
        uia_selector: Optional[UISelector],
        window: Optional[WindowInfo],
    ) -> Optional[FramePaths]:
        if not self._monitor:
            return None

        region = self._monitor_region(window)
        with mss.mss() as sct:
            shot = np.array(sct.grab(region))
        frame = cv2.cvtColor(shot, cv2.COLOR_BGRA2BGR)

        full_path = self.frames_dir / f"frame_{event_index:05d}_{tag}.png"
        cv2.imwrite(str(full_path), frame)

        crop_mouse_path = None
        crop_element_path = None
        if mouse_info:
            crop_mouse_path = self._save_mouse_crop(frame, region, mouse_info, event_index)
        if uia_selector and uia_selector.bounding_rect:
            crop_element_path = self._save_element_crop(frame, region, uia_selector.bounding_rect, event_index)

        return FramePaths(
            full=str(full_path),
            crop_mouse=str(crop_mouse_path) if crop_mouse_path else None,
            crop_element=str(crop_element_path) if crop_element_path else None,
        )

    def _save_mouse_crop(self, frame: np.ndarray, region: dict, mouse_info: MouseInfo, event_index: int) -> Optional[Path]:
        width, height = frame.shape[1], frame.shape[0]
        center_x = int(mouse_info.x - region["left"])
        center_y = int(mouse_info.y - region["top"])
        crop_w, crop_h = 400, 300
        x0 = max(0, center_x - crop_w // 2)
        y0 = max(0, center_y - crop_h // 2)
        x1 = min(width, x0 + crop_w)
        y1 = min(height, y0 + crop_h)
        if x1 <= x0 or y1 <= y0:
            return None
        crop = frame[y0:y1, x0:x1]
        path = self.frames_crops_dir / f"frame_{event_index:05d}_mouse.png"
        cv2.imwrite(str(path), crop)
        return path

    def _save_element_crop(self, frame: np.ndarray, region: dict, rect: Rect, event_index: int) -> Optional[Path]:
        width, height = frame.shape[1], frame.shape[0]
        x0 = max(0, int(rect.left - region["left"]))
        y0 = max(0, int(rect.top - region["top"]))
        x1 = min(width, int(rect.right - region["left"]))
        y1 = min(height, int(rect.bottom - region["top"]))
        if x1 <= x0 or y1 <= y0:
            return None
        crop = frame[y0:y1, x0:x1]
        path = self.frames_crops_dir / f"frame_{event_index:05d}_element.png"
        cv2.imwrite(str(path), crop)
        return path

    def _monitor_region(self, window: Optional[WindowInfo]) -> dict:
        if window and window.rect and window.rect.width > 0 and window.rect.height > 0:
            return {
                "left": int(window.rect.left),
                "top": int(window.rect.top),
                "width": int(window.rect.width),
                "height": int(window.rect.height),
            }
        return {
            "left": int(self._monitor["left"]),
            "top": int(self._monitor["top"]),
            "width": int(self._monitor["width"]),
            "height": int(self._monitor["height"]),
        }

    def _save_ui_snapshot(self, event_index: int, selector: Optional[UISelector]) -> Optional[str]:
        """Write the selector and a depth-3 UI tree for one event to a JSON file.

        Returns:
            The snapshot path as a string, or ``None`` when there is neither a
            selector nor any tree node to store.
        """
        tree = self._capture_tree(max_depth=3)
        if selector is None and not tree:
            return None
        snapshot = UISnapshot(selector=selector, tree=tree)
        payload = json.dumps(snapshot.dict(exclude_none=True), ensure_ascii=False)
        target = self.ui_snapshots_dir / f"ui_{event_index:05d}.json"
        target.write_text(payload, encoding="utf-8")
        return str(target)

    # UI helpers ---------------------------------------------------------
    def _capture_tree(self, max_depth: int = 3) -> List[UITreeNode]:
        """Return the foreground window's UI tree in breadth-first order.

        Args:
            max_depth: Deepest level to include; the foreground control itself
                is depth 0.

        Returns:
            One ``UITreeNode`` per visited control, or an empty list when there
            is no foreground control.
        """
        self._ensure_uia_initialized()
        root = auto.GetForegroundControl()
        if root is None:
            return []
        nodes: List[UITreeNode] = []
        # Walk the tree one level at a time. This keeps breadth-first order
        # without popping from the front of a list, and makes it easy to stop
        # asking for children once the depth limit is reached.
        level = [root]
        depth = 0
        while level and depth <= max_depth:
            next_level = []
            for node in level:
                nodes.append(
                    UITreeNode(
                        name=node.Name,
                        automation_id=node.AutomationId,
                        class_name=node.ClassName,
                        control_type=node.ControlTypeName,
                        depth=depth,
                    )
                )
                if depth >= max_depth:
                    # Children of the deepest level would be discarded anyway,
                    # so skip the GetChildren() call entirely.
                    continue
                try:
                    children = list(node.GetChildren())
                except Exception:
                    # Best effort: a control whose children cannot be
                    # enumerated is recorded as a leaf.
                    children = []
                next_level.extend(children)
            level = next_level
            depth += 1
        return nodes

    def _hit_test(self, x: int, y: int) -> Optional[UISelector]:
        """Return a selector for the UI element at screen point ``(x, y)``.

        Returns:
            ``None`` when the lookup raises or yields no control.
        """
        try:
            self._ensure_uia_initialized()
            hit = auto.ControlFromPoint((int(x), int(y)))
        except Exception:
            # Best effort: a failed lookup is treated like "nothing there".
            return None
        return self._build_selector(hit) if hit else None

    def _get_window_info(self) -> Optional[WindowInfo]:
        """Describe the current foreground window.

        Returns:
            A ``WindowInfo`` with handle, title, process name and rectangle, or
            ``None`` when there is no foreground control. Handle, process name
            and rectangle are each ``None`` when they cannot be determined.
        """
        self._ensure_uia_initialized()
        ctrl = auto.GetForegroundControl()
        if ctrl is None:
            return None
        rect = getattr(ctrl, "BoundingRectangle", None)
        rect_model = None
        if rect:
            rect_model = Rect(left=int(rect.left), top=int(rect.top), right=int(rect.right), bottom=int(rect.bottom))
        try:
            process_name = psutil.Process(ctrl.ProcessId).name()
        except Exception:
            # Best effort: the process name is optional metadata, so any
            # failure to resolve it just leaves the field empty.
            process_name = None
        # Prefer NativeWindowHandle; fall back to Handle when it is missing or falsy.
        hwnd = getattr(ctrl, "NativeWindowHandle", None) or getattr(ctrl, "Handle", None)
        return WindowInfo(
            hwnd=int(hwnd) if hwnd else None,
            title=ctrl.Name,
            process_name=process_name,
            rect=rect_model,
        )

    def _build_selector(self, ctrl: auto.Control) -> UISelector:  # type: ignore
        """Convert a UI Automation control into a ``UISelector``.

        Every attribute is read with a ``None`` default, so a control lacking
        one of them still yields a selector.
        """
        bounds = getattr(ctrl, "BoundingRectangle", None)
        if bounds:
            bounding_rect = Rect(
                left=int(bounds.left),
                top=int(bounds.top),
                right=int(bounds.right),
                bottom=int(bounds.bottom),
            )
        else:
            bounding_rect = None
        return UISelector(
            automation_id=getattr(ctrl, "AutomationId", None),
            name=getattr(ctrl, "Name", None),
            class_name=getattr(ctrl, "ClassName", None),
            control_type=getattr(ctrl, "ControlTypeName", None),
            bounding_rect=bounding_rect,
        )

    # Utility ------------------------------------------------------------
    def _key_to_char(self, key: keyboard.Key | keyboard.KeyCode) -> Optional[str]:
        """Translate a key press into the character to append to the text buffer.

        Character keys return their character, space returns ``" "`` and enter
        returns ``"\\n"``. Backspace removes the last buffered character (if
        any) and, like every other key, returns ``None``.
        """
        if isinstance(key, keyboard.KeyCode) and key.char:
            return key.char
        for special, replacement in ((keyboard.Key.space, " "), (keyboard.Key.enter, "\n")):
            if key == special:
                return replacement
        if key == keyboard.Key.backspace and self._text_buffer:
            # Side effect: undo the most recently buffered character.
            self._text_buffer.pop()
        return None

    def _is_hotkey(self, key: keyboard.Key | keyboard.KeyCode) -> bool:
        """Return whether ``key`` matches the configured hotkey, ignoring case.

        Special keys are compared by their ``name``, character keys by their
        ``char``; anything else never matches.
        """
        wanted = self.hotkey.lower()
        if isinstance(key, keyboard.Key):
            pressed = key.name or ""
        elif isinstance(key, keyboard.KeyCode):
            pressed = key.char or ""
        else:
            return False
        return pressed.lower() == wanted

    def _current_mouse_position(self) -> Tuple[int, int]:
        pos = self._mouse_controller.position
        return int(pos[0]), int(pos[1])

    def _current_mouse_info(self) -> Optional[MouseInfo]:
        """Return a ``MouseInfo`` for the current position with no button or action."""
        position = self._current_mouse_position()
        return MouseInfo(x=int(position[0]), y=int(position[1]), button=None, action=None)

    def _ensure_uia_initialized(self) -> None:
        if getattr(self._uia_local, "token", None) is None:
            self._uia_local.token = auto.UIAutomationInitializerInThread()

    # Persistence --------------------------------------------------------
    def _write_events(self) -> None:
        with self.events_path.open("w", encoding="utf-8") as f:
            for event in self.events:
                f.write(json.dumps(event.dict(exclude_none=True), ensure_ascii=False))
                f.write("\n")

    def _write_manifest(self) -> None:
        """Write ``manifest.json`` describing the session into the session directory.

        The manifest records the session id, start and end timestamps, the
        resolution string, capture settings and the artifact locations.
        """
        screen_size = self._resolution()
        fields = dict(
            session_id=self.session_id,
            start_time=self._start_ts,
            # The end time is simply "now": the manifest is written at shutdown.
            end_time=time.time(),
            resolution=screen_size,
            fps=self.fps,
            screen=self.screen,
            video_path=str(self.video_path),
            events_path=str(self.events_path),
            frames_dir=str(self.frames_dir),
            frames_crops_dir=str(self.frames_crops_dir),
            ui_snapshots_dir=str(self.ui_snapshots_dir),
        )
        manifest = SessionManifest(**fields)
        payload = json.dumps(manifest.dict(exclude_none=True), ensure_ascii=False, indent=2)
        target = self.session_dir / "manifest.json"
        target.write_text(payload, encoding="utf-8")

    def _resolution(self) -> str:
        if self._monitor:
            return f"{self._monitor['width']}x{self._monitor['height']}"
        try:
            width, height = auto.GetScreenSize()
            return f"{width}x{height}"
        except Exception:
            return "unknown"
init2 2025-12-19 16:24:04 +08:00			`# MIT License`
			`# Copyright (c) 2024`
			`"""Multimodal recorder for Windows desktop sessions."""`

			`from __future__ import annotations`

			`import json`
			`import threading`
			`import time`
			`import uuid`
			`from pathlib import Path`
			`from typing import List, Optional, Tuple`

			`import cv2 # type: ignore`
			`import numpy as np # type: ignore`
			`import psutil # type: ignore`
			`import uiautomation as auto # type: ignore`
			`from pynput import keyboard, mouse`
			`import mss # type: ignore`

			`from .schema import (`
			`EventRecord,`
			`FramePaths,`
			`MouseInfo,`
			`Rect,`
			`SessionManifest,`
			`UISnapshot,`
			`UITreeNode,`
			`UISelector,`
			`WindowInfo,`
			`)`
			`from .screen_recorder import ScreenRecorder`


			`class Recorder:`
			`"""Capture UI events, UIA context, screenshots, and screen video."""`

			`def __init__(self, output_dir: Path, hotkey: str = "F9", fps: int = 12, screen: int = 0) -> None:`
			`self.output_dir = output_dir`
			`self.hotkey = hotkey`
			`self.fps = fps`
			`self.screen = screen`

			`self.session_id = str(uuid.uuid4())`
			`self.session_dir = self.output_dir / self.session_id`
			`self.events_path = self.session_dir / "events.jsonl"`
			`self.video_path = self.session_dir / "video.mp4"`
			`self.frames_dir = self.session_dir / "frames"`
			`self.frames_crops_dir = self.session_dir / "frames_crops"`
			`self.ui_snapshots_dir = self.session_dir / "ui_snapshots"`

			`self.events: List[EventRecord] = []`
			`self._stop_event = threading.Event()`
			`self._lock = threading.Lock()`
			`self._text_buffer: List[str] = []`
			`self._flush_timer: Optional[threading.Timer] = None`
			`self._start_perf = 0.0`
			`self._start_ts = 0.0`
			`self._last_hwnd: Optional[int] = None`
			`self._mouse_controller = mouse.Controller()`
			`self._screen_recorder: Optional[ScreenRecorder] = None`
			`self._window_thread: Optional[threading.Thread] = None`
			`self._mouse_listener: Optional[mouse.Listener] = None`
			`self._keyboard_listener: Optional[keyboard.Listener] = None`
			`self._monitor: Optional[dict] = None`
			`self._event_index = 0`
			`self._uia_local = threading.local()`
			`self._ensure_uia_initialized()`

			`# Public API ---------------------------------------------------------`
			`def start(self) -> Path:`
			`"""Start recording until the hotkey is pressed."""`
			`self.session_dir.mkdir(parents=True, exist_ok=True)`
			`self.frames_dir.mkdir(parents=True, exist_ok=True)`
			`self.frames_crops_dir.mkdir(parents=True, exist_ok=True)`
			`self.ui_snapshots_dir.mkdir(parents=True, exist_ok=True)`

			`self._start_perf = time.perf_counter()`
			`self._start_ts = time.time()`
			`with mss.mss() as sct:`
			`monitors = sct.monitors`
			`if 0 <= self.screen < len(monitors):`
			`self._monitor = monitors[self.screen]`
			`else:`
			`self._monitor = monitors[0]`

			`self._screen_recorder = ScreenRecorder(self.video_path, fps=self.fps, screen=self.screen)`
			`self._screen_recorder.start()`

			`self._window_thread = threading.Thread(target=self._watch_window, daemon=True)`
			`self._window_thread.start()`

			`self._mouse_listener = mouse.Listener(on_click=self._on_click)`
			`self._keyboard_listener = keyboard.Listener(on_press=self._on_key_press)`
			`self._mouse_listener.start()`
			`self._keyboard_listener.start()`

			`self._stop_event.wait()`
			`self._flush_text_buffer()`
			`self._shutdown()`
			`return self.session_dir`

			`# Event handlers -----------------------------------------------------`
			`def _on_click(self, x: int, y: int, button: mouse.Button, pressed: bool) -> None:`
			`if not pressed or self._stop_event.is_set():`
			`return`
			`window_info = self._get_window_info()`
			`selector = self._hit_test(x, y)`
			`mouse_info = MouseInfo(x=int(x), y=int(y), button=str(button).split(".")[-1], action="down")`
			`self._record_event(`
			`event_type="mouse_click",`
			`mouse_info=mouse_info,`
			`text=None,`
			`uia_selector=selector,`
			`window=window_info,`
			`)`

			`def _on_key_press(self, key: keyboard.Key \| keyboard.KeyCode) -> Optional[bool]:`
			`if self._is_hotkey(key):`
			`self._stop_event.set()`
			`return False`
			`if self._stop_event.is_set():`
			`return False`
			`ch = self._key_to_char(key)`
			`if ch is None:`
			`return None`
			`self._text_buffer.append(ch)`
			`self._schedule_flush()`
			`return None`

			`# Background watchers ------------------------------------------------`
			`def _watch_window(self, interval: float = 0.5) -> None:`
			`while not self._stop_event.is_set():`
			`info = self._get_window_info()`
			`hwnd = info.hwnd if info else None`
			`if hwnd and hwnd != self._last_hwnd:`
			`self._last_hwnd = hwnd`
			`selector = self._hit_test(*self._current_mouse_position())`
			`self._record_event(`
			`event_type="window_change",`
			`mouse_info=self._current_mouse_info(),`
			`text=None,`
			`uia_selector=selector,`
			`window=info,`
			`)`
			`time.sleep(interval)`

			`# Recording helpers --------------------------------------------------`
			`def _shutdown(self) -> None:`
			`if self._flush_timer and self._flush_timer.is_alive():`
			`self._flush_timer.cancel()`
			`if self._mouse_listener:`
			`self._mouse_listener.stop()`
			`if self._keyboard_listener:`
			`self._keyboard_listener.stop()`
			`if self._window_thread and self._window_thread.is_alive():`
			`self._window_thread.join(timeout=1.0)`
			`if self._screen_recorder:`
			`self._screen_recorder.stop()`
			`self._write_events()`
			`self._write_manifest()`

			`def _schedule_flush(self) -> None:`
			`if self._flush_timer and self._flush_timer.is_alive():`
			`self._flush_timer.cancel()`
			`self._flush_timer = threading.Timer(0.8, self._flush_text_buffer)`
			`self._flush_timer.daemon = True`
			`self._flush_timer.start()`

			`def _flush_text_buffer(self) -> None:`
			`if not self._text_buffer:`
			`return`
			`text = "".join(self._text_buffer)`
			`self._text_buffer = []`
			`mouse_info = self._current_mouse_info()`
			`selector = None`
			`if mouse_info:`
			`selector = self._hit_test(mouse_info.x, mouse_info.y)`
			`window_info = self._get_window_info()`
			`self._record_event(`
			`event_type="text_input",`
			`mouse_info=mouse_info,`
			`text=text,`
			`uia_selector=selector,`
			`window=window_info,`
			`)`

			`def _record_event(`
			`self,`
			`event_type: str,`
			`mouse_info: Optional[MouseInfo],`
			`text: Optional[str],`
			`uia_selector: Optional[UISelector],`
			`window: Optional[WindowInfo],`
			`) -> None:`
			`self._event_index += 1`
			`ts = time.time()`
			`offset_ms = int((time.perf_counter() - self._start_perf) * 1000)`
			`frame_paths = self._capture_frame(event_type, self._event_index, mouse_info, uia_selector, window)`
			`ui_snapshot_path = self._save_ui_snapshot(self._event_index, uia_selector)`

			`record = EventRecord(`
			`ts=ts,`
			`event_type=event_type,`
			`window=window,`
			`mouse=mouse_info,`
			`text=text,`
			`uia=uia_selector,`
			`frame_paths=frame_paths,`
			`video_time_offset_ms=offset_ms,`
			`ui_snapshot=ui_snapshot_path,`
			`)`
			`with self._lock:`
			`self.events.append(record)`

			`def _capture_frame(`
			`self,`
			`tag: str,`
			`event_index: int,`
			`mouse_info: Optional[MouseInfo],`
			`uia_selector: Optional[UISelector],`
			`window: Optional[WindowInfo],`
			`) -> Optional[FramePaths]:`
			`if not self._monitor:`
			`return None`

			`region = self._monitor_region(window)`
			`with mss.mss() as sct:`
			`shot = np.array(sct.grab(region))`
			`frame = cv2.cvtColor(shot, cv2.COLOR_BGRA2BGR)`

			`full_path = self.frames_dir / f"frame_{event_index:05d}_{tag}.png"`
			`cv2.imwrite(str(full_path), frame)`

			`crop_mouse_path = None`
			`crop_element_path = None`
			`if mouse_info:`
			`crop_mouse_path = self._save_mouse_crop(frame, region, mouse_info, event_index)`
			`if uia_selector and uia_selector.bounding_rect:`
			`crop_element_path = self._save_element_crop(frame, region, uia_selector.bounding_rect, event_index)`

			`return FramePaths(`
			`full=str(full_path),`
			`crop_mouse=str(crop_mouse_path) if crop_mouse_path else None,`
			`crop_element=str(crop_element_path) if crop_element_path else None,`
			`)`

			`def _save_mouse_crop(self, frame: np.ndarray, region: dict, mouse_info: MouseInfo, event_index: int) -> Optional[Path]:`
			`width, height = frame.shape[1], frame.shape[0]`
			`center_x = int(mouse_info.x - region["left"])`
			`center_y = int(mouse_info.y - region["top"])`
			`crop_w, crop_h = 400, 300`
			`x0 = max(0, center_x - crop_w // 2)`
			`y0 = max(0, center_y - crop_h // 2)`
			`x1 = min(width, x0 + crop_w)`
			`y1 = min(height, y0 + crop_h)`
			`if x1 <= x0 or y1 <= y0:`
			`return None`
			`crop = frame[y0:y1, x0:x1]`
			`path = self.frames_crops_dir / f"frame_{event_index:05d}_mouse.png"`
			`cv2.imwrite(str(path), crop)`
			`return path`

			`def _save_element_crop(self, frame: np.ndarray, region: dict, rect: Rect, event_index: int) -> Optional[Path]:`
			`width, height = frame.shape[1], frame.shape[0]`
			`x0 = max(0, int(rect.left - region["left"]))`
			`y0 = max(0, int(rect.top - region["top"]))`
			`x1 = min(width, int(rect.right - region["left"]))`
			`y1 = min(height, int(rect.bottom - region["top"]))`
			`if x1 <= x0 or y1 <= y0:`
			`return None`
			`crop = frame[y0:y1, x0:x1]`
			`path = self.frames_crops_dir / f"frame_{event_index:05d}_element.png"`
			`cv2.imwrite(str(path), crop)`
			`return path`

			`def _monitor_region(self, window: Optional[WindowInfo]) -> dict:`
			`if window and window.rect and window.rect.width > 0 and window.rect.height > 0:`
			`return {`
			`"left": int(window.rect.left),`
			`"top": int(window.rect.top),`
			`"width": int(window.rect.width),`
			`"height": int(window.rect.height),`
			`}`
			`return {`
			`"left": int(self._monitor["left"]),`
			`"top": int(self._monitor["top"]),`
			`"width": int(self._monitor["width"]),`
			`"height": int(self._monitor["height"]),`
			`}`

			`def _save_ui_snapshot(self, event_index: int, selector: Optional[UISelector]) -> Optional[str]:`
			`tree = self._capture_tree(max_depth=3)`
			`if not tree and selector is None:`
			`return None`
			`path = self.ui_snapshots_dir / f"ui_{event_index:05d}.json"`
			`snapshot = UISnapshot(selector=selector, tree=tree)`
			`with path.open("w", encoding="utf-8") as f:`
			`json.dump(snapshot.dict(exclude_none=True), f, ensure_ascii=False)`
			`return str(path)`

			`# UI helpers ---------------------------------------------------------`
			`def _capture_tree(self, max_depth: int = 3) -> List[UITreeNode]:`
			`self._ensure_uia_initialized()`
			`root = auto.GetForegroundControl()`
			`if root is None:`
			`return []`
			`nodes: List[UITreeNode] = []`
			`queue: List[Tuple[auto.Control, int]] = [(root, 0)] # type: ignore`
			`while queue:`
			`node, depth = queue.pop(0)`
			`if depth > max_depth:`
			`continue`
			`nodes.append(`
			`UITreeNode(`
			`name=node.Name,`
			`automation_id=node.AutomationId,`
			`class_name=node.ClassName,`
			`control_type=node.ControlTypeName,`
			`depth=depth,`
			`)`
			`)`
			`try:`
			`children = list(node.GetChildren())`
			`except Exception:`
			`children = []`
			`for child in children:`
			`queue.append((child, depth + 1))`
			`return nodes`

			`def _hit_test(self, x: int, y: int) -> Optional[UISelector]:`
			`try:`
			`self._ensure_uia_initialized()`
			`ctrl = auto.ControlFromPoint((int(x), int(y)))`
			`except Exception:`
			`ctrl = None`
			`if not ctrl:`
			`return None`
			`return self._build_selector(ctrl)`

			`def _get_window_info(self) -> Optional[WindowInfo]:`
			`self._ensure_uia_initialized()`
			`ctrl = auto.GetForegroundControl()`
			`if ctrl is None:`
			`return None`
			`rect = getattr(ctrl, "BoundingRectangle", None)`
			`self._ensure_uia_initialized()`
			`rect_model = None`
			`if rect:`
			`rect_model = Rect(left=int(rect.left), top=int(rect.top), right=int(rect.right), bottom=int(rect.bottom))`
			`process_name = None`
			`try:`
			`process_name = psutil.Process(ctrl.ProcessId).name()`
			`except Exception:`
			`process_name = None`
			`hwnd = getattr(ctrl, "NativeWindowHandle", None) or getattr(ctrl, "Handle", None)`
			`return WindowInfo(`
			`hwnd=int(hwnd) if hwnd else None,`
			`title=ctrl.Name,`
			`process_name=process_name,`
			`rect=rect_model,`
			`)`

			`def _build_selector(self, ctrl: auto.Control) -> UISelector: # type: ignore`
			`rect = getattr(ctrl, "BoundingRectangle", None)`
			`rect_model = None`
			`if rect:`
			`rect_model = Rect(left=int(rect.left), top=int(rect.top), right=int(rect.right), bottom=int(rect.bottom))`
			`return UISelector(`
			`automation_id=getattr(ctrl, "AutomationId", None),`
			`name=getattr(ctrl, "Name", None),`
			`class_name=getattr(ctrl, "ClassName", None),`
			`control_type=getattr(ctrl, "ControlTypeName", None),`
			`bounding_rect=rect_model,`
			`)`

			`# Utility ------------------------------------------------------------`
			`def _key_to_char(self, key: keyboard.Key \| keyboard.KeyCode) -> Optional[str]:`
			`if isinstance(key, keyboard.KeyCode) and key.char:`
			`return key.char`
			`if key == keyboard.Key.space:`
			`return " "`
			`if key == keyboard.Key.enter:`
			`return "\n"`
			`if key == keyboard.Key.backspace:`
			`if self._text_buffer:`
			`self._text_buffer.pop()`
			`return None`
			`return None`

			`def _is_hotkey(self, key: keyboard.Key \| keyboard.KeyCode) -> bool:`
			`target = self.hotkey.lower()`
			`name = None`
			`if isinstance(key, keyboard.Key):`
			`name = (key.name or "").lower()`
			`elif isinstance(key, keyboard.KeyCode):`
			`name = (key.char or "").lower()`
			`return name == target`

			`def _current_mouse_position(self) -> Tuple[int, int]:`
			`pos = self._mouse_controller.position`
			`return int(pos[0]), int(pos[1])`

			`def _current_mouse_info(self) -> Optional[MouseInfo]:`
			`x, y = self._current_mouse_position()`
			`return MouseInfo(x=int(x), y=int(y), button=None, action=None)`

			`def _ensure_uia_initialized(self) -> None:`
			`if getattr(self._uia_local, "token", None) is None:`
			`self._uia_local.token = auto.UIAutomationInitializerInThread()`

			`# Persistence --------------------------------------------------------`
			`def _write_events(self) -> None:`
			`with self.events_path.open("w", encoding="utf-8") as f:`
			`for event in self.events:`
			`f.write(json.dumps(event.dict(exclude_none=True), ensure_ascii=False))`
			`f.write("\n")`

			`def _write_manifest(self) -> None:`
			`resolution = self._resolution()`
			`manifest = SessionManifest(`
			`session_id=self.session_id,`
			`start_time=self._start_ts,`
			`end_time=time.time(),`
			`resolution=resolution,`
			`fps=self.fps,`
			`screen=self.screen,`
			`video_path=str(self.video_path),`
			`events_path=str(self.events_path),`
			`frames_dir=str(self.frames_dir),`
			`frames_crops_dir=str(self.frames_crops_dir),`
			`ui_snapshots_dir=str(self.ui_snapshots_dir),`
			`)`
			`path = self.session_dir / "manifest.json"`
			`with path.open("w", encoding="utf-8") as f:`
			`json.dump(manifest.dict(exclude_none=True), f, ensure_ascii=False, indent=2)`

			`def _resolution(self) -> str:`
			`if self._monitor:`
			`return f"{self._monitor['width']}x{self._monitor['height']}"`
			`try:`
			`width, height = auto.GetScreenSize()`
			`return f"{width}x{height}"`
			`except Exception:`
			`return "unknown"`