audoWin/autodemo/executor.py
2025-12-22 17:07:08 +08:00

502 lines
20 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# MIT License
# Copyright (c) 2024
"""执行层:基于 DSL 进行 UI 自动化,并支持可选视觉校验与结构化日志"""
from __future__ import annotations
import json
import re
import time
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, List, Optional
import cv2 # type: ignore
import mss # type: ignore
import numpy as np # type: ignore
import uiautomation as auto # type: ignore
from .schema import DSLSpec
@dataclass
class ExecContext:
    """Settings shared by every step of a single execution run."""

    # Whitelisted window title; the executor only drives a window whose
    # title contains this text.
    allow_title: str
    # When True, actions are printed instead of being performed.
    dry_run: bool = False
    # Folder that receives screenshots, UI-tree dumps and the JSONL log.
    artifacts_dir: Path = Path("artifacts")
def _match_window(allow_title: str) -> Optional[auto.Control]:
    """Return the top-level window whose title matches the whitelist entry.

    A title matches when it contains ``allow_title`` or, if ``allow_title``
    contains ``" - "``, the part before the first ``" - "``.

    The foreground control is tried first. Otherwise the UIA tree is searched
    breadth-first from the root control down to depth 2.

    Args:
        allow_title: Whitelisted window title (or fragment of it).

    Returns:
        The matched control promoted to its most likely top-level window, or
        ``None`` when nothing matches.
    """
    # Function-scope import keeps this block self-contained.
    from collections import deque

    max_depth = 2

    patterns = [allow_title]
    if " - " in allow_title:
        patterns.append(allow_title.split(" - ", 1)[0])

    def _title_match(name: Optional[str]) -> bool:
        """Tell whether ``name`` contains any non-empty pattern."""
        if not name:
            return False
        return any(pat and pat in name for pat in patterns)

    def _ascend_to_top(node: auto.Control) -> auto.Control:
        """Walk up the parents and keep the highest window-like ancestor.

        An ancestor qualifies when its class name is ``Chrome_WidgetWin_1``
        or its control type is ``WindowControl``. ``node`` itself is returned
        when no ancestor qualifies.
        """
        best = node
        cur = node
        while True:
            try:
                parent = cur.GetParent()
            except Exception:
                parent = None
            if not parent:
                return best
            try:
                cls = getattr(parent, "ClassName", None)
                ctype = getattr(parent, "ControlTypeName", None)
                if cls == "Chrome_WidgetWin_1" or ctype == "WindowControl":
                    best = parent
            except Exception:
                pass
            cur = parent

    fg = auto.GetForegroundControl()
    if fg and _title_match(getattr(fg, "Name", None)):
        return _ascend_to_top(fg)

    root = auto.GetRootControl()
    if not root:
        return None

    pending = deque([(root, 0)])
    while pending:
        node, depth = pending.popleft()
        try:
            name = node.Name
        except Exception:
            name = None
        if _title_match(name):
            return _ascend_to_top(node)
        # Do not expand nodes at the depth limit: their children would be
        # discarded anyway, and every GetChildren() call walks the UIA tree.
        if depth >= max_depth:
            continue
        try:
            children = list(node.GetChildren())
        except Exception:
            children = []
        pending.extend((child, depth + 1) for child in children)
    return None
def _find_control(root: auto.Control, locator: Dict[str, Any], timeout: float) -> Optional[auto.Control]:
"""Find a control under root according to locator."""
start = time.time()
try:
print(
f"[debug] 查找控件 locator={locator} root=({getattr(root, 'Name', None)}, {getattr(root, 'ClassName', None)}, {getattr(root, 'ControlTypeName', None)})"
)
except Exception:
pass
def _matches(ctrl: auto.Control) -> bool:
"""Simple property match without relying on uiautomation AndCondition."""
try:
name_val = locator.get("Name")
name_contains = locator.get("Name__contains")
class_val = locator.get("ClassName")
ctrl_type_val = locator.get("ControlType")
auto_id_val = locator.get("AutomationId")
if name_val and ctrl.Name != name_val:
return False
if name_contains:
cur = "" if ctrl.Name is None else str(ctrl.Name)
pat = str(name_contains)
if pat not in cur and cur not in pat:
return False
if class_val and ctrl.ClassName != class_val:
if not name_contains:
return False
if ctrl_type_val and ctrl.ControlTypeName != ctrl_type_val:
# 当使用标题包含匹配时,容忍控件类型差异(不同 Chrome 版本可能是 PaneControl
if not name_contains:
return False
if auto_id_val and ctrl.AutomationId != auto_id_val:
return False
return True
except Exception:
return False
while time.time() - start <= timeout:
try:
if not locator:
print("10001")
return root
# Check root itself first
if _matches(root):
print("10002")
return root
# Simple BFS when AndCondition is unavailable
queue: List[Any] = [(root, 0)]
while queue:
node, depth = queue.pop(0)
if depth >= 15: # Chrome 控件层级较深,放宽搜索深度
continue
try:
children = list(node.GetChildren())
except Exception:
children = []
for child in children:
if _matches(child):
return child
queue.append((child, depth + 1))
except Exception as exc:
print(f"[warn] find control error: {exc}")
time.sleep(0.2)
# 额外兜底:在全局根下再搜一次(只在超时后触发)
try:
sys_root = auto.GetRootControl()
queue: List[Any] = [(sys_root, 0)] if sys_root else []
while queue:
node, depth = queue.pop(0)
if depth >= 20:
continue
if _matches(node):
return node
try:
children = list(node.GetChildren())
except Exception:
children = []
for ch in children:
queue.append((ch, depth + 1))
except Exception:
pass
# 再次兜底:如果按 name/name_contains 未命中,尝试忽略 ClassName/ControlType 放宽匹配
try:
relaxed = dict(locator)
relaxed.pop("ClassName", None)
relaxed.pop("ControlType", None)
if relaxed.get("Name") or relaxed.get("Name__contains"):
print("[debug] 放宽匹配,仅按名称再次查找")
return _find_control(root, relaxed, max(timeout, 0.5))
except Exception:
pass
return None
def _capture_screenshot(ctrl: Optional[auto.Control], out_path: Path) -> Optional[Path]:
    """Save a screenshot of the control's bounding rectangle, or of the screen.

    When ``ctrl`` is falsy or exposes no ``BoundingRectangle``, the capture
    uses ``sct.monitors[1]`` if it exists and ``sct.monitors[0]`` otherwise.

    Args:
        ctrl: Control whose rectangle is captured, or ``None``.
        out_path: Destination image file; parent folders are created.

    Returns:
        ``out_path`` when the image was written, ``None`` on any failure.
        Capturing is best-effort and never raises.
    """
    try:
        with mss.mss() as sct:
            # Read the rectangle once so the test and the use see the same value.
            rect = getattr(ctrl, "BoundingRectangle", None) if ctrl else None
            if rect:
                region = {
                    "left": int(rect.left),
                    "top": int(rect.top),
                    "width": int(rect.right - rect.left),
                    "height": int(rect.bottom - rect.top),
                }
            else:
                monitor = sct.monitors[1] if len(sct.monitors) > 1 else sct.monitors[0]
                region = {
                    "left": monitor["left"],
                    "top": monitor["top"],
                    "width": monitor["width"],
                    "height": monitor["height"],
                }
            shot = np.array(sct.grab(region))
        frame = cv2.cvtColor(shot, cv2.COLOR_BGRA2BGR)
        out_path.parent.mkdir(parents=True, exist_ok=True)
        # imwrite reports a failed write through its return value, so an
        # unchecked call would claim a screenshot that does not exist.
        if not cv2.imwrite(str(out_path), frame):
            return None
        return out_path
    except Exception:
        return None
def _capture_tree(ctrl: Optional[auto.Control], max_depth: int = 3) -> List[Dict[str, Any]]:
"""采集浅层 UIA 树摘要"""
if ctrl is None:
return []
nodes: List[Dict[str, Any]] = []
queue: List[Any] = [(ctrl, 0)]
while queue:
node, depth = queue.pop(0)
if depth > max_depth:
continue
nodes.append(
{
"name": node.Name,
"automation_id": node.AutomationId,
"class_name": node.ClassName,
"control_type": node.ControlTypeName,
"depth": depth,
}
)
try:
children = list(node.GetChildren())
except Exception:
children = []
for child in children:
queue.append((child, depth + 1))
return nodes
def _save_tree(ctrl: Optional[auto.Control], out_path: Path) -> Optional[Path]:
    """Dump the shallow UIA tree of ``ctrl`` to ``out_path`` as indented JSON.

    Returns ``out_path`` on success and ``None`` when anything fails; the
    dump is best-effort and never raises.
    """
    try:
        summary = _capture_tree(ctrl)
        folder = out_path.parent
        folder.mkdir(parents=True, exist_ok=True)
        serialized = json.dumps(summary, ensure_ascii=False, indent=2)
        out_path.write_text(serialized, encoding="utf-8")
    except Exception:
        return None
    return out_path
def _image_similarity(full_img_path: Path, template_path: Path, threshold: float = 0.8) -> bool:
"""简单模板匹配,相似度 >= 阈值视为通过"""
if not full_img_path.exists() or not template_path.exists():
return False
full = cv2.imread(str(full_img_path), cv2.IMREAD_COLOR)
tmpl = cv2.imread(str(template_path), cv2.IMREAD_COLOR)
if full is None or tmpl is None or full.shape[0] < tmpl.shape[0] or full.shape[1] < tmpl.shape[1]:
return False
res = cv2.matchTemplate(full, tmpl, cv2.TM_CCOEFF_NORMED)
_, max_val, _, _ = cv2.minMaxLoc(res)
return float(max_val) >= threshold
def _visual_check(expected: Dict[str, Any], ctrl: Optional[auto.Control], artifacts_dir: Path, step_idx: int, attempt: int) -> bool:
"""执行可选视觉校验:模板匹配"""
template_path = expected.get("template_path")
threshold = float(expected.get("threshold", 0.8))
if not template_path:
return True
snap_path = artifacts_dir / "screenshots" / f"step{step_idx:03d}_attempt{attempt}_visual.png"
snap = _capture_screenshot(ctrl, snap_path)
if not snap:
return False
return _image_similarity(snap, Path(template_path), threshold)
def _log_event(log_path: Path, record: Dict[str, Any]) -> None:
log_path.parent.mkdir(parents=True, exist_ok=True)
with log_path.open("a", encoding="utf-8") as f:
f.write(json.dumps(record, ensure_ascii=False))
f.write("\n")
def _render_value(val: Any, params: Dict[str, Any]) -> Any:
"""简单占位符替换 ${param}"""
if isinstance(val, str):
out = val
for k, v in params.items():
placeholder = f"${{{k}}}"
if placeholder in out:
out = out.replace(placeholder, str(v))
return out
if isinstance(val, dict):
return {k: _render_value(v, params) for k, v in val.items()}
if isinstance(val, list):
return [_render_value(v, params) for v in val]
return val
def _do_action(ctrl: auto.Control, step: Dict[str, Any], dry_run: bool) -> None:
"""执行单步动作"""
action = step.get("action")
text = step.get("text", "")
send_enter = bool(step.get("send_enter"))
if dry_run:
extra = " +Enter" if send_enter else ""
print(f"[dry-run] {action} -> target={step.get('target')} text={text}{extra}")
return
if action == "click":
ctrl.Click()
elif action == "type":
ctrl.SetFocus()
to_send = text + ("{Enter}" if send_enter else "")
auto.SendKeys(to_send)
elif action == "set_value":
try:
ctrl.GetValuePattern().SetValue(text)
except Exception:
ctrl.SendKeys(text)
elif action == "assert_exists":
if ctrl is None:
raise RuntimeError("控件未找到")
elif action == "wait_for":
time.sleep(float(step.get("waits", {}).get("appear", 1.0)))
def execute_spec(spec: DSLSpec, ctx: ExecContext) -> None:
    """Execute a complete DSL spec against the whitelisted window.

    Flow:
      1. Resolve the root control with ``_match_window(ctx.allow_title)``.
      2. Normalize every step (field aliases, ``${param}`` substitution,
         wait settings).
      3. For each step: find the target control, run the optional visual
         check, then perform the action (or print it in dry-run mode).
      4. Every attempt writes a screenshot, a UI-tree dump and one JSONL log
         record under ``ctx.artifacts_dir``.

    Args:
        spec: Parsed DSL; ``params``, ``waits``, ``retry_policy`` and
            ``steps`` are read from it.
        ctx: Execution settings.

    Raises:
        RuntimeError: when no window matches ``ctx.allow_title``.
        Exception: the last error of a step whose attempts all failed.
    """
    # Give the foreground window time to settle; right after the command is
    # launched the target window may not be focused yet.
    time.sleep(1.0)
    root = _match_window(ctx.allow_title)
    if root is None:
        raise RuntimeError(f"前台窗口标题未匹配白名单: {ctx.allow_title}")
    if ctx.dry_run:
        try:
            print(f"[debug] root -> name={root.Name} class={root.ClassName} type={root.ControlTypeName}")
        except Exception:
            pass

    artifacts = ctx.artifacts_dir
    screenshots_dir = artifacts / "screenshots"
    trees_dir = artifacts / "ui_trees"
    log_path = artifacts / "executor_log.jsonl"

    def _normalize_target(tgt: Dict[str, Any]) -> Dict[str, Any]:
        """Map the accepted target key spellings onto the locator keys."""
        norm: Dict[str, Any] = {}
        for k, v in tgt.items():
            lk = k.lower()
            if lk in ("name", "window_title"):
                if lk == "window_title" and isinstance(v, str) and " - " in v:
                    # A full "page - app" window title is matched by containment only.
                    norm["Name__contains"] = v
                else:
                    norm["Name"] = v
                    norm["Name__contains"] = v
            elif lk in ("window_title_contains", "name_contains"):
                norm["Name__contains"] = v
            elif lk == "window_title_contains_param":
                norm["Name__contains"] = spec.params.get(str(v), v)
            elif lk in ("classname", "class_name"):
                norm["ClassName"] = v
            elif lk in ("controltype", "control_type"):
                norm["ControlType"] = v
            elif lk == "automationid":
                norm["AutomationId"] = v
            else:
                norm[k] = v
        return norm

    def normalize_step(step: Dict[str, Any]) -> Dict[str, Any]:
        """Return a normalized copy of one step, accepting the DSL variants."""
        out = _render_value(dict(step), spec.params)

        # Field aliases: selector -> target, value -> text, text_param -> text.
        if "target" not in out and "selector" in out:
            out["target"] = out.get("selector")
        out.pop("selector", None)
        if "value" in out and "text" not in out:
            out["text"] = out.get("value")
        out.pop("value", None)
        if "text" not in out and out.get("text_param"):
            key = str(out.pop("text_param"))
            out["text"] = str(spec.params.get(key, ""))

        tgt = out.get("target")
        if isinstance(tgt, dict):
            out["target"] = _normalize_target(tgt)

        waits_obj = out.get("waits")
        if isinstance(waits_obj, list):
            # List form: the first entry carrying timeout_ms sets the appear wait.
            appear = None
            for w in waits_obj:
                if isinstance(w, dict) and "timeout_ms" in w:
                    appear = float(w.get("timeout_ms", 0)) / 1000.0
                    break
            out["waits"] = {
                "appear": appear or spec.waits.get("appear", 5.0),
                "disappear": spec.waits.get("disappear", 1.0),
            }
        elif isinstance(waits_obj, dict):
            waits_obj = dict(waits_obj)
            if "timeout_ms" in waits_obj and "appear" not in waits_obj:
                waits_obj["appear"] = float(waits_obj.pop("timeout_ms")) / 1000.0
            out["waits"] = waits_obj
        else:
            # Copy the spec-level defaults: the step-level timeout_ms override
            # below writes into this dict and must not leak into spec.waits,
            # which every other step falls back to.
            out["waits"] = dict(spec.waits)

        if "timeout_ms" in out:
            out.setdefault("waits", {})
            out["waits"]["appear"] = float(out.pop("timeout_ms")) / 1000.0
        return out

    def normalize_steps(steps: List[Any]) -> List[Any]:
        """Normalize a step list, recursing into ``steps`` and ``else_steps``."""
        normed: List[Any] = []
        for st in steps:
            if isinstance(st, dict):
                st = normalize_step(st)
                if "steps" in st and isinstance(st["steps"], list):
                    st["steps"] = normalize_steps(st["steps"])
                if "else_steps" in st and isinstance(st["else_steps"], list):
                    st["else_steps"] = normalize_steps(st["else_steps"])
            normed.append(st)
        return normed

    normalized_steps = normalize_steps(spec.steps)

    def run_steps(steps: List[Any]) -> None:
        """Run steps in order, supporting nested for_each / if_condition."""
        # NOTE(review): idx restarts at 1 for every nested list, so artifact
        # file names of nested steps can collide with outer ones; confirm
        # whether that is intended.
        for idx, step in enumerate(steps, start=1):
            if "for_each" in step:
                # for_each: repeat the sub-steps once per item of the named parameter.
                # NOTE(review): the item itself is not passed to the sub-steps;
                # confirm against the DSL definition.
                iterable = spec.params.get(step["for_each"], [])
                for _item in iterable:
                    run_steps(step.get("steps", []))
            elif "if_condition" in step:
                # if_condition: pick a branch by the truthiness of the named parameter.
                cond = step["if_condition"]
                if spec.params.get(cond):
                    run_steps(step.get("steps", []))
                else:
                    run_steps(step.get("else_steps", []))
            else:
                # Plain step: find control -> optional visual check -> action.
                target = step.get("target", {})
                timeout = float(step.get("waits", {}).get("appear", spec.waits.get("appear", 1.0)))
                if ctx.dry_run:
                    # Dry runs should return quickly instead of waiting out long timeouts.
                    timeout = min(timeout, 1)
                retry = step.get("retry_policy", spec.retry_policy)
                attempts = int(retry.get("max_attempts", 1))
                interval = float(retry.get("interval", 1.0))
                expected = step.get("expected_screen") or {}
                last_err: Optional[Exception] = None

                for attempt in range(1, attempts + 1):
                    ctrl = _find_control(root, target, timeout)
                    try:
                        if ctrl is None and ctx.dry_run:
                            # Dry run: stand in with the root so the step can still be printed.
                            ctrl = root
                        if ctrl is None:
                            raise RuntimeError("控件未找到")
                        # Optional visual check.
                        if expected:
                            ok = _visual_check(expected, ctrl, artifacts, idx, attempt)
                            if not ok:
                                raise RuntimeError("视觉校验未通过")
                        _do_action(ctrl, step, ctx.dry_run)
                        snap_path = _capture_screenshot(ctrl, screenshots_dir / f"step{idx:03d}_attempt{attempt}_success.png")
                        tree_path = _save_tree(ctrl, trees_dir / f"step{idx:03d}_attempt{attempt}_tree.json")
                        _log_event(
                            log_path,
                            {
                                "ts": time.time(),
                                "step_index": idx,
                                "action": step.get("action"),
                                "target": target,
                                "attempt": attempt,
                                "result": "success",
                                "screenshot": str(snap_path) if snap_path else None,
                                "tree": str(tree_path) if tree_path else None,
                            },
                        )
                        last_err = None
                        break
                    except Exception as e:  # noqa: BLE001
                        last_err = e
                        snap_path = _capture_screenshot(ctrl, screenshots_dir / f"step{idx:03d}_attempt{attempt}_fail.png")
                        tree_path = _save_tree(ctrl, trees_dir / f"step{idx:03d}_attempt{attempt}_tree.json")
                        _log_event(
                            log_path,
                            {
                                "ts": time.time(),
                                "step_index": idx,
                                "action": step.get("action"),
                                "target": target,
                                "attempt": attempt,
                                "result": "fail",
                                "error": str(e),
                                "screenshot": str(snap_path) if snap_path else None,
                                "tree": str(tree_path) if tree_path else None,
                            },
                        )
                        if attempt < attempts:
                            time.sleep(interval)

                if last_err:
                    raise last_err

    run_steps(normalized_steps)