diff --git a/.gitignore b/.gitignore index 2b9c225..88f7851 100644 --- a/.gitignore +++ b/.gitignore @@ -157,3 +157,6 @@ bin-release/ # should NOT be excluded as they contain compiler settings and other important # information for Eclipse / Flash Builder. >>>>>>> 2a066347ae84a69f9986cffe451aeae1a5364b10 + +# YoYo AI version control directory +.yoyo/ diff --git a/README.md b/README.md index 4621aa4..0ec2dcc 100644 --- a/README.md +++ b/README.md @@ -1,123 +1,128 @@ -# Midscene Python +# Midscene Python [![zread](https://img.shields.io/badge/Ask_Zread-_.svg?style=flat&color=00b0aa&labelColor=000000&logo=data%3Aimage%2Fsvg%2Bxml%3Bbase64%2CPHN2ZyB3aWR0aD0iMTYiIGhlaWdodD0iMTYiIHZpZXdCb3g9IjAgMCAxNiAxNiIgZmlsbD0ibm9uZSIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KPHBhdGggZD0iTTQuOTYxNTYgMS42MDAxSDIuMjQxNTZDMS44ODgxIDEuNjAwMSAxLjYwMTU2IDEuODg2NjQgMS42MDE1NiAyLjI0MDFWNC45NjAxQzEuNjAxNTYgNS4zMTM1NiAxLjg4ODEgNS42MDAxIDIuMjQxNTYgNS42MDAxSDQuOTYxNTZDNS4zMTUwMiA1LjYwMDEgNS42MDE1NiA1LjMxMzU2IDUuNjAxNTYgNC45NjAxVjIuMjQwMUM1LjYwMTU2IDEuODg2NjQgNS4zMTUwMiAxLjYwMDEgNC45NjE1NiAxLjYwMDFaIiBmaWxsPSIjZmZmIi8%2BCjxwYXRoIGQ9Ik00Ljk2MTU2IDEwLjM5OTlIMi4yNDE1NkMxLjg4ODEgMTAuMzk5OSAxLjYwMTU2IDEwLjY4NjQgMS42MDE1NiAxMS4wMzk5VjEzLjc1OTlDMS42MDE1NiAxNC4xMTM0IDEuODg4MSAxNC4zOTk5IDIuMjQxNTYgMTQuMzk5OUg0Ljk2MTU2QzUuMzE1MDIgMTQuMzk5OSA1LjYwMTU2IDE0LjExMzQgNS42MDE1NiAxMy43NTk5VjExLjAzOTlDNS42MDE1NiAxMC42ODY0IDUuMzE1MDIgMTAuMzk5OSA0Ljk2MTU2IDEwLjM5OTlaIiBmaWxsPSIjZmZmIi8%2BCjxwYXRoIGQ9Ik0xMy43NTg0IDEuNjAwMUgxMS4wMzg0QzEwLjY4NSAxLjYwMDEgMTAuMzk4NCAxLjg4NjY0IDEwLjM5ODQgMi4yNDAxVjQuOTYwMUMxMC4zOTg0IDUuMzEzNTYgMTAuNjg1IDUuNjAwMSAxMS4wMzg0IDUuNjAwMUgxMy43NTg0QzE0LjExMTkgNS42MDAxIDE0LjM5ODQgNS4zMTM1NiAxNC4zOTg0IDQuOTYwMVYyLjI0MDFDMTQuMzk4NCAxLjg4NjY0IDE0LjExMTkgMS42MDAxIDEzLjc1ODQgMS42MDAxWiIgZmlsbD0iI2ZmZiIvPgo8cGF0aCBkPSJNNCAxMkwxMiA0TDQgMTJaIiBmaWxsPSIjZmZmIi8%2BCjxwYXRoIGQ9Ik00IDEyTDEyIDQiIHN0cm9rZT0iI2ZmZiIgc3Ryb2tlLXdpZHRoPSIxLjUiIHN0cm9rZS1saW5lY2FwPSJyb3VuZCIvPgo8L3N2Zz4K&logoColor=ffffff)](https
://zread.ai/Python51888/Midscene-Python)               +[English](README.md) | [简体中文](README.zh.md) -Midscene Python 是一个基于 AI 的自动化框架,支持 Web 和 Android 平台的 UI 自动化操作。 +Midscene Python is an AI-based automation framework that supports UI automation operations on Web and Android platforms.    -## 概述 +## Overview -Midscene Python 提供全面的 UI 自动化能力,具有以下核心特性: +Midscene Python provides comprehensive UI automation capabilities with the following core features: -- **自然语言驱动**:使用自然语言描述自动化任务 -- **多平台支持**:支持 Web(Selenium/Playwright)和 Android(ADB) -- **AI 模型集成**:支持 GPT-4V、Qwen2.5-VL、Gemini 等多种视觉语言模型 -- **可视化调试**:提供详细的执行报告和调试信息 -- **缓存机制**:智能缓存提升执行效率 +- **Natural Language Driven**: Describe automation tasks using natural language +- **Multi-platform Support**: Supports Web (Selenium/Playwright) and Android (ADB) +- **AI Model Integration**: Supports multiple vision-language models such as GPT-4V, Qwen2.5-VL, and Gemini  +- **Visual Debugging**: Provides detailed execution reports and debugging information +- **Caching Mechanism**: Intelligent caching to improve execution efficiency -## 项目架构 +## Project Architecture ``` midscene-python/ -├── midscene/ # 核心框架 -│ ├── core/ # 核心框架 -│ │ ├── agent/ # Agent系统 -│ │ ├── insight/ # AI推理引擎 -│ │ ├── ai_model/ # AI模型集成 -│ │ ├── yaml/ # YAML脚本执行器 -│ │ └── types.py # 核心类型定义 -│ ├── web/ # Web集成 -│ │ ├── selenium/ # Selenium集成 -│ │ ├── playwright/ # Playwright集成 -│ │ └── bridge/ # Bridge模式 -│ ├── android/ # Android集成 -│ │ ├── device.py # 设备管理 +├── midscene/ # Core framework +│ ├── core/ # Core framework +│ │ ├── agent/ # Agent system +│ │ ├── insight/ # AI inference engine +│ │ ├── ai_model/ # AI model integration +│ │ ├── yaml/ # YAML script executor +│ │ └── types.py # Core type definitions +│ ├── web/ # Web integration +│ │ ├── selenium/ # Selenium integration +│ │ ├── playwright/ # Playwright integration +│ │ └── bridge/ # Bridge mode +│ ├── android/ # Android integration +│ │ ├── device.py # Device management │ │ └── agent.py # Android Agent -│ 
├── cli/ # 命令行工具 -│ ├── mcp/ # MCP协议支持 -│ ├── shared/ # 共享工具 -│ └── visualizer/ # 可视化报告 -├── examples/ # 示例代码 -├── tests/ # 测试用例 -└── docs/ # 文档 +│ ├── cli/ # Command line tools +│ ├── mcp/ # MCP protocol support +│ ├── shared/ # Shared utilities +│ └── visualizer/ # Visual reports +├── examples/ # Example code +├── tests/ # Test cases +└── docs/ # Documentation ``` -## 技术栈 +## Tech Stack -- **Python 3.9+**:核心运行环境 -- **Pydantic**:数据验证和序列化 -- **Selenium/Playwright**:Web 自动化 -- **OpenCV/Pillow**:图像处理 -- **HTTPX/AIOHTTP**:HTTP 客户端 -- **Typer**:CLI 框架 -- **Loguru**:日志记录 +- **Python 3.9+**: Core runtime environment +- **Pydantic**: Data validation and serialization +- **Selenium/Playwright**: Web automation +- **OpenCV/Pillow**: Image processing +- **HTTPX/AIOHTTP**: HTTP client +- **Typer**: CLI framework +- **Loguru**: Logging -## 快速开始 +## Quick Start -### 安装 +### Installation ```bash pip install midscene-python ``` -### 基础用法 +### Basic Usage ```python from midscene import Agent from midscene.web import SeleniumWebPage -# 创建 Web Agent +# Create a Web Agent with SeleniumWebPage.create() as page: agent = Agent(page) - # 使用自然语言进行自动化操作 - await agent.ai_action("点击登录按钮") - await agent.ai_action("输入用户名 'test@example.com'") - await agent.ai_action("输入密码 'password123'") - await agent.ai_action("点击提交按钮") + # Perform automation operations using natural language + await agent.ai_action("Click the login button") + await agent.ai_action("Enter username 'test@example.com'") + await agent.ai_action("Enter password 'password123'") + await agent.ai_action("Click the submit button") - # 数据提取 - user_info = await agent.ai_extract("提取用户个人信息") + # Data extraction + user_info = await agent.ai_extract("Extract user personal information") - # 断言验证 - await agent.ai_assert("页面显示欢迎信息") + # Assertion verification + await agent.ai_assert("Page displays welcome message") ``` -## 主要特性 +## Key Features -### 🤖 AI 驱动的自动化 +### 🤖 AI-Driven Automation -使用自然语言描述操作,AI 自动理解并执行: +Describe operations using 
natural language, and AI automatically understands and executes them: ```python -await agent.ai_action("在搜索框中输入'Python教程'并搜索") +await agent.ai_action("Enter 'Python tutorial' in the search box and search") ``` -### 🔍 智能元素定位 +### 🔍 Intelligent Element Location -支持多种定位策略,自动选择最优方案: +Supports multiple location strategies and automatically selects the optimal solution: ```python -element = await agent.ai_locate("登录按钮") +element = await agent.ai_locate("Login button") ``` -### 📊 数据提取 +### 📊 Data Extraction -从页面提取结构化数据: +Extract structured data from the page: ```python products = await agent.ai_extract({ "products": [ - {"name": "产品名称", "price": "价格", "rating": "评分"} + {"name": "Product Name", "price": "Price", "rating": "Rating"} ] }) ``` -### ✅ 智能断言 +### ✅ Intelligent Assertions -AI 理解页面状态,进行智能断言: +AI understands page state and performs intelligent assertions: ```python -await agent.ai_assert("用户已成功登录") +await agent.ai_assert("User has successfully logged in") ``` -## 许可证 +### 📝 Credits -MIT License \ No newline at end of file +Thanks to the [Midscene project](https://github.com/web-infra-dev/midscene) for inspiration and technical references. + +## License + +MIT License diff --git a/README.zh.md b/README.zh.md new file mode 100644 index 0000000..bb1f0ee --- /dev/null +++ b/README.zh.md @@ -0,0 +1,129 @@ +# Midscene Python 
[![zread](https://img.shields.io/badge/Ask_Zread-_.svg?style=flat&color=00b0aa&labelColor=000000&logo=data%3Aimage%2Fsvg%2Bxml%3Bbase64%2CPHN2ZyB3aWR0aD0iMTYiIGhlaWdodD0iMTYiIHZpZXdCb3g9IjAgMCAxNiAxNiIgZmlsbD0ibm9uZSIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KPHBhdGggZD0iTTQuOTYxNTYgMS42MDAxSDIuMjQxNTZDMS44ODgxIDEuNjAwMSAxLjYwMTU2IDEuODg2NjQgMS42MDE1NiAyLjI0MDFWNC45NjAxQzEuNjAxNTYgNS4zMTM1NiAxLjg4ODEgNS42MDAxIDIuMjQxNTYgNS42MDAxSDQuOTYxNTZDNS4zMTUwMiA1LjYwMDEgNS42MDE1NiA1LjMxMzU2IDUuNjAxNTYgNC45NjAxVjIuMjQwMUM1LjYwMTU2IDEuODg2NjQgNS4zMTUwMiAxLjYwMDEgNC45NjE1NiAxLjYwMDFaIiBmaWxsPSIjZmZmIi8%2BCjxwYXRoIGQ9Ik00Ljk2MTU2IDEwLjM5OTlIMi4yNDE1NkMxLjg4ODEgMTAuMzk5OSAxLjYwMTU2IDEwLjY4NjQgMS42MDE1NiAxMS4wMzk5VjEzLjc1OTlDMS42MDE1NiAxNC4xMTM0IDEuODg4MSAxNC4zOTk5IDIuMjQxNTYgMTQuMzk5OUg0Ljk2MTU2QzUuMzE1MDIgMTQuMzk5OSA1LjYwMTU2IDE0LjExMzQgNS42MDE1NiAxMy43NTk5VjExLjAzOTlDNS42MDE1NiAxMC42ODY0IDUuMzE1MDIgMTAuMzk5OSA0Ljk2MTU2IDEwLjM5OTlaIiBmaWxsPSIjZmZmIi8%2BCjxwYXRoIGQ9Ik0xMy43NTg0IDEuNjAwMUgxMS4wMzg0QzEwLjY4NSAxLjYwMDEgMTAuMzk4NCAxLjg4NjY0IDEwLjM5ODQgMi4yNDAxVjQuOTYwMUMxMC4zOTg0IDUuMzEzNTYgMTAuNjg1IDUuNjAwMSAxMS4wMzg0IDUuNjAwMUgxMy43NTg0QzE0LjExMTkgNS42MDAxIDE0LjM5ODQgNS4zMTM1NiAxNC4zOTg0IDQuOTYwMVYyLjI0MDFDMTQuMzk4NCAxLjg4NjY0IDE0LjExMTkgMS42MDAxIDEzLjc1ODQgMS42MDAxWiIgZmlsbD0iI2ZmZiIvPgo8cGF0aCBkPSJNNCAxMkwxMiA0TDQgMTJaIiBmaWxsPSIjZmZmIi8%2BCjxwYXRoIGQ9Ik00IDEyTDEyIDQiIHN0cm9rZT0iI2ZmZiIgc3Ryb2tlLXdpZHRoPSIxLjUiIHN0cm9rZS1saW5lY2FwPSJyb3VuZCIvPgo8L3N2Zz4K&logoColor=ffffff)](https://zread.ai/Python51888/Midscene-Python) + +[English](README.md) | [中文](README.zh.md)  + +Midscene Python 是一个基于 AI 的自动化框架,支持 Web 和 Android 平台的 UI 自动化操作。 + +## 概述 + +Midscene Python 提供全面的 UI 自动化能力,具有以下核心特性: + +- **自然语言驱动**:使用自然语言描述自动化任务 +- **多平台支持**:支持 Web(Selenium/Playwright)和 Android(ADB) +- **AI 模型集成**:支持 GPT-4V、Qwen2.5-VL、Gemini 等多种视觉语言模型 +- **可视化调试**:提供详细的执行报告和调试信息 +- **缓存机制**:智能缓存提升执行效率 + +## 项目架构 + +``` +midscene-python/ +├── midscene/ # 核心框架 +│ ├── core/ # 核心框架 +│ │ ├── agent/ # Agent系统 +│ │ 
├── insight/ # AI推理引擎 +│ │ ├── ai_model/ # AI模型集成 +│ │ ├── yaml/ # YAML脚本执行器 +│ │ └── types.py # 核心类型定义 +│ ├── web/ # Web集成 +│ │ ├── selenium/ # Selenium集成 +│ │ ├── playwright/ # Playwright集成 +│ │ └── bridge/ # Bridge模式 +│ ├── android/ # Android集成 +│ │ ├── device.py # 设备管理 +│ │ └── agent.py # Android Agent +│ ├── cli/ # 命令行工具 +│ ├── mcp/ # MCP协议支持 +│ ├── shared/ # 共享工具 +│ └── visualizer/ # 可视化报告 +├── examples/ # 示例代码 +├── tests/ # 测试用例 +└── docs/ # 文档 +``` + +## 技术栈 + +- **Python 3.9+**:核心运行环境 +- **Pydantic**:数据验证和序列化 +- **Selenium/Playwright**:Web 自动化 +- **OpenCV/Pillow**:图像处理 +- **HTTPX/AIOHTTP**:HTTP 客户端 +- **Typer**:CLI 框架 +- **Loguru**:日志记录 + +## 快速开始 + +### 安装 + +```bash +pip install midscene-python +``` + +### 基础用法 + +```python +from midscene import Agent +from midscene.web import SeleniumWebPage + +# 创建 Web Agent +with SeleniumWebPage.create() as page: + agent = Agent(page) + + # 使用自然语言进行自动化操作 + await agent.ai_action("点击登录按钮") + await agent.ai_action("输入用户名 'test@example.com'") + await agent.ai_action("输入密码 'password123'") + await agent.ai_action("点击提交按钮") + + # 数据提取 + user_info = await agent.ai_extract("提取用户个人信息") + + # 断言验证 + await agent.ai_assert("页面显示欢迎信息") +``` + +## 主要特性 + +### 🤖 AI 驱动的自动化 + +使用自然语言描述操作,AI 自动理解并执行: + +```python +await agent.ai_action("在搜索框中输入'Python教程'并搜索") +``` + +### 🔍 智能元素定位 + +支持多种定位策略,自动选择最优方案: + +```python +element = await agent.ai_locate("登录按钮") +``` + +### 📊 数据提取 + +从页面提取结构化数据: + +```python +products = await agent.ai_extract({ + "products": [ + {"name": "产品名称", "price": "价格", "rating": "评分"} + ] +}) +``` + +### ✅ 智能断言 + +AI 理解页面状态,进行智能断言: + +```python +await agent.ai_assert("用户已成功登录") +``` + +### 📝 致谢 + +感谢Midscene项目:https://github.com/web-infra-dev/midscene 提供的灵感和技术参考 + +## 许可证 + +MIT License