# zhiyun-autoweb/modules/or_address_handler.py

# modules/or_address_handler.py

import pandas as pd
import os
import time
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def process_addresses(driver):
    """
    Fill in the ``main_address`` column of ``data/o-address.csv`` from Etherscan.

    For every row whose ``main_address`` is still empty, the function opens
    ``https://etherscan.io/address/<address>`` with the given driver, waits
    for the element with id ``mainaddress`` and stores its text in the row.
    Rows for which the element cannot be read are marked ``"Not Found"``.
    The CSV is rewritten after every processed row so that an interrupted
    run can be resumed; rows that already carry a value are skipped.

    If the CSV file does not exist, an error is printed and nothing else
    happens.

    :param driver: Selenium WebDriver instance used to load the pages.
    :return: None. Results are written back to the CSV file.
    """
    csv_file_path = os.path.join('data', 'o-address.csv')

    # 1. Load the CSV file.
    if not os.path.exists(csv_file_path):
        print(f"错误: 文件未找到 at {csv_file_path}")
        return

    df = pd.read_csv(csv_file_path)
    print(f"成功读取 {len(df)} 条地址。")

    # Make sure the result column exists *before* iterating and can hold
    # strings:
    #  * iterrows() snapshots the columns when iteration starts, so a column
    #    created inside the loop is missing from every later ``row``;
    #  * a column that is entirely empty is read as float64, and writing a
    #    string into it is an incompatible-dtype assignment in pandas.
    if 'main_address' in df.columns:
        df['main_address'] = df['main_address'].astype(object)
    else:
        df['main_address'] = pd.Series(index=df.index, dtype=object)

    # 2. Process each address in turn.
    for index, row in df.iterrows():
        address = row['address']

        # Skip rows that already have a non-NaN, non-empty result.
        existing = row['main_address']
        if pd.notna(existing) and existing:
            print(f"地址 {address} 已有 main_address，跳过。")
            continue

        url = f"https://etherscan.io/address/{address}"
        print(f"正在访问: {url}")
        # Kept outside the try block on purpose: a failure to navigate at all
        # should stop the run instead of marking every remaining row.
        driver.get(url)

        try:
            # Wait up to 20 seconds for the #mainaddress element to appear.
            main_address_element = WebDriverWait(driver, 20).until(
                EC.presence_of_element_located((By.ID, "mainaddress"))
            )
            main_address_value = main_address_element.text
        except Exception as exc:
            # Best effort per address: report why the lookup failed, mark the
            # row, and carry on with the next one.
            print(f"  -> 无法为地址 {address} 找到 main_address。")
            print(f"     原因: {type(exc).__name__}")
            main_address_value = "Not Found"
        else:
            print(f"  -> 成功获取 main_address: {main_address_value}")

        df.at[index, 'main_address'] = main_address_value

        # Persist after every row so an interrupted run loses nothing.
        df.to_csv(csv_file_path, index=False)

        # Short pause to avoid hitting the site too frequently.
        time.sleep(2)

    # 3. Final save once every row has been handled.
    df.to_csv(csv_file_path, index=False)
    print(f"\n处理完成！结果已保存到 {csv_file_path}")