# modules/or_address_handler.py
import pandas as pd
|
||
import os
|
||
import time
|
||
from selenium.webdriver.common.by import By
|
||
from selenium.webdriver.support.ui import WebDriverWait
|
||
from selenium.webdriver.support import expected_conditions as EC
|
||
|
||
def process_addresses(driver):
    """Fill the ``main_address`` column of ``data/o-address.csv`` from Etherscan.

    For every row whose ``main_address`` cell is still empty, the page
    ``https://etherscan.io/address/<address>`` is opened with ``driver`` and
    the text of the element with id ``mainaddress`` is stored in that row.
    Rows whose lookup fails are marked ``"Not Found"``. Rows that already hold
    a non-empty value (including ``"Not Found"``) are skipped, so the function
    can be re-run to resume an interrupted pass.

    The CSV is rewritten after every processed row and once more at the end.
    If the CSV file does not exist, an error is printed and the function
    returns without using ``driver``.

    :param driver: Selenium WebDriver instance used to load the pages.
    :return: None. Results are written back to the CSV file in place.
    """
    # Location of the input/output file, relative to the working directory.
    csv_file_path = os.path.join('data', 'o-address.csv')

    # 1. Load the CSV file.
    if not os.path.exists(csv_file_path):
        print(f"错误: 文件未找到 at {csv_file_path}")
        return

    df = pd.read_csv(csv_file_path)
    print(f"成功读取 {len(df)} 条地址。")

    # Make sure the result column exists before the loop starts. Creating it
    # lazily on the first write, while iterating with iterrows(), left the
    # rows yielded afterwards without that column and the "already filled?"
    # check failed with KeyError on the second row.
    # The column is held as ``object`` so that writing a string into a column
    # that was read as all-empty (float64 NaN) is not a dtype-changing write.
    if 'main_address' in df.columns:
        df['main_address'] = df['main_address'].astype(object)
    else:
        df['main_address'] = None

    # 2. Process the addresses one by one.
    for index in df.index:
        address = df.at[index, 'address']

        # Skip rows that already carry a value: the cell must be neither
        # NaN/None nor an empty string.
        existing = df.at[index, 'main_address']
        if pd.notna(existing) and existing:
            print(f"地址 {address} 已有 main_address,跳过。")
            continue

        # Build the URL and open it. Navigation errors are deliberately not
        # caught here: they propagate to the caller, and rows finished so far
        # have already been saved.
        url = f"https://etherscan.io/address/{address}"
        print(f"正在访问: {url}")
        driver.get(url)

        try:
            # Wait (at most 20 seconds) for the #mainaddress element to be
            # present in the DOM.
            wait = WebDriverWait(driver, 20)
            main_address_element = wait.until(
                EC.presence_of_element_located((By.ID, "mainaddress"))
            )

            # Read the value and store it in the current row.
            main_address_value = main_address_element.text
            df.at[index, 'main_address'] = main_address_value
            print(f"  -> 成功获取 main_address: {main_address_value}")
        except Exception:
            # Best effort: any failure while locating/reading the element is
            # recorded as "Not Found" so the run can continue with the next
            # address.
            print(f"  -> 无法为地址 {address} 找到 main_address。")
            df.at[index, 'main_address'] = "Not Found"

        # Save after every processed row so an interruption loses no progress.
        df.to_csv(csv_file_path, index=False)

        # Short pause to avoid sending requests too frequently.
        time.sleep(2)

    # 3. Save the final result once more after the loop.
    df.to_csv(csv_file_path, index=False)
    print(f"\n处理完成!结果已保存到 {csv_file_path}")
|