66 lines
2.3 KiB
Python
66 lines
2.3 KiB
Python
|
|
# modules/or_address_handler.py
|
|||
|
|
|
|||
|
|
import pandas as pd
|
|||
|
|
import os
|
|||
|
|
import time
|
|||
|
|
from selenium.webdriver.common.by import By
|
|||
|
|
from selenium.webdriver.support.ui import WebDriverWait
|
|||
|
|
from selenium.webdriver.support import expected_conditions as EC
|
|||
|
|
|
|||
|
|
def _fetch_main_address(driver, address, wait_timeout):
    """Open the Etherscan page for *address* and return its #mainaddress text.

    Any navigation or wait failure propagates to the caller unchanged.
    """
    url = f"https://etherscan.io/address/{address}"
    print(f"正在访问: {url}")
    driver.get(url)

    # Block until the element with id "mainaddress" is present in the DOM.
    element = WebDriverWait(driver, wait_timeout).until(
        EC.presence_of_element_located((By.ID, "mainaddress"))
    )
    return element.text


def process_addresses(driver, *, csv_file_path=None, wait_timeout=20, delay=2):
    """Fill the ``main_address`` column of the address CSV from Etherscan.

    Reads the CSV, and for every row whose ``main_address`` is still empty
    opens ``https://etherscan.io/address/<address>`` with *driver*, reads the
    text of the ``#mainaddress`` element and stores it in that row. Rows whose
    lookup fails are marked ``"Not Found"``. The CSV is rewritten after each
    processed row so that an interrupted run keeps its progress, and once more
    at the end.

    :param driver: Selenium WebDriver instance used to load the pages.
    :param csv_file_path: Path of the CSV to read and update. Defaults to
        ``data/o-address.csv`` relative to the current working directory.
    :param wait_timeout: Maximum number of seconds to wait for the
        ``#mainaddress`` element on each page.
    :param delay: Seconds to sleep after each processed row, to avoid hitting
        the site too frequently.
    :return: ``None``. If the CSV does not exist, a message is printed and
        nothing else happens.
    :raises KeyError: If the CSV has no ``address`` column.
    """
    if csv_file_path is None:
        csv_file_path = os.path.join('data', 'o-address.csv')

    # 1. Load the CSV.
    if not os.path.exists(csv_file_path):
        print(f"错误: 文件未找到 at {csv_file_path}")
        return

    df = pd.read_csv(csv_file_path)
    print(f"成功读取 {len(df)} 条地址。")

    # Make sure the target column exists and can hold strings. A column that
    # is present but completely empty is read as float64, and writing text
    # into it is rejected (or warned about) by recent pandas versions.
    if 'main_address' in df.columns:
        df['main_address'] = df['main_address'].astype(object)
    else:
        df['main_address'] = None

    # 2. Process every address.
    for index, address in df['address'].items():
        # Skip rows that already carry a value (neither NaN nor empty).
        existing = df.at[index, 'main_address']
        if pd.notna(existing) and existing:
            print(f"地址 {address} 已有 main_address,跳过。")
            continue

        try:
            main_address_value = _fetch_main_address(driver, address, wait_timeout)
        except Exception as exc:
            # Deliberate best-effort boundary: one failing address must not
            # abort the whole batch, but the reason is reported, not hidden.
            print(f" -> 无法为地址 {address} 找到 main_address。")
            print(f"    原因: {type(exc).__name__}")
            main_address_value = "Not Found"
        else:
            print(f" -> 成功获取 main_address: {main_address_value}")

        df.at[index, 'main_address'] = main_address_value

        # Persist after every row so an interrupted run loses nothing.
        df.to_csv(csv_file_path, index=False)

        # Short pause to avoid requesting pages too frequently.
        time.sleep(delay)

    # 3. Write the final result once more after all rows are handled.
    df.to_csv(csv_file_path, index=False)
    print(f"\n处理完成!结果已保存到 {csv_file_path}")
|