碎碎念鸭鸭🪿

08:22 · Mar 10, 2025 · Mon

web表格导出（油猴脚本版） #excel

说明
补一下之前的坑。可以方便地从任意网站提取表格并导出为 Excel 格式。原py版

发布地址 https://greasyfork.org/zh-CN/scripts/529375-web%E8%A1%A8%E6%A0%BC%E5%AF%BC%E5%87%BA%E5%8A%A9%E6%89%8B

greasyfork.org

Web表格导出助手

从网页中提取表格并导出为Excel

excel

06:26 · Oct 16, 2024 · Wed

web表格 #Excel

import os
import subprocess

from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime
import random
import string
from selenium import webdriver
from selenium.webdriver.chrome.service import Service


def generate_random_string(length=6):
    """Return a random string of ``length`` ASCII letters and digits.

    Characters are drawn with replacement via ``random.choices``, so the
    result is not suitable for security-sensitive tokens.
    """
    alphabet = string.ascii_letters + string.digits
    picked = random.choices(alphabet, k=length)
    return ''.join(picked)

# Force-close every running chrome.exe before starting the driver.
# check=False: a non-zero exit status from taskkill is ignored rather than raised.
# NOTE(review): presumably needed because the profile directory below cannot
# be shared with an already-running Chrome -- confirm.
subprocess.run(
    ["taskkill", "/f", "/im", "chrome.exe"],
    stdout=subprocess.DEVNULL,
    stderr=subprocess.DEVNULL,
    check=False,
)

# Point Chrome at the "User Data" directory under %LOCALAPPDATA%.
user_data_dir = os.path.join(os.environ['LOCALAPPDATA'], r"Google\Chrome\User Data")
options = webdriver.ChromeOptions()
options.add_argument(f"user-data-dir={user_data_dir}")

# chromedriver is expected at this fixed location.
service = Service(executable_path=r'C:\Windows\chromedriver.exe')

driver = webdriver.Chrome(service=service, options=options)

# Open a start page; the user navigates to the target page manually afterwards.
driver.get("https://baidu.com")

def _safe_filename(text):
    """Return ``text`` with characters that are invalid in Windows file names replaced by ``_``.

    ``None`` or blank input yields an empty string so the caller can fall
    back to another name.
    """
    if not text:
        return ''
    replacements = str.maketrans({ch: '_' for ch in '\\/:*?"<>|\r\n\t'})
    return text.translate(replacements).strip()


def _table_to_dataframe(table):
    """Convert one parsed ``<table>`` tag into a DataFrame.

    The first ``<tr>`` supplies the column names and the remaining rows are
    the data. Rows shorter than the widest row are padded with empty
    strings, and missing header cells are named ``column_N``, so ragged
    tables no longer raise. A table with no cells yields an empty DataFrame.
    """
    rows = [
        [cell.text.strip() for cell in tr.find_all(['td', 'th'])]
        for tr in table.find_all('tr')
    ]
    width = max((len(cells) for cells in rows), default=0)
    if width == 0:
        return pd.DataFrame()

    header = rows[0] + [f"column_{j + 1}" for j in range(len(rows[0]), width)]
    body = [cells + [''] * (width - len(cells)) for cells in rows[1:]]
    return pd.DataFrame(body, columns=header)


def _export_table(df, number, title_tag, current_date):
    """Write ``df`` to ``<title>_<MMDD>_table<number>.xlsx`` in the working directory."""
    excel_path = f'{title_tag}_{current_date}_table{number}.xlsx'
    df.to_excel(excel_path, index=False)
    print(f"数据已保存到 {excel_path}")


# Interactive loop: each pass reads the page currently shown in the driver,
# previews every table found and exports the ones the user picks.
while True:
    try:
        # Wait until the user has navigated to the page holding the table.
        input("请访问web表格所在页面，回车开始执行：")
        div_html = driver.page_source

        # Parse the HTML with BeautifulSoup.
        soup = BeautifulSoup(div_html, 'html.parser')

        # Use the page title as the file-name prefix. It is sanitised because
        # titles often contain characters that are illegal in file names, and
        # a random string is used when the title is missing or blank.
        raw_title = soup.title.get_text() if soup.title else ''
        title_tag = _safe_filename(raw_title) or generate_random_string()
        current_date = datetime.now().strftime("%m%d")

        # Collect every table on the page.
        tables = soup.find_all('table')

        num_tables = len(tables)
        if num_tables == 0:
            print("未找到表格，请检查页面内容。")
            # Go straight back to the "press Enter" prompt.
            continue

        print(f"*****识别到 {num_tables} 个表格*****")

        dataframes = []
        for i, table in enumerate(tables):
            df = _table_to_dataframe(table)
            dataframes.append(df)

            print(f"\n======================预览数据 (表格 {i + 1})===========================")
            print(df.head())

        # Ask which table to export: a 1-based index, or 'a' for all of them.
        selection = input(f"\n请输入要输出的表格序号 (1-{num_tables}) 或 'a' 输出所有表格: ").strip().lower()

        if selection == 'a':
            for i, df in enumerate(dataframes):
                _export_table(df, i + 1, title_tag, current_date)
        else:
            try:
                selected_table = int(selection) - 1
            except ValueError:
                print("请输入有效的序号")
            else:
                if 0 <= selected_table < num_tables:
                    _export_table(dataframes[selected_table], selected_table + 1, title_tag, current_date)
                else:
                    print("无效的table序号。")

    except Exception as e:
        if 'target window already closed' in str(e):
            print('监听窗口已关闭..')
            # Shut the driver down before leaving so no chromedriver process
            # is left running.
            driver.quit()
            raise SystemExit(0)
        print(f"发生错误: {e}")

    # Ask whether to process another page.
    cont = input('继续处理？(y/其他退出): ')
    if cont.strip().lower() != 'y':
        driver.quit()
        break

Excel

02:48 · Oct 16, 2024 · Wed

图片 #Excel

https://docsmall.com/image-to-excel

（订阅本频道 https://t.me/duckduckcute ）

Docsmall

免费在线图片转 Excel 工具 - docsmall

docsmall 提供在线图片转Excel功能，使用图片OCR技术，将图片转换为可编辑的Excel文档，高度还原图片格式。

Excel