使用Python對Excel表內容進行中文提取的示例代碼

更新時間：2025年11月28日 09:35:59 作者：溫輕舟

本項目是基于Tkinter的圖形界面應用程序,用于從Excel文件中提取符合特定正則表達式模式（默認提取中文）的文本內容,并將結果輸出到指定列或新文件中,感興趣的小伙伴跟著小編一起來看看吧

一：效果展示：

本項目是基于Tkinter的圖形界面應用程序，用于從Excel文件中提取符合特定正則表達式模式（默認提取中文）的文本內容，并將結果輸出到指定列或新文件中

二：功能描述：

1. 核心功能

（1）中文內容提取

默認使用正則表達式 [\u4e00-\u9fa5]+ 匹配所有中文字符
可自定義正則表達式模式提取特定內容
將匹配到的內容從輸入列提取到輸出列

（2）文件處理

支持選擇輸入 Excel 文件（.xlsx 格式）
支持選擇輸出目錄
可選擇覆蓋原文件或創建新文件（自動在原文件名前添加 “提取結果_” 前綴）

2. 用戶界面功能

（1）直觀的圖形界面

清晰的輸入字段和按鈕
進度條顯示處理進度

（2）主要組件

文件選擇區域：瀏覽并選擇輸入 Excel 文件和輸出目錄
正則表達式設置：可自定義或恢復默認中文提取模式
列設置：指定輸入列和輸出列（默認為 A 列到 B 列）
高級選項：覆蓋模式開關
按鈕——開始處理

3. 使用場景

數據清洗： 從混合內容中提取純中文信息
文本分析預處理： 為后續的中文自然語言處理準備數據
內容遷移： 將分散在各處的中文內容集中到特定列
多語言文檔處理： 分離中文和其他語言內容

三：完整代碼：

import os
import re
import tkinter as tk
from tkinter import filedialog, messagebox, ttk
from openpyxl import load_workbook

class ExcelChineseExtractorApp:
    """Tkinter GUI that copies regex matches from one Excel column to another.

    The user picks an .xlsx workbook, an output directory, a regular
    expression (by default runs of characters in U+4E00..U+9FA5) and two
    column letters.  For every row from row 2 onwards of the active sheet,
    the matches found in the input column are joined with single spaces and
    written to the output column.  The workbook is then saved either over the
    input file or as a new file in the output directory.
    """

    def __init__(self, root):
        """Configure the window, create the Tk variables and build the UI.

        Args:
            root: The Tk root window that hosts the application.
        """
        self.root = root
        self.root.title("Excel 中文提取工具")
        self.root.geometry("800x500")
        self.default_regex = r"[\u4e00-\u9fa5]+"
        self.input_file = tk.StringVar()
        self.output_dir = tk.StringVar()
        self.regex_pattern = tk.StringVar(value=self.default_regex)
        self.input_column = tk.StringVar(value="A")
        self.output_column = tk.StringVar(value="B")
        self.overwrite_mode = tk.BooleanVar(value=False)
        # Guards run_extraction against re-entry: root.update() inside the
        # processing loop dispatches pending events, including button clicks.
        self._is_running = False
        self.create_widgets()

    def create_widgets(self):
        """Build and pack every widget of the main window, top to bottom."""
        main_frame = tk.Frame(self.root, padx=10, pady=10)
        main_frame.pack(fill="both", expand=True)

        # Row 1: input workbook path + browse button.
        tk.Label(main_frame, text="1. 選擇 Excel 文件:", anchor="w").pack(fill="x", pady=(5, 0))
        file_frame = tk.Frame(main_frame)
        file_frame.pack(fill="x", pady=5)
        tk.Entry(file_frame, textvariable=self.input_file, width=60).pack(side="left", expand=True, fill="x")
        tk.Button(file_frame, text="瀏覽...", command=self.browse_input_file).pack(side="right", padx=5)

        # Row 2: output directory + browse button.
        tk.Label(main_frame, text="2. 選擇輸出目錄:", anchor="w").pack(fill="x", pady=(5, 0))
        output_frame = tk.Frame(main_frame)
        output_frame.pack(fill="x", pady=5)
        tk.Entry(output_frame, textvariable=self.output_dir, width=60).pack(side="left", expand=True, fill="x")
        tk.Button(output_frame, text="瀏覽...", command=self.browse_output_dir).pack(side="right", padx=5)

        # Row 3: regular expression + "restore default" button.
        tk.Label(main_frame, text="3. 自定義正則表達式（默認提取中文）:", anchor="w").pack(fill="x", pady=(5, 0))
        regex_frame = tk.Frame(main_frame)
        regex_frame.pack(fill="x", pady=5)
        tk.Entry(regex_frame, textvariable=self.regex_pattern, width=60).pack(side="left", expand=True, fill="x")
        tk.Button(regex_frame, text="恢復默認", command=self.reset_regex).pack(side="right", padx=5)

        # Row 4: input and output column letters.
        col_frame = tk.Frame(main_frame)
        col_frame.pack(fill="x", pady=5)
        tk.Label(col_frame, text="4. 處理列（如 A）:").pack(side="left")
        tk.Entry(col_frame, textvariable=self.input_column, width=5).pack(side="left", padx=5)
        tk.Label(col_frame, text="輸出列（如 B）:").pack(side="left")
        tk.Entry(col_frame, textvariable=self.output_column, width=5).pack(side="left", padx=5)

        # Advanced options: overwrite the input file instead of writing a copy.
        adv_frame = tk.LabelFrame(main_frame, text="高級選項", padx=10, pady=10)
        adv_frame.pack(fill="x", pady=10)
        tk.Checkbutton(adv_frame, text="覆蓋原文件（不創建新文件）", variable=self.overwrite_mode).pack(anchor="w")

        # Start button.
        tk.Button(
            main_frame,
            text="開始提取",
            command=self.run_extraction,
            bg="#4CAF50",
            fg="white",
            height=2,
            font=("Arial", 12, "bold")
        ).pack(fill="x", pady=10)

        # Progress bar, driven row by row from run_extraction.
        self.progress = ttk.Progressbar(main_frame, orient="horizontal", length=700, mode="determinate")
        self.progress.pack(pady=10)

    def browse_input_file(self):
        """Let the user pick the input workbook.

        On a successful pick the output directory is also pre-filled with the
        directory that contains the chosen file.
        """
        filepath = filedialog.askopenfilename(
            title="選擇 Excel 文件",
            filetypes=[("Excel 文件", "*.xlsx"), ("所有文件", "*.*")]
        )
        if filepath:
            self.input_file.set(filepath)
            self.output_dir.set(os.path.dirname(filepath))

    def browse_output_dir(self):
        """Let the user pick the directory the result file is written to."""
        dirpath = filedialog.askdirectory(title="選擇輸出目錄")
        if dirpath:
            self.output_dir.set(dirpath)

    def reset_regex(self):
        """Put the default pattern back into the regex entry."""
        self.regex_pattern.set(self.default_regex)

    @staticmethod
    def _extract_text(regex, value):
        """Return all matches of ``regex`` in a cell value, space-separated.

        Args:
            regex: A pattern string or a compiled pattern.
            value: The raw cell value; ``None`` means an empty cell.  Any
                other value (including ``0``) is converted with ``str``.

        Returns:
            The matches joined by single spaces, or ``""`` when the cell is
            empty or nothing matches.
        """
        if value is None:
            return ""
        pieces = []
        for match in re.findall(regex, str(value)):
            if isinstance(match, tuple):
                # A pattern with several capture groups yields tuples, which
                # str.join cannot handle; keep the non-empty group texts.
                pieces.extend(part for part in match if part)
            else:
                pieces.append(match)
        return " ".join(pieces)

    def run_extraction(self):
        """Validate the form, process the active sheet and save the workbook.

        Rows are processed starting at row 2.  Validation problems and any
        exception raised while loading, processing or saving are reported
        through message boxes instead of being propagated.
        """
        if self._is_running:
            # A run is already in progress (this call was triggered through
            # root.update() below); ignore the extra click.
            return

        input_path = self.input_file.get()
        output_dir = self.output_dir.get()
        regex = self.regex_pattern.get()
        input_col = self.input_column.get().strip().upper()
        output_col = self.output_column.get().strip().upper()
        overwrite = self.overwrite_mode.get()

        # The output directory is only needed when a new file is created.
        if not input_path or (not overwrite and not output_dir):
            messagebox.showwarning("警告", "請選擇輸入文件和輸出目錄！")
            return

        # Reject anything that is not a plain column letter up front; an
        # empty or numeric value would otherwise address a whole row.
        column_format = re.compile(r"[A-Z]{1,3}")
        if not (column_format.fullmatch(input_col) and column_format.fullmatch(output_col)):
            messagebox.showwarning("警告", "列名必須是字母（如 A、B、AA）！")
            return

        # Compile once: reports a bad pattern before the workbook is touched
        # and avoids a pattern lookup on every row.
        try:
            pattern = re.compile(regex)
        except re.error as exc:
            messagebox.showerror("錯誤", f"正則表達式無效:\n{exc}")
            return

        self._is_running = True
        try:
            wb = load_workbook(input_path)
            ws = wb.active

            total_rows = ws.max_row
            self.progress["maximum"] = total_rows
            self.progress["value"] = 0

            for row in range(2, total_rows + 1):
                source_value = ws[f"{input_col}{row}"].value
                ws[f"{output_col}{row}"].value = self._extract_text(pattern, source_value)
                self.progress["value"] = row
                self.root.update()  # keep the window responsive

            if overwrite:
                output_path = input_path
            else:
                filename = os.path.basename(input_path)
                output_path = os.path.join(output_dir, f"提取結果_{filename}")

            wb.save(output_path)
            messagebox.showinfo("成功", f"提取完成！\n結果已保存至:\n{output_path}")

        except Exception as e:
            # Top-level GUI boundary: show the failure rather than crash.
            messagebox.showerror("錯誤", f"處理失敗:\n{str(e)}")
        finally:
            self._is_running = False
            self.progress["value"] = 0

if __name__ == "__main__":
    # Script entry point: create the Tk root window, attach the application
    # to it, and hand control over to the Tk event loop.
    window = tk.Tk()
    app = ExcelChineseExtractorApp(window)
    window.mainloop()

四：代碼分析：

1. 導入模塊

# os: 用于文件路徑操作
import os

# re: 用于正則表達式匹配
import re

# tkinter及相關模塊: 創建圖形用戶界面
import tkinter as tk
from tkinter import filedialog, messagebox, ttk

# openpyxl: 處理Excel文件
from openpyxl import load_workbook

2. 主應用類

class ExcelChineseExtractorApp:
    """Main application object of the Excel text-extraction GUI."""

    def __init__(self, root):
        """Set up the window, create the Tk variables and build the widgets.

        Args:
            root: The Tk root window that hosts the application.
        """
        # Pattern used until the user enters another one: one or more
        # consecutive characters in the range U+4E00..U+9FA5.
        self.default_regex = r"[\u4e00-\u9fa5]+"

        # Window title and initial size.
        self.root = root
        root.title("Excel 中文提取工具")
        root.geometry("800x500")

        # Tk variables bound to the form controls.
        self.input_file = tk.StringVar()                             # input workbook path
        self.output_dir = tk.StringVar()                             # output directory
        self.regex_pattern = tk.StringVar(value=self.default_regex)  # pattern in use
        self.input_column = tk.StringVar(value="A")                  # column to read
        self.output_column = tk.StringVar(value="B")                 # column to write
        self.overwrite_mode = tk.BooleanVar(value=False)             # overwrite input file?

        # Build the widgets last, once every variable they bind to exists.
        self.create_widgets()

3. 界面創建方法

def create_widgets(self):
    """Build and pack every widget of the main window, top to bottom.

    The widget labels are plain Chinese text; stray pinyin annotations that
    had crept into several of them (for example in the "restore default" and
    "start" button captions) are removed so the UI shows readable captions.
    """
    # Main container frame.
    main_frame = tk.Frame(self.root, padx=10, pady=10)
    main_frame.pack(fill="both", expand=True)

    # Input file selection: path entry + browse button.
    tk.Label(main_frame, text="1. 選擇 Excel 文件:", anchor="w").pack(fill="x", pady=(5, 0))
    file_frame = tk.Frame(main_frame)
    file_frame.pack(fill="x", pady=5)
    tk.Entry(file_frame, textvariable=self.input_file, width=60).pack(side="left", expand=True, fill="x")
    tk.Button(file_frame, text="瀏覽...", command=self.browse_input_file).pack(side="right", padx=5)

    # Output directory selection: path entry + browse button.
    tk.Label(main_frame, text="2. 選擇輸出目錄:", anchor="w").pack(fill="x", pady=(5, 0))
    output_frame = tk.Frame(main_frame)
    output_frame.pack(fill="x", pady=5)
    tk.Entry(output_frame, textvariable=self.output_dir, width=60).pack(side="left", expand=True, fill="x")
    tk.Button(output_frame, text="瀏覽...", command=self.browse_output_dir).pack(side="right", padx=5)

    # Regular expression: pattern entry + "restore default" button.
    tk.Label(main_frame, text="3. 自定義正則表達式（默認提取中文）:", anchor="w").pack(fill="x", pady=(5, 0))
    regex_frame = tk.Frame(main_frame)
    regex_frame.pack(fill="x", pady=5)
    tk.Entry(regex_frame, textvariable=self.regex_pattern, width=60).pack(side="left", expand=True, fill="x")
    tk.Button(regex_frame, text="恢復默認", command=self.reset_regex).pack(side="right", padx=5)

    # Column settings: input and output column letters.
    col_frame = tk.Frame(main_frame)
    col_frame.pack(fill="x", pady=5)
    tk.Label(col_frame, text="4. 處理列（如 A）:").pack(side="left")
    tk.Entry(col_frame, textvariable=self.input_column, width=5).pack(side="left", padx=5)
    tk.Label(col_frame, text="輸出列（如 B）:").pack(side="left")
    tk.Entry(col_frame, textvariable=self.output_column, width=5).pack(side="left", padx=5)

    # Advanced options: overwrite the input file instead of writing a copy.
    adv_frame = tk.LabelFrame(main_frame, text="高級選項", padx=10, pady=10)
    adv_frame.pack(fill="x", pady=10)
    tk.Checkbutton(adv_frame, text="覆蓋原文件（不創建新文件）", variable=self.overwrite_mode).pack(anchor="w")

    # Start button.
    tk.Button(
        main_frame,
        text="開始提取",
        command=self.run_extraction,
        bg="#4CAF50",
        fg="white",
        height=2,
        font=("Arial", 12, "bold")
    ).pack(fill="x", pady=10)

    # Progress bar; its value is driven by run_extraction.
    self.progress = ttk.Progressbar(main_frame, orient="horizontal", length=700, mode="determinate")
    self.progress.pack(pady=10)

4. 文件瀏覽方法

def browse_input_file(self):
    """Ask the user for the input Excel file and remember the choice.

    When a file is picked, the output directory field is also set to the
    directory containing that file.  Nothing changes if the dialog returns
    an empty result.
    """
    chosen = filedialog.askopenfilename(
        filetypes=[("Excel 文件", "*.xlsx"), ("所有文件", "*.*")],
        title="選擇 Excel 文件",
    )
    if not chosen:
        return
    self.input_file.set(chosen)
    # Default the output location to the folder the input file lives in.
    containing_dir = os.path.dirname(chosen)
    self.output_dir.set(containing_dir)

def browse_output_dir(self):
    """Ask the user for the output directory and store it if one was picked."""
    chosen = filedialog.askdirectory(title="選擇輸出目錄")
    if not chosen:
        # Empty result from the dialog: keep the current value.
        return
    self.output_dir.set(chosen)

def reset_regex(self):
    """Restore the regex field to the application's default pattern."""
    default_pattern = self.default_regex
    self.regex_pattern.set(default_pattern)

5. 核心提取功能

def run_extraction(self):
    """Validate the form, process the active sheet and save the workbook.

    For every row from row 2 onwards, all matches of the regular expression
    in the input column are joined with single spaces and written to the
    output column.  The workbook is then saved over the input file
    (overwrite mode) or as a new file in the output directory.  Validation
    problems and any exception raised while loading, processing or saving
    are reported through message boxes instead of being propagated.
    """
    # root.update() in the loop below dispatches pending events, so a second
    # click on the start button would otherwise start a nested run.  The
    # flag is created lazily here, hence getattr with a default.
    if getattr(self, "_is_running", False):
        return

    # Read the user's settings from the form.
    input_path = self.input_file.get()
    output_dir = self.output_dir.get()
    regex = self.regex_pattern.get()
    input_col = self.input_column.get().strip().upper()
    output_col = self.output_column.get().strip().upper()
    overwrite = self.overwrite_mode.get()

    # The output directory is only needed when a new file is created.
    if not input_path or (not overwrite and not output_dir):
        messagebox.showwarning("警告", "請選擇輸入文件和輸出目錄！")
        return

    # Reject anything that is not a plain column letter up front; an empty
    # or numeric value would otherwise address a whole row.
    column_format = re.compile(r"[A-Z]{1,3}")
    if not (column_format.fullmatch(input_col) and column_format.fullmatch(output_col)):
        messagebox.showwarning("警告", "列名必須是字母（如 A、B、AA）！")
        return

    # Compile once: reports a bad pattern before the workbook is touched and
    # avoids a pattern lookup on every row.
    try:
        pattern = re.compile(regex)
    except re.error as exc:
        messagebox.showerror("錯誤", f"正則表達式無效:\n{exc}")
        return

    self._is_running = True
    try:
        # Load the workbook and use its active sheet.
        wb = load_workbook(input_path)
        ws = wb.active

        # One progress step per worksheet row.
        total_rows = ws.max_row
        self.progress["maximum"] = total_rows
        self.progress["value"] = 0

        # Start at row 2 (row 1 is skipped, presumably a header row).
        for row in range(2, total_rows + 1):
            raw_value = ws[f"{input_col}{row}"].value
            # Only None means "empty"; 0 and other falsy values are real data.
            cell_text = "" if raw_value is None else str(raw_value)

            pieces = []
            for match in pattern.findall(cell_text):
                if isinstance(match, tuple):
                    # Several capture groups yield tuples, which str.join
                    # cannot handle; keep the non-empty group texts.
                    pieces.extend(part for part in match if part)
                else:
                    pieces.append(match)

            ws[f"{output_col}{row}"].value = " ".join(pieces)

            self.progress["value"] = row
            self.root.update()  # keep the window responsive

        # Decide where the result goes.
        if overwrite:
            output_path = input_path
        else:
            filename = os.path.basename(input_path)
            output_path = os.path.join(output_dir, f"提取結果_{filename}")

        wb.save(output_path)
        messagebox.showinfo("成功", f"提取完成！\n結果已保存至:\n{output_path}")

    except Exception as e:
        # Top-level GUI boundary: show the failure rather than crash.
        messagebox.showerror("錯誤", f"處理失敗:\n{str(e)}")
    finally:
        self._is_running = False
        # Reset the progress bar whether the run succeeded or failed.
        self.progress["value"] = 0

以上就是使用Python對Excel表內容進行中文提取的示例代碼的詳細內容，更多關于Python Excel表內容中文提取的資料請關注腳本之家其它相關文章！

您可能感興趣的文章:

国产无遮挡裸体免费直播视频,久久精品国产蜜臀av,动漫在线视频一区二区,欧亚日韩一区二区三区,久艹在线免费视频,国产精品美女网站免费,正在播放 97超级视频在线观看,斗破苍穹年番在线观看免费,51最新乱码中文字幕

使用Python對Excel表內容進行中文提取的示例代碼

目錄

一：效果展示：

二：功能描述：

1. 核心功能

（1）中文內(nèi)容提取

（2）文件處理

2. 用戶界面功能

（1）直觀的圖形界面

（2）主要組件

3. 使用場(chǎng)景

三：完整代碼：

四：代碼分析：

1. 導(dǎo)入模塊

2. 主應(yīng)用類

3. 界面創(chuàng)建方法

4. 文件瀏覽方法

5. 核心提取功能

相關(guān)文章

最新評(píng)論

大家感興趣的內(nèi)容

最近更新的內(nèi)容

常用在線小工具

国产无遮挡裸体免费直播视频,久久精品国产蜜臀av,动漫在线视频一区二区,欧亚日韩一区二区三区,久艹在线 免费视频,国产精品美女网站免费,正在播放 97超级视频在线观看,斗破苍穹年番在线观看免费,51最新乱码中文字幕

使用Python對(duì)Excel表內(nèi)容進(jìn)行中文提取的示例代碼

目錄

一：效果展示：

二：功能描述：

1. 核心功能

（1）中文內(nèi)容提取

（2）文件處理

2. 用戶界面功能

（1）直觀的圖形界面

（2）主要組件

3. 使用場(chǎng)景

三：完整代碼：

四：代碼分析：

1. 導(dǎo)入模塊

2. 主應(yīng)用類

3. 界面創(chuàng)建方法

4. 文件瀏覽方法

5. 核心提取功能

相關(guān)文章

最新評(píng)論

大家感興趣的內(nèi)容

最近更新的內(nèi)容

常用在線小工具

国产无遮挡裸体免费直播视频,久久精品国产蜜臀av,动漫在线视频一区二区,欧亚日韩一区二区三区,久艹在线免费视频,国产精品美女网站免费,正在播放 97超级视频在线观看,斗破苍穹年番在线观看免费,51最新乱码中文字幕