# solidstate-tools/Screen/process_txt.py

# -*- coding: utf-8 -*-

import os
import re
import sys
import csv


def extract_data_from_folder(folder_path):
    """Collect selected values from every ``.txt`` file in a folder into a CSV.

    Each file whose name ends with ``.txt`` is read as UTF-8 and searched for
    three values (the first occurrence of each is used):

    1. ``Percolation diameter (A): <number>``
    2. ``the minium of d <number> #``
    3. ``Maximum node length detected: <number> A``

    One CSV row is produced per readable file: the file name without its
    extension followed by the three values. A value that is not found is
    written as an empty string. Files that cannot be read or decoded are
    reported on stdout and left out of the CSV.

    The CSV is written to the current working directory and is named after
    the folder (``<folder name>.csv``). The folder name is taken from the
    absolute path so that relative arguments such as ``.`` or ``..`` yield
    the real directory name rather than ``..csv`` / ``...csv``.

    :param folder_path: Path of the folder containing the ``.txt`` files.
    :return: ``None``. Progress and errors are reported via ``print``.
    """
    # Bail out early when the folder does not exist.
    if not os.path.isdir(folder_path):
        print(f"错误：文件夹 '{folder_path}' 不存在。")
        return

    # One pattern per CSV value column, in column order.
    patterns = (
        # Matches e.g. "Percolation diameter (A): 1.234"
        re.compile(r"Percolation diameter \(A\): ([\d\.]+)"),
        # Matches e.g. "the minium of d 1.23 #"
        re.compile(r"the minium of d\s*([\d\.]+)\s*#"),
        # Matches e.g. "Maximum node length detected: 5.67 A"
        re.compile(r"Maximum node length detected: ([\d\.]+) A"),
    )

    all_data = []

    # sorted() keeps the row order stable between runs.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith(".txt"):
            continue

        txt_path = os.path.join(folder_path, filename)

        # Only reading/decoding can fail here, so keep the try body minimal.
        try:
            with open(txt_path, 'r', encoding='utf-8') as file:
                content = file.read()
        except (OSError, UnicodeError) as e:
            print(f"处理文件 {filename} 时发生错误: {e}")
            continue

        # Missing values become '' so every row has the same number of columns.
        values = []
        for pattern in patterns:
            match = pattern.search(content)
            values.append(match.group(1) if match else '')

        base_filename = os.path.splitext(filename)[0]
        all_data.append([base_filename, *values])

    # Do not create a CSV when there is nothing to write.
    if not all_data:
        print("未在文件夹中找到任何 .txt 文件或未能提取任何数据。")
        return

    # abspath() resolves '.', '..' and trailing separators before the
    # basename is taken, so the CSV is named after the actual folder.
    folder_name = os.path.basename(os.path.abspath(folder_path))
    csv_filename = f"{folder_name}.csv"

    try:
        # newline='' lets the csv module control line endings itself.
        with open(csv_filename, 'w', newline='', encoding='utf-8') as csvfile:
            writer = csv.writer(csvfile)

            headers = ['filename', 'Percolation_diameter_A', 'minium_of_d', 'Max_node_length_A']
            writer.writerow(headers)
            writer.writerows(all_data)

        print(f"数据成功写入到文件: {csv_filename}")

    except (OSError, csv.Error, UnicodeError) as e:
        print(f"写入CSV文件 {csv_filename} 时发生错误: {e}")


if __name__ == "__main__":
    # Exactly one positional argument is expected: the folder to process.
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print("用法: python your_script_name.py <folder_path>")
        sys.exit(1)

    extract_data_from_folder(cli_args[0])