Files
screen/py/step2-5-file_process.py
2025-12-07 13:56:33 +08:00

122 lines
5.1 KiB
Python

from step2 import process_files as step2_process
from step3 import process_files as step3_process
from step4 import process_files as step4_process
from step5 import read_files_check_partical as step5_process
import os
import shutil
import os
import shutil
def create_empty_directory_structure(source_dir, target_dir):
    """
    Recursively mirror the directory tree of ``source_dir`` under ``target_dir``.

    Only directories are created; files found in the source tree are ignored.
    Directories that already exist in the target are left alone and are not
    counted.

    Args:
        source_dir (str): Path of the directory whose structure is copied.
        target_dir (str): Path under which the empty structure is created.
            It is created first if it does not exist yet.

    Returns:
        int: Number of directories created by this call (``target_dir``
        itself is included when it had to be created).

    Raises:
        FileNotFoundError: If ``source_dir`` does not exist.
        OSError: If ``target_dir`` itself cannot be created (for example a
            PermissionError); that ``os.makedirs`` call is not guarded.

    Note:
        Errors hit while walking the source tree (unreadable directory,
        failure to create a sub-directory, ...) are reported with ``print``
        and do not propagate; the remaining entries of the directory in
        which the error occurred are skipped.
    """
    created_count = 0

    if not os.path.exists(source_dir):
        raise FileNotFoundError(f"源文件夹不存在: {source_dir}")

    if not os.path.exists(target_dir):
        print(f"目标文件夹不存在,正在创建: {target_dir}")
        os.makedirs(target_dir)
        created_count += 1

    # Resolved once so the walk can recognise the target if it happens to
    # live inside the source tree.
    target_real = os.path.realpath(target_dir)

    def copy_structure(src, dst):
        """Create under ``dst`` every sub-directory found under ``src``."""
        nonlocal created_count
        try:
            for item in os.listdir(src):
                src_path = os.path.join(src, item)
                # Plain files are ignored: only the folder layout is wanted.
                if not os.path.isdir(src_path):
                    continue
                # Never descend into the target itself. Without this guard a
                # target nested inside the source is copied into itself over
                # and over (out/out/out/...) until the OS path limit is hit.
                if os.path.realpath(src_path) == target_real:
                    continue
                dst_path = os.path.join(dst, item)
                if not os.path.exists(dst_path):
                    os.makedirs(dst_path)
                    created_count += 1
                    print(f"创建文件夹: {dst_path}")
                copy_structure(src_path, dst_path)
        except PermissionError:
            print(f"无权限访问目录: {src}")
        except Exception as e:
            # Best effort: report and carry on with the rest of the tree.
            print(f"处理目录 {src} 时出错: {str(e)}")

    try:
        copy_structure(source_dir, target_dir)
        print(f"已成功在 {target_dir} 中创建 {created_count} 个文件夹,复制了完整的目录结构")
        return created_count
    except Exception as e:
        # Reached only if reporting itself fails (e.g. a console encoding
        # error inside print); the count gathered so far is still returned.
        print(f"整体操作失败: {str(e)}")
        return created_count
def _run_screening_steps(src_dir, step2_dir, step3_dir, step4_dir, step5_dir, label):
    """Run screening steps 2 through 5, in order, for one element directory.

    Each step is announced on stdout before it runs. The arguments are passed
    to the imported step functions exactly as the script computed them; what
    each step does with them is defined in the step2..step5 modules.
    """
    print('正在做第二步筛选')
    step2_process(src_dir, step2_dir, label)
    print('正在做第三步筛选')
    step3_process(src_dir, step2_dir, step3_dir, label)
    print('正在做第四步筛选')
    step4_process(src_dir, step3_dir, step4_dir, label)
    print('正在做第五步筛选')
    step5_process(step4_dir, step5_dir)


if __name__ == "__main__":
    # Mirror the step-1 folder layout into the output roots of steps 2..6.
    for step_no in range(2, 7):
        create_empty_directory_structure("../data/after_step1", f"../data/after_step{step_no}")

    for entry in os.listdir("../data/after_step1"):
        step1_dir = os.path.join("../data/after_step1", entry)
        step2_dir = os.path.join("../data/after_step2", entry)
        print('-------------------')

        if "+" not in entry:
            # Case 1: a single element, e.g. "S".
            print(f"处理单一元素文件:{entry}")
            _run_screening_steps(
                step1_dir,
                step2_dir,
                os.path.join("../data/after_step3", entry),
                os.path.join("../data/after_step4", entry),
                os.path.join("../data/after_step5", entry),
                entry,
            )
            continue

        # Case 2: several elements joined with "+", e.g. "S+O"; every element
        # has its own sub-directory inside the entry.
        parts = entry.split("+")
        print(f"处理多元素文件 {entry},拆分为:{parts}")
        for part in parts:
            print(part)
            _run_screening_steps(
                os.path.join(step1_dir, part),
                os.path.join(step2_dir, part),
                os.path.join(f"../data/after_step3/{entry}", part),
                os.path.join(f"../data/after_step4/{entry}", part),
                os.path.join(f"../data/after_step5/{entry}", part),
                part,
            )