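# Pipeline driver: mirrors the folder layout of ../data/after_step1 into the
# after_step2 ... after_step6 folders, then runs screening steps 2-5 on every entry.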
from step2 import process_files as step2_process
|
|
from step3 import process_files as step3_process
|
|
from step4 import process_files as step4_process
|
|
from step5 import read_files_check_partical as step5_process
|
|
import os
|
|
import shutil
|
|
|
|
import os
|
|
import shutil
|
|
|
|
|
|
def create_empty_directory_structure(source_dir, target_dir):
    """Mirror the directory tree of ``source_dir`` under ``target_dir`` without copying files.

    Only directories are replicated; regular files are ignored. Directories
    that already exist in the target are left untouched and are not counted.
    Symbolic links to directories are followed, because ``os.path.isdir``
    follows links.

    Args:
        source_dir (str): Directory whose sub-directory layout is replicated.
        target_dir (str): Directory that receives the empty folders. It is
            created when it does not exist yet.

    Returns:
        int: Number of directories created by this call, counting
        ``target_dir`` itself when it had to be created.

    Raises:
        FileNotFoundError: If ``source_dir`` does not exist.
        OSError: If ``target_dir`` itself cannot be created (for example a
            ``PermissionError``).

    Errors hit while walking the tree are best-effort: they are reported with
    ``print`` and the remainder of the affected directory is skipped, but no
    exception is propagated to the caller.
    """
    if not os.path.exists(source_dir):
        raise FileNotFoundError(f"源文件夹不存在: {source_dir}")

    created_count = 0

    if not os.path.exists(target_dir):
        print(f"目标文件夹不存在,正在创建: {target_dir}")
        # exist_ok guards against another process creating the folder between
        # the check above and this call.
        os.makedirs(target_dir, exist_ok=True)
        created_count += 1

    # Canonical form of the target, used to recognise it while walking the
    # source. Without this, a target located inside the source tree would be
    # mirrored into itself over and over (target/target/target/...).
    target_real = os.path.normcase(os.path.realpath(target_dir))

    def copy_structure(src, dst):
        """Recreate every sub-directory of ``src`` under ``dst``, depth-first."""
        nonlocal created_count

        try:
            for item in os.listdir(src):
                src_path = os.path.join(src, item)
                dst_path = os.path.join(dst, item)

                # Regular files are skipped on purpose: only the folder
                # layout is wanted.
                if not os.path.isdir(src_path):
                    continue

                if os.path.normcase(os.path.realpath(src_path)) == target_real:
                    print(f"跳过目标文件夹自身: {src_path}")
                    continue

                if not os.path.exists(dst_path):
                    os.makedirs(dst_path, exist_ok=True)
                    created_count += 1
                    print(f"创建文件夹: {dst_path}")
                copy_structure(src_path, dst_path)

        except PermissionError:
            print(f"无权限访问目录: {src}")
        except Exception as e:
            # Best-effort walk: report the problem and let the caller's loop
            # continue with the next sibling directory.
            print(f"处理目录 {src} 时出错: {str(e)}")

    copy_structure(source_dir, target_dir)
    print(f"已成功在 {target_dir} 中创建 {created_count} 个文件夹,复制了完整的目录结构")
    return created_count
|
|
if __name__ == "__main__":
    # Each later stage writes into its own folder that mirrors the layout of
    # the step-1 output, so the empty trees are created up front.
    for stage_root in (
        "../data/after_step2",
        "../data/after_step3",
        "../data/after_step4",
        "../data/after_step5",
        "../data/after_step6",
    ):
        create_empty_directory_structure("../data/after_step1", stage_root)

    def run_screening_steps(raw_dir, step2_dir, step3_dir, step4_dir, step5_dir, label):
        """Run screening steps 2-5 in order for one element directory.

        Each step is handed the output folder of the previous one; steps 2-4
        also receive the raw step-1 folder and the element label.
        """
        print('正在做第二步筛选')
        step2_process(raw_dir, step2_dir, label)
        print('正在做第三步筛选')
        step3_process(raw_dir, step2_dir, step3_dir, label)
        print('正在做第四步筛选')
        step4_process(raw_dir, step3_dir, step4_dir, label)
        print('正在做第五步筛选')
        step5_process(step4_dir, step5_dir)

    for entry in os.listdir("../data/after_step1"):
        raw_entry = os.path.join("../data/after_step1", entry)
        step2_entry = os.path.join("../data/after_step2", entry)
        print('-------------------')

        if "+" not in entry:
            # Case 1: a single element such as "S"; the entry is processed as is.
            print(f"处理单一元素文件:{entry}")
            run_screening_steps(
                raw_entry,
                step2_entry,
                os.path.join("../data/after_step3", entry),
                os.path.join("../data/after_step4", entry),
                os.path.join("../data/after_step5", entry),
                entry,
            )
            continue

        # Case 2: several elements joined by "+", such as "S+O"; every element
        # has its own sub-folder inside the entry and is processed separately.
        parts = entry.split("+")
        print(f"处理多元素文件 {entry},拆分为:{parts}")
        for part in parts:
            print(part)
            run_screening_steps(
                os.path.join(raw_entry, part),
                os.path.join(step2_entry, part),
                os.path.join(f"../data/after_step3/{entry}", part),
                os.path.join(f"../data/after_step4/{entry}", part),
                os.path.join(f"../data/after_step5/{entry}", part),
                part,
            )