import os
import shutil

from step2 import process_files as step2_process
from step3 import process_files as step3_process
from step4 import process_files as step4_process
from step5 import read_files_check_partical as step5_process

# Directory holding the step-1 output that every later step starts from.
_SOURCE_ROOT = "../data/after_step1"
# Output root of each later step, keyed by step number (2..6, in order).
_STEP_ROOTS = {step: f"../data/after_step{step}" for step in range(2, 7)}


def create_empty_directory_structure(source_dir, target_dir):
    """Mirror the directory tree of ``source_dir`` under ``target_dir``.

    Only directories are created; files found in the source tree are ignored.
    Directories that already exist in the target are left untouched and are
    not counted.

    Args:
        source_dir (str): Root of the tree whose layout is copied.
        target_dir (str): Root under which the empty folders are created. It
            is created (and counted) if it does not exist yet.

    Returns:
        int: Number of directories created by this call, including
        ``target_dir`` itself when it had to be created.

    Raises:
        FileNotFoundError: If ``source_dir`` does not exist.
        OSError: If ``target_dir`` itself cannot be created (for example
            ``PermissionError``). Errors that occur while walking the tree
            are reported on stdout and do not propagate.
    """
    if not os.path.exists(source_dir):
        raise FileNotFoundError(f"源文件夹不存在: {source_dir}")

    created_count = 0

    if not os.path.exists(target_dir):
        print(f"目标文件夹不存在,正在创建: {target_dir}")
        # exist_ok guards against another process creating the directory
        # between the existence check above and this call.
        os.makedirs(target_dir, exist_ok=True)
        created_count += 1

    def copy_structure(src, dst):
        """Recreate every sub-directory of ``src`` below ``dst``."""
        nonlocal created_count
        try:
            for item in os.listdir(src):
                src_path = os.path.join(src, item)
                if not os.path.isdir(src_path):
                    # Plain files are skipped: only the layout is wanted.
                    continue
                dst_path = os.path.join(dst, item)
                if not os.path.exists(dst_path):
                    os.makedirs(dst_path)
                    created_count += 1
                    print(f"创建文件夹: {dst_path}")
                copy_structure(src_path, dst_path)
        except PermissionError:
            print(f"无权限访问目录: {src}")
        except OSError as e:
            # Best effort: a filesystem error abandons the rest of this
            # directory only. Non-OS errors are deliberately not caught here
            # so that the boundary handler below can report them.
            print(f"处理目录 {src} 时出错: {e}")

    try:
        copy_structure(source_dir, target_dir)
    except Exception as e:
        # Top-level boundary: report and still return the partial count.
        print(f"整体操作失败: {e}")
    else:
        print(f"已成功在 {target_dir} 中创建 {created_count} 个文件夹,复制了完整的目录结构")
    return created_count


def _run_steps(relative_path, element):
    """Run steps 2 to 5 in order for one element directory.

    ``relative_path`` is the element's location relative to each data root;
    the same relative location is used under the step-1 root (input) and
    under the step-2 to step-5 roots (outputs). ``element`` is the name
    handed to the step functions.
    """
    source_dir = os.path.join(_SOURCE_ROOT, relative_path)
    step2_dir, step3_dir, step4_dir, step5_dir = (
        os.path.join(_STEP_ROOTS[step], relative_path) for step in (2, 3, 4, 5)
    )

    print('正在做第二步筛选')
    step2_process(source_dir, step2_dir, element)
    print('正在做第三步筛选')
    step3_process(source_dir, step2_dir, step3_dir, element)
    print('正在做第四步筛选')
    step4_process(source_dir, step3_dir, step4_dir, element)
    print('正在做第五步筛选')
    step5_process(step4_dir, step5_dir)


def main():
    """Prepare the output trees, then run the pipeline for every entry."""
    # The step-6 tree is created as well, although no step-6 stage is run
    # from this script.
    for step_root in _STEP_ROOTS.values():
        create_empty_directory_structure(_SOURCE_ROOT, step_root)

    # NOTE(review): every entry of the source root is processed, plain files
    # included if any exist -- confirm the step functions expect that.
    for entry in os.listdir(_SOURCE_ROOT):
        print('-------------------')
        if "+" in entry:
            # Multi-element entry such as "S+O": each element has its own
            # sub-directory and is processed separately.
            elements = entry.split("+")
            print(f"处理多元素文件 {entry},拆分为:{elements}")
            for element in elements:
                print(element)
                _run_steps(os.path.join(entry, element), element)
        else:
            # Single-element entry such as "S".
            print(f"处理单一元素文件:{entry}")
            _run_steps(entry, entry)


if __name__ == "__main__":
    main()