import os
import shutil
from collections import Counter

from pymatgen.core import Structure

# Only these four anions are classification targets; every other element
# (including non-metals such as P, N, F) is ignored.
_TARGET_ANIONS = frozenset({"O", "S", "Cl", "Br"})


def get_anion_type(structure: Structure) -> str:
    """Return the anion label of a structure.

    Only O, S, Cl and Br (and their combinations) are recognised. Any other
    element is ignored, for example:

    - Li3PS4 (contains P, S)  -> "S"
    - LiFePO4 (contains P, O) -> "O"
    - Li3P (only P)           -> "Unknown"

    Args:
        structure: Object exposing ``composition.elements``, each element
            having a ``symbol`` attribute.

    Returns:
        ``"Unknown"`` when no target anion is present; otherwise the target
        anion symbols sorted alphabetically and joined with ``"+"``. Note the
        sorting: a structure containing both S and O is labelled ``"O+S"``.
    """
    symbols = {element.symbol for element in structure.composition.elements}
    found_anions = symbols & _TARGET_ANIONS
    if not found_anions:
        return "Unknown"
    return "+".join(sorted(found_anions))


def _target_folders(output_base: str, anion_type: str, file_base_name: str) -> list:
    """Return every destination folder for one file of the given anion label."""
    if "+" in anion_type:
        # Mixed anions (e.g. "O+S"): one copy per constituent anion, i.e.
        # <output_base>/O+S/O/<ID>/ and <output_base>/O+S/S/<ID>/. This keeps
        # the combination visible while still allowing lookup by element.
        return [
            os.path.join(output_base, anion_type, sub_anion, file_base_name)
            for sub_anion in anion_type.split("+")
        ]
    # Single anion or "Unknown": <output_base>/<AnionType>/<ID>/
    return [os.path.join(output_base, anion_type, file_base_name)]


def organize_files_direct(input_folder: str, output_base: str) -> None:
    """Copy every ``.cif`` file of ``input_folder`` into a per-anion tree.

    Resulting layout is ``<output_base>/<AnionType>/<ID>/<ID>.cif`` for a
    single anion (or "Unknown"), and
    ``<output_base>/<A+B>/<A or B>/<ID>/<ID>.cif`` for mixed anions, where
    ``<ID>`` is the file name without its extension. Files are copied, not
    moved. A file that cannot be parsed or copied is reported and skipped.
    A summary of how many files landed in each category is printed at the end.

    Args:
        input_folder: Directory that holds the source ``.cif`` files.
        output_base: Root directory of the organised tree; created if missing.
    """
    # isdir (not exists): a regular file would pass an existence check and
    # then make os.listdir fail.
    if not os.path.isdir(input_folder):
        print(f"输入文件夹不存在: {input_folder}")
        return

    os.makedirs(output_base, exist_ok=True)

    # Sorted so the processing order does not depend on the file system.
    cif_files = sorted(f for f in os.listdir(input_folder) if f.endswith(".cif"))
    print(f"发现 {len(cif_files)} 个 CIF 文件,开始直接整理...")

    counts = Counter()

    for filename in cif_files:
        file_path = os.path.join(input_folder, filename)
        try:
            struct = Structure.from_file(file_path)
            anion_type = get_anion_type(struct)

            # File name without extension serves as the entry ID.
            file_base_name = os.path.splitext(filename)[0]

            for target_folder in _target_folders(output_base, anion_type, file_base_name):
                os.makedirs(target_folder, exist_ok=True)
                shutil.copy(file_path, os.path.join(target_folder, filename))
        except Exception as e:
            # Deliberate best-effort boundary: one bad file must not abort
            # the whole batch, so report it and move on.
            print(f"处理 {filename} 失败: {e}")
        else:
            # Count only files that were actually copied.
            counts[anion_type] += 1

    print("整理完成。分类统计:")
    for k, v in counts.items():
        print(f" {k}: {v}")


if __name__ == "__main__":
    # Input directory; for Materials Project data use ../data/input_pre instead.
    input_dir = "../data/input"
    # Output directory.
    output_dir = "../data/after_step1"

    organize_files_direct(input_dir, output_dir)