103 lines
3.6 KiB
Python
103 lines
3.6 KiB
Python
import os
|
|
import shutil
|
|
from pymatgen.core import Structure
|
|
|
|
|
|
def get_anion_type(structure, valid_anions=("O", "S", "Cl", "Br")):
    """Classify a structure by the target anions it contains.

    Only the element symbols listed in ``valid_anions`` are recognised; every
    other element is ignored. With the default whitelist (O, S, Cl, Br):

    - Li3PS4 (contains P, S) -> "S"
    - LiFePO4 (contains P, O) -> "O"
    - Li3P (P only) -> "Unknown"

    Args:
        structure: Object exposing ``composition.elements``, an iterable of
            elements that each carry a ``symbol`` attribute (e.g. a pymatgen
            ``Structure``).
        valid_anions: Iterable of element symbols to treat as anions.
            Defaults to O, S, Cl and Br.

    Returns:
        ``"Unknown"`` when the structure contains none of the target anions;
        otherwise the matching symbols sorted alphabetically and joined with
        ``"+"`` (e.g. ``"O+S"``).
    """
    # Symbols of every element present in the structure.
    present = {element.symbol for element in structure.composition.elements}

    # Keep only the whitelisted anions.
    found_anions = present.intersection(valid_anions)
    if not found_anions:
        return "Unknown"

    # Alphabetical order keeps the label stable for a given set of anions.
    return "+".join(sorted(found_anions))
|
|
|
|
|
|
def organize_files_direct(input_folder, output_base):
    """Copy the CIF files of ``input_folder`` into per-anion folders.

    Every ``*.cif`` file directly inside ``input_folder`` is read with
    ``Structure.from_file`` and classified with ``get_anion_type``. The file
    is then copied (the original is left in place) to:

    - ``output_base/<AnionType>/<ID>/<ID>.cif`` for a single-anion or
      "Unknown" label;
    - ``output_base/<A+B>/<A>/<ID>/<ID>.cif`` and
      ``output_base/<A+B>/<B>/<ID>/<ID>.cif`` for a mixed label containing
      ``"+"``, so the combination is preserved while later scripts can still
      look files up by single element.

    ``<ID>`` is the file name without its extension. A file that fails to
    parse or copy is reported on stdout and skipped; processing continues.

    Args:
        input_folder: Directory containing the ``.cif`` files.
        output_base: Root directory of the sorted copies; created if missing.

    Returns:
        Dict mapping each anion-type label to the number of files classified
        under it. Empty when ``input_folder`` is not an existing directory.
    """
    # isdir rather than exists: a regular file passed by mistake is reported
    # here instead of raising NotADirectoryError in os.listdir below.
    if not os.path.isdir(input_folder):
        print(f"输入文件夹不存在: {input_folder}")
        return {}

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_base, exist_ok=True)

    # os.listdir order is arbitrary; sort so runs are reproducible.
    cif_files = sorted(f for f in os.listdir(input_folder) if f.endswith(".cif"))
    print(f"发现 {len(cif_files)} 个 CIF 文件,开始直接整理...")

    count_dict = {}

    for filename in cif_files:
        file_path = os.path.join(input_folder, filename)

        # Best effort: one unreadable file must not abort the whole batch.
        try:
            struct = Structure.from_file(file_path)
            anion_type = get_anion_type(struct)

            # Classification statistics.
            count_dict[anion_type] = count_dict.get(anion_type, 0) + 1

            # File name without extension, used as the per-structure ID.
            file_base_name = os.path.splitext(filename)[0]

            if "+" in anion_type:
                # Mixed anions: one copy under each constituent anion,
                # e.g. after_step1/O+S/O/123/123.cif and .../O+S/S/123/123.cif
                target_folders = [
                    os.path.join(output_base, anion_type, sub, file_base_name)
                    for sub in anion_type.split("+")
                ]
            else:
                # Single anion or Unknown: after_step1/S/123/123.cif
                target_folders = [
                    os.path.join(output_base, anion_type, file_base_name)
                ]

            for target_folder in target_folders:
                os.makedirs(target_folder, exist_ok=True)
                shutil.copy(file_path, os.path.join(target_folder, filename))

        except Exception as e:
            print(f"处理 {filename} 失败: {e}")

    print("整理完成。分类统计:")
    for k, v in count_dict.items():
        print(f" {k}: {v}")

    return count_dict
|
|
|
|
|
|
if __name__ == "__main__":
    # Folder holding the raw CIF files; for MP data use ../data/input_pre.
    input_dir = "../data/input"
    # Root folder that receives the sorted copies.
    output_dir = "../data/after_step1"

    organize_files_direct(input_folder=input_dir, output_base=output_dir)