113 lines
4.0 KiB
Python
113 lines
4.0 KiB
Python
from pymatgen.core import Structure
|
||
from crystal_2 import crystal
|
||
import os
|
||
import shutil
|
||
|
||
|
||
def get_anion_type(structure, valid_anions=("O", "S", "Cl", "Br")):
    """Classify a structure by the target anions it contains.

    Only the element symbols listed in ``valid_anions`` are considered; every
    other element in the structure (including other non-metals such as P, N
    or F) is ignored.

    Args:
        structure: Object exposing ``composition.elements``, an iterable of
            elements that each carry a ``symbol`` attribute (for example a
            pymatgen ``Structure``).
        valid_anions: Element symbols treated as target anions. Defaults to
            O, S, Cl and Br.

    Returns:
        ``"Unknown"`` when none of the target anions is present; otherwise
        the symbols that were found, sorted alphabetically and joined with
        ``"+"`` (for example ``"O+S"``).
    """
    # Collect every element symbol present in the structure.
    symbols = {element.symbol for element in structure.composition.elements}

    # Keep only the target anions that actually occur.
    found_anions = symbols.intersection(valid_anions)
    if not found_anions:
        return "Unknown"

    # Alphabetical order makes the label independent of element order.
    return "+".join(sorted(found_anions))
|
||
|
||
|
||
def _target_folders(output_base, anion_type, file_base_name):
    """Return the destination folders for one structure ID.

    A mixed label such as ``"O+S"`` yields one folder per constituent anion
    (``<base>/O+S/O/<id>`` and ``<base>/O+S/S/<id>``); a single anion or
    ``"Unknown"`` yields just ``<base>/<label>/<id>``.
    """
    if "+" in anion_type:
        return [
            os.path.join(output_base, anion_type, sub, file_base_name)
            for sub in anion_type.split("+")
        ]
    return [os.path.join(output_base, anion_type, file_base_name)]


def read_files_check_basic(folder_path, output_base="../data/after_step1"):
    """Screen CIF files with ``check_basic`` and sort the survivors by anion.

    Every ``*.cif`` file directly inside ``folder_path`` is loaded through
    ``crystal`` and ``check_basic()`` is run on it (per the original author's
    note this covers charge balance, formula checks, etc.). Files whose
    ``check_basic_result`` is falsy, or whose check raises, are skipped.
    Files that pass are copied to ``<output_base>/<anion>/<id>/<id>.cif``,
    where ``<anion>`` comes from ``get_anion_type``. For mixed-anion
    structures the file is copied once per constituent anion, to
    ``<output_base>/<a+b>/<a>/<id>/`` and ``<output_base>/<a+b>/<b>/<id>/``.

    Progress and errors are reported with ``print``; errors on one file never
    abort the whole run.

    Args:
        folder_path: Directory containing the input CIF files.
        output_base: Root directory for the sorted output. Created if it
            does not exist.

    Returns:
        None.
    """
    # isdir (not exists): a regular file would make os.listdir raise below.
    if not os.path.isdir(folder_path):
        print(f"{folder_path} 文件夹不存在")
        return

    # Make sure the output root exists.
    os.makedirs(output_base, exist_ok=True)

    cif_files = [f for f in os.listdir(folder_path) if f.endswith(".cif")]
    print(f"在 {folder_path} 发现 {len(cif_files)} 个 CIF 文件,开始筛选与整理...")

    count_pass = 0

    for filename in cif_files:
        file_path = os.path.join(folder_path, filename)

        # Step 1: basic screening through crystal_2.
        try:
            temp = crystal(file_path)
            temp.check_basic()
            passed = bool(temp.check_basic_result)
        except Exception as e:
            # Per-file boundary: report and move on to the next file.
            print(f"Error checking {filename}: {e}")
            continue

        if not passed:
            print(f"Skipped: {filename} (未通过 check_basic)")
            continue

        # Step 2: the file passed screening; classify and copy it.
        print(f"Processing: {filename} (Passed)")
        count_pass += 1

        try:
            # Re-read the structure so the classification depends only on
            # get_anion_type, not on any naming crystal_2 may derive from
            # other elements such as P or N.
            struct = Structure.from_file(file_path)
            anion_type = get_anion_type(struct)

            # File name without extension serves as the structure ID.
            file_base_name = os.path.splitext(filename)[0]

            folders = _target_folders(output_base, anion_type, file_base_name)
            for target_folder in folders:
                os.makedirs(target_folder, exist_ok=True)
                shutil.copy(file_path, os.path.join(target_folder, filename))
        except Exception as e:
            print(f"Error copying {filename}: {e}")

    print(f"处理完成。共 {len(cif_files)} 个文件,通过筛选 {count_pass} 个。")
|
||
|
||
|
||
if __name__ == "__main__":
    # Per the project readme, MP data lives in `input_pre` and ICSD data in
    # `input`. This reads `input` by default; change the path as needed.
    read_files_check_basic("../../solidstate-tools/corner-sharing/data/1209/input")