Files
screen/py/step1.py
2025-12-07 22:30:46 +08:00

113 lines
3.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from pymatgen.core import Structure
from crystal_2 import crystal
import os
import shutil
def get_anion_type(structure: "Structure") -> str:
    """Classify a structure by the target anions it contains.

    Only O, S, Cl and Br are recognised. Every other element, including
    non-metals such as P, N or F, is ignored.

    Args:
        structure: Object exposing ``composition.elements``, an iterable of
            elements that each carry a ``symbol`` string (for example a
            pymatgen ``Structure``).

    Returns:
        ``"Unknown"`` when none of the target anions is present. Otherwise
        the symbols that were found, sorted alphabetically and joined with
        ``"+"`` -- e.g. ``"S"``, ``"O+S"`` or ``"Br+Cl"``. Because of the
        sort, a sulfide-oxide is always labelled ``"O+S"``, never ``"S+O"``.
    """
    # The only anions this screening step cares about.
    valid_anions = {"O", "S", "Cl", "Br"}

    # Intersect directly with the symbols; no intermediate list/set needed.
    found_anions = valid_anions.intersection(
        element.symbol for element in structure.composition.elements
    )
    if not found_anions:
        return "Unknown"

    # Sorting makes the label independent of set iteration order.
    return "+".join(sorted(found_anions))
def _copy_cif_into(source_path, target_folder, filename):
    """Copy ``source_path`` to ``target_folder/filename``, creating the folder."""
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(target_folder, exist_ok=True)
    shutil.copy(source_path, os.path.join(target_folder, filename))


def read_files_check_basic(
    folder_path: str, output_base: str = "../data/after_step1"
) -> None:
    """Screen the CIF files in a folder and sort the survivors by anion type.

    Every ``*.cif`` file directly inside ``folder_path`` is loaded with
    ``crystal`` and checked via ``check_basic()``. Files whose
    ``check_basic_result`` is truthy are copied beneath ``output_base``:

    * single anion or ``"Unknown"``: ``<output_base>/<anion>/<id>/<id>.cif``
    * mixed anions (e.g. ``"O+S"``): one copy per constituent anion, at
      ``<output_base>/O+S/O/<id>/<id>.cif`` and
      ``<output_base>/O+S/S/<id>/<id>.cif``

    where ``<id>`` is the file name without its extension and the anion
    label comes from ``get_anion_type``. Progress and per-file errors are
    printed; a failure on one file never aborts the batch.

    Args:
        folder_path: Directory containing the input CIF files. If it is not
            an existing directory a message is printed and nothing else
            happens.
        output_base: Root directory for the sorted copies; created if
            missing. Defaults to the location the script has always used.

    Returns:
        None.
    """
    # isdir (rather than exists) also rejects a path that is a regular file,
    # which would otherwise make os.listdir raise.
    if not os.path.isdir(folder_path):
        print(f"{folder_path} 文件夹不存在")
        return

    os.makedirs(output_base, exist_ok=True)

    # Sorted so the processing order (and the log) is reproducible.
    cif_files = sorted(f for f in os.listdir(folder_path) if f.endswith(".cif"))
    print(f"{folder_path} 发现 {len(cif_files)} 个 CIF 文件,开始筛选与整理...")

    count_pass = 0
    for filename in cif_files:
        file_path = os.path.join(folder_path, filename)

        # 1. Basic screening through crystal_2. Per the original author's
        #    note, check_basic covers charge balance, formula checks, etc.
        try:
            temp = crystal(file_path)
            temp.check_basic()
            passed = bool(temp.check_basic_result)
        except Exception as e:  # best-effort batch: report and move on
            print(f"Error checking {filename}: {e}")
            continue
        if not passed:
            print(f"Skipped: {filename} (未通过 check_basic)")
            continue

        # 2. The file passed screening; classify it and copy it into place.
        print(f"Processing: {filename} (Passed)")
        count_pass += 1
        try:
            # Re-read the structure and classify it with get_anion_type so
            # the result matches the "Direct" variant of this script, instead
            # of relying on whatever naming crystal_2 derives internally
            # (which, per the original note, may involve P/N and the like).
            struct = Structure.from_file(file_path)
            anion_type = get_anion_type(struct)
            file_base_name = os.path.splitext(filename)[0]

            if "+" in anion_type:
                # Mixed anions: one copy under each constituent anion.
                target_folders = [
                    os.path.join(output_base, anion_type, sub, file_base_name)
                    for sub in anion_type.split("+")
                ]
            else:
                # Single anion or "Unknown".
                target_folders = [
                    os.path.join(output_base, anion_type, file_base_name)
                ]

            for target_folder in target_folders:
                _copy_cif_into(file_path, target_folder, filename)
        except Exception as e:  # best-effort batch: report and move on
            print(f"Error copying {filename}: {e}")

    print(f"处理完成。共 {len(cif_files)} 个文件,通过筛选 {count_pass} 个。")
if __name__ == "__main__":
    # NOTE(review): the original (garbled) note appears to say that, per the
    # README, MP data lives in `input_pre` and ICSD data in `input` -- confirm.
    # The script reads `input` by default; point this elsewhere as needed.
    default_input_dir = "../data/input"
    read_files_check_basic(default_input_dir)