CSM及TET，CS

2025-12-07 22:30:46 +08:00
parent e885893484
commit cea5ab6d3f
3 changed files with 133 additions and 85 deletions
--- a/py/step1.py
+++ b/py/step1.py
@@ -1,69 +1,113 @@
 from pymatgen.core import Structure
-from pymatgen.core.periodic_table import Element, Specie
-from pymatgen.io.cif import CifWriter
 from crystal_2 import crystal
-import crystal_2
 import os
 import shutil


+def get_anion_type(structure):
+    """
+    Return the target anions found in ``structure`` as a string.
+    Only O, S, Cl and Br are recognised; several are sorted and joined with '+'.
+    Other elements (e.g. P, N, F) are ignored; "Unknown" is returned if none match.
+    """
+    # Only these four target anions are kept
+    valid_anions = {'O', 'S', 'Cl', 'Br'}
+
+    # Collect the symbols of all elements in the structure's composition
+    elements = set([e.symbol for e in structure.composition.elements])
+
+    # Intersect to find which target anions this structure contains
+    found_anions = elements.intersection(valid_anions)
+
+    if not found_anions:
+        return "Unknown"
+
+    # With several anions, sort the symbols and join them with '+'
+    sorted_anions = sorted(list(found_anions))
+    return "+".join(sorted_anions)
+
+
 def read_files_check_basic(folder_path):
-    file_contents = []
+    """
+    Read the .cif files in folder_path and run the basic check (check_basic);
+    files that pass are copied into after_step1, grouped by get_anion_type.
+    """
+    # Base output path
+    output_base = "../data/after_step1"

    if not os.path.exists(folder_path):
        print(f"{folder_path} 文件夹不存在")
-        return file_contents
+        return

-    for filename in os.listdir(folder_path):
+    # Make sure the output directory exists
+    if not os.path.exists(output_base):
+        os.makedirs(output_base)
+
+    cif_files = [f for f in os.listdir(folder_path) if f.endswith(".cif")]
+    print(f"在 {folder_path} 发现 {len(cif_files)} 个 CIF 文件，开始筛选与整理...")
+
+    count_pass = 0
+
+    for filename in cif_files:
        file_path = os.path.join(folder_path, filename)

-        if os.path.isfile(file_path):
-            try:
-                temp = crystal(file_path)
-                file_contents.append(temp)
-            except Exception as e:
-                print(e)
-                continue  # 如果出错跳过当前循环，避免temp未定义报错
-
-            print(f"正在处理{filename}")
+        # 1. Run the basic screening through crystal_2
+        try:
+            temp = crystal(file_path)
+            # Run the basic check (charge balance, formula checks, etc.)
            temp.check_basic()

-            if temp.check_basic_result:
-                # 获取不带后缀的文件名，用于创建同名文件夹
-                file_base_name = os.path.splitext(filename)[0]
+            if not temp.check_basic_result:
+                print(f"Skipped: {filename} (未通过 check_basic)")
+                continue

-                if not "+" in temp.anion:
-                    # 单一阴离子情况
-                    # 路径变为: ../data/after_step1/Anion/FileBaseName/
-                    base_anion_folder = os.path.join("../data/after_step1", f"{temp.anion}")
-                    target_folder = os.path.join(base_anion_folder, file_base_name)
+        except Exception as e:
+            print(f"Error checking {filename}: {e}")
+            continue

+        # 2. Screening passed: classify the file and copy it into place
+        try:
+            print(f"Processing: {filename} (Passed)")
+            count_pass += 1
+
+            # Re-read the structure to decide the anion, so classification matches the Direct version
+            # (ignoring any naming inside crystal_2 that may be based on elements such as P/N)
+            struct = Structure.from_file(file_path)
+            anion_type = get_anion_type(struct)
+
+            # File name without its extension (the ID)
+            file_base_name = os.path.splitext(filename)[0]
+
+            # --- Build the target path (Anion/ID/ID.cif) ---
+
+            if "+" in anion_type:
+                # Mixed-anion case (e.g. O+S; get_anion_type sorts the symbols)
+                # Copy the file into both O+S/O and O+S/S
+                sub_anions = anion_type.split("+")
+                for sub in sub_anions:
+                    # Path: ../data/after_step1/O+S/O/123/123.cif
+                    target_folder = os.path.join(output_base, anion_type, sub, file_base_name)
                    if not os.path.exists(target_folder):
                        os.makedirs(target_folder)

-                    # 目标文件路径
-                    target_file_path = os.path.join(target_folder, filename)
-                    # 复制文件到目标文件夹
-                    shutil.copy(file_path, target_file_path)
-                    print(f"文件 {filename}通过基本筛选,已复制到 {target_folder}")
-                else:
-                    # 混合阴离子情况
-                    anions = temp.anion.split("+")
-                    for anion in anions:
-                        # 路径变为: ../data/after_step1/AnionCombination/Anion/FileBaseName/
-                        base_group_folder = os.path.join("../data/after_step1", f"{temp.anion}")
-                        base_anion_folder = os.path.join(base_group_folder, anion)
-                        target_folder = os.path.join(base_anion_folder, file_base_name)
+                    target_file = os.path.join(target_folder, filename)
+                    shutil.copy(file_path, target_file)
+            else:
+                # Single anion or Unknown: ../data/after_step1/S/123/123.cif
+                target_folder = os.path.join(output_base, anion_type, file_base_name)
+                if not os.path.exists(target_folder):
+                    os.makedirs(target_folder)

-                        if not os.path.exists(target_folder):
-                            os.makedirs(target_folder)
+                target_file = os.path.join(target_folder, filename)
+                shutil.copy(file_path, target_file)

-                        # 目标文件路径
-                        target_file_path = os.path.join(target_folder, filename)
-                        # 复制文件到目标文件夹
-                        shutil.copy(file_path, target_file_path)
-                        print(f"文件 {filename}通过基本筛选,已复制到 {target_folder}")
+        except Exception as e:
+            print(f"Error copying {filename}: {e}")
+
+    print(f"处理完成。共 {len(cif_files)} 个文件，通过筛选 {count_pass} 个。")


 if __name__ == "__main__":
+    # Per the readme, MP data lives in input_pre and ICSD data in input
+    # "input" is read by default here; change the path to suit your setup
    read_files_check_basic("../data/input")