CSM及TET，CS

2025-12-07 22:30:46 +08:00
parent e885893484
commit cea5ab6d3f
3 changed files with 133 additions and 85 deletions
--- a/main_property.sh
+++ b/main_property.sh
@@ -10,7 +10,7 @@ chmod -R u+w ../Screen
 source $(conda info --base)/etc/profile.d/conda.sh
 # 激活 screen 环境
-conda activate screen
+conda activate ~/anaconda3/envs/screen
 cd py/
 export PYTHONPATH=$(pwd):$PYTHONPATH
@@ -31,7 +31,7 @@ python make_sh.py
 # 3. 运行 Zeo++ 计算
 echo "============ Stage 2: Zeo++ Calculations ============"
 conda deactivate
-conda activate zeo
+conda activate ~/anaconda3/envs/zeo
 # 进入数据目录
 cd ../data/after_step1
@@ -49,7 +49,7 @@ fi
 echo "============ Stage 3: Data Extraction & Analysis ============"
 # 切回 screen 环境
 conda deactivate
-conda activate screen
+conda activate ~/anaconda3/envs/screen
 cd ../../py
 # 3.1 提取 Zeo++ 基础数据
--- a/py/step1.py
+++ b/py/step1.py
@@ -1,69 +1,113 @@
 from pymatgen.core import Structure
 from pymatgen.core.periodic_table import Element, Specie
 from pymatgen.io.cif import CifWriter
 from crystal_2 import crystal
 import crystal_2
 import os
 import shutil
 def get_anion_type(structure):
    """
    Classify a structure by the target anions it contains.

    Only O, S, Cl and Br are recognised; every other element (including
    non-metals such as P, N or F) is ignored.

    Returns the recognised symbols sorted alphabetically and joined with
    '+' (e.g. "O+S"), or "Unknown" when none of them is present.
    """
    target_symbols = {'O', 'S', 'Cl', 'Br'}
    # Keep only those element symbols of the composition that are target anions.
    present = {el.symbol for el in structure.composition.elements} & target_symbols
    # Sorting makes the label independent of the order the elements are listed in.
    return "+".join(sorted(present)) if present else "Unknown"
 def read_files_check_basic(folder_path):
-    file_contents = []
+    """
    读取 CIF 文件，进行基础检查 (check_basic)，
    通过筛选后按自定义阴离子规则分类并整理到 after_step1 文件夹。
    """
    # 输出基础路径
    output_base = "../data/after_step1"
    if not os.path.exists(folder_path):
        print(f"{folder_path} 文件夹不存在")
-        return file_contents
+        return
-    for filename in os.listdir(folder_path):
+    # 确保输出目录存在
    if not os.path.exists(output_base):
        os.makedirs(output_base)
    cif_files = [f for f in os.listdir(folder_path) if f.endswith(".cif")]
    print(f"在 {folder_path} 发现 {len(cif_files)} 个 CIF 文件，开始筛选与整理...")
    count_pass = 0
    for filename in cif_files:
        file_path = os.path.join(folder_path, filename)
-        if os.path.isfile(file_path):
+        # 1. 调用 crystal_2 进行基础筛选
-            try:
+        try:
-                temp = crystal(file_path)
+            temp = crystal(file_path)
-                file_contents.append(temp)
+            # 进行基础检查 (电荷平衡、化学式检查等)
            except Exception as e:
                print(e)
                continue  # 如果出错跳过当前循环，避免temp未定义报错
            print(f"正在处理{filename}")
            temp.check_basic()
-            if temp.check_basic_result:
+            if not temp.check_basic_result:
-                # 获取不带后缀的文件名，用于创建同名文件夹
+                print(f"Skipped: {filename} (未通过 check_basic)")
-                file_base_name = os.path.splitext(filename)[0]
+                continue
-                if not "+" in temp.anion:
+        except Exception as e:
-                    # 单一阴离子情况
+            print(f"Error checking {filename}: {e}")
-                    # 路径变为: ../data/after_step1/Anion/FileBaseName/
+            continue
                    base_anion_folder = os.path.join("../data/after_step1", f"{temp.anion}")
                    target_folder = os.path.join(base_anion_folder, file_base_name)
        # 2. 筛选通过，进行分类整理
        try:
            print(f"Processing: {filename} (Passed)")
            count_pass += 1
            # 为了确保分类逻辑与 Direct 版本一致，重新读取结构判断阴离子
            # (忽略 crystal_2 内部可能基于 P/N 等元素的命名)
            struct = Structure.from_file(file_path)
            anion_type = get_anion_type(struct)
            # 获取不带后缀的文件名 (ID)
            file_base_name = os.path.splitext(filename)[0]
            # --- 构建目标路径逻辑 (Anion/ID/ID.cif) ---
            if "+" in anion_type:
                # 混合阴离子情况 (如 S+O)
                # 分别复制到 S+O/S 和 S+O/O 下
                sub_anions = anion_type.split("+")
                for sub in sub_anions:
                    # 路径: ../data/after_step1/S+O/S/123/123.cif
                    target_folder = os.path.join(output_base, anion_type, sub, file_base_name)
                    if not os.path.exists(target_folder):
                        os.makedirs(target_folder)
-                    # 目标文件路径
+                    target_file = os.path.join(target_folder, filename)
-                    target_file_path = os.path.join(target_folder, filename)
+                    shutil.copy(file_path, target_file)
-                    # 复制文件到目标文件夹
+            else:
-                    shutil.copy(file_path, target_file_path)
+                # 单一阴离子或 Unknown: ../data/after_step1/S/123/123.cif
-                    print(f"文件 {filename}通过基本筛选,已复制到 {target_folder}")
+                target_folder = os.path.join(output_base, anion_type, file_base_name)
-                else:
+                if not os.path.exists(target_folder):
-                    # 混合阴离子情况
+                    os.makedirs(target_folder)
                    anions = temp.anion.split("+")
                    for anion in anions:
                        # 路径变为: ../data/after_step1/AnionCombination/Anion/FileBaseName/
                        base_group_folder = os.path.join("../data/after_step1", f"{temp.anion}")
                        base_anion_folder = os.path.join(base_group_folder, anion)
                        target_folder = os.path.join(base_anion_folder, file_base_name)
-                        if not os.path.exists(target_folder):
+                target_file = os.path.join(target_folder, filename)
-                            os.makedirs(target_folder)
+                shutil.copy(file_path, target_file)
-                        # 目标文件路径
+        except Exception as e:
-                        target_file_path = os.path.join(target_folder, filename)
+            print(f"Error copying {filename}: {e}")
-                        # 复制文件到目标文件夹
+
-                        shutil.copy(file_path, target_file_path)
+    print(f"处理完成。共 {len(cif_files)} 个文件，通过筛选 {count_pass} 个。")
                        print(f"文件 {filename}通过基本筛选,已复制到 {target_folder}")
 if __name__ == "__main__":
    # Per the README, MP data lives in input_pre and ICSD data in input.
    # This reads input by default; adjust the path to match your data.
    read_files_check_basic("../data/input")
--- a/py/step1_direct.py
+++ b/py/step1_direct.py
@@ -5,22 +5,26 @@ from pymatgen.core import Structure
 def get_anion_type(structure):
    """
-    简单判断阴离子类型。
+    判断阴离子类型。
-    返回: 'O', 'S', 'S+O' 等字符串
+    仅识别 O, S, Cl, Br 及其组合。
    其他非金属元素（如 P, N, F 等）将被忽略：
    - Li3PS4 (含 P, S) -> 识别为 S
    - LiFePO4 (含 P, O) -> 识别为 O
    - Li3P (仅 P) -> 识别为 Unknown
    """
-    # 定义常见的阴离子列表
+    # --- 修改处：仅保留这四种目标阴离子 ---
-    valid_anions = {'O', 'S', 'Se', 'Te', 'F', 'Cl', 'Br', 'I', 'N', 'P'}
+    valid_anions = {'O', 'S', 'Cl', 'Br'}
    # 获取结构中的所有元素符号
    elements = set([e.symbol for e in structure.composition.elements])
-    # 取交集找到当前结构包含的阴离子
+    # 取交集找到当前结构包含的目标阴离子
    found_anions = elements.intersection(valid_anions)
    if not found_anions:
        return "Unknown"
-    # 如果有多个阴离子，按字母顺序排序并用 '+' 连接 (模拟 step1 的逻辑)
+    # 如果有多个阴离子，按字母顺序排序并用 '+' 连接
    sorted_anions = sorted(list(found_anions))
    return "+".join(sorted_anions)
@@ -30,70 +34,70 @@ def organize_files_direct(input_folder, output_base):
        print(f"输入文件夹不存在: {input_folder}")
        return
    # 确保输出目录存在
    if not os.path.exists(output_base):
        os.makedirs(output_base)
    cif_files = [f for f in os.listdir(input_folder) if f.endswith(".cif")]
    print(f"发现 {len(cif_files)} 个 CIF 文件，开始直接整理...")
    count_dict = {}
    for filename in cif_files:
        file_path = os.path.join(input_folder, filename)
        try:
-            # 仅读取结构用于分类
+            # 读取结构分类
            struct = Structure.from_file(file_path)
            anion_type = get_anion_type(struct)
            # 统计一下分类情况（可选）
            count_dict[anion_type] = count_dict.get(anion_type, 0) + 1
            # 获取不带后缀的文件名 (ID)
            file_base_name = os.path.splitext(filename)[0]
            # --- 构建目标路径逻辑 ---
-            # 逻辑：../data/after_step1 / 阴离子类别 / ID / ID.cif
+            # 目标: ../data/after_step1 / AnionType / ID / ID.cif
            # 处理混合阴离子情况 (如 S+O)
            if "+" in anion_type:
-                # 按照之前的逻辑，如果是混合阴离子，通常会有多层
+                # 混合阴离子情况 (如 S+O)
-                # 但为了统一后续处理，我们这里将其放入组合名的文件夹下
+                # 将文件复制到 S+O 下的各个子阴离子文件夹中 (S+O/S/ID/ID.cif 和 S+O/O/ID/ID.cif)
-                # 比如: after_step1/S+O/S/123/123.cif (复杂)
+                # 这样既保留了组合关系，又方便后续脚本按元素查找
                # 或者简化为 after_step1/S+O/123/123.cif (简单)
                # 根据你之前的 make_sh.py 和 extract_data.py，
                # 只要是 Folder/ID/ID.cif 结构即可。
                # 为了兼容 analyze_cs.py 的逻辑 (group_name, anion_name)，
                # 这里我们采用 simplified 逻辑：
                # 如果是混合，我们在第一层建 S+O，第二层建具体的 anion 文件夹(比如首字母排序第一个)
                # 或者直接: after_step1/S+O/ID/ID.cif -> 这样 group=S+O, anion=ID (不对)
                # 兼容旧代码的最佳实践：
                # 对于混合 S+O，我们建立 S+O/S/ID/ID.cif 和 S+O/O/ID/ID.cif ?
                # 不，原 Step1 是把一个文件复制了两份到不同文件夹。
                # 这里为了简化，我们只复制一份到主阴离子文件夹，或者直接按组合命名。
                # 让我们采用最稳妥的方式：如果是 S+O，放入 S+O/Mix/ID/ID.cif
                # 这样 group=S+O, anion=Mix。
                # 但为了让 CS_calc 正常工作，最好还是放入具体的元素文件夹。
                # 这里我们简单处理：直接放入 S+O/Combined/ID/
                # 或者根据你的 extract_data.py 逻辑：
                # 它会遍历 top_dir (S+O) -> sub_anion (S, O)
                # 策略：拆分放入。
                sub_anions = anion_type.split("+")
                for sub in sub_anions:
                    # 路径: after_step1/S+O/S/123/123.cif
                    target_folder = os.path.join(output_base, anion_type, sub, file_base_name)
                    if not os.path.exists(target_folder):
                        os.makedirs(target_folder)
                    shutil.copy(file_path, os.path.join(target_folder, filename))
-                print(f"整理: {filename} -> {anion_type} (已复制到各子类)")
+                    target_file = os.path.join(target_folder, filename)
                    shutil.copy(file_path, target_file)
                # print(f"整理: {filename} -> {anion_type} (Split)")
            else:
-                # 单一阴离子: after_step1/S/ID/ID.cif
+                # 单一阴离子或 Unknown: after_step1/S/123/123.cif
                target_folder = os.path.join(output_base, anion_type, file_base_name)
                if not os.path.exists(target_folder):
                    os.makedirs(target_folder)
-                shutil.copy(file_path, os.path.join(target_folder, filename))
+                target_file = os.path.join(target_folder, filename)
-                print(f"整理: {filename} -> {anion_type}")
+                shutil.copy(file_path, target_file)
                # print(f"整理: {filename} -> {anion_type}")
        except Exception as e:
            print(f"处理 {filename} 失败: {e}")
    print("整理完成。分类统计:")
    for k, v in count_dict.items():
        print(f"  {k}: {v}")
 if __name__ == "__main__":
-    organize_files_direct("../data/input", "../data/after_step1")
+    # 输入路径
    input_dir = "../data/input"  # 如果是MP数据请改为 ../data/input_pre
    # 输出路径
    output_dir = "../data/after_step1"
    organize_files_direct(input_dir, output_dir)