calc—_v1

2025-12-07 15:22:36 +08:00
parent 35a4bf640f
commit b9da6d9592
6 changed files with 462 additions and 151 deletions
--- a/py/step2_4_combined.py
+++ b/py/step2_4_combined.py
@@ -0,0 +1,147 @@
+import os
+import pandas as pd
+import math
+
+# ================= Configuration =================
+# Screening thresholds for each anion type
+# perc: Percolation diameter (Step 2, value must be greater than this)
+# min_d: Minimum of d (Step 3, value must be less than this)
+# node: Maximum node length (Step 4, value must be greater than this)
+THRESHOLDS = {
+    "O": {"perc": 0.50, "min_d": 3.0, "node": 2.2},
+    "S": {"perc": 0.55, "min_d": 3.0, "node": 2.2},
+    "Cl": {"perc": 0.45, "min_d": 3.0, "node": 2.0},
+    "Br": {"perc": 0.45, "min_d": 3.0, "node": 2.0}
+}
+
+# Path configuration
+CSV_ROOT_DIR = "../output"  # root directory containing the CSV files
+DATA_SOURCE_DIR = "../data/after_step1"  # root directory of the original CIF files (symlink sources)
+TARGET_DIR = "../data/after_screening"  # target directory where symlinks for screened entries are placed
+
+
+# ===========================================
+
+def check_requirements(row, anion_type):
+    """
+    Decide whether one CSV row passes the combined Step 2-4 screening.
+
+    row is indexed by the three column names read below; anion_type selects
+    an entry of THRESHOLDS. Returns True only when all three criteria hold;
+    an unknown anion, a non-numeric value or a NaN value yields False.
+    """
+    limits = THRESHOLDS.get(anion_type)
+    if not limits:
+        # No thresholds configured for this anion: warn and reject the row.
+        print(f"Warning: 未知的阴离子类型 {anion_type}，跳过筛选。")
+        return False
+
+    try:
+        # float() raises ValueError/TypeError for values that are not numeric.
+        percolation = float(row["Percolation Diameter (A)"])
+        shortest = float(row["Minimum of d"])
+        node_length = float(row["Maximum Node Length (A)"])
+    except (ValueError, TypeError):
+        return False
+
+    # A NaN in any of the three values rejects the row.
+    if any(math.isnan(v) for v in (percolation, shortest, node_length)):
+        return False
+
+    # Evaluate the three criteria against the per-anion thresholds:
+    # Step 2 (greater than), Step 3 (less than), Step 4 (greater than).
+    wide_enough = percolation > limits["perc"]
+    close_enough = shortest < limits["min_d"]
+    long_node = node_length > limits["node"]
+    return wide_enough and close_enough and long_node
+
+
+def create_symlink(group_name, anion_name, material_id):
+    """
+    Create a symbolic link to the CIF file of a material that passed screening.
+
+    Source: DATA_SOURCE_DIR/group_name/anion_name/material_id/material_id.cif
+    Link:   TARGET_DIR/group_name/anion_name/material_id.cif
+
+    A missing source file or a failing link operation is reported with
+    print() and otherwise ignored; the function always returns None.
+    """
+    # The link stores an absolute source path so it does not depend on the
+    # directory from which it is later resolved.
+    rel_source_path = os.path.join(DATA_SOURCE_DIR, group_name, anion_name, material_id, f"{material_id}.cif")
+    abs_source_path = os.path.abspath(rel_source_path)
+
+    if not os.path.exists(abs_source_path):
+        print(f"源文件不存在: {abs_source_path}")
+        return
+
+    target_subdir = os.path.join(TARGET_DIR, group_name, anion_name)
+    # exist_ok=True replaces the separate os.path.exists() test, which could
+    # race with another process creating the same directory.
+    os.makedirs(target_subdir, exist_ok=True)
+
+    target_link_path = os.path.join(target_subdir, f"{material_id}.cif")
+    try:
+        # lexists() is also true for a dangling link left by an earlier run;
+        # os.symlink() would fail if that stale entry were still present.
+        if os.path.lexists(target_link_path):
+            os.remove(target_link_path)
+        os.symlink(abs_source_path, target_link_path)
+    except OSError as e:
+        print(f"创建软链接失败 {material_id}: {e}")
+
+
+def process_all_csvs():
+    """
+    Walk CSV_ROOT_DIR, screen every recognised CSV and link the passing entries.
+
+    The last two components of a CSV's directory are taken as Group/Anion
+    (e.g. ../output/O+S/O/O.csv gives Group "O+S", Anion "O"); CSVs whose
+    Anion directory is not a key of THRESHOLDS are skipped silently. The
+    printed count is the number of rows accepted by check_requirements().
+    """
+    if not os.path.exists(CSV_ROOT_DIR):
+        print(f"CSV 目录不存在: {CSV_ROOT_DIR}")
+        return
+
+    print("开始执行 Step 2-4 联合筛选...")
+
+    for root, _dirs, files in os.walk(CSV_ROOT_DIR):
+        for csv_name in files:
+            if not csv_name.endswith(".csv"):
+                continue
+            csv_path = os.path.join(root, csv_name)
+
+            # root is expected to end in .../Group/Anion
+            path_parts = os.path.normpath(root).split(os.sep)
+            if len(path_parts) < 2:
+                print(f"跳过路径结构异常的 CSV: {csv_path}")
+                continue
+            group_name, anion_name = path_parts[-2], path_parts[-1]
+
+            # Only directories named after a configured anion are screened.
+            if anion_name not in THRESHOLDS:
+                continue
+
+            print(f"正在处理: Group={group_name}, Anion={anion_name} ({csv_name})")
+
+            # Read Filename as text. iterrows() builds one Series per row and
+            # upcasts mixed int/float columns to float64, so a numeric ID such
+            # as 141 would otherwise become "141.0" and never match
+            # <ID>/<ID>.cif; text parsing also keeps leading zeros.
+            df = pd.read_csv(csv_path, dtype={"Filename": str})
+
+            pass_count = 0
+            total_count = len(df)
+
+            for _, row in df.iterrows():
+                material_id = str(row["Filename"])
+                if check_requirements(row, anion_name):
+                    create_symlink(group_name, anion_name, material_id)
+                    pass_count += 1
+
+            print(f"  - 完成: {pass_count}/{total_count} 个材料通过筛选并建立链接。")
+
+
+if __name__ == "__main__":
+    process_all_csvs()  # run the combined screening only when executed as a script