calc—_v2

2025-12-07 16:01:42 +08:00
parent b9da6d9592
commit 1d416d4dd8
2 changed files with 66 additions and 51 deletions
--- a/main.sh
+++ b/main.sh
@@ -6,11 +6,11 @@

 # 1. 初始化设置
 # 修改上一级目录权限
-chmod -R u+w ../Screen
+chmod -R u+w ../screen

 # 启用 screen 环境 (Python 3.11)
 source $(conda info --base)/etc/profile.d/conda.sh
-conda activate screen
+conda activate ~/anaconda3/envs/screen

 # 设置当前目录为 PYTHONPATH
 cd py/
@@ -31,7 +31,7 @@ python make_sh.py
 # 2. 切换环境运行 Zeo++
 echo "============ Stage 2: Zeo++ Calculations ============"
 conda deactivate
-conda activate zeo
+conda activate ~/anaconda3/envs/zeo

 # 进入数据目录执行所有生成的 shell 脚本
 cd ../data/after_step1
@@ -46,7 +46,7 @@ fi
 echo "============ Stage 3: Data Extraction & Advanced Screening ============"
 # 切回 screen 环境
 conda deactivate
-conda activate screen
+conda activate ~/anaconda3/envs/screen
 cd ../../py

 # 提取日志数据
--- a/py/step2_4_combined.py
+++ b/py/step2_4_combined.py
@@ -1,12 +1,10 @@
 import os
 import pandas as pd
 import math
+import shutil

 # ================= 配置区域 =================
 # 定义各阴离子的筛选阈值
-# perc: Percolation diameter (对应 Step 2, 大于此值)
-# min_d: Minimum of d (对应 Step 3, 小于此值)
-# node: Maximum node length (对应 Step 4, 大于此值)
 THRESHOLDS = {
    "O": {"perc": 0.50, "min_d": 3.0, "node": 2.2},
    "S": {"perc": 0.55, "min_d": 3.0, "node": 2.2},
@@ -16,7 +14,7 @@ THRESHOLDS = {

 # 路径配置
 CSV_ROOT_DIR = "../output"  # CSV 所在的根目录
-DATA_SOURCE_DIR = "../data/after_step1"  # 原始 CIF 文件所在的根目录 (用于创建链接源)
+DATA_SOURCE_DIR = "../data/after_step1"  # 原始 CIF 文件所在的根目录
 TARGET_DIR = "../data/after_screening"  # 筛选后放置软链接的目标目录


@@ -26,28 +24,21 @@ def check_requirements(row, anion_type):
    """
    检查单行数据是否符合要求
    """
-    # 获取该阴离子类型的阈值配置
    config = THRESHOLDS.get(anion_type)
    if not config:
-        print(f"Warning: 未知的阴离子类型 {anion_type}，跳过筛选。")
        return False

    try:
-        # 获取数值 (处理可能的空值或非数字情况)
        perc = float(row["Percolation Diameter (A)"])
        min_d = float(row["Minimum of d"])
        node = float(row["Maximum Node Length (A)"])

-        # 检查是否为 NaN
        if math.isnan(perc) or math.isnan(min_d) or math.isnan(node):
            return False

-        # --- 筛选逻辑 ---
-        # Step 2: 连通孔径 > 阈值
+        # 筛选逻辑
        c1 = perc > config["perc"]
-        # Step 3: 最短距离 < 3.0 (所有元素目前都是3.0)
        c2 = min_d < config["min_d"]
-        # Step 4: 扩大锂离子节点 > 阈值
        c3 = node > config["node"]

        return c1 and c2 and c3
@@ -56,39 +47,56 @@ def check_requirements(row, anion_type):
        return False


-def create_symlink(group_name, anion_name, material_id):
+def create_result_file(group_name, anion_name, material_id):
    """
-    创建软链接
-    源: ../data/after_step1/Group/Anion/ID/ID.cif
-    目: ../data/after_screening/Group/Anion/ID.cif
+    创建结果文件 (这里改为直接复制，软链接在跨文件系统或某些环境下可能不稳定，复制更稳妥)
+    如果确实需要软链接，可以将 shutil.copy 换回 os.symlink
    """
-    # 1. 构建源文件路径 (必须使用绝对路径以确保软链接在任何地方都有效)
-    # 注意：根据你修改后的 step1，文件在 ID 文件夹内，如 141/141.cif
+    # 1. 构建源文件路径
+    # 正确路径: ../data/after_step1/Group/MaterialID/MaterialID.cif
+    # 例如: ../data/after_step1/S/195819/195819.cif
+    # 注意：如果原本结构是 S+O/S/ID... 这里会自动适配
+
+    # 这里的路径逻辑要非常小心，取决于 extract_data.py 是怎么生成 CSV 目录结构的
+    # 如果 CSV 在 output/S/S.csv -> 对应源文件在 after_step1/S/...
+    # 如果 CSV 在 output/S+O/S/S.csv -> 对应源文件在 after_step1/S+O/S/...
+
+    if group_name == anion_name:
+        # 单阴离子情况 (如 output/S/S.csv -> after_step1/S/ID/ID.cif)
+        rel_source_path = os.path.join(DATA_SOURCE_DIR, group_name, material_id, f"{material_id}.cif")
+    else:
+        # 混合阴离子情况 (如 output/S+O/S/S.csv -> after_step1/S+O/S/ID/ID.cif)
        rel_source_path = os.path.join(DATA_SOURCE_DIR, group_name, anion_name, material_id, f"{material_id}.cif")
+
    abs_source_path = os.path.abspath(rel_source_path)

    if not os.path.exists(abs_source_path):
        print(f"源文件不存在: {abs_source_path}")
        return

-    # 2. 构建目标文件夹路径
+    # 2. 构建目标文件夹路径 ../data/after_screening/Group/Anion
+    if group_name == anion_name:
+        target_subdir = os.path.join(TARGET_DIR, group_name)
+    else:
        target_subdir = os.path.join(TARGET_DIR, group_name, anion_name)
+
    if not os.path.exists(target_subdir):
        os.makedirs(target_subdir)

-    # 3. 构建目标链接路径
-    target_link_path = os.path.join(target_subdir, f"{material_id}.cif")
+    # 3. 构建目标文件路径
+    target_file_path = os.path.join(target_subdir, f"{material_id}.cif")

-    # 4. 创建链接
+    # 4. 执行复制 (改为复制以确保结果独立)
    try:
-        # 如果目标已经存在（可能是旧的链接），先删除
-        if os.path.exists(target_link_path) or os.path.islink(target_link_path):
-            os.remove(target_link_path)
+        if os.path.exists(target_file_path):
+            os.remove(target_file_path)
+
+        shutil.copy(abs_source_path, target_file_path)
+        # 如果你非常确定要软链接，请注释上一行，解开下一行：
+        # os.symlink(abs_source_path, target_file_path)

-        os.symlink(abs_source_path, target_link_path)
-        # print(f"Link: {material_id} -> Passed")
    except OSError as e:
-        print(f"创建软链接失败 {material_id}: {e}")
+        print(f"创建文件失败 {material_id}: {e}")


 def process_all_csvs():
@@ -101,46 +109,53 @@ def process_all_csvs():

    print("开始执行 Step 2-4 联合筛选...")

-    # 遍历 output 目录
-    # 结构预期: ../output/Group/Anion/Anion.csv (例如 ../output/O+S/O/O.csv 或 ../output/O/O.csv)
    for root, dirs, files in os.walk(CSV_ROOT_DIR):
        for file in files:
            if file.endswith(".csv"):
                csv_path = os.path.join(root, file)

-                # 推断 Group 和 Anion
-                # root 的末尾应该是 .../Group/Anion
-                # 例如 root = ../output/O+S/O
+                # --- 核心修正 1: 路径解析逻辑 ---
+                # 获取相对于 output 根目录的路径部分
+                # 例如 root = ../output/S -> rel_root = S
+                # 例如 root = ../output/S+O/S -> rel_root = S+O/S
+                rel_root = os.path.relpath(root, CSV_ROOT_DIR)
+                path_parts = rel_root.split(os.sep)

-                path_parts = os.path.normpath(root).split(os.sep)
-                # 倒数第一级是 Anion (O), 倒数第二级是 Group (O+S)
-                if len(path_parts) >= 2:
-                    anion_name = path_parts[-1]
-                    group_name = path_parts[-2]
+                # 解析 Group 和 Anion
+                if len(path_parts) == 1:
+                    # 单层目录: output/S -> Group=S, Anion=S
+                    group_name = path_parts[0]
+                    anion_name = path_parts[0]
+                elif len(path_parts) >= 2:
+                    # 双层目录: output/S+O/S -> Group=S+O, Anion=S
+                    group_name = path_parts[0]
+                    anion_name = path_parts[1]
                else:
-                    print(f"跳过路径结构异常的 CSV: {csv_path}")
+                    # 实际不可达: split 至少返回一个元素；根目录下直接有 csv 时 rel_root == "."，会走上面的单层分支
                    continue

-                # 确保这是一个有效的阴离子类型
                if anion_name not in THRESHOLDS:
+                    print(f"跳过不支持的阴离子类型: {anion_name}")
                    continue

                print(f"正在处理: Group={group_name}, Anion={anion_name} ({file})")

-                # 读取 CSV
-                df = pd.read_csv(csv_path)
+                # --- 核心修正 2: 防止 Filename 被读取为浮点数 ---
+                # dtype={'Filename': str} 强制将 Filename 列读取为字符串
+                df = pd.read_csv(csv_path, dtype={'Filename': str})

                pass_count = 0
                total_count = len(df)

                for index, row in df.iterrows():
-                    material_id = str(row['Filename'])
+                    # 尝试去除 .0 后缀 (以防万一 CSV 里已经写成了浮点格式)；注意 str.replace 会删除字符串中所有的 '.0'，而不仅是末尾的后缀
+                    material_id = str(row['Filename']).replace('.0', '')

                    if check_requirements(row, anion_name):
-                        create_symlink(group_name, anion_name, material_id)
+                        create_result_file(group_name, anion_name, material_id)
                        pass_count += 1

-                print(f"  - 完成: {pass_count}/{total_count} 个材料通过筛选并建立链接。")
+                print(f"  - 完成: {pass_count}/{total_count} 个材料通过筛选并保存至 {TARGET_DIR}。")


 if __name__ == "__main__":