"""Joint screening for Steps 2-4: filter materials by CSV metrics and symlink the survivors.

For every ``<Group>/<Anion>/*.csv`` found under ``CSV_ROOT_DIR`` each row is
checked against the per-anion thresholds in ``THRESHOLDS``; rows that pass get
a symlink to their CIF file created under ``TARGET_DIR``.
"""

import math
import os

import pandas as pd

# ================= Configuration =================
# Screening thresholds per anion.
#   perc:  percolation diameter  (Step 2, value must be GREATER than this)
#   min_d: minimum of d          (Step 3, value must be LESS than this)
#   node:  maximum node length   (Step 4, value must be GREATER than this)
THRESHOLDS = {
    "O": {"perc": 0.50, "min_d": 3.0, "node": 2.2},
    "S": {"perc": 0.55, "min_d": 3.0, "node": 2.2},
    "Cl": {"perc": 0.45, "min_d": 3.0, "node": 2.0},
    "Br": {"perc": 0.45, "min_d": 3.0, "node": 2.0},
}

# Paths (relative to the current working directory).
CSV_ROOT_DIR = "../output"  # root directory that holds the CSV files
DATA_SOURCE_DIR = "../data/after_step1"  # root of the original CIF files (link sources)
TARGET_DIR = "../data/after_screening"  # where symlinks to the screened files are placed

# Columns every CSV must provide for a row to be screened and linked.
REQUIRED_COLUMNS = (
    "Filename",
    "Percolation Diameter (A)",
    "Minimum of d",
    "Maximum Node Length (A)",
)
# =================================================


def check_requirements(row, anion_type):
    """Return True when one CSV row satisfies all thresholds of ``anion_type``.

    Args:
        row: Mapping-like object (e.g. a pandas Series or a dict) holding the
            "Percolation Diameter (A)", "Minimum of d" and
            "Maximum Node Length (A)" values.
        anion_type: Key into ``THRESHOLDS`` (e.g. "O", "S", "Cl", "Br").

    Returns:
        True only if all three criteria hold. False if the anion type is
        unknown (a warning is printed), if any value is missing, NaN or not
        convertible to float, or if any criterion fails.

    Raises:
        KeyError: If ``row`` lacks one of the three metric keys.
    """
    config = THRESHOLDS.get(anion_type)
    if not config:
        print(f"Warning: 未知的阴离子类型 {anion_type},跳过筛选。")
        return False

    try:
        # float() rejects None and non-numeric strings; those rows are
        # treated as "not passing" instead of aborting the run.
        perc = float(row["Percolation Diameter (A)"])
        min_d = float(row["Minimum of d"])
        node = float(row["Maximum Node Length (A)"])
    except (ValueError, TypeError):
        return False

    # Empty CSV cells arrive as NaN; reject them explicitly.
    if math.isnan(perc) or math.isnan(min_d) or math.isnan(node):
        return False

    # Step 2: percolation diameter must exceed the threshold.
    # Step 3: minimum distance must be below the threshold.
    # Step 4: maximum node length must exceed the threshold.
    return (
        perc > config["perc"]
        and min_d < config["min_d"]
        and node > config["node"]
    )


def create_symlink(group_name, anion_name, material_id):
    """Create a symlink for one screened material.

    Source: ``DATA_SOURCE_DIR/<group>/<anion>/<id>/<id>.cif``
    Target: ``TARGET_DIR/<group>/<anion>/<id>.cif``

    Args:
        group_name: Group directory name (e.g. "O+S").
        anion_name: Anion directory name (e.g. "O").
        material_id: Material identifier, used for both the source folder and
            the file name.

    Returns:
        True if the link was created; False if the source file does not exist
        or the link could not be created (a message is printed in both cases).
    """
    # 1. Source path. The link stores an absolute path so that it does not
    #    depend on the directory it is resolved from.
    rel_source_path = os.path.join(
        DATA_SOURCE_DIR, group_name, anion_name, material_id, f"{material_id}.cif"
    )
    abs_source_path = os.path.abspath(rel_source_path)

    if not os.path.exists(abs_source_path):
        print(f"源文件不存在: {abs_source_path}")
        return False

    # 2./3. Target directory and link path.
    target_subdir = os.path.join(TARGET_DIR, group_name, anion_name)
    target_link_path = os.path.join(target_subdir, f"{material_id}.cif")

    # 4. Create the link.
    try:
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(target_subdir, exist_ok=True)
        # lexists() is also True for a dangling symlink, which exists() misses.
        if os.path.lexists(target_link_path):
            os.remove(target_link_path)
        os.symlink(abs_source_path, target_link_path)
    except OSError as e:
        print(f"创建软链接失败 {material_id}: {e}")
        return False
    return True


def process_all_csvs():
    """Walk ``CSV_ROOT_DIR``, screen every CSV found and link the passing materials.

    Expected layout: ``CSV_ROOT_DIR/<Group>/<Anion>/<name>.csv`` (for example
    ``../output/O+S/O/O.csv``). CSVs whose parent directory name is not a key
    of ``THRESHOLDS`` are ignored. CSVs that cannot be parsed or that lack a
    required column are reported and skipped, so one bad file does not abort
    the whole run.
    """
    if not os.path.exists(CSV_ROOT_DIR):
        print(f"CSV 目录不存在: {CSV_ROOT_DIR}")
        return

    print("开始执行 Step 2-4 联合筛选...")

    for root, _dirs, files in os.walk(CSV_ROOT_DIR):
        for file in files:
            if not file.endswith(".csv"):
                continue

            csv_path = os.path.join(root, file)

            # Infer Group and Anion from the directory holding the CSV:
            # the last component is the anion, the one before it the group.
            # NOTE(review): for a CSV sitting directly in CSV_ROOT_DIR/<Anion>/
            # the "group" becomes the name of the root directory itself --
            # confirm whether that layout is actually produced upstream.
            path_parts = os.path.normpath(root).split(os.sep)
            if len(path_parts) < 2:
                print(f"跳过路径结构异常的 CSV: {csv_path}")
                continue
            anion_name = path_parts[-1]
            group_name = path_parts[-2]

            # Only directories named after a configured anion are screened.
            if anion_name not in THRESHOLDS:
                continue

            print(f"正在处理: Group={group_name}, Anion={anion_name} ({file})")

            try:
                # Read IDs as text: dtype inference would turn "007" into 7
                # and, when the column has blanks, 141 into 141.0.
                df = pd.read_csv(csv_path, dtype={"Filename": str})
            except (pd.errors.EmptyDataError, pd.errors.ParserError) as e:
                print(f"  - 读取 CSV 失败,已跳过 {csv_path}: {e}")
                continue

            missing_columns = [c for c in REQUIRED_COLUMNS if c not in df.columns]
            if missing_columns:
                print(f"  - CSV 缺少必要的列 {missing_columns},已跳过: {csv_path}")
                continue

            pass_count = 0
            linked_count = 0
            total_count = len(df)

            for _, row in df.iterrows():
                raw_id = row["Filename"]
                # A blank ID cell is NaN; str() would turn it into "nan".
                if pd.isna(raw_id):
                    continue
                material_id = str(raw_id).strip()
                if not material_id:
                    continue

                if check_requirements(row, anion_name):
                    pass_count += 1
                    if create_symlink(group_name, anion_name, material_id):
                        linked_count += 1

            print(f"  - 完成: {pass_count}/{total_count} 个材料通过筛选并建立链接。")
            if linked_count != pass_count:
                print(f"  - 警告: 其中 {pass_count - linked_count} 个材料的软链接创建失败。")


if __name__ == "__main__":
    process_all_csvs()