CSM及TET,CS
This commit is contained in:
@@ -10,7 +10,7 @@ chmod -R u+w ../Screen
|
|||||||
source $(conda info --base)/etc/profile.d/conda.sh
|
source $(conda info --base)/etc/profile.d/conda.sh
|
||||||
|
|
||||||
# 激活 screen 环境
|
# 激活 screen 环境
|
||||||
conda activate screen
|
conda activate ~/anaconda3/envs/screen
|
||||||
cd py/
|
cd py/
|
||||||
export PYTHONPATH=$(pwd):$PYTHONPATH
|
export PYTHONPATH=$(pwd):$PYTHONPATH
|
||||||
|
|
||||||
@@ -31,7 +31,7 @@ python make_sh.py
|
|||||||
# 3. 运行 Zeo++ 计算
|
# 3. 运行 Zeo++ 计算
|
||||||
echo "============ Stage 2: Zeo++ Calculations ============"
|
echo "============ Stage 2: Zeo++ Calculations ============"
|
||||||
conda deactivate
|
conda deactivate
|
||||||
conda activate zeo
|
conda activate ~/anaconda3/envs/zeo
|
||||||
|
|
||||||
# 进入数据目录
|
# 进入数据目录
|
||||||
cd ../data/after_step1
|
cd ../data/after_step1
|
||||||
@@ -49,7 +49,7 @@ fi
|
|||||||
echo "============ Stage 3: Data Extraction & Analysis ============"
|
echo "============ Stage 3: Data Extraction & Analysis ============"
|
||||||
# 切回 screen 环境
|
# 切回 screen 环境
|
||||||
conda deactivate
|
conda deactivate
|
||||||
conda activate screen
|
conda activate ~/anaconda3/envs/screen
|
||||||
cd ../../py
|
cd ../../py
|
||||||
|
|
||||||
# 3.1 提取 Zeo++ 基础数据
|
# 3.1 提取 Zeo++ 基础数据
|
||||||
|
|||||||
130
py/step1.py
130
py/step1.py
@@ -1,69 +1,113 @@
|
|||||||
from pymatgen.core import Structure
|
from pymatgen.core import Structure
|
||||||
from pymatgen.core.periodic_table import Element, Specie
|
|
||||||
from pymatgen.io.cif import CifWriter
|
|
||||||
from crystal_2 import crystal
|
from crystal_2 import crystal
|
||||||
import crystal_2
|
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
|
|
||||||
|
def get_anion_type(structure):
|
||||||
|
"""
|
||||||
|
判断阴离子类型。
|
||||||
|
仅识别 O, S, Cl, Br 及其组合。
|
||||||
|
其他非金属元素(如 P, N, F 等)将被忽略。
|
||||||
|
"""
|
||||||
|
# 仅保留这四种目标阴离子
|
||||||
|
valid_anions = {'O', 'S', 'Cl', 'Br'}
|
||||||
|
|
||||||
|
# 获取结构中的所有元素符号
|
||||||
|
elements = set([e.symbol for e in structure.composition.elements])
|
||||||
|
|
||||||
|
# 取交集找到当前结构包含的目标阴离子
|
||||||
|
found_anions = elements.intersection(valid_anions)
|
||||||
|
|
||||||
|
if not found_anions:
|
||||||
|
return "Unknown"
|
||||||
|
|
||||||
|
# 如果有多个阴离子,按字母顺序排序并用 '+' 连接
|
||||||
|
sorted_anions = sorted(list(found_anions))
|
||||||
|
return "+".join(sorted_anions)
|
||||||
|
|
||||||
|
|
||||||
def read_files_check_basic(folder_path):
|
def read_files_check_basic(folder_path):
|
||||||
file_contents = []
|
"""
|
||||||
|
读取 CIF 文件,进行基础检查 (check_basic),
|
||||||
|
通过筛选后按自定义阴离子规则分类并整理到 after_step1 文件夹。
|
||||||
|
"""
|
||||||
|
# 输出基础路径
|
||||||
|
output_base = "../data/after_step1"
|
||||||
|
|
||||||
if not os.path.exists(folder_path):
|
if not os.path.exists(folder_path):
|
||||||
print(f"{folder_path} 文件夹不存在")
|
print(f"{folder_path} 文件夹不存在")
|
||||||
return file_contents
|
return
|
||||||
|
|
||||||
for filename in os.listdir(folder_path):
|
# 确保输出目录存在
|
||||||
|
if not os.path.exists(output_base):
|
||||||
|
os.makedirs(output_base)
|
||||||
|
|
||||||
|
cif_files = [f for f in os.listdir(folder_path) if f.endswith(".cif")]
|
||||||
|
print(f"在 {folder_path} 发现 {len(cif_files)} 个 CIF 文件,开始筛选与整理...")
|
||||||
|
|
||||||
|
count_pass = 0
|
||||||
|
|
||||||
|
for filename in cif_files:
|
||||||
file_path = os.path.join(folder_path, filename)
|
file_path = os.path.join(folder_path, filename)
|
||||||
|
|
||||||
if os.path.isfile(file_path):
|
# 1. 调用 crystal_2 进行基础筛选
|
||||||
try:
|
try:
|
||||||
temp = crystal(file_path)
|
temp = crystal(file_path)
|
||||||
file_contents.append(temp)
|
# 进行基础检查 (电荷平衡、化学式检查等)
|
||||||
except Exception as e:
|
|
||||||
print(e)
|
|
||||||
continue # 如果出错跳过当前循环,避免temp未定义报错
|
|
||||||
|
|
||||||
print(f"正在处理{filename}")
|
|
||||||
temp.check_basic()
|
temp.check_basic()
|
||||||
|
|
||||||
if temp.check_basic_result:
|
if not temp.check_basic_result:
|
||||||
# 获取不带后缀的文件名,用于创建同名文件夹
|
print(f"Skipped: {filename} (未通过 check_basic)")
|
||||||
file_base_name = os.path.splitext(filename)[0]
|
continue
|
||||||
|
|
||||||
if not "+" in temp.anion:
|
except Exception as e:
|
||||||
# 单一阴离子情况
|
print(f"Error checking {filename}: {e}")
|
||||||
# 路径变为: ../data/after_step1/Anion/FileBaseName/
|
continue
|
||||||
base_anion_folder = os.path.join("../data/after_step1", f"{temp.anion}")
|
|
||||||
target_folder = os.path.join(base_anion_folder, file_base_name)
|
|
||||||
|
|
||||||
|
# 2. 筛选通过,进行分类整理
|
||||||
|
try:
|
||||||
|
print(f"Processing: {filename} (Passed)")
|
||||||
|
count_pass += 1
|
||||||
|
|
||||||
|
# 为了确保分类逻辑与 Direct 版本一致,重新读取结构判断阴离子
|
||||||
|
# (忽略 crystal_2 内部可能基于 P/N 等元素的命名)
|
||||||
|
struct = Structure.from_file(file_path)
|
||||||
|
anion_type = get_anion_type(struct)
|
||||||
|
|
||||||
|
# 获取不带后缀的文件名 (ID)
|
||||||
|
file_base_name = os.path.splitext(filename)[0]
|
||||||
|
|
||||||
|
# --- 构建目标路径逻辑 (Anion/ID/ID.cif) ---
|
||||||
|
|
||||||
|
if "+" in anion_type:
|
||||||
|
# 混合阴离子情况 (如 S+O)
|
||||||
|
# 分别复制到 S+O/S 和 S+O/O 下
|
||||||
|
sub_anions = anion_type.split("+")
|
||||||
|
for sub in sub_anions:
|
||||||
|
# 路径: ../data/after_step1/S+O/S/123/123.cif
|
||||||
|
target_folder = os.path.join(output_base, anion_type, sub, file_base_name)
|
||||||
if not os.path.exists(target_folder):
|
if not os.path.exists(target_folder):
|
||||||
os.makedirs(target_folder)
|
os.makedirs(target_folder)
|
||||||
|
|
||||||
# 目标文件路径
|
target_file = os.path.join(target_folder, filename)
|
||||||
target_file_path = os.path.join(target_folder, filename)
|
shutil.copy(file_path, target_file)
|
||||||
# 复制文件到目标文件夹
|
else:
|
||||||
shutil.copy(file_path, target_file_path)
|
# 单一阴离子或 Unknown: ../data/after_step1/S/123/123.cif
|
||||||
print(f"文件 {filename}通过基本筛选,已复制到 {target_folder}")
|
target_folder = os.path.join(output_base, anion_type, file_base_name)
|
||||||
else:
|
if not os.path.exists(target_folder):
|
||||||
# 混合阴离子情况
|
os.makedirs(target_folder)
|
||||||
anions = temp.anion.split("+")
|
|
||||||
for anion in anions:
|
|
||||||
# 路径变为: ../data/after_step1/AnionCombination/Anion/FileBaseName/
|
|
||||||
base_group_folder = os.path.join("../data/after_step1", f"{temp.anion}")
|
|
||||||
base_anion_folder = os.path.join(base_group_folder, anion)
|
|
||||||
target_folder = os.path.join(base_anion_folder, file_base_name)
|
|
||||||
|
|
||||||
if not os.path.exists(target_folder):
|
target_file = os.path.join(target_folder, filename)
|
||||||
os.makedirs(target_folder)
|
shutil.copy(file_path, target_file)
|
||||||
|
|
||||||
# 目标文件路径
|
except Exception as e:
|
||||||
target_file_path = os.path.join(target_folder, filename)
|
print(f"Error copying {filename}: {e}")
|
||||||
# 复制文件到目标文件夹
|
|
||||||
shutil.copy(file_path, target_file_path)
|
print(f"处理完成。共 {len(cif_files)} 个文件,通过筛选 {count_pass} 个。")
|
||||||
print(f"文件 {filename}通过基本筛选,已复制到 {target_folder}")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
# 根据你的 readme,MP数据在 input_pre,ICSD在 input
|
||||||
|
# 这里默认读取 input,你可以根据实际情况修改
|
||||||
read_files_check_basic("../data/input")
|
read_files_check_basic("../data/input")
|
||||||
@@ -5,22 +5,26 @@ from pymatgen.core import Structure
|
|||||||
|
|
||||||
def get_anion_type(structure):
|
def get_anion_type(structure):
|
||||||
"""
|
"""
|
||||||
简单判断阴离子类型。
|
判断阴离子类型。
|
||||||
返回: 'O', 'S', 'S+O' 等字符串
|
仅识别 O, S, Cl, Br 及其组合。
|
||||||
|
其他非金属元素(如 P, N, F 等)将被忽略:
|
||||||
|
- Li3PS4 (含 P, S) -> 识别为 S
|
||||||
|
- LiFePO4 (含 P, O) -> 识别为 O
|
||||||
|
- Li3P (仅 P) -> 识别为 Unknown
|
||||||
"""
|
"""
|
||||||
# 定义常见的阴离子列表
|
# --- 修改处:仅保留这四种目标阴离子 ---
|
||||||
valid_anions = {'O', 'S', 'Se', 'Te', 'F', 'Cl', 'Br', 'I', 'N', 'P'}
|
valid_anions = {'O', 'S', 'Cl', 'Br'}
|
||||||
|
|
||||||
# 获取结构中的所有元素符号
|
# 获取结构中的所有元素符号
|
||||||
elements = set([e.symbol for e in structure.composition.elements])
|
elements = set([e.symbol for e in structure.composition.elements])
|
||||||
|
|
||||||
# 取交集找到当前结构包含的阴离子
|
# 取交集找到当前结构包含的目标阴离子
|
||||||
found_anions = elements.intersection(valid_anions)
|
found_anions = elements.intersection(valid_anions)
|
||||||
|
|
||||||
if not found_anions:
|
if not found_anions:
|
||||||
return "Unknown"
|
return "Unknown"
|
||||||
|
|
||||||
# 如果有多个阴离子,按字母顺序排序并用 '+' 连接 (模拟 step1 的逻辑)
|
# 如果有多个阴离子,按字母顺序排序并用 '+' 连接
|
||||||
sorted_anions = sorted(list(found_anions))
|
sorted_anions = sorted(list(found_anions))
|
||||||
return "+".join(sorted_anions)
|
return "+".join(sorted_anions)
|
||||||
|
|
||||||
@@ -30,70 +34,70 @@ def organize_files_direct(input_folder, output_base):
|
|||||||
print(f"输入文件夹不存在: {input_folder}")
|
print(f"输入文件夹不存在: {input_folder}")
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# 确保输出目录存在
|
||||||
|
if not os.path.exists(output_base):
|
||||||
|
os.makedirs(output_base)
|
||||||
|
|
||||||
cif_files = [f for f in os.listdir(input_folder) if f.endswith(".cif")]
|
cif_files = [f for f in os.listdir(input_folder) if f.endswith(".cif")]
|
||||||
print(f"发现 {len(cif_files)} 个 CIF 文件,开始直接整理...")
|
print(f"发现 {len(cif_files)} 个 CIF 文件,开始直接整理...")
|
||||||
|
|
||||||
|
count_dict = {}
|
||||||
|
|
||||||
for filename in cif_files:
|
for filename in cif_files:
|
||||||
file_path = os.path.join(input_folder, filename)
|
file_path = os.path.join(input_folder, filename)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# 仅读取结构用于分类
|
# 读取结构分类
|
||||||
struct = Structure.from_file(file_path)
|
struct = Structure.from_file(file_path)
|
||||||
anion_type = get_anion_type(struct)
|
anion_type = get_anion_type(struct)
|
||||||
|
|
||||||
|
# 统计一下分类情况(可选)
|
||||||
|
count_dict[anion_type] = count_dict.get(anion_type, 0) + 1
|
||||||
|
|
||||||
# 获取不带后缀的文件名 (ID)
|
# 获取不带后缀的文件名 (ID)
|
||||||
file_base_name = os.path.splitext(filename)[0]
|
file_base_name = os.path.splitext(filename)[0]
|
||||||
|
|
||||||
# --- 构建目标路径逻辑 ---
|
# --- 构建目标路径逻辑 ---
|
||||||
# 逻辑:../data/after_step1 / 阴离子类别 / ID / ID.cif
|
# 目标: ../data/after_step1 / AnionType / ID / ID.cif
|
||||||
|
|
||||||
# 处理混合阴离子情况 (如 S+O)
|
|
||||||
if "+" in anion_type:
|
if "+" in anion_type:
|
||||||
# 按照之前的逻辑,如果是混合阴离子,通常会有多层
|
# 混合阴离子情况 (如 S+O)
|
||||||
# 但为了统一后续处理,我们这里将其放入组合名的文件夹下
|
# 将文件复制到 S+O 下的各个子阴离子文件夹中 (S+O/S/ID/ID.cif 和 S+O/O/ID/ID.cif)
|
||||||
# 比如: after_step1/S+O/S/123/123.cif (复杂)
|
# 这样既保留了组合关系,又方便后续脚本按元素查找
|
||||||
# 或者简化为 after_step1/S+O/123/123.cif (简单)
|
|
||||||
# 根据你之前的 make_sh.py 和 extract_data.py,
|
|
||||||
# 只要是 Folder/ID/ID.cif 结构即可。
|
|
||||||
# 为了兼容 analyze_cs.py 的逻辑 (group_name, anion_name),
|
|
||||||
# 这里我们采用 simplified 逻辑:
|
|
||||||
# 如果是混合,我们在第一层建 S+O,第二层建具体的 anion 文件夹(比如首字母排序第一个)
|
|
||||||
# 或者直接: after_step1/S+O/ID/ID.cif -> 这样 group=S+O, anion=ID (不对)
|
|
||||||
|
|
||||||
# 兼容旧代码的最佳实践:
|
|
||||||
# 对于混合 S+O,我们建立 S+O/S/ID/ID.cif 和 S+O/O/ID/ID.cif ?
|
|
||||||
# 不,原 Step1 是把一个文件复制了两份到不同文件夹。
|
|
||||||
# 这里为了简化,我们只复制一份到主阴离子文件夹,或者直接按组合命名。
|
|
||||||
|
|
||||||
# 让我们采用最稳妥的方式:如果是 S+O,放入 S+O/Mix/ID/ID.cif
|
|
||||||
# 这样 group=S+O, anion=Mix。
|
|
||||||
# 但为了让 CS_calc 正常工作,最好还是放入具体的元素文件夹。
|
|
||||||
# 这里我们简单处理:直接放入 S+O/Combined/ID/
|
|
||||||
# 或者根据你的 extract_data.py 逻辑:
|
|
||||||
# 它会遍历 top_dir (S+O) -> sub_anion (S, O)
|
|
||||||
|
|
||||||
# 策略:拆分放入。
|
|
||||||
sub_anions = anion_type.split("+")
|
sub_anions = anion_type.split("+")
|
||||||
for sub in sub_anions:
|
for sub in sub_anions:
|
||||||
|
# 路径: after_step1/S+O/S/123/123.cif
|
||||||
target_folder = os.path.join(output_base, anion_type, sub, file_base_name)
|
target_folder = os.path.join(output_base, anion_type, sub, file_base_name)
|
||||||
if not os.path.exists(target_folder):
|
if not os.path.exists(target_folder):
|
||||||
os.makedirs(target_folder)
|
os.makedirs(target_folder)
|
||||||
shutil.copy(file_path, os.path.join(target_folder, filename))
|
|
||||||
|
|
||||||
print(f"整理: {filename} -> {anion_type} (已复制到各子类)")
|
target_file = os.path.join(target_folder, filename)
|
||||||
|
shutil.copy(file_path, target_file)
|
||||||
|
|
||||||
|
# print(f"整理: {filename} -> {anion_type} (Split)")
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# 单一阴离子: after_step1/S/ID/ID.cif
|
# 单一阴离子或 Unknown: after_step1/S/123/123.cif
|
||||||
target_folder = os.path.join(output_base, anion_type, file_base_name)
|
target_folder = os.path.join(output_base, anion_type, file_base_name)
|
||||||
if not os.path.exists(target_folder):
|
if not os.path.exists(target_folder):
|
||||||
os.makedirs(target_folder)
|
os.makedirs(target_folder)
|
||||||
|
|
||||||
shutil.copy(file_path, os.path.join(target_folder, filename))
|
target_file = os.path.join(target_folder, filename)
|
||||||
print(f"整理: {filename} -> {anion_type}")
|
shutil.copy(file_path, target_file)
|
||||||
|
# print(f"整理: {filename} -> {anion_type}")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"处理 {filename} 失败: {e}")
|
print(f"处理 {filename} 失败: {e}")
|
||||||
|
|
||||||
|
print("整理完成。分类统计:")
|
||||||
|
for k, v in count_dict.items():
|
||||||
|
print(f" {k}: {v}")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
organize_files_direct("../data/input", "../data/after_step1")
|
# 输入路径
|
||||||
|
input_dir = "../data/input" # 如果是MP数据请改为 ../data/input_pre
|
||||||
|
# 输出路径
|
||||||
|
output_dir = "../data/after_step1"
|
||||||
|
|
||||||
|
organize_files_direct(input_dir, output_dir)
|
||||||
Reference in New Issue
Block a user