Files
screen/py/pre_process.py
2025-12-07 13:56:33 +08:00

132 lines
5.4 KiB
Python

import re
import os
from pymatgen.core.structure import Structure
from pymatgen.core.periodic_table import Element
import yaml
from pymatgen.core.periodic_table import Specie
from expansion import expansion
from expansion import process_cif_file
def generate_valence_yaml(output_yaml_path):
    """
    Write a YAML mapping of element symbol -> one representative oxidation state.

    For every element that lists at least one common oxidation state, a single
    value is selected: the largest one for metals and metalloids, the smallest
    (most negative) one for all other elements. Elements with no common
    oxidation states are left out of the mapping.

    Parameters:
    output_yaml_path (str): Destination path of the YAML file (overwritten).
    """
    chosen_states = {}
    for el in Element:
        candidates = el.common_oxidation_states
        if not candidates:
            # Nothing to choose from for this element; leave it out entirely.
            continue
        # Metals/metalloids -> highest state, everything else -> lowest state.
        pick = max if (el.is_metalloid or el.is_metal) else min
        chosen_states[el.symbol] = pick(candidates)

    # Persist the mapping in block (non-flow) YAML style.
    with open(output_yaml_path, "w") as handle:
        yaml.dump(chosen_states, handle, default_flow_style=False)
def _has_oxidation_states(structure):
    """Return True when every species on every site is an oxidation-state-aware species."""
    # Imported locally so this block does not depend on a file-level import change.
    # NOTE(review): in current pymatgen ``Specie`` appears to be a legacy subclass of
    # ``Species``, so testing only ``Specie`` would miss species built by the parsers.
    # Accepting both names is a superset of the previous check; confirm against the
    # installed pymatgen version.
    from pymatgen.core.periodic_table import Species

    return all(
        isinstance(sp, (Specie, Species))
        for site in structure.sites
        for sp in site.species.keys()
    )


def _load_valences(valence_yaml_path):
    """Read the element -> oxidation state mapping from the YAML file."""
    with open(valence_yaml_path, "r", encoding="utf-8") as file:
        return yaml.safe_load(file)


def apply_oxidation_states_to_cif(input_cif_path, valence_yaml_path, output_cif_path, calculate_type='low', output_folder=None):
    """
    Write CIF output(s) for one input CIF, adding oxidation states where missing.

    The input structure is loaded and passed to ``process_cif_file``. When that
    returns a truthy value, ``expansion`` is called on the input file and every
    returned structure is written to ``output_folder`` under its returned name.
    Otherwise the single loaded structure is written to ``output_cif_path``.
    In both cases a structure only receives oxidation states from the YAML file
    when not all of its species already carry one; it is saved either way.

    Parameters:
    input_cif_path (str): Path of the CIF file to read.
    valence_yaml_path (str): YAML file whose mapping is handed to
        ``Structure.add_oxidation_state_by_element``. It is read lazily and at
        most once per call.
    output_cif_path (str): Output path used when no expansion takes place.
    calculate_type (str): Forwarded unchanged to ``expansion``.
    output_folder (str | None): Directory for expanded structures. When None,
        the directory of ``output_cif_path`` is used instead.
    """
    structure = Structure.from_file(input_cif_path)

    # NOTE(review): what a truthy result of process_cif_file means is defined in
    # the expansion module; here it only selects the expansion branch.
    if process_cif_file(structure):
        # NOTE(review): '../data/input_oxidation' and 3 are fixed arguments whose
        # meaning is defined by expansion(); kept exactly as before.
        structures, names = expansion(
            input_cif_path,
            '../data/input_oxidation',
            3,
            calculate_type=calculate_type,
            keep_module='random',
        )
        if output_folder is None:
            # os.path.join(None, name) would raise TypeError; fall back to the
            # directory the single-structure output would have been written to.
            output_folder = os.path.dirname(output_cif_path)
        jobs = [
            (expanded, os.path.join(output_folder, name))
            for expanded, name in zip(structures, names)
        ]
    else:
        jobs = [(structure, output_cif_path)]

    valences = None  # loaded on first use so the YAML is read at most once
    for item, target_path in jobs:
        if not _has_oxidation_states(item):
            # Only read the YAML and assign states when they are missing.
            if valences is None:
                valences = _load_valences(valence_yaml_path)
            item.add_oxidation_state_by_element(valences)
        # The structure is saved whether or not it was modified.
        item.to(filename=target_path)
def data_add_state(input_folder, valence_yaml_path, output_folder, output_occupatition_folder=None, calculate_type='normal'):
    """
    Run ``apply_oxidation_states_to_cif`` on every ``.cif`` file in a folder.

    Each input file is mapped to ``<first run of digits in its name>.cif``
    inside ``output_folder``; files whose name contains no digits are skipped
    with a message. A failure on one file is reported (including the exception
    type and message) and does not stop the remaining files. The completion
    message is printed only for files that were processed without an error.

    Parameters:
    input_folder (str): Folder scanned (non-recursively) for ``.cif`` files.
        If it does not exist a message is printed and nothing else happens.
    valence_yaml_path (str): Forwarded to ``apply_oxidation_states_to_cif``.
    output_folder (str): Destination folder; created when missing.
    output_occupatition_folder (str | None): Accepted for interface
        compatibility; not used by this function.
    calculate_type (str): Forwarded to ``apply_oxidation_states_to_cif``.
    """
    if not os.path.exists(input_folder):
        print(f"{input_folder} 文件夹不存在")
        return

    if not os.path.exists(output_folder):
        os.makedirs(output_folder, exist_ok=True)
        print(f"目录 {output_folder} 已创建")

    for filename in os.listdir(input_folder):
        # Only CIF files are processed.
        if not filename.endswith(".cif"):
            continue

        # The output name is the first run of digits found in the file name.
        match = re.search(r'\d+', filename)
        if match is None:
            print(f"文件名 {filename} 中未找到数字部分,跳过处理")
            continue

        file_path = os.path.join(input_folder, filename)
        output_cif_path = os.path.join(output_folder, match.group(0) + ".cif")

        print(f"正在处理{filename}")
        try:
            apply_oxidation_states_to_cif(
                file_path,
                valence_yaml_path,
                output_cif_path,
                calculate_type=calculate_type,
                output_folder=output_folder,
            )
        except Exception as exc:
            # Best-effort batch run: report the failure with its cause and
            # carry on with the next file instead of aborting the whole run.
            print(f"{filename}出现问题!")
            print(f"    {type(exc).__name__}: {exc}")
        else:
            # Reported only on success so a failed file is never shown as done.
            print(f"{filename} 已完成")
if __name__ == "__main__":
    # Example usage.
    # The valence YAML is only referenced by path here; it is not generated by
    # this entry point (generate_valence_yaml is not called).
    # Arguments, in order: input folder, valence YAML, output folder, and the
    # folder passed as output_occupatition_folder.
    data_add_state(
        "../data/input_pre",
        "../tool/valence_states.yaml",
        "../data/input",
        "../data/input_oxidation",
    )
    # Step 3 (occupation processing) is currently disabled.