V1
This commit is contained in:
131
py/pre_process.py
Normal file
131
py/pre_process.py
Normal file
@@ -0,0 +1,131 @@
|
||||
import re
|
||||
import os
|
||||
from pymatgen.core.structure import Structure
|
||||
from pymatgen.core.periodic_table import Element
|
||||
import yaml
|
||||
from pymatgen.core.periodic_table import Specie
|
||||
from expansion import expansion
|
||||
from expansion import process_cif_file
|
||||
def generate_valence_yaml(output_yaml_path):
    """
    Write a YAML mapping of element symbol -> one representative oxidation state.

    Every member of pymatgen's ``Element`` is visited. Elements whose
    ``common_oxidation_states`` is empty are left out of the mapping.

    Parameters:
    output_yaml_path (str): Path of the YAML file to create or overwrite.
    """
    chosen_states = {}
    for el in Element:
        states = el.common_oxidation_states
        if not states:
            # No common oxidation states recorded: skip the element entirely.
            continue
        # Metals and metalloids get their highest common state; every other
        # element gets its lowest (most negative) one.
        pick = max if (el.is_metalloid or el.is_metal) else min
        chosen_states[el.symbol] = pick(states)

    # Persist the mapping in block (non-flow) YAML style.
    with open(output_yaml_path, "w") as handle:
        yaml.dump(chosen_states, handle, default_flow_style=False)
|
||||
|
||||
|
||||
def _has_oxidation_states(structure):
    """Return True when every species on every site of ``structure`` is a ``Specie``."""
    return all(
        isinstance(sp, Specie)
        for site in structure.sites
        for sp in site.species.keys()
    )


def apply_oxidation_states_to_cif(input_cif_path, valence_yaml_path, output_cif_path, calculate_type='low', output_folder=None):
    """
    Write oxidation-state-decorated copies of a CIF structure.

    The structure is loaded from ``input_cif_path`` and passed to
    ``process_cif_file``. If that returns a truthy value, ``expansion`` is
    called and each structure it returns is written to ``output_folder``
    under the matching name it returns. Otherwise the loaded structure is
    written to ``output_cif_path``.

    In both cases, a structure that does not already carry oxidation states
    on every site gets them assigned per element from the YAML mapping at
    ``valence_yaml_path``. Structures that already have them are saved as-is.

    Parameters:
    input_cif_path (str): CIF file to read.
    valence_yaml_path (str): YAML file mapping element symbol -> oxidation state.
        It is only read if at least one structure lacks oxidation states.
    output_cif_path (str): Destination file for the non-expanded case.
    calculate_type (str): Forwarded unchanged to ``expansion``.
    output_folder (str | None): Destination directory for expanded structures.
        When None, the directory part of ``output_cif_path`` is used.

    NOTE(review): the exact meaning of ``process_cif_file``'s return value is
    not visible here; only its truthiness is used -- confirm against
    ``expansion.py``.
    """
    # Load the structure from the CIF file.
    structure = Structure.from_file(input_cif_path)

    if process_cif_file(structure):
        structures, names = expansion(
            input_cif_path,
            '../data/input_oxidation',
            3,
            calculate_type=calculate_type,
            keep_module='random',
        )
        if output_folder is None:
            # os.path.join(None, name) would raise TypeError; write next to
            # the single-file destination instead.
            output_folder = os.path.dirname(output_cif_path)
        targets = [
            (expanded, os.path.join(output_folder, name))
            for expanded, name in zip(structures, names)
        ]
    else:
        targets = [(structure, output_cif_path)]

    valences = None
    for current, destination in targets:
        if not _has_oxidation_states(current):
            # Read the YAML lazily and at most once per call, only when some
            # structure actually lacks oxidation states.
            if valences is None:
                with open(valence_yaml_path, "r") as file:
                    valences = yaml.safe_load(file)
            # Apply oxidation states to the structure.
            current.add_oxidation_state_by_element(valences)

        # Save the (possibly updated) structure to its CIF destination.
        current.to(filename=destination)
|
||||
def data_add_state(input_folder, valence_yaml_path, output_folder, output_occupatition_folder=None, calculate_type='normal'):
    """
    Run ``apply_oxidation_states_to_cif`` on every ``.cif`` file in a folder.

    Each output file is named after the first run of digits found in the
    input file name (``<digits>.cif``). Files without any digit in their name
    are skipped. Processing is best-effort: a failure on one file is reported
    and the loop moves on to the next file.

    Parameters:
    input_folder (str): Directory scanned (non-recursively) for ``.cif`` files.
        If it does not exist, a message is printed and nothing else happens.
    valence_yaml_path (str): Forwarded to ``apply_oxidation_states_to_cif``.
    output_folder (str): Directory for the results; created when missing.
    output_occupatition_folder (str | None): Accepted for compatibility with
        existing callers; not used by this function.
    calculate_type (str): Forwarded to ``apply_oxidation_states_to_cif``.
    """
    if not os.path.exists(input_folder):
        print(f"{input_folder} 文件夹不存在")
        return
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
        print(f"目录 {output_folder} 已创建")

    for filename in os.listdir(input_folder):
        # Only CIF files are processed.
        if not filename.endswith(".cif"):
            continue
        file_path = os.path.join(input_folder, filename)

        # The first run of digits in the file name becomes the output name.
        match = re.search(r'\d+', filename)
        if not match:
            print(f"文件名 {filename} 中未找到数字部分,跳过处理")
            continue

        # Build the output file path.
        output_cif_path = os.path.join(output_folder, match.group(0) + ".cif")

        # Apply oxidation states and save the new file.
        print(f"正在处理{filename}")
        try:
            apply_oxidation_states_to_cif(file_path, valence_yaml_path, output_cif_path, calculate_type=calculate_type, output_folder=output_folder)
        except Exception as e:
            # Batch boundary: keep going with the remaining files, but report
            # what went wrong instead of discarding the exception.
            print(f"{filename}出现问题! {e!r}")
        else:
            # Only announce completion when the file was actually processed.
            print(f"{filename} 已完成")
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Example usage.
    # Step 1: point at the valence YAML file. It is only referenced here, not
    # generated; call generate_valence_yaml(valence_yaml) first if it is missing.
    valence_yaml = "../tool/valence_states.yaml"

    # Step 2: process every CIF file in the input folder.
    data_add_state(
        input_folder="../data/input_pre",
        valence_yaml_path=valence_yaml,
        output_folder="../data/input",
        output_occupatition_folder="../data/input_oxidation",
    )
|
||||
Reference in New Issue
Block a user