Compare commits

...

2 Commits

Author SHA1 Message Date
76105a631d gsj+LiYCl data 2025-09-23 18:32:34 +08:00
28c2323ce8 CSM calculation 2025-09-22 11:18:39 +08:00
13 changed files with 430784 additions and 37549 deletions

205087
MSD/data/320K/log.lammps Normal file

File diff suppressed because it is too large

10002
MSD/data/320K/msd_li.dat Normal file

File diff suppressed because it is too large

205087
MSD/data/340K/log.lammps Normal file

File diff suppressed because it is too large

10002
MSD/data/340K/msd_li.dat Normal file

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -9,10 +9,11 @@ if __name__ == '__main__':
     # Call the new function
     # It reads the temperature and volume from log.lammps automatically
     results = calculate_conductivity_from_msd(
-        msd_file_path='data/msd_li.dat',
-        log_file_path='data/log.lammps',
+        msd_file_path='data/700K/msd_li.dat',
+        log_file_path='data/700K/log.lammps',
         ion_name='Li⁺',
         charge=1,
         num_ions=num_li_ions,
-        fit_fraction=0.5  # Adjust this value based on the plot
+        fit_start_fraction=0.0,
+        fit_end_fraction=1.0
     )


@@ -63,7 +63,8 @@ def calculate_conductivity_from_msd(
     ion_name,
     charge,
     num_ions,
-    fit_fraction=0.5
+    fit_start_fraction=0.5,
+    fit_end_fraction=1.0
 ):
     """
     Compute the ionic conductivity from MSD data (v2).
@@ -102,9 +103,10 @@ def calculate_conductivity_from_msd(
     time_ps = timesteps * timestep_ps
     # --- 3. Linear fit to extract the diffusion coefficient ---
-    fit_start_index = int(len(time_ps) * (1 - fit_fraction))
-    fit_time_ps = time_ps[fit_start_index:]
-    fit_msd_values = msd_values[fit_start_index:]
+    fit_start_index = int(len(time_ps) * fit_start_fraction)
+    fit_end_index = int(len(time_ps) * fit_end_fraction)
+    fit_time_ps = time_ps[fit_start_index:fit_end_index]
+    fit_msd_values = msd_values[fit_start_index:fit_end_index]
     if len(fit_time_ps) < 2:
         print("Error: fewer than 2 data points in the fit window; linear regression is impossible.")

289
MSD/utils/con2.py Normal file

@@ -0,0 +1,289 @@
import os
import re
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
# Physical constants (SI units)
kB = 1.380649e-23  # Boltzmann constant (J/K)
q = 1.602176634e-19  # elementary charge (C), the charge of Li+
ps_to_s = 1e-12  # picoseconds to seconds
angstrom2_to_m2 = 1e-20  # Å² to m²
angstrom3_to_m3 = 1e-30  # Å³ to m³
def extract_temperature(folder_name):
    """Extract the temperature (K) from a folder name such as '320K'."""
    match = re.search(r'(\d+)K', folder_name)
    if match:
        return int(match.group(1))
    return None
def read_msd(msd_path):
    """Read msd_li.dat; return the time (ps) and the total MSD (Å²)."""
    try:
        data = np.loadtxt(msd_path, comments='#')
    except Exception as e:
        raise ValueError(f"Failed to read the MSD file: {e}")
    if data.size == 0:
        raise ValueError("The MSD file is empty")
    # Convert timesteps to physical time (ps): TimeStep × 0.001, i.e. a 1 fs timestep
    time = data[:, 0] * 0.001  # first column is TimeStep; ×0.001 gives ps
    msd_total = data[:, -1]  # last column is the total MSD
    # Print the time range (key debugging info)
    print(f"  MSD time range: {time.min():.1f} ~ {time.max():.1f} ps")
    return time, msd_total
def calculate_diffusion(time, msd, t_start, t_end):
    """Compute the diffusion coefficient D (m²/s) from the MSD."""
    # Select the data inside the fit window
    mask = (time >= t_start) & (time <= t_end)
    t_filtered = time[mask] * ps_to_s  # to seconds
    msd_filtered = msd[mask] * angstrom2_to_m2  # to m²
    # Make sure the filtered data is not (almost) empty
    if len(t_filtered) < 2:
        raise ValueError(f"Not enough data points after filtering (only {len(t_filtered)}); adjust t_start/t_end")
    # Linear fit of MSD vs time; for 3-D diffusion the slope equals 6D
    slope, _, r_value, _, _ = stats.linregress(t_filtered, msd_filtered)
    D = slope / 6
    # Print the fit info (for debugging)
    print(f"  MSD fit: slope = {slope:.6e}, R² = {r_value**2:.4f}")
    return D
def parse_lammps_data(data_path):
    """Parse a LAMMPS data file; extract the box volume and the Li+ count (Li is atom type 1)."""
    with open(data_path, 'r') as f:
        lines = f.readlines()
    # Total number of atoms
    atom_count_line = next((line for line in lines if 'atoms' in line), None)
    if not atom_count_line:
        raise ValueError("Could not find the total atom count in the data file")
    total_atoms = int(atom_count_line.split()[0])
    # Box dimensions
    x_line = next((line for line in lines if 'xlo xhi' in line), None)
    y_line = next((line for line in lines if 'ylo yhi' in line), None)
    z_line = next((line for line in lines if 'zlo zhi' in line), None)
    if not (x_line and y_line and z_line):
        raise ValueError("Could not find the box dimensions in the data file")
    xlo, xhi = map(float, x_line.split()[:2])
    ylo, yhi = map(float, y_line.split()[:2])
    zlo, zhi = map(float, z_line.split()[:2])
    # Box volume (Å³)
    volume_angstrom3 = (xhi - xlo) * (yhi - ylo) * (zhi - zlo)
    # Li is manually assigned atom type 1
    li_type = 1
    # Count the Li atoms
    atoms_section_start = next((i for i, line in enumerate(lines) if 'Atoms' in line), None)
    if atoms_section_start is None:
        raise ValueError("Could not find the Atoms section in the data file")
    li_count = 0
    for i in range(atoms_section_start + 1, atoms_section_start + total_atoms + 1):
        if i >= len(lines):
            break
        parts = lines[i].split()
        if len(parts) >= 2 and int(parts[1]) == li_type:
            li_count += 1
    if li_count == 0:
        raise ValueError(f"No Li atoms with atom type {li_type} were found")
    return li_count, volume_angstrom3
def calculate_conductivity(D, T, n):
    """Ionic conductivity σ (S/m) from the Nernst-Einstein relation."""
    sigma = (n * q**2 * D) / (kB * T)
    return sigma
def arrhenius_plot(temperatures, D_list, sigma_list):
    """Arrhenius plot: fit separate lines below and above 400 K; extrapolate the 300 K conductivity from the 320-400 K data."""
    plt.rcParams["axes.unicode_minus"] = False  # render minus signs correctly
    temps = np.array(temperatures, dtype=float)
    sigmas = np.array(sigma_list, dtype=float)
    inv_T = 1000 / temps  # 1000/T (1/K)
    # Split the data: 320-400 K (fit + extrapolation) and >400 K (fit only)
    mask_below = (temps >= 320) & (temps <= 400)  # 320-400 K window
    mask_above = temps > 400  # above 400 K
    # Make sure the low-temperature window has enough points
    if np.sum(mask_below) < 2:
        raise ValueError(f"Not enough points in the 320-400 K window ({np.sum(mask_below)}); cannot fit a line")
    # 1. Fit the 320-400 K window (used to extrapolate to 300 K)
    inv_T_below = inv_T[mask_below]
    ln_sigma_below = np.log(sigmas[mask_below])
    slope_below, intercept_below, r_below, _, _ = stats.linregress(inv_T_below, ln_sigma_below)
    Ea_below = -slope_below * kB * 1000  # activation energy (J); the slope of ln σ vs 1000/T is -Ea/(1000 kB)
    Ea_below_eV = Ea_below / q  # convert to eV
    # 2. Fit the >400 K window (if there is enough data)
    slope_above = None
    intercept_above = None
    Ea_above_eV = None
    r_above = None
    if np.sum(mask_above) >= 2:
        inv_T_above = inv_T[mask_above]
        ln_sigma_above = np.log(sigmas[mask_above])
        slope_above, intercept_above, r_above, _, _ = stats.linregress(inv_T_above, ln_sigma_above)
        Ea_above = -slope_above * kB * 1000
        Ea_above_eV = Ea_above / q
    # 3. Extrapolate the 300 K conductivity from the 320-400 K fit line
    T_target = 300  # target temperature (K)
    inv_T_target = 1000 / T_target  # 1000/300 (1/K)
    ln_sigma_target = slope_below * inv_T_target + intercept_below  # extrapolate along the low-T fit line
    sigma_target = np.exp(ln_sigma_target)  # back to a conductivity
    # 4. Plot
    fig, ax1 = plt.subplots(figsize=(10, 6))
    # data points
    ax1.scatter(inv_T_below, ln_sigma_below, label='320-400K Data', color='blue', s=60, zorder=3)
    if np.sum(mask_above) >= 1:
        ax1.scatter(inv_T[mask_above], np.log(sigmas[mask_above]), label='>400K Data', color='red', s=60, zorder=3)
    # fit line (low-temperature window)
    x_fit_below = np.linspace(min(inv_T_below), max(inv_T_below), 100)
    y_fit_below = slope_below * x_fit_below + intercept_below
    ax1.plot(x_fit_below, y_fit_below, '--', color='blue',
             label=f'320-400K Fit (Eₐ={Ea_below_eV:.3f} eV, R²={r_below**2:.3f})', zorder=2)
    # fit line (high-temperature window, if present)
    if slope_above is not None:
        x_fit_above = np.linspace(min(inv_T[mask_above]), max(inv_T[mask_above]), 100)
        y_fit_above = slope_above * x_fit_above + intercept_above
        ax1.plot(x_fit_above, y_fit_above, '--', color='red',
                 label=f'>400K Fit (Eₐ={Ea_above_eV:.3f} eV, R²={r_above**2:.3f})', zorder=2)
    # mark the 300 K extrapolated point
    ax1.scatter([inv_T_target], [ln_sigma_target], color='green', marker='*', s=150,
                label='300K Extrapolated', zorder=4)
    # axes
    ax1.set_xlabel('1000/T (1/K)')
    ax1.set_ylabel('ln(σ) (S/m)')
    ax1.set_title('Arrhenius Plot with Dual Fitting')
    ax1.legend()
    ax1.grid(alpha=0.3)
    # secondary temperature axis on top
    ax2 = ax1.twiny()
    x1_min, x1_max = ax1.get_xlim()
    ax2.set_xlim(x1_min, x1_max)
    ax2.set_xticks(1000 / np.unique(temps))  # ticks at the available temperatures
    ax2.set_xticklabels([f"{int(t)}" for t in np.unique(temps)])
    ax2.set_xlabel('Temperature (K)')
    plt.tight_layout()
    plt.savefig('arrhenius_dual_fit.png', dpi=300)
    plt.show()
    # Report the results
    print("\n===== Fit results =====")
    print("320-400 K window:")
    print(f"  Activation energy Eₐ = {Ea_below_eV:.3f} eV (R² = {r_below**2:.3f})")
    if Ea_above_eV is not None:
        print(">400 K window:")
        print(f"  Activation energy Eₐ = {Ea_above_eV:.3f} eV (R² = {r_above**2:.3f})")
    print("\n===== 300 K extrapolation (from the 320-400 K fit) =====")
    print(f"  Ionic conductivity σ = {sigma_target:.6e} S/m")
    print(f"  In other units: σ = {sigma_target * 10:.6e} mS/cm")  # 1 S/m = 10 mS/cm
if __name__ == "__main__":
    # ------- Parameters that must match your data -------
    base_dir = "../data"  # root data folder
    t_start = 100  # MSD fit start time (ps)
    t_end = 2000  # MSD fit end time (ps)
    # ------------------------------------------------
    temperatures = []
    D_list = []
    sigma_list = []
    temp_folder_pattern = re.compile(r'^\d+K$')
    for folder in os.listdir(base_dir):
        folder_path = os.path.join(base_dir, folder)
        if not os.path.isdir(folder_path) or not temp_folder_pattern.match(folder):
            continue  # skip non-temperature folders
        T = extract_temperature(folder)
        print(f"\nProcessing folder: {folder} (temperature {T} K)")
        if T is None:
            print("  Skipped: could not extract the temperature")
            continue
        # Check that the required files exist
        msd_path = os.path.join(folder_path, "msd_li.dat")
        data_path = os.path.join(folder_path, "LYC.data")
        if not os.path.exists(msd_path):
            print(f"  Skipped: missing {msd_path}")
            continue
        if not os.path.exists(data_path):
            print(f"  Skipped: missing {data_path}")
            continue
        # Parse the Li count and the box volume
        try:
            li_count, volume_angstrom3 = parse_lammps_data(data_path)
            volume_m3 = volume_angstrom3 * angstrom3_to_m3
            n = li_count / volume_m3
            print(f"  Li+ count: {li_count}, box volume: {volume_angstrom3:.2f} Å³")
            print(f"  Carrier concentration: {n:.6e} m⁻³")
        except Exception as e:
            print(f"  Skipped: failed to parse the data file: {e}")
            continue
        # Read the MSD data
        try:
            time, msd = read_msd(msd_path)
        except Exception as e:
            print(f"  Skipped: failed to read the MSD: {e}")
            continue
        # Diffusion coefficient
        try:
            D = calculate_diffusion(time, msd, t_start, t_end)
            print(f"  Diffusion coefficient D = {D:.6e} m²/s")
        except Exception as e:
            print(f"  Skipped: diffusion-coefficient calculation failed: {e}")
            continue
        # Ionic conductivity
        try:
            sigma = calculate_conductivity(D, T, n)
            print(f"  Ionic conductivity σ = {sigma:.6e} S/m")
        except Exception as e:
            print(f"  Skipped: conductivity calculation failed: {e}")
            continue
        # Collect the results
        temperatures.append(T)
        D_list.append(D)
        sigma_list.append(sigma)
    # Arrhenius plot with the two fit lines
    if len(temperatures) >= 2:
        print("\n===== Calculations done; drawing the dual-fit Arrhenius plot =====")
        arrhenius_plot(temperatures, D_list, sigma_list)
    else:
        print(f"\n===== Error: not enough valid data points (only {len(temperatures)}); cannot plot =====")


@@ -0,0 +1,134 @@
from pymatgen.core.structure import Structure
from pymatgen.analysis.local_env import VoronoiNN
import numpy as np
def check_real(nearest):
    """Keep only the neighbor sites whose fractional coordinates lie inside the unit cell."""
    real_nearest = []
    for site in nearest:
        if np.all((site.frac_coords >= 0) & (site.frac_coords <= 1)):
            real_nearest.append(site)
    return real_nearest
def special_check_for_3(site, nearest):
    """Keep only the three nearest anions (for 3-fold coordinated cations such as B or N)."""
    real_nearest = []
    distances = []
    for site2 in nearest:
        distance = np.linalg.norm(np.array(site.frac_coords) - np.array(site2.frac_coords))
        distances.append(distance)
    sorted_indices = np.argsort(distances)
    for index in sorted_indices[:3]:
        real_nearest.append(nearest[index])
    return real_nearest
def CS_catulate(struct, sp='Li', anion=['O'], tol=0, cutoff=3.0, notice=False, ID=None):
    """
    Count how many anions the coordination polyhedra of the non-mobile cations share.

    Args:
        struct (Structure): input structure.
        sp (str): mobile element symbol, default 'Li'.
        anion (list): anion element symbols, default ['O'].
        tol (float): VoronoiNN tolerance, default 0.
        cutoff (float): VoronoiNN cutoff distance, default 3.0.

    Returns:
        dict: counts of cation pairs keyed by the number of shared anions ("2"-"6").
    """
    # Initialize the VoronoiNN object
    if sp == 'Li':
        tol = 0
        cutoff = 3.0
    voro_nn = VoronoiNN(tol=tol, cutoff=cutoff)
    # Counters for the shared-anion relations
    shared_count = {"2": 0, "3": 0, "4": 0, "5": 0, "6": 0}
    # Per-site results
    atom_dice = []
    # Iterate over every site in the structure
    for index, site in enumerate(struct.sites):
        # Skip anion sites
        if site.specie.symbol in anion:
            continue
        # Skip the mobile species (Li)
        if site.specie.symbol == sp:
            continue
        # Voronoi polyhedron of this site
        voro_info = voro_nn.get_voronoi_polyhedra(struct, index)
        # Nearest anion sites
        nearest_anions = [
            nn_info["site"] for nn_info in voro_info.values()
            if nn_info["site"].specie.symbol in anion
        ]
        # Skip if no nearest anions were found
        if not nearest_anions:
            print(f"No nearest anions found for {ID} site {index}.")
            continue
        if site.specie.symbol == 'B' or site.specie.symbol == 'N':
            nearest_anions = special_check_for_3(site, nearest_anions)
        nearest_anions = check_real(nearest_anions)
        # Record this site and its nearest-anion indices
        atom_dice.append({
            'index': index,
            'nearest_index': [nn.index for nn in nearest_anions]
        })
    # Enumerate every pair of recorded atoms
    for i, atom_i in enumerate(atom_dice):
        for j, atom_j in enumerate(atom_dice[i + 1:], start=i + 1):
            # Nearest-anion index sets of the two atoms
            nearest_i = set(atom_i['nearest_index'])
            nearest_j = set(atom_j['nearest_index'])
            # Size of the intersection of the two sets
            shared_count_key = str(len(nearest_i & nearest_j))
            # Update the counter
            if shared_count_key in shared_count:
                shared_count[shared_count_key] += 1
            if notice:
                if shared_count_key == '2':
                    print(f"Sites {atom_j['index']} and {atom_i['index']} share an edge")
                    print(f"Shared anions: {nearest_i & nearest_j}")
                if shared_count_key == '3':
                    print(f"Sites {atom_j['index']} and {atom_i['index']} share a face")
                    print(f"Shared anions: {nearest_i & nearest_j}")
    # # Finally halve the counts if each relation were counted twice
    # # (not needed here: each pair i < j is visited exactly once)
    # for key in shared_count.keys():
    #     shared_count[key] //= 2
    return shared_count
def CS_count(struct, shared_count, sp='Li'):
    count = 0
    for site in struct.sites:
        if site.specie.symbol == sp:
            count += 1  # number of mobile (Li) atoms
    CS_count = 0
    for i in range(2, 7):  # iterate over 2..6 shared anions
        if str(i) in shared_count:  # key present?
            CS_count += shared_count[str(i)] * i  # weight each pair by its shared-anion count
    if count > 0:  # guard against division by zero
        CS_count /= count  # average per mobile ion
    else:
        CS_count = 0  # no mobile ions: return 0
    return CS_count
structure = Structure.from_file("../data/0921/wjy_001.cif")
a = CS_catulate(structure, notice=True)
b = CS_count(structure, a)
print(f"{a}\n{b}")


@@ -47,28 +47,19 @@ class HiddenPrints:
 def non_elements(struct, sp='Li'):
-    '''
-    struct : structure object from Pymatgen
+    """
+    struct : must be an ordered structure
     sp : the mobile specie
-    returns the structure with all of the mobile specie (Li) removed
-    '''
-    num_li = struct.species.count(Element(sp))
-    species = list(set(struct.species))
-    try:
-        species.remove(Element("O"))
-    except ValueError:
-        print("no O")
-    try:
-        species.remove(Element("S"))
-    except ValueError:
-        print("no S")
-    try:
-        species.remove(Element("N"))
-    except ValueError:
-        print("no N")
+    returns a new structure containing only the framework anions (O, S, N).
+    """
+    anions_to_keep = {"O", "S", "N"}
     stripped = struct.copy()
-    stripped.remove_species(species)
-    stripped = stripped.get_sorted_structure(reverse=True)
+    species_to_remove = [el.symbol for el in stripped.composition.elements
+                         if el.symbol not in anions_to_keep]
+    if species_to_remove:
+        stripped.remove_species(species_to_remove)
     return stripped
@@ -160,22 +151,43 @@ def site_env(coord, struct, sp="Li", envtype='both'):
 def extract_sites(struct, sp="Li", envtype='both'):
-    '''
+    """
     struct : structure object from Pymatgen
     envtype : 'tet', 'oct', or 'both'
     sp : target element to analyze environment
-    '''
+    """
     envlist = []
-    for i in range(len(struct.sites)):
-        if struct.sites[i].specie != Element(sp):
-            continue
-        site = struct.sites[i]
-        singleenv = site_env(site.frac_coords, struct, sp, envtype)
-        envlist.append({'frac_coords': site.frac_coords, 'type': singleenv['type'], 'csm': singleenv['csm'],
-                        'volume': singleenv['vol']})
-    return envlist
+    # --- Key change: iterate over the original structure directly, even if it is disordered ---
+    # We no longer call get_sorted_structure() up front;
+    # we only care about the sites whose composition contains the target element sp.
+    # Iterate over every site
+    for i, site in enumerate(struct):
+        # Check whether the site's composition (site.species) contains the element of interest (sp).
+        # site.species.elements returns the elements on the site, e.g. [Element Li, Element Fe];
+        # [el.symbol for el in site.species.elements] converts that to symbols, e.g. ['Li', 'Fe'].
+        site_elements = [el.symbol for el in site.species.elements]
+        if sp in site_elements:
+            # Found the target element: analyze this site's environment.
+            # To keep the downstream functions (non_elements in particular) working,
+            # we hand site_env a one-off, temporary ordered copy of the structure;
+            # this avoids all of the 'ordered structures only' errors we hit before.
+            temp_ordered_struct = struct.get_sorted_structure()
+            singleenv = site_env(site.frac_coords, temp_ordered_struct, sp, envtype)
+            envlist.append({'frac_coords': site.frac_coords, 'type': singleenv['type'], 'csm': singleenv['csm'],
+                            'volume': singleenv['vol']})
+    if not envlist:
+        print(f"Warning: no sites containing element {sp} were found in the structure.")
+    return envlist
def export_envs(envlist, sp='Li', envtype='both', fname=None):
'''
@@ -193,6 +205,6 @@ def export_envs(envlist, sp='Li', envtype='both', fname=None):
             f.write("Site index " + str(index) + ": " + str(i) + '\n')
-struct = Structure.from_file("../data/31960.cif")
-site_info = extract_sites(struct, envtype="both")
-export_envs(site_info, sp="Li", envtype="both")
+# struct = Structure.from_file("../data/0921/wjy_475.cif")
+# site_info = extract_sites(struct, envtype="both")
+# export_envs(site_info, sp="Li", envtype="both")
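The membership test is what makes the new extract_sites loop safe for disordered structures: on a partially occupied site, site.species is a Composition over several elements, so the old comparison struct.sites[i].specie != Element(sp) no longer applies. A minimal, self-contained illustration on a hand-built mixed-occupancy site (illustrative data, not from this repository):

from pymatgen.core import Lattice, Structure

# One site with mixed Li/Fe occupancy on a cubic lattice
struct = Structure(Lattice.cubic(3.0), [{"Li": 0.5, "Fe": 0.5}], [[0, 0, 0]])

for site in struct:
    site_elements = [el.symbol for el in site.species.elements]
    print(site_elements, "Li" in site_elements)  # e.g. ['Li', 'Fe'] True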

127
data_get/data_get.py Normal file

@@ -0,0 +1,127 @@
import pandas as pd
import os
import re
def extract_cif_from_xlsx(
xlsx_path: str,
output_dir: str,
naming_mode: str = 'formula',
name_col: int = 0,
cif_col: int = 1,
prefix: str = 'wjy'
):
"""
从 XLSX 文件中提取 CIF 数据并保存为单独的 .cif 文件。
Args:
xlsx_path (str): 输入的 XLSX 文件的路径。
output_dir (str): 输出 .cif 文件的文件夹路径。
naming_mode (str, optional): CIF 文件的命名模式。
可选值为 'formula' (使用第一列的名字) 或
'auto' (使用前缀+自动递增编号)。
默认为 'formula'
name_col (int, optional): 包含文件名的列的索引从0开始。默认为 0。
cif_col (int, optional): 包含 CIF 内容的列的索引从0开始。默认为 1。
prefix (str, optional): 在 'auto' 命名模式下使用的文件名前缀。默认为 'wjy'
Raises:
FileNotFoundError: 如果指定的 xlsx_path 文件不存在。
ValueError: 如果指定的 naming_mode 无效。
Exception: 处理过程中发生的其他错误。
"""
    # --- 1. Validate the arguments and prepare ---
    if not os.path.exists(xlsx_path):
        raise FileNotFoundError(f"Error: input file not found -> {xlsx_path}")
    if naming_mode not in ['formula', 'auto']:
        raise ValueError(f"Error: 'naming_mode' must be 'formula' or 'auto', got '{naming_mode}'")
    # Create the output directory if it does not exist
    os.makedirs(output_dir, exist_ok=True)
    print(f"CIF files will be saved to: {output_dir}")
    try:
        # --- 2. Read the XLSX file ---
        # header=None: treat the first row as data rather than column names
        df = pd.read_excel(xlsx_path, header=None)
        # Skip the source file's header row ('formula', 'cif')
        if str(df.iloc[0, name_col]).strip().lower() == 'formula' and str(df.iloc[0, cif_col]).strip().lower() == 'cif':
            df = df.iloc[1:]
            print("Detected and skipped a header row.")
        # --- 3. Iterate over the rows and write the files ---
        success_count = 0
        for index, row in df.iterrows():
            # File name and CIF content
            formula_name = str(row[name_col])
            cif_content = str(row[cif_col])
            # Skip rows with missing content
            if pd.isna(row[name_col]) or pd.isna(row[cif_col]) or not cif_content.strip():
                print(f"Warning: row {index + 2} is incomplete; skipped.")
                continue
            # --- 4. Pick the file name according to the naming mode ---
            if naming_mode == 'formula':
                # Sanitize the name: replace characters that are unsafe in file names,
                # e.g. (PO4)3 -> _PO4_3, and / -> _
                safe_filename = re.sub(r'[\\/*?:"<>|()]', '_', formula_name)
                filename = f"{safe_filename}.cif"
            else:  # naming_mode == 'auto'
                # Zero-padded numbering keeps the names uniform, e.g. 001, 002
                filename = f"{prefix}_{success_count + 1:03d}.cif"
            # Full output path
            output_path = os.path.join(output_dir, filename)
            # --- 5. Write the CIF file ---
            try:
                with open(output_path, 'w', encoding='utf-8') as f:
                    f.write(cif_content)
                success_count += 1
            except IOError as e:
                print(f"Error: could not write {output_path}. Reason: {e}")
        print(f"\nDone! Successfully extracted and wrote {success_count} CIF files.")
    except Exception as e:
        print(f"Error while processing the XLSX file: {e}")
# --- Usage example ---
if __name__ == '__main__':
    # The input workbook is expected at 'input/cif_dataset.xlsx', relative to this script
    source_xlsx_file = 'input/cif_dataset.xlsx'
    # If the example file does not exist, create one
    if not os.path.exists(source_xlsx_file):
        print(f"Example file '{source_xlsx_file}' not found; creating one...")
        example_data = {
            'formula': ['Li3Al0.3Ti1.7(PO4)3', 'Li6.5La3Zr1.75W0.25O12', 'Invalid/Name*Test'],
            'cif': ['# CIF Data for Li3Al0.3...\n_atom_site_type_symbol\n Li\n Al\n Ti\n P\n O',
                    '# CIF Data for Li6.5La3...\n_symmetry_space_group_name_H-M \'I a -3 d\'',
                    '# CIF Data for Invalid Name Test']
        }
        os.makedirs(os.path.dirname(source_xlsx_file) or '.', exist_ok=True)  # make sure input/ exists
        pd.DataFrame(example_data).to_excel(source_xlsx_file, index=False, header=True)
        print("Example file created.")
    # --- Example 1: name the files by the 'formula' column ---
    # print("\n--- Example 1: 'formula' naming mode ---")
    # output_folder_1 = 'cif_by_formula'
    # extract_cif_from_xlsx(
    #     xlsx_path=source_xlsx_file,
    #     output_dir=output_folder_1,
    #     naming_mode='formula'
    # )
    # --- Example 2: automatic 'wjy+number' naming ---
    print("\n--- Example 2: 'auto' naming mode ---")
    output_folder_2 = 'cif_by_auto'
    extract_cif_from_xlsx(
        xlsx_path=source_xlsx_file,
        output_dir=output_folder_2,
        naming_mode='auto',
        prefix='wjy'
    )
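The 'formula' naming mode relies entirely on the re.sub character class above; a quick check of what it produces for the script's own example rows:

import re

for name in ['Li3Al0.3Ti1.7(PO4)3', 'Invalid/Name*Test']:
    print(re.sub(r'[\\/*?:"<>|()]', '_', name))
# Li3Al0.3Ti1.7_PO4_3
# Invalid_Name_Test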

Binary file not shown.