diff --git a/.idea/vcs.xml b/.idea/vcs.xml
index d843f34..94a25f7 100644
--- a/.idea/vcs.xml
+++ b/.idea/vcs.xml
@@ -1,4 +1,6 @@
-
+
+
+
\ No newline at end of file
diff --git a/3D_construction/main.py b/3D_construction/main.py
new file mode 100644
index 0000000..c6f13d8
--- /dev/null
+++ b/3D_construction/main.py
@@ -0,0 +1,186 @@
+import os
+import cv2
+import numpy as np
+
+from script.yolo_detector import detect_crop_area
+from script.linknet_segmentor import segment_and_find_endpoints
+from script.reconstruction import visualize_reconstructed_seams, reconstruct_points
+from script.pose_estimation import get_ground_truth_seams, reproject_to_object_coords  # we only need the ground truth here
+# Import the final reconstruction pipeline
+from script.final_reconstruction import final_reconstruction_pipeline
+# merge_seams is re-exported by global_optimizer, so one import covers both
+from script.global_optimizer import run_global_optimization, merge_seams
+
+def run_full_recognition_pipeline():
+ """
+    Run the full recognition pipeline: YOLO localization -> LinkNet segmentation -> endpoint extraction.
+    """
+    # 1. Define paths
+ base_dir = os.path.dirname(os.path.abspath(__file__))
+ data_map = {
+ 'up': {
+ 'l_img': os.path.join(base_dir, 'data', 'origin', 'up', 'l1.jpeg'),
+ 'r_img': os.path.join(base_dir, 'data', 'origin', 'up', 'r1.jpeg'),
+ 'yolo_model': os.path.join(base_dir, 'module', 'yolov8', 'up.pt'),
+ 'linknet_models': {
+ 'line1': os.path.join(base_dir, 'module', 'linknet', 'best_linknet_up_model_line1.pth'),
+ 'line2': os.path.join(base_dir, 'module', 'linknet', 'best_linknet_up_model_line2.pth')
+ }
+ },
+ 'bottom': {
+ 'l_img': os.path.join(base_dir, 'data', 'origin', 'bottom', 'l1.jpeg'),
+ 'r_img': os.path.join(base_dir, 'data', 'origin', 'bottom', 'r1.jpeg'),
+ 'yolo_model': os.path.join(base_dir, 'module', 'yolov8', 'bottom.pt'),
+ 'linknet_models': {
+ 'line1': os.path.join(base_dir, 'module', 'linknet', 'best_linknet_bottom_model_line1.pth'),
+ 'line2': os.path.join(base_dir, 'module', 'linknet', 'best_linknet_bottom_model_line2.pth')
+ }
+ }
+ }
+ output_dir = os.path.join(base_dir, 'data', 'processed')
+ os.makedirs(output_dir, exist_ok=True)
+
+ all_endpoints = {}
+
+ for part, paths in data_map.items():
+ print(f"\n--- Processing '{part}' part ---")
+ for img_path, side in [(paths['l_img'], 'l'), (paths['r_img'], 'r')]:
+ print(f"\n-- Analyzing image: {os.path.basename(img_path)} --")
+
+ crop_box = detect_crop_area(img_path, paths['yolo_model'])
+ if not crop_box:
+ print(f"Skipping further processing for {os.path.basename(img_path)}.")
+ continue
+
+ original_image_vis = cv2.imread(img_path)
+
+ for line_name, linknet_path in paths['linknet_models'].items():
+ endpoints = segment_and_find_endpoints(original_image_vis, crop_box, linknet_path)
+
+ if endpoints:
+ start_pt, end_pt = endpoints
+ result_key = f"{part}_{side}_{line_name}"
+ all_endpoints[result_key] = {'start': start_pt, 'end': end_pt}
+
+                    # --- Draw the results on the visualization image (enhanced) ---
+                    # 1. Draw endpoint circles
+                    cv2.circle(original_image_vis, start_pt, 15, (0, 255, 0), -1)  # green start point
+                    cv2.circle(original_image_vis, end_pt, 15, (0, 0, 255), -1)  # red end point
+                    # 2. Draw the connecting line
+                    cv2.line(original_image_vis, start_pt, end_pt, (255, 0, 0), 4)
+
+                    # 3. Add a text label
+                    # Use the segment midpoint as the text anchor
+                    mid_point = ((start_pt[0] + end_pt[0]) // 2, (start_pt[1] + end_pt[1]) // 2)
+                    # Place the text just above the midpoint
+                    text_pos = (mid_point[0], mid_point[1] - 20)
+ cv2.putText(original_image_vis,
+ result_key,
+ text_pos,
+ cv2.FONT_HERSHEY_SIMPLEX,
+                                2,  # font scale
+                                (255, 255, 0),  # font color (cyan)
+                                4,  # thickness
+ cv2.LINE_AA)
+
+            # Draw the YOLO box and save the final visualization result
+ cv2.rectangle(original_image_vis, (crop_box[0], crop_box[1]), (crop_box[2], crop_box[3]), (0, 255, 255), 4)
+ save_path = os.path.join(output_dir, f'{part}_{side}_final_result.jpg')
+ cv2.imwrite(save_path, original_image_vis)
+ print(f"Saved final visualization to {save_path}")
+    # 3. Print the summary
+ print("\n--- Final Endpoints Summary (in original image coordinates) ---")
+ for name, points in all_endpoints.items():
+ print(f"{name}: Start={points['start']}, End={points['end']}")
+
+ return all_endpoints
+
+
+def run_3d_reconstruction(all_2d_endpoints):
+ """
+    Reconstruct the 3D weld seams from the recognized 2D endpoints.
+ """
+ print("\n--- Starting 3D Reconstruction ---")
+
+    # This dict will store the final 3D coordinates
+ reconstructed_seams_3d = {}
+
+    # Seam pairs to reconstruct
+    # e.g. 'up_line1' pairs up_l_line1 with up_r_line1
+ seam_pairs = ['up_line1', 'up_line2', 'bottom_line1', 'bottom_line2']
+
+ for seam_name in seam_pairs:
+ key_L = f"{seam_name.split('_')[0]}_l_{seam_name.split('_')[1]}" # e.g., 'up_l_line1'
+ key_R = f"{seam_name.split('_')[0]}_r_{seam_name.split('_')[1]}" # e.g., 'up_r_line1'
+
+        # Check that the points from both cameras were recognized
+ if key_L not in all_2d_endpoints or key_R not in all_2d_endpoints:
+ print(f"Warning: Missing points for seam '{seam_name}'. Cannot reconstruct.")
+ continue
+
+        # Prepare the input point lists: [start_point, end_point]
+ points_L = [all_2d_endpoints[key_L]['start'], all_2d_endpoints[key_L]['end']]
+ points_R = [all_2d_endpoints[key_R]['start'], all_2d_endpoints[key_R]['end']]
+
+        # Call the reconstruction function.
+        # Assumes the images are 4000x3000; change this if they are not.
+        # This parameter is important and must match the image size used during calibration!
+ points_3d = reconstruct_points(points_L, points_R, image_size=(4000, 3000))
+
+ reconstructed_seams_3d[seam_name] = {
+ 'start_3d': points_3d[0],
+ 'end_3d': points_3d[1]
+ }
+
+    # --- Print the final 3D coordinates ---
+ print("\n--- Final 3D Seam Endpoints (in Left Camera Coordinate System, unit: mm) ---")
+ for name, points in reconstructed_seams_3d.items():
+ start_str = np.array2string(points['start_3d'], formatter={'float_kind': lambda x: "%.3f" % x})
+ end_str = np.array2string(points['end_3d'], formatter={'float_kind': lambda x: "%.3f" % x})
+ print(f"{name}:")
+ print(f" Start 3D: {start_str}")
+ print(f" End 3D: {end_str}")
+
+ return reconstructed_seams_3d
+
+
+def run_new_reconstruction_pipeline(all_2d_endpoints):
+ """
+    Brand-new reconstruction and stitching pipeline based on solvePnP.
+ """
+ print("\n--- Starting NEW Reconstruction Pipeline (with solvePnP) ---")
+
+    # --- Process the upper ('up') part ---
+    print("\nProcessing 'up' part...")
+    reconstructed_up = reproject_to_object_coords(all_2d_endpoints, all_2d_endpoints, part_type='up')
+
+    # --- Process the lower ('bottom') part ---
+    print("\nProcessing 'bottom' part...")
+    reconstructed_bottom = reproject_to_object_coords(all_2d_endpoints, all_2d_endpoints, part_type='bottom')
+
+    # --- Merge the results ---
+ final_reconstructed_seams = {}
+ if reconstructed_up:
+ final_reconstructed_seams.update(reconstructed_up)
+ if reconstructed_bottom:
+ final_reconstructed_seams.update(reconstructed_bottom)
+
+ return final_reconstructed_seams
+
+
+if __name__ == '__main__':
+ final_2d_endpoints = run_full_recognition_pipeline()
+ ground_truth = get_ground_truth_seams()
+
+ final_4_seams = {}
+ if final_2d_endpoints:
+        # Run the global optimization directly
+ final_4_seams = run_global_optimization(final_2d_endpoints, ground_truth)
+
+ final_3_seam_model = {}
+ if final_4_seams:
+ final_3_seam_model = merge_seams(final_4_seams)
\ No newline at end of file
diff --git a/3D_construction/main_deploy.py b/3D_construction/main_deploy.py
new file mode 100644
index 0000000..60c09b4
--- /dev/null
+++ b/3D_construction/main_deploy.py
@@ -0,0 +1,144 @@
+import os
+import cv2
+import numpy as np
+
+# Import the required modules
+from script.yolo_detector import detect_crop_area
+from script.linknet_segmentor import segment_and_find_endpoints
+from script.final_reconstruction import merge_seams  # we still need the merge function
+from script.reconstruction import visualize_reconstructed_seams  # and the visualization function
+
+
+def reconstruct_with_optimized_params(points_L, points_R, calib_data, image_size=(4000, 3000)):
+ """
+    Efficient standard stereo reconstruction using the optimized parameter file.
+    Returns the 3D points in the left camera coordinate system.
+ """
+    # Load the new intrinsics and extrinsics from the calibration data
+ K_L = np.array([
+ [calib_data['optimized_intrinsics_L'][0], 0, calib_data['optimized_intrinsics_L'][2]],
+ [0, calib_data['optimized_intrinsics_L'][1], calib_data['optimized_intrinsics_L'][3]],
+ [0, 0, 1]
+ ])
+ kc_L = calib_data['dist_coeffs_L']
+
+ K_R = np.array([
+ [calib_data['optimized_intrinsics_R'][0], 0, calib_data['optimized_intrinsics_R'][2]],
+ [0, calib_data['optimized_intrinsics_R'][1], calib_data['optimized_intrinsics_R'][3]],
+ [0, 0, 1]
+ ])
+ kc_R = calib_data['dist_coeffs_R']
+
+    # Use the new, optimized extrinsics!
+    new_extrinsics = calib_data['new_extrinsics'].item()  # .item() extracts the dict from the numpy object array
+ R = new_extrinsics['R']
+ t = new_extrinsics['t']
+
+    # Standard stereo rectification and triangulation
+    R1, R2, P1, P2, _, _, _ = cv2.stereoRectify(K_L, kc_L, K_R, kc_R, image_size, R, t)
+
+    # Map the pixels into the rectified frames (R1/R2) matching P1/P2 before triangulating
+    points_L_undistorted = cv2.undistortPoints(np.array(points_L, dtype=np.float32), K_L, kc_L, R=R1, P=P1)
+    points_R_undistorted = cv2.undistortPoints(np.array(points_R, dtype=np.float32), K_R, kc_R, R=R2, P=P2)
+
+ points_4d_hom = cv2.triangulatePoints(P1, P2, points_L_undistorted.reshape(-1, 2).T,
+ points_R_undistorted.reshape(-1, 2).T)
+ points_3d_camL = (points_4d_hom[:3] / points_4d_hom[3]).T
+
+ return points_3d_camL
+
+
+def get_transform_from_pose(pose):
+ """从6自由度位姿向量计算 4x4 逆变换矩阵(相机->物体)。"""
+ rvec, tvec = pose[:3], pose[3:]
+ R_cam_from_obj, _ = cv2.Rodrigues(rvec)
+ R_obj_from_cam = R_cam_from_obj.T
+ t_obj_from_cam = -R_obj_from_cam @ tvec
+
+ transform_matrix = np.eye(4)
+ transform_matrix[:3, :3] = R_obj_from_cam
+ transform_matrix[:3, 3] = t_obj_from_cam.flatten()
+ return transform_matrix
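+
+# Illustrative sketch (not part of the pipeline): how the matrix returned by
+# get_transform_from_pose is meant to be used. The pose vector below is hypothetical.
+#
+#   pose = np.array([0.1, -0.2, 0.05, 12.0, -3.0, 450.0])   # [rvec, tvec], object -> camera
+#   T_obj_from_cam = get_transform_from_pose(pose)           # 4x4, camera -> object
+#   p_cam = np.array([10.0, 20.0, 500.0, 1.0])               # homogeneous point in camera coords
+#   p_obj = (T_obj_from_cam @ p_cam)[:3]                     # the same point in object coords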
+
+
+def run_deployment_pipeline(calib_data):
+ """
+    Final deployment pipeline: load the calibration file and reconstruct quickly.
+ """
+ print("--- Running Deployment Pipeline with Optimized Parameters ---")
+
+    # 1. Run the 2D recognition (same as before).
+    # You can copy run_full_recognition_pipeline over from the earlier main.py,
+    # or write a simplified version here.
+    from main import run_full_recognition_pipeline  # assumes the earlier main.py is still present
+ all_2d_endpoints = run_full_recognition_pipeline()
+ if not all_2d_endpoints:
+ print("2D recognition failed. Exiting.")
+ return
+
+ reconstructed_4_seams = {}
+
+    # 2. Reconstruct and transform the 'up' and 'bottom' parts separately
+ for part_type in ['up', 'bottom']:
+ print(f"\nProcessing '{part_type}' part...")
+
+        # a. Collect all 2D points of this part
+ points_L, points_R, seam_keys = [], [], []
+ for line_name in ['line1', 'line2']:
+ key_L = f"{part_type}_l_{line_name}"
+ key_R = f"{part_type}_r_{line_name}"
+ points_L.extend([all_2d_endpoints[key_L]['start'], all_2d_endpoints[key_L]['end']])
+ points_R.extend([all_2d_endpoints[key_R]['start'], all_2d_endpoints[key_R]['end']])
+ seam_keys.append(f"{part_type}_{line_name}")
+
+        # b. Standard stereo reconstruction with the optimized parameters
+ points_camL = reconstruct_with_optimized_params(points_L, points_R, calib_data)
+
+        # c. Get and apply the corresponding transform matrix.
+        # Note: we assume the camera-object relation was fixed across the two captures,
+        # so in theory the 'up' and 'bottom' transforms should be identical.
+        # We use the pose computed from the 'up' capture as the global reference.
+ global_transform = get_transform_from_pose(calib_data['pose_up_L'])
+
+ points_camL_hom = np.hstack([points_camL, np.ones((points_camL.shape[0], 1))])
+ points_object = (global_transform @ points_camL_hom.T).T[:, :3]
+
+        # d. Collect the results
+ for i, key in enumerate(seam_keys):
+ reconstructed_4_seams[key] = {'start_3d': points_object[i * 2], 'end_3d': points_object[i * 2 + 1]}
+
+    # 3. Merge into the final three-seam model
+ final_3_seam_model = merge_seams(reconstructed_4_seams)
+
+    # 4. Print and visualize
+ print("\n--- Final 3-Seam Model (Object Coordinate System) ---")
+ for name, points in final_3_seam_model.items():
+ start_str = np.array2string(points['start_3d'], formatter={'float_kind': lambda x: "%.2f" % x})
+ end_str = np.array2string(points['end_3d'], formatter={'float_kind': lambda x: "%.2f" % x})
+ print(f"{name}: Start={start_str}, End={end_str}")
+
+    # Visualization...
+ from script.pose_estimation import get_ground_truth_seams
+ ground_truth_data = get_ground_truth_seams()
+ comparison_data = {}
+ for name, points in final_3_seam_model.items():
+ comparison_data[name + '_final'] = points
+ comparison_data['bottom_left_truth'] = ground_truth_data['bottom_line1']
+ comparison_data['middle_truth'] = ground_truth_data['up_line2']
+ comparison_data['top_left_truth'] = ground_truth_data['up_line1']
+ visualize_reconstructed_seams(comparison_data)
+
+
+if __name__ == '__main__':
+    # Calibration file path
+ calib_file_path = 'optimized_camera_parameters.npz'
+
+ if not os.path.exists(calib_file_path):
+ print(f"Error: Calibration file not found at '{calib_file_path}'")
+ print("Please run the main.py with the global optimization first to generate this file.")
+ else:
+        # Load the calibration file
+ print(f"Loading optimized parameters from '{calib_file_path}'...")
+ calibration_data = np.load(calib_file_path, allow_pickle=True)
+
+        # Run the deployment pipeline
+ run_deployment_pipeline(calibration_data)
\ No newline at end of file
diff --git a/3D_construction/script/alignment.py b/3D_construction/script/alignment.py
new file mode 100644
index 0000000..f658d28
--- /dev/null
+++ b/3D_construction/script/alignment.py
@@ -0,0 +1,115 @@
+import numpy as np
+import open3d as o3d
+
+
+def get_ground_truth_seams():
+ """返回你手动测量的三维坐标(物体坐标系)。"""
+ ground_truth = {
+ 'up_line1': {
+ 'start_3d': np.array([142.2, 0, 7.3]),
+ 'end_3d': np.array([153.9, 0, 149.8])
+ },
+ 'up_line2': {
+ 'start_3d': np.array([142.2, 0, 7.3]),
+ 'end_3d': np.array([142.2, 50.3, 7.3])
+ },
+ 'bottom_line1': {
+ 'start_3d': np.array([8.9, 0, 7.3]),
+ 'end_3d': np.array([140.2, 0, 7.3])
+ },
+ 'bottom_line2': {
+ 'start_3d': np.array([142.2, 0, 7.3]),
+ 'end_3d': np.array([142.2, 50.3, 7.3])
+ }
+ }
+ return ground_truth
+
+
+def align_and_stitch_seams(reconstructed_seams):
+ """
+    Align the reconstructed seams to the ground-truth coordinate system via point-to-point registration and stitch them.
+
+    Args:
+        reconstructed_seams (dict): Seam endpoints reconstructed in the camera coordinate system.
+
+    Returns:
+        dict: Seam endpoints aligned and stitched in the object coordinate system.
+ """
+ print("\n--- Aligning and Stitching Seams to Ground Truth ---")
+
+ ground_truth = get_ground_truth_seams()
+
+    # --- 1. Align the upper ('up') part ---
+    # Source point cloud: reconstructed up_line2 (camera coordinate system)
+ source_points_up = np.array([
+ reconstructed_seams['up_line2']['start_3d'],
+ reconstructed_seams['up_line2']['end_3d']
+ ])
+ source_pcd_up = o3d.geometry.PointCloud()
+ source_pcd_up.points = o3d.utility.Vector3dVector(source_points_up)
+
+    # Target point cloud: measured up_line2 (object coordinate system)
+ target_points_up = np.array([
+ ground_truth['up_line2']['start_3d'],
+ ground_truth['up_line2']['end_3d']
+ ])
+ target_pcd_up = o3d.geometry.PointCloud()
+ target_pcd_up.points = o3d.utility.Vector3dVector(target_points_up)
+
+ print("Aligning 'up' part...")
+    # Compute the transform matrix M_up via point-to-point registration.
+    # With only two points an exact transform could be computed directly, but registration is more general.
+    # compute_transformation takes the two point clouds plus explicit point correspondences.
+ trans_up = o3d.pipelines.registration.TransformationEstimationPointToPoint().compute_transformation(
+ source_pcd_up, target_pcd_up, o3d.utility.Vector2iVector([[0, 0], [1, 1]]))
+
+ print("Transformation matrix for 'up' part (Camera -> Object):")
+ print(trans_up)
+
+    # --- 2. Align the lower ('bottom') part ---
+    # Source point cloud: reconstructed bottom_line2 (camera coordinate system)
+ source_points_bottom = np.array([
+ reconstructed_seams['bottom_line2']['start_3d'],
+ reconstructed_seams['bottom_line2']['end_3d']
+ ])
+ source_pcd_bottom = o3d.geometry.PointCloud()
+ source_pcd_bottom.points = o3d.utility.Vector3dVector(source_points_bottom)
+
+    # Target point cloud: measured bottom_line2 (object coordinate system)
+ target_points_bottom = np.array([
+ ground_truth['bottom_line2']['start_3d'],
+ ground_truth['bottom_line2']['end_3d']
+ ])
+ target_pcd_bottom = o3d.geometry.PointCloud()
+ target_pcd_bottom.points = o3d.utility.Vector3dVector(target_points_bottom)
+
+ print("\nAligning 'bottom' part...")
+ trans_bottom = o3d.pipelines.registration.TransformationEstimationPointToPoint().compute_transformation(
+ source_pcd_bottom, target_pcd_bottom, o3d.utility.Vector2iVector([[0, 0], [1, 1]]))
+
+ print("Transformation matrix for 'bottom' part (Camera -> Object):")
+ print(trans_bottom)
+
+    # --- 3. Apply the transforms and assemble the final result ---
+ aligned_seams = {}
+ for name, points in reconstructed_seams.items():
+        # Build homogeneous coordinates (x, y, z, 1)
+ start_hom = np.append(points['start_3d'], 1)
+ end_hom = np.append(points['end_3d'], 1)
+
+        # Pick the transform matrix according to whether the seam belongs to 'up' or 'bottom'
+ if 'up' in name:
+ transformed_start = (trans_up @ start_hom.T)[:3]
+ transformed_end = (trans_up @ end_hom.T)[:3]
+ elif 'bottom' in name:
+ transformed_start = (trans_bottom @ start_hom.T)[:3]
+ transformed_end = (trans_bottom @ end_hom.T)[:3]
+ else:
+ continue
+
+ aligned_seams[name] = {
+ 'start_3d': transformed_start,
+ 'end_3d': transformed_end
+ }
+
+ return aligned_seams, ground_truth
diff --git a/3D_construction/script/demo_ideal_model.py b/3D_construction/script/demo_ideal_model.py
new file mode 100644
index 0000000..8fcb667
--- /dev/null
+++ b/3D_construction/script/demo_ideal_model.py
@@ -0,0 +1,112 @@
+# demo_final_beautiful.py
+# Final polished version: no legend, focused on a high-quality render of the seam model itself.
+# Run: python demo_final_beautiful.py
+
+import numpy as np
+import open3d as o3d
+from typing import Dict
+
+
+def get_final_ideal_ground_truth() -> Dict:
+ """
+    Use the final ideal coordinates as provided.
+ """
+ print("--- Using your final provided ideal coordinates. ---")
+ final_3_seams = {
+ 'bottom_left': {
+ 'start_3d': np.array([-142.2 + 8.9, 0.0, 0.0]),
+ 'end_3d': np.array([-2.7, 0.0, 0.0])
+ },
+ 'middle': {
+ 'start_3d': np.array([0.0, 0.0, 0.0]),
+ 'end_3d': np.array([0.0, -50.3, 0.0])
+ },
+ 'top_left': {
+ 'start_3d': np.array([0.0, 0.0, 5.2]),
+ 'end_3d': np.array([0.0, 0.0, 142.5])
+ }
+ }
+ return final_3_seams
+
+
+def rotation_matrix_from_vectors(vec_from: np.ndarray, vec_to: np.ndarray) -> np.ndarray:
+ """计算从 vec_from 到 vec_to 的旋转矩阵。"""
+ a = vec_from / (np.linalg.norm(vec_from) + 1e-12)
+ b = vec_to / (np.linalg.norm(vec_to) + 1e-12)
+ v = np.cross(a, b)
+ c = np.clip(np.dot(a, b), -1.0, 1.0)
+ s = np.linalg.norm(v)
+ if s < 1e-12:
+ return np.eye(3) if c > 0.0 else -np.eye(3)
+ kmat = np.array([[0, -v[2], v[1]], [v[2], 0, -v[0]], [-v[1], v[0], 0]], dtype=float)
+ return np.eye(3) + kmat + kmat @ kmat * ((1.0 - c) / (s ** 2))
+
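+# Quick sanity check for rotation_matrix_from_vectors (illustrative only, values are arbitrary):
+#
+#   v = np.array([1.0, 2.0, 2.0])
+#   R = rotation_matrix_from_vectors(np.array([0.0, 0.0, 1.0]), v)
+#   # R @ [0, 0, 1] should point along v (normalized), and R should be orthonormal.
+#   assert np.allclose(R @ np.array([0.0, 0.0, 1.0]), v / np.linalg.norm(v), atol=1e-6)
+#   assert np.allclose(R.T @ R, np.eye(3), atol=1e-6)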
+
+def visualize_beautiful_model(seams: Dict):
+ """
+    [Final polished] visualization:
+    - Uniformly colored cylinder meshes with computed normals.
+    - Lighting enabled for a stronger sense of depth.
+    - A sensible initial viewpoint.
+ """
+ print("\n--- Visualizing Final Target Model (High Quality Render) ---")
+ colors = {'bottom_left': [0.8, 0.1, 0.1], 'middle': [0.1, 0.8, 0.1], 'top_left': [0.1, 0.1, 0.8]}
+ geoms = []
+
+    # 1. Draw the seam bodies (cylinders)
+    radius = 0.5  # moderate thickness
+ for name, data in seams.items():
+ start, end = np.asarray(data['start_3d']), np.asarray(data['end_3d'])
+ direction = end - start
+ length = np.linalg.norm(direction)
+ if length < 1e-6: continue
+
+ cyl = o3d.geometry.TriangleMesh.create_cylinder(radius=radius, height=length, resolution=64)
+        cyl.compute_vertex_normals()  # normals are essential for the lighting computation
+ R = rotation_matrix_from_vectors(np.array([0.0, 0.0, 1.0]), direction)
+ cyl.rotate(R, center=(0, 0, 0)).translate((start + end) / 2.0)
+ cyl.paint_uniform_color(colors[name])
+ geoms.append(cyl)
+
+    # 2. Move the coordinate frame so it does not occlude the model
+ all_points = [p for data in seams.values() for p in data.values()]
+ bbox = o3d.geometry.AxisAlignedBoundingBox.create_from_points(o3d.utility.Vector3dVector(all_points))
+ axis_size = max(30.0, np.linalg.norm(bbox.get_extent()) * 0.2)
+ axis_origin = bbox.get_min_bound() - np.array([axis_size * 1.5, axis_size * 0.5, 0])
+ frame = o3d.geometry.TriangleMesh.create_coordinate_frame(size=axis_size, origin=axis_origin)
+ geoms.append(frame)
+
+    # 3. Visualize
+ vis = o3d.visualization.Visualizer()
+ vis.create_window(window_name="Final Target Model (High Quality)", width=1280, height=720)
+
+ for g in geoms:
+ vis.add_geometry(g)
+
+    # --- Render options and view setup ---
+    opt = vis.get_render_option()
+    opt.background_color = np.asarray([0.1, 0.1, 0.1])  # dark grey background to make the model stand out
+    opt.mesh_show_back_face = False
+    opt.light_on = True  # make sure lighting is on
+
+    # Get the view controller and set the camera position
+    view_ctl = vis.get_view_control()
+    # This call automatically computes a viewpoint that shows all objects
+    vis.reset_view_point(True)
+    # The camera parameters can be tuned further for a specific angle, e.g.:
+    # view_ctl.set_zoom(0.8)
+    # view_ctl.rotate(x=100, y=100)  # rotate the view
+
+ print("Visualization ready. You can rotate the view. Press 'Q' to close.")
+ vis.run()
+ vis.destroy_window()
+
+
+if __name__ == '__main__':
+ seams = get_final_ideal_ground_truth()
+
+ print("--- Final Ideal Seam Coordinates ---")
+ for n, p in seams.items():
+ print(f"{n}: start={np.around(p['start_3d'], 1)}, end={np.around(p['end_3d'], 1)}")
+
+ visualize_beautiful_model(seams)
diff --git a/3D_construction/script/final_reconstruction.py b/3D_construction/script/final_reconstruction.py
new file mode 100644
index 0000000..40827a2
--- /dev/null
+++ b/3D_construction/script/final_reconstruction.py
@@ -0,0 +1,174 @@
+import cv2
+import numpy as np
+import itertools
+from .reconstruction import get_camera_parameters
+from .pose_estimation import get_ground_truth_seams, estimate_camera_pose
+
+
+def get_global_transform_from_up_data(all_2d_endpoints):
+ """
+    Use only the 'up' data to compute a single, global "camera -> object" transform matrix.
+ """
+ print("\n--- Calculating Global Transform Matrix using 'up' data ---")
+ ground_truth = get_ground_truth_seams()
+
+    # 1. Prepare the data of the 'up' part
+ object_points_3d = []
+ image_points_2d_L = []
+
+ for line_name in ['line1', 'line2']:
+ gt_key = f"up_{line_name}"
+ key_L = f"up_l_{line_name}"
+ object_points_3d.extend([ground_truth[gt_key]['start_3d'], ground_truth[gt_key]['end_3d']])
+ image_points_2d_L.extend([all_2d_endpoints[key_L]['start'], all_2d_endpoints[key_L]['end']])
+
+    # 2. Search for the best point correspondence
+ best_reprojection_error = float('inf')
+ best_pose = None
+
+ for a, b in itertools.product([0, 1], repeat=2):
+ current_image_points_L = list(image_points_2d_L)
+        if a:
+            current_image_points_L[0], current_image_points_L[1] = current_image_points_L[1], current_image_points_L[0]
+        if b:
+            current_image_points_L[2], current_image_points_L[3] = current_image_points_L[3], current_image_points_L[2]
+
+ rvec, tvec = estimate_camera_pose(current_image_points_L, object_points_3d, 'L')
+ if rvec is not None:
+ projected_points, _ = cv2.projectPoints(np.array(object_points_3d), rvec, tvec,
+ get_camera_parameters()[0]['K'], get_camera_parameters()[0]['kc'])
+ error = cv2.norm(np.array(current_image_points_L, dtype=np.float32),
+ projected_points.reshape(-1, 2).astype(np.float32), cv2.NORM_L2)
+
+ if error < best_reprojection_error:
+ best_reprojection_error = error
+ best_pose = (rvec, tvec)
+
+ if best_pose is None:
+ print("Fatal Error: Could not calculate a valid global transform.")
+ return None
+
+ print(f"Global transform calculated with reprojection error: {best_reprojection_error:.2f}")
+
+    # 3. Build the 4x4 transform matrix (camera coordinate system -> object coordinate system)
+ rvec, tvec = best_pose
+ R_cam_from_obj, _ = cv2.Rodrigues(rvec)
+ R_obj_from_cam = R_cam_from_obj.T
+ t_obj_from_cam = -R_obj_from_cam @ tvec
+
+ transform_matrix = np.eye(4)
+ transform_matrix[:3, :3] = R_obj_from_cam
+ transform_matrix[:3, 3] = t_obj_from_cam.flatten()
+
+ return transform_matrix
+
+
+def reconstruct_in_camera_coords(points_L, points_R, image_size=(4000, 3000)):
+    # ... (this function is unchanged)
+ cam_L, cam_R, extrinsics = get_camera_parameters()
+ R1, R2, P1, P2, _, _, _ = cv2.stereoRectify(cam_L['K'], cam_L['kc'], cam_R['K'], cam_R['kc'], image_size,
+ extrinsics['R'], extrinsics['T'].flatten())
+    # Map the pixels into the rectified frames (R1/R2) matching P1/P2 before triangulating
+    points_L_undistorted = cv2.undistortPoints(np.array(points_L, dtype=np.float32), cam_L['K'], cam_L['kc'], R=R1, P=P1)
+    points_R_undistorted = cv2.undistortPoints(np.array(points_R, dtype=np.float32), cam_R['K'], cam_R['kc'], R=R2, P=P2)
+ points_4d_hom = cv2.triangulatePoints(P1, P2, points_L_undistorted.reshape(-1, 2).T,
+ points_R_undistorted.reshape(-1, 2).T)
+ return (points_4d_hom[:3] / points_4d_hom[3]).T
+
+
+def final_reconstruction_pipeline(all_2d_endpoints):
+    # 1. Compute the single, global transform matrix
+ global_transform = get_global_transform_from_up_data(all_2d_endpoints)
+ if global_transform is None:
+ return None
+
+ reconstructed_4_seams = {}
+
+ for part_type in ['up', 'bottom']:
+        # 2. Standard stereo reconstruction of each part
+ points_L, points_R, seam_keys = [], [], []
+ for line_name in ['line1', 'line2']:
+ key_L = f"{part_type}_l_{line_name}"
+ key_R = f"{part_type}_r_{line_name}"
+ points_L.extend([all_2d_endpoints[key_L]['start'], all_2d_endpoints[key_L]['end']])
+ points_R.extend([all_2d_endpoints[key_R]['start'], all_2d_endpoints[key_R]['end']])
+ seam_keys.append(f"{part_type}_{line_name}")
+
+ points_camL = reconstruct_in_camera_coords(points_L, points_R)
+
+        # 3. Transform with the same global matrix
+ points_camL_hom = np.hstack([points_camL, np.ones((points_camL.shape[0], 1))])
+ points_object = (global_transform @ points_camL_hom.T).T[:, :3]
+
+        # 4. Collect the results
+ for i, key in enumerate(seam_keys):
+ reconstructed_4_seams[key] = {'start_3d': points_object[i * 2], 'end_3d': points_object[i * 2 + 1]}
+
+ return reconstructed_4_seams
+
+
+
+
+def merge_seams(reconstructed_seams_dict):
+ """
+    Merge the four reconstructed seams into the final three-seam model.
+
+    Args:
+        reconstructed_seams_dict (dict): Dict containing 'up_line1', 'up_line2',
+                                         'bottom_line1' and 'bottom_line2'.
+
+    Returns:
+        dict: Dict with the three final seams 'bottom_left', 'middle' and 'top_left'.
+ """
+ print("\n--- Merging seams into final 3-line model ---")
+
+ if not all(k in reconstructed_seams_dict for k in ['up_line1', 'up_line2', 'bottom_line1', 'bottom_line2']):
+ print("Error: Missing reconstructed seams for merging.")
+ return None
+
+    # Extract all required endpoints
+ bl1_start = reconstructed_seams_dict['bottom_line1']['start_3d']
+ bl1_end = reconstructed_seams_dict['bottom_line1']['end_3d']
+
+ ul2_start = reconstructed_seams_dict['up_line2']['start_3d']
+ ul2_end = reconstructed_seams_dict['up_line2']['end_3d']
+
+ bl2_start = reconstructed_seams_dict['bottom_line2']['start_3d']
+ bl2_end = reconstructed_seams_dict['bottom_line2']['end_3d']
+
+ ul1_start = reconstructed_seams_dict['up_line1']['start_3d']
+ ul1_end = reconstructed_seams_dict['up_line1']['end_3d']
+
+    # --- Define the final three seams ---
+
+    # 1. Bottom-left seam (bottom_left)
+    # Use bottom_line1 directly. For a consistent direction, order the points from smaller to larger X.
+    bottom_left_points = sorted([bl1_start, bl1_end], key=lambda p: p[0])
+
+    # 2. Middle seam (middle)
+    # This merges up_line2 and bottom_line2, which should coincide in theory.
+    # Averaging the points gives a more robust start and end.
+    # The shared start should be the mean of (bl1_end, ul2_start, bl2_start)
+    middle_start = np.mean([bl1_end, ul2_start, bl2_start], axis=0)
+    # The shared end should be the mean of (ul2_end, bl2_end, ul1_start)
+    middle_end = np.mean([ul2_end, bl2_end, ul1_start], axis=0)
+
+    # 3. Top-left seam (top_left)
+    # Use up_line1 directly.
+    top_left_points = [ul1_start, ul1_end]  # keep the original direction
+
+ final_3_seams = {
+ 'bottom_left': {
+ 'start_3d': bottom_left_points[0],
+ 'end_3d': bottom_left_points[1]
+ },
+ 'middle': {
+ 'start_3d': middle_start,
+ 'end_3d': middle_end
+ },
+ 'top_left': {
+ 'start_3d': top_left_points[0],
+ 'end_3d': top_left_points[1]
+ }
+ }
+
+ return final_3_seams
\ No newline at end of file
diff --git a/3D_construction/script/global_optimizer.py b/3D_construction/script/global_optimizer.py
new file mode 100644
index 0000000..c073cb4
--- /dev/null
+++ b/3D_construction/script/global_optimizer.py
@@ -0,0 +1,265 @@
+import os
+
+import numpy as np
+import cv2
+import itertools
+from scipy.optimize import least_squares
+from .reconstruction import get_camera_parameters
+from .pose_estimation import get_ground_truth_seams, estimate_camera_pose
+from .final_reconstruction import merge_seams  # the earlier merge_seams function still applies
+
+def get_initial_parameters_with_solvepnp(all_2d_endpoints, ground_truth):
+ """
+    [New] Use solvePnP to provide a better initial pose guess for the global optimization.
+ """
+ print("\n--- Step 1: Getting a good initial guess for poses using solvePnP ---")
+
+    # 1. Intrinsics and 3D points (same as before)
+ cam_params_L, cam_params_R, _ = get_camera_parameters()
+ camera_intrinsics = np.array([
+ cam_params_L['fc'][0], cam_params_L['fc'][1], cam_params_L['cc'][0], cam_params_L['cc'][1],
+ cam_params_R['fc'][0], cam_params_R['fc'][1], cam_params_R['cc'][0], cam_params_R['cc'][1]
+ ])
+ points_3d_init = np.array([
+ ground_truth['up_line1']['start_3d'], ground_truth['up_line1']['end_3d'],
+ ground_truth['up_line2']['start_3d'], ground_truth['up_line2']['end_3d'],
+ ground_truth['bottom_line1']['start_3d'], ground_truth['bottom_line1']['end_3d'],
+ ])
+
+    # 2. [Key] Compute an initial pose for each camera independently
+ camera_poses_init = np.zeros((4, 6))
+ camera_map = {0: ('up', 'L'), 1: ('up', 'R'), 2: ('bottom', 'L'), 3: ('bottom', 'R')}
+
+ for i in range(4):
+ part_type, side = camera_map[i]
+
+ obj_pts, img_pts = [], []
+
+        # Collect all points visible to this camera
+ for line_name in ['line1', 'line2']:
+ if f"{part_type}_{line_name}" in ground_truth:
+ gt_key = f"{part_type}_{line_name}"
+ img_key = f"{part_type}_{side.lower()}_{line_name}"
+
+                # Determine the indices of these 3D points inside points_3d_init
+ if gt_key == 'up_line1':
+ p_indices = [0, 1]
+ elif gt_key == 'up_line2':
+ p_indices = [2, 3]
+ elif gt_key == 'bottom_line1':
+ p_indices = [4, 5]
+ elif gt_key == 'bottom_line2':
+                    p_indices = [2, 3]  # bottom_line2 also maps to points 2 and 3
+
+ obj_pts.extend([points_3d_init[p_indices[0]], points_3d_init[p_indices[1]]])
+ img_pts.extend([all_2d_endpoints[img_key]['start'], all_2d_endpoints[img_key]['end']])
+
+        # Reuse the correspondence-search logic validated earlier
+ best_err = float('inf')
+ best_pose_for_cam = None
+        for a, b in itertools.product([0, 1], repeat=2):  # at most two lines assumed
+ current_img_pts = list(img_pts)
+ if a: current_img_pts[0], current_img_pts[1] = current_img_pts[1], current_img_pts[0]
+ if b: current_img_pts[2], current_img_pts[3] = current_img_pts[3], current_img_pts[2]
+
+ rvec, tvec = estimate_camera_pose(current_img_pts, obj_pts, side)
+ if rvec is not None:
+ cam = cam_params_L if side == 'L' else cam_params_R
+ proj_pts, _ = cv2.projectPoints(np.array(obj_pts), rvec, tvec, cam['K'], cam['kc'])
+ err = cv2.norm(np.array(current_img_pts, dtype=np.float32), proj_pts.reshape(-1, 2).astype(np.float32))
+ if err < best_err:
+ best_err = err
+ best_pose_for_cam = np.concatenate([rvec.flatten(), tvec.flatten()])
+
+ if best_pose_for_cam is not None:
+ camera_poses_init[i] = best_pose_for_cam
+ print(f"Initial pose for camera {i} ({part_type}-{side}) found with error {best_err:.2f}")
+ else:
+ print(f"Warning: Failed to find initial pose for camera {i}")
+
+    # 3. Prepare the 2D observations (same as before)
+    obs_2d, p_indices, c_indices = [], [], []
+    # ... (identical to the previous get_initial_parameters version, copied directly)
+ point_map = {'up_line1': [0, 1], 'up_line2': [2, 3], 'bottom_line1': [4, 5], 'bottom_line2': [2, 3]}
+ for cam_idx, (part, side) in camera_map.items():
+ for line in ['line1', 'line2']:
+ img_key = f"{part}_{side.lower()}_{line}"
+ gt_key = f"{part}_{line}"
+ if img_key in all_2d_endpoints:
+ obs_2d.extend([all_2d_endpoints[img_key]['start'], all_2d_endpoints[img_key]['end']])
+ p_indices.extend(point_map[gt_key])
+ c_indices.extend([cam_idx, cam_idx])
+
+ return camera_intrinsics, camera_poses_init, points_3d_init, np.array(obs_2d), np.array(p_indices), np.array(
+ c_indices)
+
+
+def cost_function(params, n_cameras, n_points, camera_indices, point_indices, points_2d, fixed_kcs,
+ fixed_3d_points_init):
+ """BA的代价函数(V2 - 带有固定参数)。"""
+    # 1. Unpack the parameters being optimized from the flat parameter vector
+ intrinsics_flat = params[:8]
+ camera_poses_flat = params[8: 8 + n_cameras * 6]
+
+    # [Key change] The 3D points no longer all come from params:
+    # we optimize every point except the first one.
+ points_3d_optimizable_flat = params[8 + n_cameras * 6:]
+
+ camera_poses = camera_poses_flat.reshape((n_cameras, 6))
+
+    # Rebuild the full list of 3D points
+    points_3d = np.zeros((n_points, 3))
+    points_3d[0] = fixed_3d_points_init[0]  # the first point is fixed!
+ points_3d[1:] = points_3d_optimizable_flat.reshape((n_points - 1, 3))
+
+    # ... the rest of the function (residual computation) is unchanged ...
+ residuals = []
+ for i in range(len(points_2d)):
+ cam_idx = camera_indices[i]
+ point_idx = point_indices[i]
+
+ pose = camera_poses[cam_idx]
+ point_3d = points_3d[point_idx]
+
+ if cam_idx in [0, 2]: # Left cameras
+ fx, fy, cx, cy = intrinsics_flat[:4]
+ kc = fixed_kcs[0]
+ else: # Right cameras
+ fx, fy, cx, cy = intrinsics_flat[4:]
+ kc = fixed_kcs[1]
+
+ K = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]])
+ reproj_pt, _ = cv2.projectPoints(point_3d, pose[:3], pose[3:], K, kc)
+
+ residuals.extend((reproj_pt.ravel() - points_2d[i]).tolist())
+
+ return np.array(residuals)
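+
+# Layout of the flat parameter vector handled above (derived from the packing in run_global_optimization):
+#
+#   params = [ fx_L, fy_L, cx_L, cy_L, fx_R, fy_R, cx_R, cy_R,       # 8 shared intrinsics
+#              rvec+tvec of camera 0, ..., rvec+tvec of camera 3,     # 4 cameras x 6 values (up-L, up-R, bottom-L, bottom-R)
+#              x, y, z of point 1, ..., x, y, z of point n-1 ]        # point 0 stays fixed as the gauge/scale anchor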
+
+
+def run_global_optimization(all_2d_endpoints, ground_truth):
+ """执行全局优化(V2 - 修正尺度不确定性)。"""
+ # 1. 获取初始值 (不变)
+ intrinsics_init, poses_init, points_3d_init, obs_2d, p_indices, c_indices = get_initial_parameters_with_solvepnp(
+ all_2d_endpoints, ground_truth)
+
+    # 2. [Key change] Split the parameters into a fixed part and an optimized part.
+    # The 3D points to optimize are all points except the first one.
+    optimizable_points_3d_init = points_3d_init[1:]
+
+    # Pack all parameters that will be optimized
+ params_init = np.concatenate([
+ intrinsics_init.ravel(),
+ poses_init.ravel(),
+        optimizable_points_3d_init.ravel()  # only the points being optimized are packed
+ ])
+
+    # 3. Prepare the fixed parameters
+ fixed_kcs = [get_camera_parameters()[0]['kc'], get_camera_parameters()[1]['kc']]
+
+    # 4. Run the optimization (args now also carries fixed_3d_points_init)
+ n_cameras = 4
+ n_points = points_3d_init.shape[0]
+
+ print("\n--- Step 2: Running Global Bundle Adjustment (with scale constraint) ---")
+ result = least_squares(
+ cost_function,
+ params_init,
+ verbose=2,
+ x_scale='jac',
+ ftol=1e-6,
+ method='trf',
+        args=(n_cameras, n_points, c_indices, p_indices, obs_2d, fixed_kcs, points_3d_init),  # pass the fixed initial 3D points
+        max_nfev=2000  # the iteration budget can be raised if needed
+ )
+
+ params_final = result.x
+    n_cameras = 4  # make sure n_cameras and n_points are available here
+ n_points = points_3d_init.shape[0]
+
+    # --- Parse and save the optimized parameters ---
+
+    # 5a. Parse all final parameters
+ intrinsics_final_flat = params_final[:8]
+ camera_poses_final_flat = params_final[8: 8 + n_cameras * 6]
+ optimizable_points_3d_final_flat = params_final[8 + n_cameras * 6:]
+
+ intrinsics_final = intrinsics_final_flat.reshape(2, 4)
+ camera_poses_final = camera_poses_final_flat.reshape((n_cameras, 6))
+
+ points_3d_final = np.zeros((n_points, 3))
+ points_3d_final[0] = points_3d_init[0]
+ points_3d_final[1:] = optimizable_points_3d_final_flat.reshape((n_points - 1, 3))
+
+    # 5b. Print to the console for a quick check
+ print("\n--- Optimized Camera Intrinsics ---")
+ print(f"Left Cam (fx, fy, cx, cy): {intrinsics_final[0]}")
+ print(f"Right Cam (fx, fy, cx, cy): {intrinsics_final[1]}")
+ print("\n--- Optimized Camera Poses (Rodrigues vector + translation) ---")
+ print(f"Pose of up-left cam: {camera_poses_final[0]}")
+ print(f"Pose of up-right cam: {camera_poses_final[1]}")
+ print(f"Pose of bottom-left cam: {camera_poses_final[2]}")
+ print(f"Pose of bottom-right cam: {camera_poses_final[3]}")
+
+    # 5c. [Core] Save all parameters to a file
+    # Save path
+    current_dir = os.path.dirname(os.path.abspath(__file__))
+    project_root = os.path.dirname(current_dir)
+    save_path = os.path.join(project_root, 'optimized_camera_parameters.npz')
+
+    # Fixed distortion coefficients
+ cam_L_params, cam_R_params, _ = get_camera_parameters()
+
+ np.savez(
+ save_path,
+        # Optimized intrinsics
+        optimized_intrinsics_L=intrinsics_final[0],  # [fx, fy, cx, cy]
+        optimized_intrinsics_R=intrinsics_final[1],
+        # Fixed distortion coefficients (not optimized in the BA)
+        dist_coeffs_L=cam_L_params['kc'],
+        dist_coeffs_R=cam_R_params['kc'],
+        # Optimized camera poses (relative to the object coordinate system)
+        pose_up_L=camera_poses_final[0],  # [rvec, tvec]
+        pose_up_R=camera_poses_final[1],
+        pose_bottom_L=camera_poses_final[2],
+        pose_bottom_R=camera_poses_final[3],
+        # Also store a recomputed stereo extrinsic for reference
+        # (transform of up-right relative to up-left)
+        new_extrinsics=calculate_new_extrinsics(camera_poses_final[0], camera_poses_final[1]),
+        # Optimized 3D point coordinates
+ optimized_3d_points=points_3d_final
+ )
+ print(f"\n✅ All optimized parameters have been saved to: {save_path}")
+
+    # 6. Assemble the output (unchanged)
+ final_seams = {
+ 'up_line1': {'start_3d': points_3d_final[0], 'end_3d': points_3d_final[1]},
+ 'up_line2': {'start_3d': points_3d_final[2], 'end_3d': points_3d_final[3]},
+ 'bottom_line1': {'start_3d': points_3d_final[4], 'end_3d': points_3d_final[5]},
+ 'bottom_line2': {'start_3d': points_3d_final[2], 'end_3d': points_3d_final[3]}
+ }
+
+ return final_seams
+
+
+def calculate_new_extrinsics(pose_L, pose_R):
+ """根据两个相机相对于物体的位姿,计算它们之间的相对位姿(外参)。"""
+    # Transform from the object to the left camera
+ rvec_L, tvec_L = pose_L[:3], pose_L[3:]
+ R_L_from_obj, _ = cv2.Rodrigues(rvec_L)
+ T_L_from_obj = tvec_L.reshape(3, 1)
+
+    # Transform from the object to the right camera
+ rvec_R, tvec_R = pose_R[:3], pose_R[3:]
+ R_R_from_obj, _ = cv2.Rodrigues(rvec_R)
+ T_R_from_obj = tvec_R.reshape(3, 1)
+
+    # Compute the transform from the left camera to the right camera.
+    # T_R_from_L = R_R @ inv(R_L).T @ (T_L - T_R) would be an incorrect derivation.
+    # Correct derivation: P_obj = inv(R_L) @ (P_camL - T_L) = inv(R_R) @ (P_camR - T_R)
+    # => P_camR = R_R @ inv(R_L) @ P_camL + (T_R - R_R @ inv(R_L) @ T_L)
+    # => R_R_from_L = R_R @ R_L.T
+    # => T_R_from_L = T_R - R_R_from_L @ T_L
+ R_R_from_L = R_R_from_obj @ R_L_from_obj.T
+ t_R_from_L = T_R_from_obj - (R_R_from_L @ T_L_from_obj)
+
+ return {'R': R_R_from_L, 't': t_R_from_L}
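+
+# Illustrative consistency check (not executed by the pipeline; the poses below are hypothetical):
+# a point expressed in the left camera frame, mapped through the computed extrinsics,
+# should match the same object point expressed directly in the right camera frame.
+#
+#   pose_L = np.array([0.0, 0.0, 0.0, 0.0, 0.0, 500.0])
+#   pose_R = np.array([0.0, 0.1, 0.0, -220.0, 2.0, 530.0])
+#   ext = calculate_new_extrinsics(pose_L, pose_R)
+#   p_obj = np.array([[10.0], [20.0], [30.0]])
+#   R_L, _ = cv2.Rodrigues(pose_L[:3]); p_camL = R_L @ p_obj + pose_L[3:].reshape(3, 1)
+#   R_R, _ = cv2.Rodrigues(pose_R[:3]); p_camR = R_R @ p_obj + pose_R[3:].reshape(3, 1)
+#   assert np.allclose(ext['R'] @ p_camL + ext['t'], p_camR, atol=1e-9)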
diff --git a/3D_construction/script/linknet_model_def.py b/3D_construction/script/linknet_model_def.py
new file mode 100644
index 0000000..23e95e3
--- /dev/null
+++ b/3D_construction/script/linknet_model_def.py
@@ -0,0 +1,70 @@
+import torch
+import torch.nn as nn
+from torchvision import models
+
+
+# This file only contains the LinkNet model architecture definition,
+# copied verbatim from the training script.
+
+class DecoderBlock(nn.Module):
+ def __init__(self, in_channels, out_channels):
+ super().__init__()
+ self.block = nn.Sequential(
+ nn.Conv2d(in_channels, in_channels // 4, kernel_size=1),
+ nn.ReLU(inplace=True),
+ nn.ConvTranspose2d(in_channels // 4, in_channels // 4, kernel_size=2, stride=2),
+ nn.ReLU(inplace=True),
+ nn.Conv2d(in_channels // 4, out_channels, kernel_size=1),
+ nn.ReLU(inplace=True)
+ )
+
+ def forward(self, x):
+ return self.block(x)
+
+
+class LinkNet(nn.Module):
+ def __init__(self, num_classes=1):
+ super().__init__()
+        # Use ResNet18 as the encoder.
+        # Note: no pretrained weights are needed at inference time, since the fully trained model weights are loaded later.
+        resnet = models.resnet18()  # weights=models.ResNet18_Weights.DEFAULT
+
+        # The model was trained on single-channel grayscale images
+ self.firstconv = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
+ self.firstbn = resnet.bn1
+ self.firstrelu = resnet.relu
+ self.firstmaxpool = resnet.maxpool
+        # Encoder layers
+ self.encoder1 = resnet.layer1
+ self.encoder2 = resnet.layer2
+ self.encoder3 = resnet.layer3
+ self.encoder4 = resnet.layer4
+        # Decoder layers
+ self.decoder4 = DecoderBlock(512, 256)
+ self.decoder3 = DecoderBlock(256, 128)
+ self.decoder2 = DecoderBlock(128, 64)
+ self.decoder1 = DecoderBlock(64, 64)
+        # Final output layers
+ self.final_deconv = nn.ConvTranspose2d(64, 32, kernel_size=2, stride=2)
+ self.final_relu = nn.ReLU(inplace=True)
+ self.final_conv = nn.Conv2d(32, num_classes, kernel_size=1)
+
+ def forward(self, x):
+        # Encoder
+ x = self.firstconv(x)
+ x = self.firstbn(x)
+ x = self.firstrelu(x)
+ x = self.firstmaxpool(x)
+ e1 = self.encoder1(x)
+ e2 = self.encoder2(e1)
+ e3 = self.encoder3(e2)
+ e4 = self.encoder4(e3)
+        # Decoder
+ d4 = self.decoder4(e4) + e3
+ d3 = self.decoder3(d4) + e2
+ d2 = self.decoder2(d3) + e1
+ d1 = self.decoder1(d2)
+ f = self.final_deconv(d1)
+ f = self.final_relu(f)
+ f = self.final_conv(f)
+ return torch.sigmoid(f)
\ No newline at end of file
diff --git a/3D_construction/script/linknet_segmentor.py b/3D_construction/script/linknet_segmentor.py
new file mode 100644
index 0000000..a6242c8
--- /dev/null
+++ b/3D_construction/script/linknet_segmentor.py
@@ -0,0 +1,74 @@
+import os
+import cv2
+import torch
+import numpy as np
+
+# Import the model definition created above
+from .linknet_model_def import LinkNet
+
+# Model cache
+_linknet_models = {}
+_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+print(f"LinkNet will use device: {_device}")
+
+
+def _get_endpoints_from_mask(mask: np.ndarray):
+ """内部函数:从二值化mask中提取直线端点。"""
+ points = cv2.findNonZero(mask)
+ if points is None:
+ return None, None
+ line_params = cv2.fitLine(points, cv2.DIST_L2, 0, 0.01, 0.01)
+ direction_vector = np.array([line_params[0][0], line_params[1][0]])
+ points_flat = points.reshape(-1, 2)
+ projections = points_flat.dot(direction_vector)
+ min_idx, max_idx = np.argmin(projections), np.argmax(projections)
+ start_point, end_point = tuple(points_flat[min_idx]), tuple(points_flat[max_idx])
+ return start_point, end_point
+
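+# Minimal illustration of the endpoint extraction above (synthetic mask, not used by the pipeline):
+#
+#   mask = np.zeros((100, 100), dtype=np.uint8)
+#   cv2.line(mask, (10, 20), (80, 60), 255, 2)
+#   start, end = _get_endpoints_from_mask(mask)
+#   # start/end should lie close to (10, 20) and (80, 60): the nonzero pixels are projected
+#   # onto the fitted line direction and the extremes of that projection are returned.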
+
+def segment_and_find_endpoints(original_image: np.ndarray,
+ crop_box: tuple,
+ model_path: str,
+ image_size: int = 256):
+ """
+    Segment the given crop region with LinkNet and locate the seam endpoints.
+    Returns (start_point, end_point) in the original image coordinate system.
+ """
+ if model_path not in _linknet_models:
+ print(f"Loading LinkNet model from: {model_path}")
+ if not os.path.exists(model_path):
+ print(f"Error: LinkNet model file not found at {model_path}")
+ return None, None
+ model = LinkNet(num_classes=1)
+ model.load_state_dict(torch.load(model_path, map_location=_device))
+ model.to(_device)
+ model.eval()
+ _linknet_models[model_path] = model
+
+ model = _linknet_models[model_path]
+
+ x1, y1, x2, y2 = crop_box
+ cropped_img = original_image[y1:y2, x1:x2]
+
+ img_gray = cv2.cvtColor(cropped_img, cv2.COLOR_BGR2GRAY)
+ crop_h, crop_w = img_gray.shape
+ img_resized = cv2.resize(img_gray, (image_size, image_size))
+ img_normalized = img_resized / 255.0
+ img_tensor = torch.from_numpy(img_normalized).unsqueeze(0).unsqueeze(0).float().to(_device)
+
+ with torch.no_grad():
+ output = model(img_tensor)
+
+ pred_mask_resized = output.cpu().numpy()[0, 0]
+ pred_mask_binary = (pred_mask_resized > 0.5).astype(np.uint8)
+ predicted_mask = cv2.resize(pred_mask_binary, (crop_w, crop_h), interpolation=cv2.INTER_NEAREST) * 255
+
+ start_crop, end_crop = _get_endpoints_from_mask(predicted_mask)
+
+ if start_crop is None:
+ return None, None
+
+ start_orig = (start_crop[0] + x1, start_crop[1] + y1)
+ end_orig = (end_crop[0] + x1, end_crop[1] + y1)
+
+ return start_orig, end_orig
\ No newline at end of file
diff --git a/3D_construction/script/pose_estimation.py b/3D_construction/script/pose_estimation.py
new file mode 100644
index 0000000..6ec35cd
--- /dev/null
+++ b/3D_construction/script/pose_estimation.py
@@ -0,0 +1,230 @@
+import itertools
+
+import cv2
+import numpy as np
+from .reconstruction import get_camera_parameters  # we still need the intrinsics
+
+
+def get_ground_truth_seams():
+ """
+    [V5 - final version based on the photos and the new coordinates]
+    The coordinate system has its origin (0, 0, 0) at the common intersection point.
+    Y axis: along the middle shared seam.
+    X axis: along the bottom-left seam.
+    Z axis: perpendicular to the XY plane, pointing up.
+ """
+ print("--- INFO: Using new ground truth based on visual inspection. ---")
+
+    # 1. Define the key points
+    p_origin = np.array([0.0, 0.0, 0.0])  # common intersection point, origin of the coordinate system
+    p_middle_end = np.array([0.0, 50.3, 0.0])  # end point of the middle seam
+    p_bottom_start = np.array([-142.2, 0.0, 0.0])  # start point of the bottom-left seam (along the negative X axis)
+
+    # For up_line1 we need a reasonable 3D coordinate.
+    # It starts from p_middle_end (0, 50.3, 0).
+    # Assuming it extends mainly along Z, we give it a length of, say, 150.
+    # The measured (-11.7, 142.5) may contain measurement error or a coordinate-frame mismatch,
+    # so for now we use an idealized, non-degenerate point to keep the algorithm working.
+    p_top_end = np.array([0.0, 50.3, 150.0])  # assumed to point straight up
+
+ ground_truth = {
+        # The two seams captured in the upper ('up') images
+ 'up_line1': {
+ 'start_3d': p_middle_end, # (0, 50.3, 0)
+ 'end_3d': p_top_end # (0, 50.3, 150)
+ },
+ 'up_line2': {
+ 'start_3d': p_origin, # (0, 0, 0)
+ 'end_3d': p_middle_end # (0, 50.3, 0)
+ },
+        # The two seams captured in the lower ('bottom') images
+ 'bottom_line1': {
+ 'start_3d': p_bottom_start, # (-142.2, 0, 0)
+ 'end_3d': p_origin # (0, 0, 0)
+ },
+        'bottom_line2': {  # identical to up_line2
+ 'start_3d': p_origin,
+ 'end_3d': p_middle_end
+ }
+ }
+ return ground_truth
+
+# def get_ground_truth_seams():
+# """返回你手动测量的三维坐标(物体坐标系)。"""
+# ground_truth = {
+# 'up_line1': {
+# 'start_3d': np.array([142.2, 0, 7.3]),
+# 'end_3d': np.array([153.9, 0, 149.8])
+# },
+# 'up_line2': {
+# 'start_3d': np.array([142.2, 0, 7.3]),
+# 'end_3d': np.array([142.2, 50.3, 7.3])
+# },
+# 'bottom_line1': {
+# 'start_3d': np.array([8.9, 0, 7.3]),
+# 'end_3d': np.array([140.2, 0, 7.3])
+# },
+# 'bottom_line2': {
+# 'start_3d': np.array([142.2, 0, 7.3]),
+# 'end_3d': np.array([142.2, 50.3, 7.3])
+# }
+# }
+# return ground_truth
+
+
+def estimate_camera_pose(image_points_2d, object_points_3d, camera_side='L'):
+ """
+    Estimate the camera pose with solvePnP.
+
+    Args:
+        image_points_2d (np.ndarray): 2D points in the image (N, 2).
+        object_points_3d (np.ndarray): Corresponding 3D points in the object coordinate system (N, 3).
+        camera_side (str): 'L' or 'R', used to select the camera intrinsics.
+
+    Returns:
+        tuple: (rotation_vector, translation_vector), the camera pose.
+               This is the transform from the object coordinate system to the camera coordinate system.
+ """
+ cam_L, cam_R, _ = get_camera_parameters()
+
+ if camera_side == 'L':
+ camera_matrix = cam_L['K']
+ dist_coeffs = cam_L['kc']
+ else:
+ camera_matrix = cam_R['K']
+ dist_coeffs = cam_R['kc']
+
+    # solvePnP expects float64 inputs
+ object_points_3d = np.array(object_points_3d, dtype=np.float64)
+ image_points_2d = np.array(image_points_2d, dtype=np.float64)
+
+    # Solve for the pose with solvePnP:
+    # success: whether the solve succeeded
+    # rvec: rotation vector (Rodrigues vector)
+    # tvec: translation vector
+ success, rvec, tvec = cv2.solvePnP(object_points_3d, image_points_2d, camera_matrix, dist_coeffs)
+
+ if not success:
+ print("Warning: solvePnP failed to estimate camera pose.")
+ return None, None
+
+ return rvec, tvec
+
+
+def reproject_to_object_coords(endpoints_2d_L, endpoints_2d_R, part_type='up'):
+ """
+    Brand-new reconstruction pipeline (V2 - corrected):
+    1. Determine the most likely correspondence between the 2D and 3D points.
+    2. Estimate the left and right camera poses with the correct correspondence.
+    3. Triangulate all points using the stereo information.
+ """
+ ground_truth = get_ground_truth_seams()
+ cam_L_params, cam_R_params, _ = get_camera_parameters()
+
+    # --- Prepare the raw inputs for solvePnP ---
+    # object_points_3d: list of 3D ground-truth points, in a fixed order
+    # image_points_2d_L: detected 2D points, whose order may need to be adjusted
+ object_points_3d = []
+ image_points_2d_L = []
+
+    seam_keys = []  # record the seam keys for later bookkeeping
+
+ for line_name in ['line1', 'line2']:
+ gt_key = f"{part_type}_{line_name}"
+ if gt_key in ground_truth:
+            # Add the 3D ground-truth points
+ object_points_3d.append(ground_truth[gt_key]['start_3d'])
+ object_points_3d.append(ground_truth[gt_key]['end_3d'])
+
+            # Add the corresponding detected 2D points
+ key_L = f"{part_type}_l_{line_name}"
+ image_points_2d_L.append(endpoints_2d_L[key_L]['start'])
+ image_points_2d_L.append(endpoints_2d_L[key_L]['end'])
+ seam_keys.append(gt_key)
+
+    # --- 1. Find the best 2D-3D point correspondence ---
+    # Each seam (2 points) has 2 possible matchings (forward or reversed).
+    # With N seams there are 2^N combinations;
+    # we have two seams, so 2^2 = 4 combinations.
+
+ best_reprojection_error = float('inf')
+ best_image_points_L = None
+
+    # a, b indicate whether line1 and line2 need to be flipped (0 = keep, 1 = flip)
+ for a, b in itertools.product([0, 1], repeat=2):
+ swaps = (a, b)
+        current_image_points_L = list(image_points_2d_L)  # make a copy
+
+        # Flip the start/end of the corresponding seams according to the combination
+        if swaps[0]:  # flip line1
+            current_image_points_L[0], current_image_points_L[1] = current_image_points_L[1], current_image_points_L[0]
+        if swaps[1]:  # flip line2
+ current_image_points_L[2], current_image_points_L[3] = current_image_points_L[3], current_image_points_L[2]
+
+        # Try to estimate the pose with the current correspondence
+ rvec_L_try, tvec_L_try = estimate_camera_pose(current_image_points_L, object_points_3d, 'L')
+
+ if rvec_L_try is not None:
+            # Evaluate this combination via the reprojection error
+ projected_points, _ = cv2.projectPoints(np.array(object_points_3d), rvec_L_try, tvec_L_try,
+ cam_L_params['K'], cam_L_params['kc'])
+ error = cv2.norm(np.array(current_image_points_L, dtype=np.float32),
+ projected_points.reshape(-1, 2).astype(np.float32), cv2.NORM_L2)
+
+ if error < best_reprojection_error:
+ best_reprojection_error = error
+ best_image_points_L = current_image_points_L
+
+ if best_image_points_L is None:
+ print(f"Error: Could not find a valid pose for '{part_type}' part.")
+ return None
+
+ print(f"Found best point correspondence for '{part_type}' with reprojection error: {best_reprojection_error:.2f}")
+
+    # --- 2. Rerun the full reconstruction with the best correspondence ---
+
+    # Fix the ordering of the right-camera 2D points.
+    # This step is a bit involved; we assume the start/end flips are the same for both cameras,
+    # which is reasonable because the cameras are close together and see the same geometric direction.
+ best_image_points_R = []
+ for line_name in ['line1', 'line2']:
+ key_R = f"{part_type}_r_{line_name}"
+ points = [endpoints_2d_R[key_R]['start'], endpoints_2d_R[key_R]['end']]
+        # Check whether the left-camera points were flipped
+ original_L = [endpoints_2d_L[key_R.replace('_r_', '_l_')]['start'],
+ endpoints_2d_L[key_R.replace('_r_', '_l_')]['end']]
+ idx = 0 if line_name == 'line1' else 2
+        # If the left side was flipped, flip the right side too
+ if best_image_points_L[idx] != original_L[0]:
+ points.reverse()
+ best_image_points_R.extend(points)
+
+    # Estimate the left camera pose
+ rvec_L, tvec_L = estimate_camera_pose(best_image_points_L, object_points_3d, 'L')
+ R_L, _ = cv2.Rodrigues(rvec_L)
+ P_L = cam_L_params['K'] @ np.hstack((R_L, tvec_L))
+
+    # Estimate the right camera pose
+ rvec_R, tvec_R = estimate_camera_pose(best_image_points_R, object_points_3d, 'R')
+ R_R, _ = cv2.Rodrigues(rvec_R)
+ P_R = cam_R_params['K'] @ np.hstack((R_R, tvec_R))
+
+    # Triangulate
+ points_2d_L_undistorted = cv2.undistortPoints(np.array(best_image_points_L, dtype=np.float32), cam_L_params['K'],
+ cam_L_params['kc'], P=cam_L_params['K'])
+ points_2d_R_undistorted = cv2.undistortPoints(np.array(best_image_points_R, dtype=np.float32), cam_R_params['K'],
+ cam_R_params['kc'], P=cam_R_params['K'])
+
+ points_4d = cv2.triangulatePoints(P_L, P_R, points_2d_L_undistorted.reshape(-1, 2).T,
+ points_2d_R_undistorted.reshape(-1, 2).T)
+ points_3d_object = (points_4d[:3] / points_4d[3]).T
+
+    # Assemble the output
+ final_seams = {}
+ for i, key in enumerate(seam_keys):
+ final_seams[key] = {
+ 'start_3d': points_3d_object[i * 2],
+ 'end_3d': points_3d_object[i * 2 + 1]
+ }
+
+ return final_seams
\ No newline at end of file
diff --git a/3D_construction/script/recognition.py b/3D_construction/script/recognition.py
new file mode 100644
index 0000000..f5fee9d
--- /dev/null
+++ b/3D_construction/script/recognition.py
@@ -0,0 +1,55 @@
+import os
+import cv2
+from ultralytics import YOLO
+
+# Keeping the model cache outside the function is good practice: the model only has to be
+# loaded once across repeated calls. The model path is a parameter to keep the function generic.
+models = {}
+
+
+def detect_crop_area(image_path: str, model_path: str):
+ """
+    Detect the crop region in an image with a YOLOv8 model.
+
+    Args:
+        image_path (str): Path to the original image.
+        model_path (str): Path to the YOLOv8 detection model (.pt).
+
+    Returns:
+        tuple or None: A tuple of integer coordinates (x1, y1, x2, y2) if an object is detected,
+                       otherwise None (nothing detected or an error occurred).
+ """
+    # Load and cache the model if it has not been loaded yet
+ if model_path not in models:
+ print(f"Loading YOLOv8 model from: {model_path}")
+ if not os.path.exists(model_path):
+ print(f"Error: Model file not found at {model_path}")
+ return None
+ models[model_path] = YOLO(model_path)
+
+ model = models[model_path]
+
+    # Check that the image file exists
+ if not os.path.exists(image_path):
+ print(f"Error: Image file not found at {image_path}")
+ return None
+
+ try:
+        # Run prediction; verbose=False suppresses unnecessary console output
+ results = model.predict(source=image_path, conf=0.5, verbose=False)
+
+        # Check whether anything was detected
+ if not results or not results[0].boxes:
+ print(f"Warning: YOLO did not detect any objects in {image_path}")
+ return None
+
+        # Take the highest-confidence detection box.
+        # results[0].boxes holds all detections; the first is usually the highest-confidence one.
+ box = results[0].boxes.xyxy[0].cpu().numpy().astype(int)
+
+        # Return the box as a tuple of integer coordinates
+ return tuple(box)
+
+ except Exception as e:
+ print(f"An error occurred during prediction for {image_path}: {e}")
+ return None
diff --git a/3D_construction/script/reconstruction.py b/3D_construction/script/reconstruction.py
new file mode 100644
index 0000000..e6fb423
--- /dev/null
+++ b/3D_construction/script/reconstruction.py
@@ -0,0 +1,163 @@
+import numpy as np
+import cv2
+import open3d as o3d
+
+def get_camera_parameters():
+ """
+    Store and return the camera calibration parameters provided by the senior labmate.
+    All lists are converted to NumPy arrays for later computation.
+ """
+    # Left camera intrinsics
+ cam_params_L = {
+ 'fc': np.array([3774.896, 3770.590]),
+ 'cc': np.array([1327.950, 956.597]),
+ 'kc': np.array([-0.098, 0.208, -0.00005, 0.00111, 0]),
+        # OpenCV camera matrix format [fx, 0, cx; 0, fy, cy; 0, 0, 1]
+ 'K': np.array([
+ [3774.896, 0, 1327.950],
+ [0, 3770.590, 956.597],
+ [0, 0, 1]
+ ])
+ }
+
+    # Right camera intrinsics
+ cam_params_R = {
+ 'fc': np.array([3758.657, 3763.935]),
+ 'cc': np.array([1274.940, 965.722]),
+ 'kc': np.array([0.093, -0.219, 0.00079, 0.00255, 0]),
+ 'K': np.array([
+ [3758.657, 0, 1274.940],
+ [0, 3763.935, 965.722],
+ [0, 0, 1]
+ ])
+ }
+
+    # Extrinsics (transform of the right camera relative to the left camera)
+ extrinsics = {
+ 'R': np.array([
+ [0.1169, 0.6292, 0.7683],
+ [0.9881, 0.0036, 0.1534],
+ [0.0993, -0.7771, -0.6214]
+ ]),
+        'T': np.array([-220.36786, 2.23290, 30.06279]).reshape(3, 1)  # translation vector
+ }
+
+ return cam_params_L, cam_params_R, extrinsics
+
+
+def reconstruct_points(points_L, points_R, image_size=(4000, 3000)):
+ """
+    Core 3D reconstruction routine based on OpenCV.
+
+    Args:
+        points_L (list of tuples): 2D points in the left image [(u1, v1), (u2, v2), ...].
+        points_R (list of tuples): Corresponding 2D points in the right image [(u1, v1), (u2, v2), ...].
+        image_size (tuple): Original image size (width, height), used for stereo rectification.
+
+    Returns:
+        np.ndarray: Reconstructed 3D points (N, 3), in the same units as the calibration (usually mm).
+ """
+    # 1. Get the camera parameters
+    cam_L, cam_R, extrinsics = get_camera_parameters()
+
+    # 2. Compute the rectifying rotations and projection matrices.
+    # stereoRectify returns several matrices; we only need R1/R2 and P1/P2 (the new projection matrices).
+    # No image remap is needed here, since we only transform a handful of points.
+    # Note: R and T describe the transform between the two cameras, as expected by OpenCV.
+    R1, R2, P1, P2, Q, _, _ = cv2.stereoRectify(
+        cameraMatrix1=cam_L['K'],
+        distCoeffs1=cam_L['kc'],
+        cameraMatrix2=cam_R['K'],
+        distCoeffs2=cam_R['kc'],
+        imageSize=image_size,
+        R=extrinsics['R'],
+        T=extrinsics['T'].flatten()  # T must be a 1D array
+    )
+
+    # 3. Undistort the input 2D points into the rectified frames.
+    # Note: cv2.undistortPoints expects shape (N, 1, 2) and float32.
+    # Passing R=R1/R2 and P=P1/P2 expresses the points in the rectified coordinates used for triangulation.
+    points_L_np = np.array(points_L, dtype=np.float32).reshape(-1, 1, 2)
+    points_R_np = np.array(points_R, dtype=np.float32).reshape(-1, 1, 2)
+
+    points_L_undistorted = cv2.undistortPoints(points_L_np, cam_L['K'], cam_L['kc'], R=R1, P=P1)
+    points_R_undistorted = cv2.undistortPoints(points_R_np, cam_R['K'], cam_R['kc'], R=R2, P=P2)
+
+    # 4. Triangulate with triangulatePoints.
+    # The function takes the rectified points and the new projection matrices;
+    # the points must be shaped (2, N).
+    points_L_for_triangulate = points_L_undistorted.reshape(-1, 2).T
+    points_R_for_triangulate = points_R_undistorted.reshape(-1, 2).T
+
+    # triangulatePoints returns homogeneous coordinates (4, N)
+    points_4d_hom = cv2.triangulatePoints(P1, P2, points_L_for_triangulate, points_R_for_triangulate)
+
+    # 5. Convert the homogeneous coordinates to Cartesian coordinates
+    # by dividing by the fourth component w
+    points_3d = points_4d_hom[:3, :] / points_4d_hom[3, :]
+
+    # Return the transposed result, shape (N, 3)
+ return points_3d.T
+
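+# Illustrative usage of reconstruct_points (the pixel coordinates below are hypothetical):
+#
+#   pts_L = [(1520, 1080), (1850, 1110)]   # matched points in the left image
+#   pts_R = [(1335, 1062), (1660, 1095)]   # the same physical points in the right image
+#   xyz = reconstruct_points(pts_L, pts_R, image_size=(4000, 3000))
+#   # xyz has shape (2, 3); units follow the calibration (typically mm),
+#   # expressed in the left camera coordinate system used by the rest of the pipeline.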
+
+def visualize_reconstructed_seams(reconstructed_seams_3d):
+ """
+    Visualize the reconstructed 3D weld seam segments with Open3D.
+
+    Args:
+        reconstructed_seams_3d (dict): Dict containing the 3D endpoint coordinates.
+ """
+ print("\n--- Visualizing Final 3-Seam Model vs. Ground Truth ---")
+
+    # Final color map
+    color_map = {
+        # Final model (bright colors)
+        'bottom_left_final': [1, 0, 0],  # red
+        'middle_final': [0, 1, 0],  # green
+        'top_left_final': [0, 0, 1],  # blue
+        # Ground truth (slightly darker / different colors)
+        'bottom_left_truth': [0.8, 0.4, 0.4],  # pink
+        'middle_truth': [0.4, 0.8, 0.4],  # light green
+        'top_left_truth': [0.4, 0.4, 0.8],  # light blue
+ }
+
+ geometries = []
+ coordinate_frame = o3d.geometry.TriangleMesh.create_coordinate_frame(size=50, origin=[0, 0, 0])
+ geometries.append(coordinate_frame)
+    # Iterate over all reconstructed seams
+ for name, points in reconstructed_seams_3d.items():
+ start_pt = points['start_3d']
+ end_pt = points['end_3d']
+
+        # Open3D expects lists of points and lines
+        line_points = [start_pt, end_pt]
+        line_indices = [[0, 1]]  # connect the first point to the second
+        line_color = color_map.get(name, [0.5, 0.5, 0.5])  # grey if no color is defined
+
+        # Create the LineSet object
+ line_set = o3d.geometry.LineSet(
+ points=o3d.utility.Vector3dVector(line_points),
+ lines=o3d.utility.Vector2iVector(line_indices)
+ )
+        # Set the color of this segment
+ line_set.colors = o3d.utility.Vector3dVector([line_color])
+
+ geometries.append(line_set)
+
+        # (Optional) add small spheres at the endpoints to highlight them
+        start_sphere = o3d.geometry.TriangleMesh.create_sphere(radius=10)  # the radius can be adjusted
+ start_sphere.translate(start_pt)
+ start_sphere.paint_uniform_color(line_color)
+ geometries.append(start_sphere)
+
+ end_sphere = o3d.geometry.TriangleMesh.create_sphere(radius=10)
+ end_sphere.translate(end_pt)
+ end_sphere.paint_uniform_color(line_color)
+ geometries.append(end_sphere)
+
+    # Draw all geometry objects
+ o3d.visualization.draw_geometries(
+ geometries,
+ window_name="Reconstructed 3D Weld Seams",
+ width=1280,
+ height=720
+ )
\ No newline at end of file
diff --git a/3D_construction/script/yolo_detector.py b/3D_construction/script/yolo_detector.py
new file mode 100644
index 0000000..18af49d
--- /dev/null
+++ b/3D_construction/script/yolo_detector.py
@@ -0,0 +1,44 @@
+import os
+from ultralytics import YOLO
+
+# Model cache
+_yolo_models = {}
+
+
+def detect_crop_area(image_path: str, model_path: str):
+ """
+    Detect the crop region in an image with a YOLOv8 model.
+
+    Args:
+        image_path (str): Path to the original image.
+        model_path (str): Path to the YOLOv8 detection model (.pt).
+
+    Returns:
+        tuple or None: (x1, y1, x2, y2) if an object is detected; otherwise None.
+ """
+ if model_path not in _yolo_models:
+ print(f"Loading YOLOv8 model from: {model_path}")
+ if not os.path.exists(model_path):
+ print(f"Error: YOLO model file not found at {model_path}")
+ return None
+ _yolo_models[model_path] = YOLO(model_path)
+
+ model = _yolo_models[model_path]
+
+ if not os.path.exists(image_path):
+ print(f"Error: Image file not found at {image_path}")
+ return None
+
+ try:
+ results = model.predict(source=image_path, conf=0.5, verbose=False)
+
+ if not results or not results[0].boxes:
+ print(f"Warning: YOLO did not detect any objects in {image_path}")
+ return None
+
+ box = results[0].boxes.xyxy[0].cpu().numpy().astype(int)
+ return tuple(box)
+
+ except Exception as e:
+ print(f"An error occurred during YOLO prediction for {image_path}: {e}")
+ return None
\ No newline at end of file
diff --git a/OpenCV/convert_jpeg_to_jpg.py b/OpenCV/convert_jpeg_to_jpg.py
new file mode 100644
index 0000000..abe8ec7
--- /dev/null
+++ b/OpenCV/convert_jpeg_to_jpg.py
@@ -0,0 +1,65 @@
+import os
+from PIL import Image
+from tqdm import tqdm
+
+# --- Configuration ---
+# 1. Path to your image folder.
+# According to the project structure this path would be 'VOCdevkit/VOC2007/JPEGImages'.
+image_folder = '../label/up'
+
+# 2. Whether to delete the original .jpeg files after conversion
+delete_original = True
+
+
+# --- End of configuration ---
+
+
+def convert_jpeg_to_jpg(folder_path, delete_original_file=True):
+ """
+    Convert the .jpeg images in the given folder to .jpg format.
+
+    :param folder_path: Path to the folder containing the images.
+    :param delete_original_file: Whether to delete the original .jpeg files.
+ """
+ if not os.path.isdir(folder_path):
+ print(f"错误:文件夹 '{folder_path}' 不存在。")
+ return
+
+    # Find all files ending in .jpeg or .JPEG
+ jpeg_files = [f for f in os.listdir(folder_path) if f.lower().endswith('.jpeg')]
+
+ if not jpeg_files:
+ print(f"在 '{folder_path}' 中没有找到 .jpeg 文件。")
+ return
+
+ print(f"找到 {len(jpeg_files)} 个 .jpeg 文件,开始转换...")
+
+    for filename in tqdm(jpeg_files, desc="Converting"):
+ jpeg_path = os.path.join(folder_path, filename)
+
+        # Build the new .jpg filename
+ base_name = os.path.splitext(filename)[0]
+ jpg_filename = f"{base_name}.jpg"
+ jpg_path = os.path.join(folder_path, jpg_filename)
+
+ try:
+ with Image.open(jpeg_path) as img:
+                # Make sure the image is in RGB mode, since JPG has no alpha channel
+ if img.mode != 'RGB':
+ img = img.convert('RGB')
+
+                # Save as .jpg with high quality
+ img.save(jpg_path, 'jpeg', quality=95)
+
+            # If conversion succeeded and deletion is enabled, remove the original file
+ if delete_original_file:
+ os.remove(jpeg_path)
+
+ except Exception as e:
+ print(f"\n处理文件 '{filename}' 时出错: {e}")
+
+ print("\n转换完成!")
+
+
+if __name__ == '__main__':
+ convert_jpeg_to_jpg(image_folder, delete_original)
\ No newline at end of file
diff --git a/OpenCV/convert_to_voc.py b/OpenCV/convert_to_voc.py
new file mode 100644
index 0000000..9686e24
--- /dev/null
+++ b/OpenCV/convert_to_voc.py
@@ -0,0 +1,192 @@
+import os
+import json
+import xml.etree.ElementTree as ET
+from xml.dom import minidom
+from tqdm import tqdm
+import re # 引入正则表达式库
+
+# --- Configuration ---
+# 1. Folder containing the original JSON annotation files
+json_folder = '../label/up_json'  # example path, change to your JSON folder
+
+# 2. Folder containing the original images (used to check that each target image exists)
+image_folder = '../label/up'  # example path, change to your image folder
+
+# 3. Folder where the generated XML files will be saved
+output_xml_folder = '../label/up_xml'
+
+# 4. Class name written into the XML (applied to the rectangle annotations, label "1" in the JSON)
+class_name_for_label_3 = "Space weld workpiece"  # the name used in the XML example
+
+# 5. Size of each annotation group
+group_size = 5
+
+
+# --- End of configuration ---
+
+
+def create_xml_annotation(image_info, objects_info):
+ """
+    Build an XML annotation tree from the given information.
+    :param image_info: dict with the image file name and size information
+    :param objects_info: list of object dicts, one per annotated object
+    :return: the root XML Element
+ """
+ # 创建根节点
+ annotation = ET.Element('annotation')
+
+ # 子节点 - folder
+ folder = ET.SubElement(annotation, 'folder')
+ folder.text = 'JPEGImages'
+
+ # 子节点 - filename
+ filename_node = ET.SubElement(annotation, 'filename')
+ filename_node.text = image_info['filename']
+
+ # 子节点 - path (路径通常不那么重要,但最好有一个)
+ path = ET.SubElement(annotation, 'path')
+ # 路径指向JPEGImages文件夹
+ image_path_in_voc = os.path.join('..', 'JPEGImages', image_info['filename'])
+ path.text = image_path_in_voc
+
+ # 子节点 - source
+ source = ET.SubElement(annotation, 'source')
+ database = ET.SubElement(source, 'database')
+ database.text = 'Unknown'
+
+ # 子节点 - size
+ size = ET.SubElement(annotation, 'size')
+ width = ET.SubElement(size, 'width')
+ width.text = str(image_info['width'])
+ height = ET.SubElement(size, 'height')
+ height.text = str(image_info['height'])
+ depth = ET.SubElement(size, 'depth')
+ depth.text = str(image_info.get('depth', 3))
+
+ # 子节点 - segmented
+ segmented = ET.SubElement(annotation, 'segmented')
+ segmented.text = '0'
+
+ # 为每个物体添加 object 节点
+ for obj in objects_info:
+ object_node = ET.SubElement(annotation, 'object')
+ name = ET.SubElement(object_node, 'name')
+ name.text = obj['name']
+ pose = ET.SubElement(object_node, 'pose')
+ pose.text = 'Unspecified'
+ truncated = ET.SubElement(object_node, 'truncated')
+ truncated.text = '0'
+ difficult = ET.SubElement(object_node, 'difficult')
+ difficult.text = '0'
+ bndbox = ET.SubElement(object_node, 'bndbox')
+ xmin = ET.SubElement(bndbox, 'xmin')
+ xmin.text = str(int(obj['xmin']))
+ ymin = ET.SubElement(bndbox, 'ymin')
+ ymin.text = str(int(obj['ymin']))
+ xmax = ET.SubElement(bndbox, 'xmax')
+ xmax.text = str(int(obj['xmax']))
+ ymax = ET.SubElement(bndbox, 'ymax')
+ ymax.text = str(int(obj['ymax']))
+
+ return annotation
+
+
+def prettify_xml(elem):
+ """
+    Pretty-print the XML output so it is easier to read.
+ """
+ rough_string = ET.tostring(elem, 'utf-8')
+ reparsed = minidom.parseString(rough_string)
+ return reparsed.toprettyxml(indent=" ")
+
+
+def main():
+    if not os.path.exists(output_xml_folder):
+        os.makedirs(output_xml_folder)
+        print(f"Created output folder: {output_xml_folder}")
+
+    json_files = sorted([f for f in os.listdir(json_folder) if f.endswith('.json')])
+
+    print(f"Found {len(json_files)} JSON files, starting conversion...")
+
+    for json_file in tqdm(json_files, desc="Processing JSON files"):
+        base_name = os.path.splitext(json_file)[0]
+
+        # Match the letter prefix and the number with a regular expression
+        match = re.match(r'([a-zA-Z]+)(\d+)', base_name)
+
+        # 1. Check whether this file is the start of an annotation group
+        is_group_start_file = False
+        if match:
+            num = int(match.group(2))
+            # Numbers 1, 6, 11, ... mark the start of a group
+            if (num - 1) % group_size == 0:
+                is_group_start_file = True
+        else:
+            # File names outside the l1 / r5 pattern are treated as "plain" files that form their own group
+            is_group_start_file = True
+
+        if not is_group_start_file:
+            # Not a group-start file (e.g. l2, l3, ...): skip it, its annotation is handled via l1
+            continue
+
+        # --- This is a group-start file, process its group ---
+ json_path = os.path.join(json_folder, json_file)
+
+ with open(json_path, 'r', encoding='utf-8') as f:
+ data = json.load(f)
+
+        # 2. Extract every qualifying annotation (rectangles with label "1") from the group-start file
+ objects_to_write = []
+ for shape in data.get('shapes', []):
+ if shape.get('label') == '1' and shape.get('shape_type') == 'rectangle':
+ points = shape.get('points', [])
+ if len(points) == 2:
+ x_coords = sorted([p[0] for p in points])
+ y_coords = sorted([p[1] for p in points])
+ objects_to_write.append({
+ 'name': class_name_for_label_3,
+ 'xmin': x_coords[0], 'ymin': y_coords[0],
+ 'xmax': x_coords[1], 'ymax': y_coords[1],
+ })
+
+ if not objects_to_write:
+ continue
+
+        # 3. Work out which images this annotation applies to
+        target_image_names = []
+        if match:
+            # File name follows the l1 / r6 pattern
+            prefix = match.group(1)
+            start_num = int(match.group(2))
+            for i in range(group_size):
+                # Assumes the images use the .jpg extension
+                target_image_names.append(f"{prefix}{start_num + i}.jpg")
+        else:
+            # Plain file: apply the annotation only to the image with the same name
+            # Assumes the images use the .jpg extension
+            target_image_names.append(f"{base_name}.jpg")
+
+        # 4. Generate an XML file for every target image in the group
+        for image_name in target_image_names:
+            image_path = os.path.join(image_folder, image_name)
+            if not os.path.exists(image_path):
+                print(f"\nWarning: image '{image_name}' not found, skipping its XML file.")
+                continue
+
+            # Use the image dimensions stored in the JSON
+            image_info = {'filename': image_name, 'width': data['imageWidth'], 'height': data['imageHeight']}
+
+ xml_tree = create_xml_annotation(image_info, objects_to_write)
+ xml_string = prettify_xml(xml_tree)
+ xml_filename = os.path.splitext(image_name)[0] + '.xml'
+ output_path = os.path.join(output_xml_folder, xml_filename)
+
+ with open(output_path, 'w', encoding='utf-8') as f:
+ f.write(xml_string)
+
+    print("Conversion finished! All XML files saved to:", output_xml_folder)
+
+
+if __name__ == '__main__':
+ main()
\ No newline at end of file
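
A small sketch of the grouping rule implemented above, under the assumption that group_size stays at 5: only file names whose trailing number is 1, 6, 11, ... start a group, and that start file's rectangles are copied to the following four images. The example names are illustrative.

import re

def is_group_start(base_name: str, group_size: int = 5) -> bool:
    match = re.match(r'([a-zA-Z]+)(\d+)', base_name)
    if not match:
        return True  # names outside the l1/r6 pattern form their own group
    return (int(match.group(2)) - 1) % group_size == 0

# l1 and l6 start groups; l2..l5 are covered by l1's annotation
assert [is_group_start(n) for n in ["l1", "l2", "l5", "l6", "r11"]] == [True, False, False, True, True]
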
diff --git a/OpenCV/data/test3/l1.jpeg b/OpenCV/data/test3/l1.jpeg
deleted file mode 100644
index 44aa204..0000000
Binary files a/OpenCV/data/test3/l1.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l10.jpeg b/OpenCV/data/test3/l10.jpeg
deleted file mode 100644
index 3ce086a..0000000
Binary files a/OpenCV/data/test3/l10.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l11.jpeg b/OpenCV/data/test3/l11.jpeg
deleted file mode 100644
index 58eb35c..0000000
Binary files a/OpenCV/data/test3/l11.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l12.jpeg b/OpenCV/data/test3/l12.jpeg
deleted file mode 100644
index 3580607..0000000
Binary files a/OpenCV/data/test3/l12.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l13.jpeg b/OpenCV/data/test3/l13.jpeg
deleted file mode 100644
index 798a5d9..0000000
Binary files a/OpenCV/data/test3/l13.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l14.jpeg b/OpenCV/data/test3/l14.jpeg
deleted file mode 100644
index 73eb294..0000000
Binary files a/OpenCV/data/test3/l14.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l15.jpeg b/OpenCV/data/test3/l15.jpeg
deleted file mode 100644
index 516561a..0000000
Binary files a/OpenCV/data/test3/l15.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l16.jpeg b/OpenCV/data/test3/l16.jpeg
deleted file mode 100644
index c2b7704..0000000
Binary files a/OpenCV/data/test3/l16.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l17.jpeg b/OpenCV/data/test3/l17.jpeg
deleted file mode 100644
index 0922bdf..0000000
Binary files a/OpenCV/data/test3/l17.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l18.jpeg b/OpenCV/data/test3/l18.jpeg
deleted file mode 100644
index c7382e9..0000000
Binary files a/OpenCV/data/test3/l18.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l19.jpeg b/OpenCV/data/test3/l19.jpeg
deleted file mode 100644
index 2fc3a6d..0000000
Binary files a/OpenCV/data/test3/l19.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l2.jpeg b/OpenCV/data/test3/l2.jpeg
deleted file mode 100644
index 270001d..0000000
Binary files a/OpenCV/data/test3/l2.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l20.jpeg b/OpenCV/data/test3/l20.jpeg
deleted file mode 100644
index f98ab2d..0000000
Binary files a/OpenCV/data/test3/l20.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l21.jpeg b/OpenCV/data/test3/l21.jpeg
deleted file mode 100644
index 20e483a..0000000
Binary files a/OpenCV/data/test3/l21.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l22.jpeg b/OpenCV/data/test3/l22.jpeg
deleted file mode 100644
index 41330d2..0000000
Binary files a/OpenCV/data/test3/l22.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l23.jpeg b/OpenCV/data/test3/l23.jpeg
deleted file mode 100644
index 4c46f79..0000000
Binary files a/OpenCV/data/test3/l23.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l24.jpeg b/OpenCV/data/test3/l24.jpeg
deleted file mode 100644
index 1d0b644..0000000
Binary files a/OpenCV/data/test3/l24.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l25.jpeg b/OpenCV/data/test3/l25.jpeg
deleted file mode 100644
index 2a0903f..0000000
Binary files a/OpenCV/data/test3/l25.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l26.jpeg b/OpenCV/data/test3/l26.jpeg
deleted file mode 100644
index ab01fbe..0000000
Binary files a/OpenCV/data/test3/l26.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l27.jpeg b/OpenCV/data/test3/l27.jpeg
deleted file mode 100644
index 221b569..0000000
Binary files a/OpenCV/data/test3/l27.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l28.jpeg b/OpenCV/data/test3/l28.jpeg
deleted file mode 100644
index 702dad6..0000000
Binary files a/OpenCV/data/test3/l28.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l29.jpeg b/OpenCV/data/test3/l29.jpeg
deleted file mode 100644
index a333a8e..0000000
Binary files a/OpenCV/data/test3/l29.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l3.jpeg b/OpenCV/data/test3/l3.jpeg
deleted file mode 100644
index fb18b13..0000000
Binary files a/OpenCV/data/test3/l3.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l30.jpeg b/OpenCV/data/test3/l30.jpeg
deleted file mode 100644
index 1840351..0000000
Binary files a/OpenCV/data/test3/l30.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l31.jpeg b/OpenCV/data/test3/l31.jpeg
deleted file mode 100644
index 43273eb..0000000
Binary files a/OpenCV/data/test3/l31.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l32.jpeg b/OpenCV/data/test3/l32.jpeg
deleted file mode 100644
index f430e45..0000000
Binary files a/OpenCV/data/test3/l32.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l33.jpeg b/OpenCV/data/test3/l33.jpeg
deleted file mode 100644
index b9a1b97..0000000
Binary files a/OpenCV/data/test3/l33.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l34.jpeg b/OpenCV/data/test3/l34.jpeg
deleted file mode 100644
index e0209d2..0000000
Binary files a/OpenCV/data/test3/l34.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l35.jpeg b/OpenCV/data/test3/l35.jpeg
deleted file mode 100644
index 71ea537..0000000
Binary files a/OpenCV/data/test3/l35.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l36.jpeg b/OpenCV/data/test3/l36.jpeg
deleted file mode 100644
index a3aa304..0000000
Binary files a/OpenCV/data/test3/l36.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l37.jpeg b/OpenCV/data/test3/l37.jpeg
deleted file mode 100644
index 9606d7c..0000000
Binary files a/OpenCV/data/test3/l37.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l38.jpeg b/OpenCV/data/test3/l38.jpeg
deleted file mode 100644
index 75dd5e8..0000000
Binary files a/OpenCV/data/test3/l38.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l39.jpeg b/OpenCV/data/test3/l39.jpeg
deleted file mode 100644
index 1a962de..0000000
Binary files a/OpenCV/data/test3/l39.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l4.jpeg b/OpenCV/data/test3/l4.jpeg
deleted file mode 100644
index 1821dd8..0000000
Binary files a/OpenCV/data/test3/l4.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l40.jpeg b/OpenCV/data/test3/l40.jpeg
deleted file mode 100644
index c2b1ccb..0000000
Binary files a/OpenCV/data/test3/l40.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l41.jpeg b/OpenCV/data/test3/l41.jpeg
deleted file mode 100644
index 9f57b36..0000000
Binary files a/OpenCV/data/test3/l41.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l42.jpeg b/OpenCV/data/test3/l42.jpeg
deleted file mode 100644
index 9df4963..0000000
Binary files a/OpenCV/data/test3/l42.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l43.jpeg b/OpenCV/data/test3/l43.jpeg
deleted file mode 100644
index d1b4a84..0000000
Binary files a/OpenCV/data/test3/l43.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l44.jpeg b/OpenCV/data/test3/l44.jpeg
deleted file mode 100644
index 5767e8f..0000000
Binary files a/OpenCV/data/test3/l44.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l45.jpeg b/OpenCV/data/test3/l45.jpeg
deleted file mode 100644
index 1949944..0000000
Binary files a/OpenCV/data/test3/l45.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l46.jpeg b/OpenCV/data/test3/l46.jpeg
deleted file mode 100644
index 39873a2..0000000
Binary files a/OpenCV/data/test3/l46.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l47.jpeg b/OpenCV/data/test3/l47.jpeg
deleted file mode 100644
index f34ea57..0000000
Binary files a/OpenCV/data/test3/l47.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l48.jpeg b/OpenCV/data/test3/l48.jpeg
deleted file mode 100644
index 0245855..0000000
Binary files a/OpenCV/data/test3/l48.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l49.jpeg b/OpenCV/data/test3/l49.jpeg
deleted file mode 100644
index 2ed498a..0000000
Binary files a/OpenCV/data/test3/l49.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l5.jpeg b/OpenCV/data/test3/l5.jpeg
deleted file mode 100644
index c0ce101..0000000
Binary files a/OpenCV/data/test3/l5.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l50.jpeg b/OpenCV/data/test3/l50.jpeg
deleted file mode 100644
index 4fb646e..0000000
Binary files a/OpenCV/data/test3/l50.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l6.jpeg b/OpenCV/data/test3/l6.jpeg
deleted file mode 100644
index 0ea9573..0000000
Binary files a/OpenCV/data/test3/l6.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l7.jpeg b/OpenCV/data/test3/l7.jpeg
deleted file mode 100644
index 041f9d3..0000000
Binary files a/OpenCV/data/test3/l7.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l8.jpeg b/OpenCV/data/test3/l8.jpeg
deleted file mode 100644
index bb5379c..0000000
Binary files a/OpenCV/data/test3/l8.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/l9.jpeg b/OpenCV/data/test3/l9.jpeg
deleted file mode 100644
index 6a784fa..0000000
Binary files a/OpenCV/data/test3/l9.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r1.jpeg b/OpenCV/data/test3/r1.jpeg
deleted file mode 100644
index cbf9989..0000000
Binary files a/OpenCV/data/test3/r1.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r10.jpeg b/OpenCV/data/test3/r10.jpeg
deleted file mode 100644
index ed69024..0000000
Binary files a/OpenCV/data/test3/r10.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r11.jpeg b/OpenCV/data/test3/r11.jpeg
deleted file mode 100644
index 009b3e2..0000000
Binary files a/OpenCV/data/test3/r11.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r12.jpeg b/OpenCV/data/test3/r12.jpeg
deleted file mode 100644
index d388948..0000000
Binary files a/OpenCV/data/test3/r12.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r13.jpeg b/OpenCV/data/test3/r13.jpeg
deleted file mode 100644
index 6552548..0000000
Binary files a/OpenCV/data/test3/r13.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r14.jpeg b/OpenCV/data/test3/r14.jpeg
deleted file mode 100644
index 095b975..0000000
Binary files a/OpenCV/data/test3/r14.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r15.jpeg b/OpenCV/data/test3/r15.jpeg
deleted file mode 100644
index 9b26caa..0000000
Binary files a/OpenCV/data/test3/r15.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r16.jpeg b/OpenCV/data/test3/r16.jpeg
deleted file mode 100644
index bce4f3c..0000000
Binary files a/OpenCV/data/test3/r16.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r17.jpeg b/OpenCV/data/test3/r17.jpeg
deleted file mode 100644
index 5c2eed8..0000000
Binary files a/OpenCV/data/test3/r17.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r18.jpeg b/OpenCV/data/test3/r18.jpeg
deleted file mode 100644
index 796bd51..0000000
Binary files a/OpenCV/data/test3/r18.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r19.jpeg b/OpenCV/data/test3/r19.jpeg
deleted file mode 100644
index 8c7a755..0000000
Binary files a/OpenCV/data/test3/r19.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r2.jpeg b/OpenCV/data/test3/r2.jpeg
deleted file mode 100644
index 59bca56..0000000
Binary files a/OpenCV/data/test3/r2.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r20.jpeg b/OpenCV/data/test3/r20.jpeg
deleted file mode 100644
index 719b819..0000000
Binary files a/OpenCV/data/test3/r20.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r21.jpeg b/OpenCV/data/test3/r21.jpeg
deleted file mode 100644
index 0304acf..0000000
Binary files a/OpenCV/data/test3/r21.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r22.jpeg b/OpenCV/data/test3/r22.jpeg
deleted file mode 100644
index f8a9c54..0000000
Binary files a/OpenCV/data/test3/r22.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r23.jpeg b/OpenCV/data/test3/r23.jpeg
deleted file mode 100644
index c5c933e..0000000
Binary files a/OpenCV/data/test3/r23.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r24.jpeg b/OpenCV/data/test3/r24.jpeg
deleted file mode 100644
index c278bdd..0000000
Binary files a/OpenCV/data/test3/r24.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r25.jpeg b/OpenCV/data/test3/r25.jpeg
deleted file mode 100644
index 402b5f8..0000000
Binary files a/OpenCV/data/test3/r25.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r26.jpeg b/OpenCV/data/test3/r26.jpeg
deleted file mode 100644
index b677705..0000000
Binary files a/OpenCV/data/test3/r26.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r27.jpeg b/OpenCV/data/test3/r27.jpeg
deleted file mode 100644
index b677705..0000000
Binary files a/OpenCV/data/test3/r27.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r28.jpeg b/OpenCV/data/test3/r28.jpeg
deleted file mode 100644
index debfc00..0000000
Binary files a/OpenCV/data/test3/r28.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r29.jpeg b/OpenCV/data/test3/r29.jpeg
deleted file mode 100644
index 8be433d..0000000
Binary files a/OpenCV/data/test3/r29.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r3.jpeg b/OpenCV/data/test3/r3.jpeg
deleted file mode 100644
index 78b40e3..0000000
Binary files a/OpenCV/data/test3/r3.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r30.jpeg b/OpenCV/data/test3/r30.jpeg
deleted file mode 100644
index 488d7ee..0000000
Binary files a/OpenCV/data/test3/r30.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r31.jpeg b/OpenCV/data/test3/r31.jpeg
deleted file mode 100644
index 7759949..0000000
Binary files a/OpenCV/data/test3/r31.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r32.jpeg b/OpenCV/data/test3/r32.jpeg
deleted file mode 100644
index 087820c..0000000
Binary files a/OpenCV/data/test3/r32.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r33.jpeg b/OpenCV/data/test3/r33.jpeg
deleted file mode 100644
index 452cc5f..0000000
Binary files a/OpenCV/data/test3/r33.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r34.jpeg b/OpenCV/data/test3/r34.jpeg
deleted file mode 100644
index 3f29c1c..0000000
Binary files a/OpenCV/data/test3/r34.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r35.jpeg b/OpenCV/data/test3/r35.jpeg
deleted file mode 100644
index 59ea162..0000000
Binary files a/OpenCV/data/test3/r35.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r36.jpeg b/OpenCV/data/test3/r36.jpeg
deleted file mode 100644
index e929d13..0000000
Binary files a/OpenCV/data/test3/r36.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r37.jpeg b/OpenCV/data/test3/r37.jpeg
deleted file mode 100644
index 919caaa..0000000
Binary files a/OpenCV/data/test3/r37.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r38.jpeg b/OpenCV/data/test3/r38.jpeg
deleted file mode 100644
index a235f4c..0000000
Binary files a/OpenCV/data/test3/r38.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r39.jpeg b/OpenCV/data/test3/r39.jpeg
deleted file mode 100644
index 50a2c17..0000000
Binary files a/OpenCV/data/test3/r39.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r4.jpeg b/OpenCV/data/test3/r4.jpeg
deleted file mode 100644
index d0c2ebd..0000000
Binary files a/OpenCV/data/test3/r4.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r40.jpeg b/OpenCV/data/test3/r40.jpeg
deleted file mode 100644
index 907c4fa..0000000
Binary files a/OpenCV/data/test3/r40.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r41.jpeg b/OpenCV/data/test3/r41.jpeg
deleted file mode 100644
index 668eec3..0000000
Binary files a/OpenCV/data/test3/r41.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r42.jpeg b/OpenCV/data/test3/r42.jpeg
deleted file mode 100644
index 99acc47..0000000
Binary files a/OpenCV/data/test3/r42.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r43.jpeg b/OpenCV/data/test3/r43.jpeg
deleted file mode 100644
index 02980f8..0000000
Binary files a/OpenCV/data/test3/r43.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r44.jpeg b/OpenCV/data/test3/r44.jpeg
deleted file mode 100644
index e3e0c62..0000000
Binary files a/OpenCV/data/test3/r44.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r45.jpeg b/OpenCV/data/test3/r45.jpeg
deleted file mode 100644
index 643de51..0000000
Binary files a/OpenCV/data/test3/r45.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r46.jpeg b/OpenCV/data/test3/r46.jpeg
deleted file mode 100644
index ec2add3..0000000
Binary files a/OpenCV/data/test3/r46.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r47.jpeg b/OpenCV/data/test3/r47.jpeg
deleted file mode 100644
index 6927220..0000000
Binary files a/OpenCV/data/test3/r47.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r48.jpeg b/OpenCV/data/test3/r48.jpeg
deleted file mode 100644
index 044b144..0000000
Binary files a/OpenCV/data/test3/r48.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r49.jpeg b/OpenCV/data/test3/r49.jpeg
deleted file mode 100644
index 26282ae..0000000
Binary files a/OpenCV/data/test3/r49.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r5.jpeg b/OpenCV/data/test3/r5.jpeg
deleted file mode 100644
index d0c2ebd..0000000
Binary files a/OpenCV/data/test3/r5.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r50.jpeg b/OpenCV/data/test3/r50.jpeg
deleted file mode 100644
index fe179e7..0000000
Binary files a/OpenCV/data/test3/r50.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r6.jpeg b/OpenCV/data/test3/r6.jpeg
deleted file mode 100644
index 57a675e..0000000
Binary files a/OpenCV/data/test3/r6.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r7.jpeg b/OpenCV/data/test3/r7.jpeg
deleted file mode 100644
index fcdbebe..0000000
Binary files a/OpenCV/data/test3/r7.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r8.jpeg b/OpenCV/data/test3/r8.jpeg
deleted file mode 100644
index 36cd7fb..0000000
Binary files a/OpenCV/data/test3/r8.jpeg and /dev/null differ
diff --git a/OpenCV/data/test3/r9.jpeg b/OpenCV/data/test3/r9.jpeg
deleted file mode 100644
index afd2f36..0000000
Binary files a/OpenCV/data/test3/r9.jpeg and /dev/null differ
diff --git a/OpenCV/output/test3/r46.json b/OpenCV/output/test3/r46.json
index 6b9dbe9..0a214b5 100644
--- a/OpenCV/output/test3/r46.json
+++ b/OpenCV/output/test3/r46.json
@@ -21,7 +21,7 @@
"mask": null
},
{
- "label": "1",
+ "label": "2",
"points": [
[
2094.181818181818,
diff --git a/OpenCV/rename_files.py b/OpenCV/rename_files.py
new file mode 100644
index 0000000..36cb997
--- /dev/null
+++ b/OpenCV/rename_files.py
@@ -0,0 +1,110 @@
+import os
+import re
+import shutil
+from tqdm import tqdm
+
+# --- Configuration ---
+# 1. Image folder path
+image_folder = '../label/up'
+
+# 2. Annotation folder path
+annotation_folder = '../label/up_xml'
+
+# 3. Number of digits in the new file names (3 -> 000, 001, ...; 5 -> 00000, 00001, ...)
+padding_zeros = 3
+
+
+# --- End of configuration ---
+
+
+def rename_dataset_files(img_folder, an_folder, padding=3):
+ """
+    Rename image and annotation files: the l-series first, then the r-series, each sorted numerically.
+    """
+    print("Starting rename task...")
+
+ if not os.path.isdir(img_folder):
+ print(f"错误:图片文件夹 '{img_folder}' 不存在。")
+ return
+
+ if not os.path.isdir(an_folder):
+ print(f"错误:标注文件夹 '{an_folder}' 不存在。")
+ return
+
+ # 1. 获取所有图片文件名,并按 l 和 r 分组
+ l_files = []
+ r_files = []
+ other_files = []
+
+ for filename in os.listdir(img_folder):
+ if filename.lower().endswith('.jpg'):
+ base_name = os.path.splitext(filename)[0]
+ match = re.match(r'([lr])(\d+)', base_name, re.IGNORECASE)
+ if match:
+ prefix = match.group(1).lower()
+ number = int(match.group(2))
+ if prefix == 'l':
+ l_files.append((number, filename))
+ elif prefix == 'r':
+ r_files.append((number, filename))
+ else:
+ other_files.append(filename)
+
+ # 2. 对 l 和 r 组内的文件按数字进行排序
+ l_files.sort()
+ r_files.sort()
+
+ # 3. 合并文件列表,l在前,r在后
+ sorted_filenames = [f[1] for f in l_files] + [f[1] for f in r_files]
+
+ if not sorted_filenames:
+ print("在指定的 l/r 命名规则下未找到任何文件。")
+ if other_files:
+ print(f"跳过了这些文件: {other_files}")
+ return
+
+ print(f"找到 {len(sorted_filenames)} 个符合 l/r 规则的文件准备重命名。")
+
+ # 4. 开始重命名
+ counter = 0
+ with tqdm(total=len(sorted_filenames), desc="重命名文件") as pbar:
+ for old_filename in sorted_filenames:
+ # 构建新文件名
+ new_base_name = str(counter).zfill(padding)
+ new_jpg_name = f"{new_base_name}.jpg"
+ new_xml_name = f"{new_base_name}.xml"
+
+ # 构建旧文件的完整路径
+ old_base_name = os.path.splitext(old_filename)[0]
+ old_jpg_path = os.path.join(img_folder, f"{old_base_name}.jpg")
+ old_xml_path = os.path.join(an_folder, f"{old_base_name}.xml")
+
+ # 构建新文件的完整路径
+ new_jpg_path = os.path.join(img_folder, new_jpg_name)
+ new_xml_path = os.path.join(an_folder, new_xml_name)
+
+ # 执行重命名 (使用 shutil.move 更安全)
+ try:
+ if os.path.exists(old_jpg_path):
+ shutil.move(old_jpg_path, new_jpg_path)
+ else:
+ print(f"\n警告:找不到图片文件 {old_jpg_path}")
+
+ if os.path.exists(old_xml_path):
+ shutil.move(old_xml_path, new_xml_path)
+ else:
+ print(f"\n警告:找不到XML文件 {old_xml_path}")
+
+ counter += 1
+ except Exception as e:
+ print(f"\n重命名文件 {old_filename} 时出错: {e}")
+
+ pbar.update(1)
+
+ print(f"\n重命名完成!共处理了 {counter} 对文件。")
+ if other_files:
+ print(f"注意:以下文件未被处理,因为它们不符合 l/r 命名规则: {other_files}")
+
+
+if __name__ == '__main__':
+ rename_dataset_files(image_folder, annotation_folder, padding=padding_zeros)
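
For reference, the renaming scheme above simply zero-pads a running counter to padding_zeros digits, with the whole l-series placed before the r-series; a tiny illustration, assuming 50 l-images and 50 r-images as in the rest of the project:

padding = 3
l_count = 50  # assumed number of l*.jpg files
print(str(0).zfill(padding))                 # '000' -> first l image
print(str(l_count).zfill(padding))           # '050' -> first r image
print(str(l_count + 50 - 1).zfill(padding))  # '099' -> last r image
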
diff --git a/label/up_json/r26.json b/label/up_json/r26.json
new file mode 100644
index 0000000..86885ed
--- /dev/null
+++ b/label/up_json/r26.json
@@ -0,0 +1,64 @@
+{
+ "version": "5.8.3",
+ "flags": {},
+ "shapes": [
+ {
+ "label": "1",
+ "points": [
+ [
+ 1283.2727272727273,
+ 137.4545454545455
+ ],
+ [
+ 1948.7272727272725,
+ 1784.7272727272727
+ ]
+ ],
+ "group_id": null,
+ "description": "",
+ "shape_type": "rectangle",
+ "flags": {},
+ "mask": null
+ },
+ {
+ "label": "2",
+ "points": [
+ [
+ 1346.9090909090908,
+ 306.54545454545456
+ ],
+ [
+ 1372.3636363636363,
+ 1662.9090909090908
+ ]
+ ],
+ "group_id": null,
+ "description": "",
+ "shape_type": "line",
+ "flags": {},
+ "mask": null
+ },
+ {
+ "label": "3",
+ "points": [
+ [
+ 1361.4545454545455,
+ 1722.9090909090908
+ ],
+ [
+ 1866.909090909091,
+ 1753.8181818181818
+ ]
+ ],
+ "group_id": null,
+ "description": "",
+ "shape_type": "line",
+ "flags": {},
+ "mask": null
+ }
+ ],
+ "imagePath": "..\\up\\r21.jpeg",
+ "imageData": null,
+ "imageHeight": 1944,
+ "imageWidth": 2592
+}
\ No newline at end of file
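
The label convention used by these LabelMe files (the crop rectangle is label "1", the two weld-seam lines are labels "2" and "3") can be inspected with a short snippet; the path assumes the script is run from the repository root.

import json

with open('label/up_json/r26.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

for shape in data['shapes']:
    print(shape['label'], shape['shape_type'], len(shape['points']))
# Expected: label '1' is the rectangle, labels '2' and '3' are the two lines
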
diff --git a/linknet/main.py b/linknet/main.py
new file mode 100644
index 0000000..bf7bf03
--- /dev/null
+++ b/linknet/main.py
@@ -0,0 +1,273 @@
+import os
+import random
+import cv2
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torch.utils.data import Dataset, DataLoader
+from torchvision import models
+from sklearn.model_selection import train_test_split
+import matplotlib.pyplot as plt
+import time
+
+
+# --- 1. Configuration ---
+class Config:
+    IMAGE_DIR = "data_up/images"
+    MASK_DIR = "data_up/masks_line1"
+    IMAGE_SIZE = 256  # resize every image to 256x256
+    BATCH_SIZE = 4
+    EPOCHS = 50  # number of training epochs
+    LEARNING_RATE = 1e-4
+    TEST_SPLIT = 0.1  # 10% of the data is held out for validation
+
+
+# --- 2. Dataset loading and preprocessing ---
+class WeldSeamDataset(Dataset):
+ def __init__(self, image_paths, mask_paths, size):
+ self.image_paths = image_paths
+ self.mask_paths = mask_paths
+ self.size = size
+
+ def __len__(self):
+ return len(self.image_paths)
+
+ def __getitem__(self, idx):
+        # Read the image
+        img = cv2.imread(self.image_paths[idx], cv2.IMREAD_GRAYSCALE)
+        img = cv2.resize(img, (self.size, self.size))
+        img = img / 255.0  # normalize to [0, 1]
+        img = np.expand_dims(img, axis=0)  # add a channel dimension: (H, W) -> (C, H, W)
+        img_tensor = torch.from_numpy(img).float()
+
+        # Read the mask
+        mask = cv2.imread(self.mask_paths[idx], cv2.IMREAD_GRAYSCALE)
+        mask = cv2.resize(mask, (self.size, self.size))
+        mask = mask / 255.0  # scale to [0, 1], then threshold to {0, 1}
+        mask[mask > 0.5] = 1.0
+        mask[mask <= 0.5] = 0.0
+        mask = np.expand_dims(mask, axis=0)
+        mask_tensor = torch.from_numpy(mask).float()
+
+ return img_tensor, mask_tensor
+
+
+# --- 3. LinkNet model definition ---
+class DecoderBlock(nn.Module):
+ def __init__(self, in_channels, out_channels):
+ super().__init__()
+ self.block = nn.Sequential(
+ nn.Conv2d(in_channels, in_channels // 4, kernel_size=1),
+ nn.ReLU(inplace=True),
+ nn.ConvTranspose2d(in_channels // 4, in_channels // 4, kernel_size=2, stride=2),
+ nn.ReLU(inplace=True),
+ nn.Conv2d(in_channels // 4, out_channels, kernel_size=1),
+ nn.ReLU(inplace=True)
+ )
+
+ def forward(self, x):
+ return self.block(x)
+
+
+class LinkNet(nn.Module):
+ def __init__(self, num_classes=1):
+ super().__init__()
+        # Use a pretrained ResNet18 as the encoder
+        resnet = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
+
+        self.firstconv = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)  # redefined for 1-channel input, so this layer is not pretrained
+        self.firstbn = resnet.bn1
+        self.firstrelu = resnet.relu
+        self.firstmaxpool = resnet.maxpool
+
+        # Encoder layers
+        self.encoder1 = resnet.layer1
+        self.encoder2 = resnet.layer2
+        self.encoder3 = resnet.layer3
+        self.encoder4 = resnet.layer4
+
+        # Decoder layers
+        self.decoder4 = DecoderBlock(512, 256)
+        self.decoder3 = DecoderBlock(256, 128)
+        self.decoder2 = DecoderBlock(128, 64)
+        self.decoder1 = DecoderBlock(64, 64)
+
+        # Final output layers
+        self.final_deconv = nn.ConvTranspose2d(64, 32, kernel_size=2, stride=2)
+        self.final_relu = nn.ReLU(inplace=True)
+        self.final_conv = nn.Conv2d(32, num_classes, kernel_size=1)
+
+    def forward(self, x):
+        # Encoder (spatial sizes assume a 256x256 input)
+        x = self.firstconv(x)
+        x = self.firstbn(x)
+        x = self.firstrelu(x)
+        x = self.firstmaxpool(x)  # -> 64x64
+        e1 = self.encoder1(x)  # -> 64x64
+        e2 = self.encoder2(e1)  # -> 32x32
+        e3 = self.encoder3(e2)  # -> 16x16
+        e4 = self.encoder4(e3)  # -> 8x8
+
+        # Decoder with LinkNet-style skip additions
+        d4 = self.decoder4(e4) + e3  # -> 16x16
+        d3 = self.decoder3(d4) + e2  # -> 32x32
+        d2 = self.decoder2(d3) + e1  # -> 64x64
+        d1 = self.decoder1(d2)  # -> 128x128
+
+        f = self.final_deconv(d1)  # -> 256x256
+        f = self.final_relu(f)
+        f = self.final_conv(f)
+
+        return torch.sigmoid(f)  # sigmoid turns logits into a probability map
+
+
+# --- 4. Loss function (Dice Loss + BCE Loss) ---
+def dice_loss(pred, target, smooth=1.):
+ pred = pred.contiguous()
+ target = target.contiguous()
+ intersection = (pred * target).sum(dim=2).sum(dim=2)
+ loss = (1 - ((2. * intersection + smooth) / (pred.sum(dim=2).sum(dim=2) + target.sum(dim=2).sum(dim=2) + smooth)))
+ return loss.mean()
+
+
+def bce_dice_loss(pred, target):
+ bce = nn.BCELoss()(pred, target)
+ dice = dice_loss(pred, target)
+ return bce + dice
+
+
+# --- 5. Training and evaluation ---
+def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs):
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ model.to(device)
+
+ print(f"Training on {device}")
+
+ best_val_loss = float('inf')
+
+ for epoch in range(num_epochs):
+ start_time = time.time()
+ model.train()
+ running_loss = 0.0
+
+ for images, masks in train_loader:
+ images = images.to(device)
+ masks = masks.to(device)
+
+ optimizer.zero_grad()
+ outputs = model(images)
+ loss = criterion(outputs, masks)
+ loss.backward()
+ optimizer.step()
+
+ running_loss += loss.item() * images.size(0)
+
+ epoch_loss = running_loss / len(train_loader.dataset)
+
+        # Validation
+ model.eval()
+ val_loss = 0.0
+ with torch.no_grad():
+ for images, masks in val_loader:
+ images = images.to(device)
+ masks = masks.to(device)
+ outputs = model(images)
+ loss = criterion(outputs, masks)
+ val_loss += loss.item() * images.size(0)
+
+ val_loss /= len(val_loader.dataset)
+
+ duration = time.time() - start_time
+ print(f"Epoch {epoch + 1}/{num_epochs}.. "
+ f"Train Loss: {epoch_loss:.4f}.. "
+ f"Val Loss: {val_loss:.4f}.. "
+ f"Time: {duration:.2f}s")
+
+        # Save the best model
+ if val_loss < best_val_loss:
+ best_val_loss = val_loss
+ torch.save(model.state_dict(), 'best_linknet_model.pth')
+ print("Model saved!")
+
+ print("Training complete.")
+
+
+def predict_and_visualize(model, image_path, model_path, size):
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ model.load_state_dict(torch.load(model_path, map_location=device))
+ model.to(device)
+ model.eval()
+
+ # 加载和预处理单张图片
+ img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
+ original_size = (img.shape[1], img.shape[0]) # (width, height)
+ img_resized = cv2.resize(img, (size, size))
+ img_normalized = img_resized / 255.0
+ img_tensor = torch.from_numpy(np.expand_dims(np.expand_dims(img_normalized, axis=0), axis=0)).float()
+ img_tensor = img_tensor.to(device)
+
+ with torch.no_grad():
+ output = model(img_tensor)
+
+ # 后处理
+ pred_mask = output.cpu().numpy()[0, 0] # 从 (B, C, H, W) -> (H, W)
+ pred_mask = (pred_mask > 0.5).astype(np.uint8) * 255 # 二值化
+ pred_mask = cv2.resize(pred_mask, original_size) # 恢复到原始尺寸
+
+ # 可视化
+ plt.figure(figsize=(12, 6))
+ plt.subplot(1, 3, 1)
+ plt.title("Original Image")
+ plt.imshow(cv2.cvtColor(img, cv2.COLOR_GRAY2RGB))
+
+ plt.subplot(1, 3, 2)
+ plt.title("Predicted Mask")
+ plt.imshow(pred_mask, cmap='gray')
+
+ # 将掩码叠加到原图
+ overlay = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
+ overlay[pred_mask == 255] = [255, 0, 0] # 红色
+ plt.subplot(1, 3, 3)
+ plt.title("Overlay")
+ plt.imshow(overlay)
+
+ plt.show()
+
+
+# --- 6. Main entry point ---
+if __name__ == '__main__':
+ cfg = Config()
+
+    # Prepare the dataset
+ image_files = sorted([os.path.join(cfg.IMAGE_DIR, f) for f in os.listdir(cfg.IMAGE_DIR)])
+ mask_files = sorted([os.path.join(cfg.MASK_DIR, f) for f in os.listdir(cfg.MASK_DIR)])
+
+    # Split into training and validation sets
+ train_imgs, val_imgs, train_masks, val_masks = train_test_split(
+ image_files, mask_files, test_size=cfg.TEST_SPLIT, random_state=42
+ )
+
+ train_dataset = WeldSeamDataset(train_imgs, train_masks, cfg.IMAGE_SIZE)
+ val_dataset = WeldSeamDataset(val_imgs, val_masks, cfg.IMAGE_SIZE)
+
+ train_loader = DataLoader(train_dataset, batch_size=cfg.BATCH_SIZE, shuffle=True)
+ val_loader = DataLoader(val_dataset, batch_size=cfg.BATCH_SIZE, shuffle=False)
+
+    # Initialize the model, loss function and optimizer
+ model = LinkNet(num_classes=1)
+ criterion = bce_dice_loss
+ optimizer = optim.Adam(model.parameters(), lr=cfg.LEARNING_RATE)
+
+    # --- Train the model ---
+    # Comment out the following line if you only want to run prediction
+    train_model(model, train_loader, val_loader, criterion, optimizer, cfg.EPOCHS)
+
+    # --- Predict with the trained model ---
+    # After training, use this function to test the model
+    # Make sure the referenced .pth weights file exists
+    # print("\n--- Running Prediction ---")
+    # # Pick a random validation image for the test
+    # test_image_path = random.choice(val_imgs)
+    # print(f"Predicting on image: {test_image_path}")
+    # predict_and_visualize(model, test_image_path, 'best_linknet_up_model_line1.pth', cfg.IMAGE_SIZE)
\ No newline at end of file
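
A quick sanity check of the encoder/decoder wiring, assuming the 256x256 input size from Config: a dummy single-channel batch pushed through LinkNet should come back as a probability map of the same spatial size.

import torch
from linknet.main import LinkNet

model = LinkNet(num_classes=1)
model.eval()

with torch.no_grad():
    dummy = torch.randn(1, 1, 256, 256)  # (batch, channels, height, width)
    out = model(dummy)

print(out.shape)                           # expected: torch.Size([1, 1, 256, 256])
print(float(out.min()), float(out.max()))  # values lie in (0, 1) because of the sigmoid
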
diff --git a/linknet/predict.py b/linknet/predict.py
new file mode 100644
index 0000000..b66b1dc
--- /dev/null
+++ b/linknet/predict.py
@@ -0,0 +1,116 @@
+import torch
+import cv2
+import numpy as np
+import matplotlib.pyplot as plt
+
+from linknet.main import LinkNet
+
+
+# --- Model definition ---
+# LinkNet (and its DecoderBlock) is imported from linknet.main above,
+# so the class definitions do not need to be copied into this file.
+# If you prefer a standalone script, paste the DecoderBlock and LinkNet
+# classes here instead of importing them.
+
+
+def predict_single_image(model_path, image_path, image_size=256):
+ """
+    Load a trained LinkNet model and run prediction on a single new image.
+
+    Args:
+    - model_path (str): Path to the saved model weights (.pth).
+    - image_path (str): Path to the image to predict on.
+    - image_size (int): Image size used when the model was trained.
+
+    Returns:
+    - predicted_mask (numpy.ndarray): Binary mask at the original image size,
+                                      with pixel values 0 or 255.
+    - overlay_image (numpy.ndarray): The original image with the predicted mask drawn in red.
+ """
+    # 1. Select the device
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    print(f"Using device: {device}")
+
+    # 2. Build the model and load the trained weights
+    # - instantiate the model architecture
+    # - load the saved weights with load_state_dict
+    # - switch to evaluation mode with .eval()
+    model = LinkNet(num_classes=1)
+    model.load_state_dict(torch.load(model_path, map_location=device))
+    model.to(device)
+    model.eval()
+
+    # 3. Load and preprocess the image
+    # - read the image in grayscale
+    img_original = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
+    if img_original is None:
+        raise FileNotFoundError(f"Image not found at {image_path}")
+
+    original_h, original_w = img_original.shape
+
+    # - resize to the model input size (same as during training)
+    img_resized = cv2.resize(img_original, (image_size, image_size))
+
+    # - normalize (same as during training)
+    img_normalized = img_resized / 255.0
+
+    # - add batch and channel dimensions: (H, W) -> (1, 1, H, W)
+    img_tensor = torch.from_numpy(img_normalized).unsqueeze(0).unsqueeze(0).float()
+
+    # - move the tensor to the selected device
+    img_tensor = img_tensor.to(device)
+
+    # 4. Run inference
+    # torch.no_grad() disables gradient tracking, which saves memory and speeds things up
+    with torch.no_grad():
+        output = model(img_tensor)
+
+    # 5. Post-processing
+    # - move the output tensor back to a NumPy array
+    # - drop the batch and channel dimensions: (1, 1, H, W) -> (H, W)
+    pred_mask_resized = output.cpu().numpy()[0, 0]
+
+    # - binarize: turn the probability map (0-1) into a mask (0 or 1)
+    #   0.5 is the threshold; adjust it if needed
+    pred_mask_binary = (pred_mask_resized > 0.5).astype(np.uint8)
+
+    # - resize the mask back to the original image size
+    predicted_mask = cv2.resize(pred_mask_binary, (original_w, original_h),
+                                interpolation=cv2.INTER_NEAREST) * 255
+
+    # 6. (Optional) build a visualization overlay
+    overlay_image = cv2.cvtColor(img_original, cv2.COLOR_GRAY2BGR)
+    overlay_image[predicted_mask == 255] = [0, 0, 255]  # mark the weld seam in red (BGR)
+
+ return predicted_mask, overlay_image
+
+
+# --- How to call this function ---
+if __name__ == '__main__':
+    # The LinkNet definition is imported at the top of this file,
+    # so nothing needs to be copied here.
+
+    MODEL_FILE = 'best_linknet_up_model_line2.pth'
+    IMAGE_TO_TEST = 'test/004/input/004.jpg'  # <--- change to your own image path
+
+    try:
+        # Run the prediction
+        final_mask, overlay = predict_single_image(MODEL_FILE, IMAGE_TO_TEST)
+
+        # Show the results
+        # cv2.imshow('Original Image', cv2.imread(IMAGE_TO_TEST))
+        # cv2.imshow('Predicted Mask', final_mask)
+        # cv2.imshow('Overlay Result', overlay)
+        #
+        # # Press any key to exit
+        # cv2.waitKey(0)
+        # cv2.destroyAllWindows()
+
+        # The results can also be saved to disk
+        # cv2.imwrite('predicted_mask.png', final_mask)
+        cv2.imwrite('overlay_result.png', overlay)
+
+ except FileNotFoundError as e:
+ print(e)
+ except Exception as e:
+ print(f"An error occurred: {e}")
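
The recognition pipeline ultimately needs two endpoints per seam rather than a full mask. One simple way to get a start and end point from the binary mask returned by predict_single_image is to take the extreme foreground pixels along the seam's longer axis; this is only a sketch and not the project's segment_and_find_endpoints implementation.

import numpy as np

def mask_to_endpoints(mask: np.ndarray):
    """Return two (x, y) endpoints from a 0/255 mask, or None if the mask is empty."""
    ys, xs = np.nonzero(mask)
    if len(xs) == 0:
        return None
    # Pick the axis with the larger extent and take the extreme pixels along it
    if xs.max() - xs.min() >= ys.max() - ys.min():
        i_start, i_end = np.argmin(xs), np.argmax(xs)
    else:
        i_start, i_end = np.argmin(ys), np.argmax(ys)
    return (int(xs[i_start]), int(ys[i_start])), (int(xs[i_end]), int(ys[i_end]))

# endpoints = mask_to_endpoints(final_mask)  # final_mask comes from predict_single_image
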
diff --git a/unet/generate_unet.py b/unet/generate_unet.py
new file mode 100644
index 0000000..790a37d
--- /dev/null
+++ b/unet/generate_unet.py
@@ -0,0 +1,127 @@
+import os
+import json
+import cv2
+import numpy as np
+from PIL import Image
+
+
+def generate_unet_dataset(original_image_dir, annotation_dir, output_dir, line_thickness=12):
+ """
+    Generate a UNet training dataset from original images and LabelMe-format JSON annotations.
+
+    The function first crops each image using the annotated rectangle (label "1"),
+    then draws binary masks from the two annotated lines (labels "2" and "3").
+
+    Args:
+        original_image_dir (str): Folder containing the original images (000.jpg, 001.jpg, ...).
+        annotation_dir (str): Folder containing the JSON annotation files (l1.json, r1.json, ...).
+        output_dir (str): Output folder for the processed data (images and masks).
+        line_thickness (int): Thickness of the lines drawn on the masks.
+ """
+    # 1. Create sub-folders inside the output directory
+    cropped_img_path = os.path.join(output_dir, 'images')
+    mask1_path = os.path.join(output_dir, 'masks_line1')
+    mask2_path = os.path.join(output_dir, 'masks_line2')
+
+    os.makedirs(cropped_img_path, exist_ok=True)
+    os.makedirs(mask1_path, exist_ok=True)
+    os.makedirs(mask2_path, exist_ok=True)
+
+    print(f"Output folders created under: {output_dir}")
+
+    # 2. Iterate over all original images
+    image_files = sorted([f for f in os.listdir(original_image_dir) if f.endswith(('.jpg', '.jpeg', '.png'))])
+
+    for image_filename in image_files:
+        try:
+            # Extract the image index, e.g. 7 from '007.jpg'
+            file_index = int(os.path.splitext(image_filename)[0])
+
+            # 3. Derive the matching JSON file name from the naming convention
+            # indices 0-49 -> 'l', 50-99 -> 'r'
+            side = 'l' if file_index < 50 else 'r'
+            # indices 0-4 -> 1, 5-9 -> 6, 10-14 -> 11, ... (one JSON per group of 5 images)
+            label_index = (file_index % 50) // 5 * 5 + 1
+            json_filename = f"{side}{label_index}.json"
+            json_filepath = os.path.join(annotation_dir, json_filename)
+
+            if not os.path.exists(json_filepath):
+                print(f"Warning: annotation file '{json_filename}' for image '{image_filename}' not found, skipped.")
+                continue
+
+            # 4. Read and parse the JSON file
+            with open(json_filepath, 'r') as f:
+                data = json.load(f)
+
+            # Extract the annotated shapes
+            shapes = data['shapes']
+            rect_shape = next((s for s in shapes if s['label'] == '1' and s['shape_type'] == 'rectangle'), None)
+            line1_shape = next((s for s in shapes if s['label'] == '2' and s['shape_type'] == 'line'), None)
+            line2_shape = next((s for s in shapes if s['label'] == '3' and s['shape_type'] == 'line'), None)
+
+            if not all([rect_shape, line1_shape, line2_shape]):
+                print(f"Warning: annotation file '{json_filename}' is missing a required shape (rectangle '1', line '2' or '3'), skipped.")
+                continue
+
+ # 5. 图像裁剪
+ # 读取原始图片
+ image_path = os.path.join(original_image_dir, image_filename)
+ original_image = Image.open(image_path)
+
+ # 获取矩形坐标
+ p1, p2 = rect_shape['points']
+ # 标准化矩形坐标 (min_x, min_y, max_x, max_y)
+ left = int(min(p1[0], p2[0]))
+ upper = int(min(p1[1], p2[1]))
+ right = int(max(p1[0], p2[0]))
+ lower = int(max(p1[1], p2[1]))
+
+ # 执行裁剪
+ cropped_image = original_image.crop((left, upper, right, lower))
+
+ # 保存裁剪后的图片
+ cropped_image_filename = os.path.join(cropped_img_path, os.path.basename(image_filename))
+ cropped_image.save(cropped_image_filename)
+
+ # 6. 生成并保存掩码
+ # 获取裁剪后图片的尺寸
+ width, height = cropped_image.size
+
+ # 调整直线坐标系,使其与裁剪后的图片对应
+ line1_points = np.array(line1_shape['points']) - [left, upper]
+ line2_points = np.array(line2_shape['points']) - [left, upper]
+
+ # 创建两个空的黑色背景(掩码)
+ mask1 = np.zeros((height, width), dtype=np.uint8)
+ mask2 = np.zeros((height, width), dtype=np.uint8)
+
+ # 在掩码上绘制白色线条
+ # OpenCV的line函数需要整数坐标
+ pt1_l1 = tuple(line1_points[0].astype(int))
+ pt2_l1 = tuple(line1_points[1].astype(int))
+ cv2.line(mask1, pt1_l1, pt2_l1, color=255, thickness=line_thickness)
+
+ pt1_l2 = tuple(line2_points[0].astype(int))
+ pt2_l2 = tuple(line2_points[1].astype(int))
+ cv2.line(mask2, pt1_l2, pt2_l2, color=255, thickness=line_thickness)
+
+ # 保存掩码为PNG格式
+ # 使用splitext来获取不带扩展名的文件名
+ base_filename, _ = os.path.splitext(image_filename)
+ png_filename = base_filename + ".png"
+
+ mask1_savename = os.path.join(mask1_path, png_filename)
+ mask2_savename = os.path.join(mask2_path, png_filename)
+
+ cv2.imwrite(mask1_savename, mask1)
+ cv2.imwrite(mask2_savename, mask2)
+
+            print(f"Processed: {image_filename} -> {json_filename}")
+
+        except Exception as e:
+            print(f"Error while processing image '{image_filename}': {e}")
+
+    print("\nAll images processed!")
+
+if __name__ == "__main__":
+    generate_unet_dataset("../label/up", "../label/up_json", "data_up")
\ No newline at end of file
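
The index-to-annotation mapping in generate_unet_dataset pairs each renamed image 000..099 with the group-start JSON it was labelled from; a few worked cases, assuming the 50/50 l/r split used above:

def json_name_for_index(file_index: int) -> str:
    side = 'l' if file_index < 50 else 'r'
    label_index = (file_index % 50) // 5 * 5 + 1
    return f"{side}{label_index}.json"

# 000-004 come from l1.json, 005-009 from l6.json, 050-054 from r1.json, ...
assert json_name_for_index(0) == "l1.json"
assert json_name_for_index(7) == "l6.json"
assert json_name_for_index(49) == "l46.json"
assert json_name_for_index(50) == "r1.json"
assert json_name_for_index(99) == "r46.json"
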
diff --git a/yolov8/predict.py b/yolov8/predict.py
new file mode 100644
index 0000000..58545ec
--- /dev/null
+++ b/yolov8/predict.py
@@ -0,0 +1,23 @@
+from ultralytics import YOLO
+import os
+
+# Keep everything inside an if __name__ == '__main__': block; it is good practice
+if __name__ == '__main__':
+    # 1. Load your best trained model
+    # !! Change this to your own best.pt path !!
+    model_path = r'runs_up/detect/train/weights/best.pt'
+    model = YOLO(model_path)
+
+    # 2. Choose the image or folder to predict on
+    # This can be a single image path or a whole folder
+    # Validation-set images or brand-new test images are strongly recommended
+    source_path = r'train_data_up/images/val'  # predict on the whole validation folder
+
+    # 3. Run prediction
+    # save=True: saves the images with boxes drawn on them
+    # conf=0.5: only keep predictions with confidence above 0.5; adjust as needed
+    results = model.predict(source=source_path, save=True, conf=0.5)
+
+    # By default the predictions are saved under runs_up/detect/predictX
+    print("\nPrediction finished!")
+    # The results object holds the details, but for visual checking it is easier to inspect the saved images.
\ No newline at end of file
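
If the raw numbers are needed instead of the saved images, the returned results can be iterated; a minimal sketch using the standard ultralytics result fields (boxes.xyxy, boxes.conf, boxes.cls), continuing from the predict() call above:

for r in results:
    for box, conf, cls in zip(r.boxes.xyxy, r.boxes.conf, r.boxes.cls):
        x1, y1, x2, y2 = box.tolist()
        print(f"{r.path}: class={int(cls)} conf={float(conf):.2f} "
              f"box=({x1:.0f}, {y1:.0f}, {x2:.0f}, {y2:.0f})")
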
diff --git a/yolov8/split_dataset.py b/yolov8/split_dataset.py
new file mode 100644
index 0000000..048555f
--- /dev/null
+++ b/yolov8/split_dataset.py
@@ -0,0 +1,116 @@
+import os
+import random
+import shutil
+
+
+def split_dataset(image_dir, label_dir, output_dir, split_ratio=0.8):
+ """
+    Automatically split images and labels into a training set and a validation set.
+
+    Args:
+        image_dir (str): Folder containing the original images.
+        label_dir (str): Folder containing the converted YOLO labels (.txt).
+        output_dir (str): Root output directory for the organized dataset (e.g. 'weld_dataset').
+        split_ratio (float): Fraction used for training, e.g. 0.8 means 80% train / 20% val.
+ """
+ print("开始划分数据集...")
+
+ # --- 1. 路径设置和文件夹创建 ---
+ train_img_path = os.path.join(output_dir, 'images', 'train')
+ val_img_path = os.path.join(output_dir, 'images', 'val')
+ train_label_path = os.path.join(output_dir, 'labels', 'train')
+ val_label_path = os.path.join(output_dir, 'labels', 'val')
+
+ # 创建所有必要的文件夹
+ os.makedirs(train_img_path, exist_ok=True)
+ os.makedirs(val_img_path, exist_ok=True)
+ os.makedirs(train_label_path, exist_ok=True)
+ os.makedirs(val_label_path, exist_ok=True)
+
+ # --- 2. 文件匹配 ---
+ # 获取所有标签文件的基础名(不含扩展名)
+ label_files = [os.path.splitext(f)[0] for f in os.listdir(label_dir) if f.endswith('.txt')]
+
+ # 查找对应的图片文件(支持多种格式)
+ image_files_map = {}
+ supported_formats = ['.jpg', '.jpeg', '.png', '.bmp']
+ for f in os.listdir(image_dir):
+ base_name, ext = os.path.splitext(f)
+ if ext.lower() in supported_formats:
+ image_files_map[base_name] = f
+
+ # 找出图片和标签都存在的文件对
+ valid_files = [base_name for base_name in label_files if base_name in image_files_map]
+
+ if not valid_files:
+ print(f"错误:在图片目录 '{image_dir}' 和标签目录 '{label_dir}' 之间未找到任何匹配的文件对。")
+ print("请确保图片和标签的文件名(除扩展名外)完全一致。")
+ return
+
+ print(f"共找到 {len(valid_files)} 个有效的图片-标签对。")
+
+ # --- 3. 随机划分 ---
+ random.shuffle(valid_files)
+ split_index = int(len(valid_files) * split_ratio)
+ train_files = valid_files[:split_index]
+ val_files = valid_files[split_index:]
+
+ # --- 4. 复制文件到目标位置 ---
+ def copy_files(file_list, dest_img_path, dest_label_path):
+ for base_name in file_list:
+ # 复制图片
+ img_name = image_files_map[base_name]
+ shutil.copy(os.path.join(image_dir, img_name), dest_img_path)
+ # 复制标签
+ label_name = base_name + '.txt'
+ shutil.copy(os.path.join(label_dir, label_name), dest_label_path)
+
+ print(f"正在复制 {len(train_files)} 个文件到训练集...")
+ copy_files(train_files, train_img_path, train_label_path)
+
+ print(f"正在复制 {len(val_files)} 个文件到验证集...")
+ copy_files(val_files, val_img_path, val_label_path)
+
+ print("\n数据集划分完成!")
+ print(f"训练集: {len(train_files)} 张图片 | 验证集: {len(val_files)} 张图片")
+ print(f"数据已整理至 '{output_dir}' 文件夹。")
+
+
+if __name__ == '__main__':
+    # --- Parameters to configure ---
+    # (the commented-out block below is the variant that was used for the 'bottom' dataset)
+ # # 1. 原始图片文件夹路径
+ # # !! 重要 !!: 请将这里的路径修改为您实际存放图片的文件夹
+ # # 可能是 'faster-rcnn/JPEGImages' 或其他名称
+ # ORIGINAL_IMAGE_DIR = '../OpenCV/data_bottom/test3'
+ #
+ # # 2. 转换后的 YOLO 标签文件夹路径
+ # YOLO_LABEL_DIR = 'data_bottom'
+ #
+ # # 3. 最终输出的数据集文件夹
+ # OUTPUT_DATASET_DIR = 'train_data'
+ #
+ # # 4. 训练集比例 (0.8 表示 80% 训练, 20% 验证)
+ # SPLIT_RATIO = 0.9
+ #
+ # # --- 运行主函数 ---
+ # split_dataset(ORIGINAL_IMAGE_DIR, YOLO_LABEL_DIR, OUTPUT_DATASET_DIR, SPLIT_RATIO)
+ #
+    # --- Parameters to configure ---
+
+    # 1. Folder containing the original images
+    # !! Important !!: change this to the folder where your images actually live
+    # (it might be 'faster-rcnn/JPEGImages' or something else)
+    ORIGINAL_IMAGE_DIR = '../label/up'
+
+    # 2. Folder containing the converted YOLO labels
+    YOLO_LABEL_DIR = 'data_up'
+
+    # 3. Final output dataset folder
+    OUTPUT_DATASET_DIR = 'train_data_up'
+
+    # 4. Training-set fraction (0.9 means 90% train / 10% val)
+    SPLIT_RATIO = 0.9
+
+    # --- Run the main function ---
+ split_dataset(ORIGINAL_IMAGE_DIR, YOLO_LABEL_DIR, OUTPUT_DATASET_DIR, SPLIT_RATIO)
\ No newline at end of file
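
One design note: random.shuffle above is unseeded, so every run produces a different split. If a reproducible split is needed (for example to compare training runs), seeding before shuffling is enough; a minimal sketch with made-up file names:

import random

random.seed(42)  # fix the split across runs
files = [f"{i:03d}" for i in range(100)]
random.shuffle(files)
split_index = int(len(files) * 0.9)
train_files, val_files = files[:split_index], files[split_index:]
print(len(train_files), len(val_files))  # 90 10
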
diff --git a/yolov8/train_data/weld.yaml b/yolov8/train_data/weld.yaml
new file mode 100644
index 0000000..62e41f8
--- /dev/null
+++ b/yolov8/train_data/weld.yaml
@@ -0,0 +1,9 @@
+# Dataset path settings
+# Relative paths here are resolved against the directory the yolo command is run from; this assumes you run it from the innovate_project root
+path: ../train_data # dataset root directory
+train: images/train # training image folder (relative to path)
+val: images/val # validation image folder (relative to path)
+
+# Class information
+nc: 1 # number of classes
+names: ['Space weld workpiece'] # class names
\ No newline at end of file
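
YOLO resolves train and val relative to path and expects a labels/ tree parallel to images/. A quick check (a sketch, assuming the layout produced by split_dataset.py) that the folders this file points at actually exist:

import os

dataset_root = '../train_data'  # must match the 'path' entry above
for split in ('train', 'val'):
    imgs = os.path.join(dataset_root, 'images', split)
    labels = os.path.join(dataset_root, 'labels', split)
    print(split, os.path.isdir(imgs), os.path.isdir(labels))
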
diff --git a/yolov8/train_yolo.py b/yolov8/train_yolo.py
new file mode 100644
index 0000000..36195fb
--- /dev/null
+++ b/yolov8/train_yolo.py
@@ -0,0 +1,20 @@
+from ultralytics import YOLO
+
+# Put all executable code inside this if block
+if __name__ == '__main__':
+    # 1. Load a pretrained model
+    # This line could live outside the if block, but keeping it inside is cleaner
+    model = YOLO('yolov8n.pt')
+
+    # 2. Train the model
+    # The code that launches training must stay inside the if block
+    results = model.train(data='train_data_up/weld.yaml',
+                          epochs=50,
+                          imgsz=640,
+                          device=0,
+                          workers=8)  # the number of workers can be set explicitly
+
+    # 3. (Optional) print where the training results were saved
+    print("Training finished!")
+    # Note: in recent ultralytics versions results may not expose save_dir directly, but the training log prints it
+    # Training results usually end up under a runs*/detect/trainX directory
\ No newline at end of file
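
After training, the best weights end up under the run directory printed in the log (weights/best.pt). A short sketch of loading them back and validating against the same YAML; the run folder name below is hypothetical and depends on your setup:

from ultralytics import YOLO

if __name__ == '__main__':
    best = YOLO('runs/detect/train/weights/best.pt')  # hypothetical run directory
    metrics = best.val(data='train_data_up/weld.yaml')
    print(metrics.box.map50)  # mAP@0.5 on the validation split
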
diff --git a/yolov8/voc_to_yolo.py b/yolov8/voc_to_yolo.py
new file mode 100644
index 0000000..0995884
--- /dev/null
+++ b/yolov8/voc_to_yolo.py
@@ -0,0 +1,119 @@
+import xml.etree.ElementTree as ET
+import os
+import glob
+
+
+def voc_to_yolo(xml_file_path, output_dir, class_mapping):
+ """
+    Convert a single PASCAL VOC annotation (.xml) file to YOLO (.txt) format.
+
+    Args:
+        xml_file_path (str): Path of the input .xml file.
+        output_dir (str): Target folder for the output .txt file.
+        class_mapping (dict): Mapping from class name to class ID.
+ """
+    try:
+        # Parse the XML file
+        tree = ET.parse(xml_file_path)
+        root = tree.getroot()
+
+        # Read the image dimensions
+        size = root.find('size')
+        if size is None:
+            print(f"Warning: no <size> tag found in {xml_file_path}, skipping this file.")
+            return
+
+        img_width = int(size.find('width').text)
+        img_height = int(size.find('height').text)
+
+        # Build the list of YOLO annotations to write
+        yolo_annotations = []
+
+        # Iterate over every object
+        for obj in root.findall('object'):
+            # Get the class name
+            class_name = obj.find('name').text
+            if class_name not in class_mapping:
+                print(f"Warning: class '{class_name}' is not in the predefined class_mapping, skipping this object.")
+                continue
+
+            class_id = class_mapping[class_name]
+
+            # Get the bounding-box coordinates
+            bndbox = obj.find('bndbox')
+            xmin = float(bndbox.find('xmin').text)
+            ymin = float(bndbox.find('ymin').text)
+            xmax = float(bndbox.find('xmax').text)
+            ymax = float(bndbox.find('ymax').text)
+
+            # --- Core conversion formulas ---
+ x_center = (xmin + xmax) / 2.0 / img_width
+ y_center = (ymin + ymax) / 2.0 / img_height
+ width = (xmax - xmin) / img_width
+ height = (ymax - ymin) / img_height
+
+            # Add the result to the list
+            yolo_annotations.append(f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}")
+
+        # If the file contained valid objects, write them to a .txt file
+        if yolo_annotations:
+            # Build the output file name
+            base_filename = os.path.basename(xml_file_path)
+            txt_filename = os.path.splitext(base_filename)[0] + '.txt'
+            output_path = os.path.join(output_dir, txt_filename)
+
+            # Write the file
+            with open(output_path, 'w') as f:
+                f.write('\n'.join(yolo_annotations))
+
+            # print(f"Converted: {xml_file_path} -> {output_path}")
+
+    except Exception as e:
+        print(f"Error while processing {xml_file_path}: {e}")
+
+
+def main():
+    # --- Parameters to configure ---
+
+    # 1. Define your classes and their IDs (starting from 0)
+    # This project uses a single class, "Space weld workpiece"
+    # Make sure the name matches the label in your XML files exactly!
+    CLASS_MAPPING = {
+        'Space weld workpiece': 0,
+        # Add further classes here if needed, for example:
+        # 'weld_seam': 1,
+    }
+
+    # 2. Define the input and output folders
+    # Input folder: directory containing the .xml files
+    input_xml_dir = '../label/up_xml'
+
+    # Output folder: directory for the converted .txt files
+    output_txt_dir = 'data_up'
+
+    # --- Script execution ---
+
+    # Create the output folder automatically if it does not exist
+    if not os.path.exists(output_txt_dir):
+        os.makedirs(output_txt_dir)
+        print(f"Created output folder: {output_txt_dir}")
+
+    # Find all .xml files
+    xml_files = glob.glob(os.path.join(input_xml_dir, '*.xml'))
+
+    if not xml_files:
+        print(f"Error: no .xml files found in '{input_xml_dir}'. Please check the path.")
+        return
+
+    print(f"Found {len(xml_files)} .xml files. Starting conversion...")
+
+    # Convert each file
+    for xml_file in xml_files:
+        voc_to_yolo(xml_file, output_txt_dir, CLASS_MAPPING)
+
+    print("\nConversion finished!")
+    print(f"All YOLO-format label files saved to: {output_txt_dir}")
+
+
+if __name__ == '__main__':
+ main()
\ No newline at end of file
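
The conversion formulas can be checked by inverting them: from the normalized values, the original pixel box should come back exactly. A worked example using a hypothetical 2592x1944 image (the size recorded in the LabelMe files above):

img_w, img_h = 2592, 1944
xmin, ymin, xmax, ymax = 1283.0, 137.0, 1948.0, 1784.0

x_center = (xmin + xmax) / 2.0 / img_w
y_center = (ymin + ymax) / 2.0 / img_h
width = (xmax - xmin) / img_w
height = (ymax - ymin) / img_h

# Invert the normalization to recover the pixel coordinates
back_xmin = (x_center - width / 2) * img_w
back_xmax = (x_center + width / 2) * img_w
assert abs(back_xmin - xmin) < 1e-6 and abs(back_xmax - xmax) < 1e-6
print(f"{x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}")
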