姿态估计数据集准备与 COCO 关键点标注实战

2026/6/30 22:43:51

姿态估计数据集准备与 COCO 关键点标注实战

1. 数据集选择

公开姿态估计数据集：
├── COCO Keypoints（推荐）
│   ├── 20 万张图片，25 万人体实例
│   ├── 17 个关键点标注
│   └── 下载：https://cocodataset.org
├── MPII Human Pose
│   ├── 2.5 万张图片
│   ├── 16 个关键点
│   └── 适合学术研究
├── CrowdPose
│   ├── 2 万张拥挤场景
│   └── 适合多人场景
└── 自定义数据集
    ├── 使用 Labelme/CVAT 标注
    └── 转换为 COCO 格式

2. COCO 数据集格式

{
  "images": [
    {"id": 1, "file_name": "000001.jpg", "width": 640, "height": 480}
  ],
  "annotations": [
    {
      "id": 1,
      "image_id": 1,
      "category_id": 1,
      "bbox": [100, 100, 200, 300],
      "area": 60000,
      "iscrowd": 0,
      "keypoints": [
        300, 150, 2,  // 鼻子: x, y, visibility
        290, 140, 2,  // 左眼
        310, 140, 2,  // 右眼
        280, 145, 2,  // 左耳
        320, 145, 2,  // 右耳
        250, 200, 2,  // 左肩
        350, 200, 2,  // 右肩
        230, 280, 2,  // 左肘
        370, 280, 2,  // 右肘
        220, 350, 2,  // 左腕
        380, 350, 2,  // 右腕
        270, 350, 2,  // 左髋
        330, 350, 2,  // 右髋
        260, 450, 2,  // 左膝
        340, 450, 2,  // 右膝
        250, 550, 2,  // 左踝
        350, 550, 2   // 右踝
      ],
      "num_keypoints": 17
    }
  ],
  "categories": [
    {
      "id": 1,
      "name": "person",
      "keypoints": [
        "nose", "left_eye", "right_eye", "left_ear", "right_ear",
        "left_shoulder", "right_shoulder", "left_elbow", "right_elbow",
        "left_wrist", "right_wrist", "left_hip", "right_hip",
        "left_knee", "right_knee", "left_ankle", "right_ankle"
      ],
      "skeleton": [
        [16, 14], [14, 12], [17, 15], [15, 13], [12, 13], [6, 12], [7, 13],
        [6, 7], [6, 8], [7, 9], [8, 10], [9, 11], [2, 3], [1, 2], [1, 3],
        [2, 4], [3, 5], [4, 6], [5, 7]
      ]
    }
  ]
}

3. YOLO 格式转换

#!/usr/bin/env python3
"""coco_to_yolo_pose.py - convert COCO keypoint annotations to YOLO pose labels."""
import json
import os
from pathlib import Path


def coco_to_yolo_pose(coco_json, output_dir, img_dir, num_keypoints=17):
    """Convert a COCO keypoint annotation file into YOLO pose label files.

    One ``<image stem>.txt`` file is written to ``output_dir`` for every entry
    in the COCO ``images`` list (an empty file when the image has no
    annotations). Each line describes one annotation as space-separated
    fields::

        0 cx cy w h  x1 y1 v1  x2 y2 v2 ...

    The box centre/size and the keypoint x/y values are divided by the image
    width/height taken from the COCO image entry; the visibility flag is
    copied through unchanged. The class id is always written as ``0``.

    Args:
        coco_json: Path to the COCO annotation JSON file.
        output_dir: Directory that receives the label files; created if
            it does not exist.
        img_dir: Image directory. Kept for interface compatibility; the
            conversion itself never reads it.
        num_keypoints: Number of ``(x, y, visibility)`` triplets read from
            each annotation's ``keypoints`` list. Defaults to 17.
    """
    with open(coco_json, encoding="utf-8") as f:
        data = json.load(f)

    os.makedirs(output_dir, exist_ok=True)

    # Image id -> image record (width, height, file name).
    img_map = {img['id']: img for img in data['images']}

    # Group annotations by the image they belong to.
    ann_by_img = {}
    for ann in data['annotations']:
        ann_by_img.setdefault(ann['image_id'], []).append(ann)

    for img_id, img_info in img_map.items():
        w, h = img_info['width'], img_info['height']
        filename = Path(img_info['file_name']).stem

        lines = []
        for ann in ann_by_img.get(img_id, []):
            # COCO boxes are [x_min, y_min, width, height] in pixels;
            # the label stores the normalized centre and size.
            x_min, y_min, box_w, box_h = ann['bbox']
            fields = [
                "0",
                f"{(x_min + box_w / 2) / w:.6f}",
                f"{(y_min + box_h / 2) / h:.6f}",
                f"{box_w / w:.6f}",
                f"{box_h / h:.6f}",
            ]

            # Keypoints are a flat [x, y, v, x, y, v, ...] list.
            kpts = ann['keypoints']
            for i in range(num_keypoints):
                kx = kpts[i * 3] / w
                ky = kpts[i * 3 + 1] / h
                vis = kpts[i * 3 + 2]
                fields += [f"{kx:.6f}", f"{ky:.6f}", f"{vis}"]

            # Fields must be whitespace-separated to be parseable.
            lines.append(" ".join(fields))

        label_path = os.path.join(output_dir, f"{filename}.txt")
        with open(label_path, 'w', encoding="utf-8") as f:
            f.write('\n'.join(lines))

    print(f"转换完成:{len(img_map)}张图片")


if __name__ == "__main__":
    coco_to_yolo_pose(
        "coco/annotations/person_keypoints_train2017.json",
        "dataset/train/labels",
        "coco/train2017",
    )

4. 自定义数据集标注

4.1 使用 CVAT 标注

# Install and start CVAT
docker compose up -d
# Open http://localhost:8080
# Create a project -> choose the "Pose Estimation" task type
# Annotate the 17 keypoints
# Export in COCO format

4.2 使用 Labelme 标注

# Keypoint annotation with Labelme
# 1. Install labelme
pip install labelme
# 2. Launch the annotation tool
labelme --config labelme_pose.yaml
# 3. Select the "Point" tool while annotating
# 4. Annotate the 17 keypoints in order
# 5. Export in COCO format

5. 数据增强

# Data augmentation settings for pose estimation
augmentation:
  hsv_h: 0.015
  hsv_s: 0.5
  hsv_v: 0.3
  degrees: 10.0    # pose tolerates larger rotation
  translate: 0.1
  scale: 0.3       # scaling affects the keypoints
  shear: 5.0
  flipud: 0.0      # no vertical flip (gravity direction is fixed)
  fliplr: 0.5      # horizontal flip (keypoints must be mirrored)
  mosaic: 0.8
  mixup: 0.0       # MixUp can distort poses

5.1 左右镜像关键点

def mirror_keypoints(keypoints, img_width):
    """Mirror 17 COCO keypoints for a horizontal (left-right) image flip.

    Each keypoint's x becomes ``img_width - x`` while y and visibility are
    kept, and every left/right pair swaps slots so that, for example, the
    point stored as "left eye" is still the subject's left eye afterwards.

    Args:
        keypoints: Sequence of at least 17 ``(x, y, visibility)`` triplets in
            COCO order (nose, eyes, ears, shoulders, elbows, wrists, hips,
            knees, ankles). x is presumably in the same unit as
            ``img_width`` (pixels) - confirm against the caller.
        img_width: Width of the image the keypoints belong to.

    Returns:
        A new list of 17 ``(x, y, visibility)`` tuples.
    """
    # flip_idx[i] is the source keypoint that lands in output slot i:
    # the nose maps to itself, every left/right pair is swapped.
    flip_idx = (
        0,       # nose
        2, 1,    # left eye <-> right eye
        4, 3,    # left ear <-> right ear
        6, 5,    # left shoulder <-> right shoulder
        8, 7,    # left elbow <-> right elbow
        10, 9,   # left wrist <-> right wrist
        12, 11,  # left hip <-> right hip
        14, 13,  # left knee <-> right knee
        16, 15,  # left ankle <-> right ankle
    )

    mirrored = []
    for src_idx in flip_idx:
        x, y, vis = keypoints[src_idx]
        mirrored.append((img_width - x, y, vis))
    return mirrored

6. 数据集配置

# data_pose.yaml
path: ./dataset
train: train/images
val: valid/images
test: test/images

names:
  0: person
nc: 1

task: pose           # pose estimation task
kpt_shape: [17, 3]   # 17 keypoints, 3 values each (x, y, visibility)
# Left/right keypoint swap used by horizontal-flip augmentation (COCO order)
flip_idx: [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]

7. 数据集质量检查

#!/usr/bin/env python3
"""check_dataset.py - sanity checks for a YOLO pose dataset."""
import os

import cv2
import numpy as np


def check_pose_dataset(label_dir, image_dir):
    """Check the label files of a YOLO pose dataset and report problems.

    For every ``.txt`` file in ``label_dir`` the function verifies that:

    * a ``.jpg`` image with the same stem exists in ``image_dir`` and can be
      decoded by OpenCV;
    * every non-blank line has at least 56 fields
      (class + 4 box values + 17 keypoints x 3 values);
    * every keypoint x/y value parses as a number and lies within [0, 1].

    A summary (at most the first 20 issues) is printed.

    Args:
        label_dir: Directory containing the YOLO label ``.txt`` files.
        image_dir: Directory containing the matching ``.jpg`` images.

    Returns:
        The list of issue messages found (empty when the dataset is clean).
    """
    num_keypoints = 17
    first_kpt_field = 5  # class id + cx, cy, w, h come first
    expected_fields = first_kpt_field + num_keypoints * 3  # 5 + 17*3 = 56

    issues = []

    # Sorted so the report order does not depend on the file system.
    for label_file in sorted(os.listdir(label_dir)):
        if not label_file.endswith('.txt'):
            continue

        label_path = os.path.join(label_dir, label_file)
        # Swap only the extension; str.replace would also rewrite a '.txt'
        # that appears in the middle of the name.
        stem = os.path.splitext(label_file)[0]
        img_path = os.path.join(image_dir, stem + '.jpg')

        if not os.path.exists(img_path):
            issues.append(f"缺少图片:{img_path}")
            continue

        # cv2.imread returns None instead of raising on an unreadable file.
        img = cv2.imread(img_path)
        if img is None:
            issues.append(f"无法读取图片:{img_path}")
            continue

        with open(label_path, encoding="utf-8") as f:
            for line_num, line in enumerate(f, 1):
                parts = line.split()
                if not parts:
                    # Blank lines (e.g. a trailing newline) carry no label.
                    continue

                if len(parts) < expected_fields:
                    issues.append(f"{label_file}:{line_num}关键点数量不足")
                    continue

                # Check the coordinate range of each keypoint.
                for i in range(first_kpt_field, expected_fields, 3):
                    try:
                        x, y = float(parts[i]), float(parts[i + 1])
                    except ValueError:
                        issues.append(
                            f"{label_file}:{line_num}坐标格式错误: "
                            f"({parts[i]},{parts[i + 1]})"
                        )
                        continue
                    if x < 0 or x > 1 or y < 0 or y > 1:
                        issues.append(
                            f"{label_file}:{line_num}坐标越界: ({x},{y})"
                        )

    if issues:
        print(f"发现{len(issues)}个问题:")
        for issue in issues[:20]:
            print(f" -{issue}")
    else:
        print("✅ 数据集质量检查通过")

    return issues


if __name__ == "__main__":
    check_pose_dataset("dataset/train/labels", "dataset/train/images")