From 7517efe998ba8c775e6a6ea9b709783298768595 Mon Sep 17 00:00:00 2001
From: hezhexi2002 <1223552020@qq.com>
Date: Fri, 7 Apr 2023 12:34:29 +0800
Subject: [PATCH] add the surpport to store the splited images into specified
 folder

---
 .gitignore         |  1 +
 sjtu2coco-armor.py | 61 ++++++++++++++++++++++++++++++----------------
 2 files changed, 41 insertions(+), 21 deletions(-)
 create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..723ef36
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+.idea
\ No newline at end of file
diff --git a/sjtu2coco-armor.py b/sjtu2coco-armor.py
index 5382f4a..d369b0f 100644
--- a/sjtu2coco-armor.py
+++ b/sjtu2coco-armor.py
@@ -1,8 +1,10 @@
 """
 YOLO 格式的数据集转化为 COCO 格式的数据集
 --root_dir 输入根路径
+--train_dir 保存训练集图片路径
+--val_dir 保存验证集图片路径
 --save_path 保存文件的名字(没有random_split时使用)
---random_split 有则会随机划分数据集，然后再分别保存为3个文件。
+--random_split 有则会随机划分数据集, 然后再分别保存为3个文件。
 """
 
 import os
@@ -14,9 +16,11 @@
 import argparse
 
 parser = argparse.ArgumentParser()
-parser.add_argument('--root_dir', default='./data',type=str, help="root path of images and labels, include ./images and ./labels and classes.txt")
-parser.add_argument('--save_path', type=str,default='./train.json', help="if not split the dataset, give a path to a json file")
+parser.add_argument('--root_dir', default='/mnt/h/current_dataset/rune_coco/',type=str, help="root path of images and labels, include ./images and ./labels and classes.txt")
+parser.add_argument('--save_path', type=str,default='train.json', help="if not split the dataset, give a path to a json file")
 parser.add_argument('--random_split', action='store_true', help="random split the dataset, default ratio is 8:1:1")
+parser.add_argument('--train_dir', default='/mnt/h/current_dataset/rune_coco/train2017',type=str, help="path to store the splited train images")
+parser.add_argument('--val_dir', default='/mnt/h/current_dataset/rune_coco/val2027',type=str, help="path to store the splited val images")
 arg = parser.parse_args()
 
 def train_test_val_split(img_paths,ratio_train=0.1,ratio_test=0,ratio_val=0.2):
@@ -35,7 +39,7 @@ def train_val_split(img_paths,ratio_train=0.1,ratio_val=0.2):
     return train_img, val_img
 
 
-def yolo2coco(root_path, random_split):
+def yolo2coco(root_path, train_img_folder, val_img_folder, random_split):
     originLabelsDir = os.path.join(root_path, 'labels')                                        
     originImagesDir = os.path.join(root_path, 'images')
     with open(os.path.join(root_path, 'classes.txt')) as f:
@@ -59,7 +63,7 @@ def yolo2coco(root_path, random_split):
         dataset = {'categories': [], 'annotations': [], 'images': []}
         for i, cls in enumerate(classes, 0):
             dataset['categories'].append({'id': i, 'name': cls, 'supercategory': 'mark'})
-    
+
     # 标注的id
     ann_id_cnt = 0
     for k, index in enumerate(tqdm(indexes)):
@@ -72,8 +76,19 @@ def yolo2coco(root_path, random_split):
             # 切换dataset的引用对象，从而划分数据集
                 if index in train_img:
                     dataset = train_dataset
+                    # 创建保存train图片文件夹
+                    if not os.path.exists(train_img_folder):
+                        os.makedirs(train_img_folder)
+                    # 将划分好的训练集图片保存到指定文件夹下
+                    cv2.imwrite(os.path.join(train_img_folder, index), im)
                 elif index in val_img:
                     dataset = val_dataset
+                    # 创建保存val图片文件夹
+                    if not os.path.exists(val_img_folder):
+                        os.makedirs(val_img_folder)
+                    # 将划分好的验证集图片保存到指定文件夹下
+                    cv2.imwrite(os.path.join(val_img_folder, index), im)
+                    
         # 添加图像的信息
         dataset['images'].append({'file_name': index,
                                     'id': k,
@@ -88,37 +103,39 @@ def yolo2coco(root_path, random_split):
             for label in labelList:
                 label = label.split(" ")
                 H, W, _ = im.shape
-                x1 = float(label[1]) * W
-                y1 = float(label[2]) * H
-                x2 = float(label[3]) * W
-                y2 = float(label[4]) * H
-                x3 = float(label[5]) * W
-                y3 = float(label[6]) * H
-                x4 = float(label[7]) * W
-                y4 = float(label[8]) * H
+                x1 = float(label[5]) * W
+                y1 = float(label[6]) * H
+                x2 = float(label[7]) * W
+                y2 = float(label[8]) * H
+                x3 = float(label[9]) * W
+                y3 = float(label[10]) * H
+                x4 = float(label[11]) * W
+                y4 = float(label[12]) * H
+                x5 = float(label[13]) * W
+                y5 = float(label[14]) * H
 
 
-                keypoints = np.array([x1, y1, x2, y2, x3, y3, x4, y4])
+                keypoints = np.array([x1, y1, x2, y2, x3, y3, x4, y4, x5, y5])
                 num_keypoints = int(len(keypoints) / 2)
 
-                keypoints = keypoints.reshape(-1,2)
+                keypoints = keypoints.reshape(-1,2) # 
                 keypoints_type = 2 * np.ones((num_keypoints, 1))
                 keypoints = np.concatenate((keypoints,keypoints_type),axis=1)
                 keypoints = keypoints.reshape(-1).tolist()
 
                 # 标签序号从0开始计算, coco2017数据集标号混乱，不管它了。
                 cls_id = int(label[0])   
-                width = max(x1, x2, x3, x4) - min(x1, x2, x3, x4)
-                height = max(y1, y2, y3, y4) - min(y1, y2, y3, y4)
+                width = max(x1, x2, x3, x4, x5) - min(x1, x2, x3, x4, x5)
+                height = max(y1, y2, y3, y4, y5) - min(y1, y2, y3, y4, y5)
                 dataset['annotations'].append({
                     'area': width * height,
-                    'bbox': [min(x1, x2, x3, x4), min(y1, y2, y3, y4) , width, height],
+                    'bbox': [min(x1, x2, x3, x5), min(y1, y2, y3, y5) , width, height],
                     'category_id': cls_id,
                     'id': ann_id_cnt,
                     'image_id': k,
                     'iscrowd': 0,
                     # mask, 矩形是从左上角点按顺时针的四个顶点
-                    'segmentation': [[x1, y1, x2, y2, x3, y3, x4, y4]],
+                    'segmentation': [[x1, y1, x2, y2, x3, y3, x4, y4, x5, y5]],
                     'num_keypoints': num_keypoints,
                     'keypoints': keypoints
                 })
@@ -138,7 +155,7 @@ def yolo2coco(root_path, random_split):
                     json.dump(val_dataset, f)
                 elif phase == 'test':
                     json.dump(test_dataset, f)
-            print('Save annotation to {}'.format(json_name))
+            print('Save annotation to {}'.format(json_name))   
     else:
         json_name = os.path.join(root_path, 'annotations/{}'.format(arg.save_path))
         with open(json_name, 'w') as f:
@@ -147,7 +164,9 @@ def yolo2coco(root_path, random_split):
 
 if __name__ == "__main__":
     root_path = arg.root_dir
+    train_img_folder = arg.train_dir
+    val_img_folder = arg.val_dir
     assert os.path.exists(root_path)
     random_split = arg.random_split
     print("Loading data from ",root_path,"\nWhether to split the data:",random_split)
-    yolo2coco(root_path,random_split)
+    yolo2coco(root_path, train_img_folder, val_img_folder, random_split)