-
file_sampling.py · 카테고리 없음 · 2022. 3. 27. 20:59
import random
import os
import shutil

# Build the binary "ng" subset for one ImageNet class: copy the first NUM_TRAIN
# images named in the class listing file into the train split, and for the
# first VAL_LIMIT of them also copy one image from the class's validation
# folder into the val split.

# Defaults used when the script is run directly.
DATASET_ROOT = "../../dataset/LGDdataset"
CLASS_NAME = "n02111129"
NUM_TRAIN = 262
VAL_LIMIT = 45

# Position of the file name inside a "/"-split listing entry.
# NOTE(review): presumably entries look like
# "../../dataset/LGDdataset/imagenet/train/<class>/<file>" -- confirm against
# the listing files before changing this.
NAME_COMPONENT_INDEX = 7


def _read_sample_paths(txt_path):
    """Return the first whitespace-separated token of every non-blank line.

    Blank lines are skipped instead of ending the read, and the file is
    closed as soon as it has been consumed.
    """
    with open(txt_path, "r") as listing:
        return [line.split()[0] for line in listing if line.strip()]


def _file_name(sample):
    """Return the file-name component of one listing entry.

    Raises:
        ValueError: if the entry has too few "/"-separated components.
    """
    parts = sample.split("/")
    if len(parts) <= NAME_COMPONENT_INDEX:
        raise ValueError(
            f"listing entry {sample!r} has fewer than "
            f"{NAME_COMPONENT_INDEX + 1} path components"
        )
    return parts[NAME_COMPONENT_INDEX]


def main(class_name=CLASS_NAME, num=NUM_TRAIN, val_limit=VAL_LIMIT,
         dataset_root=DATASET_ROOT):
    """Copy the sampled train/val images for ``class_name``.

    Args:
        class_name: class directory name under ``imagenet/train`` and
            ``imagenet/val``.
        num: number of listing entries to copy into the train split.
        val_limit: number of validation images to copy into the val split.
        dataset_root: directory that contains ``imagenet``, ``semi_data`` and
            the output ``220327_relabel_dataset`` tree.

    Raises:
        ValueError: if the listing has fewer than ``num`` entries, or the
            validation folder has fewer images than are needed.
    """
    train_path = f"{dataset_root}/imagenet/train/{class_name}"
    val_path = f"{dataset_root}/imagenet/val/{class_name}"
    out_root = f"{dataset_root}/220327_relabel_dataset/binary_dataset_ng"
    train_destination = f"{out_root}/train/{class_name}"
    valid_destination = f"{out_root}/val/{class_name}"
    txt = f"{dataset_root}/semi_data/binary/bad/{class_name}.txt"

    txt_list = _read_sample_paths(txt)

    os.makedirs(train_destination, exist_ok=True)
    os.makedirs(valid_destination, exist_ok=True)

    # Skip the copy when the train split is already populated. One pass is
    # enough: repeating it cannot add files the listing does not name, so
    # looping until the count is reached would never end on a listing with
    # duplicate names.
    if len(os.listdir(train_destination)) < num:
        if len(txt_list) < num:
            raise ValueError(
                f"{txt} lists {len(txt_list)} samples, {num} required"
            )

        val_needed = min(num, val_limit)
        # os.listdir returns entries in arbitrary order; list once and sort so
        # the same validation images are picked on every run.
        val_files = sorted(os.listdir(val_path)) if val_needed > 0 else []
        if len(val_files) < val_needed:
            raise ValueError(
                f"{val_path} holds {len(val_files)} files, "
                f"{val_needed} required"
            )

        for i, sample in enumerate(txt_list[:num]):
            name = _file_name(sample)
            shutil.copy(f"{train_path}/{name}", f"{train_destination}/{name}")
            if i < val_limit:
                # NOTE(review): the validation image is stored under the
                # train sample's file name; kept as-is -- confirm intended.
                shutil.copy(
                    f"{val_path}/{val_files[i]}",
                    f"{valid_destination}/{name}",
                )

    print("done")


if __name__ == "__main__":
    main()
file_sampling2
import random
import os
import shutil

# Build the "<type>_hard" train/val/test subsets for a list of ImageNet
# classes. For every class the first `num` entries of its listing file go to
# the train split and the following `val_limit` entries go to the test and val
# splits; a per-split status line is printed for each class.

# Defaults used when the script is run directly.
DATASET_ROOT = "../../dataset/LGDdataset"
TYPE_NAME = "multi"
CLASS_NAME_LIST = (
    "n02098105", "n02095889", "n02096051", "n02093859", "n02097298",
    "n02095570", "n02093647", "n02093991", "n02095314", "n02097130",
    "n02097209", "n02099267", "n02105251", "n02090721", "n02091635",
    "n02105505", "n02099429", "n02102973", "n02092002", "n02090622",
)
NUM_LIST = (
    457, 757, 643, 574, 342, 342, 291, 713, 575, 631,
    272, 767, 696, 372, 356, 357, 420, 535, 487, 413,
)
VAL_LIST = (
    46, 76, 64, 57, 34, 34, 29, 71, 57, 63,
    27, 77, 70, 37, 36, 36, 42, 54, 49, 41,
)

# Position of the file name inside a "/"-split listing entry.
# NOTE(review): presumably entries look like
# "../../dataset/LGDdataset/imagenet/train/<class>/<file>" -- confirm against
# the listing files before changing this.
NAME_COMPONENT_INDEX = 7


def _read_sample_paths(txt_path):
    """Return the first whitespace-separated token of every non-blank line.

    Blank lines are skipped instead of ending the read, and the file is
    closed as soon as it has been consumed.
    """
    with open(txt_path, "r") as listing:
        return [line.split()[0] for line in listing if line.strip()]


def _file_name(sample):
    """Return the file-name component of one listing entry.

    Raises:
        ValueError: if the entry has too few "/"-separated components.
    """
    parts = sample.split("/")
    if len(parts) <= NAME_COMPONENT_INDEX:
        raise ValueError(
            f"listing entry {sample!r} has fewer than "
            f"{NAME_COMPONENT_INDEX + 1} path components"
        )
    return parts[NAME_COMPONENT_INDEX]


def _copy_samples(samples, source_dir, destination_dir):
    """Copy the file named by each listing entry between two directories."""
    for sample in samples:
        name = _file_name(sample)
        shutil.copy(f"{source_dir}/{name}", f"{destination_dir}/{name}")


def _report(class_name, split, directory, expected):
    """Print whether ``directory`` holds exactly ``expected`` entries."""
    actual = len(os.listdir(directory))
    if actual == expected:
        print(f"{class_name},{split}_done")
    else:
        print(f"{class_name},{split} not done", actual, expected)


def _sample_one_class(class_name, num, val_limit, type_name, dataset_root):
    """Populate the train, test and val splits of a single class.

    Raises:
        ValueError: if the listing has fewer than ``num + val_limit`` entries.
    """
    train_path = f"{dataset_root}/imagenet/train/{class_name}"
    out_root = f"{dataset_root}/220418_relabel_dataset/{type_name}_hard"
    train_destination = f"{out_root}/train/{class_name}"
    valid_destination = f"{out_root}/val/{class_name}"
    test_destination = f"{out_root}/test/{class_name}"
    txt = f"{dataset_root}/semi_data/{type_name}/{class_name}.txt"

    txt_list = _read_sample_paths(txt)
    # Fail before touching the output tree rather than with a bare IndexError
    # halfway through a copy.
    if len(txt_list) < num + val_limit:
        raise ValueError(
            f"{txt} lists {len(txt_list)} samples, "
            f"{num + val_limit} required"
        )

    for directory in (train_destination, valid_destination, test_destination):
        os.makedirs(directory, exist_ok=True)

    print(valid_destination)

    # Skip the copy when the train split is already populated. One pass is
    # enough: repeating it cannot add files the listing does not name, so
    # looping until the count is reached would never end on a listing with
    # duplicate names. A shortfall is reported by the status lines below.
    if len(os.listdir(train_destination)) < num:
        _copy_samples(txt_list[:num], train_path, train_destination)

    # NOTE(review): the test and val splits are filled from the same listing
    # entries (num .. num + val_limit - 1), so they contain identical images.
    # Kept as-is -- confirm this is intended.
    held_out = txt_list[num:num + val_limit]
    if len(os.listdir(train_destination)) >= num:
        _copy_samples(held_out, train_path, test_destination)
    if len(os.listdir(test_destination)) >= val_limit:
        _copy_samples(held_out, train_path, valid_destination)

    _report(class_name, "train", train_destination, num)
    _report(class_name, "val", valid_destination, val_limit)
    _report(class_name, "test", test_destination, val_limit)


def main(class_names=CLASS_NAME_LIST, train_counts=NUM_LIST,
         holdout_counts=VAL_LIST, type_name=TYPE_NAME,
         dataset_root=DATASET_ROOT):
    """Sample every class in ``class_names`` and print ``done`` at the end.

    Args:
        class_names: class directory names under ``imagenet/train``.
        train_counts: per-class number of listing entries for the train split.
        holdout_counts: per-class number of entries for the test/val splits.
        type_name: selects the ``semi_data/<type_name>`` listings and the
            ``<type_name>_hard`` output tree.
        dataset_root: directory that contains ``imagenet``, ``semi_data`` and
            the output ``220418_relabel_dataset`` tree.

    Raises:
        ValueError: if the three per-class sequences differ in length, or a
            class listing is too short.
    """
    if not len(class_names) == len(train_counts) == len(holdout_counts):
        raise ValueError(
            "class_names, train_counts and holdout_counts must have the "
            "same length"
        )

    for class_name, num, val_limit in zip(class_names, train_counts,
                                          holdout_counts):
        _sample_one_class(class_name, num, val_limit, type_name, dataset_root)

    print("done")


if __name__ == "__main__":
    main()
.py