# Split the dataset
import os
import random
import shutil
import csv
import numpy as np


def _ensure_dir(path, label):
    """Create directory *path* if it is missing and print which case occurred."""
    if not os.path.exists(path):
        os.makedirs(path)
        print("%s directory has been created successfully!" % label)
    else:
        print("%s directory already exists!" % label)


def _copy_into(paths, dest_dir):
    """Copy every file in *paths* into *dest_dir*, keeping each file's base name."""
    for src in paths:
        # join/basename instead of string concatenation: works whether or not
        # dest_dir ends with a separator, and with either "/" or os.sep in src.
        shutil.copy(src, os.path.join(dest_dir, os.path.basename(src)))


def CopyFile(imageDir, test_rate, val_rate, save_test_dir, save_train_dir, save_val_dir):
    """
    Split one category's images into training, validation, and test sets.

    The files are copied (the originals are left in place). Test images are
    sampled first, validation images are sampled from what remains, and every
    image selected for neither goes to the training directory.

    Args:
        imageDir (list): List of paths to all images in a category.
        test_rate (float): Proportion of images to be used for the test set.
        val_rate (float): Proportion of images to be used for the validation set.
        save_test_dir (str): Directory to save the test set.
        save_train_dir (str): Directory to save the training set.
        save_val_dir (str): Directory to save the validation set.

    Raises:
        ValueError: If a rate is negative or the two rates together request
            more images than are available.
    """
    image_number = len(imageDir)  # Total number of images
    test_number = int(image_number * test_rate)  # Number of images for the test set
    val_number = int(image_number * val_rate)  # Number of images for the validation set
    if test_number < 0 or val_number < 0 or test_number + val_number > image_number:
        raise ValueError(
            "test_rate=%r and val_rate=%r cannot be satisfied with %d images"
            % (test_rate, val_rate, image_number)
        )

    print("Number of images to be moved to the test directory (%s): %d" % (save_test_dir, test_number))
    test_samples = random.sample(imageDir, test_number)
    test_set = set(test_samples)

    print("Number of images to be moved to the validation directory (%s): %d" % (save_val_dir, val_number))
    # Filter in input order rather than via a set difference: set iteration
    # order depends on string hashing, which would make the validation pick
    # differ between runs even when random.seed() is fixed.
    remaining = [path for path in imageDir if path not in test_set]
    val_samples = random.sample(remaining, val_number)

    # Copy images to the target directories
    _ensure_dir(save_test_dir, "Test")
    _ensure_dir(save_val_dir, "Validation")
    _ensure_dir(save_train_dir, "Training")

    _copy_into(test_samples, save_test_dir)
    print("Test images have been moved!")

    _copy_into(val_samples, save_val_dir)
    print("Validation images have been moved!")

    # One set lookup per image instead of scanning two lists.
    held_out = test_set.union(val_samples)
    _copy_into([path for path in imageDir if path not in held_out], save_train_dir)
    print("Training images have been moved")


def split_dataset(root, test_rate, val_rate):
    """
    Split every category directory found directly under *root*.

    Each sub-directory of *root* is treated as one category. Its regular files
    are split with CopyFile into ``root + "1/train/<category>/"``,
    ``root + "1/test/<category>/"`` and ``root + "1/val/<category>/"``.

    Args:
        root (str): Directory that contains one sub-directory per category.
        test_rate (float): Proportion of each category used for the test set.
        val_rate (float): Proportion of each category used for the validation set.
    """
    # os.listdir returns a snapshot, so directories created below are not
    # picked up during this run. Sorting keeps the processing order stable.
    for name in sorted(os.listdir(root)):
        class_dir = os.path.join(root, name)
        if not os.path.isdir(class_dir):
            continue  # stray file next to the category folders

        # Only regular files count as images. NOTE: when root ends with a
        # separator the output root (root + "1/") lies inside root itself, so
        # on a second run it is listed here; its train/test/val
        # sub-directories must not be handed to shutil.copy.
        image_Dir = [
            os.path.join(class_dir, entry)
            for entry in sorted(os.listdir(class_dir))
            if os.path.isfile(os.path.join(class_dir, entry))
        ]
        print("There are %d images in the directory %s!" % (len(image_Dir), class_dir))
        if not image_Dir:
            # Nothing to split (for example the output root from an earlier
            # run); skip it so no empty category appears in the output.
            continue

        CopyFile(
            image_Dir,
            test_rate,
            val_rate,
            root + "1/test/" + name + "/",
            root + "1/train/" + name + "/",  # Save path
            root + "1/val/" + name + "/",
        )


# Just provide file_path and test_rate to complete the task
# Original path + split ratio
################################
file_path = "E:/PytorchObjectTwo/helloPytorch/mobilenet/archive/"  # File path
test_rate = 0.1
val_rate = 0.1
################################

if __name__ == "__main__":
    split_dataset(file_path, test_rate, val_rate)
    print("All data has been moved successfully!")