"""Generate split files from the numbered ``.jpg`` images in ``image_dir``.

The script lists the images, converts their file names to frame numbers,
drops the lowest and highest number, shuffles the rest and partitions them
into train / validation / test subsets. Only ``test_files.txt`` is written
to ``output_dir``; writing the train and validation files is disabled.
"""

import os
import random
from typing import Iterable, List, Tuple

file_path = "output"  # Folder token written as the first field of every entry
image_dir = "umad_data/output/image_02/data"  # Actual path where images are stored
output_dir = "splits/umad"  # Directory that receives the split files

# Train, validation, test set ratios
train_ratio = 0.8  # 80% for training set
val_ratio = 0.1  # 10% for validation set
test_ratio = 0.1  # 10% for test set (not read: the test set takes the remainder)


def frame_numbers(filenames: Iterable[str]) -> List[int]:
    """Return the sorted frame numbers of the ``.jpg`` files, minus both ends.

    Args:
        filenames: File names such as ``"0000000123.jpg"``; names that do not
            end in ``.jpg`` are ignored.

    Returns:
        The numbers in ascending order with the smallest and largest removed
        (an empty list when fewer than three images are present).

    Raises:
        ValueError: If the part of a ``.jpg`` name before the first ``.`` is
            not an integer.
    """
    numbers = sorted(
        int(name.split(".")[0]) for name in filenames if name.endswith(".jpg")
    )
    # NOTE(review): the first and last frames are dropped, presumably so every
    # remaining frame has a neighbour on both sides - confirm with the consumer.
    return numbers[1:-1]


def split_numbers(
    numbers: List[int], train_fraction: float, val_fraction: float
) -> Tuple[List[int], List[int], List[int]]:
    """Partition ``numbers`` into (train, validation, test) in the given order.

    The train and validation sizes are ``len(numbers)`` times the respective
    fraction, truncated towards zero; the test subset is whatever remains, so
    rounding losses always end up in the test subset.
    """
    train_end = int(len(numbers) * train_fraction)
    val_end = train_end + int(len(numbers) * val_fraction)
    return numbers[:train_end], numbers[train_end:val_end], numbers[val_end:]


def format_entry(folder: str, number: int, pad: int = 0) -> str:
    """Return one split-file line: ``"<folder> <number> l"`` plus a newline.

    ``pad`` is the minimum width of the number; shorter numbers are
    left-padded with zeros (``pad=0`` leaves the number as is).
    """
    # A real newline terminates the entry. The previous "\\n" wrote a literal
    # backslash + "n", which put every entry on a single line.
    return f"{folder} {str(number).zfill(pad)} l\n"


def write_split(path: str, folder: str, numbers: Iterable[int], pad: int = 0) -> None:
    """Write one entry per number to ``path``, replacing any existing file."""
    with open(path, "w", encoding="utf-8") as split_file:
        split_file.writelines(format_entry(folder, number, pad) for number in numbers)


def main() -> None:
    """Build the shuffled split and write ``test_files.txt`` to ``output_dir``."""
    # Check and create the target output directory
    os.makedirs(output_dir, exist_ok=True)

    numbers = frame_numbers(os.listdir(image_dir))

    # Shuffle so the subsets are random samples (unseeded: every run differs)
    random.shuffle(numbers)

    _train_numbers, _val_numbers, test_numbers = split_numbers(
        numbers, train_ratio, val_ratio
    )

    # Writing train_files.txt / val_files.txt is disabled. The disabled code
    # wrote the numbers unpadded, i.e. write_split(..., pad=0) on
    # _train_numbers / _val_numbers would reproduce it.

    # Test entries carry the number zero-padded to 10 digits
    write_split(
        os.path.join(output_dir, "test_files.txt"), file_path, test_numbers, pad=10
    )

    # Report only the file that was actually written
    print(
        f"File generation completed: test_files.txt has been saved to {output_dir} folder"
    )


if __name__ == "__main__":
    main()