import os
import random

file_path = "output"  # First field written on every line of a split file
image_dir = "umad_data/output/image_02/data"  # Directory scanned for the '.jpg' frames
output_dir = "splits/umad"  # Directory that receives the generated split files

# Train, validation, test set ratios
train_ratio = 0.8  # 80% for training set
val_ratio = 0.1    # 10% for validation set
test_ratio = 0.1   # 10% for test set (never read: the test set takes whatever remains)


def _collect_frame_numbers(directory):
    """Return the sorted frame numbers found in *directory*, minus both ends.

    Only names ending in '.jpg' are considered, and the text before the first
    '.' must parse as an integer (filename format 'number.jpg'); any other
    '.jpg' name raises ValueError.
    """
    frame_numbers = sorted(
        int(name.split('.')[0])
        for name in os.listdir(directory)
        if name.endswith('.jpg')
    )
    # The smallest and largest frame numbers are dropped.
    # NOTE(review): presumably so every remaining frame has a neighbour on
    # both sides -- confirm against the consumer of the split files.
    return frame_numbers[1:-1]


def _split_numbers(numbers, train_fraction, val_fraction):
    """Cut *numbers* into (train, val, test) slices; test receives the remainder."""
    train_end = int(len(numbers) * train_fraction)
    val_end = train_end + int(len(numbers) * val_fraction)
    return numbers[:train_end], numbers[train_end:val_end], numbers[val_end:]


def _write_split_file(path, folder, numbers, width=0):
    """Write one '<folder> <number> l' line per entry, zero-padding to *width* digits."""
    with open(path, "w") as split_file:
        # A real newline terminates each entry so line-based readers see one
        # record per line (the previous "\\n" emitted a literal backslash-n).
        split_file.writelines(
            f"{folder} {str(num).zfill(width)} l\n" for num in numbers
        )


def main(image_dir=image_dir, output_dir=output_dir, file_path=file_path,
         train_ratio=train_ratio, val_ratio=val_ratio, seed=None):
    """Generate the split file(s) for the frames stored in *image_dir*.

    Args:
        image_dir: Directory containing the 'number.jpg' frames.
        output_dir: Directory the split files are written to (created if missing).
        file_path: Text written as the first field of every entry.
        train_ratio: Fraction of the frames assigned to the training set.
        val_ratio: Fraction of the frames assigned to the validation set.
        seed: Optional seed for a reproducible shuffle. ``None`` keeps the
            original behaviour of shuffling with the global ``random`` state.

    Returns:
        A ``(train_numbers, val_numbers, test_numbers)`` tuple of integer lists.

    Raises:
        FileNotFoundError: If *image_dir* does not exist.
        ValueError: If a '.jpg' filename does not start with an integer.
    """
    # Check and create the target output directory
    os.makedirs(output_dir, exist_ok=True)

    numbers = _collect_frame_numbers(image_dir)

    # Shuffle the order of numbers
    if seed is None:
        random.shuffle(numbers)
    else:
        random.Random(seed).shuffle(numbers)

    train_numbers, val_numbers, test_numbers = _split_numbers(
        numbers, train_ratio, val_ratio
    )

    # Writing the training and validation files was already disabled in this
    # script and stays disabled; the disabled code wrote unpadded numbers.
    # _write_split_file(os.path.join(output_dir, "train_files.txt"), file_path, train_numbers)
    # _write_split_file(os.path.join(output_dir, "val_files.txt"), file_path, val_numbers)

    # Write to test set file, padding the number with zeros to make it 10 digits
    _write_split_file(
        os.path.join(output_dir, "test_files.txt"), file_path, test_numbers, width=10
    )

    # Report only the file that was actually written.
    print(f"File generation completed: test_files.txt has been saved to {output_dir} folder")
    return train_numbers, val_numbers, test_numbers


if __name__ == "__main__":
    main()