"""Generate a synthetic web-service QoS dataset and write it as comma-separated text.

Each output line holds nine random numeric metrics followed by a unique
service name and a WSDL address derived from that name. No header line is
written.
"""
import random

# Service-name prefixes; rows cycle through them and append a unique index.
services = ['MAPPMatching', 'Compound2', 'USDAData', 'GBNIRHolidayDates', 'CasUsers', 'interop2']

# Number of rows generated (and written) per batch.
batch_size = 10000

# Output column order; must match the keys produced by build_batch().
COLUMNS = (
    'Response time',
    'Availability',
    'Throughput',
    'Successability',
    'Reliability',
    'Compliance',
    'Best Practices',
    'Latency',
    'Documentation',
    'Service Name',
    'WSDL Address',
)


def generate_row():
    """Return a dict with the nine random metric values for one row.

    Every metric except 'Response time' is computed as ``maximum - draw``
    (the original comments call this "anti-correlated processing"), which
    yields 0..99 for the percentage-style metrics, 0..499 for 'Latency' and
    0.0..49.9 for 'Throughput'.
    """
    # Dict literals evaluate in order, so the sequence of random draws is
    # fixed: Response time first, Documentation last.
    return {
        'Response time': random.randint(1, 5000),
        'Availability': 100 - random.randint(1, 100),
        'Throughput': round(50 - random.uniform(0.1, 50), 1),
        'Successability': 100 - random.randint(1, 100),
        'Reliability': 100 - random.randint(1, 100),
        'Compliance': 100 - random.randint(1, 100),
        'Best Practices': 100 - random.randint(1, 100),
        'Latency': 500 - random.randint(1, 500),
        'Documentation': 100 - random.randint(1, 100),
    }


def build_batch(batch, rows_per_batch=batch_size):
    """Return the list of row dicts for batch number *batch*.

    All metric values are drawn first, then each row is given a
    'Service Name' (a prefix from ``services`` plus the row's global index,
    ``i + batch * rows_per_batch``) and the matching 'WSDL Address'.
    """
    rows = [generate_row() for _ in range(rows_per_batch)]
    first_index = batch * rows_per_batch
    for i, row in enumerate(rows):
        service_name = services[i % len(services)] + str(i + first_index)
        row['Service Name'] = service_name
        row['WSDL Address'] = 'http://example.com/' + service_name + '.asmx?wsdl'
    return rows


def format_row(row):
    """Return *row* as one comma-separated, newline-terminated line."""
    return ','.join(str(row[column]) for column in COLUMNS) + '\n'


def main(path='data2.txt', num_batches=800, rows_per_batch=batch_size):
    """Write ``num_batches * rows_per_batch`` rows to *path*.

    With the defaults this produces 800 batches of 10000 rows
    (8000000 rows in total) in 'data2.txt', overwriting any existing file.
    """
    print("Generating dataset")
    with open(path, 'w', encoding='utf-8') as f:
        for batch in range(num_batches):
            rows = build_batch(batch, rows_per_batch)
            # Progress indicator cycling through 1-3 dots. Padding to a fixed
            # width overwrites leftover dots from the previous, longer state
            # before the carriage return moves the cursor back.
            dots = "." * (batch % 3 + 1)
            print(dots.ljust(3) + "\r", end="", flush=True)
            # One buffered write per batch instead of one call per row.
            f.writelines(format_row(row) for row in rows)


if __name__ == "__main__":
    main()