# Merge sub-datasets that share the same feature set.
#
# Every CSV in the input directory is read, its feature set is taken to be all
# column names except the last one (the last column is treated as the label),
# and all files with an identical feature set are row-bound into one CSV in
# the output directory. The output file is named after the comma-joined
# feature names.

# Clear data objects from the workspace (functions are kept). Retained from
# the original script so that sourcing it has the same effect on the session.
rm(list = setdiff(ls(), lsf.str()))

# Build the grouping key of a data frame: the names of all columns except the
# last one, joined by commas.
feature_key <- function(frame) {
  paste0(colnames(frame)[-ncol(frame)], collapse = ",")
}

# Give the last column of every data frame in `frames` the same name as the
# last column of the first one. The grouping key ignores the last column, so
# files in one group may name it differently (e.g. "BUG" vs "bug"); without
# this step rbind() stops with "names do not match previous names".
align_label_column <- function(frames) {
  if (length(frames) == 0L) {
    return(frames)
  }
  first <- frames[[1L]]
  label <- colnames(first)[ncol(first)]
  lapply(frames, function(frame) {
    colnames(frame)[ncol(frame)] <- label
    frame
  })
}

# Merge all CSV files in `input_dir` that share a feature set and write one
# CSV per feature set into `output_dir`.
#
# Arguments:
#   input_dir   Directory holding the input CSVs; must end with a path
#               separator because file paths are built with paste0().
#   output_dir  Directory receiving the merged CSVs; same trailing-separator
#               requirement. It must already exist.
#   file_names  File names (relative to `input_dir`) to process. Defaults to
#               everything list.files() finds in `input_dir`.
#
# Returns (invisibly) the character vector of unique feature-set keys in
# order of first appearance. An empty input yields character(0) and writes
# nothing.
#
# Side effects: prints one line per input file and one summary line per
# merged file via cat(); writes "<output_dir><key>.csv" for each key.
# NOTE(review): the output file name is the full comma-joined feature list, so
# wide datasets may produce very long file names -- confirm this is acceptable
# on the target file system.
merge_by_feature_set <- function(input_dir, output_dir,
                                 file_names = list.files(path = input_dir)) {
  # Read every file exactly once and remember its key. All datasets are held
  # in memory at the same time in exchange for not re-reading each file once
  # per feature set.
  datasets <- vector("list", length(file_names))
  keys <- character(length(file_names))
  for (i in seq_along(file_names)) {
    datasets[[i]] <- read.csv(paste0(input_dir, file_names[i]), header = TRUE)
    keys[i] <- feature_key(datasets[[i]])
    cat(keys[i], file_names[i], "\n")
  }

  # unique() keeps first-occurrence order, which fixes the numbering printed
  # in the summary lines below.
  unique_keys <- unique(keys)

  for (k in seq_along(unique_keys)) {
    key <- unique_keys[k]
    members <- which(keys == key)

    # Zero-row template carrying the feature names plus a "BUG" label column.
    # rbind() ignores zero-row arguments, so the template only determines the
    # result when every file of the group is empty.
    feature_names <- unlist(strsplit(key, split = ","))
    template <- as.data.frame(
      matrix(0, ncol = length(feature_names) + 1L, nrow = 0L)
    )
    colnames(template) <- c(feature_names, "BUG")

    # Only frames with rows contribute to the result; align their label
    # column name to the first of them so rbind() can match columns by name.
    with_rows <- Filter(function(frame) nrow(frame) > 0L, datasets[members])
    with_rows <- align_label_column(with_rows)

    # One rbind() call per group instead of growing the frame file by file.
    merged <- do.call(rbind, c(list(template), with_rows))

    write.csv(
      merged,
      file = paste0(output_dir, key, ".csv"),
      row.names = FALSE
    )
    cat(ncol(merged) - 1, k, key, ":(", length(members), "), \n")
  }

  invisible(unique_keys)
}

# Merging datasets with same features
BasePath <- "/Users/rizwan/Library/Mobile Documents/com~apple~CloudDocs/PhD/initialexperiments/"
inputPath <- paste0(BasePath, "3-SubDatasetBeforeMerging/")
outputPath <- paste0(BasePath, "4-Merged Dataset/")

filesName <- list.files(path = inputPath)
uniqueFeatureSet <- merge_by_feature_set(inputPath, outputPath, filesName)