# Neural-network (nnet) fault-prediction experiment.
#
# For every CSV found in the scaled-dataset directory this script:
#   1. shuffles the rows and splits them into up to `totalbreaks` folds,
#   2. for each fold trains one nnet per hidden-layer size in 1..`size` and
#      keeps the model with the lowest final fitting criterion (`$value`),
#   3. thresholds the held-out predictions at 0.5 and counts TP/TN/FP/FN,
#   4. appends one summary row per dataset, and finally writes all rows to
#      <BasePath>/Results/NNResult6.csv.
#
# Each dataset is expected to contain a column named `BUG` (the label); every
# other column is used as a predictor.

# clear everything except functions
rm(list = setdiff(ls(), lsf.str()))

library(caret) # createFolds()
library(nnet)  # nnet()

BasePath <- "D:/initialexperiments/initialexperiments/"
PathOfDataSet <- paste0(BasePath, "6-Scaled Dataset/")
dataSetName <- datasetRepository <- list.files(path = PathOfDataSet)

totalbreaks <- 10 # number of cross-validation folds requested
size <- 50        # largest hidden-layer size tried per fold

# One element per processed dataset; combined into `df` at the end.
serial <- 0
SrNo <- integer(0)
datasets <- character(0)
featureQuantity <- integer(0)
featureList <- character(0)
instances <- integer(0)
faultyInstances <- numeric(0)
totalTPs <- integer(0)
totalTNs <- integer(0)
totalFNs <- integer(0)
totalFPs <- integer(0)
selectedModels <- list()
# These hold ";"-joined per-fold values, so they are character vectors.
# (The previous `str(0)` initialiser only printed " num 0" and returned NULL.)
hiddenUnits <- character(0)
foldTPs <- character(0)
foldTNs <- character(0)
foldFNs <- character(0)
foldFPs <- character(0)

modelQuantity <- 0

# seq_along() keeps the loop from running when the directory is empty.
for (names in seq_along(dataSetName)) {
  CompletePathAndFile <- paste0(PathOfDataSet, dataSetName[names])
  mydata <- read.csv(CompletePathAndFile, header = TRUE)
  mydata$BUG <- as.numeric(mydata$BUG)
  currentDataSet <- mydata

  # Randomly shuffle the data 10 times
  for (k in 1:10) {
    currentDataSet <- currentDataSet[sample(nrow(currentDataSet)), ]
  }

  folds <- createFolds(currentDataSet$BUG, k = totalbreaks)
  # Iterate over the folds actually returned rather than assuming
  # `totalbreaks` of them exist.
  foldCount <- length(folds)

  TPs <- integer(foldCount)
  FPs <- integer(foldCount)
  FNs <- integer(foldCount)
  TNs <- integer(foldCount)
  hiddenUnit <- integer(foldCount)

  # Predictors are all columns except the label; the formula is the same for
  # every fold, so build both once per dataset.
  n <- colnames(currentDataSet)
  featureNames <- n[!n %in% "BUG"]
  f <- as.formula(paste("BUG ~", paste(featureNames, collapse = " + ")))

  for (l in seq_len(foldCount)) {
    modelQuantity <- modelQuantity + 1
    testIndexes <- unlist(folds[l])
    testData <- currentDataSet[testIndexes, ]
    trainData <- currentDataSet[-testIndexes, ]
    set.seed(200)

    ######################## model building ###################
    cat("model building...")
    values <- numeric(size)
    myModels <- vector("list", size)
    for (i in seq_len(size)) {
      cat(
        "in process---------------------------------------------",
        i,
        "--",
        names,
        "Folds",
        l,
        "\n"
      )
      # `linout = FALSE` is nnet's own argument (and its default); the earlier
      # `linear.output` belongs to neuralnet and was ignored by nnet.
      myModel <- nnet(f, data = trainData, linout = FALSE, size = i)
      myModels[[i]] <- myModel
      values[i] <- myModel$value
    }
    # Keep the candidate with the smallest final fitting criterion.
    myModel <- myModels[[which.min(values)]]
    selectedModels[[modelQuantity]] <- myModel
    cat("model built...")
    test.predictions <- predict(myModel, testData)
    ######################## model building ###################

    # Threshold the network output at 0.5 to obtain 0/1 class labels.
    results <- ifelse(as.vector(test.predictions) > 0.5, 1, 0)
    original_values <- testData$BUG

    TPs[l] <- sum(original_values == 1 & results == 1)
    FNs[l] <- sum(original_values == 1 & results == 0)
    FPs[l] <- sum(original_values == 0 & results == 1)
    TNs[l] <- sum(original_values == 0 & results == 0)
    # NOTE(review): `nunits` looks like nnet's total unit count rather than
    # the hidden-layer size alone - confirm this is what the `hiddenUnits`
    # column is meant to report before relying on it.
    hiddenUnit[l] <- myModel$nunits
  }

  currentFeature <- paste(sort(featureNames), collapse = ",")

  serial <- serial + 1
  SrNo <- append(SrNo, serial)
  datasets <- append(datasets, dataSetName[names])
  featureQuantity <- append(featureQuantity, length(featureNames))
  featureList <- append(featureList, currentFeature)
  instances <- append(instances, nrow(currentDataSet))
  # Share of rows in the second level of BUG (as ordered by table()).
  faultyInstances <- append(
    faultyInstances,
    unname(prop.table(table(currentDataSet$BUG))[2])
  )
  totalTPs <- append(totalTPs, sum(TPs))
  totalTNs <- append(totalTNs, sum(TNs))
  totalFNs <- append(totalFNs, sum(FNs))
  totalFPs <- append(totalFPs, sum(FPs))
  foldTPs <- append(foldTPs, paste(as.character(TPs), collapse = ";"))
  foldTNs <- append(foldTNs, paste(as.character(TNs), collapse = ";"))
  foldFNs <- append(foldFNs, paste(as.character(FNs), collapse = ";"))
  foldFPs <- append(foldFPs, paste(as.character(FPs), collapse = ";"))
  hiddenUnits <- append(
    hiddenUnits,
    paste(as.character(hiddenUnit), collapse = ";")
  )
}

df <- data.frame(
  SrNo,
  datasets,
  featureQuantity,
  featureList,
  instances,
  faultyInstances,
  totalTPs,
  totalTNs,
  totalFNs,
  totalFPs,
  foldTPs,
  foldTNs,
  foldFNs,
  foldFPs,
  hiddenUnits
)

# Make sure the output directory exists before writing the summary.
resultsDir <- paste0(BasePath, "Results/")
dir.create(resultsDir, showWarnings = FALSE, recursive = TRUE)
write.csv(df, row.names = FALSE, file = paste0(resultsDir, "NNResult6.csv"))

##################################### Results synthesis