library(MASS) library(rpart) library(plyr) #simulation default settings p_default=0.1 m1=15 n1=500 Num_count=8 Cat_count=7 Cat_count2=c(2,2,2,2,2,3,3) mu_vector=c(2,1,3,3,2,4,1,2) conv_threshold=1 conv_threshold2=0.01 conv_threshold3=1 max_iter_times=50 imp_choice=3 if (imp_choice==1){ miss_var=c(1,3,5,7,8,9,11,12,13,15) imp_var=c(1,3,5,7,8,9,11,12,13,15) miss_relate_var=c(2,4,2,2,4,6,6,4,6,2) } if (imp_choice==2){ miss_var=c(1,3,5,7,8,9,11,12,13,15) imp_var=c(1,3,5,7,8) miss_relate_var=c(2,4,2,2,4,6,6,4,6,2) } if (imp_choice==3){ miss_var=c(1,3,5,7,8,9,11,12,13,15) imp_var=c(9,11,12,13,15) miss_relate_var=c(2,4,2,2,4,6,6,4,6,2) } small_or_large=1 if (small_or_large==1){Cat_to_miss=c(1,1)} if (small_or_large==2){Cat_to_miss=c(2,3)} if (p_default==0.1 && small_or_large==1){miss_prob=c(5/9,3/9,1/9)} if (p_default==0.2 && small_or_large==1){miss_prob=c(8/9,6/9,4/9)} if (p_default==0.3 && small_or_large==1){miss_prob=c(13/15,11/15,9/15,7/15,5/15)} if (p_default==0.4 && small_or_large==1){miss_prob=c(14/15,13/15,12/15,11/15,10/15)} if (p_default==0.1 && small_or_large==2){miss_prob=c(1/9,3/9,5/9)} if (p_default==0.2 && small_or_large==2){miss_prob=c(4/9,6/9,8/9)} if (p_default==0.3 && small_or_large==2){miss_prob=c(5/15,7/15,9/15,11/15,13/15)} if (p_default==0.4 && small_or_large==2){miss_prob=c(10/15,11/15,12/15,13/15,14/15)} simu_or_dataana=1 seed=44 Null_or_not=4 type_of_missing=2 #tree default settings node_default=ceiling(n1*0.15) times_default=20 chi_min=20 re_sam_min_obs=2 #Input Data if (simu_or_dataana==2){ data.com=read.csv("C:/Users/USER/Desktop/dataset/CA/CAdata.csv") m1=ncol(data.com) n1=nrow(data.com) node_default=ceiling(n1*0.15) Catgory=c(1,4:7,9,10,12,13,16) for (i in Catgory){ data.com[,i]=as.factor(data.com[,i]) } Num_Var=setdiff((1:m1),Catgory) max_iter_times=60 relate_prob=list() nonrelate_prob=list() relate_cut_point=list() nonrelate_cut_point=list() if (imp_choice==2){ miss_var=c(2,3,11) imp_var=c(1:7,11,14) miss_relate_var=c(8,8,8) if (small_or_large==1){ tmp_quan1=quantile(data.com[which(!is.na(data.com[,miss_relate_var[1]])),miss_relate_var[1]],c(0.2,0.4,0.6)) relate_cut_point[[1]]=c(tmp_quan1[1],tmp_quan1[2],tmp_quan1[3]) tmp_quan2=quantile(data.com[which(!is.na(data.com[,miss_relate_var[2]])),miss_relate_var[2]],c(0.2,0.4,0.6)) relate_cut_point[[2]]=c(tmp_quan2[1],tmp_quan2[2],tmp_quan2[3]) tmp_quan3=quantile(data.com[which(!is.na(data.com[,miss_relate_var[3]])),miss_relate_var[3]],c(0.2,0.4,0.6)) relate_cut_point[[3]]=c(tmp_quan3[1],tmp_quan3[2],tmp_quan3[3]) tmp_quan11=quantile(data.com[which(!is.na(data.com[,miss_var[1]])),miss_var[1]],c(0.2,0.4,0.6)) nonrelate_cut_point[[1]]=c(tmp_quan11[1],tmp_quan11[2],tmp_quan11[3]) tmp_quan21=quantile(data.com[which(!is.na(data.com[,miss_var[2]])),miss_var[2]],c(0.2,0.4,0.6)) nonrelate_cut_point[[2]]=c(tmp_quan21[1],tmp_quan21[2],tmp_quan21[3]) tmp_quan31=quantile(data.com[which(!is.na(data.com[,miss_var[3]])),miss_var[3]],c(0.2,0.4,0.6)) nonrelate_cut_point[[3]]=c(tmp_quan31[1],tmp_quan31[2],tmp_quan31[3]) } if (small_or_large==2){ tmp_quan1=quantile(data.com[which(!is.na(data.com[,miss_relate_var[1]])),miss_relate_var[1]],c(0.4,0.6,0.8)) relate_cut_point[[1]]=c(tmp_quan1[1],tmp_quan1[2],tmp_quan1[3]) tmp_quan2=quantile(data.com[which(!is.na(data.com[,miss_relate_var[2]])),miss_relate_var[2]],c(0.4,0.6,0.8)) relate_cut_point[[2]]=c(tmp_quan2[1],tmp_quan2[2],tmp_quan2[3]) tmp_quan3=quantile(data.com[which(!is.na(data.com[,miss_relate_var[3]])),miss_relate_var[3]],c(0.4,0.6,0.8)) relate_cut_point[[3]]=c(tmp_quan3[1],tmp_quan3[2],tmp_quan3[3]) tmp_quan11=quantile(data.com[which(!is.na(data.com[,miss_var[1]])),miss_var[1]],c(0.4,0.6,0.8)) nonrelate_cut_point[[1]]=c(tmp_quan11[1],tmp_quan11[2],tmp_quan11[3]) tmp_quan21=quantile(data.com[which(!is.na(data.com[,miss_var[2]])),miss_var[2]],c(0.4,0.6,0.8)) nonrelate_cut_point[[2]]=c(tmp_quan21[1],tmp_quan21[2],tmp_quan21[3]) tmp_quan31=quantile(data.com[which(!is.na(data.com[,miss_var[3]])),miss_var[3]],c(0.4,0.6,0.8)) nonrelate_cut_point[[3]]=c(tmp_quan31[1],tmp_quan31[2],tmp_quan31[3]) } relate_prob[[1]]=relate_prob[[2]]=relate_prob[[3]]=c(5/12,3/12,1/12) nonrelate_prob[[1]]=nonrelate_prob[[2]]=nonrelate_prob[[3]]=c(5/12,3/12,1/12) } if (imp_choice==3){ miss_var=c(4,9,10,12,13) imp_var=c(1,2,4:7,9,10,12:14) miss_relate_var=c(5,16,16,5,5) relate_prob[[1]]=c(0.2,0,0.1);relate_prob[[2]]=c(0.2,0.1) relate_prob[[3]]=c(0.2,0.1);relate_prob[[4]]=c(0.2,0,0.1) relate_prob[[5]]=c(0.2,0,0.1) nonrelate_prob[[1]]=c(0,0.2,0.05);nonrelate_prob[[2]]=c(0.05,0.25) nonrelate_prob[[3]]=c(0.25,0.05);nonrelate_prob[[4]]=c(0.25,0.05) nonrelate_prob[[5]]=c(0.2,0.05) } set.seed(seed) if (type_of_missing==1){MISS1=MCAR(data.com)} else if (type_of_missing==2){MISS1=MAR(data.com)} else if (type_of_missing==3){MISS1=MNAR(data.com)} data.imp=MISS1[[1]] NA_posit=MISS1[[2]] } if (simu_or_dataana==1){ Simu_data=matrix(999,nrow=n1,ncol=m1) if (Null_or_not==1){ set.seed(seed) sigma=rep(1,Num_count) for (i in 1:Num_count){ Simu_data[,i]=rnorm(n1,mu_vector[i],sigma[i]) } tmp=Simu_data for (i in (Num_count+1):m1){ tmp[,i]=rnorm(n1) if (i!=14 && i!=15){ if(i==9 || i==10 || i==11){ cut_point=(quantile(tmp[,i],0.6)) } else if (i==12 || i==13){ cut_point=(quantile(tmp[,i],0.5)) } for (mm in 1:n1){ if(tmp[mm,i]<=cut_point){Simu_data[mm,i]=1} else{Simu_data[mm,i]=2} } } if (i==14 || i==15){ if (i==15){cut_point=quantile(tmp[,i],c(0.4,0.7))} else if (i==14){cut_point=quantile(tmp[,i],c(0.3,0.6))} for (mm in 1:n1){ if(tmp[mm,i]<=cut_point[1]){Simu_data[mm,i]=1} else if (tmp[mm,i]>cut_point[2]){Simu_data[mm,i]=3} else {Simu_data[mm,i]=2} } } };rm(tmp) } else if (Null_or_not==2){ sigma=matrix(c(1,-0.26,0.25,0.39,0.39,0.37,0.29,0.25, -0.26,1,-0.4,-0.29,0.28,-0.35,0.28,-0.29, 0.25,-0.4,1,0.39,-0.39,0.29,0.31,0.3, 0.39,-0.29,0.39,1,0.31,0.4,-0.22,0.22, 0.39,0.28,-0.39,0.31,1,-0.4,-0.26,0.37, 0.37,-0.35,0.29,0.4,-0.4,1,0.22,-0.2, 0.29,0.28,0.31,-0.22,-0.26,0.22,1,-0.25, 0.25,-0.29,0.3,0.22,0.37,-0.2,-0.25,1),8,8) set.seed(seed) Simu_data[,(1:8)]=mvrnorm(n1,mu_vector,sigma) relate_var=c(rep(0,8),6,5,6,4,6,3,2) tmp=Simu_data for (i in 9:15){ mu_x=mu_vector[relate_var[i]] for (nn in 1:n1){ tmp[nn,i]=rnorm(1,0.3*(Simu_data[nn,relate_var[i]]-mu_x),0.91) } if (i!=14 && i!=15){ if(i==9 || i==10 || i==11){ cut_point=(quantile(tmp[,i],0.6)) } else if (i==12 || i==13){ cut_point=(quantile(tmp[,i],0.5)) } for (mm in 1:n1){ if(tmp[mm,i]<=cut_point){Simu_data[mm,i]=1} else{Simu_data[mm,i]=2} } } if (i==14 || i==15){ if (i==15){cut_point=quantile(tmp[,i],c(0.4,0.7))} else if (i==14){cut_point=quantile(tmp[,i],c(0.3,0.6))} for (mm in 1:n1){ if(tmp[mm,i]<=cut_point[1]){Simu_data[mm,i]=1} else if (tmp[mm,i]>cut_point[2]){Simu_data[mm,i]=3} else {Simu_data[mm,i]=2} } } };rm(tmp) } else if (Null_or_not==3){ sigma=matrix(c(1,-0.53,0.48,0.46,-0.42,0.49,-0.51,0.56, -0.53,1,-0.5,-0.47,0.43,-0.46,0.5,-0.45, 0.48,-0.5,1,0.41,-0.59,0.52,-0.54,0.56, 0.46,-0.47,0.41,1,-0.56,0.54,-0.6,0.55, -0.42,0.43,-0.59,-0.56,1,-0.45,0.49,-0.46, 0.49,-0.46,0.52,0.54,-0.45,1,-0.42,0.55, -0.51,0.5,-0.54,-0.6,0.49,-0.42,1,-0.47, 0.56,-0.45,0.56,0.55,-0.46,0.55,-0.47,1),8,8) set.seed(seed) Simu_data[,(1:8)]=mvrnorm(n1,mu_vector,sigma) relate_var=c(rep(0,8),6,5,6,4,6,3,2) tmp=Simu_data for (i in 9:15){ mu_x=mu_vector[relate_var[i]] for (nn in 1:n1){ tmp[nn,i]=rnorm(1,0.5*(Simu_data[nn,relate_var[i]]-mu_x),0.75) } if (i!=14 && i!=15){ if(i==9 || i==10 || i==11){ cut_point=(quantile(tmp[,i],0.6)) } else if (i==12 || i==13){ cut_point=(quantile(tmp[,i],0.5)) } for (mm in 1:n1){ if(tmp[mm,i]<=cut_point){Simu_data[mm,i]=1} else{Simu_data[mm,i]=2} } } if (i==14 || i==15){ if (i==15){cut_point=quantile(tmp[,i],c(0.4,0.7))} else if (i==14){cut_point=quantile(tmp[,i],c(0.3,0.6))} for (mm in 1:n1){ if(tmp[mm,i]<=cut_point[1]){Simu_data[mm,i]=1} else if (tmp[mm,i]>cut_point[2]){Simu_data[mm,i]=3} else {Simu_data[mm,i]=2} } } };rm(tmp) } else if (Null_or_not==4){ sigma=matrix(c(1,-0.61,0.74,0.67,-0.63,-0.77,0.64,-0.61, -0.61,1,-0.78,-0.72,0.64,0.6,-0.8,0.79, 0.74,-0.78,1,0.67,-0.7,-0.72,0.77,-0.71, 0.67,-0.72,0.67,1,-0.68,-0.77,0.72,-0.78, -0.63,0.64,-0.7,-0.68,1,0.64,-0.69,0.64, -0.77,0.6,-0.72,-0.77,0.64,1,-0.62,0.66, 0.64,-0.8,0.77,0.72,-0.69,-0.62,1,-0.62, -0.61,0.79,-0.71,-0.78,0.64,0.66,-0.62,1),8,8) set.seed(seed) Simu_data[,(1:8)]=mvrnorm(n1,mu_vector,sigma) relate_var=c(rep(0,8),6,5,6,4,6,3,2) tmp=Simu_data for (i in 9:15){ mu_x=mu_vector[relate_var[i]] for (nn in 1:n1){ tmp[nn,i]=rnorm(1,0.7*(Simu_data[nn,relate_var[i]]-mu_x),0.51) } if (i!=14 && i!=15){ if(i==9 || i==10 || i==11){ cut_point=(quantile(tmp[,i],0.6)) } else if (i==12 || i==13){ cut_point=(quantile(tmp[,i],0.5)) } for (mm in 1:n1){ if(tmp[mm,i]<=cut_point){Simu_data[mm,i]=1} else{Simu_data[mm,i]=2} } } if (i==14 || i==15){ if (i==15){cut_point=quantile(tmp[,i],c(0.4,0.7))} else if (i==14){cut_point=quantile(tmp[,i],c(0.3,0.6))} for (mm in 1:n1){ if(tmp[mm,i]<=cut_point[1]){Simu_data[mm,i]=1} else if (tmp[mm,i]>cut_point[2]){Simu_data[mm,i]=3} else {Simu_data[mm,i]=2} } } };rm(tmp) } Simu_data=data.frame(Simu_data) for (i in (Num_count+1):m1){ Simu_data[,i]=as.factor(Simu_data[,i]) } data.com=Simu_data set.seed(seed+1) if (type_of_missing==1){MISS=MCAR(data.com)} else if (type_of_missing==2){MISS=MAR(data.com)} else if (type_of_missing==3){ if (small_or_large==1){Cat_to_miss=c(1,1)} if (small_or_large==2){Cat_to_miss=c(2,3)} MISS=MNAR(data.com) } data.imp=MISS[[1]] NA_posit=MISS[[2]] Catgory=(Num_count+1):m1 Num_Var=setdiff((1:m1),Catgory) if (imp_choice==2 || imp_choice==3){ data.imp[,setdiff(miss_var,imp_var)]=data.com[,setdiff(miss_var,imp_var)] } } if (simu_or_dataana==1){acc_var=imp_var} if (simu_or_dataana==2){acc_var=miss_var} # Method 0 tree_list0=list() data.impu0=data.imp for (k in imp_var){ tmp_data_impu0=data.impu0[which(!is.na(data.impu0[,k])),] tree_list0[[k]]=list() if (is.element(k,Catgory)){ tree_list0[[k]]=rpart(noquote(paste("X",k,"~.",sep="")),minsplit=node_default,method="class",data=tmp_data_impu0) } else{ tree_list0[[k]]=rpart(noquote(paste("X",k,"~.",sep="")),minsplit=node_default,method="anova",data=tmp_data_impu0) } } for (j in 1:n1){ for (k in imp_var){ if (is.na(data.impu0[j,k])){ if(is.element(k,Catgory)){ data.impu0[j,k]=predict(tree_list0[[k]],newdata=data.impu0[j,],type="class") } else{ data.impu0[j,k]=predict(tree_list0[[k]],newdata=data.impu0[j,],method="anova") } } } } #accuracy acc0=c() for (s in 1:m1){ if (is.element(s,acc_var)){ if (is.element(s,Catgory)){ total=0 count=0 for (t in 1:n1){ if(NA_posit[t,s]==1){ total=total+1 if(data.impu0[t,s]==data.com[t,s]){count=count+1} } } acc0[s]=round(count/total,3) } else { total=0 rmse=0 for (t in 1:n1){ if(NA_posit[t,s]==1){ total=total+1 rmse=rmse+(data.impu0[t,s]-data.com[t,s])^2 } } acc0[s]=round(sqrt(rmse/total),3) } } else{acc0[s]=-1} } #Method 1 tree_list1=save_tree(data.imp,1) data.impu1=data.imp for (j in 1:n1){ for (k in imp_var){ if (is.na(data.impu1[j,k])){ XY_k=data.impu1[,-k] inp_point=data.impu1[j,-k] if (is.element(k,Catgory)){data.impu1[j,k]=levels(data.imp[,k])[Predict(inp_point,tree_list1[[k]],XY_k)]} else {data.impu1[j,k]=Predict(inp_point,tree_list1[[k]],XY_k)} } } } #accuracy acc1=c() for (s in 1:m1){ if (is.element(s,acc_var)){ if (is.element(s,Catgory)){ total=0 count=0 for (t in 1:n1){ if(NA_posit[t,s]==1){ total=total+1 if(data.impu1[t,s]==data.com[t,s]){count=count+1} } } acc1[s]=round(count/total,3) } else { total=0 rmse=0 for (t in 1:n1){ if(NA_posit[t,s]==1){ total=total+1 rmse=rmse+(data.impu1[t,s]-data.com[t,s])^2 } } acc1[s]=round(sqrt(rmse/total),3) } } else{acc1[s]=-1} } #Method 2 tree_list2=save_tree(data.imp,2) data.impu2=data.imp for (j in 1:n1){ for (k in imp_var){ if (is.na(data.impu2[j,k])){ XY_k=data.impu2[,-k] inp_point=data.impu2[j,-k] if (is.element(k,Catgory)){data.impu2[j,k]=levels(data.imp[,k])[Predict(inp_point,tree_list2[[k]],XY_k)]} else {data.impu2[j,k]=Predict(inp_point,tree_list2[[k]],XY_k)} } } } #accuracy acc2=c() for (s in 1:m1){ if (is.element(s,acc_var)){ if (is.element(s,Catgory)){ total=0 count=0 for (t in 1:n1){ if(NA_posit[t,s]==1){ total=total+1 if(data.impu2[t,s]==data.com[t,s]){count=count+1} } } acc2[s]=round(count/total,3) } else { total=0 rmse=0 for (t in 1:n1){ if(NA_posit[t,s]==1){ total=total+1 rmse=rmse+(data.impu2[t,s]-data.com[t,s])^2 } } acc2[s]=round(sqrt(rmse/total),3) } } else{acc2[s]=-1} } #Method 3 tree_list3=save_tree(data.imp,3) data.impu3=data.imp for (j in 1:n1){ for (k in imp_var){ if (is.na(data.impu3[j,k])){ XY_k=data.impu3[,-k] inp_point=data.impu3[j,-k] if (is.element(k,Catgory)){data.impu3[j,k]=levels(data.imp[,k])[Predict(inp_point,tree_list3[[k]],XY_k)]} else {data.impu3[j,k]=Predict(inp_point,tree_list3[[k]],XY_k)} } } } #accuracy acc3=c() for (s in 1:m1){ if (is.element(s,acc_var)){ if (is.element(s,Catgory)){ total=0 count=0 for (t in 1:n1){ if(NA_posit[t,s]==1){ total=total+1 if(data.impu3[t,s]==data.com[t,s]){count=count+1} } } acc3[s]=round(count/total,3) } else { total=0 rmse=0 for (t in 1:n1){ if(NA_posit[t,s]==1){ total=total+1 rmse=rmse+(data.impu3[t,s]-data.com[t,s])^2 } } acc3[s]=round(sqrt(rmse/total),3) } } else{acc3[s]=-1} } #Method 4 tree_list4=save_tree(data.imp,4) data.impu4=data.imp for (j in 1:n1){ for (k in imp_var){ if (is.na(data.impu4[j,k])){ XY_k=data.impu4[,-k] inp_point=data.impu4[j,-k] if (is.element(k,Catgory)){data.impu4[j,k]=levels(data.imp[,k])[Predict(inp_point,tree_list4[[k]],XY_k)]} else {data.impu4[j,k]=Predict(inp_point,tree_list4[[k]],XY_k)} } } } #accuracy acc4=c() for (s in 1:m1){ if (is.element(s,acc_var)){ if (is.element(s,Catgory)){ total=0 count=0 for (t in 1:n1){ if(NA_posit[t,s]==1){ total=total+1 if(data.impu4[t,s]==data.com[t,s]){count=count+1} } } acc4[s]=round(count/total,3) } else { total=0 rmse=0 for (t in 1:n1){ if(NA_posit[t,s]==1){ total=total+1 rmse=rmse+(data.impu4[t,s]-data.com[t,s])^2 } } acc4[s]=round(sqrt(rmse/total),3) } } else{acc4[s]=-1} } #Method 5 tree_list5=save_tree(data.imp,5) data.impu5=data.imp for (j in 1:n1){ for (k in imp_var){ if (is.na(data.impu5[j,k])){ XY_k=data.impu5[,-k] inp_point=data.impu5[j,-k] if (is.element(k,Catgory)){data.impu5[j,k]=levels(data.imp[,k])[Predict(inp_point,tree_list5[[k]],XY_k)]} else {data.impu5[j,k]=Predict(inp_point,tree_list5[[k]],XY_k)} } } } #accuracy acc5=c() for (s in 1:m1){ if (is.element(s,acc_var)){ if (is.element(s,Catgory)){ total=0 count=0 for (t in 1:n1){ if(NA_posit[t,s]==1){ total=total+1 if(data.impu5[t,s]==data.com[t,s]){count=count+1} } } acc5[s]=round(count/total,3) } else { total=0 rmse=0 for (t in 1:n1){ if(NA_posit[t,s]==1){ total=total+1 rmse=rmse+(data.impu5[t,s]-data.com[t,s])^2 } } acc5[s]=round(sqrt(rmse/total),3) } } else{acc5[s]=-1} } #Method 6 tree_list6=save_tree(data.imp,6) data.impu6=data.imp for (j in 1:n1){ for (k in imp_var){ if (is.na(data.impu6[j,k])){ XY_k=data.impu6[,-k] inp_point=data.impu6[j,-k] if (is.element(k,Catgory)){data.impu6[j,k]=levels(data.imp[,k])[Predict(inp_point,tree_list6[[k]],XY_k)]} else {data.impu6[j,k]=Predict(inp_point,tree_list6[[k]],XY_k)} } } } #accuracy acc6=c() for (s in 1:m1){ if (is.element(s,acc_var)){ if (is.element(s,Catgory)){ total=0 count=0 for (t in 1:n1){ if(NA_posit[t,s]==1){ total=total+1 if(data.impu6[t,s]==data.com[t,s]){count=count+1} } } acc6[s]=round(count/total,3) } else { total=0 rmse=0 for (t in 1:n1){ if(NA_posit[t,s]==1){ total=total+1 rmse=rmse+(data.impu6[t,s]-data.com[t,s])^2 } } acc6[s]=round(sqrt(rmse/total),3) } } else{acc6[s]=-1} } #Method 7 NA_posit2=matrix(0,nrow=n1,ncol=m1) for (i in 1:m1){ for (j in 1:n1){ if (is.na(data.imp[j,i])){NA_posit2[j,i]=1} } } data.impu7=data.imp Iter_times=0 conv_or_not=F tree_list_list=list() imp_result=list() while (conv_or_not==F){ Iter_times=Iter_times+1 tree_list_list[[Iter_times]]=list() imp_result[[Iter_times]]=list() for (ii in 1:m1){ if (is.element(ii,imp_var)){ miss_place=which(NA_posit2[,ii]==1) data.impu7[miss_place,ii]=NA data.impu7.copy=data.impu7 response_var=ii tree_list_list[[Iter_times]][[ii]]=save_tree(data.impu7,7) for (nn in miss_place){ XY_ii=data.impu7[,-ii] inp_point=data.impu7[nn,-ii] if (is.element(ii,Catgory)){data.impu7[nn,ii]=levels(data.impu7.copy[,ii])[Predict(inp_point,tree_list_list[[Iter_times]][[ii]],XY_ii)]} else {data.impu7[nn,ii]=Predict(inp_point,tree_list_list[[Iter_times]][[ii]],XY_ii)} } imp_result[[Iter_times]][[ii]]=data.impu7[,ii] } } if (Iter_times>=2){ conv_count=0 for (ii in Catgory){ if (is.element(ii,imp_var)){ total_count=0 same_count=0 for (nn in 1:n1){ if (NA_posit2[nn,ii]==1){ total_count=total_count+1 if (data.impu7[nn,ii]==imp_result[[Iter_times-1]][[ii]][[nn]]){ same_count=same_count+1 } } } if (same_count/total_count>=conv_threshold){conv_count=conv_count+1} } } for (jj in Num_Var){ if (is.element(jj,imp_var)){ total_count=0 same_count=0 for (nn in 1:n1){ if (NA_posit2[nn,jj]==1){ total_count=total_count+1 if ((imp_result[[Iter_times-1]][[jj]][[nn]])!=0){ if (abs((data.impu7[nn,jj]-imp_result[[Iter_times-1]][[jj]][[nn]])/imp_result[[Iter_times-1]][[jj]][[nn]])<=conv_threshold2){ same_count=same_count+1 } } else{ if (data.impu7[nn,jj]==imp_result[[Iter_times-1]][[jj]][[nn]]){ same_count=same_count+1 } } } } if (same_count/total_count>=conv_threshold){conv_count=conv_count+1} } } if ((conv_count/length(imp_var))>=conv_threshold3){ conv_or_not=T real_conv=T tree_list7=tree_list_list[[Iter_times]] } } if (Iter_times>=max_iter_times && conv_or_not==F){ conv_or_not=T real_conv=F tree_list7=tree_list_list[[Iter_times]] } } if (real_conv==F){ cycle_list=list() for (i in imp_var){ cycle_i=rep(1,n1) for (nn in which(NA_posit2[,i]==1)){ cycle_length=0 cycle_exist=F while(cycle_exist==F){ cycle_length=cycle_length+1 if (cycle_length<4){ if (cycle_length==1){ tmp_vec=c() for (ii in 0:5){ tmp_vec=c(tmp_vec,imp_result[[max_iter_times-ii]][[i]][[nn]]) } if (length(unique(tmp_vec))==1){ cycle_i[nn]=cycle_length cycle_exist=T } } else if (cycle_length>1){ tmp_vec1=tmp_vec2=tmp_vec3=c() for (j in (3*cycle_length-1):(2*cycle_length)){ tmp_vec1=c(tmp_vec1,imp_result[[max_iter_times-j]][[i]][[nn]]) } for (j in (2*cycle_length-1):cycle_length){ tmp_vec2=c(tmp_vec2,imp_result[[max_iter_times-j]][[i]][[nn]]) } for (j in (cycle_length-1):0){ tmp_vec3=c(tmp_vec3,imp_result[[max_iter_times-j]][[i]][[nn]]) } if (sum(tmp_vec1==tmp_vec2)==cycle_length && sum(tmp_vec1==tmp_vec3)==cycle_length){ cycle_i[nn]=cycle_length cycle_exist=T } } } else if (cycle_length>=4){ tmp_vec1=tmp_vec2=c() for (j in (2*cycle_length-1):cycle_length){ tmp_vec1=c(tmp_vec1,imp_result[[max_iter_times-j]][[i]][[nn]]) } for (j in (cycle_length-1):0){ tmp_vec2=c(tmp_vec2,imp_result[[max_iter_times-j]][[i]][[nn]]) } if (sum(tmp_vec1==tmp_vec2)==cycle_length){ cycle_i[nn]=cycle_length cycle_exist=T } } if (cycle_length>=20 && cycle_exist==F){ cycle_i[nn]=500 cycle_exist=T } } } cycle_list[[i]]=cycle_i } lcm_list=rep(0,m1) for (i in imp_var){ tmp_lcm=pracma::Lcm(cycle_list[[i]][[1]],cycle_list[[i]][[2]]) for (ii in 3:n1){ tmp_lcm=pracma::Lcm(tmp_lcm,cycle_list[[i]][[ii]]) } lcm_list[i]=tmp_lcm if (tmp_lcm1){ data.impu7[nn,i]=levels(data.imp[,i])[sample(Modes(tmp_iter_value),1)] } else{data.impu7[nn,i]=levels(data.imp[,i])[Modes(tmp_iter_value)]} } else{ data.impu7[nn,i]=mean(tmp_iter_value) } } } else{ for (nn in which(NA_posit2[,i]==1)){ tmp_iter_value=c() if (is.element(i,Catgory)){ for (ii in 0:10){ tmp_iter_value=c(tmp_iter_value,imp_result[[max_iter_times-ii]][[i]][[nn]]) } data.impu7[nn,i]=levels(data.imp[,i])[getmode(tmp_iter_value)] } else{ for (ii in 0:9){ tmp_iter_value=c(tmp_iter_value,imp_result[[max_iter_times-ii]][[i]][[nn]]) } data.impu7[nn,i]=mean(tmp_iter_value) } } } } } #accuracy acc7=c() for (s in 1:m1){ if (is.element(s,acc_var)){ if (is.element(s,Catgory)){ total=0 count=0 for (t in 1:n1){ if(NA_posit[t,s]==1){ total=total+1 if(data.impu7[t,s]==data.com[t,s]){count=count+1} } } acc7[s]=round(count/total,3) } else { total=0 rmse=0 for (t in 1:n1){ if(NA_posit[t,s]==1){ total=total+1 rmse=rmse+(data.impu7[t,s]-data.com[t,s])^2 } } acc7[s]=round(sqrt(rmse/total),3) } } else{acc7[s]=-1} } acc_matrix=rbind(acc0,acc1,acc2,acc3,acc4,acc5,acc6,acc7) name_of_columns=c() for (ii in 1:m1){ name_of_columns[ii]=paste("X",ii,sep="") } colnames(acc_matrix)=name_of_columns acc_matrix2=acc_matrix[,-which(acc_matrix[1,]==-1)] name_of_columns2=c() for (ii in 1:length(acc_var)){ name_of_columns2[ii]=paste("X",acc_var[ii],sep="") } imp_outcome_length=c() imp_missing_position=list() for (ii in 1:length(acc_var)){ imp_outcome_length[ii]=sum(NA_posit[,acc_var[ii]]==1) imp_missing_position[[ii]]=which(NA_posit[,acc_var[ii]]==1) } max_outcome_length=max(imp_outcome_length) imp_outcome_matrix0=imp_outcome_matrix1=imp_outcome_matrix2=imp_outcome_matrix3=imp_outcome_matrix4=imp_outcome_matrix5=imp_outcome_matrix6=imp_outcome_matrix7=real_outcome_matrix=matrix(999,nrow=max_outcome_length,ncol=length(acc_var)) for (ii in 1:length(acc_var)){ imp_outcome_matrix0[(1:imp_outcome_length[ii]),ii]=data.impu0[imp_missing_position[[ii]],acc_var[ii]] imp_outcome_matrix1[(1:imp_outcome_length[ii]),ii]=data.impu1[imp_missing_position[[ii]],acc_var[ii]] imp_outcome_matrix2[(1:imp_outcome_length[ii]),ii]=data.impu2[imp_missing_position[[ii]],acc_var[ii]] imp_outcome_matrix3[(1:imp_outcome_length[ii]),ii]=data.impu3[imp_missing_position[[ii]],acc_var[ii]] imp_outcome_matrix4[(1:imp_outcome_length[ii]),ii]=data.impu4[imp_missing_position[[ii]],acc_var[ii]] imp_outcome_matrix5[(1:imp_outcome_length[ii]),ii]=data.impu5[imp_missing_position[[ii]],acc_var[ii]] imp_outcome_matrix6[(1:imp_outcome_length[ii]),ii]=data.impu6[imp_missing_position[[ii]],acc_var[ii]] imp_outcome_matrix7[(1:imp_outcome_length[ii]),ii]=data.impu7[imp_missing_position[[ii]],acc_var[ii]] real_outcome_matrix[(1:imp_outcome_length[ii]),ii]=data.com[imp_missing_position[[ii]],acc_var[ii]] } colnames(imp_outcome_matrix0)=name_of_columns2 colnames(imp_outcome_matrix1)=name_of_columns2 colnames(imp_outcome_matrix2)=name_of_columns2 colnames(imp_outcome_matrix3)=name_of_columns2 colnames(imp_outcome_matrix4)=name_of_columns2 colnames(imp_outcome_matrix5)=name_of_columns2 colnames(imp_outcome_matrix6)=name_of_columns2 colnames(imp_outcome_matrix7)=name_of_columns2 colnames(real_outcome_matrix)=name_of_columns2