Kmeans

打印 被阅读次数


K-means clustering in R

 # Run k-means clustering of genes to stratify them into groups

library(factoextra)

library(cluster)

# ---- Load and prepare the data ----

# Read the raw table. The path is machine-specific; point it at the CSV file.
dat <- read.csv("C:/Users/?/Desktop/dat.csv")

# Drop every row that has a missing value. This unscaled copy is kept because
# its rows line up one-to-one with the cluster labels computed below; the raw
# `dat` does not whenever na.omit() removed at least one row.
dat_complete <- na.omit(dat)

# Standardise each column (mean 0, sd 1) so that no single variable dominates
# the Euclidean distances used by k-means. scale() needs numeric columns.
df <- scale(dat_complete)

# ---- Determine how many clusters are optimal ----

# The diagnostics below rely on random k-means starts (and, for the gap
# statistic, on bootstrapped reference data), so seed the RNG before them to
# make the plots reproducible.
set.seed(1)

# Plot number of clusters vs. total within-cluster sum of squares; look for
# the "elbow" where the curve bends.
fviz_nbclust(df, kmeans, method = "wss")

# Compute the gap statistic for k = 1..10 using 50 bootstrap reference sets;
# nstart = 25 is forwarded to kmeans().
gap_stat <- clusGap(
  df,
  FUN = kmeans,
  nstart = 25,
  K.max = 10,
  B = 50
)

# Plot number of clusters vs. gap statistic.
fviz_gap_stat(gap_stat)

# ---- Perform k-means clustering with the chosen k ----

# Re-seed immediately before the final fit so the model is reproducible on its
# own, independent of how much randomness the diagnostics consumed.
set.seed(1)

# Fit k-means with k = 4 clusters, keeping the best of 25 random starts.
km <- kmeans(df, centers = 4, nstart = 25)
km

# Plot the clusters of the final model.
fviz_cluster(km, data = df)

# Mean of every variable within each cluster, on the original (unscaled)
# scale. Uses the NA-free rows so the grouping vector has a matching length.
aggregate(dat_complete, by = list(cluster = km$cluster), FUN = mean)

# Attach the cluster assignment to the NA-free original data.
finaldat <- cbind(dat_complete, cluster = km$cluster)
head(finaldat)

# Alternative fit with k = 3 for comparison. Cluster the rows of the scaled
# matrix directly: `df` is already NA-free, and indexing it as
# df[!is.na(df)] would flatten the matrix into a plain vector, clustering
# individual cell values instead of observations.
set.seed(1)
kmeans(df, centers = 3, nstart = 25)

?

登录后才可评论.