R语言实现高级的韦恩图可视化

编程/开发
409
0
0
2022-10-12
标签   R语言

韦恩图大家应该都不陌生,主要用来展示不同事物或者组之间的数学或逻辑关系,主要用于集合的运算结果展示。今天给大家介绍一个在R语言中更加高级的展示形式,实现此功能的R包是UpSetR。首先看下包的安装:

install.packages("UpSetR")

接下来我们直接通过实例来看下如何进行数据的可视化:

##载入包
library(UpSetR)
library(ggplot2)
library(grid)
library(plyr)
##构建数据

# example oflist input (list of named vectors)
listInput <-list(one = c(1, 2, 3, 5, 7, 8, 11, 12, 13), two = c(1, 2, 4, 5, 10), three =c(1, 5, 6, 7, 8, 9, 10, 12, 13))
 
# example ofexpression input
expressionInput<- c(one = 2, two = 1, three = 2, `one&two` = 1, `one&three` = 4,`two&three` = 1, `one&two&three` = 2)
##可视化结果
upset(fromList(listInput),order.by = "freq")

img

upset(fromExpression(expressionInput),order.by = "freq")

img

##载入数据并绘图
movies <-read.csv(system.file("extdata", "movies.csv", package ="UpSetR"), header = T, sep = ";")
 
## nsets(频数最多的前六个变量),text.scale =c(intersection size title, intersection size ticklabels, set size title, set size tick labels, set names, numbers above bars)
upset(movies,nsets = 6, number.angles = 30, point.size = 3.5, line.size = 2, mainbar.y.label = "Genre Intersections", sets.x.label = "Movies Per Genre", text.scale =c(1.3, 1.3, 1, 1, 2, 0.75))

img

##自定义交集的组
upset(movies,sets = c("Action", "Adventure", "Comedy","Drama", "Mystery", "Thriller","Romance", "War", "Western"), mb.ratio = c(0.55,0.45), order.by = "freq")

img

##基于相交的等级进行排序
upset(movies,sets = c("Action", "Adventure", "Comedy","Drama", "Mystery", "Thriller","Romance", "War", "Western"), mb.ratio = c(0.55,0.45), order.by = "degree")

img

##基于等级和频率共同排序,通过先后来确定排序顺序
upset(movies,sets = c("Action", "Adventure", "Comedy","Drama", "Mystery", "Thriller","Romance", "War", "Western"), mb.ratio = c(0.55,0.45), order.by = c("degree", "freq"))

img

##保留各组频数,不排序
upset(movies,sets = c("Action", "Adventure", "Comedy","Drama", "Mystery", "Thriller","Romance", "War", "Western"), mb.ratio = c(0.55,0.45), order.by = "freq", keep.order = TRUE)

img

##对交集结果进行分组,nintersects交叉点的数目阈值,cutoff交叉结果阈值。
upset(movies,nintersects = 70, group.by = "sets", cutoff = 7)

img

##空的交叉点展示
upset(movies,empty.intersections = "on", order.by = "freq")

img

##利用不同的颜色显示重要的信息
upset(movies,queries = list(list(query = intersects, params = list("Drama", "Comedy","Action"), color = "orange", active = T), list(query =intersects, params = list("Drama"), color = "red", active =F), list(query = intersects, params = list("Action","Drama"), active = T)))

img

##通过设置阈值进行标记
upset(movies,queries = list(list(query = elements, params = list("AvgRating", 3.5,4.1), color = "blue", active = T), list(query = elements, params =list("ReleaseDate", 1980, 1990, 2000), color = "red",active = F)))

img

##通过expression进行筛选
upset(movies,queries = list(list(query = intersects, params = list("Action", "Drama"),active = T), list(query = elements, params = list("ReleaseDate", 1980,1990, 2000), color = "red", active = F)), expression ="AvgRating > 3 & Watches > 100")

img

##自定义的query结构
Myfunc <-function(row, release, rating) {data <- (row["ReleaseDate"] %in%release) & (row["AvgRating"] > rating)}#row数据源,release,rating指的是parms中的第一,二个参数
upset(movies,queries = list(list(query = Myfunc, params = list(c(1970, 1980, 1990, 1999,2000), 2.5), color = "blue", active = T)))

img

##增加query的标签legend
upset(movies,query.legend = "top", queries = list(list(query = intersects,
    params = list("Drama","Comedy", "Action"), color = "orange", active =T,
    query.name = "Funny action"),list(query = intersects, params = list("Drama"),
    color = "red", active = F),list(query = intersects, params = list("Action",
"Drama"), active = T, query.name = "Emotionalaction")))

img

##综合前面的方式的完整例子
upset(movies, query.legend = "bottom", queries =list(list(query = Myfunc, params = list(c(1970,
    1980, 1990, 1999, 2000),2.5), color = "orange", active = T), list(query = intersects,
    params = list("Action","Drama"), active = F), list(query = elements, params =list("ReleaseDate",
    1980, 1990, 2000), color ="red", active = F, query.name = "Decades")),
    expression ="AvgRating > 3 & Watches > 100")

img

##通过柱状图增加变量的其它数据信息其中type=bar plot("hist") or heat map ("heat"/“bool”)
sets <- names(movies[3:19])
avgRottenTomatoesScore <- round(runif(17, min = 0, max = 90))
metadata <- as.data.frame(cbind(sets, avgRottenTomatoesScore))
names(metadata) <- c("sets","avgRottenTomatoesScore")
metadata$avgRottenTomatoesScore <-as.numeric(as.character(metadata$avgRottenTomatoesScore))
upset(movies, set.metadata = list(data = metadata, plots =list(list(type = "hist",
    column ="avgRottenTomatoesScore", assign = 20))))

img

##增加热图信息
Cities <- sample(c("Boston", "NYC","LA"), 17, replace = T)
metadata <- cbind(metadata, Cities)
metadata$Cities <- as.character(metadata$Cities)
metadata[which(metadata$sets %in% c("Drama","Comedy", "Action", "Thriller",
    "Romance")), ]
upset(movies, set.metadata = list(data = metadata, plots =list(list(type = "heat",
    column ="Cities", assign = 10, colors = c(Boston = "green", NYC ="navy",
        LA ="purple")))))

img

##增加文字信息
upset(movies, set.metadata = list(data = metadata, plots =list(list(type = "text",
    column ="Cities", assign = 10, colors = c(Boston = "green", NYC ="navy",
        LA ="purple")))))

img

##直接设置连线区域背景
upset(movies, set.metadata = list(data = metadata, plots =list(list(type = "hist",
    column ="avgRottenTomatoesScore", assign = 20), list(type ="matrix_rows",
    column ="Cities", colors = c(Boston = "green", NYC ="navy", LA = "purple"),
    alpha = 0.5))))

img

##一次添加多种信息
accepted <- round(runif(17, min = 0, max = 1))
metadata <- cbind(metadata, accepted)
metadata[which(metadata$sets %in% c("Drama","Comedy", "Action", "Thriller", "Romance")),]
upset(movies, set.metadata = list(data = metadata, plots =list(list(type = "hist", column = "avgRottenTomatoesScore",assign = 20), list(type = "bool", column = "accepted", assign= 5, colors = c("#FF3333", "#006400")), list(type ="text", column = "Cities", assign = 5, colors = c(Boston ="green", NYC = "navy", LA = "purple")))))

img

##混合图的绘制,通过attribute.plots添加
upset(movies, set.metadata = list(data = metadata, plots =list(list(type = "hist",
    column ="avgRottenTomatoesScore", assign = 20), list(type = "bool",column = "accepted",
    assign = 5, colors =c("#FF3333", "#006400")), list(type = "text",column = "Cities",
    assign = 5, colors =c(Boston = "green", NYC = "navy", LA ="purple")),
    list(type ="matrix_rows", column = "Cities", colors = c(Boston ="green",
        NYC ="navy", LA = "purple"), alpha = 0.5))), queries =list(list(query = intersects,
    params =list("Drama"), color = "red", active = F), list(query =intersects,
    params =list("Action", "Drama"), active = T), list(query =intersects,
    params =list("Drama", "Comedy", "Action"), color ="orange", active = T)),
    attribute.plots =list(gridrows = 45, plots = list(list(plot = scatter_plot,
        x ="ReleaseDate", y = "AvgRating", queries = T), list(plot =scatter_plot,
        x ="AvgRating", y = "Watches", queries = F)), ncols = 2),query.legend = "bottom")

img

##自定义绘图函数的混合绘图

myplot <- function(mydata, x, y) {
    plot <- (ggplot(data =mydata, aes_string(x = x, y = y, colour = "color")) +
        geom_point() + scale_color_identity()+ theme(plot.margin = unit(c(0,
        0, 0, 0),"cm")))
}
 
another.plot <- function(data, x, y) {
    data$decades <-round_any(as.integer(unlist(data[y])), 10, ceiling)
    data <-data[which(data$decades >= 1970), ]
    myplot <- (ggplot(data,aes_string(x = x)) + geom_density(aes(fill = factor(decades)),
        alpha = 0.4) +theme(plot.margin = unit(c(0, 0, 0, 0), "cm"), legend.key.size =unit(0.4,
        "cm")))
}
upset(movies, main.bar.color = "black", queries =list(list(query = intersects,
    params =list("Drama"), color = "red", active = F), list(query =intersects,
    params =list("Action", "Drama"), active = T), list(query =intersects,
    params =list("Drama", "Comedy", "Action"), color ="orange", active = T)),
    attribute.plots =list(gridrows = 45, plots = list(list(plot = myplot, x ="ReleaseDate",
        y ="AvgRating", queries = T), list(plot = another.plot, x ="AvgRating",
        y ="ReleaseDate", queries = F)), ncols = 2))

img

##增加箱线图
upset(movies, boxplot.summary = c("AvgRating","ReleaseDate"))

img

欢迎大家互相学习交流!