5 min read

研究のつながりを可視化する (R)

データ可視化系のネタ。

成果物はこれ。

データ読み込み

.bibファイルに適当な項目名 (projectとした) でテーマを登録する。

@article{Murakami_et_al2018PCE,
  title={Quantification of excitation energy distribution between photosystems based on a mechanistic model of photosynthetic electron transport},
  author={Murakami, Keach and Matsuda, Ryo and Fujiwara, Kazuhiro},
  journal=PCE,
  volume={41},
  number={1},
  pages={148--159},
  year={2018},
  month={January},
  doi={10.1111/pce.12986},
  project={Photosynthetic Response to Light Spectrum}
}
@conference{intl005,
  title={Time course of leaf content of cytochrome $b_6f$ complex and photosynthetic capacity after changes in growth irradiance},
  author={Murakami, Keach and Zhu, Hui and Zeng, Ling-Da and Yi, Xiao-Ping and Peng, Chang-Lian and Zhang, Wang-Feng and Chow, Wah Soon},
  year={2018},
  month={September},
  date={23--26},
  type={Poster presentation},
  symposium={ComBio2018},
  short_symposium={ComBio2018},
  address={Australia},
  venue={International Convention Centre Sydney/Sydney},
  project={Light Use in Horticulture, Modeling Temporal Canopy Photosynthesis}
}

bib2dfパッケージを使って読み込み。

suppressMessages(library(tidyverse))
suppressMessages(library(bib2df))

bib_data <-
  dir("~/Dropbox/r_packages/fudukue/bibtex/list_bib/", pattern = "^mywork", full.names = T) %>%
  map_df(bib2df)

bib_data[1:4, 1:4] %>%
  knitr::kable()
CATEGORY BIBTEXKEY ADDRESS ANNOTE
INCOLLECTION Murakami_Matsuda2016Chap Singapore NA
INCOLLECTION Matsuda_Murakami2016Chap Switzerland NA
ARTICLE Matsuda_et_al2016SH NA NA
CONFERENCE domest001 大阪 NA

データクレンジング。

suppressMessages(library(janitor))

tidy_bib_data <-
  bib_data %>%
  janitor::remove_empty("cols") %>%
  janitor::clean_names() %>%
  mutate(category = if_else(category %in% c("BOOK", "INBOOK", "INCOLLECTION"), "book/chapter", tolower(category)),
         id = dplyr::row_number() %>%
              stringr::str_pad(width = 3, side = "left", pad = "0") %>%
              paste0(category, "_", .)) %>%
  dplyr::select(id, project, category, type, title, year, doi, short = short_symposium, venue) %>%
  tidyr::separate_rows(project, sep = ", ")


tidy_bib_data[1:4, 1:4] %>%
  knitr::kable()
id project category type
book/chapter_001 Others book/chapter NA
book/chapter_001 Light Use in Horticulture book/chapter NA
book/chapter_002 Others book/chapter NA
article_003 Light Use in Horticulture article NA

ネットワーク作成

igraph::graph_from_data_frameは1列目と2列目を繋ぐので、列順が重要。 毎回ネットワークの形状が変わるので、一応set.seedしておく。

suppressMessages(library(igraph))

set.seed(123)

igraph_obj <-
  igraph::graph_from_data_frame(d = tidy_bib_data, directed = F)

plot(igraph_obj)

class(igraph_obj)
## [1] "igraph"

見栄えをよくするべく、ggnetworkパッケージでigprahオブジェクトをggplot用に整える。 ggnetworkこのページがわかりやすかった。

suppressMessages(library(ggnetwork))

df_network <-
  igraph_obj %>%
  ggnetwork(layout = "fruchtermanreingold")

ggnetworkの出力をdplyrで扱おうとして引っかかった。

df_network %>%
  mutate(hoge = "huga")
#> Error: Columns `x`, `y`, `xend`, `yend` must be 1d atomic vectors or lists

tibbledata.frameと異なり、matrixを包含することができないのが原因らしい。 これはバグのようで、近いうちに直されそうな感じ (Allow matrix and data frame columns)。

n <- network::network(sna::rgraph(5, tprob = 0.2))
net <- ggnetwork::ggnetwork(n)

mutate(net, hoge = "hoge")
#> Error: Columns `x`, `y`, `xend`, `yend` must be 1d atomic vectors or lists

net$x
#>           [,1]
#> [1,] 0.2249716
#> [2,] 1.0000000
#> [3,] 0.8017759
#> [4,] 0.5301314
#> [5,] 0.0000000
#> [6,] 0.0000000
#> [7,] 0.5301314
#> [8,] 0.8017759

net %>% as.tibble
#> Error: Columns `x`, `y`, `xend`, `yend` must be 1d atomic vectors or lists

tibble(x = matrix(1:5, ncol = 1))
#> Error: Column `x` must be a 1d atomic vector or a list

# リストで包むと通る
(tbl_mtrx <- tibble(x = list(matrix(1:5, ncol = 1))))
#> # A tibble: 1 x 1
#>   x            
#>   <list>       
#> 1 <int [5 × 1]>

# unnestするとvectorにdropするっぽい
tidyr::unnest(tbl_mtrx)
# A tibble: 5 x 1
#>       x
#>   <int>
#> 1     1
#> 2     2
#> 3     3
#> 4     4
#> 5     5

as.matrix %>% as_tibbleで強引にtibble型に変えると、dplyr系関数が使える。

df_network <-
  df_network %>%
  as.matrix %>%
  as_tibble %>%
  mutate_at(.vars = vars(x, y, xend, yend, year), .funs = as.numeric) %>% # 文字列を数値に戻す
  mutate(group = str_extract(vertex.names, "[a-z]*"),
         group = if_else(group %in% c("article", "conference", "book"), group, "project")) # あとでカテゴリ別に色分けしたいので

# node部分
df_network[1:5, 1:8] %>%
  knitr::kable()

# edge部分
df_network[30:34, 1:8] %>%
  knitr::kable()
x y na.x vertex.names xend yend category doi
0.3564629 0.7606544 FALSE book/chapter_001 0.3564629 0.7606544 NA NA
0.5682415 1.0000000 FALSE book/chapter_002 0.5682415 1.0000000 NA NA
0.1973102 0.6080856 FALSE article_003 0.1973102 0.6080856 NA NA
0.6329070 0.8842712 FALSE conference_004 0.6329070 0.8842712 NA NA
0.4339224 0.3767279 FALSE conference_005 0.4339224 0.3767279 NA NA
x y na.x vertex.names xend yend category doi
0.4790799 0.8769860 FALSE Others 0.4790799 0.8769860 NA NA
0.3596180 0.5660283 FALSE Light Use in Horticulture 0.3596180 0.5660283 NA NA
0.6359569 0.2937061 FALSE Photosynthetic Response to Light Spectrum 0.6359569 0.2937061 NA NA
0.9314290 0.3610911 FALSE Statistics in Plant Science 0.9314290 0.3610911 NA NA
0.1044156 0.1370176 FALSE Imaging Photosynthesis 0.1044156 0.1370176 NA NA

このままだと取り回しが悪いので、node部分とedge部分を分離して扱う。 ggplotとplotlyでの図示用にデータフレームを整える。

# node数をカウント
node_num <-
  is.na(df_network$na.y) %>% sum

# node部分だけ抜き出す
df_node <-
  df_network[seq_len(node_num),] %>%
  janitor::remove_empty("cols") %>%
  mutate(node_size = if_else(group == "project", 10, 1),
         node_size = if_else(group %in% c("article", "book"), 2, node_size)) # ノードサイズをカテゴリ別に

# 残るedge部分をハンドリング
df_edge <-
  df_network[-seq_len(node_num),] %>%
  janitor::remove_empty("cols")

# hoverの要不要に応じてprojectとそれ以外を分ける
# 空行でもNAが表示されて鬱陶しいので
transmute(df_edge, vertex.names, year) %>%
  left_join(df_node, ., by = "vertex.names") %>%
  {
    df_node_projects <<-
      dplyr::filter(., group == "project")
    df_node_items <<-
      dplyr::filter(., group != "project")
  }

ggplotで重ねていく。

fig_ggplot <-
  df_node %>%
  ggplot(aes(x, y, col = group)) +
  geom_segment(data = df_edge, aes(xend = xend, yend = yend)) +
  geom_point(data = df_node_items, aes(size = node_size, text = year)) +
  geom_point(data = df_node_projects, aes(size = node_size), hoverinfo = "none") +
  geom_text(data = df_node_projects, aes(label = vertex.names), col = "black") +
  scale_color_manual(values = c("cyan2", "magenta2", "grey50", "grey75")) +
  guides(size = F) +
  theme_blank()

fig_ggplot

plotlyでinteractiveにして冒頭の図が完成。

suppressMessages(library(plotly))

delete_axis <- 
  list(title = "", ticklen = 0, zeroline = FALSE, showline = FALSE, showticklabels = FALSE, showgrid = FALSE)

fig_plotly <-
  ggplotly(fig_ggplot, tooltip = "text") %>%
  layout(xaxis = delete_axis, yaxis = delete_axis)

# htmlwidgetとして保存
setwd("../plotly")
htmlwidgets::saveWidget(fig_plotly, "research_network.html", selfcontained = T)
setwd("../post")

ネットワークがもっと騒がしくなるように精進しよう。

Session Info

devtools::session_info()
##  setting  value                       
##  version  R version 3.5.0 (2018-04-23)
##  system   x86_64, darwin15.6.0        
##  ui       X11                         
##  language (EN)                        
##  collate  en_US.UTF-8                 
##  tz       Australia/Brisbane          
##  date     2018-06-24                  
## 
##  package        * version    date       source                            
##  assertthat       0.2.0      2017-04-11 CRAN (R 3.5.0)                    
##  backports        1.1.2      2017-12-13 CRAN (R 3.5.0)                    
##  base           * 3.5.0      2018-04-24 local                             
##  bib2df         * 1.0.1      2018-06-22 Github (ottlngr/bib2df@027215c)   
##  bindr            0.1.1      2018-03-13 CRAN (R 3.5.0)                    
##  bindrcpp       * 0.2.2      2018-03-29 CRAN (R 3.5.0)                    
##  blogdown         0.6        2018-04-18 CRAN (R 3.5.0)                    
##  bookdown         0.7        2018-02-18 CRAN (R 3.5.0)                    
##  broom            0.4.4.9000 2018-06-21 Github (tidyverse/broom@2721de4)  
##  cellranger       1.1.0      2016-07-27 CRAN (R 3.5.0)                    
##  cli              1.0.0      2017-11-05 CRAN (R 3.5.0)                    
##  coda             0.19-1     2016-12-08 CRAN (R 3.5.0)                    
##  codetools        0.2-15     2016-10-05 CRAN (R 3.5.0)                    
##  colorspace       1.3-2      2016-12-14 CRAN (R 3.5.0)                    
##  compiler         3.5.0      2018-04-24 local                             
##  crayon           1.3.4      2017-09-16 CRAN (R 3.5.0)                    
##  crosstalk        1.0.0      2016-12-21 CRAN (R 3.5.0)                    
##  data.table       1.10.4-3   2017-10-27 CRAN (R 3.5.0)                    
##  datasets       * 3.5.0      2018-04-24 local                             
##  devtools         1.13.5     2018-02-18 CRAN (R 3.5.0)                    
##  digest           0.6.15     2018-01-28 CRAN (R 3.5.0)                    
##  dplyr          * 0.7.5      2018-05-19 cran (@0.7.5)                     
##  evaluate         0.10.1     2017-06-24 CRAN (R 3.5.0)                    
##  forcats        * 0.3.0      2018-02-19 CRAN (R 3.5.0)                    
##  ggnetwork      * 0.5.1      2016-03-25 CRAN (R 3.5.0)                    
##  ggplot2        * 2.2.1.9000 2018-06-21 Github (tidyverse/ggplot2@1c09bae)
##  ggrepel          0.7.0      2017-09-29 CRAN (R 3.5.0)                    
##  glue             1.2.0      2017-10-29 CRAN (R 3.5.0)                    
##  graphics       * 3.5.0      2018-04-24 local                             
##  grDevices      * 3.5.0      2018-04-24 local                             
##  grid             3.5.0      2018-04-24 local                             
##  gtable           0.2.0      2016-02-26 CRAN (R 3.5.0)                    
##  haven            1.1.1      2018-01-18 CRAN (R 3.5.0)                    
##  highr            0.6        2016-05-09 CRAN (R 3.5.0)                    
##  hms              0.4.2      2018-03-10 CRAN (R 3.5.0)                    
##  htmltools        0.3.6      2017-04-28 CRAN (R 3.5.0)                    
##  htmlwidgets      1.2        2018-04-19 CRAN (R 3.5.0)                    
##  httpuv           1.4.3      2018-05-10 cran (@1.4.3)                     
##  httr             1.3.1      2017-08-20 CRAN (R 3.5.0)                    
##  humaniformat     0.6.0      2016-04-24 cran (@0.6.0)                     
##  igraph         * 1.2.1      2018-03-10 CRAN (R 3.5.0)                    
##  intergraph       2.0-2      2016-12-05 CRAN (R 3.5.0)                    
##  janitor        * 1.0.0      2018-03-22 CRAN (R 3.5.0)                    
##  jsonlite         1.5        2017-06-01 CRAN (R 3.5.0)                    
##  knitr            1.20       2018-02-20 CRAN (R 3.5.0)                    
##  labeling         0.3        2014-08-23 CRAN (R 3.5.0)                    
##  later            0.7.2      2018-05-01 cran (@0.7.2)                     
##  lattice          0.20-35    2017-03-25 CRAN (R 3.5.0)                    
##  lazyeval         0.2.1      2017-10-29 CRAN (R 3.5.0)                    
##  lubridate        1.7.4      2018-04-11 CRAN (R 3.5.0)                    
##  magrittr       * 1.5        2014-11-22 CRAN (R 3.5.0)                    
##  memoise          1.1.0      2017-04-21 CRAN (R 3.5.0)                    
##  methods        * 3.5.0      2018-04-24 local                             
##  mime             0.5        2016-07-07 CRAN (R 3.5.0)                    
##  modelr           0.1.2      2018-05-11 cran (@0.1.2)                     
##  munsell          0.4.3      2016-02-13 CRAN (R 3.5.0)                    
##  network        * 1.13.0.1   2018-04-02 CRAN (R 3.5.0)                    
##  nlme             3.1-137    2018-04-07 CRAN (R 3.5.0)                    
##  pillar           1.2.3      2018-05-25 cran (@1.2.3)                     
##  pkgconfig        2.0.1      2017-03-21 CRAN (R 3.5.0)                    
##  plotly         * 4.7.1      2017-07-29 CRAN (R 3.5.0)                    
##  plyr             1.8.4      2016-06-08 CRAN (R 3.5.0)                    
##  promises         1.0.1      2018-04-13 CRAN (R 3.5.0)                    
##  purrr          * 0.2.5      2018-05-29 cran (@0.2.5)                     
##  R6               2.2.2      2017-06-17 CRAN (R 3.5.0)                    
##  Rcpp             0.12.17    2018-05-18 cran (@0.12.17)                   
##  readr          * 1.1.1      2017-05-16 CRAN (R 3.5.0)                    
##  readxl           1.1.0      2018-04-20 CRAN (R 3.5.0)                    
##  reshape2         1.4.3      2017-12-11 CRAN (R 3.5.0)                    
##  rlang            0.2.1      2018-05-30 cran (@0.2.1)                     
##  rmarkdown        1.9        2018-03-01 CRAN (R 3.5.0)                    
##  rprojroot        1.3-2      2018-01-03 CRAN (R 3.5.0)                    
##  rstudioapi       0.7        2017-09-07 CRAN (R 3.5.0)                    
##  rvest            0.3.2      2016-06-17 CRAN (R 3.5.0)                    
##  scales           0.5.0      2017-08-24 CRAN (R 3.5.0)                    
##  shiny            1.1.0      2018-05-17 cran (@1.1.0)                     
##  sna            * 2.4        2016-08-08 CRAN (R 3.5.0)                    
##  snakecase        0.9.1      2018-03-25 CRAN (R 3.5.0)                    
##  statnet.common * 4.1.2      2018-06-05 CRAN (R 3.5.0)                    
##  stats          * 3.5.0      2018-04-24 local                             
##  stringi          1.2.2      2018-05-02 cran (@1.2.2)                     
##  stringr        * 1.3.1      2018-05-10 cran (@1.3.1)                     
##  tibble         * 1.4.2      2018-01-22 CRAN (R 3.5.0)                    
##  tidyr          * 0.8.1      2018-05-18 cran (@0.8.1)                     
##  tidyselect       0.2.4      2018-02-26 CRAN (R 3.5.0)                    
##  tidyverse      * 1.2.1.9000 2018-06-21 Github (hadley/tidyverse@a720dcd) 
##  tools            3.5.0      2018-04-24 local                             
##  utils          * 3.5.0      2018-04-24 local                             
##  viridisLite      0.3.0      2018-02-01 CRAN (R 3.5.0)                    
##  withr            2.1.2      2018-03-15 CRAN (R 3.5.0)                    
##  xfun             0.1        2018-01-22 CRAN (R 3.5.0)                    
##  xml2             1.2.0      2018-01-24 CRAN (R 3.5.0)                    
##  xtable           1.8-2      2016-02-05 CRAN (R 3.5.0)                    
##  yaml             2.1.18     2018-03-08 CRAN (R 3.5.0)