中間生成なしでカラム名を変更する

中間生成的なことをせずに、dataframeのカラム名を変更するには、
dplyr::rename、magrittr::set_names、data.table::setnames
の3つがある (知っている限り)。

# Approach 1: dplyr::rename() takes new_name = old_name pairs
data_frame(a = 1:5, b = 2:6) %>%
  dplyr::rename(a2 = a, b2 = b)
## Source: local data frame [5 x 2]
## 
##      a2    b2
##   (int) (int)
## 1     1     2
## 2     2     3
## 3     3     4
## 4     4     5
## 5     5     6
# Approach 2: magrittr::set_names() replaces the column names with `value`
data_frame(a = 1:5, b = 2:6) %>%
  magrittr::set_names(x = ., value = c("a2", "b2"))
## Source: local data frame [5 x 2]
## 
##      a2    b2
##   (int) (int)
## 1     1     2
## 2     2     3
## 3     3     4
## 4     4     5
## 5     5     6
# Approach 3: data.table::setnames() with a vector of new names.
# The result is piped into print explicitly (presumably because setnames()
# returns its value invisibly -- confirm against the data.table docs).
data_frame(a = 1:5, b = 2:6) %>%
  data.table::setnames(c("a2", "b2")) %>%
  print
## Source: local data frame [5 x 2]
## 
##      a2    b2
##   (int) (int)
## 1     1     2
## 2     2     3
## 3     3     4
## 4     4     5
## 5     5     6

と思っていたけど、dplyr::selectでもいける。

# select() also accepts new_name = old_name, renaming while it selects
data_frame(a = 1:5, b = 2:6) %>%
select(a2 = a, b2 = b)
## Source: local data frame [5 x 2]
## 
##      a2    b2
##   (int) (int)
## 1     1     2
## 2     2     3
## 3     3     4
## 4     4     5
## 5     5     6
# Keep the other columns as well: rename a to a2, then let everything()
# bring along the remaining columns (b, c, d in the output below)
data_frame(a = 1:5, b = 2:6, c = 3:7, d = 4:8) %>%
select(a2 = a, everything())
## Source: local data frame [5 x 4]
## 
##      a2     b     c     d
##   (int) (int) (int) (int)
## 1     1     2     3     4
## 2     2     3     4     5
## 3     3     4     5     6
## 4     4     5     6     7
## 5     5     6     7     8

「selectで列の複製ができるのでは?」と思って、以下のようなコードを書いた結果だめだったので気づいた。

# What I tried: naming column a twice in select() to duplicate it.
# As the output below shows, only the last name (a3) survives for column a.
data_frame(a = 1:5, b = 2:6) %>%
select(a2 = a, a3 = a, b2 = b)
## Source: local data frame [5 x 2]
## 
##      a3    b2
##   (int) (int)
## 1     1     2
## 2     2     3
## 3     3     4
## 4     4     5
## 5     5     6
# What I wanted: create the copies with mutate(), then drop the original
# columns a and b with select()
data_frame(a = 1:5, b = 2:6) %>%
  mutate(a2 = a, a3 = a, b2 = b) %>%
  select(-a, -b)
## Source: local data frame [5 x 3]
## 
##      a2    a3    b2
##   (int) (int) (int)
## 1     1     1     2
## 2     2     2     3
## 3     3     3     4
## 4     4     4     5
## 5     5     5     6

dplyr::transmuteで1行にまとめる

# A somewhat better route: transmute() keeps only the columns it creates,
# so the separate select(-a, -b) step is unnecessary
data_frame(a = 1:5, b = 2:6) %>%
transmute(a2 = a, a3 = a, b2 = b)
## Source: local data frame [5 x 3]
## 
##      a2    a3    b2
##   (int) (int) (int)
## 1     1     1     2
## 2     2     2     3
## 3     3     3     4
## 4     4     4     5
## 5     5     5     6

とりあえず、速さくらべ

# Benchmark helper: build a two-column data frame and rename its columns
# with dplyr::rename(), repeated `iter` times.
#
# iter: number of repetitions (a non-negative whole number).
# Returns NULL invisibly (the value of the `for` loop); the renamed data
# frames are discarded because only the running time is of interest.
colnames_rename <- function(iter) {
  # seq_len() gives an empty sequence for iter = 0, whereas 1:iter would
  # iterate over c(1, 0) and run the body twice.
  for (i in seq_len(iter)) {
    data_frame(a = 1:5, b = 2:6) %>%
      dplyr::rename(a2 = a, b2 = b)
  }
}

# Benchmark helper: build a two-column data frame and replace its column
# names with magrittr::set_names(), repeated `iter` times.
#
# iter: number of repetitions (a non-negative whole number).
# Returns NULL invisibly (the value of the `for` loop); the renamed data
# frames are discarded because only the running time is of interest.
colnames_set_names <- function(iter) {
  # seq_len() gives an empty sequence for iter = 0, whereas 1:iter would
  # iterate over c(1, 0) and run the body twice.
  for (i in seq_len(iter)) {
    data_frame(a = 1:5, b = 2:6) %>%
      magrittr::set_names(x = ., value = c("a2", "b2"))
  }
}

# Benchmark helper: build a two-column data frame and rename its columns
# with data.table::setnames(), repeated `iter` times.
#
# iter: number of repetitions (a non-negative whole number).
# Returns NULL invisibly (the value of the `for` loop); the renamed data
# frames are discarded because only the running time is of interest.
colnames_setnames <- function(iter) {
  # seq_len() gives an empty sequence for iter = 0, whereas 1:iter would
  # iterate over c(1, 0) and run the body twice.
  for (i in seq_len(iter)) {
    data_frame(a = 1:5, b = 2:6) %>%
      data.table::setnames(c("a2", "b2"))
  }
}

# Benchmark helper: build a two-column data frame and rename its columns
# via dplyr::select(new = old), repeated `iter` times.
#
# iter: number of repetitions (a non-negative whole number).
# Returns NULL invisibly (the value of the `for` loop); the renamed data
# frames are discarded because only the running time is of interest.
colnames_select <- function(iter) {
  # seq_len() gives an empty sequence for iter = 0, whereas 1:iter would
  # iterate over c(1, 0) and run the body twice.
  for (i in seq_len(iter)) {
    data_frame(a = 1:5, b = 2:6) %>%
      dplyr::select(a2 = a, b2 = b)
  }
}

# Benchmark helper: build a two-column data frame and rename its columns
# via dplyr::transmute(new = old), repeated `iter` times.
#
# iter: number of repetitions (a non-negative whole number).
# Returns NULL invisibly (the value of the `for` loop); the renamed data
# frames are discarded because only the running time is of interest.
colnames_transmute <- function(iter) {
  # seq_len() gives an empty sequence for iter = 0, whereas 1:iter would
  # iterate over c(1, 0) and run the body twice.
  for (i in seq_len(iter)) {
    data_frame(a = 1:5, b = 2:6) %>%
      dplyr::transmute(a2 = a, b2 = b)
  }
}

# Time 10000 repetitions of each approach with system.time().
# Wrapping each assignment in parentheses prints the result as it is stored;
# the `##` lines are the output recorded when the post was rendered.
(time_dplyr_rename <- system.time(colnames_rename(10000)))
##    user  system elapsed 
##   4.047   0.024   4.091
(time_mag <- system.time(colnames_set_names(10000)))
##    user  system elapsed 
##   2.171   0.009   2.199
(time_dt <- system.time(colnames_setnames(10000)))
##    user  system elapsed 
##   2.434   0.008   2.452
(time_dplyr_select <- system.time(colnames_select(10000)))
##    user  system elapsed 
##   4.324   0.017   4.375
(time_dplyr_transmute <- system.time(colnames_transmute(10000)))
##    user  system elapsed 
##  10.245   0.092  11.241
# Quick visualisation: one bar per approach, bar height = elapsed seconds
data_frame(
  funs = factor(c(
    "dplyr::rename", "dplyr::select", "dplyr::transmute",
    "magrittr::set_names", "data.table::setnames"
  )),
  # Pick the elapsed time by name rather than by position (index 3)
  Time = c(
    time_dplyr_rename[["elapsed"]],
    time_dplyr_select[["elapsed"]],
    time_dplyr_transmute[["elapsed"]],
    time_mag[["elapsed"]],
    time_dt[["elapsed"]]
  )
) %>%
  ggplot(aes(x = funs, y = Time, fill = funs)) +
  geom_bar(stat = "identity") +
  # The fill colour repeats the x axis, so the legend is hidden.
  # FALSE is spelled out because F is an ordinary, reassignable variable.
  guides(fill = FALSE)

plot of chunk SpeedCompare

transmuteがかなり遅く、set_names、setnamesが速い

とはいえ

  1. カラム名変更を繰り返すシチュエーションが考えづらいこと
  2. 列名が多いデータ (wide形式) をほとんど使わないこと
    から無視してもよさそう

それと、データハンドリングの過程でmutate, selectに頼らないパターンが稀なので、
その意味でもdplyr系をわざわざ外すメリットが少ない気がする。

# Record the R version and package versions used to produce the results above
session_info()
##  setting  value                       
##  version  R version 3.2.3 (2015-12-10)
##  system   x86_64, darwin14.5.0        
##  ui       X11                         
##  language (EN)                        
##  collate  en_US.UTF-8                 
##  tz       Asia/Tokyo                  
##  date     2016-04-01                  
## 
##  package      * version    date       source                          
##  agricolae    * 1.2-3      2015-10-06 CRAN (R 3.1.3)                  
##  AlgDesign      1.1-7.3    2014-10-15 CRAN (R 3.1.2)                  
##  assertthat     0.1        2013-12-06 CRAN (R 3.1.0)                  
##  bitops       * 1.0-6      2013-08-17 CRAN (R 3.1.0)                  
##  boot           1.3-17     2015-06-29 CRAN (R 3.2.3)                  
##  chron          2.3-47     2015-06-24 CRAN (R 3.1.3)                  
##  cluster        2.0.3      2015-07-21 CRAN (R 3.2.3)                  
##  coda           0.18-1     2015-10-16 CRAN (R 3.1.3)                  
##  codetools      0.2-14     2015-07-15 CRAN (R 3.2.3)                  
##  colorspace     1.2-6      2015-03-11 CRAN (R 3.1.3)                  
##  combinat       0.0-8      2012-10-29 CRAN (R 3.1.0)                  
##  data.table   * 1.9.6      2015-09-19 CRAN (R 3.1.3)                  
##  DBI            0.3.1      2014-09-24 CRAN (R 3.1.1)                  
##  deldir         0.1-9      2015-03-09 CRAN (R 3.1.3)                  
##  devtools     * 1.9.1      2015-09-11 CRAN (R 3.2.0)                  
##  digest         0.6.8      2014-12-31 CRAN (R 3.1.2)                  
##  doParallel     1.0.10     2015-10-14 CRAN (R 3.1.3)                  
##  doRNG          1.6        2014-03-07 CRAN (R 3.1.2)                  
##  dplyr        * 0.4.3      2015-09-01 CRAN (R 3.1.3)                  
##  evaluate       0.8        2015-09-18 CRAN (R 3.1.3)                  
##  foreach      * 1.4.3      2015-10-13 CRAN (R 3.1.3)                  
##  formatR        1.2.1      2015-09-18 CRAN (R 3.1.3)                  
##  ggplot2      * 2.0.0      2015-12-18 CRAN (R 3.2.3)                  
##  gridExtra    * 2.0.0      2015-07-14 CRAN (R 3.1.3)                  
##  gtable       * 0.1.2      2012-12-05 CRAN (R 3.1.0)                  
##  httr           1.0.0      2015-06-25 CRAN (R 3.1.3)                  
##  iterators      1.0.8      2015-10-13 CRAN (R 3.1.3)                  
##  jsonlite       0.9.19     2015-11-28 CRAN (R 3.1.3)                  
##  klaR           0.6-12     2014-08-06 CRAN (R 3.1.1)                  
##  knitr        * 1.11       2015-08-14 CRAN (R 3.2.3)                  
##  labeling       0.3        2014-08-23 CRAN (R 3.1.1)                  
##  lattice        0.20-33    2015-07-14 CRAN (R 3.2.3)                  
##  lazyeval       0.1.10     2015-01-02 CRAN (R 3.1.2)                  
##  LearnBayes     2.15       2014-05-29 CRAN (R 3.1.0)                  
##  lubridate    * 1.5.0      2015-12-03 CRAN (R 3.2.3)                  
##  magrittr     * 1.5        2014-11-22 CRAN (R 3.1.2)                  
##  MASS         * 7.3-45     2015-11-10 CRAN (R 3.2.3)                  
##  Matrix         1.2-3      2015-11-28 CRAN (R 3.2.3)                  
##  memoise        0.2.1      2014-04-22 CRAN (R 3.1.0)                  
##  munsell        0.4.2      2013-07-11 CRAN (R 3.1.0)                  
##  nlme           3.1-122    2015-08-19 CRAN (R 3.2.3)                  
##  pforeach     * 1.3        2015-12-21 Github (hoxo-m/pforeach@2c44f3b)
##  pkgmaker       0.22       2014-05-14 CRAN (R 3.1.3)                  
##  plyr         * 1.8.3      2015-06-12 CRAN (R 3.1.3)                  
##  R6             2.1.1      2015-08-19 CRAN (R 3.1.3)                  
##  RColorBrewer * 1.1-2      2014-12-07 CRAN (R 3.1.2)                  
##  Rcpp           0.12.2     2015-11-15 CRAN (R 3.1.3)                  
##  RCurl        * 1.95-4.7   2015-06-30 CRAN (R 3.1.3)                  
##  registry       0.3        2015-07-08 CRAN (R 3.1.3)                  
##  reshape2     * 1.4.1      2014-12-06 CRAN (R 3.1.2)                  
##  rJava        * 0.9-7      2015-07-29 CRAN (R 3.1.3)                  
##  rngtools       1.2.4      2014-03-06 CRAN (R 3.1.2)                  
##  scales       * 0.3.0      2015-08-25 CRAN (R 3.1.3)                  
##  slackr       * 1.3.1.9001 2015-12-07 Github (hrbrmstr/slackr@27f777e)
##  sp             1.2-1      2015-10-18 CRAN (R 3.2.3)                  
##  spdep          0.5-92     2015-12-22 CRAN (R 3.2.3)                  
##  stringi        1.0-1      2015-10-22 CRAN (R 3.1.3)                  
##  stringr      * 1.0.0      2015-04-30 CRAN (R 3.1.3)                  
##  tidyr        * 0.3.1      2015-09-10 CRAN (R 3.2.0)                  
##  xlsx         * 0.5.7      2014-08-02 CRAN (R 3.1.1)                  
##  xlsxjars     * 0.6.1      2014-08-22 CRAN (R 3.1.1)                  
##  xtable         1.8-0      2015-11-02 CRAN (R 3.1.3)