
Rの初心者の域を脱していないが、自分にわかる (わかっているつもりになっている) 範囲には答えるようにしている
答える → プロが現れる → わかっていなかったことがわかる → ひとり赤面する → 勉強するのループが重要だ (たぶん)


どうせなので、こつこつためたマイデータ (研究室への出入などの記録およそ2年分) を可視化する

# Load the data
# NOTE(review): the right-hand side of this assignment is missing here (the
# call that reads the data appears to have been lost). As written, R continues
# the expression onto the next code line, i.e. `Timecard <- Timecard %>% head`.
# Restore the original read call before running.
Timecard <-

# Take a quick look
Timecard %>% head
##        day week Arrive    Leave Event Alc
## 1: 8/30/14  Sat  13:30 24:20:00        NA
## 2: 8/31/14  Sun  14:30    22:15        NA
## 3:  9/1/14  Mon  09:59    19:52        NA
## 4:  9/2/14  Tue  09:56    21:10        NA
## 5:  9/3/14  Wed  09:58    20:58        NA
## 6:  9/4/14  Thu  08:15    22:10        NA
Timecard %>% str
## Classes 'data.table' and 'data.frame':	554 obs. of  6 variables:
##  $ day   : chr  "8/30/14" "8/31/14" "9/1/14" "9/2/14" ...
##  $ week  : chr  "Sat" "Sun" "Mon" "Tue" ...
##  $ Arrive: chr  "13:30" "14:30" "09:59" "09:56" ...
##  $ Leave : chr  "24:20:00" "22:15" "19:52" "21:10" ...
##  $ Event : chr  "" "" "" "" ...
##  $ Alc   : int  NA NA NA NA NA NA NA NA NA NA ...
##  - attr(*, ".internal.selfref")=<externalptr>


# Turn the clock strings into decimal hours (hours + minutes / 60) by slicing
# characters 1-2 and 4-5 of each value, then reshape to long format with one
# row per arrive/leave observation.
# (Original author's note: {lubridate} could probably handle this; to be studied.)
InOutLab <- melt(
  transmute(
    Timecard,
    arrive = as.numeric(str_sub(Arrive, 1, 2)) +
      as.numeric(str_sub(Arrive, 4, 5)) / 60,
    leave = as.numeric(str_sub(Leave, 1, 2)) +
      as.numeric(str_sub(Leave, 4, 5)) / 60
  ),
  id.vars = NULL
)

str(InOutLab)
## Classes 'data.table' and 'data.frame':	1108 obs. of  2 variables:
##  $ variable: Factor w/ 2 levels "arrive","leave": 1 1 1 1 1 1 1 1 1 1 ...
##  $ value   : num  13.5 14.5 9.98 9.93 9.97 ...
##  - attr(*, ".internal.selfref")=<externalptr>




# First attempt: histogram of arrival/leave times, filled by variable.
# NOTE(review): the layer of this first plot is missing in the source; the
# expression ended in a dangling `+`, so R chained it onto the following plot
# (ggplot + ggplot) and failed. A default geom_histogram() is assumed here;
# confirm against the original post.
InOutLab %>%
  ggplot(aes(x = value, fill = variable, group = variable)) +
  geom_histogram()

# Same histogram, but with both groups drawn in place (position = "identity")
# and made semi-transparent so the overlap stays visible.
InOutLab %>%
  ggplot(aes(x = value, fill = variable, group = variable)) +
  geom_histogram(position = "identity", alpha = .5)

..XXX..はgenerated variables、あるいはcomputed variablesと呼ばれているらしい
stat = "bin"を指定しているのは、geom_text関数のデフォルトでは度数を計算してくれないから

# Overlaid histogram with the bin count printed at the top of each bar.
# geom_text() uses stat = "bin" so that ..count.. is computed for the labels.
ggplot(InOutLab, aes(x = value, fill = variable, group = variable)) +
  geom_histogram(alpha = 0.5, position = "identity") +
  geom_text(
    aes(y = ..count.., label = ..count.., colour = variable),
    stat = "bin"
  )

vjustで鉛直 (vertical) 方向の位置を調節 (adjust) する

# Same plot as before, with the labels nudged above the bars via vjust.
ggplot(InOutLab, aes(x = value, fill = variable, group = variable)) +
  geom_histogram(alpha = 0.5, position = "identity") +
  geom_text(
    aes(y = ..count.., label = ..count.., colour = variable),
    stat = "bin",
    vjust = -0.5
  )

ifelse関数を使って、..count.. > 0 なら..count..を、それ以外なら空白を返す

# Labelled histogram that prints an empty string instead of the count for
# bins whose count is not greater than zero.
ggplot(InOutLab, aes(x = value, fill = variable, group = variable)) +
  geom_histogram(alpha = 0.5, position = "identity") +
  geom_text(
    aes(
      y = ..count..,
      label = ifelse(..count.. > 0, ..count.., ""),
      colour = variable
    ),
    stat = "bin",
    vjust = -0.5
  )

ifelse関数を使って、..group.. == 1 (arriveが1、leaveが2) なら..count..を、それ以外なら-1 * ..count..を返す

# Mirrored histogram: bars of group 1 point upward, bars of every other group
# point downward (their counts are multiplied by -1).
# The computed variable ..group.. had been stripped from the condition, which
# left `ifelse( == 1, ...)` and made the snippet a parse error.
InOutLab %>%
  ggplot(aes(x = value, fill = variable, group = variable)) +
  geom_histogram(aes(y = ifelse(..group.. == 1, ..count.., -1 * ..count..)),
                 position = "identity", alpha = .5)

# Mirrored histogram with count labels. The label position is flipped with the
# same group test as the bars; empty bins get an empty label.
# The computed variable ..group.. had been stripped from both conditions,
# which made the snippet a parse error; it is restored here.
InOutLab %>%
  ggplot(aes(x = value, fill = variable, group = variable)) +
  geom_histogram(aes(y = ifelse(..group.. == 1, ..count.., -1 * ..count..)),
                 position = "identity", alpha = .5) +
  geom_text(aes(y = ..count.. * ifelse(..group.. == 1, 1, -1),
                label = ifelse(..count.. != 0, ..count.., ""),
                col = variable),
            stat = "bin", vjust = -.5)

# Mirrored histogram with count labels whose vertical justification also
# depends on the group: -.5 for group 1, 1.5 for the other group.
# vjust is mapped inside aes() because it refers to the computed variable
# ..group..; that variable had been stripped from all three conditions
# (parse error) and is restored here.
InOutLab %>%
  ggplot(aes(x = value, fill = variable, group = variable)) +
  geom_histogram(aes(y = ifelse(..group.. == 1, ..count.., -1 * ..count..)),
                 position = "identity", alpha = .5) +
  geom_text(aes(y = ..count.. * ifelse(..group.. == 1, 1, -1),
                label = ifelse(..count.. != 0, ..count.., ""),
                col = variable,
                vjust = ifelse(..group.. == 1, -.5, 1.5)),
            stat = "bin")

stat系は簡単な計算には便利だが速度は遅いため、大規模データの可視化では先にready plotな状態にしてからggplot2に渡した方がよいらしい

#' Pre-compute histogram bins so the result can be plotted directly
#'
#' Bins `vec` with base `hist()` (without drawing anything) and returns the
#' bin midpoints together with the number of observations in each bin.
#'
#' @param vec Numeric vector to bin.
#' @param algorithm Value passed to `hist()` as `breaks`; defaults to
#'   `"Sturges"`.
#' @return A data.frame with one row per bin and two columns: `bins` (bin
#'   midpoints) and `freqs` (bin counts).
hist_ <-
  function(vec, algorithm = "Sturges"){
    # Bin once; plot = FALSE returns the histogram object without plotting.
    binned <- hist(vec, breaks = algorithm, plot = FALSE)
    # `mids` holds the centre of each bin, i.e. the mean of each pair of
    # adjacent breaks, which is what the two-point moving average over
    # `breaks` was computing.
    data.frame(bins = binned[["mids"]], freqs = binned[["counts"]])
  }

# Bin each variable separately with hist_(), then draw the pre-computed
# counts as bars with their values as labels.
# Because binning happens per group, the bin width may differ between groups.
InOutLab %>%
  group_by(variable) %>%
  do(hist_(.$value)) %>%
  ggplot(aes(x = bins, y = freqs, colour = variable, fill = variable)) +
  geom_bar(alpha = 0.5, position = "identity", stat = "identity") +
  geom_text(aes(label = freqs), vjust = -0.5)

generated variablesはaes()の中でしか呼べない

# Counter-example: this call is expected to FAIL.
# vjust is set outside aes(), where the computed variable ..group.. does not
# exist, so evaluating the ifelse() raises "object '..group..' not found".
# (..group.. had been stripped from the conditions, turning the intended
# runtime error into a parse error; it is restored here.)
InOutLab %>%
  ggplot(aes(x = value, fill = variable, group = variable)) +
  geom_histogram(aes(y = ifelse(..group.. == 1, ..count.., -1 * ..count..)),
                 position = "identity", alpha = .5) +
  geom_text(aes(y = ..count.. * ifelse(..group.. == 1, 1, -1),
                label = ifelse(..count.. != 0, ..count.., ""),
                col = variable),
            stat = "bin", vjust = ifelse(..group.. == 1, -.5, 1.5))

"Error in ifelse(..group.. == 1, -0.5, 1.5) : object '..group..' not found"

stat_bin(geom = "text") とgeom_text(stat = "bin") の違い

こちらのページで答えられている方法にしたがって、stat_bin()内でgeom = "text"を指定すると、グループ化した場合にラベルがうまく表示されない

# Count labels drawn through stat_bin(geom = "text") instead of
# geom_text(stat = "bin"); no position is given for the label layer here.
ggplot(InOutLab, aes(x = value, fill = variable, group = variable)) +
  geom_histogram(alpha = 0.5, position = "identity") +
  stat_bin(
    aes(
      group = variable,
      y = ..count..,
      label = ..count..,
      colour = variable
    ),
    geom = "text",
    vjust = -0.5
  )

stat_bin()では、geom_histogram()で指定したposition = "identity"を引き継いでおらず、ラベルの表示位置が積み上げ型の場合の表示位置になる
これを修正するためには、stat_bin()内でもposition = "identity"を指定する必要がある
冗長になるので、geom_text(stat = "bin")の方がよさそう?

# Same stat_bin(geom = "text") labels, now with position = "identity" passed
# to the label layer as well as to the histogram.
ggplot(InOutLab, aes(x = value, fill = variable, group = variable)) +
  geom_histogram(alpha = 0.5, position = "identity") +
  stat_bin(
    aes(
      group = variable,
      y = ..count..,
      label = ..count..,
      colour = variable
    ),
    geom = "text",
    position = "identity",
    vjust = -0.5
  )

# 参照用:
#  上で示したgeom_textを使った場合
#  geom_text()ではposition = "identity"を指定しなくてもOK 
# InOutLab %>%
#   ggplot(aes(x = value, fill = variable, group = variable)) + 
#   geom_histogram(position = "identity", alpha = .5) +
#   geom_text(aes(y = ..count.., label = ..count.., col = variable),
#             stat = "bin", vjust = -.5)


r-wakalangへようこそ (uriさん@Qiita)
ggplot2のgenerated variables(..変数名..)の使い方 (Technically, technophobic.@Hatena::Diary)
ggplot2で指定できるgenerated variableの一覧 (Technically, technophobic.@Hatena::Diary)
How to show count of each bin on histogram on the plot (Stack Overflow)
ggplot2再入門 (yutannihilationさん@SlideShare)

