# Document setup: attach knitr and configure how chunks are evaluated/shown.
# library() stops with an error when knitr is not installed, whereas
# require() would only return FALSE and let the script fail later.
library(knitr)
# Use the current directory as the root directory for evaluating chunks.
opts_knit$set(root.dir = ".")
# Echo the source code of every chunk in the rendered output.
opts_chunk$set(echo = TRUE)

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.3     ✓ purrr   0.3.4
## ✓ tibble  3.1.1     ✓ dplyr   1.0.5
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

1. Аватар. Легенда об Аанге

# Read the Avatar transcript data set (avatar.csv) from the TidyTuesday
# repository (2020-08-11 folder).
avatar <- read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-08-11/avatar.csv"
)
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   id = col_double(),
##   book = col_character(),
##   book_num = col_double(),
##   chapter = col_character(),
##   chapter_num = col_double(),
##   character = col_character(),
##   full_text = col_character(),
##   character_words = col_character(),
##   writer = col_character(),
##   director = col_character(),
##   imdb_rating = col_double()
## )

Посчитаем число слов, произнесённых каждым персонажем, и выберем 5 наиболее “болтливых”.

# Count the words of every spoken line. "Scene Description" rows are
# excluded by the filter below. Only rows without `character_words` are
# dropped: a bare drop_na() removes a row when *any* column is NA, so lines
# with a missing value in an unrelated column (writer, director,
# imdb_rating, ...) would be lost as well.
avatar_count <- avatar %>%
  filter(character != "Scene Description") %>%
  drop_na(character_words) %>%
  mutate(num_words = str_count(character_words, "\\S+"))

# The five characters with the largest total word count.
top5_avatar <- avatar_count %>%
  group_by(character) %>%
  summarise(sum = sum(num_words), .groups = "drop") %>%
  arrange(desc(sum)) %>%
  slice_head(n = 5)

# One row per (book, chapter, character) with the number of words spoken.
# summarise() is used instead of a grouped mutate(): mutate() would repeat
# the same total on every dialogue line, so chapters with many lines would
# be counted many times in any per-episode statistic or plot.
avatar_per_episode <- avatar_count %>%
  filter(character %in% top5_avatar$character) %>%
  group_by(book, chapter_num, character) %>%
  summarise(words_per_episode = sum(num_words), .groups = "drop") %>%
  # Explicit level order: Water, Earth, Fire (instead of alphabetical).
  mutate(book = factor(book, levels = c("Water", "Earth", "Fire")))
# Palettes for the five characters: pal1 fills the boxes, pal2 (a darker
# shade of each pal1 colour) draws the box outlines and the points.
pal1 <- c("#447738", "#bcb483", "#8d7842", "#7f8063", "#4f5743")
pal2 <- c("#285c19", "#b0a86e", "#765d23", "#656646", "#283415")

# Box plot of words per episode for each character, one facet per book.
# `colour` is mapped to `character` so that scale_colour_manual(pal2)
# actually applies; without a mapped colour aesthetic that scale is ignored.
ggplot(
  avatar_per_episode,
  aes(
    x = character,
    y = words_per_episode,
    fill = character,
    colour = character
  )
) +
  geom_boxplot() +
  geom_point(size = .5) +
  facet_wrap(~ book) +
  scale_fill_manual(values = pal1) +
  scale_colour_manual(values = pal2) +
  labs(
    title = "WHO IS THE CHATTIEST CHARACTER\nIN AVATAR AANG TV-SERIES?",
    subtitle = "Five most talkative characters have more than a hundred\nwords every episode. Toph is introduced only in book two: Earth.",
    x = NULL,
    y = "Number of words spoken per episode"
  ) +
  theme(
    plot.title = element_text(size = 18, hjust = .5),
    axis.text.x = element_text(angle = 45, hjust = .5, vjust = .5),
    plot.subtitle = element_text(hjust = .5),
    text = element_text(family = "Herculanum"),
    # The x axis and the fill colours already identify the characters.
    legend.position = "none",
    panel.background = element_rect(
      fill = "#ebe5d5",
      colour = "black",
      size = 0.5,
      linetype = "solid"
    ),
    plot.background = element_rect(fill = "#ebe5d5"),
    strip.background = element_rect(fill = "#ebe5d5", colour = "black")
  )

2. Использование соевой пищи

# Read the soybean use data set (soybean_use.csv) from the TidyTuesday
# repository (2021-04-06 folder).
soy <- read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-04-06/soybean_use.csv"
)
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   entity = col_character(),
##   code = col_character(),
##   year = col_double(),
##   human_food = col_double(),
##   animal_feed = col_double(),
##   processed = col_double()
## )
# Entities to keep (continent-level aggregates).
continents <- c(
  "Africa", "Europe", "Asia", "Northern America", "South America",
  "Australia & New Zealand"
)

# Continent rows for the years 1981-2013 (inclusive).
soy_cont <- soy %>%
  filter(year >= 1981, year <= 2013, entity %in% continents)

# Long format (one row per entity/year/type), then per entity and type:
# the mean over the years and a band of +/- 0.75 standard deviations.
# na.rm = TRUE keeps a single missing year from turning a whole summary
# into NA.
soy_new <- soy_cont %>%
  pivot_longer(
    cols = c(human_food, animal_feed, processed),
    names_to = "type"
  ) %>%
  group_by(entity, type) %>%
  summarise(
    mean = mean(value, na.rm = TRUE),
    min = mean(value, na.rm = TRUE) - 0.75 * sd(value, na.rm = TRUE),
    max = mean(value, na.rm = TRUE) + 0.75 * sd(value, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  # Per-entity average of the type means.
  group_by(entity) %>%
  mutate(totalmean = mean(mean)) %>%
  # Do not leave the result grouped for downstream code.
  ungroup() %>%
  # Explicit level order (instead of alphabetical).
  mutate(
    entity = factor(
      entity,
      levels = c(
        "Asia", "South America", "Northern America", "Europe", "Africa",
        "Australia & New Zealand"
      )
    )
  )
# Fill colours for the three use types.
pal3 <- c("#f4b7a6", "#da9b9c", "#ae858d")

# Dodged bars of the mean soy use per entity and type, with error bars
# spanning the `min`..`max` columns, on a log10 y axis.
# The former scale_color_manual() call was dropped: no layer maps a colour
# aesthetic, so it had no effect.
# NOTE(review): `min` is mean - 0.75 * sd and can be <= 0, which a log10
# axis cannot display - confirm the error bars are complete.
ggplot(soy_new, aes(x = entity)) +
  geom_col(aes(y = mean, fill = type), position = "dodge") +
  geom_errorbar(
    aes(ymin = min, ymax = max),
    position = position_dodge2(padding = .7)
  ) +
  scale_fill_manual(values = pal3) +
  scale_y_log10() +
  labs(x = NULL, y = "Average soy use (t) 1981-2013") +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = .95, size = 10),
    legend.title = element_blank()
  )

3. Цветение сакуры

# Read the sakura flowering data set (sakura-modern.csv) from the
# tacookson/data repository.
sakura <- read_csv(
  file = "https://raw.githubusercontent.com/tacookson/data/master/sakura-flowering/sakura-modern.csv"
)
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   station_id = col_double(),
##   station_name = col_character(),
##   latitude = col_double(),
##   longitude = col_double(),
##   year = col_double(),
##   flower_date = col_date(format = ""),
##   flower_doy = col_double(),
##   full_bloom_date = col_date(format = ""),
##   full_bloom_doy = col_double()
## )
# Keep the observations that have full-bloom information. Only the
# full-bloom columns are checked: a bare drop_na() would also discard rows
# where just `flower_date` / `flower_doy` are missing.
# No group_by() is applied; the tables are returned ungrouped.
sakura_done <- sakura %>%
  drop_na(full_bloom_date, full_bloom_doy)

# Kyoto on its own, and every other station.
sakura_kyoto <- sakura_done %>%
  filter(station_name == "Kyoto")
sakura_other <- sakura_done %>%
  filter(station_name != "Kyoto")
# Full-bloom day of year against full-bloom date, one line per station:
# all non-Kyoto stations as faint lines, Kyoto as an opaque line on top.
ggplot(
  sakura_other,
  aes(x = full_bloom_date, y = full_bloom_doy, group = station_name)
) +
  # The first layer uses the plot's default data (sakura_other).
  geom_line(color = "#ee8796", alpha = .1) +
  geom_line(data = sakura_kyoto, color = "#ee8796") +
  labs(
    title = "Full sakura bloom\nin Kyoto and other Japan cities",
    x = "Year",
    y = "Day of year of full bloom"
  ) +
  theme_classic() +
  theme(
    plot.title = element_text(size = 15, hjust = .5),
    text = element_text(family = "Courier"),
    legend.position = "top"
  )