require("knitr")
## Loading required package: knitr
opts_knit$set(root.dir = ".")
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.3 ✓ purrr 0.3.4
## ✓ tibble 3.1.1 ✓ dplyr 1.0.5
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
avatar <- read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-08-11/avatar.csv')
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## id = col_double(),
## book = col_character(),
## book_num = col_double(),
## chapter = col_character(),
## chapter_num = col_double(),
## character = col_character(),
## full_text = col_character(),
## character_words = col_character(),
## writer = col_character(),
## director = col_character(),
## imdb_rating = col_double()
## )
Let's count the number of words spoken by each character and pick the five most "talkative" ones.
# Drop scene descriptions and rows with missing values, then count
# whitespace-separated tokens in each line of dialogue
avatar_count <- avatar %>%
  filter(character != "Scene Description") %>%
  drop_na() %>%
  mutate(num_words = str_count(character_words, '\\S+'))
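The pattern '\\S+' matches maximal runs of non-whitespace characters, so str_count gives a simple word count without splitting the strings first. A quick sanity check on a made-up line (not from the dataset):
str_count("Water. Earth. Fire. Air.", '\\S+')
## [1] 4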
# Total words per character; keep the five chattiest
top5_avatar <- avatar_count %>%
  group_by(character) %>%
  summarise(total_words = sum(num_words), .groups = "drop") %>%
  arrange(desc(total_words)) %>%
  slice_head(n = 5)
# One row per character per episode, with the total words spoken in that episode
avatar_per_episode <- avatar_count %>%
  filter(character %in% top5_avatar$character) %>%
  group_by(book, chapter_num, character) %>%
  summarise(words_per_episode = sum(num_words), .groups = "drop")
avatar_per_episode$book <- factor(avatar_per_episode$book, levels = c('Water', 'Earth', 'Fire'))
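An equivalent way to fix the book order uses forcats (already attached as part of the tidyverse) instead of the base factor() call above; this is just an alternative, not part of the original pipeline:
avatar_per_episode <- avatar_per_episode %>%
  mutate(book = fct_relevel(book, 'Water', 'Earth', 'Fire'))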
pal1 <- c('#447738', '#bcb483', '#8d7842', '#7f8063', '#4f5743')
pal2 <- c('#285c19', '#b0a86e', '#765d23', '#656646', '#283415')
ggplot(avatar_per_episode, aes(x = character, y = words_per_episode, fill = character)) +
  labs(title = "WHO IS THE CHATTIEST CHARACTER\nIN AVATAR: THE LAST AIRBENDER?",
       subtitle = "The five most talkative characters speak more than a hundred\nwords in every episode. Toph is introduced only in Book Two: Earth.") +
  theme(plot.title = element_text(size = 18, hjust = .5),
        axis.text.x = element_text(angle = 45, hjust = .5, vjust = .5),
        plot.subtitle = element_text(hjust = .5),
        text = element_text(family = "Herculanum"),  # requires the Herculanum font to be installed
        legend.position = "none",
        panel.background = element_rect(fill = "#ebe5d5",
                                        colour = "black",
                                        size = 0.5,
                                        linetype = "solid"),
        plot.background = element_rect(fill = "#ebe5d5"),
        strip.background = element_rect(fill = "#ebe5d5", colour = "black")) +
  geom_boxplot() +
  geom_point(aes(color = character), size = .5) +  # darker pal2 points over the pal1 boxes
  xlab(element_blank()) +
  ylab('Number of words spoken per episode') +
  scale_fill_manual(values = pal1) +
  scale_color_manual(values = pal2) +
  facet_wrap(~ book)
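To save the last figure to disk, ggsave should work; the file name and dimensions below are arbitrary choices, not part of the original analysis:
ggsave("avatar_words_per_episode.png", width = 10, height = 6, dpi = 300)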
soy <- read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-04-06/soybean_use.csv')
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## entity = col_character(),
## code = col_character(),
## year = col_double(),
## human_food = col_double(),
## animal_feed = col_double(),
## processed = col_double()
## )
continents <- c('Africa', 'Europe', 'Asia', 'Northern America', 'South America', 'Australia & New Zealand')
# Keep 1981-2013 and only the continent-level entities
soy_cont <- soy %>%
  filter(year >= 1981, year <= 2013,
         entity %in% continents)
# Long format (one row per entity and use type); error-bar bounds are mean ± 0.75 SD
soy_new <- soy_cont %>%
  pivot_longer(cols = c(human_food, animal_feed, processed), names_to = "type") %>%
  group_by(entity, type) %>%
  summarise(mean = mean(value),
            min = mean(value) - 0.75 * sd(value),
            max = mean(value) + 0.75 * sd(value),
            .groups = 'drop') %>%
  group_by(entity) %>%
  mutate(totalmean = mean(mean))
soy_new$entity <- factor(soy_new$entity, levels = c('Asia', 'South America', 'Northern America', 'Europe', 'Africa', 'Australia & New Zealand'))
pal3 <- c('#f4b7a6', '#da9b9c', '#ae858d')
ggplot(soy_new, aes(x = entity)) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = .95, size = 10),
        legend.title = element_blank()) +
  geom_col(aes(y = mean, fill = type), position = "dodge") +
  # group = type keeps each error bar aligned with its dodged column
  geom_errorbar(aes(ymin = min, ymax = max, group = type), position = position_dodge2(padding = .7)) +
  ylab('Average soybean use (tonnes), 1981-2013') +
  xlab(element_blank()) +
  scale_fill_manual(values = pal3) +
  scale_color_manual(values = pal3) +
  scale_y_log10()
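Because the y axis is log-scaled, any lower error-bar bound that happened to come out non-positive (mean minus 0.75 SD can drop below zero) cannot be drawn. A quick check that lists any such rows in soy_new, if they exist:
soy_new %>%
  filter(min <= 0)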
sakura <- read_csv('https://raw.githubusercontent.com/tacookson/data/master/sakura-flowering/sakura-modern.csv')
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## station_id = col_double(),
## station_name = col_character(),
## latitude = col_double(),
## longitude = col_double(),
## year = col_double(),
## flower_date = col_date(format = ""),
## flower_doy = col_double(),
## full_bloom_date = col_date(format = ""),
## full_bloom_doy = col_double()
## )
# Drop rows with missing values, then split Kyoto from the other stations
sakura_done <- sakura %>%
  drop_na()
sakura_kyoto <- sakura_done %>%
  filter(station_name == 'Kyoto')
sakura_other <- sakura_done %>%
  filter(station_name != 'Kyoto')
ggplot(sakura_other, aes(x = year, y = full_bloom_doy, group = station_name)) +
  labs(title = "Full sakura bloom\nin Kyoto and other Japanese cities") +
  theme_classic() +
  theme(plot.title = element_text(size = 15, hjust = .5),
        text = element_text(family = "Courier"),
        legend.position = "top") +
  geom_line(color = '#ee8796', alpha = .1) +           # one faint line per station
  geom_line(data = sakura_kyoto, color = '#ee8796') +  # Kyoto highlighted on top
  ylab('Day of year of full bloom') +
  xlab('Year')
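An alternative sketch that avoids splitting the data into two objects: keep one data frame, flag Kyoto, and map the flag to alpha (sakura_flagged and is_kyoto are names introduced here, not in the original):
sakura_flagged <- sakura %>%
  drop_na() %>%
  mutate(is_kyoto = station_name == 'Kyoto')
ggplot(sakura_flagged, aes(x = year, y = full_bloom_doy,
                           group = station_name, alpha = is_kyoto)) +
  geom_line(color = '#ee8796') +
  scale_alpha_manual(values = c(`FALSE` = .1, `TRUE` = 1), guide = "none") +
  ylab('Day of year of full bloom') +
  xlab('Year')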