L <- list("A",c(1,2),30)L
[[1]][1] "A"[[2]][1] 1 2[[3]][1] 30
print('L1')
[1] "L1"
L1 <- list(L,40)L1
[[1]][[1]][[1]][1] "A"[[1]][[2]][1] 1 2[[1]][[3]][1] 30[[2]][1] 40
L[[4]] <- 'new_element'L
[[1]][1] "A"[[2]][1] 1 2[[3]][1] 30[[4]][1] "new_element"
L
[[1]][1] "A"[[2]][1] 1 2[[3]][1] 30[[4]][1] "new_element"
L[3]
[[1]][1] 30
L[[3]]
[1] 30
mtcars$mpg
[1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 10.4[16] 10.4 14.7 32.4 30.4 33.9 21.5 15.5 15.2 13.3 19.2 27.3 26.0 30.4 15.8 19.7[31] 15.0 21.4
dim(mtcars$mpg)
NULL
mtcars$mpg[3]
[1] 22.8
dim(mtcars$mpg[3])
NULL
mtcars[3,]
mpg cyl disp hp drat wt qsec vs am gear carbDatsun 710 22.8 4 108 93 3.85 2.32 18.61 1 1 4 1
dim(mtcars[3,])
[1] 1 11
mtcars[3,][2]
cylDatsun 710 4
dim(mtcars[3,][2])
[1] 1 1
mtcars[3,][[2]]
[1] 4
dim(mtcars[3,][[2]])
NULL
L1
[[1]][[1]][[1]][1] "A"[[1]][[2]][1] 1 2[[1]][[3]][1] 30[[2]][1] 40
L1 [[1]]
[[1]][1] "A"[[2]][1] 1 2[[3]][1] 30
L1 [[1]] [[2]]
[1] 1 2
L <- list(10,20)L
[[1]][1] 10[[2]][1] 20
L$abc <- 123L
[[1]][1] 10[[2]][1] 20$abc[1] 123
names(L)
[1] "" "" "abc"
L[[3]]
[1] 123
L$abc
[1] 123
L[['abc']]
[1] 123
Циклы в R медленные!
for(year in c(2010:2015)){ print(paste('The year is', year))}
[1] "The year is 2010"[1] "The year is 2011"[1] "The year is 2012"[1] "The year is 2013"[1] "The year is 2014"[1] "The year is 2015"
mt <- mtcarshead(mt,2)
mpg cyl disp hp drat wt qsec vs am gear carbMazda RX4 21 6 160 110 3.9 2.620 16.46 0 1 4 4Mazda RX4 Wag 21 6 160 110 3.9 2.875 17.02 0 1 4 4
for(i in 1:nrow(mt)){ mt$new[i] <- i^2}head(mt,4)
mpg cyl disp hp drat wt qsec vs am gear carb newMazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 1Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 4Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 9Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 16
5/2
[1] 2.5
5%%2
[1] 1
mt <- mtcarsfor(i in 1:nrow(mt)){ if((i%%2) == 0){ mt$new[i] = i^2 mt$type[i] = 'even' } else{ mt$new[i] = i^3 mt$type[i] = 'odd' }}mt[1:4,(ncol(mt)-5):ncol(mt)]
vs am gear carb new typeMazda RX4 0 1 4 4 1 oddMazda RX4 Wag 0 1 4 4 4 evenDatsun 710 1 1 4 1 27 oddHornet 4 Drive 1 0 3 1 16 even
v = c(14,7,6,9,2)ifelse(v %% 2 == 1,"odd","even")
[1] "even" "odd" "even" "odd" "even"
x <- 1:5for(val in x){if (val == 3){break}print(val)}
[1] 1[1] 2
x <- 1:5for(val in x){if (val == 3){next}print(val)}
[1] 1[1] 2[1] 4[1] 5
i <- 1while (i < 6){ print(i) i = i+1}
[1] 1[1] 2[1] 3[1] 4[1] 5
set.seed(123)mt <- matrix(sample(1:5,10000,replace=T),ncol=10)dim(mt)
[1] 1000 10
mt[1:5,1:5]
[,1] [,2] [,3] [,4] [,5][1,] 3 4 5 4 1[2,] 3 4 4 1 2[3,] 2 2 3 2 3[4,] 2 4 1 4 5[5,] 3 5 2 4 2
colnames(mt) <- paste("D",1:ncol(mt),sep='_')rownames(mt) <- paste("Stud",1:nrow(mt),sep='.')head(mt)
D_1 D_2 D_3 D_4 D_5 D_6 D_7 D_8 D_9 D_10Stud.1 3 4 5 4 1 1 5 1 5 1Stud.2 3 4 4 1 2 3 5 1 3 3Stud.3 2 2 3 2 3 3 5 5 1 3Stud.4 2 4 1 4 5 2 3 5 5 2Stud.5 3 5 2 4 2 3 5 1 3 1Stud.6 5 4 5 5 1 1 5 3 3 4
x <- rep(NA,ncol(mt))length(x)
[1] 10
x
[1] NA NA NA NA NA NA NA NA NA NA
for(i in 1:ncol(mt)){ x[i] <- mean(mt[,i])}x
[1] 2.985 2.987 2.966 2.954 2.974 3.003 2.960 2.938 2.989 2.962
x.2 <- apply(mt,2,mean)x
[1] 2.985 2.987 2.966 2.954 2.974 3.003 2.960 2.938 2.989 2.962
x.2
D_1 D_2 D_3 D_4 D_5 D_6 D_7 D_8 D_9 D_10 2.985 2.987 2.966 2.954 2.974 3.003 2.960 2.938 2.989 2.962
a <- apply(mt,1,sum)a[1:6]
Stud.1 Stud.2 Stud.3 Stud.4 Stud.5 Stud.6 30 29 29 33 29 36
x <- list(a=1,b=1:3,d=10:100)lapply(x, FUN = length)
$a[1] 1$b[1] 3$d[1] 91
x <- list(a=1,b=1:3,d=10:100)sapply(x, FUN = length)
a b d 1 3 91
Забудьте все, чему вас учили...
CRAN - https://cran.r-project.org/: install.packages()
Bioconductor - https://www.bioconductor.org/
Github - https://github.com/
remove.packages("packagename") - удалить пакет
update.packages() - обновить все пакеты
library() - список доступных пакетов
library("packagename") - загрузить установленный пакет в текущую R сессию
vignette("packagename") - посмотреть "красивый" мануал по пакету, есть не для всех пакетов
Большинство нужных пакетов уже установлено в web RStudio на kodomo!
Нужно только подгрузить пакет с помощью library("packagename").
Если пакета не обнаружено - тогда устанавливайте.
Установить пакет нужно один раз, но подгружать при каждом запуске рабочего сеанса.
library(tidyverse)
install.packages("tidyverse")
library(tidyverse)
tidyverse_packages()
[1] "broom" "cli" "crayon" "dbplyr" [5] "dplyr" "dtplyr" "forcats" "ggplot2" [9] "googledrive" "googlesheets4" "haven" "hms" [13] "httr" "jsonlite" "lubridate" "magrittr" [17] "modelr" "pillar" "purrr" "readr" [21] "readxl" "reprex" "rlang" "rstudioapi" [25] "rvest" "stringr" "tibble" "tidyr" [29] "xml2" "tidyverse"
Похоже на data.frame
Ведут себя более предсказуемо и удобно
При работе с данными с помощью коллекции пакетов tidyverse в большинстве случаев на выходе получаются tibble-фреймы
tibble( x = 1:5, y = 1, `1z b` = x^2 + y)
# A tibble: 5 × 3 x y `1z b` <int> <dbl> <dbl>1 1 1 22 2 1 53 3 1 104 4 1 175 5 1 26
Не преобразует строки в факторы
Не изменяет имена переменных
Можно (но нужно ли?) использовать "недопустимые" имена столбцов
Выводит информацию о размере выводимого фрейма и типе данных в столбцах
Позволяет ссылаться на только что созданные переменные
tribble( ~x,~y,~z, "a",2,3.6, "b",5,1.0)
# A tibble: 2 × 3 x y z <chr> <dbl> <dbl>1 a 2 3.62 b 5 1
head(iris)
Sepal.Length Sepal.Width Petal.Length Petal.Width Species1 5.1 3.5 1.4 0.2 setosa2 4.9 3.0 1.4 0.2 setosa3 4.7 3.2 1.3 0.2 setosa4 4.6 3.1 1.5 0.2 setosa5 5.0 3.6 1.4 0.2 setosa6 5.4 3.9 1.7 0.4 setosa
as_tibble(iris)
# A tibble: 150 × 5 Sepal.Length Sepal.Width Petal.Length Petal.Width Species <dbl> <dbl> <dbl> <dbl> <fct> 1 5.1 3.5 1.4 0.2 setosa 2 4.9 3 1.4 0.2 setosa 3 4.7 3.2 1.3 0.2 setosa 4 4.6 3.1 1.5 0.2 setosa 5 5 3.6 1.4 0.2 setosa 6 5.4 3.9 1.7 0.4 setosa 7 4.6 3.4 1.4 0.3 setosa 8 5 3.4 1.5 0.2 setosa 9 4.4 2.9 1.4 0.2 setosa 10 4.9 3.1 1.5 0.1 setosa # … with 140 more rows
Выведет только 10 первых строк и помещающиеся столбцы, а не "простыню"
https://dplyr.tidyverse.org/reference/index.html
Очень много разных функций
У каждой функции очень много разных опций
Разберем только некоторые наиболее употребимые
nrow(starwars)
[1] 87
a <- filter(starwars, height > 150)nrow(a)
[1] 69
Подключаем конвейер %>%
starwars %>% filter(height > 150)
# A tibble: 69 × 14 name height mass hair_…¹ skin_…² eye_c…³ birth…⁴ sex gender homew…⁵ <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr> <chr> 1 Luke Skywa… 172 77 blond fair blue 19 male mascu… Tatooi… 2 C-3PO 167 75 <NA> gold yellow 112 none mascu… Tatooi… 3 Darth Vader 202 136 none white yellow 41.9 male mascu… Tatooi… 4 Owen Lars 178 120 brown,… light blue 52 male mascu… Tatooi… 5 Beru White… 165 75 brown light blue 47 fema… femin… Tatooi… 6 Biggs Dark… 183 84 black light brown 24 male mascu… Tatooi… 7 Obi-Wan Ke… 182 77 auburn… fair blue-g… 57 male mascu… Stewjon 8 Anakin Sky… 188 84 blond fair blue 41.9 male mascu… Tatooi… 9 Wilhuff Ta… 180 NA auburn… fair blue 64 male mascu… Eriadu 10 Chewbacca 228 112 brown unknown blue 200 male mascu… Kashyy…# … with 59 more rows, 4 more variables: species <chr>, films <list>,# vehicles <list>, starships <list>, and abbreviated variable names# ¹hair_color, ²skin_color, ³eye_color, ⁴birth_year, ⁵homeworld
starwars %>% filter(height > 150, mass < 100, hair_color == "blond")
# A tibble: 2 × 14 name height mass hair_…¹ skin_…² eye_c…³ birth…⁴ sex gender homew…⁵ <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr> <chr> 1 Luke Skywal… 172 77 blond fair blue 19 male mascu… Tatooi…2 Anakin Skyw… 188 84 blond fair blue 41.9 male mascu… Tatooi…# … with 4 more variables: species <chr>, films <list>, vehicles <list>,# starships <list>, and abbreviated variable names ¹hair_color, ²skin_color,# ³eye_color, ⁴birth_year, ⁵homeworld
starwars %>% slice(10:20)
# A tibble: 11 × 14 name height mass hair_…¹ skin_…² eye_c…³ birth…⁴ sex gender homew…⁵ <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr> <chr> 1 Obi-Wan Ke… 182 77 auburn… fair blue-g… 57 male mascu… Stewjon 2 Anakin Sky… 188 84 blond fair blue 41.9 male mascu… Tatooi… 3 Wilhuff Ta… 180 NA auburn… fair blue 64 male mascu… Eriadu 4 Chewbacca 228 112 brown unknown blue 200 male mascu… Kashyy… 5 Han Solo 180 80 brown fair brown 29 male mascu… Corell… 6 Greedo 173 74 <NA> green black 44 male mascu… Rodia 7 Jabba Desi… 175 1358 <NA> green-… orange 600 herm… mascu… Nal Hu… 8 Wedge Anti… 170 77 brown fair hazel 21 male mascu… Corell… 9 Jek Tono P… 180 110 brown fair blue NA male mascu… Bestin…10 Yoda 66 17 white green brown 896 male mascu… <NA> 11 Palpatine 170 75 grey pale yellow 82 male mascu… Naboo # … with 4 more variables: species <chr>, films <list>, vehicles <list>,# starships <list>, and abbreviated variable names ¹hair_color, ²skin_color,# ³eye_color, ⁴birth_year, ⁵homeworld
starwars %>% filter(height > 150, hair_color == "blond") %>% slice(1:3)
# A tibble: 3 × 14 name height mass hair_…¹ skin_…² eye_c…³ birth…⁴ sex gender homew…⁵ <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr> <chr> 1 Luke Skywal… 172 77 blond fair blue 19 male mascu… Tatooi…2 Anakin Skyw… 188 84 blond fair blue 41.9 male mascu… Tatooi…3 Finis Valor… 170 NA blond fair blue 91 male mascu… Corusc…# … with 4 more variables: species <chr>, films <list>, vehicles <list>,# starships <list>, and abbreviated variable names ¹hair_color, ²skin_color,# ³eye_color, ⁴birth_year, ⁵homeworld
n() - число записей
starwars %>% slice((n()-3):n())
# A tibble: 4 × 14 name height mass hair_…¹ skin_…² eye_c…³ birth…⁴ sex gender homew…⁵ <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr> <chr> 1 Poe Dameron NA NA brown light brown NA male mascu… <NA> 2 BB8 NA NA none none black NA none mascu… <NA> 3 Captain Pha… NA NA unknown unknown unknown NA <NA> <NA> <NA> 4 Padmé Amida… 165 45 brown light brown 46 fema… femin… Naboo # … with 4 more variables: species <chr>, films <list>, vehicles <list>,# starships <list>, and abbreviated variable names ¹hair_color, ²skin_color,# ³eye_color, ⁴birth_year, ⁵homeworld
starwars %>% slice_head(n=5)
# A tibble: 5 × 14 name height mass hair_…¹ skin_…² eye_c…³ birth…⁴ sex gender homew…⁵ <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr> <chr> 1 Luke Skywal… 172 77 blond fair blue 19 male mascu… Tatooi…2 C-3PO 167 75 <NA> gold yellow 112 none mascu… Tatooi…3 R2-D2 96 32 <NA> white,… red 33 none mascu… Naboo 4 Darth Vader 202 136 none white yellow 41.9 male mascu… Tatooi…5 Leia Organa 150 49 brown light brown 19 fema… femin… Aldera…# … with 4 more variables: species <chr>, films <list>, vehicles <list>,# starships <list>, and abbreviated variable names ¹hair_color, ²skin_color,# ³eye_color, ⁴birth_year, ⁵homeworld
set.seed(123)starwars %>% slice_sample(n=10)
# A tibble: 10 × 14 name height mass hair_…¹ skin_…² eye_c…³ birth…⁴ sex gender homew…⁵ <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr> <chr> 1 Qui-Gon Ji… 193 89 brown fair blue 92 male mascu… <NA> 2 Raymus Ant… 188 79 brown light brown NA male mascu… Aldera… 3 Eeth Koth 171 NA black brown brown NA male mascu… Iridon… 4 Han Solo 180 80 brown fair brown 29 male mascu… Corell… 5 Zam Wesell 168 55 blonde fair, … yellow NA fema… femin… Zolan 6 Darth Maul 175 80 none red yellow 54 male mascu… Dathom… 7 Kit Fisto 196 87 none green black NA male mascu… Glee A… 8 Bib Fortuna 180 NA none pale pink NA male mascu… Ryloth 9 Poe Dameron NA NA brown light brown NA male mascu… <NA> 10 Lobot 175 79 none light blue 37 male mascu… Bespin # … with 4 more variables: species <chr>, films <list>, vehicles <list>,# starships <list>, and abbreviated variable names ¹hair_color, ²skin_color,# ³eye_color, ⁴birth_year, ⁵homeworld
starwars %>% select(name,height,mass,hair_color)
# A tibble: 87 × 4 name height mass hair_color <chr> <int> <dbl> <chr> 1 Luke Skywalker 172 77 blond 2 C-3PO 167 75 <NA> 3 R2-D2 96 32 <NA> 4 Darth Vader 202 136 none 5 Leia Organa 150 49 brown 6 Owen Lars 178 120 brown, grey 7 Beru Whitesun lars 165 75 brown 8 R5-D4 97 32 <NA> 9 Biggs Darklighter 183 84 black 10 Obi-Wan Kenobi 182 77 auburn, white# … with 77 more rows
starwars %>% select(name:hair_color)
# A tibble: 87 × 4 name height mass hair_color <chr> <int> <dbl> <chr> 1 Luke Skywalker 172 77 blond 2 C-3PO 167 75 <NA> 3 R2-D2 96 32 <NA> 4 Darth Vader 202 136 none 5 Leia Organa 150 49 brown 6 Owen Lars 178 120 brown, grey 7 Beru Whitesun lars 165 75 brown 8 R5-D4 97 32 <NA> 9 Biggs Darklighter 183 84 black 10 Obi-Wan Kenobi 182 77 auburn, white# … with 77 more rows
starwars %>% select(-(name:hair_color))
# A tibble: 87 × 10 skin_color eye_c…¹ birth…² sex gender homew…³ species films vehic…⁴ stars…⁵ <chr> <chr> <dbl> <chr> <chr> <chr> <chr> <lis> <list> <list> 1 fair blue 19 male mascu… Tatooi… Human <chr> <chr> <chr> 2 gold yellow 112 none mascu… Tatooi… Droid <chr> <chr> <chr> 3 white, bl… red 33 none mascu… Naboo Droid <chr> <chr> <chr> 4 white yellow 41.9 male mascu… Tatooi… Human <chr> <chr> <chr> 5 light brown 19 fema… femin… Aldera… Human <chr> <chr> <chr> 6 light blue 52 male mascu… Tatooi… Human <chr> <chr> <chr> 7 light blue 47 fema… femin… Tatooi… Human <chr> <chr> <chr> 8 white, red red NA none mascu… Tatooi… Droid <chr> <chr> <chr> 9 light brown 24 male mascu… Tatooi… Human <chr> <chr> <chr> 10 fair blue-g… 57 male mascu… Stewjon Human <chr> <chr> <chr> # … with 77 more rows, and abbreviated variable names ¹eye_color, ²birth_year,# ³homeworld, ⁴vehicles, ⁵starships
as_tibble(iris) %>% select(starts_with('Sepal'))
# A tibble: 150 × 2 Sepal.Length Sepal.Width <dbl> <dbl> 1 5.1 3.5 2 4.9 3 3 4.7 3.2 4 4.6 3.1 5 5 3.6 6 5.4 3.9 7 4.6 3.4 8 5 3.4 9 4.4 2.910 4.9 3.1# … with 140 more rows
Аналогично:
starts_with(), ends_with(), matches() и contains()
starwars %>% select(name)
# A tibble: 87 × 1 name <chr> 1 Luke Skywalker 2 C-3PO 3 R2-D2 4 Darth Vader 5 Leia Organa 6 Owen Lars 7 Beru Whitesun lars 8 R5-D4 9 Biggs Darklighter 10 Obi-Wan Kenobi # … with 77 more rows
starwars %>% pull(name)
[1] "Luke Skywalker" "C-3PO" "R2-D2" [4] "Darth Vader" "Leia Organa" "Owen Lars" [7] "Beru Whitesun lars" "R5-D4" "Biggs Darklighter" [10] "Obi-Wan Kenobi" "Anakin Skywalker" "Wilhuff Tarkin" [13] "Chewbacca" "Han Solo" "Greedo" [16] "Jabba Desilijic Tiure" "Wedge Antilles" "Jek Tono Porkins" [19] "Yoda" "Palpatine" "Boba Fett" [22] "IG-88" "Bossk" "Lando Calrissian" [25] "Lobot" "Ackbar" "Mon Mothma" [28] "Arvel Crynyd" "Wicket Systri Warrick" "Nien Nunb" [31] "Qui-Gon Jinn" "Nute Gunray" "Finis Valorum" [34] "Jar Jar Binks" "Roos Tarpals" "Rugor Nass" [37] "Ric Olié" "Watto" "Sebulba" [40] "Quarsh Panaka" "Shmi Skywalker" "Darth Maul" [43] "Bib Fortuna" "Ayla Secura" "Dud Bolt" [46] "Gasgano" "Ben Quadinaros" "Mace Windu" [49] "Ki-Adi-Mundi" "Kit Fisto" "Eeth Koth" [52] "Adi Gallia" "Saesee Tiin" "Yarael Poof" [55] "Plo Koon" "Mas Amedda" "Gregar Typho" [58] "Cordé" "Cliegg Lars" "Poggle the Lesser" [61] "Luminara Unduli" "Barriss Offee" "Dormé" [64] "Dooku" "Bail Prestor Organa" "Jango Fett" [67] "Zam Wesell" "Dexter Jettster" "Lama Su" [70] "Taun We" "Jocasta Nu" "Ratts Tyerell" [73] "R4-P17" "Wat Tambor" "San Hill" [76] "Shaak Ti" "Grievous" "Tarfful" [79] "Raymus Antilles" "Sly Moore" "Tion Medon" [82] "Finn" "Rey" "Poe Dameron" [85] "BB8" "Captain Phasma" "Padmé Amidala"
starwars %>% arrange(name,desc(mass),hair_color) %>% slice_head(n = 3)
# A tibble: 3 × 14 name height mass hair_…¹ skin_…² eye_c…³ birth…⁴ sex gender homew…⁵ <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr> <chr> 1 Ackbar 180 83 none brown … orange 41 male mascu… Mon Ca…2 Adi Gallia 184 50 none dark blue NA fema… femin… Corusc…3 Anakin Skyw… 188 84 blond fair blue 41.9 male mascu… Tatooi…# … with 4 more variables: species <chr>, films <list>, vehicles <list>,# starships <list>, and abbreviated variable names ¹hair_color, ²skin_color,# ³eye_color, ⁴birth_year, ⁵homeworld
Сначала сортируем по столбцу name по возрастанию.
Потом (при совпадении значений name) сортируем по столбцу mass по убыванию - desc(mass).
Потом сортируем по столбцу hair_color по возрастанию.
Отсутствующие значения ВСЕГДА в конце
starwars %>% select(name:hair_color) %>% mutate(HM = height*mass)
# A tibble: 87 × 5 name height mass hair_color HM <chr> <int> <dbl> <chr> <dbl> 1 Luke Skywalker 172 77 blond 13244 2 C-3PO 167 75 <NA> 12525 3 R2-D2 96 32 <NA> 3072 4 Darth Vader 202 136 none 27472 5 Leia Organa 150 49 brown 7350 6 Owen Lars 178 120 brown, grey 21360 7 Beru Whitesun lars 165 75 brown 12375 8 R5-D4 97 32 <NA> 3104 9 Biggs Darklighter 183 84 black 1537210 Obi-Wan Kenobi 182 77 auburn, white 14014# … with 77 more rows
starwars %>% select(name:hair_color) %>% transmute(inv_mass = 1/mass, inv_height = 1/height)
# A tibble: 87 × 2 inv_mass inv_height <dbl> <dbl> 1 0.0130 0.00581 2 0.0133 0.00599 3 0.0312 0.0104 4 0.00735 0.00495 5 0.0204 0.00667 6 0.00833 0.00562 7 0.0133 0.00606 8 0.0312 0.0103 9 0.0119 0.0054610 0.0130 0.00549# … with 77 more rows
starwars %>% select(name:hair_color) %>% rename(NAME=name)
# A tibble: 87 × 4 NAME height mass hair_color <chr> <int> <dbl> <chr> 1 Luke Skywalker 172 77 blond 2 C-3PO 167 75 <NA> 3 R2-D2 96 32 <NA> 4 Darth Vader 202 136 none 5 Leia Organa 150 49 brown 6 Owen Lars 178 120 brown, grey 7 Beru Whitesun lars 165 75 brown 8 R5-D4 97 32 <NA> 9 Biggs Darklighter 183 84 black 10 Obi-Wan Kenobi 182 77 auburn, white# … with 77 more rows
df <- tribble( ~a,~b, "a",1, "a",1, "a",2, "b",3, "b",3)distinct(df)
# A tibble: 3 × 2 a b <chr> <dbl>1 a 12 a 23 b 3
starwars %>% summarise(mass_mean_noNA = mean(mass, na.rm=T), mass_mean_withNA = mean(mass), heihgt_max = max(height,na.rm = T), count = n())
# A tibble: 1 × 4 mass_mean_noNA mass_mean_withNA heihgt_max count <dbl> <dbl> <int> <int>1 97.3 NA 264 87
starwars %>% select(name:eye_color) %>% drop_na() %>% group_by(eye_color) %>% summarise(count = n(), height = max(height))
# A tibble: 11 × 3 eye_color count height <chr> <int> <int> 1 black 6 229 2 blue 12 234 3 blue-gray 1 182 4 brown 13 193 5 green, yellow 1 216 6 hazel 2 178 7 orange 5 224 8 red 3 200 9 unknown 2 19310 white 1 17811 yellow 8 202
Число строк и максимальный рост (height) подсчитаны для каждого цвета глаз отдельно
После применения group_by() все манипуляции будут проходить для каждой группы отдельно
Если далее нужно вернуться к работе с полным набором данных, нужно разгруппировать tibble-фрейм - ungroup()
https://stringr.tidyverse.org/
Посмотрим работу некоторых функций в связке с dplyr
Тестовый набор данных:
SW <- starwars %>% select(name:eye_color) %>% drop_na()
SW %>% mutate(name_len = str_length(name))
# A tibble: 54 × 7 name height mass hair_color skin_color eye_color name_len <chr> <int> <dbl> <chr> <chr> <chr> <int> 1 Luke Skywalker 172 77 blond fair blue 14 2 Darth Vader 202 136 none white yellow 11 3 Leia Organa 150 49 brown light brown 11 4 Owen Lars 178 120 brown, grey light blue 9 5 Beru Whitesun lars 165 75 brown light blue 18 6 Biggs Darklighter 183 84 black light brown 17 7 Obi-Wan Kenobi 182 77 auburn, white fair blue-gray 14 8 Anakin Skywalker 188 84 blond fair blue 16 9 Chewbacca 228 112 brown unknown blue 910 Han Solo 180 80 brown fair brown 8# … with 44 more rows
SW %>% mutate(name_start = str_sub(name,1,5))
# A tibble: 54 × 7 name height mass hair_color skin_color eye_color name_start <chr> <int> <dbl> <chr> <chr> <chr> <chr> 1 Luke Skywalker 172 77 blond fair blue "Luke " 2 Darth Vader 202 136 none white yellow "Darth" 3 Leia Organa 150 49 brown light brown "Leia " 4 Owen Lars 178 120 brown, grey light blue "Owen " 5 Beru Whitesun lars 165 75 brown light blue "Beru " 6 Biggs Darklighter 183 84 black light brown "Biggs" 7 Obi-Wan Kenobi 182 77 auburn, white fair blue-gray "Obi-W" 8 Anakin Skywalker 188 84 blond fair blue "Anaki" 9 Chewbacca 228 112 brown unknown blue "Chewb" 10 Han Solo 180 80 brown fair brown "Han S" # … with 44 more rows
SW %>% mutate(name_upper = str_to_upper(name))
# A tibble: 54 × 7 name height mass hair_color skin_color eye_color name_upper <chr> <int> <dbl> <chr> <chr> <chr> <chr> 1 Luke Skywalker 172 77 blond fair blue LUKE SKYW… 2 Darth Vader 202 136 none white yellow DARTH VAD… 3 Leia Organa 150 49 brown light brown LEIA ORGA… 4 Owen Lars 178 120 brown, grey light blue OWEN LARS 5 Beru Whitesun lars 165 75 brown light blue BERU WHIT… 6 Biggs Darklighter 183 84 black light brown BIGGS DAR… 7 Obi-Wan Kenobi 182 77 auburn, white fair blue-gray OBI-WAN K… 8 Anakin Skywalker 188 84 blond fair blue ANAKIN SK… 9 Chewbacca 228 112 brown unknown blue CHEWBACCA 10 Han Solo 180 80 brown fair brown HAN SOLO # … with 44 more rows
Аналогично:
str_to_lower(), str_to_title()
SW %>% mutate(nameL = str_detect(name,"^L"))
# A tibble: 54 × 7 name height mass hair_color skin_color eye_color nameL <chr> <int> <dbl> <chr> <chr> <chr> <lgl> 1 Luke Skywalker 172 77 blond fair blue TRUE 2 Darth Vader 202 136 none white yellow FALSE 3 Leia Organa 150 49 brown light brown TRUE 4 Owen Lars 178 120 brown, grey light blue FALSE 5 Beru Whitesun lars 165 75 brown light blue FALSE 6 Biggs Darklighter 183 84 black light brown FALSE 7 Obi-Wan Kenobi 182 77 auburn, white fair blue-gray FALSE 8 Anakin Skywalker 188 84 blond fair blue FALSE 9 Chewbacca 228 112 brown unknown blue FALSE10 Han Solo 180 80 brown fair brown FALSE# … with 44 more rows
Находит значения в столбце name, начинающиеся с L.
"L$" - аналогично: заканчиваются на L
Можно искать вхождение подстроки или паттерна
SW %>% mutate(out = str_count(eye_color,c('l','a')))
# A tibble: 54 × 7 name height mass hair_color skin_color eye_color out <chr> <int> <dbl> <chr> <chr> <chr> <int> 1 Luke Skywalker 172 77 blond fair blue 1 2 Darth Vader 202 136 none white yellow 0 3 Leia Organa 150 49 brown light brown 0 4 Owen Lars 178 120 brown, grey light blue 0 5 Beru Whitesun lars 165 75 brown light blue 1 6 Biggs Darklighter 183 84 black light brown 0 7 Obi-Wan Kenobi 182 77 auburn, white fair blue-gray 1 8 Anakin Skywalker 188 84 blond fair blue 0 9 Chewbacca 228 112 brown unknown blue 110 Han Solo 180 80 brown fair brown 0# … with 44 more rows
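Подсчитывает число вхождений паттерна в каждом значении столбца eye_color. Внимание: вектор паттернов c('l','a') применяется к строкам поочередно (recycling) - в нечетных строках считается буква l, в четных - буква a. Чтобы считать обе буквы в каждой строке, используйте паттерн "[la]"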
SW %>% mutate(rm = str_remove_all(eye_color, "[aeiou]"))
# A tibble: 54 × 7 name height mass hair_color skin_color eye_color rm <chr> <int> <dbl> <chr> <chr> <chr> <chr> 1 Luke Skywalker 172 77 blond fair blue bl 2 Darth Vader 202 136 none white yellow yllw 3 Leia Organa 150 49 brown light brown brwn 4 Owen Lars 178 120 brown, grey light blue bl 5 Beru Whitesun lars 165 75 brown light blue bl 6 Biggs Darklighter 183 84 black light brown brwn 7 Obi-Wan Kenobi 182 77 auburn, white fair blue-gray bl-gry 8 Anakin Skywalker 188 84 blond fair blue bl 9 Chewbacca 228 112 brown unknown blue bl 10 Han Solo 180 80 brown fair brown brwn # … with 44 more rows
Удалили из столбца eye_color все гласные буквы
str_remove() удаляет только первое вхождение
SW %>% mutate(rep = str_replace_all(eye_color,"[aeiou]","-"))
# A tibble: 54 × 7 name height mass hair_color skin_color eye_color rep <chr> <int> <dbl> <chr> <chr> <chr> <chr> 1 Luke Skywalker 172 77 blond fair blue bl-- 2 Darth Vader 202 136 none white yellow y-ll-w 3 Leia Organa 150 49 brown light brown br-wn 4 Owen Lars 178 120 brown, grey light blue bl-- 5 Beru Whitesun lars 165 75 brown light blue bl-- 6 Biggs Darklighter 183 84 black light brown br-wn 7 Obi-Wan Kenobi 182 77 auburn, white fair blue-gray bl---gr-y 8 Anakin Skywalker 188 84 blond fair blue bl-- 9 Chewbacca 228 112 brown unknown blue bl-- 10 Han Solo 180 80 brown fair brown br-wn # … with 44 more rows
Заменили в столбце eye_color все гласные буквы на -
str_replace() заменит только первое вхождение
SW %>% mutate(first = word(name,1))
# A tibble: 54 × 7 name height mass hair_color skin_color eye_color first <chr> <int> <dbl> <chr> <chr> <chr> <chr> 1 Luke Skywalker 172 77 blond fair blue Luke 2 Darth Vader 202 136 none white yellow Darth 3 Leia Organa 150 49 brown light brown Leia 4 Owen Lars 178 120 brown, grey light blue Owen 5 Beru Whitesun lars 165 75 brown light blue Beru 6 Biggs Darklighter 183 84 black light brown Biggs 7 Obi-Wan Kenobi 182 77 auburn, white fair blue-gray Obi-Wan 8 Anakin Skywalker 188 84 blond fair blue Anakin 9 Chewbacca 228 112 brown unknown blue Chewbacca10 Han Solo 180 80 brown fair brown Han # … with 44 more rows
L <- list("A",c(1,2),30)L
[[1]][1] "A"[[2]][1] 1 2[[3]][1] 30
print('L1')
[1] "L1"
Keyboard shortcuts
↑, ←, Pg Up, k | Go to previous slide |
↓, →, Pg Dn, Space, j | Go to next slide |
Home | Go to first slide |
End | Go to last slide |
Number + Return | Go to specific slide |
b / m / f | Toggle blackout / mirrored / fullscreen mode |
c | Clone slideshow |
p | Toggle presenter mode |
t | Restart the presentation timer |
?, h | Toggle this help |
o | Tile View: Overview of Slides |
Alt + f | Fit Slides to Screen |
Esc | Back to slideshow |
L <- list("A",c(1,2),30)L
[[1]][1] "A"[[2]][1] 1 2[[3]][1] 30
print('L1')
[1] "L1"
L1 <- list(L,40)L1
[[1]][[1]][[1]][1] "A"[[1]][[2]][1] 1 2[[1]][[3]][1] 30[[2]][1] 40
L[[4]] <- 'new_element'L
[[1]][1] "A"[[2]][1] 1 2[[3]][1] 30[[4]][1] "new_element"
L
[[1]][1] "A"[[2]][1] 1 2[[3]][1] 30[[4]][1] "new_element"
L[3]
[[1]][1] 30
L[[3]]
[1] 30
mtcars$mpg
[1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 10.4[16] 10.4 14.7 32.4 30.4 33.9 21.5 15.5 15.2 13.3 19.2 27.3 26.0 30.4 15.8 19.7[31] 15.0 21.4
dim(mtcars$mpg)
NULL
mtcars$mpg[3]
[1] 22.8
dim(mtcars$mpg[3])
NULL
mtcars[3,]
mpg cyl disp hp drat wt qsec vs am gear carbDatsun 710 22.8 4 108 93 3.85 2.32 18.61 1 1 4 1
dim(mtcars[3,])
[1] 1 11
mtcars[3,][2]
cylDatsun 710 4
dim(mtcars[3,][2])
[1] 1 1
mtcars[3,][[2]]
[1] 4
dim(mtcars[3,][[2]])
NULL
L1
[[1]][[1]][[1]][1] "A"[[1]][[2]][1] 1 2[[1]][[3]][1] 30[[2]][1] 40
L1 [[1]]
[[1]][1] "A"[[2]][1] 1 2[[3]][1] 30
L1 [[1]] [[2]]
[1] 1 2
L <- list(10,20)L
[[1]][1] 10[[2]][1] 20
L$abc <- 123L
[[1]][1] 10[[2]][1] 20$abc[1] 123
names(L)
[1] "" "" "abc"
L[[3]]
[1] 123
L$abc
[1] 123
L[['abc']]
[1] 123
Циклы в R медленные!
for(year in c(2010:2015)){ print(paste('The year is', year))}
[1] "The year is 2010"[1] "The year is 2011"[1] "The year is 2012"[1] "The year is 2013"[1] "The year is 2014"[1] "The year is 2015"
mt <- mtcarshead(mt,2)
mpg cyl disp hp drat wt qsec vs am gear carbMazda RX4 21 6 160 110 3.9 2.620 16.46 0 1 4 4Mazda RX4 Wag 21 6 160 110 3.9 2.875 17.02 0 1 4 4
for(i in 1:nrow(mt)){ mt$new[i] <- i^2}head(mt,4)
mpg cyl disp hp drat wt qsec vs am gear carb newMazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 1Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 4Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 9Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 16
5/2
[1] 2.5
5%%2
[1] 1
mt <- mtcarsfor(i in 1:nrow(mt)){ if((i%%2) == 0){ mt$new[i] = i^2 mt$type[i] = 'even' } else{ mt$new[i] = i^3 mt$type[i] = 'odd' }}mt[1:4,(ncol(mt)-5):ncol(mt)]
vs am gear carb new typeMazda RX4 0 1 4 4 1 oddMazda RX4 Wag 0 1 4 4 4 evenDatsun 710 1 1 4 1 27 oddHornet 4 Drive 1 0 3 1 16 even
v = c(14,7,6,9,2)ifelse(v %% 2 == 1,"odd","even")
[1] "even" "odd" "even" "odd" "even"
x <- 1:5for(val in x){if (val == 3){break}print(val)}
[1] 1[1] 2
x <- 1:5for(val in x){if (val == 3){next}print(val)}
[1] 1[1] 2[1] 4[1] 5
i <- 1while (i < 6){ print(i) i = i+1}
[1] 1[1] 2[1] 3[1] 4[1] 5
set.seed(123)mt <- matrix(sample(1:5,10000,replace=T),ncol=10)dim(mt)
[1] 1000 10
mt[1:5,1:5]
[,1] [,2] [,3] [,4] [,5][1,] 3 4 5 4 1[2,] 3 4 4 1 2[3,] 2 2 3 2 3[4,] 2 4 1 4 5[5,] 3 5 2 4 2
colnames(mt) <- paste("D",1:ncol(mt),sep='_')rownames(mt) <- paste("Stud",1:nrow(mt),sep='.')head(mt)
D_1 D_2 D_3 D_4 D_5 D_6 D_7 D_8 D_9 D_10Stud.1 3 4 5 4 1 1 5 1 5 1Stud.2 3 4 4 1 2 3 5 1 3 3Stud.3 2 2 3 2 3 3 5 5 1 3Stud.4 2 4 1 4 5 2 3 5 5 2Stud.5 3 5 2 4 2 3 5 1 3 1Stud.6 5 4 5 5 1 1 5 3 3 4
x <- rep(NA,ncol(mt))length(x)
[1] 10
x
[1] NA NA NA NA NA NA NA NA NA NA
for(i in 1:ncol(mt)){ x[i] <- mean(mt[,i])}x
[1] 2.985 2.987 2.966 2.954 2.974 3.003 2.960 2.938 2.989 2.962
x.2 <- apply(mt,2,mean)x
[1] 2.985 2.987 2.966 2.954 2.974 3.003 2.960 2.938 2.989 2.962
x.2
D_1 D_2 D_3 D_4 D_5 D_6 D_7 D_8 D_9 D_10 2.985 2.987 2.966 2.954 2.974 3.003 2.960 2.938 2.989 2.962
a <- apply(mt,1,sum)a[1:6]
Stud.1 Stud.2 Stud.3 Stud.4 Stud.5 Stud.6 30 29 29 33 29 36
x <- list(a=1,b=1:3,d=10:100)lapply(x, FUN = length)
$a[1] 1$b[1] 3$d[1] 91
x <- list(a=1,b=1:3,d=10:100)sapply(x, FUN = length)
a b d 1 3 91
Забудьте все, чему вас учили...
CRAN - https://cran.r-project.org/: install.packages()
Bioconductor - https://www.bioconductor.org/
Github - https://github.com/
remove.packages("packagename") - удалить пакет
update.packages() - обновить все пакеты
library() - список доступных пакетов
library("packagename") - загрузить установленный пакет в текущую R сессию
vignette("packagename") - посмотреть "красивый" мануал по пакету, есть не для всех пакетов
Большинство нужных пакетов уже установлено в web RStudio на kodomo!
Нужно только подгрузить пакет с помощью library("packagename").
Если пакета не обнаружено - тогда устанавливайте.
Установить пакет нужно один раз, но подгружать при каждом запуске рабочего сеанса.
library(tidyverse)
install.packages("tidyverse")
library(tidyverse)
tidyverse_packages()
[1] "broom" "cli" "crayon" "dbplyr" [5] "dplyr" "dtplyr" "forcats" "ggplot2" [9] "googledrive" "googlesheets4" "haven" "hms" [13] "httr" "jsonlite" "lubridate" "magrittr" [17] "modelr" "pillar" "purrr" "readr" [21] "readxl" "reprex" "rlang" "rstudioapi" [25] "rvest" "stringr" "tibble" "tidyr" [29] "xml2" "tidyverse"
Похоже на data.frame
Ведут себя более предсказуемо и удобно
При работе с данными с помощью коллекции пакетов tidyverse в большинстве случаев на выходе получаются tibble-фреймы
tibble( x = 1:5, y = 1, `1z b` = x^2 + y)
# A tibble: 5 × 3 x y `1z b` <int> <dbl> <dbl>1 1 1 22 2 1 53 3 1 104 4 1 175 5 1 26
Не преобразует строки в факторы
Не изменяет имена переменных
Можно (но нужно ли?) использовать "недопустимые" имена столбцов
Выводит информацию о размере выводимого фрейма и типе данных в столбцах
Позволяет ссылаться на только что созданные переменные
tribble( ~x,~y,~z, "a",2,3.6, "b",5,1.0)
# A tibble: 2 × 3 x y z <chr> <dbl> <dbl>1 a 2 3.62 b 5 1
head(iris)
Sepal.Length Sepal.Width Petal.Length Petal.Width Species1 5.1 3.5 1.4 0.2 setosa2 4.9 3.0 1.4 0.2 setosa3 4.7 3.2 1.3 0.2 setosa4 4.6 3.1 1.5 0.2 setosa5 5.0 3.6 1.4 0.2 setosa6 5.4 3.9 1.7 0.4 setosa
as_tibble(iris)
# A tibble: 150 × 5 Sepal.Length Sepal.Width Petal.Length Petal.Width Species <dbl> <dbl> <dbl> <dbl> <fct> 1 5.1 3.5 1.4 0.2 setosa 2 4.9 3 1.4 0.2 setosa 3 4.7 3.2 1.3 0.2 setosa 4 4.6 3.1 1.5 0.2 setosa 5 5 3.6 1.4 0.2 setosa 6 5.4 3.9 1.7 0.4 setosa 7 4.6 3.4 1.4 0.3 setosa 8 5 3.4 1.5 0.2 setosa 9 4.4 2.9 1.4 0.2 setosa 10 4.9 3.1 1.5 0.1 setosa # … with 140 more rows
Выведет только 10 первых строк и помещающиеся столбцы, а не "простыню"
https://dplyr.tidyverse.org/reference/index.html
Очень много разных функций
У каждой функции очень много разных опций
Разберем только некоторые наиболее употребимые
nrow(starwars)
[1] 87
a <- filter(starwars, height > 150)nrow(a)
[1] 69
Подключаем конвейер %>%
starwars %>% filter(height > 150)
# A tibble: 69 × 14 name height mass hair_…¹ skin_…² eye_c…³ birth…⁴ sex gender homew…⁵ <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr> <chr> 1 Luke Skywa… 172 77 blond fair blue 19 male mascu… Tatooi… 2 C-3PO 167 75 <NA> gold yellow 112 none mascu… Tatooi… 3 Darth Vader 202 136 none white yellow 41.9 male mascu… Tatooi… 4 Owen Lars 178 120 brown,… light blue 52 male mascu… Tatooi… 5 Beru White… 165 75 brown light blue 47 fema… femin… Tatooi… 6 Biggs Dark… 183 84 black light brown 24 male mascu… Tatooi… 7 Obi-Wan Ke… 182 77 auburn… fair blue-g… 57 male mascu… Stewjon 8 Anakin Sky… 188 84 blond fair blue 41.9 male mascu… Tatooi… 9 Wilhuff Ta… 180 NA auburn… fair blue 64 male mascu… Eriadu 10 Chewbacca 228 112 brown unknown blue 200 male mascu… Kashyy…# … with 59 more rows, 4 more variables: species <chr>, films <list>,# vehicles <list>, starships <list>, and abbreviated variable names# ¹hair_color, ²skin_color, ³eye_color, ⁴birth_year, ⁵homeworld
starwars %>% filter(height > 150, mass < 100, hair_color == "blond")
# A tibble: 2 × 14 name height mass hair_…¹ skin_…² eye_c…³ birth…⁴ sex gender homew…⁵ <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr> <chr> 1 Luke Skywal… 172 77 blond fair blue 19 male mascu… Tatooi…2 Anakin Skyw… 188 84 blond fair blue 41.9 male mascu… Tatooi…# … with 4 more variables: species <chr>, films <list>, vehicles <list>,# starships <list>, and abbreviated variable names ¹hair_color, ²skin_color,# ³eye_color, ⁴birth_year, ⁵homeworld
starwars %>% slice(10:20)
# A tibble: 11 × 14 name height mass hair_…¹ skin_…² eye_c…³ birth…⁴ sex gender homew…⁵ <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr> <chr> 1 Obi-Wan Ke… 182 77 auburn… fair blue-g… 57 male mascu… Stewjon 2 Anakin Sky… 188 84 blond fair blue 41.9 male mascu… Tatooi… 3 Wilhuff Ta… 180 NA auburn… fair blue 64 male mascu… Eriadu 4 Chewbacca 228 112 brown unknown blue 200 male mascu… Kashyy… 5 Han Solo 180 80 brown fair brown 29 male mascu… Corell… 6 Greedo 173 74 <NA> green black 44 male mascu… Rodia 7 Jabba Desi… 175 1358 <NA> green-… orange 600 herm… mascu… Nal Hu… 8 Wedge Anti… 170 77 brown fair hazel 21 male mascu… Corell… 9 Jek Tono P… 180 110 brown fair blue NA male mascu… Bestin…10 Yoda 66 17 white green brown 896 male mascu… <NA> 11 Palpatine 170 75 grey pale yellow 82 male mascu… Naboo # … with 4 more variables: species <chr>, films <list>, vehicles <list>,# starships <list>, and abbreviated variable names ¹hair_color, ²skin_color,# ³eye_color, ⁴birth_year, ⁵homeworld
starwars %>% filter(height > 150, hair_color == "blond") %>% slice(1:3)
# A tibble: 3 × 14 name height mass hair_…¹ skin_…² eye_c…³ birth…⁴ sex gender homew…⁵ <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr> <chr> 1 Luke Skywal… 172 77 blond fair blue 19 male mascu… Tatooi…2 Anakin Skyw… 188 84 blond fair blue 41.9 male mascu… Tatooi…3 Finis Valor… 170 NA blond fair blue 91 male mascu… Corusc…# … with 4 more variables: species <chr>, films <list>, vehicles <list>,# starships <list>, and abbreviated variable names ¹hair_color, ²skin_color,# ³eye_color, ⁴birth_year, ⁵homeworld
n() - число записей
starwars %>% slice((n()-3):n())
# A tibble: 4 × 14 name height mass hair_…¹ skin_…² eye_c…³ birth…⁴ sex gender homew…⁵ <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr> <chr> 1 Poe Dameron NA NA brown light brown NA male mascu… <NA> 2 BB8 NA NA none none black NA none mascu… <NA> 3 Captain Pha… NA NA unknown unknown unknown NA <NA> <NA> <NA> 4 Padmé Amida… 165 45 brown light brown 46 fema… femin… Naboo # … with 4 more variables: species <chr>, films <list>, vehicles <list>,# starships <list>, and abbreviated variable names ¹hair_color, ²skin_color,# ³eye_color, ⁴birth_year, ⁵homeworld
starwars %>% slice_head(n=5)
# A tibble: 5 × 14 name height mass hair_…¹ skin_…² eye_c…³ birth…⁴ sex gender homew…⁵ <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr> <chr> 1 Luke Skywal… 172 77 blond fair blue 19 male mascu… Tatooi…2 C-3PO 167 75 <NA> gold yellow 112 none mascu… Tatooi…3 R2-D2 96 32 <NA> white,… red 33 none mascu… Naboo 4 Darth Vader 202 136 none white yellow 41.9 male mascu… Tatooi…5 Leia Organa 150 49 brown light brown 19 fema… femin… Aldera…# … with 4 more variables: species <chr>, films <list>, vehicles <list>,# starships <list>, and abbreviated variable names ¹hair_color, ²skin_color,# ³eye_color, ⁴birth_year, ⁵homeworld
set.seed(123)
starwars %>% slice_sample(n=10)
# A tibble: 10 × 14 name height mass hair_…¹ skin_…² eye_c…³ birth…⁴ sex gender homew…⁵ <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr> <chr> 1 Qui-Gon Ji… 193 89 brown fair blue 92 male mascu… <NA> 2 Raymus Ant… 188 79 brown light brown NA male mascu… Aldera… 3 Eeth Koth 171 NA black brown brown NA male mascu… Iridon… 4 Han Solo 180 80 brown fair brown 29 male mascu… Corell… 5 Zam Wesell 168 55 blonde fair, … yellow NA fema… femin… Zolan 6 Darth Maul 175 80 none red yellow 54 male mascu… Dathom… 7 Kit Fisto 196 87 none green black NA male mascu… Glee A… 8 Bib Fortuna 180 NA none pale pink NA male mascu… Ryloth 9 Poe Dameron NA NA brown light brown NA male mascu… <NA> 10 Lobot 175 79 none light blue 37 male mascu… Bespin # … with 4 more variables: species <chr>, films <list>, vehicles <list>,# starships <list>, and abbreviated variable names ¹hair_color, ²skin_color,# ³eye_color, ⁴birth_year, ⁵homeworld
starwars %>% select(name,height,mass,hair_color)
# A tibble: 87 × 4 name height mass hair_color <chr> <int> <dbl> <chr> 1 Luke Skywalker 172 77 blond 2 C-3PO 167 75 <NA> 3 R2-D2 96 32 <NA> 4 Darth Vader 202 136 none 5 Leia Organa 150 49 brown 6 Owen Lars 178 120 brown, grey 7 Beru Whitesun lars 165 75 brown 8 R5-D4 97 32 <NA> 9 Biggs Darklighter 183 84 black 10 Obi-Wan Kenobi 182 77 auburn, white# … with 77 more rows
starwars %>% select(name:hair_color)
# A tibble: 87 × 4 name height mass hair_color <chr> <int> <dbl> <chr> 1 Luke Skywalker 172 77 blond 2 C-3PO 167 75 <NA> 3 R2-D2 96 32 <NA> 4 Darth Vader 202 136 none 5 Leia Organa 150 49 brown 6 Owen Lars 178 120 brown, grey 7 Beru Whitesun lars 165 75 brown 8 R5-D4 97 32 <NA> 9 Biggs Darklighter 183 84 black 10 Obi-Wan Kenobi 182 77 auburn, white# … with 77 more rows
starwars %>% select(-(name:hair_color))
# A tibble: 87 × 10 skin_color eye_c…¹ birth…² sex gender homew…³ species films vehic…⁴ stars…⁵ <chr> <chr> <dbl> <chr> <chr> <chr> <chr> <lis> <list> <list> 1 fair blue 19 male mascu… Tatooi… Human <chr> <chr> <chr> 2 gold yellow 112 none mascu… Tatooi… Droid <chr> <chr> <chr> 3 white, bl… red 33 none mascu… Naboo Droid <chr> <chr> <chr> 4 white yellow 41.9 male mascu… Tatooi… Human <chr> <chr> <chr> 5 light brown 19 fema… femin… Aldera… Human <chr> <chr> <chr> 6 light blue 52 male mascu… Tatooi… Human <chr> <chr> <chr> 7 light blue 47 fema… femin… Tatooi… Human <chr> <chr> <chr> 8 white, red red NA none mascu… Tatooi… Droid <chr> <chr> <chr> 9 light brown 24 male mascu… Tatooi… Human <chr> <chr> <chr> 10 fair blue-g… 57 male mascu… Stewjon Human <chr> <chr> <chr> # … with 77 more rows, and abbreviated variable names ¹eye_color, ²birth_year,# ³homeworld, ⁴vehicles, ⁵starships
as_tibble(iris) %>% select(starts_with('Sepal'))
# A tibble: 150 × 2 Sepal.Length Sepal.Width <dbl> <dbl> 1 5.1 3.5 2 4.9 3 3 4.7 3.2 4 4.6 3.1 5 5 3.6 6 5.4 3.9 7 4.6 3.4 8 5 3.4 9 4.4 2.910 4.9 3.1# … with 140 more rows
Аналогично:
starts_with(), ends_with(), matches() и contains()
starwars %>% select(name)
# A tibble: 87 × 1 name <chr> 1 Luke Skywalker 2 C-3PO 3 R2-D2 4 Darth Vader 5 Leia Organa 6 Owen Lars 7 Beru Whitesun lars 8 R5-D4 9 Biggs Darklighter 10 Obi-Wan Kenobi # … with 77 more rows
starwars %>% pull(name)
[1] "Luke Skywalker" "C-3PO" "R2-D2" [4] "Darth Vader" "Leia Organa" "Owen Lars" [7] "Beru Whitesun lars" "R5-D4" "Biggs Darklighter" [10] "Obi-Wan Kenobi" "Anakin Skywalker" "Wilhuff Tarkin" [13] "Chewbacca" "Han Solo" "Greedo" [16] "Jabba Desilijic Tiure" "Wedge Antilles" "Jek Tono Porkins" [19] "Yoda" "Palpatine" "Boba Fett" [22] "IG-88" "Bossk" "Lando Calrissian" [25] "Lobot" "Ackbar" "Mon Mothma" [28] "Arvel Crynyd" "Wicket Systri Warrick" "Nien Nunb" [31] "Qui-Gon Jinn" "Nute Gunray" "Finis Valorum" [34] "Jar Jar Binks" "Roos Tarpals" "Rugor Nass" [37] "Ric Olié" "Watto" "Sebulba" [40] "Quarsh Panaka" "Shmi Skywalker" "Darth Maul" [43] "Bib Fortuna" "Ayla Secura" "Dud Bolt" [46] "Gasgano" "Ben Quadinaros" "Mace Windu" [49] "Ki-Adi-Mundi" "Kit Fisto" "Eeth Koth" [52] "Adi Gallia" "Saesee Tiin" "Yarael Poof" [55] "Plo Koon" "Mas Amedda" "Gregar Typho" [58] "Cordé" "Cliegg Lars" "Poggle the Lesser" [61] "Luminara Unduli" "Barriss Offee" "Dormé" [64] "Dooku" "Bail Prestor Organa" "Jango Fett" [67] "Zam Wesell" "Dexter Jettster" "Lama Su" [70] "Taun We" "Jocasta Nu" "Ratts Tyerell" [73] "R4-P17" "Wat Tambor" "San Hill" [76] "Shaak Ti" "Grievous" "Tarfful" [79] "Raymus Antilles" "Sly Moore" "Tion Medon" [82] "Finn" "Rey" "Poe Dameron" [85] "BB8" "Captain Phasma" "Padmé Amidala"
starwars %>% arrange(name,desc(mass),hair_color) %>% slice_head(n = 3)
# A tibble: 3 × 14 name height mass hair_…¹ skin_…² eye_c…³ birth…⁴ sex gender homew…⁵ <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr> <chr> <chr> 1 Ackbar 180 83 none brown … orange 41 male mascu… Mon Ca…2 Adi Gallia 184 50 none dark blue NA fema… femin… Corusc…3 Anakin Skyw… 188 84 blond fair blue 41.9 male mascu… Tatooi…# … with 4 more variables: species <chr>, films <list>, vehicles <list>,# starships <list>, and abbreviated variable names ¹hair_color, ²skin_color,# ³eye_color, ⁴birth_year, ⁵homeworld
Сначала сортируем по столбцу name по возрастанию.
Потом, при совпадении значений name, сортируем по столбцу mass по убыванию (функция desc() задаёт обратный порядок).
Потом сортируем по столбцу hair_color по возрастанию.
Отсутствующие значения ВСЕГДА в конце
starwars %>% select(name:hair_color) %>% mutate(HM = height*mass)
# A tibble: 87 × 5 name height mass hair_color HM <chr> <int> <dbl> <chr> <dbl> 1 Luke Skywalker 172 77 blond 13244 2 C-3PO 167 75 <NA> 12525 3 R2-D2 96 32 <NA> 3072 4 Darth Vader 202 136 none 27472 5 Leia Organa 150 49 brown 7350 6 Owen Lars 178 120 brown, grey 21360 7 Beru Whitesun lars 165 75 brown 12375 8 R5-D4 97 32 <NA> 3104 9 Biggs Darklighter 183 84 black 1537210 Obi-Wan Kenobi 182 77 auburn, white 14014# … with 77 more rows
starwars %>% select(name:hair_color) %>% transmute(inv_mass = 1/mass, inv_height = 1/height)
# A tibble: 87 × 2 inv_mass inv_height <dbl> <dbl> 1 0.0130 0.00581 2 0.0133 0.00599 3 0.0312 0.0104 4 0.00735 0.00495 5 0.0204 0.00667 6 0.00833 0.00562 7 0.0133 0.00606 8 0.0312 0.0103 9 0.0119 0.0054610 0.0130 0.00549# … with 77 more rows
starwars %>% select(name:hair_color) %>% rename(NAME=name)
# A tibble: 87 × 4 NAME height mass hair_color <chr> <int> <dbl> <chr> 1 Luke Skywalker 172 77 blond 2 C-3PO 167 75 <NA> 3 R2-D2 96 32 <NA> 4 Darth Vader 202 136 none 5 Leia Organa 150 49 brown 6 Owen Lars 178 120 brown, grey 7 Beru Whitesun lars 165 75 brown 8 R5-D4 97 32 <NA> 9 Biggs Darklighter 183 84 black 10 Obi-Wan Kenobi 182 77 auburn, white# … with 77 more rows
df <- tribble( ~a,~b, "a",1, "a",1, "a",2, "b",3, "b",3)
distinct(df)
# A tibble: 3 × 2 a b <chr> <dbl>1 a 12 a 23 b 3
starwars %>% summarise(mass_mean_noNA = mean(mass, na.rm=T), mass_mean_withNA = mean(mass), heihgt_max = max(height,na.rm = T), count = n())
# A tibble: 1 × 4 mass_mean_noNA mass_mean_withNA heihgt_max count <dbl> <dbl> <int> <int>1 97.3 NA 264 87
starwars %>% select(name:eye_color) %>% drop_na() %>% group_by(eye_color) %>% summarise(count = n(), height = max(height))
# A tibble: 11 × 3 eye_color count height <chr> <int> <int> 1 black 6 229 2 blue 12 234 3 blue-gray 1 182 4 brown 13 193 5 green, yellow 1 216 6 hazel 2 178 7 orange 5 224 8 red 3 200 9 unknown 2 19310 white 1 17811 yellow 8 202
Число строк и максимальный рост (height) подсчитаны для каждого цвета глаз отдельно
После применения group_by() все манипуляции будут проходить для каждой группы отдельно
Если далее нужно вернуться к работе с полным набором данных, нужно разгруппировать tibble-фрейм с помощью ungroup()
https://stringr.tidyverse.org/
Посмотрим работу некоторых функций в связке с dplyr
Тестовый набор данных:
SW <- starwars %>% select(name:eye_color) %>% drop_na()
SW %>% mutate(name_len = str_length(name))
# A tibble: 54 × 7 name height mass hair_color skin_color eye_color name_len <chr> <int> <dbl> <chr> <chr> <chr> <int> 1 Luke Skywalker 172 77 blond fair blue 14 2 Darth Vader 202 136 none white yellow 11 3 Leia Organa 150 49 brown light brown 11 4 Owen Lars 178 120 brown, grey light blue 9 5 Beru Whitesun lars 165 75 brown light blue 18 6 Biggs Darklighter 183 84 black light brown 17 7 Obi-Wan Kenobi 182 77 auburn, white fair blue-gray 14 8 Anakin Skywalker 188 84 blond fair blue 16 9 Chewbacca 228 112 brown unknown blue 910 Han Solo 180 80 brown fair brown 8# … with 44 more rows
SW %>% mutate(name_start = str_sub(name,1,5))
# A tibble: 54 × 7 name height mass hair_color skin_color eye_color name_start <chr> <int> <dbl> <chr> <chr> <chr> <chr> 1 Luke Skywalker 172 77 blond fair blue "Luke " 2 Darth Vader 202 136 none white yellow "Darth" 3 Leia Organa 150 49 brown light brown "Leia " 4 Owen Lars 178 120 brown, grey light blue "Owen " 5 Beru Whitesun lars 165 75 brown light blue "Beru " 6 Biggs Darklighter 183 84 black light brown "Biggs" 7 Obi-Wan Kenobi 182 77 auburn, white fair blue-gray "Obi-W" 8 Anakin Skywalker 188 84 blond fair blue "Anaki" 9 Chewbacca 228 112 brown unknown blue "Chewb" 10 Han Solo 180 80 brown fair brown "Han S" # … with 44 more rows
SW %>% mutate(name_upper = str_to_upper(name))
# A tibble: 54 × 7 name height mass hair_color skin_color eye_color name_upper <chr> <int> <dbl> <chr> <chr> <chr> <chr> 1 Luke Skywalker 172 77 blond fair blue LUKE SKYW… 2 Darth Vader 202 136 none white yellow DARTH VAD… 3 Leia Organa 150 49 brown light brown LEIA ORGA… 4 Owen Lars 178 120 brown, grey light blue OWEN LARS 5 Beru Whitesun lars 165 75 brown light blue BERU WHIT… 6 Biggs Darklighter 183 84 black light brown BIGGS DAR… 7 Obi-Wan Kenobi 182 77 auburn, white fair blue-gray OBI-WAN K… 8 Anakin Skywalker 188 84 blond fair blue ANAKIN SK… 9 Chewbacca 228 112 brown unknown blue CHEWBACCA 10 Han Solo 180 80 brown fair brown HAN SOLO # … with 44 more rows
Аналогично:
str_to_lower(), str_to_title()
SW %>% mutate(nameL = str_detect(name,"^L"))
# A tibble: 54 × 7 name height mass hair_color skin_color eye_color nameL <chr> <int> <dbl> <chr> <chr> <chr> <lgl> 1 Luke Skywalker 172 77 blond fair blue TRUE 2 Darth Vader 202 136 none white yellow FALSE 3 Leia Organa 150 49 brown light brown TRUE 4 Owen Lars 178 120 brown, grey light blue FALSE 5 Beru Whitesun lars 165 75 brown light blue FALSE 6 Biggs Darklighter 183 84 black light brown FALSE 7 Obi-Wan Kenobi 182 77 auburn, white fair blue-gray FALSE 8 Anakin Skywalker 188 84 blond fair blue FALSE 9 Chewbacca 228 112 brown unknown blue FALSE10 Han Solo 180 80 brown fair brown FALSE# … with 44 more rows
Находит значения в столбце name, начинающиеся с L.
"L$" - аналогично: заканчиваются на L
Можно искать вхождение подстроки или паттерна
SW %>% mutate(out = str_count(eye_color,c('l','a')))
# A tibble: 54 × 7 name height mass hair_color skin_color eye_color out <chr> <int> <dbl> <chr> <chr> <chr> <int> 1 Luke Skywalker 172 77 blond fair blue 1 2 Darth Vader 202 136 none white yellow 0 3 Leia Organa 150 49 brown light brown 0 4 Owen Lars 178 120 brown, grey light blue 0 5 Beru Whitesun lars 165 75 brown light blue 1 6 Biggs Darklighter 183 84 black light brown 0 7 Obi-Wan Kenobi 182 77 auburn, white fair blue-gray 1 8 Anakin Skywalker 188 84 blond fair blue 0 9 Chewbacca 228 112 brown unknown blue 110 Han Solo 180 80 brown fair brown 0# … with 44 more rows
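Внимание: вектор паттернов c('l','a') не ищет обе буквы сразу. Он переиспользуется (recycling) по строкам: в нечётных строках считается число букв l, в чётных - число букв a (поэтому для "yellow" во второй строке получился 0). Чтобы подсчитать в каждом значении столбца eye_color обе буквы, используйте один паттерн: str_count(eye_color, "[la]")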
SW %>% mutate(rm = str_remove_all(eye_color, "[aeiou]"))
# A tibble: 54 × 7 name height mass hair_color skin_color eye_color rm <chr> <int> <dbl> <chr> <chr> <chr> <chr> 1 Luke Skywalker 172 77 blond fair blue bl 2 Darth Vader 202 136 none white yellow yllw 3 Leia Organa 150 49 brown light brown brwn 4 Owen Lars 178 120 brown, grey light blue bl 5 Beru Whitesun lars 165 75 brown light blue bl 6 Biggs Darklighter 183 84 black light brown brwn 7 Obi-Wan Kenobi 182 77 auburn, white fair blue-gray bl-gry 8 Anakin Skywalker 188 84 blond fair blue bl 9 Chewbacca 228 112 brown unknown blue bl 10 Han Solo 180 80 brown fair brown brwn # … with 44 more rows
Удалили из столбца eye_color все гласные буквы
str_remove() удаляет только первое вхождение
SW %>% mutate(rep = str_replace_all(eye_color,"[aeiou]","-"))
# A tibble: 54 × 7 name height mass hair_color skin_color eye_color rep <chr> <int> <dbl> <chr> <chr> <chr> <chr> 1 Luke Skywalker 172 77 blond fair blue bl-- 2 Darth Vader 202 136 none white yellow y-ll-w 3 Leia Organa 150 49 brown light brown br-wn 4 Owen Lars 178 120 brown, grey light blue bl-- 5 Beru Whitesun lars 165 75 brown light blue bl-- 6 Biggs Darklighter 183 84 black light brown br-wn 7 Obi-Wan Kenobi 182 77 auburn, white fair blue-gray bl---gr-y 8 Anakin Skywalker 188 84 blond fair blue bl-- 9 Chewbacca 228 112 brown unknown blue bl-- 10 Han Solo 180 80 brown fair brown br-wn # … with 44 more rows
Заменили в столбце eye_color все гласные буквы на -
str_replace() заменит только первое вхождение
SW %>% mutate(first = word(name,1))
# A tibble: 54 × 7 name height mass hair_color skin_color eye_color first <chr> <int> <dbl> <chr> <chr> <chr> <chr> 1 Luke Skywalker 172 77 blond fair blue Luke 2 Darth Vader 202 136 none white yellow Darth 3 Leia Organa 150 49 brown light brown Leia 4 Owen Lars 178 120 brown, grey light blue Owen 5 Beru Whitesun lars 165 75 brown light blue Beru 6 Biggs Darklighter 183 84 black light brown Biggs 7 Obi-Wan Kenobi 182 77 auburn, white fair blue-gray Obi-Wan 8 Anakin Skywalker 188 84 blond fair blue Anakin 9 Chewbacca 228 112 brown unknown blue Chewbacca10 Han Solo 180 80 brown fair brown Han # … with 44 more rows