# load the tidyverse, then the built-in iris data set
library(tidyverse)
## ── Attaching core tidyverse pa
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.4 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ✔ readr 2.1.5
## ── Conflicts ─────────────────
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Bring the built-in iris data set into the workspace and show its structure
data("iris")
iris %>% glimpse()
## Rows: 150
## Columns: 5
## $ Sepal.Length <dbl> 5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5.4, 4.…
## $ Sepal.Width <dbl> 3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.…
## $ Petal.Length <dbl> 1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.5, 1.…
## $ Petal.Width <dbl> 0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, 0.2, 0.…
## $ Species <fct> setosa, setosa, setosa, setosa, setosa, setosa, setosa, s…
# Keep virginica and versicolor flowers with sepal length > 6 and
# sepal width > 2.5.
# Set membership must use `%in%`: `Species == c("virginica", "versicolor")`
# recycles the two-element vector down the column, so each row is compared
# with only one of the two names and matching rows are silently dropped.
# NOTE: the recorded output in this document was produced with `==`;
# re-knit to refresh the row counts.
iris1 <- filter(
  iris,
  Species %in% c("virginica", "versicolor"),
  Sepal.Length > 6,
  Sepal.Width > 2.5
)
glimpse(iris1)
## Rows: 28
## Columns: 5
## $ Sepal.Length <dbl> 6.4, 6.1, 6.7, 6.1, 6.1, 6.6, 6.7, 6.1, 6.2, 6.3, 7.1, 6.…
## $ Sepal.Width <dbl> 3.2, 2.9, 3.1, 2.8, 2.8, 3.0, 3.0, 3.0, 2.9, 3.3, 3.0, 3.…
## $ Petal.Length <dbl> 4.5, 4.7, 4.4, 4.0, 4.7, 4.4, 5.0, 4.6, 4.3, 6.0, 5.9, 5.…
## $ Petal.Width <dbl> 1.5, 1.4, 1.4, 1.3, 1.2, 1.4, 1.7, 1.4, 1.3, 2.5, 2.1, 2.…
## $ Species <fct> versicolor, versicolor, versicolor, versicolor, versicolo…
The new iris1 object contains 28 observations of 5 variables.
# Keep only the species label and the two sepal measurements
iris2 <- iris1 %>%
  select(Species, Sepal.Length, Sepal.Width)
glimpse(iris2)
## Rows: 28
## Columns: 3
## $ Species <fct> versicolor, versicolor, versicolor, versicolor, versicolo…
## $ Sepal.Length <dbl> 6.4, 6.1, 6.7, 6.1, 6.1, 6.6, 6.7, 6.1, 6.2, 6.3, 7.1, 6.…
## $ Sepal.Width <dbl> 3.2, 2.9, 3.1, 2.8, 2.8, 3.0, 3.0, 3.0, 2.9, 3.3, 3.0, 3.…
Now, iris2 has 28 observations of only 3 variables.
# Sort rows from longest to shortest sepal.
# arrange() has no `by` argument; sort keys are passed directly through `...`.
iris3 <- arrange(iris2, desc(Sepal.Length))
head(iris3)
## Species Sepal.Length Sepal.Width
## 1 virginica 7.7 2.6
## 2 virginica 7.7 2.8
## 3 virginica 7.4 2.8
## 4 virginica 7.1 3.0
## 5 virginica 6.9 3.2
## 6 virginica 6.8 3.0
# Add sepal area, computed as sepal length times sepal width
iris4 <- iris3 %>%
  mutate(Sepal.Area = Sepal.Length * Sepal.Width)
head(iris4)
## Species Sepal.Length Sepal.Width Sepal.Area
## 1 virginica 7.7 2.6 20.02
## 2 virginica 7.7 2.8 21.56
## 3 virginica 7.4 2.8 20.72
## 4 virginica 7.1 3.0 21.30
## 5 virginica 6.9 3.2 22.08
## 6 virginica 6.8 3.0 20.40
After adding sepal area as a variable, iris4 now has 28 observations of 4 variables.
# Mean sepal length, mean sepal width, and row count across all of iris4
iris5 <- iris4 %>%
  summarize(
    Mean.Length = mean(Sepal.Length),
    Mean.Width = mean(Sepal.Width),
    Sample.Size = n()
  )
print(iris5)
## Mean.Length Mean.Width Sample.Size
## 1 6.575 3.003571 28
# Mean sepal length, mean sepal width, and row count for each species in iris4
iris6 <- summarize(
  group_by(iris4, Species),
  Mean.Length = mean(Sepal.Length),
  Mean.Width = mean(Sepal.Width),
  Sample.Size = n()
)
print(iris6)
## # A tibble: 2 × 4
## Species Mean.Length Mean.Width Sample.Size
## <fct> <dbl> <dbl> <int>
## 1 versicolor 6.33 2.97 9
## 2 virginica 6.69 3.02 19
# Single pipeline from the raw iris data: filter, select, sort, add sepal
# area, then summarize per species.
# `%in%` is required for the species test: `==` against a two-element vector
# recycles it down the column, so each row is compared with only one of the
# two names and matching rows are silently dropped.
# NOTE: the recorded output in this document was produced with `==`;
# re-knit to refresh the summary.
irisFinal <- iris %>%
  filter(
    Species %in% c("virginica", "versicolor"),
    Sepal.Length > 6,
    Sepal.Width > 2.5
  ) %>%
  select(Species, Sepal.Length, Sepal.Width) %>%
  # arrange() has no `by` argument; the sort key goes straight into `...`
  arrange(desc(Sepal.Length)) %>%
  mutate(Sepal.Area = Sepal.Length * Sepal.Width) %>%
  group_by(Species) %>%
  summarize(
    Mean.Length = mean(Sepal.Length),
    Mean.Width = mean(Sepal.Width),
    Sample.Size = n()
  )
print(irisFinal)
## # A tibble: 2 × 4
## Species Mean.Length Mean.Width Sample.Size
## <fct> <dbl> <dbl> <int>
## 1 versicolor 6.33 2.97 9
## 2 virginica 6.69 3.02 19
# Reshape iris from wide to long: the four measurement columns are stacked,
# with the column name stored in `Measure` and its value in `Value`
irisLong <- iris %>%
  pivot_longer(
    cols = c(Sepal.Length, Sepal.Width, Petal.Length, Petal.Width),
    names_to = "Measure",
    values_to = "Value"
  )
head(irisLong)
## # A tibble: 6 × 3
## Species Measure Value
## <fct> <chr> <dbl>
## 1 setosa Sepal.Length 5.1
## 2 setosa Sepal.Width 3.5
## 3 setosa Petal.Length 1.4
## 4 setosa Petal.Width 0.2
## 5 setosa Sepal.Length 4.9
## 6 setosa Sepal.Width 3