-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtidyverse.R
142 lines (91 loc) · 3.09 KB
/
tidyverse.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# Load dplyr before anything else: it provides %>%, select(), group_by(),
# summarise() and filter(), all of which are used immediately below.
# The workspace is intentionally not wiped with rm(list = ls()); run the
# script in a fresh R session instead of deleting the caller's objects.
library(dplyr)

## tidyverse and Pipe Operator %>% ----

# Using the pipe operator: each step receives the previous step's result as
# its first argument. Here: keep two columns, group by species, then reduce
# each group to its mean sepal length.
result <- iris %>%
  select(Sepal.Length, Species) %>%
  group_by(Species) %>%
  summarise(avg_length = mean(Sepal.Length))
print(result)

# Keep only the rows where both conditions hold (sepal longer than 5 AND
# petal longer than 1.5).
result_2 <- iris %>%
  filter(Sepal.Length > 5 & Petal.Length > 1.5)
# Count the flowers recorded for each species: one output row per species,
# with the group size reported by n() in `number_of_individuals`.
individual_counts <- summarise(
  group_by(iris, Species),
  number_of_individuals = n()
)

# Show the per-species counts
print(individual_counts)
# Number of non-missing values in each column of iris.
# vapply() walks the data frame column by column and guarantees an integer
# result, whereas apply() would first coerce the whole data frame to a
# character matrix. Note that `new` is reassigned on the next statement.
new <- vapply(iris, function(x) sum(!is.na(x)), integer(1))

# Mean of every numeric column.
# The previous call, apply(iris, 2, colsums = mean), never supplied FUN and
# therefore stopped with an error. Species is a factor, so the columns are
# restricted to the numeric ones before averaging.
new <- vapply(Filter(is.numeric, iris), mean, numeric(1))

# Plain copy of the dataset under a second name.
new_5 <- iris
# Average sepal length within each species (one row per species).
mean_sepal_length <- summarise(
  group_by(iris, Species),
  mean_sepal_length = mean(Sepal.Length)
)
mean_sepal_length
# Per-species mean of every measurement column, with the Species column
# dropped afterwards so only the means remain.
# across() replaces the superseded summarise_all(); inside a grouped
# summarise(), everything() covers all non-grouping columns, which is the
# same set summarise_all() operated on.
mean_values <- iris %>%
  group_by(Species) %>%
  summarise(across(everything(), function(x) mean(x, na.rm = TRUE))) %>%
  select(-Species)
mean_values
## summarise function ----

# dplyr supplies summarise(), group_by(), n() and the %>% pipe used below
library(dplyr)

# Load the built-in iris dataset into the workspace
data(iris)

# Example 1: collapse the whole table into a single row holding the overall
# mean of Sepal.Length
result_mean_sepal_length <- iris %>%
  summarise(mean_sepal_length = mean(Sepal.Length))
print(result_mean_sepal_length)

# Example 2: grouped summary - one row per species with the mean and the
# median of Sepal.Length
result_grouped <- summarise(
  group_by(iris, Species),
  mean_sepal_length = mean(Sepal.Length),
  median_sepal_length = median(Sepal.Length)
)
print(result_grouped)

# Example 3: several statistics at once on the ungrouped table - mean,
# median, and the row count from n()
result_multiple <- iris %>%
  summarise(
    mean_sepal_length = mean(Sepal.Length),
    median_sepal_length = median(Sepal.Length),
    total_count = n()
  )
print(result_multiple)
## group by ----

# Group on a single column (Species)
iris_grouped <- iris %>%
  group_by(Species)
iris_grouped

# Group on two columns at once: every distinct Species / Petal.Length pair
# becomes its own group
iris_grouped_multiple <- iris %>%
  group_by(Species, Petal.Length)
iris_grouped_multiple

# Grouped filter followed by a summary: within each species keep only the
# flowers whose sepal is longer than that species' own mean, then average
# the sepal length of the rows that remain
result_combined <- summarise(
  filter(
    group_by(iris, Species),
    Sepal.Length > mean(Sepal.Length)
  ),
  mean_sepal_length = mean(Sepal.Length)
)
result_combined
## select function ----
# Keep three columns, in the order listed
selected_columns <- iris %>%
  select(Species, Sepal.Length, Sepal.Width)
selected_columns

## filter ----
# Keep rows that satisfy both conditions; comma-separated conditions in
# filter() are combined with AND
filtered_data <- iris %>%
  filter(Sepal.Length > 5, Species == "setosa")
filtered_data

## arrange ----
# Sort by Sepal.Length ascending, breaking ties by Sepal.Width descending
arranged_data <- iris %>%
  arrange(Sepal.Length, desc(Sepal.Width))
arranged_data

## mutate ----
# Add a derived column: the product of sepal length and sepal width
mutated_data <- iris %>%
  mutate(Sepal.Area = Sepal.Length * Sepal.Width)
mutated_data

## distinct ----
# One row per distinct value of Species
unique_species <- iris %>%
  distinct(Species)
unique_species

## rename ----
# Rename two columns (new_name = old_name); all other columns are unchanged
renamed_data <- iris %>%
  rename(
    sepal_length = Sepal.Length,
    sepal_width = Sepal.Width
  )
renamed_data
# Load required libraries
library(dplyr)

# Using the iris dataset
data(iris)

# Create two datasets: an untouched copy, and one where a single column has
# been shifted by 0.1 (values change, structure does not)
iris1 <- iris
iris2 <- iris %>%
  mutate(Sepal.Width = Sepal.Width + 0.1)

# Check if the two data frames have the same dimensions, column names, and
# row names. Each line evaluates to a single TRUE or FALSE.
nrow(iris1) == nrow(iris2) # number of rows matches
ncol(iris1) == ncol(iris2) # number of columns matches

# identical() compares the whole vectors in one step. all(a == b) would
# recycle the shorter vector when the lengths differ and is vacuously TRUE
# for empty input, so it can report a match that is not there.
identical(names(iris1), names(iris2)) # column names match
identical(rownames(iris1), rownames(iris2)) # row names match