.Rhistory

# Fill missing entries column by column:
# NA in Age becomes 0, NA in Score becomes 100.
my_data_replaced <- replace_na(
  data = my_data,
  replace = list(Age = 0, Score = 100)
)
# Show the data after the replacement
print(my_data_replaced)
# Show the current column names
print(names(iris))
# Rename only the second column
names(iris)[2] <- "sw"
# Rename every column at once (one new name per column, in order)
names(iris) <- c("SL", "sw", "PL", "PW", "SPP")
# Flag the non-missing entries of my_data (TRUE where a value is present)
not_na_vector <- !is.na(my_data)
# NOTE(review): is_na_vector is not created anywhere in the visible history;
# presumably it was assigned earlier in the session - confirm.
print(is_na_vector)
print(not_na_vector)
# Create a vector with missing values
my_vector <- c(1, 2, NA, 4, NA, 6)
# Check for non-missing values using !is.na()
not_na_vector <- !is.na(my_vector)
# Print the result (FALSE at the two NA positions)
print(not_na_vector)
# Remove every object listed by ls() from the current environment,
# including my_data, my_vector and not_na_vector created above.
rm(list=ls())
## tidyverse and the pipe operator %>%
# Mean sepal length per species, written as a pipeline:
# keep two columns -> group the rows by species -> one summary row per group.
result <- iris %>%
  select(Sepal.Length, Species) %>%
  group_by(Species) %>%
  summarise(avg_length = mean(Sepal.Length))
# Inspect the summary in the viewer and on the console
View(result)
print(result)
# Print the full data set for comparison
iris
# Keep the rows whose sepal length exceeds 5 AND whose petal length
# exceeds 1.5.
# - `== >` is not an operator; a comparison uses exactly one of ==, >, >=, ...
# - filter() evaluates its condition for every row, so it needs the
#   element-wise `&`; the scalar `&&` is meant for if()/while().
# - inside dplyr verbs columns are referred to by bare name (Species),
#   not as iris$Species.
result_2 <- iris %>%
  filter(Sepal.Length > 5 & Petal.Length > 1.5)
View(result_2)
# Group the rows by species; the data are unchanged, only the grouping
# information is added.
results_3 <- iris %>%
  group_by(Species)
View(results_3)
iris
# Number of rows per species.
# Piping the data frame into sum(Species) cannot work: Species is only
# visible inside dplyr verbs, and sum() is not defined for factors.
results_3 <- iris %>%
  count(Species)
library(dplyr)
# Count the rows (individuals) that belong to each species
individual_counts <- iris %>%
  group_by(Species) %>%
  summarise(number_of_individuals = n())
# Show the per-species counts
print(individual_counts)
# Number of non-missing values in each column.
# The function handed to an apply-style helper must be a function object:
# `!is.na` is not one, and `function = ...` is not valid syntax, so the
# test is wrapped in an anonymous function.
# vapply() walks the columns directly and guarantees one integer per
# column; apply() would first convert the whole data frame to a matrix.
new <- vapply(iris, function(x) sum(!is.na(x)), integer(1))
new
# Per-species row counts
new_4 <- iris %>%
  group_by(Species) %>%
  summarise(number_of_in = n())
new_4
# Per-species mean of every measurement column.
# mean() averages a single numeric vector; a (grouped) data frame piped
# straight into mean() only yields NA with a warning. The aggregation
# therefore has to happen inside summarise(), one expression per column.
new_5 <- iris %>%
  group_by(Species) %>%
  summarise(
    Sepal.Length = mean(Sepal.Length),
    Sepal.Width = mean(Sepal.Width),
    Petal.Length = mean(Petal.Length),
    Petal.Width = mean(Petal.Width)
  )
View(new_5)
# Mean of each numeric column of iris.
# apply() converts the data frame to a matrix first; because Species is a
# factor the matrix becomes character and mean() returns NA for every
# column. Selecting the numeric columns and using colMeans() avoids the
# coercion. (The third argument of apply() is a function, e.g. `mean`,
# not a call such as `mean(x)`.)
new <- colMeans(iris[vapply(iris, is.numeric, logical(1))])
new
# Help page for summarise()
?summarise
# summarise() takes `name = expression` pairs; grouping is declared with
# group_by() BEFORE it, using the bare column name (Species, capital S).
# The mean must be taken over a numeric column: the mean of the factor
# Species is NA.
iris %>%
  group_by(Species) %>%
  summarise(mean = mean(Sepal.Length), n = n())
# Number of observations per species.
# (iris is used throughout: my_data no longer exists at this point of the
# session because the workspace was cleared.)
summary_data <- iris %>%
  group_by(Species) %>%
  summarise(number_of_observations = n())
summary_data
# Mean sepal length per species.
# summarise() needs `name = expression`, and mean() averages ONE vector:
# extra positional arguments are taken as `trim`/`na.rm`, not as further
# columns to average.
mean_sepal_length <- iris %>%
  group_by(Species) %>%
  summarise(mean_sepal_length = mean(Sepal.Length))
mean_sepal_length
# Mean of every measurement column per species (missing values ignored);
# the Species label is dropped afterwards, so rows appear in group order.
mean_values <- iris %>%
  group_by(Species) %>%
  summarise_all(mean, na.rm = TRUE) %>%
  select(-Species)
mean_values
# Packages must be attached before their functions are called:
# lmer()/fixef() come from lme4; data_ge is taken from the attached
# packages (metan is loaded for it).
library(lme4)
library(emmeans)
library(metan)
# Work on a copy of the example data and inspect it
data <- data_ge
str(data)
head(data)
# Replicate, genotype and environment are categorical
data$REP <- as.factor(data$REP)
data$GEN <- as.factor(data$GEN)
data$ENV <- as.factor(data$ENV)
# Mixed model: genotype fixed, environment random.
# - `data = data` is required; without it lmer() looks for GY, GEN and ENV
#   as free-standing objects and fails.
# - GY is the response and is left as it comes from data_ge; converting it
#   to a factor makes a linear mixed model meaningless.
Fitted <- lmer(GY ~ GEN + (1|ENV), data = data)
Fitted
# Fixed-effect estimates (intercept and genotype contrasts)
fixef(Fitted)
# Fixed-effects design matrix (intercept + genotype contrasts).
# It is taken from the fitted model itself: formula(Fitted) still contains
# the random term (1 | ENV) and carries no data, so it cannot be expanded
# into a design matrix on its own.
X <- model.matrix(Fitted)
# Keep one row per distinct genotype coding. drop = FALSE keeps X a matrix.
# NOTE(review): rows follow the order in which genotypes first appear in
# `data`; confirm that this matches levels(data$GEN) before lining the
# result up with other per-genotype tables.
X <- X[!duplicated(X), , drop = FALSE]
X
# Extracting the beta coefficients
Beta <- fixef(Fitted)
Beta
# BLUEs of the genotypes: one fitted fixed-effect value per row of X.
# X must be de-duplicated first, otherwise there is one value per
# observation instead of one per genotype.
BLUEs_Gen <- X %*% Beta
BLUEs_Gen
# Least-squares (marginal) mean of every genotype from the fitted model
Lsmeans_Gen <- lsmeans(Fitted, ~ GEN)
str(Lsmeans_Gen)
# Table pairing each genotype ID with its BLUE
BLUEs_Gen1 <- data.frame(
  GID = levels(data$GEN),
  BLUEs = summary(Lsmeans_Gen)$lsmean
)
BLUEs_Gen1
# Model for the BLUPs: genotype, environment and their interaction are all
# random. `data = data` is required so that lmer() can find GY, GEN and ENV.
Fitted2 <- lmer(GY ~ (1|GEN) + (1|ENV) + (1|(GEN:ENV)), data = data)
Fitted2
# Fixed effect = overall intercept
Intercept <- fixef(Fitted2)
str(Intercept)
# Random genotype effects (one deviation per genotype), as a list
U_ref <- c(ranef(Fitted2)$GEN)
U_ref
# BLUP of each genotype = overall intercept + its random deviation
BLUP_Gen2 <- Intercept + U_ref$'(Intercept)'
BLUP_Gen2
# Agreement between BLUEs and BLUPs. Both inputs must hold exactly one
# value per genotype, so the per-genotype BLUE column is used here.
cor(BLUEs_Gen1$BLUEs, BLUP_Gen2)
# Genotype IDs next to their BLUPs
BLUPs_Gen1 <- data.frame(GID = levels(data$GEN), BLUPs = BLUP_Gen2)
BLUPs_Gen1
# One table holding genotype ID, BLUE and BLUP side by side
Combined_Gen <- data.frame(GID = levels(data$GEN), BLUEs = BLUEs_Gen1$BLUEs, BLUPs = BLUP_Gen2)
# Save the combined table as CSV in the working directory, without a
# row-name column
write.csv(x = Combined_Gen, file = "BLUEs_BLUPs_Gen.csv", row.names = FALSE)
### Generating random numbers
# Uniform draws - syntax: runif(n, min = 0, max = 1)
# Five values between 0 and 1
random_uniform <- runif(n = 5, min = 0, max = 1)
print(random_uniform)
# Normal draws - syntax: rnorm(n, mean = 0, sd = 1)
# Five values from a normal distribution with mean 0 and sd 1
random_normal <- rnorm(n = 5, mean = 0, sd = 1)
print(random_normal)
# Sampling from a vector - syntax: sample(x, size, replace = FALSE, prob = NULL)
# Three letters drawn with replacement, so repeats are possible
random_sample <- sample(x = c("A", "B", "C", "D", "E"), size = 3, replace = TRUE)
print(random_sample)
# General shape of a for loop (template only - `variable` and `sequence`
# are placeholders; run as code, `sequence` would be the base R function
# of that name, which a for loop cannot iterate over):
#   for (variable in sequence) {
#     # Code to be executed for each element
#   }

# Squares of 1 to 5
for (i in 1:5) {
  square <- i^2
  print(square)
}
# Cubes of 2 to 6
for (i in 2:6) {
  cube <- i^3
  print(cube)
}
# After the loop, `cube` still holds the value of the last iteration (6^3)
print(cube)
# Print the squares of 1, 2, 3, ... for as long as the square is below 10
i <- 1
repeat {
  if (!(i^2 < 10)) break
  square <- i^2
  print(square)
  i <- i + 1
}
# Running factorial: keep multiplying until the value exceeds 1000
n <- 1
factorial_value <- 1
repeat {
  if (factorial_value > 1000) break
  factorial_value <- factorial_value * n
  print(paste("Factorial of", n, "is", factorial_value))
  n <- n + 1
}
# Squares that do not exceed 50
number <- 1
while (number^2 <= 50) {
  square_value <- number^2
  print(paste("Square of", number, "is", square_value))
  number <- number + 1
}
library(readr)
# Read lentil_blocked.csv.
# NOTE(review): absolute, machine-specific path - adjust before running
# on another computer.
lentil_blocked <- read_csv("C:/Users/windows/OneDrive/Desktop/lentil_blocked.csv")
View(lentil_blocked)
### loops for dataframe
data <- lentil_blocked
# Create a new column 'results' (all zeros for now) to store the products
data$results <- numeric(nrow(data))
data
# Using a for loop to calculate the product of 'YIELD' and 'NITROGEN'.
# The assignment belongs INSIDE the loop: run on its own it would use
# whatever value `i` happens to hold from earlier code.
# seq_len() yields an empty sequence for a data frame without rows,
# whereas 1:nrow(data) would count 1, 0.
# (Vectorised equivalent: data$results <- data$YIELD * data$NITROGEN)
for (i in seq_len(nrow(data))) {
  data$results[i] <- data$YIELD[i] * data$NITROGEN[i]
}
print(data)
View(data)
# Columns to include in the boxplots. The names must match the data frame
# exactly: the product column is called "results"; data[["result"]] would
# be NULL at this point, leaving nothing to draw.
columns_to_plot <- c("YIELD", "NITROGEN", "results")
columns_to_plot
# One panel per column, side by side in a single row
par(mfrow = c(1, length(columns_to_plot)))
# Using a for loop to create a boxplot for each column;
# `col` holds the column name, which doubles as title and axis label
for (col in columns_to_plot) {
  boxplot(data[[col]], main = col, ylab = col)
}
## Uses of par() and mfrow
# mfrow = c(rows, columns) sets how many panels share one page.
# The same four example plots are drawn under four layouts in turn:
# 2x2, 1x2, 1x3 and 1x4. When a layout has fewer than four panels, the
# remaining plots continue on a fresh page.
invisible(lapply(
  list(c(2, 2), c(1, 2), c(1, 3), c(1, 4)),
  function(panel_grid) {
    par(mfrow = panel_grid)
    for (panel in 1:4) {
      plot(1:10, main = paste("Plot", panel))
    }
  }
))
# Columns to include in the boxplots
columns_to_plot <- c("YIELD", "NITROGEN", "results")
columns_to_plot
# Run the saved script loops.R, echoing each statement as it executes.
# NOTE(review): its contents are not visible here; it may create or
# overwrite objects such as `data` - confirm before relying on the state.
source("C:/Users/windows/OneDrive/Desktop/loops.R", echo=TRUE)
# Using a for loop to create boxplots for each column
for (col in columns_to_plot) {
boxplot(data[[col]], main = col, ylab = col)  ## col holds the column name
}
# Same loop run a second time (identical plots)
for (col in columns_to_plot) {
boxplot(data[[col]], main = col, ylab = col)  ## col holds the column name
}
## Uses of par() and mfrow
# Set up a grid of 2 rows x 4 columns (8 panels) for the following plots
par(mfrow = c(2, 4))   ## c(rows, columns)
# Using a for loop to create boxplots for each column
for (col in columns_to_plot) {
boxplot(data[[col]], main = col, ylab = col)  ## col holds the column name
}
## Uses of par() and mfrow
# Set up a single row of 4 panels
par(mfrow = c(1, 4))   ## c(rows, columns)
# Create and plot some example data, one plot per panel
plot(1:10, main = "Plot 1")
plot(1:10, main = "Plot 2")
plot(1:10, main = "Plot 3")
plot(1:10, main = "Plot 4")
# Create the column 'result' (all zeros for now) to store the products
data$result <- numeric(nrow(data))
# Initialize the row counter. It must be reset to 1 every time the loop is
# re-run: once it has passed nrow(data) the loop body is skipped entirely.
counter <- 1
# Using a while loop to calculate the product of 'YIELD' and 'NITROGEN'.
# The target column is the one created above ('result'); writing to a
# different name would leave 'result' at zero.
while (counter <= nrow(data)) {
  data$result[counter] <- data$YIELD[counter] * data$NITROGEN[counter]
  counter <- counter + 1
}
# Display the updated dataframe
print(data)
# Fix the random seed so the simulated genotypes are reproducible
set.seed(123)
# Size of the simulated panel
num_markers <- 50
num_individuals <- 20
# Draw one genotype code (0, 1 or 2) per individual-by-marker cell, then
# shape the draws into a matrix: rows = individuals, columns = markers
snp_data <- sample(0:2, size = num_markers * num_individuals, replace = TRUE)
dim(snp_data) <- c(num_individuals, num_markers)
# Data-frame version of the matrix for easier reading
snp_df <- as.data.frame(snp_data)
# Show the simulated SNP data
print(snp_df)
set.seed(123)
# Minor Allele Frequency (MAF) of one marker.
#
# genotypes: numeric vector of genotype codes; assumes each code is the
#            number of copies (0, 1 or 2) of one allele carried by a
#            diploid individual. NA entries are ignored.
# Returns:   the frequency of the rarer allele, a value in [0, 0.5], or
#            NA_real_ when no genotype is observed.
#
# The frequency is computed over ALLELES (two per individual), not over
# genotype classes: the rarest genotype class is not the minor allele.
calculate_maf <- function(genotypes) {
  observed <- genotypes[!is.na(genotypes)]
  if (length(observed) == 0L) {
    return(NA_real_)
  }
  allele_freq <- sum(observed) / (2 * length(observed))
  min(allele_freq, 1 - allele_freq)
}
set.seed(123)
# Minor Allele Frequency (MAF) of one marker.
#
# genotypes: numeric vector of genotype codes; assumes each code is the
#            number of copies (0, 1 or 2) of one allele carried by a
#            diploid individual. NA entries are ignored.
# Returns:   the frequency of the rarer allele, a value in [0, 0.5], or
#            NA_real_ when no genotype is observed.
#
# The frequency is computed over ALLELES (two per individual), not over
# genotype classes: the rarest genotype class is not the minor allele.
calculate_maf <- function(genotypes) {
  observed <- genotypes[!is.na(genotypes)]
  if (length(observed) == 0L) {
    return(NA_real_)
  }
  allele_freq <- sum(observed) / (2 * length(observed))
  min(allele_freq, 1 - allele_freq)
}
# Number of heterozygous individuals at one marker.
#
# genotypes: vector of genotype codes in which 1 marks a heterozygote.
#            NA entries are ignored, so a single missing genotype does not
#            turn the whole count into NA.
# Returns:   the count of entries equal to 1.
calculate_heterozygotes <- function(genotypes) {
  sum(genotypes == 1, na.rm = TRUE)
}
# Share of missing genotypes at one marker, as a percentage.
#
# genotypes: vector of genotype codes, NA marking a missing call.
# Returns:   100 * (number of NA entries) / (total number of entries).
calculate_missing_percentage <- function(genotypes) {
  n_missing <- sum(is.na(genotypes))
  n_missing / length(genotypes) * 100
}
# Per-marker statistics: every column of snp_df is one SNP marker.
# vapply() applies the helper to each column and guarantees exactly one
# number per marker; it also copes with a data frame that has no columns,
# where a 1:ncol(snp_df) loop would wrongly run for indices 1 and 0.
# USE.NAMES = FALSE keeps the vectors unnamed so the row names of the
# result table stay 1, 2, 3, ...
maf_results <- vapply(snp_df, calculate_maf, numeric(1), USE.NAMES = FALSE)
heterozygotes_results <- vapply(
  snp_df, calculate_heterozygotes, numeric(1),
  USE.NAMES = FALSE
)
missing_percentage_results <- vapply(
  snp_df, calculate_missing_percentage, numeric(1),
  USE.NAMES = FALSE
)
# Display the results: one row per marker
results_df <- data.frame(
  Marker = colnames(snp_df),
  MAF = maf_results,
  Heterozygotes = heterozygotes_results,
  MissingPercentage = missing_percentage_results
)
print(results_df)
View(snp_df)
set.seed(123)
# Minor Allele Frequency (MAF) of one marker.
#
# genotypes: numeric vector of genotype codes; assumes each code is the
#            number of copies (0, 1 or 2) of one allele carried by a
#            diploid individual. NA entries are ignored.
# Returns:   the frequency of the rarer allele, a value in [0, 0.5], or
#            NA_real_ when no genotype is observed.
#
# The frequency is computed over ALLELES (two per individual), not over
# genotype classes: the rarest genotype class is not the minor allele.
calculate_maf <- function(genotypes) {
  observed <- genotypes[!is.na(genotypes)]
  if (length(observed) == 0L) {
    return(NA_real_)
  }
  allele_freq <- sum(observed) / (2 * length(observed))
  min(allele_freq, 1 - allele_freq)
}
# Proportion of heterozygous individuals at one marker.
#
# genotypes: vector of genotype codes in which 1 marks a heterozygote.
#            NA entries are ignored.
# Returns:   (number of entries equal to 1) / (number of observed
#            genotypes), a value in [0, 1]; NA_real_ when nothing is
#            observed.
#
# The denominator is the NUMBER of genotypes. Dividing by sum(genotypes)
# would divide by the total of the codes instead, which is not a
# proportion and is 0/0 for a marker whose codes are all 0.
calculate_heterozygotes <- function(genotypes) {
  observed <- genotypes[!is.na(genotypes)]
  if (length(observed) == 0L) {
    return(NA_real_)
  }
  sum(observed == 1) / length(observed)
}
# Share of missing genotypes at one marker, as a percentage.
#
# genotypes: vector of genotype codes, NA marking a missing call.
# Returns:   100 * (number of NA entries) / (total number of entries).
calculate_missing_percentage <- function(genotypes) {
  n_missing <- sum(is.na(genotypes))
  n_missing / length(genotypes) * 100
}
# Per-marker statistics: every column of snp_df is one SNP marker.
# vapply() applies the helper to each column and guarantees exactly one
# number per marker; it also copes with a data frame that has no columns,
# where a 1:ncol(snp_df) loop would wrongly run for indices 1 and 0.
# USE.NAMES = FALSE keeps the vectors unnamed so the row names of the
# result table stay 1, 2, 3, ...
maf_results <- vapply(snp_df, calculate_maf, numeric(1), USE.NAMES = FALSE)
heterozygotes_results <- vapply(
  snp_df, calculate_heterozygotes, numeric(1),
  USE.NAMES = FALSE
)
missing_percentage_results <- vapply(
  snp_df, calculate_missing_percentage, numeric(1),
  USE.NAMES = FALSE
)
# Display the results: one row per marker
results_df <- data.frame(
  Marker = colnames(snp_df),
  MAF = maf_results,
  Heterozygotes = heterozygotes_results,
  MissingPercentage = missing_percentage_results
)
print(results_df)
# Make the directory below the working directory; relative file paths
# used after this call are resolved against it.
# NOTE(review): absolute, machine-specific path - it will not exist on
# another computer.
setwd("C:/Users/windows/OneDrive/Desktop/github/Genomics")