Skip to content

Commit

Permalink
Improved () method. Also tests for that method added. Closes #44.
Browse files Browse the repository at this point in the history
  • Loading branch information
iferres committed May 7, 2021
1 parent af17214 commit 24d51aa
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 22 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

* Added panaroo_2_pagoo function to read the output of the panaroo pangenome reconstruction software.

* Improved $add_metadata() method. Now columns with the same name as the one already present are overwritten instead of duplicated (closes #44). Also, users can now provide metadata covering the pangenome partially, i.e, not providing all the gene/clusters/organisms in the mapping column. Those entities without metadata provided are filled with NAs.

# pagoo 0.3.8

* Improve backward compatibility. Older pagoo objects created by third party packages which depend on pagoo do not have an attribute required to successfully load them into session. Now the approach is to downgrade the object to a base pagoo class, or to provide the namespace using the pkg argument.
Expand Down
63 changes: 41 additions & 22 deletions R/PgR6.R
Original file line number Diff line number Diff line change
Expand Up @@ -201,48 +201,67 @@ PgR6 <- R6Class('PgR6',
}

if (map == 'org'){

if('org'%in%colnames(data)){
ma <- match(private$.organisms$org, data$org)
ma <- match(data$org, private$.organisms$org)
if (any(is.na(ma))) stop('data$org do not match with object organisms.')
if (dim(data)[1]!=dim(private$.organisms)[1]){
stp <- paste('data has', dim(data)[1], 'rows while object require', dim(private$.organisms)[1],'.')
stop(stp)
}
data <- data[ma, ]
oc <- which(colnames(data)=='org')
private$.organisms <- cbind(private$.organisms, DataFrame(data[, -oc, drop=F]))
nwcls <- colnames(data)[-oc]
repcol <- nwcls %in% colnames(private$.organisms)
invisible(lapply(which(repcol), function(x){
wr <- paste0("Column '", nwcls[x], "' already exists, overwriting.")
warning(wr, immediate. = TRUE)
private$.organisms[[nwcls[x]]] <- NULL
}))
for (i in seq_along(nwcls)){
private$.organisms[[nwcls[i]]][ma] <- data[[nwcls[i]]]
}
}else{
stop('"data" should contain an "org" column.')
}

}else if (map == 'cluster'){

if('cluster'%in%colnames(data)){
ma <- match(private$.clusters$cluster, data$cluster)
ma <- match(data$cluster, private$.clusters$cluster)
if (any(is.na(ma))) stop('data$cluster do not match with object "cluster" column.')
if (dim(data)[1]!=dim(private$.clusters)[1]){
stp <- paste('data has', dim(data)[1], 'rows while object require', dim(private$.clusters)[1],'.')
stop(stp)
}
data <- data[ma, ]
oc <- which(colnames(data)=='cluster')
private$.clusters <- cbind(private$.clusters, DataFrame(data[, -oc, drop=F]))
nwcls <- colnames(data)[-oc]
repcol <- nwcls %in% colnames(private$.clusters)
invisible(lapply(which(repcol), function(x){
wr <- paste0("Column '", nwcls[x], "' already exists, overwriting.")
warning(wr, immediate. = TRUE)
private$.clusters[[nwcls[x]]] <- NULL
}))
for (i in seq_along(nwcls)){
private$.clusters[[nwcls[i]]][ma] <- data[[nwcls[i]]]
}
}else{
stop('"data" should contain an "cluster" column.')
stop('"data" should contain a "cluster" column.')
}

}else{

if('gid'%in%colnames(data)){
ma <- match(private$.data$gid, data$gid)
ma <- match(data$gid, private$.data$gid)
if (any(is.na(ma))) stop('data$gid do not match with object gid.')
if (dim(data)[1]!=dim(private$.data)[1]){
stp <- paste('data has', dim(data)[1], 'rows while object require', dim(private$.data)[1],'.')
stop(stp)
oc <- which(colnames(data) %in% c("cluster", "org", "gene", "gid"))
nwcls <- colnames(data)[-oc]
repcol <- nwcls %in% colnames(private$.data)
invisible(lapply(which(repcol), function(x){
wr <- paste0("Column '", nwcls[x], "' already exists, overwriting.")
warning(wr, immediate. = TRUE)
private$.data[[nwcls[x]]] <- NULL
}))
for (i in seq_along(nwcls)){
private$.data[[nwcls[i]]][ma] <- data[[nwcls[i]]]
}
data <- data[ma, ]
oc <- which(colnames(data)=='gid')
private$.data <- cbind(private$.data, DataFrame(data[, -oc, drop=F]))
}else{
stop('"data" should contain a "gid" column.')
}

}

invisible(self)
},

Expand Down
50 changes: 50 additions & 0 deletions tests/testthat/test_input.R
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,56 @@ test_that('$save_pangenomeRDS() and load_pangenome() works', {
file.remove(out_rds)
})


test_that("Adding gene metadata after object creation works.",{
sep <- "__"
p <- pagoo(data = data[, -4], sep = sep)
df <- data.frame(gid = paste(data$org, data$gene, sep = sep),
annot = data[, 4])
p$add_metadata(map = "gid", df)
# p2 <- pagoo(data, sep = sep)

expect_is(p, "R6")
expect_is(p, "PgR6")
expect_is(p, "PgR6M")

expect_is(p$genes, "DFrameList")
expect_length(unlist(p$genes, use.names = FALSE), 5)
expect_named(unlist(p$genes, use.names = FALSE),
c("cluster", "org", "gene", "gid", "annot"))
# expect_equal(p$genes, p2$genes)
})

test_that("Adding cluster metadata after object creation works.",{
sep <- "__"
p <- pagoo(data = data, sep = sep)
p$add_metadata(map = "cluster", clust_meta)

expect_is(p, "R6")
expect_is(p, "PgR6")
expect_is(p, "PgR6M")

expect_is(p$organisms, "DFrame")
expect_length(p$clusters, dim(clust_meta)[2])
expect_named(p$clusters, names(clust_meta))
# expect_equal(p$clusters, p2$clusters)
})

test_that("Adding organism metadata after object creation works.",{
sep <- "__"
p <- pagoo(data = data, sep = sep)
p$add_metadata(map = "org", orgs_meta)

expect_is(p, "R6")
expect_is(p, "PgR6")
expect_is(p, "PgR6M")

expect_is(p$organisms, "DFrame")
expect_length(p$organisms, dim(orgs_meta)[2])
expect_named(p$organisms, names(orgs_meta))
# expect_equal(p$organisms, p2$organisms)
})

## Input from third party pangenome reconstruction software
# MISSING

Expand Down

0 comments on commit 24d51aa

Please sign in to comment.