diff --git a/DESCRIPTION b/DESCRIPTION index 2096924..b176948 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,10 +1,10 @@ Package: CAMERA -Version: 1.13.2 -Date: 2012-05-25 +Version: 1.13.3 +Date: 2012-05-29 Title: Collection of annotation related methods for mass spectrometry data Author: Carsten Kuhl, Ralf Tautenhahn, Steffen Neumann {ckuhl|sneumann}@ipb-halle.de, rtautenh@scripps.edu Maintainer: Carsten Kuhl <ckuhl@ipb-halle.de> -Depends: R (>= 2.1.0), methods, xcms (>= 1.13.5) +Depends: R (>= 2.1.0), methods, xcms (>= 1.13.5), Biobase Imports: methods, xcms, RBGL, graph, graphics, grDevices, stats, utils, Hmisc, igraph Suggests: faahKO, RUnit Enhances: Rmpi, snow diff --git a/NAMESPACE b/NAMESPACE index 8a880a7..eaf5c33 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,6 +12,9 @@ importFrom("stats", "cor.test") importFrom("utils", "flush.console", "object.size", "read.table") importFrom("igraph","graph.data.frame","label.propagation.community") +importClassesFrom(Biobase, "Versioned") +importFrom(Biobase, validMsg) + export("annotate","xsAnnotate","getpspectra","annotateDiffreport","getIsotopeCluster","findNeutralLossSpecs","findNeutralLoss","cleanParallel","combinexsAnnos") exportClasses("xsAnnotate") exportMethods("groupFWHM") @@ -29,3 +32,10 @@ exportMethods("calcPC", exportMethods("calcCiS", "calcCaS", "calcIsotopes") + +exportClasses("ruleSet") +exportMethods("setDefaultLists", + "readLists", + "setDefaultParams", + "setParams", + "generateRules") diff --git a/R/00xsAnnotate.R b/R/00xsAnnotate.R index 3cdbce9..febed59 100644 --- a/R/00xsAnnotate.R +++ b/R/00xsAnnotate.R @@ -15,6 +15,7 @@ setClass("xsAnnotate", isoID="matrix", polarity="character", runParallel="list"), + contains=c("Versioned"), prototype( groupInfo= matrix(ncol=0,nrow=0), pspectra = list(), @@ -25,9 +26,10 @@ setClass("xsAnnotate", sample=NULL, xcmsSet=NULL, ruleset=NULL, - annoID=matrix(ncol=3,nrow=0), - annoGrp=matrix(ncol=4,nrow=0), - isoID=matrix(ncol=4,nrow=0), + annoID=matrix(ncol=4, nrow=0), + annoGrp=matrix(ncol=4, nrow=0), + isoID=matrix(ncol=4, nrow=0), polarity="", - runParallel=NULL) + runParallel=NULL, + new("Versioned", versions=c(CAMERA="1.13.333"))) ); diff --git a/R/fct_findAdducts.R b/R/fct_findAdducts.R index ee85e4f..fbdf479 100644 --- a/R/fct_findAdducts.R +++ b/R/fct_findAdducts.R @@ -14,35 +14,39 @@ annotateGrpMPI <- function(params){ return(result); } -annotateGrp <- function(ipeak, imz, rules, mzabs, devppm, isotopes, quasimolion) { +annotateGrp <- function(ipeak, imz, rules, mzabs, devppm, isotopes, quasimolion, rules.idx) { #m/z vector for group i with peakindex ipeak mz <- imz[ipeak]; - na_ini <- which(!is.na(mz)) + naIdx <- which(!is.na(mz)) - if(length(na.omit(mz[na_ini])) < 1){ + #Spectrum have only annotated isotope peaks, without monoisotopic peak + #Give error or warning? + if(length(na.omit(mz[naIdx])) < 1){ return(NULL); } - ML <- massDiffMatrix(mz[na_ini], rules); + ML <- massDiffMatrix(mz[naIdx], rules[rules.idx,]); - hypothese <- createHypothese(ML,rules,devppm,mzabs,na_ini); + hypothese <- createHypothese(ML, rules[rules.idx, ], devppm, mzabs, naIdx); - #Erstelle Hypothesen + #create hypotheses if(is.null(nrow(hypothese)) || nrow(hypothese) < 2 ){ return(NULL); } - #Entferne Hypothesen, welche gegen Isotopenladungen verstossen! + #remove hypotheses, which violates via isotope annotation discovered ion charge if(length(isotopes) > 0){ - hypothese <- check_isotopes(hypothese, isotopes, ipeak); + hypothese <- checkIsotopes(hypothese, isotopes, ipeak); } + if(nrow(hypothese) < 2){ return(NULL); - }; + } - #Test auf Quasi-Molekülionen + #Test if hypothese grps include mandatory ions + #Filter Rules #2 if(length(quasimolion) > 0){ - hypothese <- check_quasimolion(hypothese, quasimolion); + hypothese <- checkQuasimolion(hypothese, quasimolion); } if(nrow(hypothese) < 2){ @@ -50,41 +54,52 @@ annotateGrp <- function(ipeak, imz, rules, mzabs, devppm, isotopes, quasimolion) }; #Entferne Hypothesen, welche gegen OID-Score&Kausalität verstossen! - hypothese <- check_oid_causality(hypothese, rules); + hypothese <- checkOidCausality(hypothese, rules[rules.idx, ]); if(nrow(hypothese) < 2){ return(NULL); }; #Prüfe IPS-Score - hypothese <- check_ips(hypothese) + hypothese <- checkIps(hypothese) if(nrow(hypothese) < 2){ - return(NULL); - }; + return(NULL) + } + + #We have hypotheses and want to add neutral losses + if("typ" %in% colnames(rules)){ + hypothese <- addFragments(hypothese, rules, mz) + hypothese <- resolveFragmentConnections(hypothese) + } return(hypothese); } -createHypothese <- function(ML,rules,devppm,mzabs,na_ini){ +createHypothese <- function(ML, rules, devppm, mzabs, naIdx){ ML.nrow <- nrow(ML); ML.vec <- as.vector(ML); - max.value <- max(round(ML,0)); - hashmap <- vector(mode="list",length=max.value); + max.value <- max(round(ML, 0)); + hashmap <- vector(mode="list", length=max.value); for(i in 1:length(ML)){ val <- trunc(ML[i],0); if(val>1){ hashmap[[val]] <- c(hashmap[[val]],i); } } - - hypothese <- matrix(NA,ncol=9,nrow=0); - colnames(hypothese) <- c("massID", "ruleID", "nmol", "charge", "mass", "oidscore", "ips", "massgrp", "check"); + if("ips" %in% colnames(rules)){ + score <- "ips" + }else{ + score <- "score" + } + hypothese <- matrix(NA,ncol=8,nrow=0); + colnames(hypothese) <- c("massID", "ruleID", "nmol", "charge", "mass", "score", "massgrp", "check"); massgrp <- 1; - for(i in 1:length(hashmap)){ + for(i in seq(along=hashmap)){ if(is.null(hashmap[[i]])){ next; } + candidates <- ML.vec[hashmap[[i]]]; candidates.index <- hashmap[[i]]; if(i != 1 && !is.null(hashmap[[i-1]]) && min(candidates) < i+(2*devppm*i+mzabs)){ @@ -94,11 +109,13 @@ createHypothese <- function(ML,rules,devppm,mzabs,na_ini){ candidates.index <- c(candidates.index,hashmap[[i-1]][index]); } } + if(length(candidates) < 2){ next; } + tol <- max(2*devppm*mean(candidates, na.rm=TRUE))+ mzabs; - result <- cutree(hclust(dist(candidates)),h=tol); + result <- cutree(hclust(dist(candidates)), h=tol); index <- which(table(result) >= 2); if(length(index) == 0){ next; @@ -113,89 +130,128 @@ createHypothese <- function(ML,rules,devppm,mzabs,na_ini){ mass <- ML.nrow; adduct <- adduct -1; } - hypothese <- rbind(hypothese, c(na_ini[mass], adduct, rules[adduct, "nmol"], rules[adduct, "charge"], mean(candidates[m[[ii]]]), rules[adduct, "oidscore"], rules[adduct,"ips"],massgrp ,1)); + hypothese <- rbind(hypothese, c(naIdx[mass], adduct, rules[adduct, "nmol"], rules[adduct, "charge"], mean(candidates[m[[ii]]]), rules[adduct,score],massgrp ,1)); + } + if(length(unique(hypothese[which(hypothese[, "massgrp"] == massgrp), "massID"])) == 1){ + ##only one mass annotated + hypothese <- hypothese[-(which(hypothese[,"massgrp"]==massgrp)),,drop=FALSE] + }else{ + massgrp <- massgrp +1; } - massgrp <- massgrp +1; } } return(hypothese); } - - -create_hypothese <- function(m, index, ML, rules, na_ini){ - a <- m[index]; - # a2 <- lapply(a, sort); - b <- a[which(duplicated(a) == FALSE)]; - b_length <- sapply(b, length); - b_ini <- 1:length(b); - b2 <- b[order(b_length)]; - b_ini <- b_ini[order(b_length)]; - add <- 1; - ini_new <- c(); - - b2_length <- sapply(b2,length); - b.index <- vector("numeric",length=max(b_length)-1); +resolveFragmentConnections <- function(hypothese){ + #Order hypothese after mass + hypothese <- hypothese[order(hypothese[, "mass"], decreasing=TRUE), ] - for( i in 2:max(b_length) - 1){ - b.index[i-1] <- which( b2_length > i)[1]; - } - b.index[max(b_length)-1] <- which(b2_length >= max(b_length)-1)[1] - - # time <- proc.time() - while(length(b2) > 0){ - ind <- b.index[length(b2[[1]])-1]; - ini <- which(sapply(b2[ind:length(b2)], function(x) {all((b2[[1]]) %in% x)}) == TRUE); - if(length(ini) >= 1){ - ini_new <- append(ini_new, add); - b2 <- b2[-1]; - add <- add+1; - }else{ - b2 <- b2[-1]; - add<-add+1; + for(massgrp in unique(hypothese[, "massgrp"])){ + index <- which(hypothese[, "massgrp"] == massgrp & !is.na(hypothese[, "parent"])) + if(length(index) > 0) { + index2 <- which(hypothese[, "massID"] %in% hypothese[index, "massID"] & hypothese[, "massgrp"] != massgrp) + if(length(index2) > 0){ + massgrp2del <- which(hypothese[, "massgrp"] %in% unique(hypothese[index2, "massgrp"])) + hypothese <- hypothese[-massgrp2del, ] + } } } - # proc.time() - time - - - - - if(length(ini_new > 0)){ - ini_new <- b_ini[ini_new]; - b <- b[-ini_new]; + return(hypothese) +} + +addFragments <- function(hypothese, rules, mz){ + #check every hypothese grp + fragments <- rules[which(rules[, "typ"] == "F"), , drop=FALSE] + hypothese <- cbind(hypothese, NA); + colnames(hypothese)[ncol(hypothese)] <- "parent" + if(nrow(fragments) < 1){ + #no fragment exists in rules + return(hypothese) } - nrow_b <- length(b); - ncol_b <- sapply(b, length) - nrow_ML <- nrow(ML); - ncol_ML <- ncol(ML); - ML.v <- as.vector(ML); - hypomass <- sapply(b, function(x) {mean(ML.v[x])}) - hypo <- matrix(NA,ncol=8); - colnames(hypo) <- c("massID","ruleID","nmol","charge","mass","oidscore","ips","massgrp"); + orderMZ <- cbind(order(mz),order(order(mz))) + sortMZ <- cbind(mz,1:length(mz)) + sortMZ <- sortMZ[order(sortMZ[,1]),] - for(row in 1:nrow_b){ - for(col in 1:ncol_b[row]){ - adduct <- b[[row]][col] %/% nrow_ML + 1; - mass <- b[[row]][col] %% nrow_ML; - if(mass == 0){ - mass <- nrow_ML; - adduct <- adduct - 1; - } - hypo <- rbind(hypo, c(na_ini[mass], adduct, rules[adduct, "nmol"], rules[adduct, "charge"], hypomass[row], rules[adduct, "oidscore"], rules[adduct,"ips"], row)); + for(massgrp in unique(hypothese[, "massgrp"])){ + for(index in which(hypothese[, "ruleID"] %in% unique(fragments[, "parent"]) & + hypothese[, "massgrp"] == massgrp)){ + massID <- hypothese[index, "massID"] + ruleID <- hypothese[index, "ruleID"] + indexFrag <- which(fragments[, "parent"] == ruleID) + + while(length(massID) > 0){ + result <- fastMatch(sortMZ[1:orderMZ[massID[1],2],1], mz[massID[1]] + + fragments[indexFrag, "massdiff"], tol=0.05) + invisible(sapply(1:orderMZ[massID[1],2], function(x){ + if(!is.null(result[[x]])){ + massID <<- c(massID, orderMZ[x,1]); + indexFrags <- indexFrag[result[[x]]]; + tmpRes <- cbind(orderMZ[x,1], as.numeric(rownames(fragments)[indexFrags]), fragments[indexFrags, c("nmol", "charge")], + hypothese[index, "mass"], fragments[indexFrags, c("score")], + massgrp, 1, massID[1], deparse.level=0) + colnames(tmpRes) <- colnames(hypothese) + hypothese <<- rbind(hypothese, tmpRes); + } + })) + massID <- massID[-1]; + } } } - hypo <- hypo[-1,]; - check <- 1; - hypothese <- matrix(NA, ncol=9); - colnames(hypothese) <- c("massID", "ruleID", "nmol", "charge", "mass", "oidscore", "ips", "massgrp", "check"); - hypothese <- cbind(hypo, check); - return(hypothese) } -check_ips<- function(hypothese){ +getderivativeIons <- function(annoID, annoGrp, rules, npeaks){ + derivativeIons <- vector("list", npeaks); + charge <- 0; + #check that we have annotations + if(nrow(annoID) < 1){ + return(derivativeIons); + } + + for(i in 1:nrow(annoID)){ + peakid <- annoID[i, 1]; + grpid <- annoID[i, 2]; + ruleid <- annoID[i, 3]; + parent <- annoID[i, 4]; +# if(is.null(derivativeIons[[peakid]])){ +# #Peak has no annotation +# if(charge == 0 | rules[ruleid, "charge"] == charge){ +# derivativeIons[[peakid]][[1]] <- list(rule_id = ruleid, charge = rules[ruleid, "charge"], +# nmol= rules[ruleid, "nmol"], name=paste(rules[ruleid, "name"]), +# mass=annoGrp[which(annoGrp[, "id"] == grpid), 2],parent=parent) +# } +# }else{ +# #Peak has already annotations + if(charge == 0 | rules[ruleid, "charge"] == charge){ + mass <- annoGrp[which(annoGrp[, "id"] == grpid), 2]; + if(is.na(parent)){ + name <- paste(rules[ruleid, "name"]); + } else { + #look for name + name <- paste(rules[ruleid, "name"]); + for(ii in seq(along=derivativeIons[[parent]])){ + if(derivativeIons[[parent]][[ii]]$mass == mass){ + + break; + } + } + } + derivativeIons[[peakid]][[(length(derivativeIons[[peakid]])+1)]] <- + list(rule_id = ruleid, charge=rules[ruleid, "charge"], + nmol=rules[ruleid, "nmol"], name=name, + mass=mass, + parent=parent) + } +# } + charge=0; + } + return(derivativeIons); +} + +checkIps <- function(hypothese){ for(hyp in 1:nrow(hypothese)){ if(length(which(hypothese[, "massgrp"] == hypothese[hyp, "massgrp"])) < 2){ hypothese[hyp, "check"] = 0; @@ -213,83 +269,87 @@ check_ips<- function(hypothese){ if(length(id <- which(hypothese[, "massID"] == hypothese[hyp, "massID"] & hypothese[, "check"] != 0)) > 1){ masses <- hypothese[id, "mass"] nmasses <- sapply(masses, function(x) { - sum(hypothese[which(hypothese[, "mass"] == x), "ips"]) + sum(hypothese[which(hypothese[, "mass"] == x), "score"]) }) masses <- masses[-which(nmasses == max(nmasses))]; if(length(masses) > 0){ - hypothese[unlist(sapply(masses, function(x) {which(hypothese[,"mass"]==x)})),"check"]=0; + hypothese[unlist(sapply(masses, function(x) {which(hypothese[, "mass"]==x)})), "check"]=0; } } } - hypothese <- hypothese[which(hypothese[, "check"]==TRUE), ]; - if(is.null(nrow(hypothese))) { - hypothese <- matrix(hypothese, byrow=F, ncol=9) - } - colnames(hypothese)<-c("massID", "ruleID", "nmol", "charge", "mass", "oidscore", "ips","massgrp", "check") + hypothese <- hypothese[which(hypothese[, "check"]==TRUE), ,drop=FALSE]; + #check if hypothese grps annotate at least two different peaks + hypothese <- checkHypothese(hypothese) return(hypothese) } -check_oid_causality <- function(hypothese,rules){ - for(hyp in 1:nrow(hypothese)){ - #check nmol - if(hypothese[hyp, "nmol"] > 1){ - #nmol > 1; - check_sure <- TRUE; - if(hypothese[hyp, "charge"] == 1){ - #nmol > 1 and charge = 1; e.g. [2M+H]+, ensure [M+H] is there - if(length(which(hypothese[, "mass"] == hypothese[hyp, "mass"] & hypothese[, "oidscore"] == hypothese[hyp, "oidscore"])) > 1){ - #same oidscore is there, could also be [3M+H]; otherwise could not check - for(prof in (hypothese[hyp, "nmol"] - 1):1){ - #check if [M+H] is there, for a [3M+H], [2M+H] and [M+H] has to be there - indi <- which(hypothese[,"mass"] == hypothese[hyp,"mass"] & hypothese[,"oidscore"] == hypothese[hyp,"oidscore"] & hypothese[,"nmol"] == prof) - if(length(indi) == 0){ - check_sure <- FALSE; - hypothese[hyp,"check"] <- 0; - next; - } - } - } - }else if(abs(hypothese[hyp, "charge"]) == hypothese[hyp, "nmol"]){ - #nmol > 1 and charge = nmol; e.g. [2M+2H]2+ - if(length(which(hypothese[, "mass"] == hypothese[hyp, "mass"] & hypothese[, "oidscore"] == hypothese[hyp, "oidscore"])) > 1){ - for(prof in (hypothese[hyp,"nmol"]-1):1){ - indi<-which(hypothese[,"mass"]==hypothese[hyp,"mass"] & hypothese[,"oidscore"]== hypothese[hyp,"oidscore"] & hypothese[,"nmol"]==prof) - if(length(indi) == 0){ - check_sure <- FALSE; - hypothese[hyp,"check"] <- 0;#next; - } - } +checkOidCausality <- function(hypothese,rules){ + #check every hypothese grp + for(hyp in unique(hypothese[,"massgrp"])){ + hyp.nmol <- which(hypothese[, "massgrp"] == hyp & hypothese[, "nmol"] > 1) + + for(hyp.nmol.idx in hyp.nmol){ + if(length(indi <- which(hypothese[, "mass"] == hypothese[hyp.nmol.idx, "mass"] & abs(hypothese[, "charge"]) == hypothese[, "nmol"])) > 1){ + #check if [M+H] [2M+2H]... annotate the same molecule + massdiff <- rules[hypothese[indi, "ruleID"], "massdiff"] / rules[hypothese[indi, "ruleID"], "charge"] + if(length(indi_new <- which(duplicated(massdiff))) > 0){ + hypothese[hyp.nmol.idx, "check"] <- 0; } - if(length(indi <- which(hypothese[, "mass"] == hypothese[hyp, "mass"] & abs(hypothese[, "charge"]) == hypothese[, "nmol"])) > 1){ - #check if [M+H] [2M+2H]... annotate the same molecule - massdiff <- rules[hypothese[indi,"ruleID"],"massdiff"] / rules[hypothese[indi,"ruleID"],"charge"] - if(length(indi_new <- which(duplicated(massdiff)))>0){ - check_sure <- FALSE; - hypothese[hyp, "check"] <- 0; - } - } - } - if(check_sure){ - hypothese[hyp,"check"] <- 1; - } - }else{ - if(hypothese[hyp,"charge"] > 1){ - ##todo - }else{ - #nothing to say } } } - hypothese <- hypothese[which(hypothese[, "check"] == TRUE), ]; - if(is.null(nrow(hypothese))) { - hypothese <- matrix(hypothese, byrow=F, ncol=9) - } - colnames(hypothese) <- c("massID", "ruleID", "nmol", "charge", "mass", "oidscore", "ips", "massgrp", "check") + +# #check nmol +# if(hypothese[hyp, "nmol"] > 1){ +# #nmol > 1; +# checkSure <- TRUE; +# if(hypothese[hyp, "charge"] == 1){ +# #nmol > 1 and charge = 1; e.g. [2M+H]+, ensure [M+H] is there +# if(length(which(hypothese[, "mass"] == hypothese[hyp, "mass"] & hypothese[, "oidscore"] == hypothese[hyp, "oidscore"])) > 1){ +# #same oidscore is there, could also be [3M+H]; otherwise could not check +# for(prof in (hypothese[hyp, "nmol"] - 1):1){ +# #check if [M+H] is there, for a [3M+H], [2M+H] and [M+H] has to be there +# indi <- which(hypothese[,"mass"] == hypothese[hyp,"mass"] & hypothese[,"oidscore"] == hypothese[hyp,"oidscore"] & hypothese[,"nmol"] == prof) +# if(length(indi) == 0){ +# checkSure <- FALSE; +# hypothese[hyp,"check"] <- 0; +# next; +# } +# } +# } +# }else if(abs(hypothese[hyp, "charge"]) == hypothese[hyp, "nmol"]){ +# #nmol > 1 and charge = nmol; e.g. [2M+2H]2+ +# if(length(which(hypothese[, "mass"] == hypothese[hyp, "mass"] & hypothese[, "oidscore"] == hypothese[hyp, "oidscore"])) > 1){ +# for(prof in (hypothese[hyp,"nmol"]-1):1){ +# indi<-which(hypothese[,"mass"]==hypothese[hyp,"mass"] & hypothese[,"oidscore"]== hypothese[hyp,"oidscore"] & hypothese[,"nmol"]==prof) +# if(length(indi) == 0){ +# checkSure <- FALSE; +# hypothese[hyp,"check"] <- 0;#next; +# } +# } +# } +# if(length(indi <- which(hypothese[, "mass"] == hypothese[hyp, "mass"] & abs(hypothese[, "charge"]) == hypothese[, "nmol"])) > 1){ +# #check if [M+H] [2M+2H]... annotate the same molecule +# massdiff <- rules[hypothese[indi, "ruleID"], "massdiff"] / rules[hypothese[indi, "ruleID"], "charge"] +# if(length(indi_new <- which(duplicated(massdiff))) > 0){ +# checkSure <- FALSE; +# hypothese[hyp, "check"] <- 0; +# } +# } +# } +# if(checkSure){ +# hypothese[hyp, "check"] <- 1; +# } +# } +# } + hypothese <- hypothese[which(hypothese[, "check"] == TRUE), ,drop=FALSE]; + #check if hypothese grps annotate at least two different peaks + hypothese <- checkHypothese(hypothese) return(hypothese) } -check_quasimolion <- function(hypothese, quasimolion){ +checkQuasimolion <- function(hypothese, quasimolion){ hypomass <- unique(hypothese[, "mass"]) for(mass in 1:length(hypomass)){ if(!any(quasimolion %in% hypothese[which(hypothese[, "mass"] == hypomass[mass]), "ruleID"])){ @@ -299,15 +359,14 @@ check_quasimolion <- function(hypothese, quasimolion){ } } - hypothese <- hypothese[which(hypothese[, "check"]==TRUE), ]; - if(is.null(nrow(hypothese))) { - hypothese <- matrix(hypothese, byrow=F, ncol=9) - } - colnames(hypothese) <- c("massID", "ruleID", "nmol", "charge", "mass", "oidscore", "ips", "massgrp", "check") + hypothese <- hypothese[which(hypothese[, "check"]==TRUE), , drop=FALSE]; + #check if hypothese grps annotate at least two different peaks + hypothese <- checkHypothese(hypothese) + return(hypothese) } -check_isotopes <- function(hypothese, isotopes, ipeak){ +checkIsotopes <- function(hypothese, isotopes, ipeak){ for(hyp in 1:nrow(hypothese)){ peakid <- ipeak[hypothese[hyp, 1]]; if(!is.null(isotopes[[peakid]])){ @@ -322,518 +381,27 @@ check_isotopes <- function(hypothese, isotopes, ipeak){ } } } - hypothese <- hypothese[which(hypothese[, "check"]==TRUE), ]; - if(is.null(nrow(hypothese))){ - hypothese <- matrix(hypothese, byrow=F, ncol=9) - } - colnames(hypothese) <- c("massID", "ruleID", "nmol", "charge", "mass", "oidscore", "ips", "massgrp", "check") + hypothese <- hypothese[which(hypothese[, "check"]==TRUE), ,drop=FALSE]; + #check if hypothese grps annotate at least two different peaks + hypothese <- checkHypothese(hypothese) + return(hypothese) } -calcRules2 <- function (maxcharge=3, mol=3, nion=1, nnloss=1, nnadd=1, nh=3, polarity=NULL){ - - ##Read Tabellen - ionlist <- system.file('lists/ions.csv', package = "CAMERA")[1] - if (!file.exists(ionlist)) stop('ions.csv not found.') - ionlist<-read.table(ionlist, header=TRUE, dec=".", sep=",", as.is=TRUE, stringsAsFactors = FALSE); - - neutralloss <- system.file('lists/neutralloss.csv', package = "CAMERA")[1] - if (!file.exists(neutralloss)) stop('neutralloss.csv not found.') - neutralloss <- read.table(neutralloss, header=TRUE, dec=".", sep=",", as.is=TRUE, stringsAsFactors = FALSE); - - neutraladdition <- system.file('lists/neutraladdition.csv', package = "CAMERA")[1] - if (!file.exists(neutraladdition)) stop('neutraladdition.csv not found.') - neutraladdition <- read.table(neutraladdition, header=TRUE, dec=".", sep=",", as.is=TRUE, stringsAsFactors = FALSE); - ##End Read Tabellen - - ##Create Rules - ruleset <- matrix(nrow=0, ncol=8); - colnames(ruleset) <- c("name","nmol","charge","massdiff","typ","mandatory","score","parent") - - tmpname <- c(); - tmpcharge <- 0; - tmpmass <- 0; - tmpionparent<- NA; - massH <- 1.007276 - - ##Positive Rule set - if(polarity == "positive"){ - charge <- "+" - chargeValue <- 1 - }else if(polarity == "negative"){ - charge <- "-" - chargeValue <- -1 - }else{ - stop("Unknown polarity mode in rule set creating! Debug!\n") - } - - #Hydrogen part is hard coded - for(k in 1:mol){ - if(k == 1){ - #For M+H - str <- ""; - tmpips <- 0.5; - quasi <- 1 - }else{ - #For xM+H - str <- k; - tmpips <- 0; - quasi <- 0 - } - - for(xh in seq.int(nh)){ - if(xh == 1){ - ruleset <- rbind(ruleset, cbind(paste("[", str, "M", charge, "H]", charge, sep=""), k, chargeValue, - massH*chargeValue, "A", - quasi, tmpips+0.5, tmpionparent)); - } else { - ruleset <- rbind(ruleset, cbind(paste("[", str, "M", charge, xh, "H]", xh, charge, sep=""), - k,xh*chargeValue, massH*xh*chargeValue, "A", 0, 0.5, tmpionparent+1)); - } - } - - for(i in 1:nrow(ionlist)){ - #Add change of kat- respectively anion against one hydrogen - if(ionlist[i, 2] > 0){ - if(ionlist[i, 2] == 1){ - sumcharge <- ""; - }else{ - sumcharge <- ionlist[i, 2]; - } - ruleset <- rbind(ruleset, cbind(paste("[", str, "M", charge,"H+", ionlist[i,1], "-H]", sumcharge, - charge, sep=""), k, ionlist[i, 2]*chargeValue, - ionlist[i, 3]+massH*(chargeValue-1), - "A", 0, 0.25, tmpionparent)); - } - - #xM+H + additional Kation like Na or K - if(ionlist[i,2] <= 0 & chargeValue > 0) { - #Ions with negative charge, when polarity is positive - next; - } else if(ionlist[i, 2] >= 0 & chargeValue < 0){ - #Ions with positive charge, when polarity is negative - next; - } - - #Add Rule to Ruleset - if(abs(ionlist[i, 2]) == 1){ - ruleset <- rbind(ruleset, cbind(paste("[", str, "M", charge,"H+", ionlist[i,1], "]2", - charge, sep=""), k, ionlist[i, 2]+1*chargeValue, - ionlist[i, 3]+massH*chargeValue, "A", 0, 0.25, tmpionparent)); - }else{ - ruleset <- rbind(ruleset, cbind(paste("[", str, "M", charge,"H+", ionlist[i, 1], "]", - ionlist[i, 2]+(1*chargeValue), - charge, sep=""), k, ionlist[i, 2]+1*chargeValue, - ionlist[i, 3]+massH*chargeValue, "A" ,0, 0.25, tmpionparent)); - } - - }#End for loop nrow(ionlist) - - ##Coeff - coefficient Matrix, for generating rules with - #combination of kat- or anionsexchange ions like [M-2H+Na] (M-H and change of H against Na) - coeff <- expand.grid(rep(list(0:nion), nrow(ionlist))) - if(chargeValue > 0){ - index <- which(ionlist[, 2] <= 0); - }else{ - index <- which(ionlist[, 2] >= 0); - } - - if(length(index) > 0){ - coeff[, index] <- 0; - } - - coeff <- unique(coeff); - for(i in 1:nrow(coeff)){ - if(sum(coeff[i, 1:nrow(ionlist)]) > 2 | sum(coeff[i, 1:nrow(ionlist)]) < 1){ - next; - } - - tmpname <- paste("[",str,"M",sep=""); - tmpcharge <- 0; - tmpmass <- 0; - - for(ii in 1:(ncol(coeff))){ - if(coeff[i,ii] > 0){ - if(coeff[i,ii] > 1){ - tmpname <- paste(tmpname, "+", coeff[i, ii], ionlist[ii, 1], sep=""); - }else{ - tmpname <- paste(tmpname, "+", ionlist[ii, 1], sep=""); - } - tmpcharge <- tmpcharge + coeff[i, ii] * ionlist[ii, 2]; - tmpmass <- tmpmass + coeff[i, ii] * ionlist[ii, 3]; - } - } - - if(abs(tmpcharge) > 1){ - tmpname <- paste(tmpname, "]", tmpcharge, charge, sep="") - }else{ - tmpname <- paste(tmpname, "]", charge, sep="") - } - - if(tmpcharge > maxcharge){ - next; - } - - if(sum(coeff[i, ]) == 1 && k == 1){ - ruleset <- rbind(ruleset, cbind(tmpname, k, tmpcharge, tmpmass, "A", 1, 0.75, tmpionparent)); - }else{ - ruleset <- rbind(ruleset, cbind(tmpname, k, tmpcharge, tmpmass, "A", 0, 0.25, tmpionparent)); - } - }#end for loop nrow(coeff) - }#end for loop k - - # Create neutral addition to M+H from list - for(i in 1:nrow(neutraladdition)){ - #Add neutral ion to only M+H - ruleset <- rbind(ruleset, cbind(paste("[M", charge, "H+", neutraladdition[i, 1], "]", charge, sep="") , 1, chargeValue, - neutraladdition[i, 2]+(massH*chargeValue), "A", 0, 0.25, 1)); +checkHypothese <- function(hypothese){ + if(is.null(nrow(hypothese))){ + hypothese <- matrix(hypothese, byrow=F, ncol=8) + } + colnames(hypothese) <- c("massID", "ruleID", "nmol", "charge", "mass", "score", "massgrp", "check") + for(i in unique(hypothese[,"massgrp"])){ + if(length(unique(hypothese[which(hypothese[, "massgrp"] == i), "massID"])) == 1){ + ##only one mass annotated + hypothese <- hypothese[-(which(hypothese[,"massgrp"]==i)), , drop=FALSE] + } } - - ## Add neutral loss from list to ruleset - for(i in 1:nrow(neutralloss)){ - ruleset <- rbind(ruleset, cbind(paste("-", neutralloss[i, 1], sep=""), 1, 0, - -neutralloss[i, 2], "F", 0, 0.25, 1)); - #Eliminate rules with charge > maxcharge - if(length(index <- which(ruleset[, "charge"] > maxcharge)) > 0){ - ruleset <- ruleset[-index, ]; - } - } - return(ruleset); -} - - -calcRules <- function (maxcharge=3,mol=3,nion=2,nnloss=1,nnadd=1,nh=2,polarity=NULL){ - #Allocate variables - name <- c(); - nmol <- c(); - charge <- c(); - massdiff <- c(); - oidscore <- c(); - quasi <- c(); - ips <- c(); - fragment <- c(); - - ##Read Tabellen - ionlist <- system.file('lists/ions.csv', package = "CAMERA")[1] - if (!file.exists(ionlist)) stop('ions.csv not found.') - ionlist<-read.table(ionlist, header=TRUE, dec=".", sep=",", - as.is=TRUE, stringsAsFactors = FALSE); - - neutralloss <- system.file('lists/neutralloss.csv', package = "CAMERA")[1] - if (!file.exists(neutralloss)) stop('neutralloss.csv not found.') - neutralloss <- read.table(neutralloss, header=TRUE, dec=".", sep=",", - as.is=TRUE, stringsAsFactors = FALSE); - - neutraladdition <- system.file('lists/neutraladdition.csv', package = "CAMERA")[1] - if (!file.exists(neutraladdition)) stop('neutraladdition.csv not found.') - neutraladdition <- read.table(neutraladdition, - header=TRUE, dec=".", sep=",", - as.is=TRUE, stringsAsFactors = FALSE); - ##End Read Tabellen - - ##temp variables - tmpname <- c(); - tmpnmol <- c(); - tmpcharge <- 0; - tmpmass <- 0; - tmpips <- 0; - tmpfragment <- c(); - - #Molekülionen - if(polarity=="positive"){ - #Hydrogen, hard coded - for(k in 1:mol){ - # k - number of molecules (kM+X) - if(k == 1){ - str <- ""; - tmpips <- 1.0; - }else{ - str <- k; - tmpips <- 0.5; - }; - - name <- append(name, paste("[", str, "M+H]+", sep="")); - charge <- append(charge, 1); - massdiff <- append(massdiff, 1.007276); - nmol <- append(nmol, k); - - if(k == 1) { - quasi <- append(quasi, 1); - } else { - quasi <- append(quasi, 0); - }; - - oidscore <- append(oidscore, 1); - ips <- append(ips, tmpips) - - name <- append(name, paste("[", str, "M+2H]2+", sep="")); - charge <- append(charge, 2); - massdiff <- append(massdiff, 2.014552); - nmol <- append(nmol, k); - quasi <- append(quasi, 0); - oidscore <- append(oidscore, 2); - ips <- append(ips, tmpips) - - name <- append(name,paste("[",str,"M+3H]3+",sep="")); - charge<-append(charge,3); - massdiff<-append(massdiff,3.021828); - nmol<-append(nmol,k); - quasi<-append(quasi,0); - oidscore<-append(oidscore,3); - ips<-append(ips,tmpips) - oid<-3; - for(i in 1:nrow(ionlist)){ - if(ionlist[i,2]<=0) {next;} - if(ionlist[i,2]==1){ - name<-append(name,paste("[",str,"M+H+",ionlist[i,1],"]2+",sep="")); - }else{ - name<-append(name,paste("[",str,"M+H+",ionlist[i,1],"]",ionlist[i,2]+1,"+",sep="")); - } - charge <- append(charge,ionlist[i,2]+1); - massdiff <- append(massdiff,ionlist[i,3]+1.007276); - nmol <- append(nmol,k); - quasi <- append(quasi,0); - oidscore <- append(oidscore,oid+i); - if(tmpips>0.75){ - ips<-append(ips,0.5) - }else{ - ips<-append(ips,tmpips); - }#Austausch - } - oid<-oid+nrow(ionlist); - coeff<-expand.grid(rep(list(0:nion),nrow(ionlist))) - if(length(list<-which(ionlist[,2]<=0))>0){ - coeff[,list]<-0; - } - coeff<-unique(coeff); - coeff<-cbind(coeff,rep(0,nrow(coeff))); - coeff<-coeff[-1,] - tmp<-NULL; - for(i in 1:nrow(ionlist)){ - if(ionlist[i,2]<=0)next; - #Austausch erstmal nur einen pro Ion - tmp<-rbind(tmp,t(apply(coeff,1,function(x) {x[i]<-x[i]+1;x[nrow(ionlist)+1]<-1;x}))); - } - coeff<-unique(rbind(coeff,tmp)); - for(i in 1:nrow(coeff)){ - tmpname<-paste("[",str,"M",sep=""); - tmpcharge<-0;tmpmass<-0; - for(ii in 1:(ncol(coeff)-1)){ - if(coeff[i,ii]>0){ - if(coeff[i,ii]>1){ - tmpname<-paste(tmpname,"+",coeff[i,ii],ionlist[ii,1],sep=""); - }else{ - tmpname<-paste(tmpname,"+",ionlist[ii,1],sep=""); - } - tmpcharge<-tmpcharge+coeff[i,ii]*ionlist[ii,2]; - tmpmass<-tmpmass+coeff[i,ii]*ionlist[ii,3] - } - } - if(coeff[i,ncol(coeff)]>0){ - #Austausch hat stattgefunden, einfach bsp 1 - tmpname<-paste(tmpname,"-H",sep=""); - tmpcharge<-tmpcharge-1; - tmpmass<-tmpmass-1.007276; - tmpips<-0.25; - } - if(tmpcharge>1){ - tmpname<-paste(tmpname,"]",tmpcharge,"+",sep="") - }else{ - tmpname<-paste(tmpname,"]+",sep="") - } - name<-append(name,tmpname) - charge<-append(charge,tmpcharge) - massdiff<-append(massdiff,tmpmass) - nmol <- append(nmol,k); - oidscore<-append(oidscore,oid+i) - if(sum(coeff[i,])==1&& k==1){ - quasi <- append(quasi,1); - ips <-append(ips,tmpips); - }else{ - quasi <- append(quasi,0); - if(tmpips>0.75){ - ips <- append(ips,0.75); - }else{ - ips <- append(ips,tmpips); - } - } - } - } - oid<-max(oidscore); - ##Erzeuge Neutral Addition - index<-which(quasi==1) - for(i in 1:nrow(neutraladdition)){ - if(length(index2<-which(ionlist[,2]>0))>0){ - for(ii in 1:length(index2)){ - if(ionlist[index2[ii],2] > 1){ - name <- append(name,paste("[M+",ionlist[index2[ii],1],"+",neutraladdition[i,1],"]",abs(ionlist[index2[ii],2]),"+",sep="")); - }else{ - name <- append(name,paste("[M+",ionlist[index2[ii],1],"+",neutraladdition[i,1],"]+",sep="")); - } - charge <- append(charge,ionlist[index2[ii],2]); - massdiff <- append(massdiff,neutraladdition[i,2]+ionlist[index2[ii],3]); - nmol <- append(nmol,1); - quasi <- append(quasi,0); - oidscore <- append(oidscore,oid+1);oid<-oid+1; - ips<-append(ips,0.5); - } - } - if(neutraladdition[i,1]=="NaCOOH"){next;} - name<-append(name,paste("[M+H+",neutraladdition[i,1],"]+",sep="")); - charge<-append(charge,+1); - massdiff<- append(massdiff,neutraladdition[i,2]+1.007276); - nmol<-append(nmol,1); - quasi<-append(quasi,0) - oidscore<-append(oidscore,oid+1);oid<-oid+1; - ips<-append(ips,0.5); - } - oid<-max(oidscore); - ##Erzeuge Neutral loss - index<-which(quasi==1) - for(i in 1:nrow(neutralloss)){ - for(ii in 1:maxcharge){ - if(ii > 1){ - name<-append(name,paste("[M+",ii,"H-",neutralloss[i,1],"]",ii,"+",sep="")); - }else {name<-append(name,paste("[M+H-",neutralloss[i,1],"]+",sep=""));} - charge<-append(charge,ii); - massdiff<- append(massdiff,-neutralloss[i,2]+1.007276*ii); - nmol<-append(nmol,1); - quasi<-append(quasi,0) - oidscore<-append(oidscore,oid+1);oid<-oid+1; - ips<-append(ips,0.25); - } - } - ruleset <- data.frame(name,nmol,charge,massdiff,oidscore,quasi,ips) - if(length(index<-which(ruleset[,"charge"]>maxcharge))>0){ - ruleset<- ruleset[-index,]; - } - }else if(polarity=="negative"){ - #Wasserstoff, hard codiert - for(k in 1:mol){ - if(k==1){str<-"";tmpips<-1.0;}else{str<-k;tmpips<-0.5}; - name<-append(name,paste("[",str,"M-H]-",sep="")); - charge<-append(charge,-1);massdiff<-append(massdiff,-1.007276);nmol<-append(nmol,k);if(k==1){quasi<-append(quasi,1);}else{quasi<-append(quasi,0);};oidscore<-append(oidscore,1);ips<-append(ips,tmpips) - name<-append(name,paste("[",str,"M-2H]2-",sep=""));charge<-append(charge,-2);massdiff<-append(massdiff,-2.014552);nmol<-append(nmol,k);quasi<-append(quasi,0);oidscore<-append(oidscore,2);ips<-append(ips,tmpips) - name<-append(name,paste("[",str,"M-3H]3-",sep=""));charge<-append(charge,-3);massdiff<-append(massdiff,-3.021828);nmol<-append(nmol,k);quasi<-append(quasi,0);oidscore<-append(oidscore,3);ips<-append(ips,tmpips) - oid<-3; - for(i in 1:nrow(ionlist)){ - if(ionlist[i,2]>=0){ - if(ionlist[i,2]>1) {next;} - name<-append(name,paste("[",str,"M-2H+",ionlist[i,1],"]-",sep="")); - charge <- append(charge,ionlist[i,2]-2); - massdiff<- append(massdiff,ionlist[i,3]-(2*1.007276)); - nmol <- append(nmol,k); - quasi <- append(quasi,0); - oidscore<-append(oidscore,oid+i); - ips<-append(ips,0.25); - next; - } - if(ionlist[i,2]== -1){ - name<-append(name,paste("[",str,"M-H+",ionlist[i,1],"]2-",sep="")); - }else{ - name<-append(name,paste("[",str,"M-H+",ionlist[i,1],"]",ionlist[i,2]+1,"-",sep="")); - } - charge <- append(charge,ionlist[i,2]-1); - massdiff<- append(massdiff,ionlist[i,3]-1.007276); - nmol <- append(nmol,k); - quasi <- append(quasi,0); - oidscore<-append(oidscore,oid+i); - ips<-append(ips,tmpips); - #Austausch - } - oid<-oid+nrow(ionlist); - coeff<-expand.grid(rep(list(0:nion),nrow(ionlist))) - if(length(list<-which(ionlist[,2]>=0))>0){ - coeff[,list]<-0; - } - coeff<-unique(coeff); - coeff<-cbind(coeff,rep(0,nrow(coeff))); - coeff<-coeff[-1,] - for(i in 1:nrow(coeff)){ - tmpname<-paste("[",str,"M",sep=""); - tmpcharge<-0;tmpmass<-0; - for(ii in 1:(ncol(coeff)-1)){ - if(coeff[i,ii]>0){ - if(coeff[i,ii]>1){ - tmpname<-paste(tmpname,"+",coeff[i,ii],ionlist[ii,1],sep=""); - }else{ - tmpname<-paste(tmpname,"+",ionlist[ii,1],sep=""); - } - tmpcharge<-tmpcharge+coeff[i,ii]*ionlist[ii,2]; - tmpmass<-tmpmass+coeff[i,ii]*ionlist[ii,3] - } - } - if(coeff[i,ncol(coeff)]>0){ - #Austausch hat stattgefunden, einfach bsp 1 - tmpname<-paste(tmpname,"-H",sep=""); - tmpcharge<-tmpcharge-1; - tmpmass<-tmpmass-1.007276; - tmpips<-0.5; - } - if(tmpcharge< -1){ - tmpname<-paste(tmpname,"]",abs(tmpcharge),"-",sep="") - }else{ - tmpname<-paste(tmpname,"]-",sep="") - } - name<-append(name,tmpname) - charge<-append(charge,tmpcharge) - massdiff<-append(massdiff,tmpmass) - nmol <-append(nmol,k); - oidscore<-append(oidscore,oid+i) - if(sum(coeff[i,])==1&& k==1){ - quasi <-append(quasi,1); - }else{ - quasi <-append(quasi,0); - } - ips <-append(ips,tmpips); - } - } - oid<-max(oidscore); - ##Erzeuge Neutral Addition - index<-which(quasi==1) - for(i in 1:nrow(neutraladdition)){ - if(length(index2<-which(ionlist[,2]<0))>0){ - for(ii in 1:length(index2)){ - if(ionlist[index2[ii],2]< -1){ - name <- append(name,paste("[M+",ionlist[index2[ii],1],"+",neutraladdition[i,1],"]",abs(ionlist[index2[ii],2]),"-",sep="")); - }else{ - name <- append(name,paste("[M+",ionlist[index2[ii],1],"+",neutraladdition[i,1],"]-",sep="")); - } - charge <- append(charge,ionlist[index2[ii],2]); - massdiff<- append(massdiff,neutraladdition[i,2]+ionlist[index2[ii],3]); - nmol <- append(nmol,1); - quasi <- append(quasi,0); - oidscore<-append(oidscore,oid+1);oid<-oid+1; - ips<-append(ips,0.5); - } - } - name<-append(name,paste("[M-H+",neutraladdition[i,1],"]-",sep="")); - charge<-append(charge,-1); - massdiff<- append(massdiff,neutraladdition[i,2]-1.007276); - nmol<-append(nmol,1); - quasi<-append(quasi,0) - oidscore<-append(oidscore,oid+1);oid<-oid+1; - ips<-append(ips,0.5); - } - oid<-max(oidscore); - ##Erzeuge Neutral loss - index<-which(quasi==1) - for(i in 1:nrow(neutralloss)){ - name<-append(name,paste("[M-H-",neutralloss[i,1],"]-",sep="")); - charge<-append(charge,-1); - massdiff<- append(massdiff,-neutralloss[i,2]-1.007276); - nmol<-append(nmol,1); - quasi<-append(quasi,0) - oidscore<-append(oidscore,oid+1);oid<-oid+1; - ips<-append(ips,0.25); - } - ruleset <- data.frame(name,nmol,charge,massdiff,oidscore,quasi,ips) - if(length(index<-which(ruleset[,"charge"]< -maxcharge))>0){ - ruleset<- ruleset[-index,]; - } - }else stop("Unknown error") -return(ruleset); + return(hypothese) } + add_same_oidscore <-function(hypo,adducts,adducts_no_oid){ hypo_new<-matrix(NA,ncol=6) @@ -849,15 +417,18 @@ add_same_oidscore <-function(hypo,adducts,adducts_no_oid){ } -massDiffMatrix <- function(m,adducts){ -nadd <- length(adducts[,"massdiff"]) -DM <- matrix(NA,length(m),nadd) - -for (i in 1:length(m)) - for (j in 1:nadd) - DM[i,j] <- (abs(adducts[j,"charge"] * m[i]) - adducts[j,"massdiff"]) / adducts[j,"nmol"] # ((z*m) - add) /n - -return(DM) +massDiffMatrix <- function(m, rules){ + #m - m/z vector + #rules - annotation rules + nRules <- nrow(rules); + DM <- matrix(NA, length(m), nRules) + + for (i in seq_along(m)){ + for (j in seq_len(nRules)){ + DM[i, j] <- (abs(rules[j, "charge"] * m[i]) - rules[j, "massdiff"]) / rules[j, "nmol"] # ((z*m) - add) /n + } + } + return(DM) } massDiffMatrixNL <- function(m,neutralloss){ diff --git a/R/ruleSet.R b/R/ruleSet.R new file mode 100644 index 0000000..9fc457e --- /dev/null +++ b/R/ruleSet.R @@ -0,0 +1,682 @@ +############################################################## +## private class to calculate (dynamic) rulesets +## + +setClass("ruleSet", + representation(ionlistfile="character", + ionlist="data.frame", + neutrallossfile="character", + neutralloss="data.frame", + neutraladditionfile="character", + neutraladdition="data.frame", + maxcharge="numeric", + mol="numeric", + nion="numeric", + nnloss="numeric", + nnadd="numeric", + nh="numeric", + polarity="character", + rules="data.frame", + lib.loc="character"), + contains=c("Versioned"), + prototype=prototype( + ionlistfile="", + ionlist=data.frame(), + neutrallossfile="", + neutralloss=data.frame(), + neutraladditionfile="", + neutraladdition=data.frame(), + maxcharge=numeric(), + mol=numeric(), + nion=numeric(), + nnloss=numeric(), + nnadd=numeric(), + nh=numeric(), + polarity=NULL, + rules=data.frame(), + lib.loc=NULL, + new("Versioned", versions=c(ruleSet="0.1.1"))), + validity=function(object) { + TRUE + }) + +setGeneric("setDefaultLists", function(object, lib.loc=.libPaths()) standardGeneric("setDefaultLists")) +setGeneric("readLists", function(object) standardGeneric("readLists")) +setGeneric("setDefaultParams", function(object) standardGeneric("setDefaultParams")) +setGeneric("setParams", function(object, maxcharge, mol, nion, nnloss, nnadd, nh, + polarity, lib.loc) standardGeneric("setParams")) +setGeneric("generateRules", function(object) standardGeneric("generateRules")) +setGeneric("generateRules2", function(object) standardGeneric("generateRules2")) + +setMethod("show", + signature="ruleSet", + function(object) { + cat("Backing files: ", object@ionlistfile, object@neutrallossfile, object@neutraladditionfile,"\n") + cat("Ion lists: ", + " ", nrow(object@ionlist), " ions", + " ", nrow(object@neutralloss), " neutral losses", + " ", nrow(object@neutraladdition), " neutral additions", + "\n") + object + }) + + +setMethod("setDefaultLists", + signature="ruleSet", + function(object, lib.loc=.libPaths()) { + + ##Read Tabellen + object@ionlistfile <- system.file('lists/ions.csv', package = "CAMERA", + lib.loc=lib.loc)[1] + if (!file.exists(object@ionlistfile)) stop('ions.csv not found.') + + object@neutrallossfile <- system.file('lists/neutralloss.csv', + package = "CAMERA", lib.loc=lib.loc)[1] + if (!file.exists(object@neutrallossfile)) stop('neutralloss.csv not found.') + + object@neutraladditionfile <- system.file('lists/neutraladdition.csv', + package = "CAMERA", lib.loc=lib.loc)[1] + if (!file.exists(object@neutraladditionfile)) stop('neutraladdition.csv not found.') + object + }) + +setMethod("readLists", + signature="ruleSet", + function(object) { + + object@ionlist <- read.table(object@ionlistfile, header=TRUE, dec=".", sep=",", + as.is=TRUE, stringsAsFactors = FALSE); + + object@neutralloss <- read.table(object@neutrallossfile, header=TRUE, dec=".", sep=",", + as.is=TRUE, stringsAsFactors = FALSE); + + object@neutraladdition <- read.table(object@neutraladditionfile, + header=TRUE, dec=".", sep=",", + as.is=TRUE, stringsAsFactors = FALSE); + object + }) + +setMethod("setDefaultParams", + signature="ruleSet", + function(object) { + object@maxcharge=3 + object@mol=3 + object@nion=2 + object@nnloss=1 + object@nnadd=1 + object@nh=2 + object@polarity="positive" + object + }) + +setMethod("setParams", + signature=c("ruleSet", "numeric","numeric","numeric","numeric","numeric", + "numeric","character","character"), + function (object, maxcharge=3, mol=3, nion=2, nnloss=1, nnadd=1, nh=2, + polarity=NULL, lib.loc=NULL) { + object@maxcharge=maxcharge + object@mol=mol + object@nion=nion + object@nnloss=nnloss + object@nnadd=nnadd + object@nh=nh + object@polarity=polarity + object@lib.loc=lib.loc + object + }) + +setMethod("generateRules", + signature="ruleSet", + function (object) { + + maxcharge=object@maxcharge + mol=object@mol + nion=object@nion + nnloss=object@nnloss + nnadd=object@nnadd + nh=object@nh + polarity=object@polarity + + ionlist=object@ionlist + neutralloss=object@neutralloss + neutraladdition=object@neutraladdition + + rules=object@rules + + name<-c(); + nmol<-c(); + charge<-c(); + massdiff<-c(); + oidscore<-c(); + quasi<-c(); + ips<-c(); + + ##Erzeuge Regeln + tmpname <- c(); + tmpnmol <- c(); + tmpcharge <- 0; + tmpmass <- 0; + tmpips <- 0; + + ## Molekülionen + if(polarity=="positive"){ + ## Wasserstoff, hard codiert + for(k in 1:mol){ + if(k == 1){ + str <- ""; + tmpips <- 1.0; + }else{ + str <- k; + tmpips <- 0.5; + }; + name <- append(name, paste("[", str, "M+H]+", sep="")); + charge <- append(charge, 1); + massdiff <- append(massdiff, 1.007276); + nmol <- append(nmol, k); + if(k == 1) { + quasi <- append(quasi, 1); + } else { + quasi <- append(quasi, 0); + }; + oidscore <- append(oidscore, 1); + ips <- append(ips, tmpips) + name <- append(name,paste("[",str,"M+2H]2+",sep="")); + charge<-append(charge,2); + massdiff<-append(massdiff,2.014552); + nmol<-append(nmol,k);quasi<-append(quasi,0); + oidscore<-append(oidscore,2); + ips<-append(ips,tmpips) + name<-append(name,paste("[",str,"M+3H]3+",sep="")); + charge<-append(charge,3); + massdiff<-append(massdiff,3.021828); + nmol<-append(nmol,k); + quasi<-append(quasi,0); + oidscore<-append(oidscore,3); + ips<-append(ips,tmpips) + oid<-3; + for(i in 1:nrow(ionlist)){ + if(ionlist[i,2]<=0) {next;} + if(ionlist[i,2]==1){ + name<-append(name,paste("[",str,"M+H+",ionlist[i,1],"]2+",sep="")); + }else{ + name<-append(name,paste("[",str,"M+H+",ionlist[i,1],"]",ionlist[i,2]+1,"+",sep="")); + } + charge <- append(charge,ionlist[i,2]+1); + massdiff <- append(massdiff,ionlist[i,3]+1.007276); + nmol <- append(nmol,k); + quasi <- append(quasi,0); + oidscore <- append(oidscore,oid+i); + if(tmpips>0.75){ + ips<-append(ips,0.5) + }else{ + ips<-append(ips,tmpips); + }#Austausch + } + oid<-oid+nrow(ionlist); + coeff<-expand.grid(rep(list(0:nion),nrow(ionlist))) + if(length(list<-which(ionlist[,2]<=0))>0){ + coeff[,list]<-0; + } + coeff<-unique(coeff); + coeff<-cbind(coeff,rep(0,nrow(coeff))); + coeff<-coeff[-1,] + tmp<-NULL; + for(i in 1:nrow(ionlist)){ + if(ionlist[i,2]<=0)next; + ## Austausch erstmal nur einen pro Ion + tmp<-rbind(tmp,t(apply(coeff,1,function(x) {x[i]<-x[i]+1;x[nrow(ionlist)+1]<-1;x}))); + } + coeff<-unique(rbind(coeff,tmp)); + for(i in 1:nrow(coeff)){ + tmpname<-paste("[",str,"M",sep=""); + tmpcharge<-0;tmpmass<-0; + for(ii in 1:(ncol(coeff)-1)){ + if(coeff[i,ii]>0){ + if(coeff[i,ii]>1){ + tmpname<-paste(tmpname,"+",coeff[i,ii],ionlist[ii,1],sep=""); + }else{ + tmpname<-paste(tmpname,"+",ionlist[ii,1],sep=""); + } + tmpcharge<-tmpcharge+coeff[i,ii]*ionlist[ii,2]; + tmpmass<-tmpmass+coeff[i,ii]*ionlist[ii,3] + } + } + if(coeff[i,ncol(coeff)]>0){ + ## Austausch hat stattgefunden, einfach bsp 1 + tmpname<-paste(tmpname,"-H",sep=""); + tmpcharge<-tmpcharge-1; + tmpmass<-tmpmass-1.007276; + tmpips<-0.25; + } + if(tmpcharge>1){ + tmpname<-paste(tmpname,"]",tmpcharge,"+",sep="") + }else{ + tmpname<-paste(tmpname,"]+",sep="") + } + name<-append(name,tmpname) + charge<-append(charge,tmpcharge) + massdiff<-append(massdiff,tmpmass) + nmol <- append(nmol,k); + oidscore<-append(oidscore,oid+i) + if(sum(coeff[i,])==1&& k==1){ + quasi <- append(quasi,1); + ips <-append(ips,tmpips); + }else{ + quasi <- append(quasi,0); + if(tmpips>0.75){ + ips <- append(ips,0.75); + }else{ + ips <- append(ips,tmpips); + } + } + } + } + oid<-max(oidscore); + + ## Erzeuge Neutral Addition + index<-which(quasi==1) + for(i in 1:nrow(neutraladdition)){ + if(length(index2<-which(ionlist[,2]>0))>0){ + for(ii in 1:length(index2)){ + if(ionlist[index2[ii],2] > 1){ + name <- append(name,paste("[M+",ionlist[index2[ii],1],"+",neutraladdition[i,1],"]",abs(ionlist[index2[ii],2]),"+",sep="")); + }else{ + name <- append(name,paste("[M+",ionlist[index2[ii],1],"+",neutraladdition[i,1],"]+",sep="")); + } + charge <- append(charge,ionlist[index2[ii],2]); + massdiff <- append(massdiff,neutraladdition[i,2]+ionlist[index2[ii],3]); + nmol <- append(nmol,1); + quasi <- append(quasi,0); + oidscore <- append(oidscore,oid+1);oid<-oid+1; + ips<-append(ips,0.5); + } + } + if(neutraladdition[i,1]=="NaCOOH"){next;} + name<-append(name,paste("[M+H+",neutraladdition[i,1],"]+",sep="")); + charge<-append(charge,+1); + massdiff<- append(massdiff,neutraladdition[i,2]+1.007276); + nmol<-append(nmol,1); + quasi<-append(quasi,0) + oidscore<-append(oidscore,oid+1);oid<-oid+1; + ips<-append(ips,0.5); + } + oid<-max(oidscore); + + ##Erzeuge Neutral loss + index<-which(quasi==1) + for(i in 1:nrow(neutralloss)){ + for(ii in 1:maxcharge){ + if(ii > 1){ + name<-append(name,paste("[M+",ii,"H-",neutralloss[i,1],"]",ii,"+",sep="")); + }else {name<-append(name,paste("[M+H-",neutralloss[i,1],"]+",sep=""));} + charge<-append(charge,ii); + massdiff<- append(massdiff,-neutralloss[i,2]+1.007276*ii); + nmol<-append(nmol,1); + quasi<-append(quasi,0) + oidscore<-append(oidscore,oid+1);oid<-oid+1; + ips<-append(ips,0.25); + } + } + ruleset <- data.frame(name,nmol,charge,massdiff,oidscore,quasi,ips) + if(length(index<-which(ruleset[,"charge"]>maxcharge))>0){ + ruleset<- ruleset[-index,]; + } + }else if(polarity=="negative"){ + ## Wasserstoff, hard codiert + for(k in 1:mol){ + if(k==1){str<-"";tmpips<-1.0;}else{str<-k;tmpips<-0.5}; + name<-append(name,paste("[",str,"M-H]-",sep="")); + charge<-append(charge,-1);massdiff<-append(massdiff,-1.007276);nmol<-append(nmol,k);if(k==1){quasi<-append(quasi,1);}else{quasi<-append(quasi,0);};oidscore<-append(oidscore,1);ips<-append(ips,tmpips) + name<-append(name,paste("[",str,"M-2H]2-",sep=""));charge<-append(charge,-2);massdiff<-append(massdiff,-2.014552);nmol<-append(nmol,k);quasi<-append(quasi,0);oidscore<-append(oidscore,2);ips<-append(ips,tmpips) + name<-append(name,paste("[",str,"M-3H]3-",sep=""));charge<-append(charge,-3);massdiff<-append(massdiff,-3.021828);nmol<-append(nmol,k);quasi<-append(quasi,0);oidscore<-append(oidscore,3);ips<-append(ips,tmpips) + oid<-3; + for(i in 1:nrow(ionlist)){ + if(ionlist[i,2]>=0){ + if(ionlist[i,2]>1) {next;} + name<-append(name,paste("[",str,"M-2H+",ionlist[i,1],"]-",sep="")); + charge <- append(charge,ionlist[i,2]-2); + massdiff<- append(massdiff,ionlist[i,3]-(2*1.007276)); + nmol <- append(nmol,k); + quasi <- append(quasi,0); + oidscore<-append(oidscore,oid+i); + ips<-append(ips,0.25); + next; + } + if(ionlist[i,2]== -1){ + name<-append(name,paste("[",str,"M-H+",ionlist[i,1],"]2-",sep="")); + }else{ + name<-append(name,paste("[",str,"M-H+",ionlist[i,1],"]",ionlist[i,2]+1,"-",sep="")); + } + charge <- append(charge,ionlist[i,2]-1); + massdiff<- append(massdiff,ionlist[i,3]-1.007276); + nmol <- append(nmol,k); + quasi <- append(quasi,0); + oidscore<-append(oidscore,oid+i); + ips<-append(ips,tmpips); + #Austausch + } + oid<-oid+nrow(ionlist); + coeff<-expand.grid(rep(list(0:nion),nrow(ionlist))) + if(length(list<-which(ionlist[,2]>=0))>0){ + coeff[,list]<-0; + } + coeff<-unique(coeff); + coeff<-cbind(coeff,rep(0,nrow(coeff))); + coeff<-coeff[-1,] + for(i in 1:nrow(coeff)){ + tmpname<-paste("[",str,"M",sep=""); + tmpcharge<-0;tmpmass<-0; + for(ii in 1:(ncol(coeff)-1)){ + if(coeff[i,ii]>0){ + if(coeff[i,ii]>1){ + tmpname<-paste(tmpname,"+",coeff[i,ii],ionlist[ii,1],sep=""); + }else{ + tmpname<-paste(tmpname,"+",ionlist[ii,1],sep=""); + } + tmpcharge<-tmpcharge+coeff[i,ii]*ionlist[ii,2]; + tmpmass<-tmpmass+coeff[i,ii]*ionlist[ii,3] + } + } + if(coeff[i,ncol(coeff)]>0){ + #Austausch hat stattgefunden, einfach bsp 1 + tmpname<-paste(tmpname,"-H",sep=""); + tmpcharge<-tmpcharge-1; + tmpmass<-tmpmass-1.007276; + tmpips<-0.5; + } + if(tmpcharge< -1){ + tmpname<-paste(tmpname,"]",abs(tmpcharge),"-",sep="") + }else{ + tmpname<-paste(tmpname,"]-",sep="") + } + name<-append(name,tmpname) + charge<-append(charge,tmpcharge) + massdiff<-append(massdiff,tmpmass) + nmol <-append(nmol,k); + oidscore<-append(oidscore,oid+i) + if(sum(coeff[i,])==1&& k==1){ + quasi <-append(quasi,1); + }else{ + quasi <-append(quasi,0); + } + ips <-append(ips,tmpips); + } + } + oid<-max(oidscore); + + ##Erzeuge Neutral Addition + index<-which(quasi==1) + for(i in 1:nrow(neutraladdition)){ + if(length(index2<-which(ionlist[,2]<0))>0){ + for(ii in 1:length(index2)){ + if(ionlist[index2[ii],2]< -1){ + name <- append(name,paste("[M+",ionlist[index2[ii],1],"+",neutraladdition[i,1],"]",abs(ionlist[index2[ii],2]),"-",sep="")); + }else{ + name <- append(name,paste("[M+",ionlist[index2[ii],1],"+",neutraladdition[i,1],"]-",sep="")); + } + charge <- append(charge,ionlist[index2[ii],2]); + massdiff<- append(massdiff,neutraladdition[i,2]+ionlist[index2[ii],3]); + nmol <- append(nmol,1); + quasi <- append(quasi,0); + oidscore<-append(oidscore,oid+1);oid<-oid+1; + ips<-append(ips,0.5); + } + } + name<-append(name,paste("[M-H+",neutraladdition[i,1],"]-",sep="")); + charge<-append(charge,-1); + massdiff<- append(massdiff,neutraladdition[i,2]-1.007276); + nmol<-append(nmol,1); + quasi<-append(quasi,0) + oidscore<-append(oidscore,oid+1);oid<-oid+1; + ips<-append(ips,0.5); + } + oid<-max(oidscore); + + ##Erzeuge Neutral loss + index<-which(quasi==1) + for(i in 1:nrow(neutralloss)){ + name<-append(name,paste("[M-H-",neutralloss[i,1],"]-",sep="")); + charge<-append(charge,-1); + massdiff<- append(massdiff,-neutralloss[i,2]-1.007276); + + quasi<-append(quasi,0) + oidscore<-append(oidscore,oid+1);oid<-oid+1; + ips<-append(ips,0.25); + } + ruleset <- data.frame(name,nmol,charge,massdiff,oidscore,quasi,ips) + if(length(index<-which(ruleset[,"charge"]< -maxcharge))>0){ + ruleset<- ruleset[-index,]; + } + }else stop("Unknown error") + + ## Update object rules and return ruleset + object@rules=ruleset + object; + }) + + +setMethod("generateRules2", signature="ruleSet", function (object) { + + maxcharge=object@maxcharge + mol=object@mol + nion=object@nion + nnloss=object@nnloss + nnadd=object@nnadd + nh=object@nh + polarity=object@polarity + + ionlist=object@ionlist + neutralloss=object@neutralloss + neutraladdition=object@neutraladdition + + rules=object@rules + + ##Create Rules + ruleset <- matrix(nrow=0, ncol=8); + colnames(ruleset) <- c("name", "nmol","charge", "massdiff", "typ","mandatory", + "score", "parent") + + tmpname <- c(); + tmpcharge <- 0; + tmpmass <- 0; + tmpionparent <- NA; + massH <- 1.007276 + + ##Positive Rule set + if(polarity == "positive"){ + charge <- "+" + chargeValue <- 1 + }else if(polarity == "negative"){ + charge <- "-" + chargeValue <- -1 + }else{ + stop("Unknown polarity mode in rule set generation! Debug!\n") + } + + #Hydrogen part is hard coded + for(k in 1:mol) { + if(k == 1){ + #For M+H + str <- ""; + tmpips <- 1.5; + quasi <- 1 + }else{ + #For xM+H + str <- k; + tmpips <- 0; + quasi <- 0 + } + + for(xh in seq.int(nh)){ + if(xh == 1){ + ruleset <- rbind(ruleset, cbind(paste("[", str, "M", charge, "H]", charge, sep=""), k, chargeValue, + massH*chargeValue, "A", + quasi, tmpips+0.5, tmpionparent)); + } else { + ruleset <- rbind(ruleset, cbind(paste("[", str, "M", charge, xh, "H]", xh, charge, sep=""), + k,xh*chargeValue, massH*xh*chargeValue, "A", 0, 0.5, tmpionparent+1)); + } + } + + for(i in 1:nrow(ionlist)){ + #Add change of kat- respectively anion against one hydrogen + if(ionlist[i, 2] > 0 & charge == "-"){ + if(ionlist[i, 2] == 1){ + sumcharge <- ""; + }else{ + sumcharge <- ionlist[i, 2]; + } + ruleset <- rbind(ruleset, cbind(paste("[", str, "M", charge,"H+", ionlist[i,1], "-H]", sumcharge, + charge, sep=""), k, ionlist[i, 2]*chargeValue, + ionlist[i, 3]+massH*(chargeValue-1), + "A", 0, 0.25, tmpionparent)); + } + + #xM+H + additional Kation like Na or K + if(ionlist[i,2] <= 0 & chargeValue > 0) { + #Ions with negative charge, when polarity is positive + next; + } else if(ionlist[i, 2] >= 0 & chargeValue < 0){ + #Ions with positive charge, when polarity is negative + next; + } + + #Add Rule to Ruleset + if(abs(ionlist[i, 2]) == 1){ + ruleset <- rbind(ruleset, cbind(paste("[", str, "M", charge,"H+", ionlist[i,1], "]2", + charge, sep=""), k, ionlist[i, 2]+1*chargeValue, + ionlist[i, 3]+massH*chargeValue, "A", 0, 0.25, tmpionparent)); + }else{ + ruleset <- rbind(ruleset, cbind(paste("[", str, "M", charge,"H+", ionlist[i, 1], "]", + ionlist[i, 2]+(1*chargeValue), + charge, sep=""), k, ionlist[i, 2]+1*chargeValue, + ionlist[i, 3]+massH*chargeValue, "A" ,0, 0.25, tmpionparent)); + } + + }#End for loop nrow(ionlist) + + ##Coeff - coefficient Matrix, for generating rules with + #combination of kat- or anionsexchange ions like [M-2H+Na] (M-H and change of H against Na) + coeff <- expand.grid(rep(list(0:nion), nrow(ionlist))) + if(chargeValue > 0){ + index <- which(ionlist[, 2] <= 0); + }else{ + index <- which(ionlist[, 2] >= 0); + } + + if(length(index) > 0){ + coeff[, index] <- 0; + } + + tmp <- NULL; + for(i in 1:nrow(ionlist)){ + if((chargeValue > 0 & ionlist[i, 2] <= 0) | (chargeValue < 0 & ionlist[i,2] >=0)){ + next; + } + #Austausch erstmal nur einen pro Ion + tmp <- rbind(tmp, t(apply(coeff, 1, function(x) { + x[i] <- x[i]+1; + x[nrow(ionlist)+1] <- -1; + x } + ))); + } + coeff <- cbind(coeff, rep(0, nrow(coeff))); + colnames(coeff)[4] <- "Var4" + colnames(tmp)[4] <- "Var4" + coeff <- unique(rbind(coeff, tmp)); + + for(i in 1:nrow(coeff)){ + if(sum(coeff[i, 1:nrow(ionlist)]) > 2 | sum(coeff[i, 1:nrow(ionlist)]) < 1){ + next; + } + + tmpname <- paste("[",str,"M",sep=""); + tmpcharge <- 0; + tmpmass <- 0; + + for(ii in 1:(ncol(coeff))){ + if(coeff[i,ii] > 0){ + if(coeff[i,ii] > 1){ + tmpname <- paste(tmpname, "+", coeff[i, ii], ionlist[ii, 1], sep=""); + }else{ + tmpname <- paste(tmpname, "+", ionlist[ii, 1], sep=""); + } + tmpcharge <- tmpcharge + coeff[i, ii] * ionlist[ii, 2]; + tmpmass <- tmpmass + coeff[i, ii] * ionlist[ii, 3]; + } else if (coeff[i,ii] < 0){ + tmpname <- paste(tmpname, "-H", sep=""); + tmpcharge <- tmpcharge - 1; + tmpmass <- tmpmass - massH; + } + } + + if(abs(tmpcharge) > 1){ + tmpname <- paste(tmpname, "]", tmpcharge, charge, sep="") + }else{ + tmpname <- paste(tmpname, "]", charge, sep="") + } + + if(tmpcharge > maxcharge | tmpcharge == 0){ + next; + } + + if(sum(coeff[i, ]) == 1 && k == 1 && coeff[i, 4] >=0){ + ruleset <- rbind(ruleset, cbind(tmpname, k, tmpcharge, tmpmass, "A", 1, 0.75, tmpionparent)); + }else{ + ruleset <- rbind(ruleset, cbind(tmpname, k, tmpcharge, tmpmass, "A", 0, 0.25, tmpionparent)); + } + }#end for loop nrow(coeff) + }#end for loop k + + # Create neutral addition to M+H from list + for(i in 1:nrow(neutraladdition)){ + #Add neutral ion to only M+H + ruleset <- rbind(ruleset, cbind(paste("[M", charge, "H+", neutraladdition[i, 1], "]", charge, sep="") , 1, chargeValue, + neutraladdition[i, 2]+(massH*chargeValue), "A", 0, 0.25, 1)); + } + + ## Add neutral loss from list to ruleset + for(i in 1:nrow(neutralloss)){ + ruleset <- rbind(ruleset, cbind(paste("-", neutralloss[i, 1], sep=""), 1, 0, + -neutralloss[i, 2], "F", 0, 0.25, 1)); + #Eliminate rules with charge > maxcharge + if(length(index <- which(ruleset[, "charge"] > maxcharge)) > 0){ + ruleset <- ruleset[-index, ]; + } + } + ruleset <- as.data.frame(ruleset, stringsAsFactors=FALSE) + class(ruleset$nmol) <- "numeric" + class(ruleset$charge) <- "numeric" + class(ruleset$massdiff) <- "numeric" + class(ruleset$mandatory) <- "numeric" + class(ruleset$score) <- "numeric" + class(ruleset$parent) <- "numeric" + object@rules=ruleset + object; + + return(object); +}) + + +############################################### +## +## calcRules() convenience method with the old +## behaviour and signature +## + +calcRules <- function (maxcharge=3, mol=3, nion=2, nnloss=1, + nnadd=1, nh=2, polarity=NULL, lib.loc = NULL, newFragments=FALSE){ + + r <- new("ruleSet") + r <- setDefaultLists(r, lib.loc=lib.loc) + r <- readLists(r) + r <- setParams(r, maxcharge=maxcharge, mol=mol, nion=nion, + nnloss=nnloss, nnadd=nnadd, nh=nh, polarity=polarity, lib.loc = lib.loc) + if(newFragments){ + r <- generateRules2(r) + }else{ + r <- generateRules(r) + } + + return(r@rules) +} diff --git a/R/xsAnnotate.R b/R/xsAnnotate.R index d1cc39f..66b4591 100644 --- a/R/xsAnnotate.R +++ b/R/xsAnnotate.R @@ -99,7 +99,7 @@ xsAnnotate <- function(xs=NULL, sample=NA, nSlaves=1, polarity=NULL){ } object@runParallel<-runParallel; - colnames(object@annoID) <- c("id","grp_id","rule_id"); + colnames(object@annoID) <- c("id","grpID","ruleID","parentID"); colnames(object@annoGrp)<- c("id","mass","ips","psgrp"); colnames(object@isoID) <- c("mpeak","isopeak","iso","charge") @@ -697,6 +697,7 @@ setGeneric("findAdducts", function(object, ppm=5, mzabs=0.015, multiplier=3, pol rules=NULL, max_peaks=100, psg_list=NULL) standardGeneric("findAdducts")); setMethod("findAdducts", "xsAnnotate", function(object, ppm=5, mzabs=0.015, multiplier=3, polarity=NULL, rules=NULL, max_peaks=100, psg_list=NULL){ + newFragments <- FALSE; # Scaling ppm factor devppm <- ppm / 1000000; @@ -704,7 +705,8 @@ setMethod("findAdducts", "xsAnnotate", function(object, ppm=5, mzabs=0.015, mult npeaks.global <- 0; # get mz values from peaklist - imz <- object@groupInfo[,"mz"]; + imz <- object@groupInfo[, "mz"]; + #TODO: Change intensity choosing? intval <- "maxo"; #max. intensity #number of pseudo-spectra @@ -728,7 +730,7 @@ setMethod("findAdducts", "xsAnnotate", function(object, ppm=5, mzabs=0.015, mult } cat("Generating peak matrix for peak annotation!\n"); - mint <- groupval(object@xcmsSet,value=intval)[,index,drop=FALSE]; + mint <- groupval(object@xcmsSet,value=intval)[, index, drop=FALSE]; peakmat <- object@xcmsSet@peaks; groupmat <- groups(object@xcmsSet); @@ -747,10 +749,10 @@ setMethod("findAdducts", "xsAnnotate", function(object, ppm=5, mzabs=0.015, mult # other variables oidscore <- c(); index <- c(); - annoID <- matrix(ncol=3, nrow=0) + annoID <- matrix(ncol=4, nrow=0) annoGrp <- matrix(ncol=4, nrow=0) - colnames(annoID) <- c("id", "grp_id", "rule_id"); - colnames(annoGrp)<- c("id", "mass", "ips", "psgrp"); + colnames(annoID) <- colnames(object@annoID) + colnames(annoGrp) <- colnames(object@annoGrp) ##Examine polarity and rule set if(!(object@polarity == "")){ @@ -760,7 +762,9 @@ setMethod("findAdducts", "xsAnnotate", function(object, ppm=5, mzabs=0.015, mult rules <- object@ruleset; }else{ cat("Ruleset could not read from object! Recalculate\n"); - rules <- calcRules(maxcharge=3, mol=3, nion=2, nnloss=1, nnadd=1, nh=2, polarity=object@polarity); + rules <- calcRules(maxcharge=3, mol=3, nion=2, nnloss=1, nnadd=1, nh=2, + polarity=object@polarity, + lib.loc= .libPaths(),newFragments=newFragments); object@ruleset <- rules; } }else{ @@ -772,15 +776,19 @@ setMethod("findAdducts", "xsAnnotate", function(object, ppm=5, mzabs=0.015, mult if(!is.null(polarity)){ if(polarity %in% c("positive","negative")){ if(is.null(rules)){ - rules <- calcRules(maxcharge=3,mol=3,nion=2,nnloss=1,nnadd=1,nh=2,polarity=polarity); + rules <- calcRules(maxcharge=3, mol=3, nion=2, nnloss=1, nnadd=1, + nh=2, polarity=polarity, lib.loc= .libPaths(), + newFragments=newFragments); }else{ cat("Found and use user-defined ruleset!");} object@polarity <- polarity; }else stop("polarity mode unknown, please choose between positive and negative.") - }else if(length(object@xcmsSet@polarity)>0){ + }else if(length(object@xcmsSet@polarity) > 0){ index <- which(sampclass(object@xcmsSet) == object@category)[1] + object@sample-1; if(object@xcmsSet@polarity[index] %in% c("positive","negative")){ if(is.null(rules)){ - rules<-calcRules(maxcharge=3,mol=3,nion=2,nnloss=1,nnadd=1,nh=2,polarity=object@xcmsSet@polarity[index]); + rules <- calcRules(maxcharge=3, mol=3, nion=2, nnloss=1, nnadd=1, + nh=2, polarity=object@xcmsSet@polarity[index], + lib.loc= .libPaths(), newFragments=newFragments); }else{ cat("Found and use user-defined ruleset!");} object@polarity <- polarity; }else stop("polarity mode in xcmsSet unknown, please define variable polarity.") @@ -803,8 +811,13 @@ setMethod("findAdducts", "xsAnnotate", function(object, ppm=5, mzabs=0.015, mult runParallel <- 0; } - quasimolion <- which(rules[, "quasi"]== 1) - + if("quasi" %in% colnames(rules)){ + #backup for old rule sets + quasimolion <- which(rules[, "quasi"]== 1) + }else{ + quasimolion <- which(rules[, "mandatory"]== 1) + } + #Remove recognized isotopes from annotation m/z vector for(x in seq(along = isotopes)){ if(!is.null(isotopes[[x]])){ @@ -829,6 +842,7 @@ setMethod("findAdducts", "xsAnnotate", function(object, ppm=5, mzabs=0.015, mult lp <- -1; pspectra_list <- psg_list; } + argList <- list(); cnt_peak <- 0; if(is.null(max_peaks)){ @@ -866,7 +880,14 @@ setMethod("findAdducts", "xsAnnotate", function(object, ppm=5, mzabs=0.015, mult x=argList, fun=annotateGrpMPI, msgfun=msgfun.snowParallel) } - + if("typ" %in% colnames(rules)){ + rules.idx <- which(rules[, "typ"]== "A") + parent <- TRUE; + }else{ + #backup for old rule sets + rules.idx <- 1:nrow(rules); + parent <- FALSE; + } for(ii in 1:length(result)){ if(length(result[[ii]]) == 0){ next; @@ -881,15 +902,18 @@ setMethod("findAdducts", "xsAnnotate", function(object, ppm=5, mzabs=0.015, mult index <- argList[[ii]]$i[[iii]]; ipeak <- object@pspectra[[index]]; for(hyp in 1:nrow(hypothese)){ - peakid <- ipeak[hypothese[hyp,"massID"]]; + peakid <- as.numeric(ipeak[hypothese[hyp, "massID"]]); if(old_massgrp != hypothese[hyp,"massgrp"]) { massgrp <- massgrp+1; old_massgrp <- hypothese[hyp,"massgrp"]; - annoGrp <- rbind(annoGrp, c(massgrp, hypothese[hyp, "mass"], - sum(hypothese[which(hypothese[, "massgrp"]==old_massgrp), - "ips"]), as.numeric(names(result[[ii]][iii])))) + annoGrp <- rbind(annoGrp,c(massgrp,hypothese[hyp,"mass"],sum(hypothese[ which(hypothese[,"massgrp"]==old_massgrp),"score"]),i) ) + } + if(parent){ + annoID <- rbind(annoID, cbind(peakid, massgrp, hypothese[hyp,c("ruleID","parent")])) + }else{ + annoID <- rbind(annoID, cbind(peakid, massgrp, hypothese[hyp,c("ruleID")],NA)) } - annoID <- rbind(annoID, c(peakid,massgrp,hypothese[hyp, "ruleID"])) + } } } @@ -912,7 +936,15 @@ setMethod("findAdducts", "xsAnnotate", function(object, ppm=5, mzabs=0.015, mult pspectra_list <- psg_list; sum_peaks <- sum(sapply(object@pspectra[psg_list],length)); } - + if("typ" %in% colnames(rules)){ + rules.idx <- which(rules[, "typ"]== "A") + parent <- TRUE; + }else{ + #backup for old rule sets + rules.idx <- 1:nrow(rules); + parent <- FALSE; + } + for(j in seq(along = pspectra_list)){ i <- pspectra_list[j]; @@ -935,7 +967,8 @@ setMethod("findAdducts", "xsAnnotate", function(object, ppm=5, mzabs=0.015, mult #check if the pspec contains more than one peak if(length(ipeak) > 1){ - hypothese <- annotateGrp(ipeak, imz, rules, mzabs, devppm, isotopes, quasimolion); + + hypothese <- annotateGrp(ipeak, imz, rules, mzabs, devppm, isotopes, quasimolion, rules.idx=rules.idx); #save results if(is.null(hypothese)){ next; @@ -945,14 +978,18 @@ setMethod("findAdducts", "xsAnnotate", function(object, ppm=5, mzabs=0.015, mult #combine annotation hypotheses to annotation groups for one compound mass for(hyp in 1:nrow(hypothese)){ - peakid <- ipeak[hypothese[hyp, "massID"]]; + peakid <- as.numeric(ipeak[hypothese[hyp, "massID"]]); if(old_massgrp != hypothese[hyp, "massgrp"]) { massgrp <- massgrp + 1; old_massgrp <- hypothese[hyp, "massgrp"]; - annoGrp <- rbind( annoGrp, c(massgrp, hypothese[hyp, "mass"], - sum(hypothese[ which(hypothese[,"massgrp"]==old_massgrp), "ips"]), i) ) + annoGrp <- rbind(annoGrp, c(massgrp, hypothese[hyp, "mass"], + sum(hypothese[ which(hypothese[, "massgrp"] == old_massgrp), "score"]), i) ) + } + if(parent){ + annoID <- rbind(annoID, c(peakid, massgrp, hypothese[hyp, "ruleID"], ipeak[hypothese[hyp, "parent"]])) + }else{ + annoID <- rbind(annoID, c(peakid, massgrp, hypothese[hyp, "ruleID"], NA)) } - annoID <- rbind(annoID, c(peakid,massgrp,hypothese[hyp,"ruleID"])) } } } @@ -962,8 +999,8 @@ setMethod("findAdducts", "xsAnnotate", function(object, ppm=5, mzabs=0.015, mult cat("\n"); object@derivativeIons <- derivativeIons; - object@annoID<-annoID; - object@annoGrp<-annoGrp; + object@annoID <- annoID; + object@annoGrp <- annoGrp; return(object) } }) @@ -1164,7 +1201,7 @@ setMethod("getPeaklist", "xsAnnotate", function(object, intval="into") { start <- ncol(peaktable) - grpval.ncol +1; ende <- start + grpval.ncol - 1; - peaktable[,start:ende] <- grpval; + peaktable[, start:ende] <- grpval; } #allocate variables for CAMERA output diff --git a/inst/lists/ions.csv b/inst/lists/ions.csv index c8a8eca..cf52be4 100644 --- a/inst/lists/ions.csv +++ b/inst/lists/ions.csv @@ -2,4 +2,3 @@ name, charge, molecular_mass Na, +1, 22.989218 Cl, -1, 34.969402 K, +1, 38.963158 -NH4, +1, 18.033823 diff --git a/inst/lists/neutraladdition.csv b/inst/lists/neutraladdition.csv index 9d50a2c..546d8ea 100644 --- a/inst/lists/neutraladdition.csv +++ b/inst/lists/neutraladdition.csv @@ -2,3 +2,4 @@ name, molecular mass NaCOOH, 67.98744 HCOOH, 46.00547 CF3COOH, 133.993 +NH3, 17.02655 diff --git a/inst/lists/neutralloss.csv b/inst/lists/neutralloss.csv index a34fa1c..777853f 100644 --- a/inst/lists/neutralloss.csv +++ b/inst/lists/neutralloss.csv @@ -5,8 +5,13 @@ NH3, 17.02655 H20, 18.0153 CH4, 16.0313 CO, 27.9949 +C2H4, 28.0313 COCH2, 42.01057 CO2, 43.9898 -C2H4, 28.0313 HCOOH, 46.00547 +C4H8, 56.0626 +C3H2O3, 86.00039 +C5H8O4, 132.04226 +C6H10O4, 146.05791 C6H10O5, 162.05282 +C6H8O6, 176.03209 \ No newline at end of file diff --git a/man/ruleSet-class.Rd b/man/ruleSet-class.Rd new file mode 100644 index 0000000..201d7f1 --- /dev/null +++ b/man/ruleSet-class.Rd @@ -0,0 +1,88 @@ +\name{ruleSet} +\Rdversion{1.1} +\docType{class} +\alias{ruleSet-class} +\alias{class:ruleSet} + +\alias{show,ruleSet-method} +\alias{generateRules} +\alias{generateRules,ruleSet-method} +\alias{setDefaultLists} +\alias{setDefaultLists,ruleSet-method} +\alias{readLists} +\alias{readLists,ruleSet-method} +\alias{setDefaultParams} +\alias{setDefaultParams,ruleSet-method} +\alias{setParams} +\alias{setParams,ruleSet,numeric,numeric,numeric,numeric,numeric,numeric,character-method} + +\title{Class \code{ruleSet}} +\description{ + The class \code{ruleSet} is used to read lists of ions, adducts and + neutral losses, and compile the dynamic ruleSet from those. + This makes it possible to modify the default rules for certain + analytical settings. +} + +\section{Slots}{ + \describe{ + \item{\code{ionlistfile}:}{File of known charged ions, an example is + found in CAMERA/lists/ions.csv .} + \item{\code{neutrallossfile}:}{File of known neutral losses, an + example is found in CAMERA/lists/neutralloss.csv.} + \item{\code{neutraladditionfile}:}{File of known adducts, an + example is found in CAMERA/lists/lists/neutraladdition.csv .} + \item{\code{ionlist}:}{Known charged ions.} + \item{\code{neutralloss}:}{Known neutral losses.} + \item{\code{neutraladdition}:}{Known adducts.} + + \item{\code{maxcharge}:}{.} + \item{\code{mol }:}{.} + \item{\code{nion }:}{.} + \item{\code{nnloss }:}{.} + \item{\code{nnadd }:}{.} + \item{\code{nh }:}{.} + + \item{\code{polarity }:}{Polarity of the ruleSet.} + + \item{\code{rules}:}{data.frame of resulting mass differences, this + is the dynamic ruleSet.} + } +} + +\section{Extends}{ + Class \code{"\linkS4class{Versioned}"}, directly. +} + +\section{Methods}{ + Methods implemented for \code{ruleSet} + \describe{ + \item{setDefaultLists }{\code{signature(object = "ruleSet")}: + Set filenames for the lists shipped with CAMERA.} + \item{readLists }{\code{signature(object = "ruleSet")}: Read and + parse the lists from the files.} + \item{setDefaultParams}{\code{signature(object = "ruleSet")}: Set + the default parameters for rule generation. } + \item{setParams }{\code{signature(object = "ruleSet")}: Set + the parameters for rule generation. } + \item{generateRules }{\code{signature(object = "ruleSet")}: Create + the rules in \code{ruleSet@rules} . } + } +} + +\author{ + Steffen Neumann and Carsten Kuhl +} + +\examples{ + + r <- new("ruleSet"); + r2 <- setDefaultLists(r) ; + r3 <- readLists(r2) ; + r4 <- setDefaultParams(r3) ; + r5 <- generateRules(r4) + dim(r5@rules) + +} + +\keyword{classes}