diff --git a/DESCRIPTION b/DESCRIPTION
index 2851b80..40d065e 100755
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -2,8 +2,8 @@ Package: rMVP
 Type: Package
 Title: Memory-Efficient, Visualize-Enhanced, Parallel-Accelerated GWAS
         Tool
-Version: 1.2.0
-Date: 2024-09-18
+Version: 1.3.0
+Date: 2024-12-12
 Authors@R: c( person("Lilin", "Yin", role = "aut"), 
               person("Haohao", "Zhang", role = "aut"),
               person("Zhenshuang", "Tang", role = "aut"),
diff --git a/R/MVP.Data.r b/R/MVP.Data.r
index 5aaa77a..2adc7df 100755
--- a/R/MVP.Data.r
+++ b/R/MVP.Data.r
@@ -12,16 +12,13 @@
 
 
 #' MVP.Data: To prepare data for MVP package
-#' Author: Xiaolei Liu, Lilin Yin and Haohao Zhang
-#' Build date: Aug 30, 2016
-#' Last update: Sep 12, 2018
 #'
 #' @param fileMVP Genotype in MVP format
 #' @param fileVCF Genotype in VCF format
 #' @param fileHMP Genotype in hapmap format
 #' @param fileBed Genotype in PLINK binary format
 #' @param fileInd Individual name file
-#' @param fileNum Genotype in numeric format; pure 0, 1, 2 matrix; m * n, m is marker size, n is sample size
+#' @param fileNum Genotype in numeric format; pure 0, 1, 2 matrix; m * n or n * m, m is marker size, n is sample size
 #' @param filePhe Phenotype, the first column is taxa name, the subsequent columns are traits
 #' @param fileMap SNP map information, there are three columns, including SNP_ID, Chromosome, and Position
 #' @param fileKin Kinship that represents relationship among individuals, n * n matrix, n is sample size
@@ -246,8 +243,8 @@ MVP.Data.VCF2MVP <- function(vcf_file, out='mvp', maxLine = 1e4, type.geno='char
     
     # parse genotype
     bigmat <- filebacked.big.matrix(
-        nrow = m,
-        ncol = n,
+        nrow = n,
+        ncol = m,
         type = type.geno,
         backingfile = backingfile,
         backingpath = dirname(out),
@@ -308,8 +305,8 @@ MVP.Data.Bfile2MVP <- function(bfile, out='mvp', maxLine=1e4, type.geno='char',
     
     # parse genotype
     bigmat <- filebacked.big.matrix(
-        nrow = m,
-        ncol = n,
+        nrow = n,
+        ncol = m,
         type = type.geno,
         backingfile = backingfile,
         backingpath = dirname(out),
@@ -365,8 +362,8 @@ MVP.Data.Hapmap2MVP <- function(hmp_file, out='mvp', maxLine = 1e4, type.geno='c
     
     # parse genotype
     bigmat <- filebacked.big.matrix(
-        nrow = m,
-        ncol = n,
+        nrow = n,
+        ncol = m,
         type = type.geno,
         backingfile = backingfile,
         backingpath = dirname(out),
@@ -432,8 +429,8 @@ MVP.Data.Numeric2MVP <- function(num_file, map_file, out='mvp', maxLine=1e4, row
     
     # define bigmat
     bigmat <- filebacked.big.matrix(
-        nrow = m,
-        ncol = n,
+        nrow = n,
+        ncol = m,
         type = type.geno,
         backingfile = backingfile,
         backingpath = dirname(out),
@@ -476,15 +473,15 @@ MVP.Data.Numeric2MVP <- function(num_file, map_file, out='mvp', maxLine=1e4, row
             len <- length(line)
             if (len == 0) { break }
 
-            line <- do.call(rbind, strsplit(line, '\\s+'))
-            if (row_names) { line <- line[, 2:ncol(line)]}
+            line <- do.call(cbind, strsplit(line, '\\s+'))
+            if (row_names) { line <- line[2:ncol(line), ]}
             if (transposed) {
-                bigmat[, (i + 1):(i + ncol(line))] <- line
-                i <- i + ncol(line)
-                percent <- 100 * i / n
-            } else {
                 bigmat[(i + 1):(i + nrow(line)), ] <- line
                 i <- i + nrow(line)
+                percent <- 100 * i / n
+            } else {
+                bigmat[, (i + 1):(i + ncol(line))] <- line
+                i <- i + ncol(line)
                 percent <- 100 * i / m
             }
 
@@ -527,8 +524,11 @@ MVP.Data.Numeric2MVP <- function(num_file, map_file, out='mvp', maxLine=1e4, row
 #' 
 MVP.Data.MVP2Bfile <- function(bigmat, map, pheno=NULL, out='mvp.plink', threads=1, verbose=TRUE) {
     t1 <- as.numeric(Sys.time())
+
+    if(nrow(map) != nrow(bigmat) && nrow(map) != ncol(bigmat))  stop("mismatched number of SNPs between genotype and map.")
+    mrk_bycol <- nrow(map) == ncol(bigmat)
     # write bed file
-    write_bfile(bigmat@address, out, threads=threads, verbose = verbose)
+    write_bfile(bigmat@address, out, mrkbycol=mrk_bycol, threads=threads, verbose = verbose)
     
     # write fam
     #  1. Family ID ('FID')
@@ -537,13 +537,14 @@ MVP.Data.MVP2Bfile <- function(bigmat, map, pheno=NULL, out='mvp.plink', threads
     #  4. Within-family ID of mother ('0' if mother isn't in dataset)
     #  5. Sex code ('1' = male, '2' = female, '0' = unknown)
     #  6. Phenotype value ('1' = control, '2' = case, '-9'/'0'/non-numeric = missing data if case/control)
+    n <- ifelse(mrk_bycol, nrow(bigmat), ncol(bigmat))
     if (is.null(pheno)) {
-        ind <- paste0("ind", 1:ncol(bigmat))
-        pheno <- rep(-9, ncol(bigmat))
+        ind <- paste0("ind", 1:n)
+        pheno <- rep(-9, n)
         message("pheno is NULL, automatically named individuals.")
     } else if (ncol(pheno) == 1) {
         ind <- pheno[, 1]
-        pheno <- rep(-9, ncol(bigmat))
+        pheno <- rep(-9, n)
     } else {
         if (ncol(pheno) > 2) { 
             message("Only the first phenotype is written to the fam file, and the remaining ", ncol(pheno) - 1, " phenotypes are ignored.")
@@ -704,6 +705,7 @@ MVP.Data.Map <- function(map, out='mvp', cols=1:5, header=TRUE, sep='\t', verbos
 #' @param out prefix of output file name
 #' @param pcs.keep how many PCs to keep
 #' @param maxLine the number of markers handled at a time, smaller value would reduce the memory cost
+#' @param mrk_bycol whether the markers are stored by columns in genotype (i.e. genotype is a n by m matrix)
 #' @param sep seperator for PC file.
 #' @param cpu the number of cpu
 #' @param verbose whether to print detail.
@@ -727,6 +729,7 @@ MVP.Data.PC <- function(
     out=NULL,  
     pcs.keep=5,
     maxLine=10000,
+    mrk_bycol=TRUE,
     sep='\t',
     cpu=1,
     verbose=TRUE
@@ -743,7 +746,7 @@ MVP.Data.PC <- function(
     } else if (filePC == TRUE) {
         if(is.null(K)){
             geno <- attach.big.matrix(paste0(mvp_prefix, ".geno.desc"))
-            myPC <- MVP.PCA(M=geno, pcs.keep = pcs.keep, maxLine = maxLine, cpu=cpu)
+            myPC <- MVP.PCA(M=geno, mrk_bycol=mrk_bycol, pcs.keep = pcs.keep, maxLine = maxLine, cpu=cpu)
         }else{
             myPC <- MVP.PCA(K=K, pcs.keep = pcs.keep, maxLine = maxLine, cpu=cpu)
         }
@@ -773,6 +776,7 @@ MVP.Data.PC <- function(
 #' @param mvp_prefix Prefix for mvp format files
 #' @param out prefix of output file name
 #' @param maxLine the number of markers handled at a time, smaller value would reduce the memory cost
+#' @param mrk_bycol whether the markers are stored by columns in genotype (i.e. genotype is a n by m matrix)
 #' @param sep seperator for Kinship file.
 #' @param cpu the number of cpu
 #' @param verbose whether to print detail.
@@ -794,6 +798,7 @@ MVP.Data.Kin <- function(
     mvp_prefix='mvp', 
     out=NULL, 
     maxLine=10000, 
+    mrk_bycol=TRUE,
     sep='\t',
     cpu=1,
     verbose=TRUE
@@ -809,7 +814,7 @@ MVP.Data.Kin <- function(
         myKin <- read.big.matrix(fileKin, header = FALSE, type = 'double', sep = sep)
     } else if (fileKin == TRUE) {
         geno <- attach.big.matrix(paste0(mvp_prefix, ".geno.desc"))
-        myKin <- MVP.K.VanRaden(geno, maxLine = maxLine, cpu = cpu, verbose = verbose)
+        myKin <- MVP.K.VanRaden(geno, mrk_bycol = mrk_bycol, maxLine = maxLine, cpu = cpu, verbose = verbose)
     } else {
         stop("ERROR: The value of fileKin is invalid.")
     }
@@ -838,6 +843,7 @@ MVP.Data.Kin <- function(
 #' 
 #' @param mvp_prefix the prefix of mvp file
 #' @param out the prefix of output file
+#' @param mrk_bycol whether the markers are stored by columns in genotype (i.e. genotype is a n by m matrix)
 #' @param method 'Major', 'Minor', "Middle"
 #' @param ncpus number of threads for imputing
 #' @param verbose whether to print the reminder
@@ -849,7 +855,7 @@ MVP.Data.Kin <- function(
 #' 
 #' MVP.Data.impute(mvpPath, tempfile("outfile"), ncpus=1)
 #' 
-MVP.Data.impute <- function(mvp_prefix, out=NULL, method='Major', ncpus=NULL, verbose=TRUE) {
+MVP.Data.impute <- function(mvp_prefix, out=NULL, mrk_bycol=TRUE, method='Major', ncpus=NULL, verbose=TRUE) {
     # input
     desc <- paste0(mvp_prefix, ".geno.desc")
     bigmat <- attach.big.matrix(desc)
@@ -888,7 +894,7 @@ MVP.Data.impute <- function(mvp_prefix, out=NULL, method='Major', ncpus=NULL, ve
         file.copy(paste0(mvp_prefix, ".geno.map"), paste0(out, ".geno.map"))
     }
     
-    impute_marker(outmat@address, threads = ncpus, verbose = verbose)
+    impute_marker(outmat@address, mrkbycol = mrk_bycol, threads = ncpus, verbose = verbose)
     
     logging.log("Impute Genotype File is done!\n", verbose = verbose)
 }
diff --git a/R/MVP.FarmCPU.r b/R/MVP.FarmCPU.r
index cbde427..ba28518 100755
--- a/R/MVP.FarmCPU.r
+++ b/R/MVP.FarmCPU.r
@@ -20,7 +20,7 @@
 #' @author Xiaolei Liu and Zhiwu Zhang
 #' 
 #' @param phe phenotype, n by t matrix, n is sample size, t is number of phenotypes
-#' @param geno genotype, m by n matrix, m is marker size, n is sample size. This is Pure Genotype Data Matrix(GD). THERE IS NO COLUMN FOR TAXA.
+#' @param geno genotype, either m by n or n by m is supportable, m is marker size, n is population size. This is Pure Genotype Data Matrix(GD). THERE IS NO COLUMN FOR TAXA.
 #' @param map SNP map information, m by 3 matrix, m is marker size, the three columns are SNP_ID, Chr, and Pos
 #' @param CV covariates, n by c matrix, n is sample size, c is number of covariates
 #' @param ind_idx the index of effective genotyped individuals
@@ -35,6 +35,7 @@
 #' @param Prior prior information, four columns, which are SNP_ID, Chr, Pos, P-value
 #' @param ncpus number of threads used for parallele computation
 #' @param maxLoop maximum number of iterations
+#' @param maxLine the number of markers handled at a time, smaller value would reduce the memory cost
 #' @param threshold.output only the GWAS results with p-values lower than threshold.output will be output
 #' @param converge a number, 0 to 1, if selected pseudo QTNs in the last and the second last iterations have a certain probality (the probability is converge) of overlap, the loop will stop
 #' @param iteration.output whether to output results of all iterations
@@ -55,7 +56,7 @@
 #' print(dim(phenotype))
 #' genoPath <- system.file("extdata", "06_mvp-impute", "mvp.imp.geno.desc", package = "rMVP")
 #' genotype <- attach.big.matrix(genoPath)
-#' genotype <- deepcopy(genotype, cols=idx)
+#' genotype <- deepcopy(genotype, rows=idx)
 #' print(dim(genotype))
 #' mapPath <- system.file("extdata", "06_mvp-impute", "mvp.imp.geno.map", package = "rMVP")
 #' map <- read.table(mapPath , head = TRUE)
@@ -66,15 +67,20 @@
 #' 
 `MVP.FarmCPU` <- function(phe, geno, map, CV=NULL, ind_idx=NULL, mrk_idx=NULL, P=NULL, method.sub="reward", method.sub.final="reward", 
                           method.bin=c("EMMA", "static", "FaST-LMM"), bin.size=c(5e5,5e6,5e7), bin.selection=seq(10,100,10), 
-                          memo="MVP.FarmCPU", Prior=NULL, ncpus=2, maxLoop=10, 
+                          memo="MVP.FarmCPU", Prior=NULL, ncpus=2, maxLoop=10, maxLine=5000,
                           threshold.output=.01, converge=1, iteration.output=FALSE, p.threshold=NA, 
                           QTN.threshold=0.01, bound=NULL, verbose=TRUE){
     #print("--------------------- Welcome to FarmCPU ----------------------------")
 
-    n <- ifelse(is.null(ind_idx), ncol(geno), length(ind_idx))
     if(!is.big.matrix(geno))    stop("genotype should be in 'big.matrix' format.")
     if(sum(is.na(phe[, 2])) != 0) stop("NAs are not allowed in phenotype.")
-    if(nrow(phe) != n) stop("number of individuals does not match in phenotype and genotype.")
+    if(is.null(ind_idx)){
+        if(nrow(phe) != ncol(geno) && nrow(phe) != nrow(geno)) stop("number of individuals does not match in phenotype and genotype.")
+        n <- ifelse(nrow(phe) == ncol(geno), ncol(geno), nrow(geno))
+    }else{
+        n <- length(ind_idx)
+        if(nrow(phe) != n) stop("number of individuals does not match in phenotype and genotype.")
+    }
 
     echo=TRUE
     nm=nrow(map)
@@ -233,14 +239,10 @@
             theCV=CV
             
             if(!is.null(myRemove$bin)){
-                if(length(myRemove$seqQTN) == 1){
-                    #myRemove$bin = as.matrix(myRemove$bin)
-                    myRemove$bin = t(myRemove$bin)
-                }
                 theCV=cbind(CV,myRemove$bin)
             }
 
-            myGLM=FarmCPU.LM(y=phe[,2],GDP=geno,GDP_index=ind_idx,w=theCV,ncpus=ncpus,npc=npc, verbose=verbose)
+            myGLM=FarmCPU.LM(y=phe[,2],GDP=geno,GDP_index=ind_idx,w=theCV,maxLine=maxLine,ncpus=ncpus,npc=npc, verbose=verbose)
             if(!is.null(seqQTN)){
                 if(ncol(myGLM$P) != (npc + length(seqQTN) + 1))    stop("wrong dimensions.")
             }
@@ -474,7 +476,7 @@
 #' @author Xiaolei Liu and Zhiwu Zhang
 #' 
 #' @param Y a n by 2 matrix, the fist column is taxa id and the second is trait
-#' @param GDP genotype, m by n matrix, m is marker size, n is sample size. This is Pure Genotype Data Matrix(GD). THERE IS NO COLUMN FOR TAXA
+#' @param GDP genotype
 #' @param GDP_index the index of effective genotyped individuals
 #' @param GM SNP map information, m by 3 matrix, m is marker size, the three columns are SNP_ID, Chr, and Pos
 #' @param CV covariates, n by c matrix, n is sample size, c is number of covariates
@@ -497,7 +499,12 @@ FarmCPU.BIN <-
         
         if(is.null(P)) return(list(bin=NULL,binmap=NULL,seqQTN=NULL))
         
-        if(is.null(GDP_index))  GDP_index=seq(1, ncol(GDP))
+        if(nrow(Y) == nrow(GDP)){
+            if(is.null(GDP_index))  GDP_index=seq(1, nrow(GDP))
+        }else{
+            if(is.null(GDP_index))  GDP_index=seq(1, ncol(GDP))
+        }
+        
         #Set upper bound for bin selection to squareroot of sample size
         n=nrow(Y)
         #bound=round(sqrt(n)/log10(n))
@@ -551,7 +558,11 @@ FarmCPU.BIN <-
                     GP=cbind(GM,P,NA,NA,NA)
                     mySpecify=FarmCPU.Specify(GI=GM,GP=GP,bin.size=bin,inclosure.size=inc)
                     seqQTN=which(mySpecify$index==TRUE)
-                    GK=t(GDP[seqQTN, GDP_index])
+                    if(nrow(Y) == nrow(GDP)){
+                        GK=GDP[GDP_index, seqQTN]
+                    }else{
+                        GK=t(GDP[seqQTN, GDP_index])
+                    }
                     myBurger=FarmCPU.Burger(Y=Y[,1:2], CV=CV, GK=GK, ncpus=ncpus, method=method)
                     myREML=myBurger$REMLs
                     myVG=myBurger$vg #it is unused
@@ -595,7 +606,11 @@ FarmCPU.BIN <-
                 GP=cbind(GM,P,NA,NA,NA)
                 mySpecify=FarmCPU.Specify(GI=GM,GP=GP,bin.size=bin,inclosure.size=inc)
                 seqQTN=which(mySpecify$index==TRUE)
-                GK=t(GDP[seqQTN,GDP_index])
+                if(nrow(Y) == nrow(GDP)){
+                    GK=GDP[GDP_index, seqQTN]
+                }else{
+                    GK=t(GDP[seqQTN, GDP_index])
+                }
                 myBurger=FarmCPU.Burger(Y=Y[,1:2], CV=CV, GK=GK, ncpus=ncpus, method=method)
                 myREML=myBurger$REMLs
                 myVG=myBurger$vg #it is unused
@@ -632,7 +647,11 @@ FarmCPU.BIN <-
                 GP=cbind(GM,P,NA,NA,NA)
                 mySpecify=FarmCPU.Specify(GI=GM,GP=GP,bin.size=bin[ii],inclosure.size=inc[ii])
                 seqQTN=which(mySpecify$index==TRUE)
-                GK=t(GDP[seqQTN,GDP_index])
+                if(nrow(Y) == nrow(GDP)){
+                    GK=GDP[GDP_index, seqQTN]
+                }else{
+                    GK=t(GDP[seqQTN, GDP_index])
+                }
                 myBurger=FarmCPU.Burger(Y=Y[,1:2], CV=CV, GK=GK, ncpus=ncpus, method=method)
                 myREML=myBurger$REMLs
                 myVG=myBurger$vg #it is unused
@@ -754,8 +773,9 @@ FarmCPU.Specify <-
 #' 
 #' @param y one column matrix, dependent variable
 #' @param w covariates, n by c matrix, n is sample size, c is number of covariates
-#' @param GDP genotype, m by n matrix, m is marker size, n is sample size. This is Pure Genotype Data Matrix(GD). THERE IS NO COLUMN FOR TAXA.
+#' @param GDP genotype
 #' @param GDP_index index of effective genotyped individuals
+#' @param maxLine the number of markers handled at a time, smaller value would reduce the memory cost
 #' @param ncpus number of threads used for parallele computation
 #' @param npc number of covariates without pseudo QTNs
 #' @param verbose whether to print detail.
@@ -767,7 +787,7 @@ FarmCPU.Specify <-
 #' 
 #' @keywords internal
 FarmCPU.LM <-
-    function(y, w=NULL, GDP, GDP_index=NULL, ncpus=2, npc=0, verbose=TRUE){
+    function(y, w=NULL, GDP, GDP_index=NULL, maxLine=5000, ncpus=2, npc=0, verbose=TRUE){
         #print("FarmCPU.LM started")
         if(is.null(y)) return(NULL)
         if(is.null(GDP)) return(NULL)
@@ -843,7 +863,7 @@ FarmCPU.LM <-
         # if(ncol(w) == 50)  write.csv(P, "P.csv")
 
         mkl_env({
-            results <- glm_c(y=y, X=w, iXX = wwi, GDP@address, geno_ind=GDP_index, verbose=verbose, threads=ncpus)
+            results <- glm_c(y=y, X=w, iXX = wwi, GDP@address, geno_ind=GDP_index, step=maxLine, verbose=verbose, threads=ncpus)
         })
         return(list(P=results[ ,-c(1:3), drop=FALSE], betapred=betapred, sepred=sepred, B=results[ , 1, drop=FALSE], S=results[ , 2, drop=FALSE]))
         # return(list(P=P, betapred=betapred, B=as.matrix(B), S=S))
@@ -904,7 +924,7 @@ FarmCPU.Burger <-
         }
         
         if(method=="EMMA"){
-            K <- MVP.K.VanRaden(M=as.big.matrix(t(theGK)), verbose=FALSE)
+            K <- MVP.K.VanRaden(M=as.big.matrix(theGK), verbose=FALSE)
             myEMMAREML <- MVP.EMMA.Vg.Ve(y=matrix(Y[,-1],nrow(Y),1), X=theCV, K=K)
             REMLs=-2*myEMMAREML$REML
             delta=myEMMAREML$delta
@@ -982,7 +1002,7 @@ FarmCPU.SUB <-
 #' 
 #' @author Xiaolei Liu and Zhiwu Zhang
 #' 
-#' @param GDP genotype, m by n matrix, m is marker size, n is sample size. This is Pure Genotype Data Matrix(GD). THERE IS NO COLUMN FOR TAXA
+#' @param GDP genotype
 #' @param GDP_index the index of effective genotyped individuals
 #' @param GM SNP map information, m by 3 matrix, m is marker size, the three columns are SNP_ID, Chr, and Pos
 #' @param seqQTN s by 1 vecter for index of QTN on GM
@@ -1001,7 +1021,11 @@ FarmCPU.Remove <-
         if(is.null(seqQTN))return(list(bin=NULL,binmap=NULL,seqQTN=NULL))
         seqQTN=seqQTN[order(seqQTN.p)]
         
-        if(is.null(GDP_index))  GDP_index=seq(1, ncol(GDP))
+        if(nrow(GM) == ncol(GDP)){
+            if(is.null(GDP_index))  GDP_index=seq(1, nrow(GDP))
+        }else{
+            if(is.null(GDP_index))  GDP_index=seq(1, ncol(GDP))
+        }
 
         hugeNum=10e10
         n=length(seqQTN)
@@ -1025,7 +1049,12 @@ FarmCPU.Remove <-
         #Set sample
         ratio=.1
         maxNum=100000
-        m=nrow(GDP)
+        if(nrow(GM) == ncol(GDP)){
+            m=ncol(GDP)
+        }else{
+            m=nrow(GDP)
+        }
+        
         s=length(GDP_index)
         
         sampled=s
@@ -1038,9 +1067,17 @@ FarmCPU.Remove <-
         #This section has problem of turning big.matrix to R matrix
         #It is OK as x is small
         if(is.big.matrix(GDP)){
-            x=t(as.matrix(deepcopy(GDP,rows=seqQTN,cols=index)))
+            if(nrow(GM) == ncol(GDP)){
+                x=deepcopy(GDP,cols=seqQTN,rows=index)[,, drop=FALSE]
+            }else{
+                x=t(deepcopy(GDP,rows=seqQTN,cols=index)[,, drop=FALSE])
+            }
         }else{
-            x=t(GDP[seqQTN,index] )
+            if(nrow(GM) == ncol(GDP)){
+                x=GDP[index, seqQTN, drop=FALSE]
+            }else{
+                x=t(GDP[seqQTN, index, drop=FALSE])
+            }
         }
         
         r=cor(as.matrix(x))
@@ -1062,9 +1099,17 @@ FarmCPU.Remove <-
         
         #This section has problem of turning big.matrix to R matrix
         if(is.big.matrix(GDP)){
-            bin=t(as.matrix(deepcopy(GDP,rows=seqQTN,cols=GDP_index) ))
+            if(nrow(GM) == ncol(GDP)){
+                bin=deepcopy(GDP,cols=seqQTN,rows=GDP_index)[,, drop=FALSE]
+            }else{
+                bin=t(deepcopy(GDP,rows=seqQTN,cols=GDP_index)[,, drop=FALSE])
+            }
         }else{
-            bin=t(GDP[seqQTN, GDP_index, drop=FALSE] )
+            if(nrow(GM) == ncol(GDP)){
+                bin=GDP[GDP_index, seqQTN, drop=FALSE]
+            }else{
+                bin=t(GDP[seqQTN, GDP_index, drop=FALSE])
+            }
         }
         
         binmap=GM[seqQTN, , drop=FALSE]
diff --git a/R/MVP.GLM.r b/R/MVP.GLM.r
index a7d0614..044e6da 100644
--- a/R/MVP.GLM.r
+++ b/R/MVP.GLM.r
@@ -19,10 +19,11 @@
 #' @author Lilin Yin and Xiaolei Liu
 #' 
 #' @param phe phenotype, n * 2 matrix
-#' @param geno Genotype in numeric format, pure 0, 1, 2 matrix; m * n, m is marker size, n is population size
+#' @param geno genotype, either m by n or n by m is supportable, m is marker size, n is population size
 #' @param CV Covariance, design matrix(n * x) for the fixed effects
 #' @param ind_idx the index of effective genotyped individuals
 #' @param mrk_idx the index of effective markers used in analysis
+#' @param maxLine the number of markers handled at a time, smaller value would reduce the memory cost
 #' @param cpu number of cpus used for parallel computation
 #' @param verbose whether to print detail.
 #'
@@ -38,7 +39,7 @@
 #' print(dim(phenotype))
 #' genoPath <- system.file("extdata", "06_mvp-impute", "mvp.imp.geno.desc", package = "rMVP")
 #' genotype <- attach.big.matrix(genoPath)
-#' genotype <- deepcopy(genotype, cols=idx)
+#' genotype <- deepcopy(genotype, rows=idx)
 #' print(dim(genotype))
 #' 
 #' glm <- MVP.GLM(phe=phenotype, geno=genotype, cpu=1)
@@ -49,18 +50,24 @@ function(
     phe, 
     geno, 
     CV=NULL, 
-    ind_idx = NULL,
+    ind_idx=NULL,
     mrk_idx=NULL,
+    maxLine=5000,
     cpu=1,
     verbose=TRUE
 ){
 
-    n <- ifelse(is.null(ind_idx), ncol(geno), length(ind_idx))
-    ys <- as.numeric(as.matrix(phe[,2]))
-    
+    if(is.null(ind_idx)){
+        if(nrow(phe) != ncol(geno) && nrow(phe) != nrow(geno)) stop("number of individuals does not match in phenotype and genotype.")
+        n <- ifelse(nrow(phe) == ncol(geno), ncol(geno), nrow(geno))
+    }else{
+        n <- length(ind_idx)
+        if(nrow(phe) != n) stop("number of individuals does not match in phenotype and genotype.")
+    }
+
     if(!is.big.matrix(geno))    stop("genotype should be in 'big.matrix' format.")
+    ys <- as.numeric(as.matrix(phe[,2]))
     if(sum(is.na(ys)) != 0) stop("NAs are not allowed in phenotype.")
-    if(nrow(phe) != n) stop("number of individuals does not match in phenotype and genotype.")
     
     if(is.null(CV)){
         X0 <- matrix(1, n)
@@ -77,7 +84,7 @@ function(
     logging.log("scanning...\n", verbose = verbose)
 
     mkl_env({
-        results <- glm_c(y = ys, X = X0, iXX = iX0X0, geno@address, geno_ind = ind_idx, marker_ind = mrk_idx, verbose = verbose, threads = cpu)
+        results <- glm_c(y = ys, X = X0, iXX = iX0X0, geno@address, geno_ind = ind_idx, marker_ind = mrk_idx, step = maxLine, verbose = verbose, threads = cpu)
     })
 
     return(results[, c(1, 2, ncol(results))])
diff --git a/R/MVP.K.VanRaden.r b/R/MVP.K.VanRaden.r
index 169bac7..fe80911 100755
--- a/R/MVP.K.VanRaden.r
+++ b/R/MVP.K.VanRaden.r
@@ -13,13 +13,12 @@
 
 #' Calculate Kinship matrix by VanRaden method
 #'
-#' Build date: Dec 12, 2016
-#' Last update: Dec 12, 2019
-#'
-#' @param M Genotype, m * n, m is marker size, n is population size
+#' @param M genotype, either m by n or n by m is supportable, m is marker size, n is population size
 #' @param maxLine the number of markers handled at a time, smaller value would reduce the memory cost
 #' @param ind_idx the index of effective genotyped individuals used in analysis
 #' @param mrk_idx the index of effective markers used in analysis
+#' @param mrk_freq the prior calculated major allele frequency (not MAF) for all markers used in analysis
+#' @param mrk_bycol whether the markers are stored by columns in genotype (i.e. M is a n by m matrix)
 #' @param cpu the number of cpu
 #' @param verbose whether to print detail.
 #' @param checkNA whether to check NA in genotype.
@@ -42,6 +41,8 @@ function(
     maxLine=5000,
     ind_idx=NULL,
     mrk_idx=NULL,
+    mrk_freq = NULL,
+    mrk_bycol = TRUE,
     cpu=1,
     verbose=TRUE,
     checkNA=TRUE
@@ -53,19 +54,32 @@ function(
     mac <- (!linux) & (!wind)
     HPCMathLib <- mac | grepl("mkl", sessionInfo()$LAPACK) | grepl("openblas", sessionInfo()$LAPACK) | eval(parse(text = "!inherits(try(Revo.version,silent=TRUE),'try-error')"))
 
-    if (!is.big.matrix(M)) stop("Format of Genotype Data must be big.matrix")
+    if(!is.big.matrix(M)) stop("Format of Genotype Data must be big.matrix")
     if(checkNA){
-        if(hasNA(M@address, geno_ind = ind_idx, threads = cpu))   stop("NA is not allowed in genotype, use 'MVP.Data.impute' to impute.")
+        if(hasNA(M@address, mrkbycol = mrk_bycol, geno_ind = ind_idx, marker_ind = mrk_idx, threads = cpu))   stop("NA is not allowed in genotype, use 'MVP.Data.impute' to impute.")
     }
-    n <- ifelse(is.null(ind_idx), ncol(M), length(ind_idx))
-    m <- ifelse(is.null(mrk_idx), nrow(M), length(mrk_idx))
+
+    if(!is.null(mrk_freq)){
+        if(!is.null(mrk_idx)){
+            if(length(mrk_freq) != length(mrk_idx)) stop("mismatched number of markers between provided frequency and index.")
+        }else{
+            if(mrk_bycol){
+                if(length(mrk_freq) != ncol(M)) stop("mismatched number of markers between genotype and provided frequency.")
+            }else{
+                if(length(mrk_freq) != nrow(M)) stop("mismatched number of markers between genotype and provided frequency.")
+            }
+        }
+    }
+    
+    n <- ifelse(is.null(ind_idx), ifelse(mrk_bycol, nrow(M), ncol(M)), length(ind_idx))
+    m <- ifelse(is.null(mrk_idx), ifelse(mrk_bycol, ncol(M), nrow(M)), length(mrk_idx))
 
     # logging.log("Relationship matrix mode in", priority[1], "\n", verbose = verbose)
     # if(is.null(dim(M))) M <- t(as.matrix(M))
     
     logging.log(paste0("Computing GRM for ", n, " individuals using ", m, " markers with a step of ", maxLine), "\n", verbose = verbose)
     
-    K <- try(kin_cal(M@address, geno_ind = ind_idx, marker_ind = mrk_idx, threads = cpu, step = maxLine, verbose = verbose, mkl = HPCMathLib), silent=TRUE)
+    K <- try(kin_cal(M@address, geno_ind = ind_idx, marker_ind = mrk_idx, marker_freq = mrk_freq, marker_bycol = mrk_bycol, threads = cpu, step = maxLine, verbose = verbose, mkl = HPCMathLib), silent=TRUE)
     # K <- try(kin_cal_s(M@address, threads = cpu, verbose = verbose, mkl = HPCMathLib), silent=TRUE)
 
     if(inherits(K,"try-error")){
@@ -177,40 +191,3 @@ function(
     logging.log("Deriving relationship matrix successfully", "\n", verbose = verbose); gc()
     return(K)
 }#end of MVP.k.VanRaden function
-
-# #' Calculate Kinship matrix by Blocking strategy
-# #'
-# #' Build date: Apr 14, 2021
-# #' Last update: Apr 14, 2021
-# #'
-# #' @param M Genotype, m * n, m is marker size, n is population size
-# #' @param step Number of markers processed at one time
-# #'
-# #' @return K, n * n matrix
-# #' @export
-# #'
-# #' @examples
-# #' genoPath <- system.file("extdata", "06_mvp-impute", "mvp.imp.geno.desc", package = "rMVP")
-# #' genotype <- attach.big.matrix(genoPath)
-# #' print(dim(genotype))
-# #'
-# #' K <- MVP.calk(genotype)
-# #'
-# MVP.calk <- function(M, step = 1000) {
-#     n_marker = nrow(M)
-#     n_sample = ncol(M)
-#     if (step > n_marker) { step = n_marker }
-#     idx = 0
-#     sum = 0
-#     K = matrix(0, n_sample, n_sample)
-#     while(idx < n_marker){
-#         G_buffer = M[idx + seq_len(step), ]
-#         means = rowMeans(G_buffer)
-#         sum   = sum + (0.5 * means) %*% (1 - 0.5 * means)
-#         K = K + crossprod(G_buffer - means)
-#         idx = idx + step
-#         if (idx + step > n_marker) { step = n_marker - idx }
-#     }
-#     K = K / (2 * drop(sum))
-#     return(K)
-# }
diff --git a/R/MVP.MLM.r b/R/MVP.MLM.r
index 820689c..1c2ad02 100644
--- a/R/MVP.MLM.r
+++ b/R/MVP.MLM.r
@@ -12,20 +12,18 @@
 
 
 #' To perform GWAS with GLM and MLM model and get the P value of SNPs
-#'
-#' Build date: Aug 30, 2016
-#' Last update: Aug 30, 2016
 #' 
 #' @author Lilin Yin and Xiaolei Liu
 #' 
 #' @param phe phenotype, n * 2 matrix
-#' @param geno genotype, m * n, m is marker size, n is population size
+#' @param geno genotype, either m by n or n by m is supportable, m is marker size, n is population size
 #' @param K Kinship, Covariance matrix(n * n) for random effects; must be positive semi-definite
 #' @param eigenK list of eigen Kinship
 #' @param CV covariates
 #' @param ind_idx the index of effective genotyped individuals
 #' @param mrk_idx the index of effective markers used in analysis
 #' @param REML a list that contains ve and vg
+#' @param maxLine the number of markers handled at a time, smaller value would reduce the memory cost
 #' @param cpu number of cpus used for parallel computation
 #' @param vc.method the methods for estimating variance component("emma" or "he" or "brent")
 #' @param verbose whether to print detail.
@@ -43,7 +41,7 @@
 #' print(dim(phenotype))
 #' genoPath <- system.file("extdata", "06_mvp-impute", "mvp.imp.geno.desc", package = "rMVP")
 #' genotype <- attach.big.matrix(genoPath)
-#' genotype <- deepcopy(genotype, cols=idx)
+#' genotype <- deepcopy(genotype, rows=idx)
 #' print(dim(genotype))
 #' K <- MVP.K.VanRaden(genotype, cpu=1)
 #' 
@@ -62,18 +60,24 @@ function(
     ind_idx=NULL,
     mrk_idx=NULL,
     REML=NULL,
+    maxLine=5000,
     cpu=1,
     vc.method=c("BRENT", "EMMA", "HE"),
     verbose=TRUE
 ){
 
+    if(is.null(ind_idx)){
+        if(nrow(phe) != ncol(geno) && nrow(phe) != nrow(geno)) stop("number of individuals does not match in phenotype and genotype.")
+        n <- ifelse(nrow(phe) == ncol(geno), ncol(geno), nrow(geno))
+    }else{
+        n <- length(ind_idx)
+        if(nrow(phe) != n) stop("number of individuals does not match in phenotype and genotype.")
+    }
     vc.method <- match.arg(vc.method)
-    n <- ifelse(is.null(ind_idx), ncol(geno), length(ind_idx))
     ys <- as.numeric(as.matrix(phe[,2]))
 
     if(!is.big.matrix(geno))    stop("genotype should be in 'big.matrix' format.")
     if(sum(is.na(ys)) != 0) stop("NAs are not allowed in phenotype.")
-    if(nrow(phe) != n) stop("number of individuals does not match in phenotype and genotype.")
     
     if(is.null(K)){
         if(vc.method == "EMMA" | vc.method == "he")  stop("Kinship must be provided!")
@@ -121,7 +125,7 @@ function(
     
     logging.log("scanning...\n", verbose = verbose)
     mkl_env({
-        results <- mlm_c(y = ys, X = X0, U = U, vgs = vgs, geno@address, ind_idx, mrk_idx, verbose = verbose, threads = cpu)
+        results <- mlm_c(y = ys, X = X0, U = U, vgs = vgs, geno@address, ind_idx, mrk_idx, step = maxLine, verbose = verbose, threads = cpu)
     })
 
     return(results)
diff --git a/R/MVP.PCA.r b/R/MVP.PCA.r
index 9485120..2d8261d 100755
--- a/R/MVP.PCA.r
+++ b/R/MVP.PCA.r
@@ -13,15 +13,12 @@
 
 #' Principal Component Analysis
 #' 
-#' Build date: Dec 14, 2016
-#' Last update: Oct 29, 2018
-#' @author Xiaolei Liu, Lilin Yin and Haohao Zhang
-#' 
-#' @param M Genotype in numeric format, pure 0, 1, 2 matrix; m * n, m is marker size, n is population size
+#' @param M genotype, either m by n or n by m is supportable, m is marker size, n is population size
 #' @param K kinship matrix
 #' @param maxLine the number of markers handled at a time, smaller value would reduce the memory cost
 #' @param ind_idx the index of effective genotyped individuals used in analysis
 #' @param mrk_idx the index of effective markers used in analysis
+#' @param mrk_bycol whether the markers are stored by columns in genotype (i.e. M is a n by m matrix)
 #' @param pcs.keep maximum number of PCs for output
 #' @param cpu the number of cpu
 #' @param verbose whether to print detail.
@@ -42,7 +39,7 @@
 #' }
 #' 
 MVP.PCA <-
-function(M=NULL, K=NULL, maxLine=10000, ind_idx=NULL, mrk_idx=NULL, pcs.keep=5, cpu=1, verbose=TRUE){
+function(M=NULL, K=NULL, maxLine=10000, ind_idx=NULL, mrk_idx=NULL, mrk_bycol=TRUE, pcs.keep=5, cpu=1, verbose=TRUE){
 
     #Data Check
     if (is.null(M) & is.null(K)) {
@@ -58,7 +55,7 @@ function(M=NULL, K=NULL, maxLine=10000, ind_idx=NULL, mrk_idx=NULL, pcs.keep=5,
     }
 
     if(is.null(K)){
-        K <- MVP.K.VanRaden(M=M, ind_idx = ind_idx, mrk_idx = mrk_idx, maxLine = maxLine, cpu = cpu, verbose = verbose)
+        K <- MVP.K.VanRaden(M=M, ind_idx = ind_idx, mrk_idx = mrk_idx, mrk_bycol = mrk_bycol, maxLine = maxLine, cpu = cpu, verbose = verbose)
     }else{
         if(!is.null(ind_idx))    K <- K[ind_idx, ind_idx]
     }
diff --git a/R/MVP.Utility.r b/R/MVP.Utility.r
index 8e4dd72..d959e65 100755
--- a/R/MVP.Utility.r
+++ b/R/MVP.Utility.r
@@ -25,12 +25,12 @@
 #'
 #' @examples
 #' MVP.Version()
-MVP.Version <- function(width=60, verbose=TRUE) {
+MVP.Version <- function(width=65, verbose=TRUE) {
     welcome <- "Welcome to MVP"
-    title   <- "A Memory-efficient, Visualization-enhanced, and Parallel-accelerated Tool For GWAS"
-    authors <- c("Designed and Maintained by Lilin Yin, Haohao Zhang, and Xiaolei Liu", 
+    title   <- "an R package for Memory-efficient, Visualization-enhanced and Parallel-accelerated genome-wide association study"
+    authors <- c("Design & Maintain: Lilin Yin, Haohao Zhang, and Xiaolei Liu", 
                  "Contributors: Zhenshuang Tang, Jingya Xu, Dong Yin, Zhiwu Zhang, Xiaohui Yuan, Mengjin Zhu, Shuhong Zhao, Xinyun Li")
-    contact <- "Contact: xiaoleiliu@mail.hzau.edu.cn"
+    contact <- "Mailto: xiaoleiliu@mail.hzau.edu.cn, ylilin@mail.hzau.edu.cn"
     logo_s  <- c(" __  __  __   __  ___",
                  "|  \\/  | \\ \\ / / | _ \\",
                  "| |\\/| |  \\ V /  |  _/",
diff --git a/R/MVP.r b/R/MVP.r
index a31f29c..e453aa0 100755
--- a/R/MVP.r
+++ b/R/MVP.r
@@ -18,13 +18,10 @@
 #' Object 3: Estimate variance components using EMMA, FaST-LMM, and HE regression
 #' Object 4: Generate high-quality figures
 #' 
-#' Build date: Aug 30, 2017
-#' Last update: Dec 14, 2018
-#' 
 #' @author Lilin Yin, Haohao Zhang, and Xiaolei Liu
 #' 
 #' @param phe phenotype, n * 2 matrix, n is sample size
-#' @param geno Genotype in bigmatrix format; m * n, m is marker size, n is sample size
+#' @param geno genotype, either m by n or n by m is supportable, m is marker size, n is population size
 #' @param map SNP map information, SNP name, Chr, Pos
 #' @param K Kinship, Covariance matrix(n * n) for random effects, must be positive semi-definite
 #' @param nPC.GLM number of PCs added as fixed effects in GLM
@@ -49,7 +46,7 @@
 #' @param permutation.rep number of permutation replicates
 #' @param col for color of points in each chromosome on manhattan plot
 #' @param memo Character. A text marker on output files
-#' @param tmppath the path of the temporary file
+#' @param outpath the path of the output files
 #' @param file.output whether to output files or not
 #' @param file.type figure formats, "jpg", "tiff"
 #' @param dpi resolution for output figures
@@ -57,7 +54,7 @@
 #' @param verbose whether to print detail.
 #' 
 #' @export
-#' @return a m * 2 matrix, the first column is the SNP effect, the second column is the P values
+#' @return
 #' Output: MVP.return$map - SNP map information, SNP name, Chr, Pos
 #' Output: MVP.return$glm.results - p-values obtained by GLM method
 #' Output: MVP.return$mlm.results - p-values obtained by MLM method
@@ -116,14 +113,15 @@ function(phe, geno, map, K=NULL, nPC.GLM=NULL, nPC.MLM=NULL, nPC.FarmCPU=NULL,
     }
     logging.initialize("MVP", logging.outpath)
 
-    MVP.Version(width = 60, verbose = verbose)
-    logging.log("Start:", as.character(Sys.time()), "\n", verbose = verbose)
+    MVP.Version(width = 65, verbose = verbose)
+    time_start <- Sys.time()
+    logging.log("Start:", format(time_start, format = "%F %T %Z"), "\n", verbose = verbose)
     if ("log" %in% file.output) {
         logging.log("The log has been output to the file:", get("logging.file", envir = package.env), "\n", verbose = verbose)
     }
     vc.method <- match.arg(vc.method)
-    if (nrow(phe) != ncol(geno)) stop("The number of individuals in phenotype and genotype doesn't match!")
-    if (nrow(geno) != nrow(map)) stop("The number of markers in genotype and map doesn't match!")
+    if (nrow(phe) != ncol(geno) & nrow(phe) != nrow(geno)) stop("The number of individuals in phenotype and genotype doesn't match!")
+    if (nrow(map) != ncol(geno) & nrow(map) != nrow(geno)) stop("The number of markers in genotype and map doesn't match!")
     if (!is.big.matrix(geno))    stop("genotype should be in 'big.matrix' format.")
     
     #list -> matrix
@@ -135,27 +133,39 @@ function(phe, geno, map, K=NULL, nPC.GLM=NULL, nPC.MLM=NULL, nPC.FarmCPU=NULL,
     na.index <- NULL
     if (!is.null(CV.GLM)) {
         CV.GLM <- as.matrix(CV.GLM)
-    if (nrow(CV.GLM) != ncol(geno)) stop("The number of individuals in covariates and genotype doesn't match!")
+    if (nrow(CV.GLM) != nrow(phe)) stop("The number of individuals in covariates and phenotype doesn't match!")
         na.index <- c(na.index, which(is.na(CV.GLM), arr.ind = TRUE)[, 1])
     }
     if (!is.null(CV.MLM)) {
         CV.MLM <- as.matrix(CV.MLM)
-        if (nrow(CV.MLM) != ncol(geno)) stop("The number of individuals in covariates and genotype doesn't match!")
+        if (nrow(CV.MLM) != nrow(phe)) stop("The number of individuals in covariates and phenotype doesn't match!")
        na.index <- c(na.index, which(is.na(CV.MLM), arr.ind = TRUE)[, 1])
     }
     if (!is.null(CV.FarmCPU)) {
         CV.FarmCPU <- as.matrix(CV.FarmCPU)
-        if (nrow(CV.FarmCPU) != ncol(geno)) stop("The number of individuals in covariates and genotype doesn't match!")
+        if (nrow(CV.FarmCPU) != nrow(phe)) stop("The number of individuals in covariates and phenotype doesn't match!")
         na.index <- c(na.index, which(is.na(CV.FarmCPU), arr.ind = TRUE)[, 1])
     }
     na.index <- unique(na.index)
 
     #Data information
-    m <- nrow(geno)
-    n <- ncol(geno)
-    logging.log(paste("Input data has", n, "individuals,", m, "markers"), "\n", verbose = verbose)
+    MrkByCol <- nrow(phe) == nrow(geno)
+    m <- ifelse(MrkByCol, ncol(geno), nrow(geno))
+    n <- nrow(phe)
+    logging.log(paste("Input data has", n, "individuals and", m, "markers"), "\n", verbose = verbose)
+    logging.log(paste("Markers are detected to be stored by", ifelse(MrkByCol, "column", "row")), "\n", verbose = verbose)
     logging.log("Analyzed trait:", colnames(phe)[2], "\n", verbose = verbose)
     logging.log("Number of threads used:", ncpus, "\n", verbose = verbose)
+    hpclib <- grepl("mkl", sessionInfo()$LAPACK) | grepl("openblas", sessionInfo()$LAPACK) | !inherits(try(Revo.version, silent=TRUE),"try-error")
+	if(!hpclib){
+		logging.log("No high performance math library detected! The computational efficiency would be greatly reduced\n", verbose = verbose)
+	}else{
+		if(grepl("mkl", sessionInfo()$LAPACK) | !inherits(try(Revo.version, silent=TRUE),"try-error")){
+			logging.log("Math Kernel Library is detected, nice job!\n", verbose = verbose)
+		}else{
+			logging.log("OpenBLAS Library is detected, nice job!\n", verbose = verbose)
+		}
+	}
 
     #remove samples with missing phenotype
     seqTaxa <- which(!is.na(phe[,2]))
@@ -169,15 +179,23 @@ function(phe, geno, map, K=NULL, nPC.GLM=NULL, nPC.MLM=NULL, nPC.FarmCPU=NULL,
         if (!is.null(CV.GLM)) { CV.GLM = CV.GLM[seqTaxa, , drop = FALSE] }
         if (!is.null(CV.MLM)) { CV.MLM = CV.MLM[seqTaxa, , drop = FALSE] }
         if (!is.null(CV.FarmCPU)) { CV.FarmCPU = CV.FarmCPU[seqTaxa, , drop = FALSE] }
-        if(length(seqTaxa) < n * 0.85){
-            geno <- deepcopy(geno, cols = seqTaxa)
+        if(length(seqTaxa) < n * 0.8){
+            logging.log("Re-build memory-mapping file for remaining individuals", "\n", verbose = verbose)
+            if(!MrkByCol){
+                geno <- deepcopy(geno, cols = seqTaxa)
+            }else{
+                geno <- deepcopy(geno, rows = seqTaxa)
+            }
             seqTaxa <- NULL
         }
     }
 
-    logging.log("Check if NAs exist in genotype...", verbose = verbose)
-    if(hasNA(geno@address, geno_ind = seqTaxa, threads = ncpus))   stop("NA is not allowed in genotype, use 'MVP.Data.impute' to impute.")
-    logging.log("(OK!)", "\n", verbose = verbose)
+    logging.log("Check if NAs exist in genotype...", "\n", verbose = verbose)
+    if(hasNA(geno@address, mrkbycol = MrkByCol, geno_ind = seqTaxa, threads = ncpus))   stop("NA is not allowed in genotype, use 'MVP.Data.impute' to impute.")
+
+    logging.log("Calculate allele frequency...", "\n", verbose = verbose)
+    marker_freq <- BigRowMean(geno@address, MrkByCol, threads = ncpus, geno_ind = seqTaxa) / 2
+    map$MAF <- ifelse(marker_freq > 0.5, 1 - marker_freq, marker_freq)
 
     #remove SNPs with low MAF
     geno_marker_index <- NULL
@@ -185,10 +203,7 @@ function(phe, geno, map, K=NULL, nPC.GLM=NULL, nPC.MLM=NULL, nPC.FarmCPU=NULL,
     if (!is.null(maf)){
         if(length(maf) != 1) stop("maf should be a value")
         if(maf <= 0 || maf >= 0.5) stop("maf should be at the range of 0-0.5")
-        logging.log("Calculate allele frequency...", "\n", verbose = verbose)
-        marker_maf <- BigRowMean(geno@address, threads = ncpus, geno_ind = seqTaxa) / 2
-        marker_maf <- ifelse(marker_maf > 0.5, 1 - marker_maf, marker_maf)
-        geno_marker_index <- which(marker_maf > maf)
+        geno_marker_index <- which(map$MAF >= maf)
         if(length(geno_marker_index) == 0) stop(paste("MAFs of all markers are smaller than the threshold", maf))
         if(length(geno_marker_index) == 1) stop(paste("only 1 marker left on the given MAF threshold", maf))
         if(length(geno_marker_index) == m)  geno_marker_index <- NULL
@@ -197,12 +212,23 @@ function(phe, geno, map, K=NULL, nPC.GLM=NULL, nPC.MLM=NULL, nPC.FarmCPU=NULL,
         logging.log("Total", m - length(geno_marker_index), "markers are removed at MAF threshold", maf, "\n", verbose = verbose)
         m <- length(geno_marker_index)
         map_sub <- map[geno_marker_index, ]
-        if(length(geno_marker_index) < m * 0.85){
+        marker_freq <- marker_freq[geno_marker_index]
+        if(length(geno_marker_index) < m * 0.8){
             if(!is.null(seqTaxa)){
-                geno <- deepcopy(geno, cols = seqTaxa, rows = geno_marker_index)
+                logging.log("Re-build memory-mapping file for remaining individuals and markers", "\n", verbose = verbose)
+                if(MrkByCol){
+                    geno <- deepcopy(geno, rows = seqTaxa, cols = geno_marker_index)
+                }else{
+                    geno <- deepcopy(geno, cols = seqTaxa, rows = geno_marker_index)
+                }
                 seqTaxa <- NULL
             }else{
-                geno <- deepcopy(geno, rows = geno_marker_index)
+                logging.log("Re-build memory-mapping file for remaining markers", "\n", verbose = verbose)
+                if(MrkByCol){
+                    geno <- deepcopy(geno, cols = geno_marker_index)
+                }else{
+                    geno <- deepcopy(geno, rows = geno_marker_index)
+                }
             }
             geno_marker_index <- NULL
         }
@@ -229,11 +255,13 @@ function(phe, geno, map, K=NULL, nPC.GLM=NULL, nPC.MLM=NULL, nPC.FarmCPU=NULL,
     if (!is.null(nPC) | "MLM" %in% method) {
         if (is.null(K)) {
             K <- MVP.K.VanRaden(
-              M = geno, 
-              ind_idx = seqTaxa, 
-              mrk_idx = geno_marker_index,
-              maxLine = maxLine, 
-              cpu = ncpus, 
+                M = geno, 
+                ind_idx = seqTaxa, 
+                mrk_idx = geno_marker_index,
+                mrk_freq = marker_freq,
+                mrk_bycol = MrkByCol,
+                maxLine = maxLine, 
+                cpu = ncpus, 
               verbose = verbose,
               checkNA = FALSE
             )
@@ -312,7 +340,7 @@ function(phe, geno, map, K=NULL, nPC.GLM=NULL, nPC.MLM=NULL, nPC.FarmCPU=NULL,
     logging.log("-------------------------GWAS Start-------------------------", "\n", verbose = verbose)
     if (glm.run) {
         logging.log("General Linear Model (GLM) Start...", "\n", verbose = verbose)
-        glm.results <- MVP.GLM(phe=phe, geno=geno, CV=CV.GLM, ind_idx=seqTaxa, mrk_idx=geno_marker_index, cpu=ncpus, verbose = verbose);gc()
+        glm.results <- MVP.GLM(phe=phe, geno=geno, CV=CV.GLM, ind_idx=seqTaxa, mrk_idx=geno_marker_index, maxLine = maxLine, cpu=ncpus, verbose = verbose);gc()
         colnames(glm.results) <- c("Effect", "SE", paste(colnames(phe)[2],"GLM",sep="."))
         z = glm.results[, 1]/glm.results[, 2]
         lambda = median(z^2, na.rm=TRUE)/qchisq(1/2, df = 1,lower.tail=FALSE)
@@ -327,7 +355,7 @@ function(phe, geno, map, K=NULL, nPC.GLM=NULL, nPC.MLM=NULL, nPC.FarmCPU=NULL,
 
     if (mlm.run) {
         logging.log("Mixed Linear Model (MLM) Start...", "\n", verbose = verbose)
-        mlm.results <- MVP.MLM(phe=phe, geno=geno, K=K, eigenK=eigenK, CV=CV.MLM, ind_idx=seqTaxa, mrk_idx=geno_marker_index, cpu=ncpus, vc.method=vc.method, verbose = verbose);gc()
+        mlm.results <- MVP.MLM(phe=phe, geno=geno, K=K, eigenK=eigenK, CV=CV.MLM, ind_idx=seqTaxa, mrk_idx=geno_marker_index, maxLine = maxLine, cpu=ncpus, vc.method=vc.method, verbose = verbose);gc()
         colnames(mlm.results) <- c("Effect", "SE", paste(colnames(phe)[2],"MLM",sep="."))
         z = mlm.results[, 1]/mlm.results[, 2]
         lambda = median(z^2, na.rm=TRUE)/qchisq(1/2, df = 1,lower.tail=FALSE)
@@ -342,7 +370,7 @@ function(phe, geno, map, K=NULL, nPC.GLM=NULL, nPC.MLM=NULL, nPC.FarmCPU=NULL,
     
     if (farmcpu.run) {
         logging.log("FarmCPU Start...", "\n", verbose = verbose)
-        farmcpu.results <- MVP.FarmCPU(phe=phe, geno=geno, map=map[,1:3], CV=CV.FarmCPU, ind_idx=seqTaxa, mrk_idx=geno_marker_index, ncpus=ncpus, memo="MVP.FarmCPU", p.threshold=p.threshold, QTN.threshold=QTN.threshold, method.bin=method.bin, bin.size=bin.size, bin.selection=bin.selection, maxLoop=maxLoop, verbose = verbose)
+        farmcpu.results <- MVP.FarmCPU(phe=phe, geno=geno, map=map[,1:3], CV=CV.FarmCPU, ind_idx=seqTaxa, mrk_idx=geno_marker_index, maxLine = maxLine, ncpus=ncpus, memo="MVP.FarmCPU", p.threshold=p.threshold, QTN.threshold=QTN.threshold, method.bin=method.bin, bin.size=bin.size, bin.selection=bin.selection, maxLoop=maxLoop, verbose = verbose)
         colnames(farmcpu.results) <- c("Effect", "SE", paste(colnames(phe)[2],"FarmCPU",sep="."))
         z = farmcpu.results[, 1]/farmcpu.results[, 2]
         lambda = median(z^2, na.rm=TRUE)/qchisq(1/2, df = 1,lower.tail=FALSE)
@@ -366,12 +394,12 @@ function(phe, geno, map, K=NULL, nPC.GLM=NULL, nPC.MLM=NULL, nPC.FarmCPU=NULL,
             myY.shuffle = phe
             myY.shuffle[,2] = myY.shuffle[index.shuffle,2]
             #GWAS using t.test...
-            myPermutation = MVP.GLM(phe=myY.shuffle[,c(1,2)], geno=geno, ind_idx=seqTaxa, mrk_idx=geno_marker_index, cpu=ncpus)
+            myPermutation = MVP.GLM(phe=myY.shuffle[,c(1,2)], geno=geno, ind_idx=seqTaxa, mrk_idx=geno_marker_index, maxLine=maxLine, cpu=ncpus)
             pvalue = min(myPermutation[,3],na.rm=TRUE)
             if(i==1){
-                    pvalue.final=pvalue
-               }else{
-                    pvalue.final=c(pvalue.final,pvalue)
+                pvalue.final=pvalue
+            }else{
+                pvalue.final=c(pvalue.final,pvalue)
             }
         }#end of permutation.rep
         permutation.cutoff = sort(pvalue.final)[ceiling(permutation.rep*0.05)]
@@ -451,11 +479,20 @@ function(phe, geno, map, K=NULL, nPC.GLM=NULL, nPC.MLM=NULL, nPC.FarmCPU=NULL,
             )
         }
     }
-    now <- Sys.time()
+    time_end <- Sys.time()
     if (length(file.output) > 0) {
       logging.log("Results are stored at Working Directory:", outpath, "\n", verbose = verbose)
     }
-    logging.log("End:", as.character(now), "\n", verbose = verbose)
+    logging.log("End:", format(time_end, format = "%F %T %Z"), "\n", verbose = verbose)
+    time_diff <- as.numeric(time_end) - as.numeric(time_start)
+    h <- time_diff %/% 3600
+	m <- (time_diff %% 3600) %/% 60
+	s <- ((time_diff %% 3600) %% 60)
+	index <- which(c(h, m, s) != 0)
+	num <- c(h, m, s)[index]
+	num <- round(num, 0)
+	char <- c("h", "m", "s")[index]
+	logging.log("Total running time:", paste(num, char, sep="", collapse=""), "\n", verbose = verbose)
     print_accomplished(width = 60, verbose = verbose)
     
     return(invisible(MVP.return))
diff --git a/R/RcppExports.R b/R/RcppExports.R
index 1b2c695..6cda37c 100644
--- a/R/RcppExports.R
+++ b/R/RcppExports.R
@@ -5,12 +5,12 @@ getRow <- function(pBigMat, row) {
     .Call(`_rMVP_getRow`, pBigMat, row)
 }
 
-glm_c <- function(y, X, iXX, pBigMat, geno_ind = NULL, marker_ind = NULL, verbose = TRUE, threads = 0L) {
-    .Call(`_rMVP_glm_c`, y, X, iXX, pBigMat, geno_ind, marker_ind, verbose, threads)
+glm_c <- function(y, X, iXX, pBigMat, geno_ind = NULL, marker_ind = NULL, step = 10000L, verbose = TRUE, threads = 0L) {
+    .Call(`_rMVP_glm_c`, y, X, iXX, pBigMat, geno_ind, marker_ind, step, verbose, threads)
 }
 
-mlm_c <- function(y, X, U, vgs, pBigMat, geno_ind = NULL, marker_ind = NULL, verbose = TRUE, threads = 0L) {
-    .Call(`_rMVP_mlm_c`, y, X, U, vgs, pBigMat, geno_ind, marker_ind, verbose, threads)
+mlm_c <- function(y, X, U, vgs, pBigMat, geno_ind = NULL, marker_ind = NULL, step = 10000L, verbose = TRUE, threads = 0L) {
+    .Call(`_rMVP_mlm_c`, y, X, U, vgs, pBigMat, geno_ind, marker_ind, step, verbose, threads)
 }
 
 vcf_parser_map <- function(vcf_file, out) {
@@ -33,8 +33,8 @@ numeric_scan <- function(num_file) {
     .Call(`_rMVP_numeric_scan`, num_file)
 }
 
-write_bfile <- function(pBigMat, bed_file, threads = 0L, verbose = TRUE) {
-    invisible(.Call(`_rMVP_write_bfile`, pBigMat, bed_file, threads, verbose))
+write_bfile <- function(pBigMat, bed_file, mrkbycol = TRUE, threads = 0L, verbose = TRUE) {
+    invisible(.Call(`_rMVP_write_bfile`, pBigMat, bed_file, mrkbycol, threads, verbose))
 }
 
 read_bfile <- function(bed_file, pBigMat, maxLine, threads = 0L, verbose = TRUE) {
@@ -53,16 +53,16 @@ geninv <- function(GG) {
     .Call(`_rMVP_geninv`, GG)
 }
 
-impute_marker <- function(pBigMat, threads = 0L, verbose = TRUE) {
-    invisible(.Call(`_rMVP_impute_marker`, pBigMat, threads, verbose))
+impute_marker <- function(pBigMat, mrkbycol = TRUE, threads = 0L, verbose = TRUE) {
+    invisible(.Call(`_rMVP_impute_marker`, pBigMat, mrkbycol, threads, verbose))
 }
 
-hasNA <- function(pBigMat, geno_ind = NULL, threads = 1L) {
-    .Call(`_rMVP_hasNA`, pBigMat, geno_ind, threads)
+hasNA <- function(pBigMat, mrkbycol = TRUE, geno_ind = NULL, marker_ind = NULL, threads = 1L) {
+    .Call(`_rMVP_hasNA`, pBigMat, mrkbycol, geno_ind, marker_ind, threads)
 }
 
-BigRowMean <- function(pBigMat, threads = 0L, geno_ind = NULL) {
-    .Call(`_rMVP_BigRowMean`, pBigMat, threads, geno_ind)
+BigRowMean <- function(pBigMat, mrkbycol = TRUE, threads = 0L, geno_ind = NULL) {
+    .Call(`_rMVP_BigRowMean`, pBigMat, mrkbycol, threads, geno_ind)
 }
 
 kin_cal_m <- function(pBigMat, threads = 0L, verbose = TRUE) {
@@ -73,7 +73,7 @@ kin_cal_s <- function(pBigMat, threads = 0L, mkl = FALSE, verbose = TRUE) {
     .Call(`_rMVP_kin_cal_s`, pBigMat, threads, mkl, verbose)
 }
 
-kin_cal <- function(pBigMat, geno_ind = NULL, marker_ind = NULL, threads = 0L, step = 10000L, mkl = FALSE, verbose = TRUE) {
-    .Call(`_rMVP_kin_cal`, pBigMat, geno_ind, marker_ind, threads, step, mkl, verbose)
+kin_cal <- function(pBigMat, geno_ind = NULL, marker_ind = NULL, marker_freq = NULL, marker_bycol = TRUE, threads = 0L, step = 10000L, mkl = FALSE, verbose = TRUE) {
+    .Call(`_rMVP_kin_cal`, pBigMat, geno_ind, marker_ind, marker_freq, marker_bycol, threads, step, mkl, verbose)
 }
 
diff --git a/README.md b/README.md
index 515744f..5ddb3fd 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# rMVP [![](https://img.shields.io/badge/Issues-%2B-brightgreen.svg)](https://github.com/XiaoleiLiuBio/rMVP/issues/new) [![](http://www.r-pkg.org/badges/version/rMVP?color=red)](https://cran.r-project.org/package=rMVP) [![CRAN Version](https://www.r-pkg.org/badges/version/rMVP?color=yellow)](https://CRAN.R-project.org/package=rMVP) [![](https://img.shields.io/badge/GitHub-1.2.0-blueviolet.svg)]() ![](http://cranlogs.r-pkg.org/badges/grand-total/rMVP?color=green) [![](https://cranlogs.r-pkg.org/badges/rMVP)](https://cran.r-project.org/package=rMVP) <a href="https://hits.seeyoufarm.com"/><img src="https://hits.seeyoufarm.com/api/count/incr/badge.svg?url=https%3A%2F%2Fgithub.com%2Fxiaolei-lab%2FrMVP"/></a>
+# rMVP [![](https://img.shields.io/badge/Issues-%2B-brightgreen.svg)](https://github.com/XiaoleiLiuBio/rMVP/issues/new) [![](http://www.r-pkg.org/badges/version/rMVP?color=red)](https://cran.r-project.org/package=rMVP) [![CRAN Version](https://www.r-pkg.org/badges/version/rMVP?color=yellow)](https://CRAN.R-project.org/package=rMVP) [![](https://img.shields.io/badge/GitHub-1.3.0-blueviolet.svg)]() ![](http://cranlogs.r-pkg.org/badges/grand-total/rMVP?color=green) [![](https://cranlogs.r-pkg.org/badges/rMVP)](https://cran.r-project.org/package=rMVP) <a href="https://hits.seeyoufarm.com"/><img src="https://hits.seeyoufarm.com/api/count/incr/badge.svg?url=https%3A%2F%2Fgithub.com%2Fxiaolei-lab%2FrMVP"/></a>
 
 ## An [r](https://) package for [M](https://)emory-efficient, [V](https://)isualization-enhanced, and [P](https://)arallel-accelerated Genome-Wide Association Study
 
@@ -227,7 +227,7 @@ MVP.Data(fileHMP=c("hmp.chr1.txt", "hmp.chr2.txt", "hmp.chr3.txt", "hmp.chr4.txt
 
 ### Numeric
 **[back to top](#contents)**  
-If you have genotype data in **Numeric** (m * n, m rows and n columns, m is the number of SNPs, n is the number of individuals) format:  
+If you have genotype data in **Numeric** (either m * n or n * m is acceptable, m is the number of SNPs, n is the number of individuals) format:  
 
 **fileNum**, name of genotype data in Numeric format  
 **filePhe**, name of phenotype file  
diff --git a/inst/extdata/05_mvp/mvp.geno.bin b/inst/extdata/05_mvp/mvp.geno.bin
index 400f2cb..3dc46b8 100644
Binary files a/inst/extdata/05_mvp/mvp.geno.bin and b/inst/extdata/05_mvp/mvp.geno.bin differ
diff --git a/inst/extdata/05_mvp/mvp.geno.desc b/inst/extdata/05_mvp/mvp.geno.desc
index f3fb5a5..a84afae 100644
--- a/inst/extdata/05_mvp/mvp.geno.desc
+++ b/inst/extdata/05_mvp/mvp.geno.desc
@@ -1,5 +1,5 @@
 new("big.matrix.descriptor", description = list(sharedType = "FileBacked", 
-    filename = "mvp.geno.bin", dirname = "./", totalRows = 15L, 
-    totalCols = 10L, rowOffset = c(0, 15), colOffset = c(0, 10
-    ), nrow = 15, ncol = 10, rowNames = NULL, colNames = NULL, 
+    filename = "mvp.geno.bin", dirname = "05_mvp/", totalRows = 10L, 
+    totalCols = 15L, rowOffset = c(0, 10), colOffset = c(0, 15
+    ), nrow = 10, ncol = 15, rowNames = NULL, colNames = NULL, 
     type = "char", separated = FALSE))
diff --git a/inst/extdata/06_mvp-impute/mvp.imp.geno.bin b/inst/extdata/06_mvp-impute/mvp.imp.geno.bin
index 867362e..d64da2e 100644
Binary files a/inst/extdata/06_mvp-impute/mvp.imp.geno.bin and b/inst/extdata/06_mvp-impute/mvp.imp.geno.bin differ
diff --git a/inst/extdata/06_mvp-impute/mvp.imp.geno.desc b/inst/extdata/06_mvp-impute/mvp.imp.geno.desc
index cd066ba..4ced266 100644
--- a/inst/extdata/06_mvp-impute/mvp.imp.geno.desc
+++ b/inst/extdata/06_mvp-impute/mvp.imp.geno.desc
@@ -1,5 +1,5 @@
 new("big.matrix.descriptor", description = list(sharedType = "FileBacked", 
-    filename = "mvp.imp.geno.bin", dirname = "./", totalRows = 15L, 
-    totalCols = 10L, rowOffset = c(0, 15), colOffset = c(0, 10
-    ), nrow = 15, ncol = 10, rowNames = NULL, colNames = NULL, 
+    filename = "mvp.imp.geno.bin", dirname = "06_mvp-impute/", 
+    totalRows = 10L, totalCols = 15L, rowOffset = c(0, 10), colOffset = c(0, 
+    15), nrow = 10, ncol = 15, rowNames = NULL, colNames = NULL, 
     type = "char", separated = FALSE))
diff --git a/man/MVP.Data.Kin.Rd b/man/MVP.Data.Kin.Rd
index bb4172b..186cdd7 100644
--- a/man/MVP.Data.Kin.Rd
+++ b/man/MVP.Data.Kin.Rd
@@ -9,6 +9,7 @@ MVP.Data.Kin(
   mvp_prefix = "mvp",
   out = NULL,
   maxLine = 10000,
+  mrk_bycol = TRUE,
   sep = "\\t",
   cpu = 1,
   verbose = TRUE
@@ -23,6 +24,8 @@ MVP.Data.Kin(
 
 \item{maxLine}{the number of markers handled at a time, smaller value would reduce the memory cost}
 
+\item{mrk_bycol}{whether the markers are stored by columns in genotype (i.e. genotype is a n by m matrix)}
+
 \item{sep}{seperator for Kinship file.}
 
 \item{cpu}{the number of cpu}
diff --git a/man/MVP.Data.PC.Rd b/man/MVP.Data.PC.Rd
index 603d7cf..9a156fd 100644
--- a/man/MVP.Data.PC.Rd
+++ b/man/MVP.Data.PC.Rd
@@ -11,6 +11,7 @@ MVP.Data.PC(
   out = NULL,
   pcs.keep = 5,
   maxLine = 10000,
+  mrk_bycol = TRUE,
   sep = "\\t",
   cpu = 1,
   verbose = TRUE
@@ -29,6 +30,8 @@ MVP.Data.PC(
 
 \item{maxLine}{the number of markers handled at a time, smaller value would reduce the memory cost}
 
+\item{mrk_bycol}{whether the markers are stored by columns in genotype (i.e. genotype is a n by m matrix)}
+
 \item{sep}{seperator for PC file.}
 
 \item{cpu}{the number of cpu}
diff --git a/man/MVP.Data.Rd b/man/MVP.Data.Rd
index c2f722c..999972d 100644
--- a/man/MVP.Data.Rd
+++ b/man/MVP.Data.Rd
@@ -2,10 +2,7 @@
 % Please edit documentation in R/MVP.Data.r
 \name{MVP.Data}
 \alias{MVP.Data}
-\title{MVP.Data: To prepare data for MVP package
-Author: Xiaolei Liu, Lilin Yin and Haohao Zhang
-Build date: Aug 30, 2016
-Last update: Sep 12, 2018}
+\title{MVP.Data: To prepare data for MVP package}
 \usage{
 MVP.Data(
   fileMVP = NULL,
@@ -44,7 +41,7 @@ MVP.Data(
 
 \item{fileBed}{Genotype in PLINK binary format}
 
-\item{fileNum}{Genotype in numeric format; pure 0, 1, 2 matrix; m * n, m is marker size, n is sample size}
+\item{fileNum}{Genotype in numeric format; pure 0, 1, 2 matrix; m * n or n * m, m is marker size, n is sample size}
 
 \item{fileMap}{SNP map information, there are three columns, including SNP_ID, Chromosome, and Position}
 
@@ -101,9 +98,6 @@ Requirement: fileHMP, fileBed, and fileNum can not input at the same time
 }
 \description{
 MVP.Data: To prepare data for MVP package
-Author: Xiaolei Liu, Lilin Yin and Haohao Zhang
-Build date: Aug 30, 2016
-Last update: Sep 12, 2018
 }
 \examples{
 \donttest{
diff --git a/man/MVP.Data.impute.Rd b/man/MVP.Data.impute.Rd
index 4d1782c..8244916 100644
--- a/man/MVP.Data.impute.Rd
+++ b/man/MVP.Data.impute.Rd
@@ -9,6 +9,7 @@ Build date: Sep 12, 2018}
 MVP.Data.impute(
   mvp_prefix,
   out = NULL,
+  mrk_bycol = TRUE,
   method = "Major",
   ncpus = NULL,
   verbose = TRUE
@@ -19,6 +20,8 @@ MVP.Data.impute(
 
 \item{out}{the prefix of output file}
 
+\item{mrk_bycol}{whether the markers are stored by columns in genotype (i.e. genotype is a n by m matrix)}
+
 \item{method}{'Major', 'Minor', "Middle"}
 
 \item{ncpus}{number of threads for imputing}
diff --git a/man/MVP.FarmCPU.Rd b/man/MVP.FarmCPU.Rd
index bcb18ea..2be28fd 100755
--- a/man/MVP.FarmCPU.Rd
+++ b/man/MVP.FarmCPU.Rd
@@ -21,6 +21,7 @@ MVP.FarmCPU(
   Prior = NULL,
   ncpus = 2,
   maxLoop = 10,
+  maxLine = 5000,
   threshold.output = 0.01,
   converge = 1,
   iteration.output = FALSE,
@@ -33,7 +34,7 @@ MVP.FarmCPU(
 \arguments{
 \item{phe}{phenotype, n by t matrix, n is sample size, t is number of phenotypes}
 
-\item{geno}{genotype, m by n matrix, m is marker size, n is sample size. This is Pure Genotype Data Matrix(GD). THERE IS NO COLUMN FOR TAXA.}
+\item{geno}{genotype, either m by n or n by m is supportable, m is marker size, n is population size. This is Pure Genotype Data Matrix(GD). THERE IS NO COLUMN FOR TAXA.}
 
 \item{map}{SNP map information, m by 3 matrix, m is marker size, the three columns are SNP_ID, Chr, and Pos}
 
@@ -63,6 +64,8 @@ MVP.FarmCPU(
 
 \item{maxLoop}{maximum number of iterations}
 
+\item{maxLine}{the number of markers handled at a time, smaller value would reduce the memory cost}
+
 \item{threshold.output}{only the GWAS results with p-values lower than threshold.output will be output}
 
 \item{converge}{a number, 0 to 1, if selected pseudo QTNs in the last and the second last iterations have a certain probality (the probability is converge) of overlap, the loop will stop}
@@ -94,7 +97,7 @@ phenotype <- phenotype[idx, ]
 print(dim(phenotype))
 genoPath <- system.file("extdata", "06_mvp-impute", "mvp.imp.geno.desc", package = "rMVP")
 genotype <- attach.big.matrix(genoPath)
-genotype <- deepcopy(genotype, cols=idx)
+genotype <- deepcopy(genotype, rows=idx)
 print(dim(genotype))
 mapPath <- system.file("extdata", "06_mvp-impute", "mvp.imp.geno.map", package = "rMVP")
 map <- read.table(mapPath , head = TRUE)
diff --git a/man/MVP.GLM.Rd b/man/MVP.GLM.Rd
index d015b43..f09baed 100755
--- a/man/MVP.GLM.Rd
+++ b/man/MVP.GLM.Rd
@@ -10,6 +10,7 @@ MVP.GLM(
   CV = NULL,
   ind_idx = NULL,
   mrk_idx = NULL,
+  maxLine = 5000,
   cpu = 1,
   verbose = TRUE
 )
@@ -17,7 +18,7 @@ MVP.GLM(
 \arguments{
 \item{phe}{phenotype, n * 2 matrix}
 
-\item{geno}{Genotype in numeric format, pure 0, 1, 2 matrix; m * n, m is marker size, n is population size}
+\item{geno}{genotype, either m by n or n by m is supportable, m is marker size, n is population size}
 
 \item{CV}{Covariance, design matrix(n * x) for the fixed effects}
 
@@ -25,6 +26,8 @@ MVP.GLM(
 
 \item{mrk_idx}{the index of effective markers used in analysis}
 
+\item{maxLine}{the number of markers handled at a time, smaller value would reduce the memory cost}
+
 \item{cpu}{number of cpus used for parallel computation}
 
 \item{verbose}{whether to print detail.}
@@ -45,7 +48,7 @@ phenotype <- phenotype[idx, ]
 print(dim(phenotype))
 genoPath <- system.file("extdata", "06_mvp-impute", "mvp.imp.geno.desc", package = "rMVP")
 genotype <- attach.big.matrix(genoPath)
-genotype <- deepcopy(genotype, cols=idx)
+genotype <- deepcopy(genotype, rows=idx)
 print(dim(genotype))
 
 glm <- MVP.GLM(phe=phenotype, geno=genotype, cpu=1)
diff --git a/man/MVP.K.VanRaden.Rd b/man/MVP.K.VanRaden.Rd
index 5badf32..1800398 100755
--- a/man/MVP.K.VanRaden.Rd
+++ b/man/MVP.K.VanRaden.Rd
@@ -9,13 +9,15 @@ MVP.K.VanRaden(
   maxLine = 5000,
   ind_idx = NULL,
   mrk_idx = NULL,
+  mrk_freq = NULL,
+  mrk_bycol = TRUE,
   cpu = 1,
   verbose = TRUE,
   checkNA = TRUE
 )
 }
 \arguments{
-\item{M}{Genotype, m * n, m is marker size, n is population size}
+\item{M}{genotype, either m by n or n by m is supportable, m is marker size, n is population size}
 
 \item{maxLine}{the number of markers handled at a time, smaller value would reduce the memory cost}
 
@@ -23,6 +25,10 @@ MVP.K.VanRaden(
 
 \item{mrk_idx}{the index of effective markers used in analysis}
 
+\item{mrk_freq}{the prior calculated major allele frequency (not MAF) for all markers used in analysis}
+
+\item{mrk_bycol}{whether the markers are stored by columns in genotype (i.e. M is a n by m matrix)}
+
 \item{cpu}{the number of cpu}
 
 \item{verbose}{whether to print detail.}
@@ -33,8 +39,7 @@ MVP.K.VanRaden(
 K, n * n matrix
 }
 \description{
-Build date: Dec 12, 2016
-Last update: Dec 12, 2019
+Calculate Kinship matrix by VanRaden method
 }
 \examples{
 \donttest{
diff --git a/man/MVP.MLM.Rd b/man/MVP.MLM.Rd
index b5b8be3..5943e99 100755
--- a/man/MVP.MLM.Rd
+++ b/man/MVP.MLM.Rd
@@ -13,6 +13,7 @@ MVP.MLM(
   ind_idx = NULL,
   mrk_idx = NULL,
   REML = NULL,
+  maxLine = 5000,
   cpu = 1,
   vc.method = c("BRENT", "EMMA", "HE"),
   verbose = TRUE
@@ -21,7 +22,7 @@ MVP.MLM(
 \arguments{
 \item{phe}{phenotype, n * 2 matrix}
 
-\item{geno}{genotype, m * n, m is marker size, n is population size}
+\item{geno}{genotype, either m by n or n by m is supportable, m is marker size, n is population size}
 
 \item{K}{Kinship, Covariance matrix(n * n) for random effects; must be positive semi-definite}
 
@@ -35,6 +36,8 @@ MVP.MLM(
 
 \item{REML}{a list that contains ve and vg}
 
+\item{maxLine}{the number of markers handled at a time, smaller value would reduce the memory cost}
+
 \item{cpu}{number of cpus used for parallel computation}
 
 \item{vc.method}{the methods for estimating variance component("emma" or "he" or "brent")}
@@ -45,8 +48,7 @@ MVP.MLM(
 results: a m * 2 matrix, the first column is the SNP effect, the second column is the P values
 }
 \description{
-Build date: Aug 30, 2016
-Last update: Aug 30, 2016
+To perform GWAS with GLM and MLM model and get the P value of SNPs
 }
 \examples{
 \donttest{
@@ -57,7 +59,7 @@ phenotype <- phenotype[idx, ]
 print(dim(phenotype))
 genoPath <- system.file("extdata", "06_mvp-impute", "mvp.imp.geno.desc", package = "rMVP")
 genotype <- attach.big.matrix(genoPath)
-genotype <- deepcopy(genotype, cols=idx)
+genotype <- deepcopy(genotype, rows=idx)
 print(dim(genotype))
 K <- MVP.K.VanRaden(genotype, cpu=1)
 
diff --git a/man/MVP.PCA.Rd b/man/MVP.PCA.Rd
index 5e28b97..811b342 100755
--- a/man/MVP.PCA.Rd
+++ b/man/MVP.PCA.Rd
@@ -10,13 +10,14 @@ MVP.PCA(
   maxLine = 10000,
   ind_idx = NULL,
   mrk_idx = NULL,
+  mrk_bycol = TRUE,
   pcs.keep = 5,
   cpu = 1,
   verbose = TRUE
 )
 }
 \arguments{
-\item{M}{Genotype in numeric format, pure 0, 1, 2 matrix; m * n, m is marker size, n is population size}
+\item{M}{genotype, either m by n or n by m is supportable, m is marker size, n is population size}
 
 \item{K}{kinship matrix}
 
@@ -26,6 +27,8 @@ MVP.PCA(
 
 \item{mrk_idx}{the index of effective markers used in analysis}
 
+\item{mrk_bycol}{whether the markers are stored by columns in genotype (i.e. M is a n by m matrix)}
+
 \item{pcs.keep}{maximum number of PCs for output}
 
 \item{cpu}{the number of cpu}
@@ -36,8 +39,7 @@ MVP.PCA(
 Output: PCs - a n * npc matrix of top number of PCs, n is population size and npc is @param pcs.keep
 }
 \description{
-Build date: Dec 14, 2016
-Last update: Oct 29, 2018
+Principal Component Analysis
 }
 \examples{
 \donttest{
@@ -50,6 +52,3 @@ str(pca)
 }
 
 }
-\author{
-Xiaolei Liu, Lilin Yin and Haohao Zhang
-}
diff --git a/man/MVP.Rd b/man/MVP.Rd
index b43661e..e0c737f 100755
--- a/man/MVP.Rd
+++ b/man/MVP.Rd
@@ -43,7 +43,7 @@ MVP(
 \arguments{
 \item{phe}{phenotype, n * 2 matrix, n is sample size}
 
-\item{geno}{Genotype in bigmatrix format; m * n, m is marker size, n is sample size}
+\item{geno}{genotype, either m by n or n by m is supportable, m is marker size, n is population size}
 
 \item{map}{SNP map information, SNP name, Chr, Pos}
 
@@ -91,6 +91,8 @@ MVP(
 
 \item{memo}{Character. A text marker on output files}
 
+\item{outpath}{the path of the output files}
+
 \item{col}{for color of points in each chromosome on manhattan plot}
 
 \item{file.output}{whether to output files or not}
@@ -102,11 +104,8 @@ MVP(
 \item{threshold}{a cutoff line on manhattan plot, 0.05/marker size}
 
 \item{verbose}{whether to print detail.}
-
-\item{tmppath}{the path of the temporary file}
 }
 \value{
-a m * 2 matrix, the first column is the SNP effect, the second column is the P values
 Output: MVP.return$map - SNP map information, SNP name, Chr, Pos
 Output: MVP.return$glm.results - p-values obtained by GLM method
 Output: MVP.return$mlm.results - p-values obtained by MLM method
@@ -118,10 +117,6 @@ Object 2: To calculate kinship among individuals using Varaden method
 Object 3: Estimate variance components using EMMA, FaST-LMM, and HE regression
 Object 4: Generate high-quality figures
 }
-\details{
-Build date: Aug 30, 2017
-Last update: Dec 14, 2018
-}
 \examples{
 \donttest{
 phePath <- system.file("extdata", "07_other", "mvp.phe", package = "rMVP")
diff --git a/man/MVP.Version.Rd b/man/MVP.Version.Rd
index dfebdda..d9f453d 100755
--- a/man/MVP.Version.Rd
+++ b/man/MVP.Version.Rd
@@ -4,7 +4,7 @@
 \alias{MVP.Version}
 \title{Print MVP Banner}
 \usage{
-MVP.Version(width = 60, verbose = TRUE)
+MVP.Version(width = 65, verbose = TRUE)
 }
 \arguments{
 \item{width}{the width of the message}
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
index 5ca50b3..a741d05 100644
--- a/src/RcppExports.cpp
+++ b/src/RcppExports.cpp
@@ -25,8 +25,8 @@ BEGIN_RCPP
 END_RCPP
 }
 // glm_c
-SEXP glm_c(const arma::vec& y, const arma::mat& X, const arma::mat& iXX, SEXP pBigMat, const Nullable<arma::uvec> geno_ind, const Nullable<arma::uvec> marker_ind, const bool verbose, const int threads);
-RcppExport SEXP _rMVP_glm_c(SEXP ySEXP, SEXP XSEXP, SEXP iXXSEXP, SEXP pBigMatSEXP, SEXP geno_indSEXP, SEXP marker_indSEXP, SEXP verboseSEXP, SEXP threadsSEXP) {
+SEXP glm_c(const arma::vec& y, const arma::mat& X, const arma::mat& iXX, SEXP pBigMat, const Nullable<arma::uvec> geno_ind, const Nullable<arma::uvec> marker_ind, const int step, const bool verbose, const int threads);
+RcppExport SEXP _rMVP_glm_c(SEXP ySEXP, SEXP XSEXP, SEXP iXXSEXP, SEXP pBigMatSEXP, SEXP geno_indSEXP, SEXP marker_indSEXP, SEXP stepSEXP, SEXP verboseSEXP, SEXP threadsSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
@@ -36,15 +36,16 @@ BEGIN_RCPP
     Rcpp::traits::input_parameter< SEXP >::type pBigMat(pBigMatSEXP);
     Rcpp::traits::input_parameter< const Nullable<arma::uvec> >::type geno_ind(geno_indSEXP);
     Rcpp::traits::input_parameter< const Nullable<arma::uvec> >::type marker_ind(marker_indSEXP);
+    Rcpp::traits::input_parameter< const int >::type step(stepSEXP);
     Rcpp::traits::input_parameter< const bool >::type verbose(verboseSEXP);
     Rcpp::traits::input_parameter< const int >::type threads(threadsSEXP);
-    rcpp_result_gen = Rcpp::wrap(glm_c(y, X, iXX, pBigMat, geno_ind, marker_ind, verbose, threads));
+    rcpp_result_gen = Rcpp::wrap(glm_c(y, X, iXX, pBigMat, geno_ind, marker_ind, step, verbose, threads));
     return rcpp_result_gen;
 END_RCPP
 }
 // mlm_c
-SEXP mlm_c(const arma::vec& y, const arma::mat& X, const arma::mat& U, const double vgs, SEXP pBigMat, const Nullable<arma::uvec> geno_ind, const Nullable<arma::uvec> marker_ind, const bool verbose, const int threads);
-RcppExport SEXP _rMVP_mlm_c(SEXP ySEXP, SEXP XSEXP, SEXP USEXP, SEXP vgsSEXP, SEXP pBigMatSEXP, SEXP geno_indSEXP, SEXP marker_indSEXP, SEXP verboseSEXP, SEXP threadsSEXP) {
+SEXP mlm_c(const arma::vec& y, const arma::mat& X, const arma::mat& U, const double vgs, SEXP pBigMat, const Nullable<arma::uvec> geno_ind, const Nullable<arma::uvec> marker_ind, const int step, const bool verbose, const int threads);
+RcppExport SEXP _rMVP_mlm_c(SEXP ySEXP, SEXP XSEXP, SEXP USEXP, SEXP vgsSEXP, SEXP pBigMatSEXP, SEXP geno_indSEXP, SEXP marker_indSEXP, SEXP stepSEXP, SEXP verboseSEXP, SEXP threadsSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
@@ -55,9 +56,10 @@ BEGIN_RCPP
     Rcpp::traits::input_parameter< SEXP >::type pBigMat(pBigMatSEXP);
     Rcpp::traits::input_parameter< const Nullable<arma::uvec> >::type geno_ind(geno_indSEXP);
     Rcpp::traits::input_parameter< const Nullable<arma::uvec> >::type marker_ind(marker_indSEXP);
+    Rcpp::traits::input_parameter< const int >::type step(stepSEXP);
     Rcpp::traits::input_parameter< const bool >::type verbose(verboseSEXP);
     Rcpp::traits::input_parameter< const int >::type threads(threadsSEXP);
-    rcpp_result_gen = Rcpp::wrap(mlm_c(y, X, U, vgs, pBigMat, geno_ind, marker_ind, verbose, threads));
+    rcpp_result_gen = Rcpp::wrap(mlm_c(y, X, U, vgs, pBigMat, geno_ind, marker_ind, step, verbose, threads));
     return rcpp_result_gen;
 END_RCPP
 }
@@ -126,15 +128,16 @@ BEGIN_RCPP
 END_RCPP
 }
 // write_bfile
-void write_bfile(SEXP pBigMat, std::string bed_file, int threads, bool verbose);
-RcppExport SEXP _rMVP_write_bfile(SEXP pBigMatSEXP, SEXP bed_fileSEXP, SEXP threadsSEXP, SEXP verboseSEXP) {
+void write_bfile(SEXP pBigMat, std::string bed_file, bool mrkbycol, int threads, bool verbose);
+RcppExport SEXP _rMVP_write_bfile(SEXP pBigMatSEXP, SEXP bed_fileSEXP, SEXP mrkbycolSEXP, SEXP threadsSEXP, SEXP verboseSEXP) {
 BEGIN_RCPP
     Rcpp::RNGScope rcpp_rngScope_gen;
     Rcpp::traits::input_parameter< SEXP >::type pBigMat(pBigMatSEXP);
     Rcpp::traits::input_parameter< std::string >::type bed_file(bed_fileSEXP);
+    Rcpp::traits::input_parameter< bool >::type mrkbycol(mrkbycolSEXP);
     Rcpp::traits::input_parameter< int >::type threads(threadsSEXP);
     Rcpp::traits::input_parameter< bool >::type verbose(verboseSEXP);
-    write_bfile(pBigMat, bed_file, threads, verbose);
+    write_bfile(pBigMat, bed_file, mrkbycol, threads, verbose);
     return R_NilValue;
 END_RCPP
 }
@@ -194,40 +197,44 @@ BEGIN_RCPP
 END_RCPP
 }
 // impute_marker
-void impute_marker(SEXP pBigMat, int threads, bool verbose);
-RcppExport SEXP _rMVP_impute_marker(SEXP pBigMatSEXP, SEXP threadsSEXP, SEXP verboseSEXP) {
+void impute_marker(SEXP pBigMat, bool mrkbycol, int threads, bool verbose);
+RcppExport SEXP _rMVP_impute_marker(SEXP pBigMatSEXP, SEXP mrkbycolSEXP, SEXP threadsSEXP, SEXP verboseSEXP) {
 BEGIN_RCPP
     Rcpp::RNGScope rcpp_rngScope_gen;
     Rcpp::traits::input_parameter< SEXP >::type pBigMat(pBigMatSEXP);
+    Rcpp::traits::input_parameter< bool >::type mrkbycol(mrkbycolSEXP);
     Rcpp::traits::input_parameter< int >::type threads(threadsSEXP);
     Rcpp::traits::input_parameter< bool >::type verbose(verboseSEXP);
-    impute_marker(pBigMat, threads, verbose);
+    impute_marker(pBigMat, mrkbycol, threads, verbose);
     return R_NilValue;
 END_RCPP
 }
 // hasNA
-bool hasNA(SEXP pBigMat, const Nullable<arma::uvec> geno_ind, const int threads);
-RcppExport SEXP _rMVP_hasNA(SEXP pBigMatSEXP, SEXP geno_indSEXP, SEXP threadsSEXP) {
+bool hasNA(SEXP pBigMat, bool mrkbycol, const Nullable<arma::uvec> geno_ind, const Nullable<arma::uvec> marker_ind, const int threads);
+RcppExport SEXP _rMVP_hasNA(SEXP pBigMatSEXP, SEXP mrkbycolSEXP, SEXP geno_indSEXP, SEXP marker_indSEXP, SEXP threadsSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
     Rcpp::traits::input_parameter< SEXP >::type pBigMat(pBigMatSEXP);
+    Rcpp::traits::input_parameter< bool >::type mrkbycol(mrkbycolSEXP);
     Rcpp::traits::input_parameter< const Nullable<arma::uvec> >::type geno_ind(geno_indSEXP);
+    Rcpp::traits::input_parameter< const Nullable<arma::uvec> >::type marker_ind(marker_indSEXP);
     Rcpp::traits::input_parameter< const int >::type threads(threadsSEXP);
-    rcpp_result_gen = Rcpp::wrap(hasNA(pBigMat, geno_ind, threads));
+    rcpp_result_gen = Rcpp::wrap(hasNA(pBigMat, mrkbycol, geno_ind, marker_ind, threads));
     return rcpp_result_gen;
 END_RCPP
 }
 // BigRowMean
-arma::vec BigRowMean(SEXP pBigMat, int threads, const Nullable<arma::uvec> geno_ind);
-RcppExport SEXP _rMVP_BigRowMean(SEXP pBigMatSEXP, SEXP threadsSEXP, SEXP geno_indSEXP) {
+arma::vec BigRowMean(SEXP pBigMat, bool mrkbycol, int threads, const Nullable<arma::uvec> geno_ind);
+RcppExport SEXP _rMVP_BigRowMean(SEXP pBigMatSEXP, SEXP mrkbycolSEXP, SEXP threadsSEXP, SEXP geno_indSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
     Rcpp::traits::input_parameter< SEXP >::type pBigMat(pBigMatSEXP);
+    Rcpp::traits::input_parameter< bool >::type mrkbycol(mrkbycolSEXP);
     Rcpp::traits::input_parameter< int >::type threads(threadsSEXP);
     Rcpp::traits::input_parameter< const Nullable<arma::uvec> >::type geno_ind(geno_indSEXP);
-    rcpp_result_gen = Rcpp::wrap(BigRowMean(pBigMat, threads, geno_ind));
+    rcpp_result_gen = Rcpp::wrap(BigRowMean(pBigMat, mrkbycol, threads, geno_ind));
     return rcpp_result_gen;
 END_RCPP
 }
@@ -259,43 +266,45 @@ BEGIN_RCPP
 END_RCPP
 }
 // kin_cal
-SEXP kin_cal(SEXP pBigMat, const Nullable<arma::uvec> geno_ind, const Nullable<arma::uvec> marker_ind, int threads, size_t step, bool mkl, bool verbose);
-RcppExport SEXP _rMVP_kin_cal(SEXP pBigMatSEXP, SEXP geno_indSEXP, SEXP marker_indSEXP, SEXP threadsSEXP, SEXP stepSEXP, SEXP mklSEXP, SEXP verboseSEXP) {
+SEXP kin_cal(SEXP pBigMat, const Nullable<arma::uvec> geno_ind, const Nullable<arma::uvec> marker_ind, const Nullable<arma::vec> marker_freq, const bool marker_bycol, int threads, size_t step, bool mkl, bool verbose);
+RcppExport SEXP _rMVP_kin_cal(SEXP pBigMatSEXP, SEXP geno_indSEXP, SEXP marker_indSEXP, SEXP marker_freqSEXP, SEXP marker_bycolSEXP, SEXP threadsSEXP, SEXP stepSEXP, SEXP mklSEXP, SEXP verboseSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
     Rcpp::traits::input_parameter< SEXP >::type pBigMat(pBigMatSEXP);
     Rcpp::traits::input_parameter< const Nullable<arma::uvec> >::type geno_ind(geno_indSEXP);
     Rcpp::traits::input_parameter< const Nullable<arma::uvec> >::type marker_ind(marker_indSEXP);
+    Rcpp::traits::input_parameter< const Nullable<arma::vec> >::type marker_freq(marker_freqSEXP);
+    Rcpp::traits::input_parameter< const bool >::type marker_bycol(marker_bycolSEXP);
     Rcpp::traits::input_parameter< int >::type threads(threadsSEXP);
     Rcpp::traits::input_parameter< size_t >::type step(stepSEXP);
     Rcpp::traits::input_parameter< bool >::type mkl(mklSEXP);
     Rcpp::traits::input_parameter< bool >::type verbose(verboseSEXP);
-    rcpp_result_gen = Rcpp::wrap(kin_cal(pBigMat, geno_ind, marker_ind, threads, step, mkl, verbose));
+    rcpp_result_gen = Rcpp::wrap(kin_cal(pBigMat, geno_ind, marker_ind, marker_freq, marker_bycol, threads, step, mkl, verbose));
     return rcpp_result_gen;
 END_RCPP
 }
 
 static const R_CallMethodDef CallEntries[] = {
     {"_rMVP_getRow", (DL_FUNC) &_rMVP_getRow, 2},
-    {"_rMVP_glm_c", (DL_FUNC) &_rMVP_glm_c, 8},
-    {"_rMVP_mlm_c", (DL_FUNC) &_rMVP_mlm_c, 9},
+    {"_rMVP_glm_c", (DL_FUNC) &_rMVP_glm_c, 9},
+    {"_rMVP_mlm_c", (DL_FUNC) &_rMVP_mlm_c, 10},
     {"_rMVP_vcf_parser_map", (DL_FUNC) &_rMVP_vcf_parser_map, 2},
     {"_rMVP_vcf_parser_genotype", (DL_FUNC) &_rMVP_vcf_parser_genotype, 5},
     {"_rMVP_hapmap_parser_map", (DL_FUNC) &_rMVP_hapmap_parser_map, 2},
     {"_rMVP_hapmap_parser_genotype", (DL_FUNC) &_rMVP_hapmap_parser_genotype, 6},
     {"_rMVP_numeric_scan", (DL_FUNC) &_rMVP_numeric_scan, 1},
-    {"_rMVP_write_bfile", (DL_FUNC) &_rMVP_write_bfile, 4},
+    {"_rMVP_write_bfile", (DL_FUNC) &_rMVP_write_bfile, 5},
     {"_rMVP_read_bfile", (DL_FUNC) &_rMVP_read_bfile, 5},
     {"_rMVP_fit_diago_brent", (DL_FUNC) &_rMVP_fit_diago_brent, 9},
     {"_rMVP_crossprodcpp", (DL_FUNC) &_rMVP_crossprodcpp, 1},
     {"_rMVP_geninv", (DL_FUNC) &_rMVP_geninv, 1},
-    {"_rMVP_impute_marker", (DL_FUNC) &_rMVP_impute_marker, 3},
-    {"_rMVP_hasNA", (DL_FUNC) &_rMVP_hasNA, 3},
-    {"_rMVP_BigRowMean", (DL_FUNC) &_rMVP_BigRowMean, 3},
+    {"_rMVP_impute_marker", (DL_FUNC) &_rMVP_impute_marker, 4},
+    {"_rMVP_hasNA", (DL_FUNC) &_rMVP_hasNA, 5},
+    {"_rMVP_BigRowMean", (DL_FUNC) &_rMVP_BigRowMean, 4},
     {"_rMVP_kin_cal_m", (DL_FUNC) &_rMVP_kin_cal_m, 3},
     {"_rMVP_kin_cal_s", (DL_FUNC) &_rMVP_kin_cal_s, 4},
-    {"_rMVP_kin_cal", (DL_FUNC) &_rMVP_kin_cal, 7},
+    {"_rMVP_kin_cal", (DL_FUNC) &_rMVP_kin_cal, 9},
     {NULL, NULL, 0}
 };
 
diff --git a/src/assoc.cpp b/src/assoc.cpp
index 7d67bdc..eb34db0 100755
--- a/src/assoc.cpp
+++ b/src/assoc.cpp
@@ -67,19 +67,20 @@ NumericVector getRow(SEXP pBigMat, const int row){
 }
 
 template <typename T>
-SEXP glm_c(const arma::vec &y, const arma::mat &X, const arma::mat & iXX, XPtr<BigMatrix> pMat, const Nullable<arma::uvec> geno_ind = R_NilValue, const Nullable<arma::uvec> marker_ind = R_NilValue, const bool verbose = true, const int threads = 0){
+SEXP glm_c(const arma::vec &y, const arma::mat &X, const arma::mat & iXX, XPtr<BigMatrix> pMat, const Nullable<arma::uvec> geno_ind = R_NilValue, const Nullable<arma::uvec> marker_ind = R_NilValue, const int step = 10000, const bool verbose = true, const int threads = 0){
 	
 	omp_setup(threads);
 	
 	MatrixAccessor<T> genomat = MatrixAccessor<T>(*pMat);
-
+	bool marker_bycol = (y.n_elem == pMat->nrow());
+	
 	int n;
 	uvec _geno_ind;
 	if(geno_ind.isNotNull()){
         _geno_ind = as<uvec>(geno_ind) - 1;
 		n = _geno_ind.n_elem;
     }else{
-		n = pMat->ncol();
+		n = marker_bycol ? pMat->nrow() : pMat->ncol();
 	}
 
 	int m;
@@ -88,7 +89,7 @@ SEXP glm_c(const arma::vec &y, const arma::mat &X, const arma::mat & iXX, XPtr<B
         _marker_ind = as<uvec>(marker_ind) - 1;
 		m = _marker_ind.n_elem;
     }else{
-		m = pMat->nrow();
+		m = marker_bycol ? pMat->ncol() : pMat->nrow();
 	}
 
 	int q0 = X.n_cols;
@@ -101,111 +102,174 @@ SEXP glm_c(const arma::vec &y, const arma::mat &X, const arma::mat & iXX, XPtr<B
 	arma::mat xy = X.t() * y;
 	double yy = sum(y % y);
 	arma::mat res(m, 1 + 1 + 1 + q0);
-	arma::vec snp(n);
 	arma::mat iXXs(q0 + 1, q0 + 1);
 
-	#pragma omp parallel for schedule(dynamic) firstprivate(snp, iXXs)
-	for(int i = 0; i < m; i++){
+	arma::mat Z_buffer(n, step, fill::none);
+	int i = 0, j = 0;
+	int i_marker = 0;
+	for (;i < m;) {
+		
+		int cnt = 0;
+		for (; j < m && cnt < step; j++)
+		{
+			cnt++;
+		}
+
+		if (cnt != step) {
+			Z_buffer.set_size(n, cnt);
+		}
 
 		if(_geno_ind.is_empty()){
 			if(_marker_ind.is_empty()){
-				for(int ii = 0; ii < n; ii++){
-					snp[ii] = genomat[ii][i];
+				if(marker_bycol){
+					#pragma omp parallel for
+					for(int l = 0; l < cnt; l++){
+						for(int k = 0; k < n; k++){
+							Z_buffer(k, l) = genomat[(i_marker + l)][k];
+						}
+					}
+				}else{
+					#pragma omp parallel for
+					for(int k = 0; k < n; k++){
+						for(int l = 0; l < cnt; l++){
+							Z_buffer(k, l) = genomat[k][(i_marker + l)];
+						}
+					}
 				}
 			}else{
-				for(int ii = 0; ii < n; ii++){
-					snp[ii] = genomat[ii][_marker_ind[i]];
+				if(marker_bycol){
+					#pragma omp parallel for
+					for(int l = 0; l < cnt; l++){
+						for(int k = 0; k < n; k++){
+							Z_buffer(k, l) = genomat[_marker_ind[(i_marker + l)]][k];
+						}
+					}
+				}else{
+					#pragma omp parallel for
+					for(int k = 0; k < n; k++){
+						for(int l = 0; l < cnt; l++){
+							Z_buffer(k, l) = genomat[k][_marker_ind[(i_marker + l)]];
+						}
+					}
 				}
 			}
 		}else{
 			if(_marker_ind.is_empty()){
-				for(int ii = 0; ii < n; ii++){
-					snp[ii] = genomat[_geno_ind[ii]][i];
+				if(marker_bycol){
+					#pragma omp parallel for
+					for(int l = 0; l < cnt; l++){
+						for(int k = 0; k < n; k++){
+							Z_buffer(k, l) = genomat[(i_marker + l)][_geno_ind[k]];
+						}
+					}
+				}else{
+					#pragma omp parallel for
+					for(int k = 0; k < n; k++){
+						for(int l = 0; l < cnt; l++){
+							Z_buffer(k, l) = genomat[_geno_ind[k]][(i_marker + l)];
+						}
+					}
 				}
 			}else{
-				for(int ii = 0; ii < n; ii++){
-					snp[ii] = genomat[_geno_ind[ii]][_marker_ind[i]];
+				if(marker_bycol){
+					#pragma omp parallel for
+					for(int l = 0; l < cnt; l++){
+						for(int k = 0; k < n; k++){
+							Z_buffer(k, l) = genomat[_marker_ind[(i_marker + l)]][_geno_ind[k]];
+						}
+					}
+				}else{
+					#pragma omp parallel for
+					for(int k = 0; k < n; k++){
+						for(int l = 0; l < cnt; l++){
+							Z_buffer(k, l) = genomat[_geno_ind[k]][_marker_ind[(i_marker + l)]];
+						}
+					}
 				}
 			}
 		}
-		
-		double sy = sum(snp % y);
-		double ss = sum(snp % snp);
-		arma::mat xs = X.t() * snp;
-		arma::mat B21 = xs.t() * iXX;
-		double t2 = as_scalar(B21 * xs);
-		double B22 = (ss - t2);
-		double invB22;
-		int df;
-		if(B22 < 1e-8){
-			invB22 = 0;
-			df = n - q0;
-		}else{
-			invB22 = 1 / B22;
-			df = n - q0 - 1;
+
+		#pragma omp parallel for firstprivate(iXXs)
+		for(int l = 0; l < cnt; l++){
+			double sy = sum(Z_buffer.col(l) % y);
+			double ss = sum(Z_buffer.col(l) % Z_buffer.col(l));
+			arma::mat xs = X.t() * Z_buffer.col(l);
+			arma::mat B21 = xs.t() * iXX;
+			double t2 = as_scalar(B21 * xs);
+			double B22 = (ss - t2);
+			double invB22;
+			int df;
+			if(B22 < 1e-8){
+				invB22 = 0;
+				df = n - q0;
+			}else{
+				invB22 = 1 / B22;
+				df = n - q0 - 1;
+			}
+			arma::mat NeginvB22B21 = -1 * invB22 * B21;
+			iXXs(q0, q0) = invB22;
+			iXXs.submat(0, 0, q0 - 1, q0 - 1) = iXX + invB22 * B21.t() * B21;
+			iXXs(q0, span(0, q0 - 1)) = NeginvB22B21;
+			iXXs(span(0, q0 - 1), q0) = NeginvB22B21.t();
+
+			// statistics
+			arma::mat rhs(xy.n_rows + 1, 1);
+			rhs.rows(0, xy.n_rows - 1) = xy;
+			rhs(xy.n_rows, 0) = sy;
+			arma::mat beta = iXXs * rhs;
+
+			double ve = (yy - as_scalar(beta.t() * rhs)) / df;
+			arma::vec se(q0 + 1);
+			arma::vec pvalue(q0 + 1);
+			for(int ff = 0; ff < (q0 + 1); ff++){
+				se[ff] = sqrt(iXXs(ff, ff) * ve);
+				pvalue[ff] = 2 * R::pt(abs(beta[ff] / se[ff]), df, false, false);
+				res(i_marker + l, ff + 2) = pvalue[ff];
+			}
+
+			if(invB22 == 0){
+				beta[q0] = NA_REAL;
+				se[q0] = NA_REAL;
+				res(i_marker + l, q0) = NA_REAL;
+			}
+			res(i_marker + l, 0) = beta[q0];
+			res(i_marker + l, 1) = se[q0]; 
 		}
 
-		arma::mat NeginvB22B21 = -1 * invB22 * B21;
-
-		iXXs(q0, q0)=invB22;
-
-		iXXs.submat(0, 0, q0 - 1, q0 - 1) = iXX + invB22 * B21.t() * B21;
-		iXXs(q0, span(0, q0 - 1)) = NeginvB22B21;
-		iXXs(span(0, q0 - 1), q0) = NeginvB22B21.t();
-
-        // statistics
-        arma::mat rhs(xy.n_rows + 1, 1);
-        rhs.rows(0, xy.n_rows - 1) = xy;
-        rhs(xy.n_rows, 0) = sy;
-		arma::mat beta = iXXs * rhs;
-
-        double ve = (yy - as_scalar(beta.t() * rhs)) / df;
-       	arma::vec se(q0 + 1);
-        arma::vec pvalue(q0 + 1);
-        for(int ff = 0; ff < (q0 + 1); ff++){
-        	se[ff] = sqrt(iXXs(ff, ff) * ve);
-        	pvalue[ff] = 2 * R::pt(abs(beta[ff] / se[ff]), df, false, false);
-        	res(i, ff + 2) = pvalue[ff];
-        }
-
-        if(invB22 == 0){
-        	beta[q0] = NA_REAL;
-        	se[q0] = NA_REAL;
-        	res(i, q0) = NA_REAL;
-        }
-        res(i, 0) = beta[q0];
-        res(i, 1) = se[q0]; 
-        progress.increment();
+		i = j;
+		i_marker += cnt;
+		if(!Progress::check_abort()) progress.increment(cnt);
 	}
-
+	Z_buffer.reset();
 	return wrap(res);
 }
 
 // [[Rcpp::export]]
-SEXP glm_c(const arma::vec & y, const arma::mat & X, const arma::mat & iXX, SEXP pBigMat, const Nullable<arma::uvec> geno_ind = R_NilValue, const Nullable<arma::uvec> marker_ind = R_NilValue, const bool verbose = true, const int threads = 0){
+SEXP glm_c(const arma::vec & y, const arma::mat & X, const arma::mat & iXX, SEXP pBigMat, const Nullable<arma::uvec> geno_ind = R_NilValue, const Nullable<arma::uvec> marker_ind = R_NilValue, const int step = 10000, const bool verbose = true, const int threads = 0){
 
 	XPtr<BigMatrix> xpMat(pBigMat);
 
 	switch(xpMat->matrix_type()){
 	case 1:
-		return glm_c<char>(y, X, iXX, xpMat, geno_ind, marker_ind, verbose, threads);
+		return glm_c<char>(y, X, iXX, xpMat, geno_ind, marker_ind, step, verbose, threads);
 	case 2:
-		return glm_c<short>(y, X, iXX, xpMat, geno_ind, marker_ind, verbose, threads);
+		return glm_c<short>(y, X, iXX, xpMat, geno_ind, marker_ind, step, verbose, threads);
 	case 4:
-		return glm_c<int>(y, X, iXX, xpMat, geno_ind, marker_ind, verbose, threads);
+		return glm_c<int>(y, X, iXX, xpMat, geno_ind, marker_ind, step, verbose, threads);
 	case 8:
-		return glm_c<double>(y, X, iXX, xpMat, geno_ind, marker_ind, verbose, threads);
+		return glm_c<double>(y, X, iXX, xpMat, geno_ind, marker_ind, step, verbose, threads);
 	default:
 		throw Rcpp::exception("unknown type detected for big.matrix object!");
 	}
 }
 
 template <typename T>
-SEXP mlm_c(const arma::vec & y, const arma::mat & X, const arma::mat & U, const double vgs, XPtr<BigMatrix> pMat, const Nullable<arma::uvec> geno_ind = R_NilValue, const Nullable<arma::uvec> marker_ind = R_NilValue, const bool verbose = true, const int threads = 0){
+SEXP mlm_c(const arma::vec & y, const arma::mat & X, const arma::mat & U, const double vgs, XPtr<BigMatrix> pMat, const Nullable<arma::uvec> geno_ind = R_NilValue, const Nullable<arma::uvec> marker_ind = R_NilValue, const int step = 10000, const bool verbose = true, const int threads = 0){
 	
 	omp_setup(threads);
 
 	MatrixAccessor<T> genomat = MatrixAccessor<T>(*pMat);
+	bool marker_bycol = (y.n_elem == pMat->nrow());
 
 	int n;
 	uvec _geno_ind;
@@ -213,7 +277,7 @@ SEXP mlm_c(const arma::vec & y, const arma::mat & X, const arma::mat & U, const
         _geno_ind = as<uvec>(geno_ind) - 1;
 		n = _geno_ind.n_elem;
     }else{
-		n = pMat->ncol();
+		n = marker_bycol ? pMat->nrow() : pMat->ncol();
 	}
 
 	int m;
@@ -222,7 +286,7 @@ SEXP mlm_c(const arma::vec & y, const arma::mat & X, const arma::mat & U, const
         _marker_ind = as<uvec>(marker_ind) - 1;
 		m = _marker_ind.n_elem;
     }else{
-		m = pMat->nrow();
+		m = marker_bycol ? pMat->ncol() : pMat->nrow();
 	}
 
 	int q0 = X.n_cols;
@@ -237,79 +301,144 @@ SEXP mlm_c(const arma::vec & y, const arma::mat & X, const arma::mat & U, const
 	arma::mat iUXUX = GInv(UX.t() * UX);
 	
 	arma::mat res(m, 3);		
-	arma::vec snp(n);
 	arma::mat iXXs(q0 + 1, q0 + 1);
 
-	#pragma omp parallel for schedule(dynamic) firstprivate(snp, iXXs)
-	for(int i = 0; i < m; i++){
+
+	arma::mat Z_buffer(n, step, fill::none);
+	int i = 0, j = 0;
+	int i_marker = 0;
+	for (;i < m;) {
+		
+		int cnt = 0;
+		for (; j < m && cnt < step; j++)
+		{
+			cnt++;
+		}
+
+		if (cnt != step) {
+			Z_buffer.set_size(n, cnt);
+		}
 
 		if(_geno_ind.is_empty()){
 			if(_marker_ind.is_empty()){
-				for(int ii = 0; ii < n; ii++){
-					snp[ii] = genomat[ii][i];
+				if(marker_bycol){
+					#pragma omp parallel for
+					for(int l = 0; l < cnt; l++){
+						for(int k = 0; k < n; k++){
+							Z_buffer(k, l) = genomat[(i_marker + l)][k];
+						}
+					}
+				}else{
+					#pragma omp parallel for
+					for(int k = 0; k < n; k++){
+						for(int l = 0; l < cnt; l++){
+							Z_buffer(k, l) = genomat[k][(i_marker + l)];
+						}
+					}
 				}
 			}else{
-				for(int ii = 0; ii < n; ii++){
-					snp[ii] = genomat[ii][_marker_ind[i]];
+				if(marker_bycol){
+					#pragma omp parallel for
+					for(int l = 0; l < cnt; l++){
+						for(int k = 0; k < n; k++){
+							Z_buffer(k, l) = genomat[_marker_ind[(i_marker + l)]][k];
+						}
+					}
+				}else{
+					#pragma omp parallel for
+					for(int k = 0; k < n; k++){
+						for(int l = 0; l < cnt; l++){
+							Z_buffer(k, l) = genomat[k][_marker_ind[(i_marker + l)]];
+						}
+					}
 				}
 			}
 		}else{
 			if(_marker_ind.is_empty()){
-				for(int ii = 0; ii < n; ii++){
-					snp[ii] = genomat[_geno_ind[ii]][i];
+				if(marker_bycol){
+					#pragma omp parallel for
+					for(int l = 0; l < cnt; l++){
+						for(int k = 0; k < n; k++){
+							Z_buffer(k, l) = genomat[(i_marker + l)][_geno_ind[k]];
+						}
+					}
+				}else{
+					#pragma omp parallel for
+					for(int k = 0; k < n; k++){
+						for(int l = 0; l < cnt; l++){
+							Z_buffer(k, l) = genomat[_geno_ind[k]][(i_marker + l)];
+						}
+					}
 				}
 			}else{
-				for(int ii = 0; ii < n; ii++){
-					snp[ii] = genomat[_geno_ind[ii]][_marker_ind[i]];
+				if(marker_bycol){
+					#pragma omp parallel for
+					for(int l = 0; l < cnt; l++){
+						for(int k = 0; k < n; k++){
+							Z_buffer(k, l) = genomat[_marker_ind[(i_marker + l)]][_geno_ind[k]];
+						}
+					}
+				}else{
+					#pragma omp parallel for
+					for(int k = 0; k < n; k++){
+						for(int l = 0; l < cnt; l++){
+							Z_buffer(k, l) = genomat[_geno_ind[k]][_marker_ind[(i_marker + l)]];
+						}
+					}
 				}
 			}
 		}
-		
-		arma::mat Us = U.t() * snp;
-		arma::mat UXUs = UX.t() * Us;
-
-		double UsUs = as_scalar(Us.t() * Us);
-		double UsUy = as_scalar(Us.t() * Uy);
-		double B22 = UsUs - as_scalar(UXUs.t() * iUXUX * UXUs);
-		double invB22 = 1 / B22;
-        arma::mat B21 = UXUs.t() * iUXUX;
-        arma::mat NeginvB22B21 = -1 * invB22 * B21;
-		
-		iXXs(q0, q0)=invB22;
-		iXXs.submat(0, 0, q0 - 1, q0 - 1) = iUXUX + invB22 * B21.t() * B21;
-		iXXs(q0, span(0, q0 - 1)) = NeginvB22B21;
-		iXXs(span(0, q0 - 1), q0) = NeginvB22B21.t();
-
-        // statistics
-        arma::mat rhs(UXUy.n_rows + 1, 1);
-        rhs.rows(0, UXUy.n_rows - 1) = UXUy;
-        rhs(UXUy.n_rows, 0) = UsUy;
-		arma::mat beta = iXXs * rhs;
-		int df = n - q0 - 1;
-
-        res(i, 0) = beta(q0, 0);
-        res(i, 1) = sqrt(iXXs(q0, q0) * vgs); 
-        res(i, 2) = 2 * R::pt(abs(res(i, 0) / res(i, 1)), df, false, false);
-        progress.increment();
-	}
 
+		#pragma omp parallel for firstprivate(iXXs)
+		for(int l = 0; l < cnt; l++){
+			arma::mat Us = U.t() * Z_buffer.col(l);
+			arma::mat UXUs = UX.t() * Us;
+			double UsUs = as_scalar(Us.t() * Us);
+			double UsUy = as_scalar(Us.t() * Uy);
+			double B22 = UsUs - as_scalar(UXUs.t() * iUXUX * UXUs);
+			double invB22 = 1 / B22;
+			arma::mat B21 = UXUs.t() * iUXUX;
+			arma::mat NeginvB22B21 = -1 * invB22 * B21;
+			
+			iXXs(q0, q0)=invB22;
+			iXXs.submat(0, 0, q0 - 1, q0 - 1) = iUXUX + invB22 * B21.t() * B21;
+			iXXs(q0, span(0, q0 - 1)) = NeginvB22B21;
+			iXXs(span(0, q0 - 1), q0) = NeginvB22B21.t();
+
+			// statistics
+			arma::mat rhs(UXUy.n_rows + 1, 1);
+			rhs.rows(0, UXUy.n_rows - 1) = UXUy;
+			rhs(UXUy.n_rows, 0) = UsUy;
+			arma::mat beta = iXXs * rhs;
+			int df = n - q0 - 1;
+
+			res(i_marker + l, 0) = beta(q0, 0);
+			res(i_marker + l, 1) = sqrt(iXXs(q0, q0) * vgs); 
+			res(i_marker + l, 2) = 2 * R::pt(abs(res(i_marker + l, 0) / res(i_marker + l, 1)), df, false, false);
+		}
+
+		i = j;
+		i_marker += cnt;
+		if(!Progress::check_abort()) progress.increment(cnt);
+	}
+	Z_buffer.reset();
 	return wrap(res);
 }
 
 // [[Rcpp::export]]
-SEXP mlm_c(const arma::vec & y, const arma::mat & X, const arma::mat & U, const double vgs, SEXP pBigMat, const Nullable<arma::uvec> geno_ind = R_NilValue, const Nullable<arma::uvec> marker_ind = R_NilValue, const bool verbose = true, const int threads = 0){
+SEXP mlm_c(const arma::vec & y, const arma::mat & X, const arma::mat & U, const double vgs, SEXP pBigMat, const Nullable<arma::uvec> geno_ind = R_NilValue, const Nullable<arma::uvec> marker_ind = R_NilValue, const int step = 10000, const bool verbose = true, const int threads = 0){
 
 	XPtr<BigMatrix> xpMat(pBigMat);
 
 	switch(xpMat->matrix_type()){
 	case 1:
-		return mlm_c<char>(y, X, U, vgs, xpMat, geno_ind, marker_ind, verbose, threads);
+		return mlm_c<char>(y, X, U, vgs, xpMat, geno_ind, marker_ind, step, verbose, threads);
 	case 2:
-		return mlm_c<short>(y, X, U, vgs, xpMat, geno_ind, marker_ind, verbose, threads);
+		return mlm_c<short>(y, X, U, vgs, xpMat, geno_ind, marker_ind, step, verbose, threads);
 	case 4:
-		return mlm_c<int>(y, X, U, vgs, xpMat, geno_ind, marker_ind, verbose, threads);
+		return mlm_c<int>(y, X, U, vgs, xpMat, geno_ind, marker_ind, step, verbose, threads);
 	case 8:
-		return mlm_c<double>(y, X, U, vgs, xpMat, geno_ind, marker_ind, verbose, threads);
+		return mlm_c<double>(y, X, U, vgs, xpMat, geno_ind, marker_ind, step, verbose, threads);
 	default:
 		throw Rcpp::exception("unknown type detected for big.matrix object!");
 	}
diff --git a/src/data_converter.cpp b/src/data_converter.cpp
index 18c886d..ad52aec 100755
--- a/src/data_converter.cpp
+++ b/src/data_converter.cpp
@@ -138,7 +138,7 @@ void vcf_parser_genotype(std::string vcf_file, XPtr<BigMatrix> pMat, long maxLin
     
     // progress bar
     MinimalProgressBar_perc pb;
-    Progress progress(pMat->nrow(), verbose, pb);
+    Progress progress(pMat->ncol(), verbose, pb);
     
     // Skip Header
     string prefix("#CHROM");
@@ -179,7 +179,7 @@ void vcf_parser_genotype(std::string vcf_file, XPtr<BigMatrix> pMat, long maxLin
             //     mat[j][m + i] = markers[j];
             // }
             for(size_t j = 0; j < (l.size() - 9); j++) {
-                mat[j][m + i] = static_cast<T>(vcf_marker_parser(l[j + 9], NA_C));
+                mat[m + i][j] = static_cast<T>(vcf_marker_parser(l[j + 9], NA_C));
             }
         }
         progress.increment(buffer.size());
@@ -339,7 +339,7 @@ void hapmap_parser_genotype(std::string hmp_file, std::vector<std::string> Major
     
     // progress bar
     MinimalProgressBar_perc pb;
-    Progress progress(pMat->nrow(), verbose, pb);
+    Progress progress(pMat->ncol(), verbose, pb);
     
     // Skip Header
     string prefix("rs#");
@@ -380,7 +380,7 @@ void hapmap_parser_genotype(std::string hmp_file, std::vector<std::string> Major
                 Rcpp::stop(("line " + to_string(m+i+2) + " does not have " + to_string(n_col) + " elements in HAPMAP file.").c_str());
             major = Major[m + i][0];
             for(size_t j = 0; j < (l.size() - 11); j++) {
-                mat[j][m + i] = static_cast<T>(hapmap_marker_parser(l[j + 11], major, NA_C));
+                mat[m + i][j] = static_cast<T>(hapmap_marker_parser(l[j + 11], major, NA_C));
             }
         }
         progress.increment(n_marker);
@@ -436,7 +436,7 @@ List numeric_scan(std::string num_file) {
 // ***** BFILE *****
 
 template <typename T>
-void write_bfile(XPtr<BigMatrix> pMat, std::string bed_file, double NA_C, int threads=0, bool verbose=true) {
+void write_bfile(XPtr<BigMatrix> pMat, std::string bed_file, double NA_C, bool mrkbycol = true, int threads=0, bool verbose=true) {
     // check input
     string ending = ".bed";
     if (bed_file.length() <= ending.length() ||
@@ -447,10 +447,10 @@ void write_bfile(XPtr<BigMatrix> pMat, std::string bed_file, double NA_C, int th
     // define
     T c;
     omp_setup(threads);
-    int m = pMat->nrow();
-    int nind = pMat->ncol();
-    int n = pMat->ncol() / 4;  // 4 individual = 1 bit
-    if (pMat->ncol() % 4 != 0) 
+    int m = (mrkbycol ? pMat->ncol() : pMat->nrow());
+    int nind = (mrkbycol ? pMat->nrow() : pMat->ncol());
+    int n = nind / 4;  // 4 individual = 1 bit
+    if (nind % 4 != 0) 
         n++;
     
     vector<uint8_t> geno(n);
@@ -460,7 +460,7 @@ void write_bfile(XPtr<BigMatrix> pMat, std::string bed_file, double NA_C, int th
     
     // progress bar
     MinimalProgressBar_perc pb;
-    Progress progress(pMat->nrow(), verbose, pb);
+    Progress progress(m, verbose, pb);
     
     // magic number of bfile
     const unsigned char magic_bytes[] = { 0x6c, 0x1b, 0x01 };
@@ -474,36 +474,52 @@ void write_bfile(XPtr<BigMatrix> pMat, std::string bed_file, double NA_C, int th
     code[static_cast<T>(NA_C)] = 1;
     
     // write bfile
-    for (int i = 0; i < m; i++) {
-        #pragma omp parallel for private(c)
-        for (int j = 0; j < n; j++) {
-            uint8_t p = 0;
-            for (int x = 0; x < 4 && (4 * j + x) < nind; x++) {
-                c = mat[4 * j + x][i];
-                p |= code[c] << (x*2);
+    if(mrkbycol){
+        for (int i = 0; i < m; i++) {
+            #pragma omp parallel for private(c)
+            for (int j = 0; j < n; j++) {
+                uint8_t p = 0;
+                for (int x = 0; x < 4 && (4 * j + x) < nind; x++) {
+                    c = mat[i][4 * j + x];
+                    p |= code[c] << (x*2);
+                }
+                geno[j] = p;
             }
-            geno[j] = p;
+            fwrite((char*)geno.data(), 1, geno.size(), fout);
+            progress.increment();
+        }
+    }else{
+        for (int i = 0; i < m; i++) {
+            #pragma omp parallel for private(c)
+            for (int j = 0; j < n; j++) {
+                uint8_t p = 0;
+                for (int x = 0; x < 4 && (4 * j + x) < nind; x++) {
+                    c = mat[4 * j + x][i];
+                    p |= code[c] << (x*2);
+                }
+                geno[j] = p;
+            }
+            fwrite((char*)geno.data(), 1, geno.size(), fout);
+            progress.increment();
         }
-        fwrite((char*)geno.data(), 1, geno.size(), fout);
-        progress.increment();
     }
     fclose(fout);
     return;
 }
 
 // [[Rcpp::export]]
-void write_bfile(SEXP pBigMat, std::string bed_file, int threads=0, bool verbose=true) {
+void write_bfile(SEXP pBigMat, std::string bed_file, bool mrkbycol = true, int threads=0, bool verbose=true) {
     XPtr<BigMatrix> xpMat(pBigMat);
     
     switch(xpMat->matrix_type()) {
     case 1:
-        return write_bfile<char>(xpMat, bed_file, NA_CHAR, threads, verbose);
+        return write_bfile<char>(xpMat, bed_file, NA_CHAR, mrkbycol, threads, verbose);
     case 2:
-        return write_bfile<short>(xpMat, bed_file, NA_SHORT, threads, verbose);
+        return write_bfile<short>(xpMat, bed_file, NA_SHORT, mrkbycol, threads, verbose);
     case 4:
-        return write_bfile<int>(xpMat, bed_file, NA_INTEGER, threads, verbose);
+        return write_bfile<int>(xpMat, bed_file, NA_INTEGER, mrkbycol, threads, verbose);
     case 8:
-        return write_bfile<double>(xpMat, bed_file, NA_REAL, threads, verbose);
+        return write_bfile<double>(xpMat, bed_file, NA_REAL, mrkbycol, threads, verbose);
     default:
         throw Rcpp::exception("unknown type detected for big.matrix object!");
     }
@@ -518,7 +534,7 @@ void read_bfile(std::string bed_file, XPtr<BigMatrix> pMat, long maxLine, double
     
     // define
     omp_setup(threads);
-    size_t ind = pMat->ncol();
+    size_t ind = pMat->nrow();
     long n = ind / 4;  // 4 individual = 1 bit
     if (ind % 4 != 0) 
         n++; 
@@ -572,7 +588,7 @@ void read_bfile(std::string bed_file, XPtr<BigMatrix> pMat, long maxLine, double
             uint8_t p = buffer[j];
             
             for (size_t x = 0; x < 4 && (c + x) < ind; x++) {
-                mat[c + x][r] = code[(p >> (2*x)) & 0x03];
+                mat[r][c + x] = code[(p >> (2*x)) & 0x03];
             }
         }
         progress.increment();
diff --git a/src/impute.cpp b/src/impute.cpp
index 7b20ca9..edde606 100755
--- a/src/impute.cpp
+++ b/src/impute.cpp
@@ -23,116 +23,200 @@ using namespace Rcpp;
 using namespace arma;
 
 template <typename T>
-void impute_marker(XPtr<BigMatrix> pMat, int threads=0, bool verbose=true) {
+void impute_marker(XPtr<BigMatrix> pMat, bool mrkbycol = true, int threads=0, bool verbose=true) {
     omp_setup(threads);
 
-    MinimalProgressBar_perc pb;
-    Progress progress(pMat->nrow(), verbose, pb);
-    
     MatrixAccessor<T> mat = MatrixAccessor<T>(*pMat);
-    const size_t n = pMat->ncol();
-    const size_t m = pMat->nrow();
+    const size_t n = (mrkbycol ? pMat->nrow() : pMat->ncol());
+    const size_t m = (mrkbycol ? pMat->ncol() : pMat->nrow());
+    
+    MinimalProgressBar_perc pb;
+    Progress progress(m, verbose, pb);
     
     // loop marker
-    #pragma omp parallel for
-    for (size_t i = 0; i < m; i++) {
-        std::vector<size_t> na_index = {};;
-        size_t counts[3] = { 0 };
-        
-        // count allele, record missing index 
-        for (size_t j = 0; j < n; j++) {
-            switch(int(mat[j][i])) {
-            case 0: counts[0]++; break;
-            case 1: counts[1]++; break;
-            case 2: counts[2]++; break;
-            default: na_index.push_back(j);
+    if(mrkbycol){
+        #pragma omp parallel for
+        for (size_t i = 0; i < m; i++) {
+            std::vector<size_t> na_index = {};;
+            size_t counts[3] = { 0 };
+            
+            // count allele, record missing index 
+            for (size_t j = 0; j < n; j++) {
+                switch(int(mat[i][j])) {
+                case 0: counts[0]++; break;
+                case 1: counts[1]++; break;
+                case 2: counts[2]++; break;
+                default: na_index.push_back(j);
+                }
             }
+            
+            // find major allele
+            T major = counts[2] > counts[1] ? (counts[2] > counts[0] ? 2 : 0) : (counts[1] > counts[0] ? 1 : 0);
+            
+            // impute
+            for (auto&& x: na_index) {
+                mat[i][x] = major;   
+            }
+            progress.increment();
         }
-        
-        // find major allele
-        T major = counts[2] > counts[1] ? (counts[2] > counts[0] ? 2 : 0) : (counts[1] > counts[0] ? 1 : 0);
-        
-        // impute
-        for (auto&& x: na_index) {
-            mat[x][i] = major;   
+    }else{
+        #pragma omp parallel for
+        for (size_t i = 0; i < m; i++) {
+            std::vector<size_t> na_index = {};;
+            size_t counts[3] = { 0 };
+            
+            // count allele, record missing index 
+            for (size_t j = 0; j < n; j++) {
+                switch(int(mat[j][i])) {
+                case 0: counts[0]++; break;
+                case 1: counts[1]++; break;
+                case 2: counts[2]++; break;
+                default: na_index.push_back(j);
+                }
+            }
+            
+            // find major allele
+            T major = counts[2] > counts[1] ? (counts[2] > counts[0] ? 2 : 0) : (counts[1] > counts[0] ? 1 : 0);
+            
+            // impute
+            for (auto&& x: na_index) {
+                mat[x][i] = major;   
+            }
+            progress.increment();
         }
-        progress.increment();
     }
 }
 
 // [[Rcpp::export]]
-void impute_marker(SEXP pBigMat, int threads=0, bool verbose=true) {
+void impute_marker(SEXP pBigMat, bool mrkbycol = true, int threads=0, bool verbose=true) {
     XPtr<BigMatrix> xpMat(pBigMat);
     
     switch(xpMat->matrix_type()) {
     case 1:
-        return impute_marker<char>(xpMat, threads, verbose);
+        return impute_marker<char>(xpMat, mrkbycol, threads, verbose);
     case 2:
-        return impute_marker<short>(xpMat, threads, verbose);
+        return impute_marker<short>(xpMat, mrkbycol, threads, verbose);
     case 4:
-        return impute_marker<int>(xpMat, threads, verbose);
+        return impute_marker<int>(xpMat, mrkbycol, threads, verbose);
     case 8:
-        return impute_marker<double>(xpMat, threads, verbose);
+        return impute_marker<double>(xpMat, mrkbycol, threads, verbose);
     default:
         throw Rcpp::exception("unknown type detected for big.matrix object!");
     }
 }
 
 template <typename T>
-bool hasNA(XPtr<BigMatrix> pMat, double NA_C, const Nullable<arma::uvec> geno_ind=R_NilValue, const int threads=1) {
+bool hasNA(XPtr<BigMatrix> pMat, double NA_C, bool mrkbycol = true, const Nullable<arma::uvec> geno_ind = R_NilValue, const Nullable<arma::uvec> marker_ind = R_NilValue, const int threads = 1) {
     
     omp_setup(threads);
-    size_t m = pMat->nrow();
-    size_t n;
-    
-    uvec _geno_ind;
-	if(geno_ind.isNotNull()){
-        _geno_ind = as<uvec>(geno_ind) - 1;
-        n = _geno_ind.n_elem;
-    }else{
-		n = pMat->ncol();
-	}
 
     bool HasNA = false;
     MatrixAccessor<T> mat = MatrixAccessor<T>(*pMat);
     
-    if(_geno_ind.is_empty()){
-        #pragma omp parallel for shared(HasNA)
-        for (size_t j = 0; j < n; j++) {
-            if(HasNA)   continue;
-            for (size_t i = 0; i < m; i++) {
-                if (mat[j][i] == NA_C) {
-                    HasNA = true;
+    if(geno_ind.isNotNull()){
+        uvec _geno_ind = as<uvec>(geno_ind) - 1;
+        int n = _geno_ind.n_elem;
+        if(marker_ind.isNotNull()){
+            uvec _marker_ind = as<uvec>(marker_ind) - 1;
+            int m = _marker_ind.n_elem;
+            if(mrkbycol){
+                #pragma omp parallel for shared(HasNA)
+                for (int j = 0; j < m; j++) {
+                    if(HasNA)   continue;
+                    for (int i = 0; i < n; i++) {
+                        if (mat[_marker_ind[j]][_geno_ind[i]] == NA_C) {
+                            HasNA = true;
+                        }
+                    }
+                }
+            }else{
+                #pragma omp parallel for shared(HasNA)
+                for (int j = 0; j < n; j++) {
+                    if(HasNA)   continue;
+                    for (int i = 0; i < m; i++) {
+                        if (mat[_geno_ind[j]][_marker_ind[i]] == NA_C) {
+                            HasNA = true;
+                        }
+                    }
+                }
+            }
+        }else{
+            if(mrkbycol){
+                #pragma omp parallel for shared(HasNA)
+                for (int j = 0; j < pMat->ncol(); j++) {
+                    if(HasNA)   continue;
+                    for (int i = 0; i < n; i++) {
+                        if (mat[j][_geno_ind[i]] == NA_C) {
+                            HasNA = true;
+                        }
+                    }
+                }
+            }else{
+                #pragma omp parallel for shared(HasNA)
+                for (int j = 0; j < n; j++) {
+                    if(HasNA)   continue;
+                    for (int i = 0; i < pMat->nrow(); i++) {
+                        if (mat[_geno_ind[j]][i] == NA_C) {
+                            HasNA = true;
+                        }
+                    }
                 }
             }
         }
     }else{
-        #pragma omp parallel for shared(HasNA)
-        for (size_t j = 0; j < n; j++) {
-            if(HasNA)   continue;
-            for (size_t i = 0; i < m; i++) {
-                if (mat[_geno_ind[j]][i] == NA_C) {
-                    HasNA = true;
+        if(marker_ind.isNotNull()){
+            uvec _marker_ind = as<uvec>(marker_ind) - 1;
+            int m = _marker_ind.n_elem;
+            if(mrkbycol){
+                #pragma omp parallel for shared(HasNA)
+                for (int j = 0; j < m; j++) {
+                    if(HasNA)   continue;
+                    for (int i = 0; i < pMat->nrow(); i++) {
+                        if (mat[_marker_ind[j]][i] == NA_C) {
+                            HasNA = true;
+                        }
+                    }
+                }
+            }else{
+                #pragma omp parallel for shared(HasNA)
+                for (int j = 0; j < pMat->ncol(); j++) {
+                    if(HasNA)   continue;
+                    for (int i = 0; i < m; i++) {
+                        if (mat[j][_marker_ind[i]] == NA_C) {
+                            HasNA = true;
+                        }
+                    }
+                }
+            }
+        }else{
+            #pragma omp parallel for shared(HasNA)
+            for (int j = 0; j < pMat->ncol(); j++) {
+                if(HasNA)   continue;
+                for (int i = 0; i < pMat->nrow(); i++) {
+                    if (mat[j][i] == NA_C) {
+                        HasNA = true;
+                    }
                 }
             }
         }
     }
+
     return HasNA;
 }
 
 // [[Rcpp::export]]
-bool hasNA(SEXP pBigMat, const Nullable<arma::uvec> geno_ind = R_NilValue, const int threads=1) {
+bool hasNA(SEXP pBigMat, bool mrkbycol = true, const Nullable<arma::uvec> geno_ind = R_NilValue, const Nullable<arma::uvec> marker_ind = R_NilValue, const int threads=1) {
     XPtr<BigMatrix> xpMat(pBigMat);
     
     switch(xpMat->matrix_type()) {
     case 1:
-        return hasNA<char>(xpMat, NA_CHAR, geno_ind, threads);
+        return hasNA<char>(xpMat, NA_CHAR, mrkbycol, geno_ind, marker_ind, threads);
     case 2:
-        return hasNA<short>(xpMat, NA_SHORT, geno_ind, threads);
+        return hasNA<short>(xpMat, NA_SHORT, mrkbycol, geno_ind, marker_ind, threads);
     case 4:
-        return hasNA<int>(xpMat, NA_INTEGER, geno_ind, threads);
+        return hasNA<int>(xpMat, NA_INTEGER, mrkbycol, geno_ind, marker_ind, threads);
     case 8:
-        return hasNA<double>(xpMat, NA_REAL, geno_ind, threads);
+        return hasNA<double>(xpMat, NA_REAL, mrkbycol, geno_ind, marker_ind, threads);
     default:
         throw Rcpp::exception("unknown type detected for big.matrix object!");
     }
diff --git a/src/kinship.cpp b/src/kinship.cpp
index 652ebbc..f993274 100755
--- a/src/kinship.cpp
+++ b/src/kinship.cpp
@@ -14,13 +14,13 @@ using namespace Rcpp;
 using namespace arma;
 
 template <typename T>
-arma::vec BigRowMean(XPtr<BigMatrix> pMat, int threads = 0, const Nullable<arma::uvec> geno_ind = R_NilValue){
+arma::vec BigRowMean(XPtr<BigMatrix> pMat, bool mrkbycol = true, int threads = 0, const Nullable<arma::uvec> geno_ind = R_NilValue){
 
     omp_setup(threads);
 	MatrixAccessor<T> bigm = MatrixAccessor<T>(*pMat);
 
 	int n;
-	int m = pMat->nrow();
+	int m = mrkbycol ? pMat->ncol() : pMat->nrow();
 	arma::vec mean(m);
 
 	uvec _geno_ind;
@@ -28,26 +28,48 @@ arma::vec BigRowMean(XPtr<BigMatrix> pMat, int threads = 0, const Nullable<arma:
         _geno_ind = as<uvec>(geno_ind) - 1;
         n = _geno_ind.n_elem;
     }else{
-		n = pMat->ncol();
+		n = mrkbycol ? pMat->nrow() : pMat->ncol();
 	}
 
-	if(_geno_ind.is_empty()){
-		#pragma omp parallel for
-		for (int j = 0; j < m; j++){
-			double p1 = 0.0;
-			for(int k = 0; k < n; k++){
-				p1 += bigm[k][j];
+	if(mrkbycol){
+		if(_geno_ind.is_empty()){
+			#pragma omp parallel for
+			for (int j = 0; j < m; j++){
+				double p1 = 0.0;
+				for(int k = 0; k < n; k++){
+					p1 += bigm[j][k];
+				}
+				mean[j] = p1 / n;
+			}
+		}else{
+			#pragma omp parallel for
+			for (int j = 0; j < m; j++){
+				double p1 = 0.0;
+				for(int k = 0; k < n; k++){
+					p1 += bigm[j][_geno_ind[k]];
+				}
+				mean[j] = p1 / n;
 			}
-			mean[j] = p1 / n;
 		}
 	}else{
-		#pragma omp parallel for
-		for (int j = 0; j < m; j++){
-			double p1 = 0.0;
-			for(int k = 0; k < n; k++){
-				p1 += bigm[_geno_ind[k]][j];
+		if(_geno_ind.is_empty()){
+			#pragma omp parallel for
+			for (int j = 0; j < m; j++){
+				double p1 = 0.0;
+				for(int k = 0; k < n; k++){
+					p1 += bigm[k][j];
+				}
+				mean[j] = p1 / n;
+			}
+		}else{
+			#pragma omp parallel for
+			for (int j = 0; j < m; j++){
+				double p1 = 0.0;
+				for(int k = 0; k < n; k++){
+					p1 += bigm[_geno_ind[k]][j];
+				}
+				mean[j] = p1 / n;
 			}
-			mean[j] = p1 / n;
 		}
 	}
 
@@ -55,19 +77,19 @@ arma::vec BigRowMean(XPtr<BigMatrix> pMat, int threads = 0, const Nullable<arma:
 }
 
 // [[Rcpp::export]]
-arma::vec BigRowMean(SEXP pBigMat, int threads = 0, const Nullable<arma::uvec> geno_ind = R_NilValue){
+arma::vec BigRowMean(SEXP pBigMat, bool mrkbycol = true, int threads = 0, const Nullable<arma::uvec> geno_ind = R_NilValue){
 	
 	XPtr<BigMatrix> xpMat(pBigMat);
 
 	switch(xpMat->matrix_type()) {
 	case 1:
-		return BigRowMean<char>(xpMat, threads, geno_ind);
+		return BigRowMean<char>(xpMat, mrkbycol, threads, geno_ind);
 	case 2:
-		return BigRowMean<short>(xpMat, threads, geno_ind);
+		return BigRowMean<short>(xpMat, mrkbycol, threads, geno_ind);
 	case 4:
-		return BigRowMean<int>(xpMat, threads, geno_ind);
+		return BigRowMean<int>(xpMat, mrkbycol, threads, geno_ind);
 	case 8:
-		return BigRowMean<double>(xpMat, threads, geno_ind);
+		return BigRowMean<double>(xpMat, mrkbycol, threads, geno_ind);
 	default:
 		throw Rcpp::exception("unknown type detected for big.matrix object!");
 	}
@@ -228,7 +250,7 @@ SEXP kin_cal_s(SEXP pBigMat, int threads = 0, bool mkl = false, bool verbose = t
 
 
 template <typename T>
-SEXP kin_cal(XPtr<BigMatrix> pMat, const Nullable<arma::uvec> geno_ind = R_NilValue, const Nullable<arma::uvec> marker_ind = R_NilValue, int threads = 0, int step = 5000, bool mkl = false, bool verbose = true){
+SEXP kin_cal(XPtr<BigMatrix> pMat, const Nullable<arma::uvec> geno_ind = R_NilValue, const Nullable<arma::uvec> marker_ind = R_NilValue, const Nullable<arma::vec> marker_freq = R_NilValue, const bool marker_bycol = true, int threads = 0, int step = 5000, bool mkl = false, bool verbose = true){
 
     omp_setup(threads);
 
@@ -248,7 +270,7 @@ SEXP kin_cal(XPtr<BigMatrix> pMat, const Nullable<arma::uvec> geno_ind = R_NilVa
         _geno_ind = as<uvec>(geno_ind) - 1;
 		n = _geno_ind.n_elem;
     }else{
-		n = pMat->ncol();
+		n = marker_bycol ? pMat->nrow() : pMat->ncol();
 	}
 
 	int m;
@@ -257,52 +279,102 @@ SEXP kin_cal(XPtr<BigMatrix> pMat, const Nullable<arma::uvec> geno_ind = R_NilVa
         _marker_ind = as<uvec>(marker_ind) - 1;
 		m = _marker_ind.n_elem;
     }else{
-		m = pMat->nrow();
+		m = marker_bycol ? pMat->ncol() : pMat->nrow();
 	}
 
-	arma::vec means(m);
-	if(_geno_ind.is_empty()){
-		if(_marker_ind.is_empty()){
-			#pragma omp parallel for
-			for(int j = 0; j < m; j++){
-				double p1 = 0.0;
-				for(int k = 0; k < n; k++){
-					p1 += bigm[k][j];
-				}
-				means[j] = p1 / n;
-			}
-		}else{
-			#pragma omp parallel for
-			for(int j = 0; j < m; j++){
-				double p1 = 0.0;
-				for(int k = 0; k < n; k++){
-					p1 += bigm[k][_marker_ind[j]];
-				}
-				means[j] = p1 / n;
-			}
-		}
+	vec means;
+	if(marker_freq.isNotNull()){
+		means = as<vec>(marker_freq) * 2;
 	}else{
-		if(_marker_ind.is_empty()){
-			#pragma omp parallel for
-			for(int j = 0; j < m; j++){
-				double p1 = 0.0;
-				for(int k = 0; k < n; k++){
-					p1 += bigm[_geno_ind[k]][j];
+		means.resize(m);
+		if(_geno_ind.is_empty()){
+			if(_marker_ind.is_empty()){
+				if(marker_bycol){
+					#pragma omp parallel for
+					for(int j = 0; j < m; j++){
+						double p1 = 0.0;
+						for(int k = 0; k < n; k++){
+							p1 += bigm[j][k];
+						}
+						means[j] = p1 / n;
+					}
+				}else{
+					#pragma omp parallel for
+					for(int j = 0; j < m; j++){
+						double p1 = 0.0;
+						for(int k = 0; k < n; k++){
+							p1 += bigm[k][j];
+						}
+						means[j] = p1 / n;
+					}
+				}
+			}else{
+				if(marker_bycol){
+					#pragma omp parallel for
+					for(int j = 0; j < m; j++){
+						double p1 = 0.0;
+						for(int k = 0; k < n; k++){
+							p1 += bigm[_marker_ind[j]][k];
+						}
+						means[j] = p1 / n;
+					}
+				}else{
+					#pragma omp parallel for
+					for(int j = 0; j < m; j++){
+						double p1 = 0.0;
+						for(int k = 0; k < n; k++){
+							p1 += bigm[k][_marker_ind[j]];
+						}
+						means[j] = p1 / n;
+					}
 				}
-				means[j] = p1 / n;
 			}
 		}else{
-			#pragma omp parallel for
-			for(int j = 0; j < m; j++){
-				double p1 = 0.0;
-				for(int k = 0; k < n; k++){
-					p1 += bigm[_geno_ind[k]][_marker_ind[j]];
+			if(_marker_ind.is_empty()){
+				if(marker_bycol){
+					#pragma omp parallel for
+					for(int j = 0; j < m; j++){
+						double p1 = 0.0;
+						for(int k = 0; k < n; k++){
+							p1 += bigm[j][_geno_ind[k]];
+						}
+						means[j] = p1 / n;
+					}
+				}else{
+					#pragma omp parallel for
+					for(int j = 0; j < m; j++){
+						double p1 = 0.0;
+						for(int k = 0; k < n; k++){
+							p1 += bigm[_geno_ind[k]][j];
+						}
+						means[j] = p1 / n;
+					}
+				}
+			}else{
+				if(marker_bycol){
+					#pragma omp parallel for
+					for(int j = 0; j < m; j++){
+						double p1 = 0.0;
+						for(int k = 0; k < n; k++){
+							p1 += bigm[_marker_ind[j]][_geno_ind[k]];
+						}
+						means[j] = p1 / n;
+					}
+				}else{
+					#pragma omp parallel for
+					for(int j = 0; j < m; j++){
+						double p1 = 0.0;
+						for(int k = 0; k < n; k++){
+							p1 += bigm[_geno_ind[k]][_marker_ind[j]];
+						}
+						means[j] = p1 / n;
+					}
 				}
-				means[j] = p1 / n;
 			}
 		}
+		if(means.has_nan())	throw Rcpp::exception("NA is not allowed in genotype, use 'MVP.Data.impute' to impute!");
 	}
-	
+
 	arma::mat kin = zeros<mat>(n, n);
 	arma::mat Z_buffer(step, n, fill::none);
 
@@ -325,33 +397,69 @@ SEXP kin_cal(XPtr<BigMatrix> pMat, const Nullable<arma::uvec> geno_ind = R_NilVa
 
 		if(_geno_ind.is_empty()){
 			if(_marker_ind.is_empty()){
-				#pragma omp parallel for
-				for(int k = 0; k < n; k++){
+				if(marker_bycol){
+					#pragma omp parallel for
 					for(int l = 0; l < cnt; l++){
-						Z_buffer(l, k) = bigm[k][(i_marker + l)] - means[(i_marker + l)];
+						for(int k = 0; k < n; k++){
+							Z_buffer(l, k) = bigm[(i_marker + l)][k] - means[(i_marker + l)];
+						}
+					}
+				}else{
+					#pragma omp parallel for
+					for(int k = 0; k < n; k++){
+						for(int l = 0; l < cnt; l++){
+							Z_buffer(l, k) = bigm[k][(i_marker + l)] - means[(i_marker + l)];
+						}
 					}
 				}
 			}else{
-				#pragma omp parallel for
-				for(int k = 0; k < n; k++){
+				if(marker_bycol){
+					#pragma omp parallel for
 					for(int l = 0; l < cnt; l++){
-						Z_buffer(l, k) = bigm[k][_marker_ind[(i_marker + l)]] - means[(i_marker + l)];
+						for(int k = 0; k < n; k++){
+							Z_buffer(l, k) = bigm[_marker_ind[(i_marker + l)]][k] - means[(i_marker + l)];
+						}
+					}
+				}else{
+					#pragma omp parallel for
+					for(int k = 0; k < n; k++){
+						for(int l = 0; l < cnt; l++){
+							Z_buffer(l, k) = bigm[k][_marker_ind[(i_marker + l)]] - means[(i_marker + l)];
+						}
 					}
 				}
 			}
 		}else{
 			if(_marker_ind.is_empty()){
-				#pragma omp parallel for
-				for(int k = 0; k < n; k++){
+				if(marker_bycol){
+					#pragma omp parallel for
 					for(int l = 0; l < cnt; l++){
-						Z_buffer(l, k) = bigm[_geno_ind[k]][(i_marker + l)] - means[(i_marker + l)];
+						for(int k = 0; k < n; k++){
+							Z_buffer(l, k) = bigm[(i_marker + l)][_geno_ind[k]] - means[(i_marker + l)];
+						}
+					}
+				}else{
+					#pragma omp parallel for
+					for(int k = 0; k < n; k++){
+						for(int l = 0; l < cnt; l++){
+							Z_buffer(l, k) = bigm[_geno_ind[k]][(i_marker + l)] - means[(i_marker + l)];
+						}
 					}
 				}
 			}else{
-				#pragma omp parallel for
-				for(int k = 0; k < n; k++){
+				if(marker_bycol){
+					#pragma omp parallel for
 					for(int l = 0; l < cnt; l++){
-						Z_buffer(l, k) = bigm[_geno_ind[k]][_marker_ind[(i_marker + l)]] - means[(i_marker + l)];
+						for(int k = 0; k < n; k++){
+							Z_buffer(l, k) = bigm[_marker_ind[(i_marker + l)]][_geno_ind[k]] - means[(i_marker + l)];
+						}
+					}
+				}else{
+					#pragma omp parallel for
+					for(int k = 0; k < n; k++){
+						for(int l = 0; l < cnt; l++){
+							Z_buffer(l, k) = bigm[_geno_ind[k]][_marker_ind[(i_marker + l)]] - means[(i_marker + l)];
+						}
 					}
 				}
 			}
@@ -391,19 +499,19 @@ SEXP kin_cal(XPtr<BigMatrix> pMat, const Nullable<arma::uvec> geno_ind = R_NilVa
 }
 
 // [[Rcpp::export]]
-SEXP kin_cal(SEXP pBigMat, const Nullable<arma::uvec> geno_ind = R_NilValue, const Nullable<arma::uvec> marker_ind = R_NilValue, int threads = 0, size_t step = 10000, bool mkl = false, bool verbose = true){
+SEXP kin_cal(SEXP pBigMat, const Nullable<arma::uvec> geno_ind = R_NilValue, const Nullable<arma::uvec> marker_ind = R_NilValue, const Nullable<arma::vec> marker_freq = R_NilValue, const bool marker_bycol = true, int threads = 0, size_t step = 10000, bool mkl = false, bool verbose = true){
 
 	XPtr<BigMatrix> xpMat(pBigMat);
 
 	switch(xpMat->matrix_type()){
 	case 1:
-		return kin_cal<char>(xpMat, geno_ind, marker_ind, threads, step, mkl, verbose);
+		return kin_cal<char>(xpMat, geno_ind, marker_ind, marker_freq, marker_bycol, threads, step, mkl, verbose);
 	case 2:
-		return kin_cal<short>(xpMat, geno_ind, marker_ind, threads, step, mkl, verbose);
+		return kin_cal<short>(xpMat, geno_ind, marker_ind, marker_freq, marker_bycol, threads, step, mkl, verbose);
 	case 4:
-		return kin_cal<int>(xpMat, geno_ind, marker_ind, threads, step, mkl, verbose);
+		return kin_cal<int>(xpMat, geno_ind, marker_ind, marker_freq, marker_bycol, threads, step, mkl, verbose);
 	case 8:
-		return kin_cal<double>(xpMat, geno_ind, marker_ind, threads, step, mkl, verbose);
+		return kin_cal<double>(xpMat, geno_ind, marker_ind, marker_freq, marker_bycol, threads, step, mkl, verbose);
 	default:
 		throw Rcpp::exception("unknown type detected for big.matrix object!");
 	}
diff --git a/src/mvp_omp.h b/src/mvp_omp.h
index 8eb9c99..de1d07f 100755
--- a/src/mvp_omp.h
+++ b/src/mvp_omp.h
@@ -17,7 +17,6 @@
 
 #if defined(_OPENMP)
 #include <omp.h>
-#else
 #endif
 
 #include <Rcpp.h>