Skip to content

Commit 76b1a0c

Browse files
authored
Merge pull request #44 from tpbilton/master
Added functionality to writeVCF function.
2 parents 7c13ad3 + c60b0ac commit 76b1a0c

File tree

1 file changed

+36
-13
lines changed

1 file changed

+36
-13
lines changed

GBS-Chip-Gmatrix.R

Lines changed: 36 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1571,8 +1571,20 @@ calcG <- function(snpsubset, sfx = "", puse, indsubset, depth.min = 0, depth.max
15711571
if(withHeatmaply){
15721572
if(!require(heatmaply) || compareVersion(as.character(packageVersion("heatmaply")), "0.15.0")==-1)
15731573
withHeatmaply = FALSE
1574-
if(!exists("hover.info"))
1574+
if(!exists("hover.info")){
1575+
if(missing(samp.info)){
1576+
if(!exists("seqID"))
1577+
samp.info <- list("Sample ID"=1:length(indsubset))
1578+
else
1579+
samp.info <- list("Sample ID"=seqID[indsubset])
1580+
} else if(!is.list(samp.info)){
1581+
warning("Sample information object is not a list")
1582+
samp.info <- list("Sample ID"=seqID[indsubset])
1583+
} else if(any(unlist(lapply(samp.info,function(x) length(x)!=length(indsubset))))){
1584+
stop("Missing or extra sample information. Check that the 'samp.info' object is correct.")
1585+
}
15751586
hover.info <- apply(sapply(1:length(samp.info),function(x) paste0(names(samp.info)[x],": ",samp.info[[x]]),simplify = TRUE),1,paste0,collapse="<br>")
1587+
}
15761588
}
15771589
nsnpsub <- length(snpsubset)
15781590
nindsub <- length(indsubset)
@@ -2378,15 +2390,15 @@ Gbend <- function(GRM,mineval=0.001, doplot=TRUE, sfx="", evalsum="free") {
23782390
}
23792391

23802392
## function to use in writeVCF
2381-
genostring <- function(vec) { #vec has gt, paa, pab ,pbb, llaa, llab, llbb, ref, alt
2382-
nobj <- length(vec)/9
2393+
genostring <- function(vec) { #vec has gt, paa, pab ,pbb, llaa, llab, llbb, ref, alt, depth
2394+
nobj <- length(vec)/10
23832395
outstr <- gsub("NA",".",gsub("NA,","",paste(vec[1:nobj],paste(vec[nobj+(1:nobj)],vec[2*nobj+(1:nobj)],vec[3*nobj+(1:nobj)],sep=","),
23842396
paste(vec[4*nobj+(1:nobj)],vec[5*nobj+(1:nobj)],vec[6*nobj+(1:nobj)],sep=","),
2385-
paste(vec[7*nobj+(1:nobj)],vec[8*nobj+(1:nobj)],sep=","), sep=":")))
2397+
paste(vec[7*nobj+(1:nobj)],vec[8*nobj+(1:nobj)],sep=","),vec[9*nobj+(1:nobj)], sep=":")))
23862398
}
23872399

23882400
## Write KGD back to VCF file
2389-
writeVCF <- function(indsubset, snpsubset, outname=NULL, ep=0.001, puse = p, IDuse, keepgt=TRUE, mindepth=0, allele.ref="C", allele.alt="G",
2401+
writeVCF <- function(indsubset, snpsubset, outname=NULL, ep=0.001, puse = p, IDuse, keepgt=TRUE, mindepth=0, allele.ref="C", allele.alt="G", GTmethod="observed",
23902402
verlabel="4.3",usePL=FALSE, contig.meta=FALSE, CHROM=NULL, POS=NULL){
23912403
gform <- tolower(gform)
23922404
if (is.null(outname)) outname <- "GBSdata"
@@ -2420,6 +2432,7 @@ writeVCF <- function(indsubset, snpsubset, outname=NULL, ep=0.001, puse = p, IDu
24202432
pmat <- matrix(puse[snpsubset], nrow=length(indsubset), ncol=length(snpsubset), byrow=TRUE)
24212433
if(length(allele.ref) == nsnps) allele.ref <- allele.ref[snpsubset]
24222434
if(length(allele.alt) == nsnps) allele.alt <- allele.alt[snpsubset]
2435+
GTmethod = match.arg(GTmethod, c("observed","GP")) ## Note: "GTmethod = NULL" will give "observed"
24232436

24242437
# Meta information
24252438
metalik <- '##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood">'
@@ -2457,8 +2470,8 @@ writeVCF <- function(indsubset, snpsubset, outname=NULL, ep=0.001, puse = p, IDu
24572470
out[,6] <- rep(".", length(snpsubset))
24582471
out[,7] <- rep(".", length(snpsubset))
24592472
out[,8] <- rep(".", length(snpsubset))
2460-
out[,9] <- rep("GT:GP:GL:AD", length(snpsubset))
2461-
if(usePL) out[,9] <- rep("GT:GP:PL:AD", length(snpsubset))
2473+
out[,9] <- rep("GT:GP:GL:AD:DP", length(snpsubset))
2474+
if(usePL) out[,9] <- rep("GT:GP:PL:AD:DP", length(snpsubset))
24622475

24632476

24642477
## compute probs
@@ -2493,16 +2506,26 @@ writeVCF <- function(indsubset, snpsubset, outname=NULL, ep=0.001, puse = p, IDu
24932506
## Create the data part
24942507
## Compute the genotype fields
24952508
depthsub <- ref+alt
2496-
is.na(genon) <- is.na(paa) <- is.na(pab) <- is.na(pbb) <- is.na(llaa) <- is.na(llab) <- is.na(llbb) <- (depthsub < mindepth)
2497-
rm(depthsub)
2498-
genon0[is.na(genon0)] <- -1
2499-
if(!keepgt) genon0[] <- -1 # set all elements to missing
2509+
is.na(genon0) <- is.na(paa) <- is.na(pab) <- is.na(pbb) <- is.na(llaa) <- is.na(llab) <- is.na(llbb) <- (depthsub < mindepth)
2510+
depthsub[(depthsub < mindepth)] = 0
2511+
#rm(depthsub)
2512+
if(!keepgt) {
2513+
genon0[] <- -1 # set all elements to missing
2514+
} else if (GTmethod == "GP"){
2515+
genon0_tmp = matrix(0, nrow=nrow(genon0), ncol=ncol(genon0))
2516+
genon0_tmp[which((paa > pab) & (paa > pbb))] = 2
2517+
genon0_tmp[which((pab > paa) & (pab > pbb))] = 1
2518+
genon0_tmp[is.na(genon0)] <- -1
2519+
genon0 = genon0_tmp
2520+
} else if (GTmethod == "observed"){
2521+
genon0[is.na(genon0)] <- -1
2522+
}
25002523
if (is.big) {
25012524
gt <- apply(genon0+2, 2, function(x) c("./.","1/1","0/1","0/0")[x])
2502-
out2[,-c(1:9)] <- apply(cbind(gt,paa,pab,pbb,llaa,llab,llbb,ref,alt),1,genostring)
2525+
out[,-c(1:9)] <- apply(cbind(gt,paa,pab,pbb,llaa,llab,llbb,ref,alt,depthsub),1,genostring)
25032526
} else {
25042527
gt <- sapply(as.vector(genon0), function(x) switch(x+2,"./.","1/1","0/1","0/0"))
2505-
out[,-c(1:9)] <- matrix(gsub("NA",".",gsub("NA,","",paste(gt,paste(paa,pab,pbb,sep=","),paste(llaa,llab,llbb,sep=","), paste(ref,alt,sep=","), sep=":"))),
2528+
out[,-c(1:9)] <- matrix(gsub("NA",".",gsub("NA,","",paste(gt,paste(paa,pab,pbb,sep=","),paste(llaa,llab,llbb,sep=","), paste(ref,alt,sep=","),depthsub, sep=":"))),
25062529
nrow=length(snpsubset), ncol=length(indsubset), byrow=TRUE)
25072530
# for missings, first change NA, to empty so that any set of NA,NA,...,NA changes to NA, then can set that to . which is vcf missing for the whole field
25082531
}

0 commit comments

Comments
 (0)