# SL96 functions related to data format.

# "checkGenoData" checks format of genotype file.
# Arguments
#	In: Data frame of genotype file imported using 'read.table(~, header=FALSE)'.
# Value
#	0 or 1. If 'r==1', invalid format.
	checkGenoData <- function(In) {
		# Validates the header row (row 1) of a genotype table read without a header.
		# Returns 0 when the layout is valid and 1 otherwise:
		#   - columns 1-4 must be "snp_id", "chr", "pos_bp" and "pos_cM";
		#   - columns 5 onward hold the line IDs, each ID filling two adjacent columns.
		# A table too narrow to hold the four marker columns plus one ID pair is
		# reported as invalid instead of failing on a missing column.
		if (is.null(ncol(In)) || ncol(In) < 6 || nrow(In) < 1) { return(1) }
		expected <- c("snp_id", "chr", "pos_bp", "pos_cM")
		for (k in seq_along(expected)) {
			# identical() also rejects NA cells without raising an error in 'if'.
			if (!identical(udac(In[1, k]), expected[k])) { return(1) }
		}
		ids <- udac(In[1, 5:ncol(In)])
		if (any(is.na(ids))) { return(1) }
		# Every ID must occur twice, so there are half as many distinct IDs as columns.
		if (length(ids) != length(unique(ids)) * 2) { return(1) }
		# Filled column-wise: rows 1 and 2 are the first and second member of each
		# adjacent column pair.
		pairs <- matrix(ids, nrow = 2)
		# Every pair must match. Testing only that the comparisons agree with each
		# other would also accept a header in which no pair matches (e.g. a b a b).
		if (!all(pairs[1, ] == pairs[2, ])) { return(1) }
		0
	}

# "checkPhenoData" checks format of phenotype file.
# Arguments
#	In: Data frame of phenotype file whose column names come from the file header (first column 'gid'), e.g. imported using 'read.table(~, header=TRUE)'.
# Value
#	0 or 1. If 'r==1', invalid format.
	checkPhenoData <- function(In, Trait=NULL) {
		# Returns 0 when 'In' passes the phenotype-table checks and 1 otherwise.
		# Checks: the first column is named "gid"; no column after the first holds
		# factor or character data; every name in 'Trait' (if given) is a column
		# name of 'In'. A table with only the "gid" column passes the column checks.
		r <- 0
		# identical() treats a missing or NA column name as a mismatch instead of
		# handing NA to 'if'.
		if (!identical(colnames(In)[1], "gid")) { r <- 1 }
		# seq_len(...)[-1] is empty for a one-column table, where 2:ncol(In) would
		# count down to 1 and index a column that does not exist.
		for (i in seq_len(ncol(In))[-1]) {
			# read.table() turns non-numeric text into factor on R < 4.0 and into
			# character on R >= 4.0; both are rejected.
			if (is.factor(In[,i]) || is.character(In[,i])) { r <- 1 }
		}
		if (length(Trait)!=0) {
			# all() reduces the membership test to a single value, so a mix of known
			# and unknown trait names is reported as invalid rather than passing a
			# length-2 condition to 'if'.
			if (!all(is.element(Trait, colnames(In)))) { r <- 1 }
		}
		r
	}

# "unphase" converts phased data format to unphased state.
# Arguments
#	IN: Character string of genotype file name (e.g. "SL96axiom.txt").
#	OUT: Character string of output file name (e.g. "out.txt")
# Value
#	Text file of unphased genotype data.
	unphase <- function(IN, OUT) {
		# Reads the genotype file 'IN' (its header line is kept as data row 1), adds
		# the two columns that share each line ID and subtracts 1, then writes the
		# four marker columns plus one column per ID to 'OUT'.
		# Stops with "Format error." when checkGenoData() rejects the table.
		In <- read.table(IN)
		if (checkGenoData(In)==1) { stop("Format error.") }
		ids <- udac(In[1,5:ncol(In)])
		ID <- unique(ids)
		# Negative indexing drops the header row. Unlike 2:nrow(), it yields zero
		# rows (not rows 2 and 1) when the file holds a header and no markers.
		calls <- as.matrix(In[-1,5:ncol(In),drop=FALSE])
		unphased <- matrix(NA, nrow=nrow(calls), ncol=length(ID))
		for (i in seq_along(ID)) {
			# Columns are matched by ID, so the first two columns carrying this ID
			# are summed wherever they sit in the table.
			pair <- which(ids==ID[i])
			unphased[,i] <- (as.numeric(calls[,pair[1]])+as.numeric(calls[,pair[2]])) - 1
		}
		colnames(unphased) <- ID
		Header <- In[-1,1:4,drop=FALSE]
		colnames(Header) <- udac(In[1,1:4])
		write.table(cbind(Header, unphased), OUT, row.names=FALSE, quote=FALSE)
	}

# "makeSimGenoFile" generates genotype file of simulated population.
# Arguments
#	Pop: List vector of simulated population generated using functions makeProgenies or makeRILs.
#	Filename: Character string of output genotype filename (e.g. "out.txt").
#	Bin.size: Bin size of simulated genome (cM).
	makeSimGenoFile <- function(Pop, Filename, Bin.size=0.1) {
		# Writes a genotype table for the simulated population 'Pop' to 'Filename'.
		# Reads the object 'SimGenomeTable' from the enclosing/global environment:
		#   [[1]] (Hap): its 2nd column supplies the haplotype IDs used as the names
		#                of columns 5 onward of [[2]];
		#   [[2]] (Geno): marker table with 'chr' and 'pos_cM' columns; columns 5
		#                 onward give the allele carried by each haplotype ID.
		# Each individual in 'Pop' is a list of 2 * n.chr vectors of haplotype IDs
		# per bin: elements 1..n.chr are one homolog, n.chr+1..2*n.chr the other.
		# Output: columns 1-4 of Geno followed by two columns per individual
		# (both named "gid<i>"), holding the allele at each marker.

		# Stacks the two homologs of every individual into one matrix per
		# chromosome (rows: 2 per individual, columns: bins).
		convertSimGenome <- function(Pop) {
			n.chr <- length(Pop[[1]])/2
			lapply(seq_len(n.chr), function(chr) {
				Each <- matrix(0, nrow=length(Pop)*2, ncol=length(Pop[[1]][[chr]]))
				for (i in seq_along(Pop)) {
					Each[2*i-1,] <- as.numeric(Pop[[i]][[chr]])
					Each[2*i,] <- as.numeric(Pop[[i]][[(chr+n.chr)]])
				}
				rownames(Each) <- rep(seq_along(Pop), each=2)
				Each
			})
		}
		if (!is.list(Pop) || length(Pop)==0) { stop("Invalid Population data.") }
		# exists() is needed because is.null() on an undefined name raises
		# "object not found" before this message can be shown.
		if (!exists("SimGenomeTable") || is.null(SimGenomeTable)) { stop("SimGenomeTable does not exist.") }
		Hap <- SimGenomeTable[[1]]
		Geno <- SimGenomeTable[[2]]
		colnames(Geno)[5:ncol(Geno)] <- Hap[,2]
		# Haplotype ID of each allele column; the same for every marker row.
		founder.id <- udan(colnames(Geno)[5:ncol(Geno)])
		M <- convertSimGenome(Pop)
		n.chr <- length(M)
		# Filled by row index, so each result row stays aligned with its marker in
		# Geno even when Geno is not sorted by chromosome.
		G <- matrix(NA_real_, nrow=nrow(Geno), ncol=length(Pop)*2)
		covered <- rep(FALSE, nrow(Geno))
		for (i in seq_len(n.chr)) {
			z <- t(M[[i]])
			rows <- which(Geno$chr==i)
			covered[rows] <- TRUE
			w <- Geno[rows,]
			# Map cM positions to 1-based bin indices; any index past the last bin
			# (e.g. a marker exactly at the chromosome end) falls into the last bin.
			bin <- pmin(floor(w$pos_cM*(1/Bin.size))+1, nrow(z))
			for (j in seq_along(rows)) {
				hap <- udan(w[j,5:ncol(w)])
				src <- z[bin[j],]
				geno <- src
				for (allele in sort(unique(hap))) {
					# Match against the untouched haplotype IDs in 'src': matching the
					# partly recoded vector could mistake an allele value written
					# earlier for a haplotype ID.
					geno[is.element(src, founder.id[hap==allele])] <- allele
				}
				G[rows[j],] <- geno
			}
		}
		if (!all(covered)) { stop("SimGenomeTable has markers on chromosomes that are not in Pop.") }
		colnames(G) <- rep(paste0("gid", seq_len(ncol(G)/2)), each=2)
		G <- cbind(Geno[,1:4], G)
		write.table(G, Filename, row.names=FALSE, quote=FALSE)
	}

# others
	# Session-wide setting: large 'scipen' makes R prefer fixed over scientific
	# notation when numbers are printed or written.
	options(scipen=100)
	# "udac" flattens a vector, list or data frame (row) into a character vector.
	# Factor elements of a list are converted to their labels first: unlist() on
	# a list that mixes factor and non-factor elements would otherwise return the
	# factors' integer codes instead of their labels.
	udac <- function(vec) {
		if (is.list(vec)) { vec <- rapply(vec, as.character, classes="factor", how="replace") }
		as.character(unlist(vec))
	}
	# "udan" flattens like "udac" and converts the result to numeric
	# (non-numeric text becomes NA with a warning).
	udan <- function(vec) { as.numeric(udac(vec)) }
