################################################################
##
## Load pattern counts in upstream regions of NIT, MET and
## PHO genes
##

################################################################
## read data
##
################################################################
verbose("Loading data", 1)
setwd(dir.data)

## Input locations: the directory holding the count tables, then the
## names of the family, all-upstream and random-sequence count tables.
dir.counts <- paste(dir.data, input.prefix, sep = "/")
file.counts <- paste0(input.prefix, "_", input.genes, "g_pattern_counts.tab")
file.counts.allup <- paste0(input.prefix, "_allup_pattern_counts.tab")
file.counts.random <- paste0(
  "RANDOM_SEQ/", input.prefix, "_random_seq_pattern_counts.tab"
)

## Prefix for output files: "<input.genes>g_NMP", with a trailing "t"
## when input.prefix is "NIT_MET_PHO_together".
prefix <- paste0(
  input.genes, "g_",
  if (input.prefix == "NIT_MET_PHO_together") "NMPt" else "NMP"
)

## Title for plots
data.title <- input.prefix


################################################################
## Load the family table: a tab-separated file with a header line whose
## first column supplies the row names of fam.
verbose("Reading gene families", 2)
dir.families <- paste(dir.data, "gene_families", sep = "/")
file.families <- paste0("selected_families_", input.genes, "g.tab")
setwd(dir.families)
fam <- read.table(
  file.families,
  sep = "\t",
  header = TRUE,
  row.names = 1,
  as.is = TRUE
)

################################################################
## Load the pattern counts of the family sequences (first column = row
## names), run them through check.data(), and derive the per-row labels.
verbose("Reading pattern counts in family sequences", 2)
setwd(dir.counts)
x.ori <- read.table(
  file.counts,
  header = TRUE,
  row.names = 1,
  as.is = TRUE
)
x <- check.data(x.ori)

## Variables kept after check.data(); reused to subset the other tables
selected.vars <- names(x)

## Family of each row of x, looked up by row name in fam
group.labels <- fam[row.names(x), "family"]

## "<family>_<row name>" labels
full.labels <- paste(group.labels, row.names(x), sep = "_")

## Counts plus a trailing group.labels column
y <- cbind(x, group.labels)


################################################################
## an even smaller selection
## x.selected <- x[genes.selected,]
## group.labels.selected <- group.labels[genes.selected]


################################################################
#### Read pattern counts in all yeast genes
####
#### The table is read without row names; the first column is then turned
#### into row names and the columns are restricted to selected.vars.
verbose("Reading pattern counts in all upstream sequences", 2)
x.all.ori <- read.table(
  file.counts.allup,
  header = TRUE,
  row.names = NULL,
  as.is = TRUE
)
## make.names(unique = TRUE) guarantees distinct row names even when the
## first column contains repeated entries.
## NOTE(review): make.names() also rewrites non-syntactic characters (e.g.
## "-" becomes "."), whereas the row names of x are taken verbatim from its
## file -- verify that identifiers still match between x and x.all.
all.row.names <- make.names(x.all.ori[, 1], unique = TRUE)
row.names(x.all.ori) <- all.row.names
## drop = FALSE keeps x.all a data frame (with its row names) even when
## selected.vars holds a single variable.
x.all <- x.all.ori[, selected.vars, drop = FALSE]

################################################################
### Random selection of control genes
###
### When rand.selection is set, draw rand.number rows of x.all at random
### (without replacement), append them to x under the group label "CTL",
### and tag the output prefix and the plot title accordingly.
if (rand.selection) {
  verbose("Random selection of control genes", 2)
  ## NOTE(review): rows already present in x are not excluded from the
  ## draw, so a family gene can also be picked as a control -- confirm
  ## this is intended.
  control.genes <- sample(row.names(x.all), rand.number)
#  group.labels.all[control.genes] <- "CTL"
#  y.all <- cbind(x.all,group.labels.all)
  ## add the controls to the original data
  ## (drop = FALSE keeps the selection a data frame for rbind)
  x <- rbind(x, x.all[control.genes, , drop = FALSE])
  group.labels <- c(group.labels, rep("CTL", rand.number))
  y <- cbind(x, group.labels)
  full.labels <- paste(group.labels, row.names(x), sep = "_")
  prefix <- paste(prefix, "C", sep = "")
  data.title <- paste(data.title, "ctl", sep = "_")
}

################################################################
## Pattern counts in randomly generated sequences (Markov chain)
##
## When rand.sequences is set, append the first rand.number rows of the
## random-sequence count table to both x and x.all under the group label
## "RAND", and tag the output prefix and the plot title accordingly.
if (rand.sequences) {
  verbose("Reading pattern counts in random sequence", 2)
  setwd(dir.data)
  rand.ori <- read.table(
    file.counts.random,
    header = TRUE,
    row.names = 1,
    as.is = TRUE
  )
  ## Never index past the end of the table: out-of-range row indices
  ## would silently append all-NA rows to x and x.all.
  if (nrow(rand.ori) < rand.number) {
    warning(
      "Only ", nrow(rand.ori), " random sequences available, ",
      rand.number, " requested",
      call. = FALSE
    )
  }
  rand <- rand.ori[seq_len(min(rand.number, nrow(rand.ori))), , drop = FALSE]
  x <- rbind(x, rand)
  x.all <- rbind(x.all, rand)
  ## One "RAND" label per row actually appended
  group.labels <- c(group.labels, rep("RAND", nrow(rand)))
  full.labels <- paste(group.labels, row.names(x), sep = "_")
  prefix <- paste(prefix, "R", sep = "")
  data.title <- paste(data.title, "rand", sep = "_")
  y <- cbind(x, group.labels)
}

################################################################
## Permutation test
##
## With permut.test set, shuffle the group labels at random and append
## "_permut" to the output prefix and to the plot title.
if (permut.test) {
  verbose("Permuting data labels", 1)
  group.labels <- sample(as.vector(group.labels))
  prefix <- paste0(prefix, "_permut")
  data.title <- paste0(data.title, "_permut")
}

################################################################
## Column bootstrapping
##
## When bootstrap.columns is set, resample the columns of x with
## replacement (keeping the number of columns) and append "_colboot" to
## the output prefix and to the plot title.
if (bootstrap.columns) {
  verbose("Bootstrapping columns", 1)
  bootstrapped.columns <- sample(seq_len(ncol(x)), replace = TRUE)
  ## Build the new names from the names of x BEFORE it is subset. Once x
  ## has been resampled its names are already in resampled order, and
  ## indexing them a second time would attach the wrong name to each
  ## column. make.names(unique = TRUE) disambiguates columns drawn more
  ## than once.
  bootstrapped.col.names <- make.names(
    names(x)[bootstrapped.columns],
    unique = TRUE
  )
  x <- x[, bootstrapped.columns, drop = FALSE]
  names(x) <- bootstrapped.col.names
  prefix <- paste(prefix, "colboot", sep = "_")
  data.title <- paste(data.title, "colboot", sep = "_")
}

################################################################
## Row bootstrapping
##
## When bootstrap.rows is set, resample the rows of x with replacement
## (keeping the number of rows), resample group.labels with the same
## indices, and append "_rowboot" to the output prefix and plot title.
if (bootstrap.rows) {
  verbose("Bootstrapping rows", 1)
  bootstrapped.rows <- sample(seq_len(nrow(x)), replace = TRUE)
  ## Build the new row names from the row names of x BEFORE it is subset.
  ## Once x has been resampled its row names are already in resampled
  ## order, and indexing them a second time would attach the wrong name
  ## to each row -- and, since group.labels is later named after
  ## row.names(x), the wrong group to each gene.
  bootstrapped.row.names <- make.names(
    row.names(x)[bootstrapped.rows],
    unique = TRUE
  )
  x <- x[bootstrapped.rows, , drop = FALSE]
  row.names(x) <- bootstrapped.row.names
  group.labels <- group.labels[bootstrapped.rows]
  prefix <- paste(prefix, "rowboot", sep = "_")
  data.title <- paste(data.title, "rowboot", sep = "_")
}


## Name every group label after the row of x it belongs to
verbose("Defining group labels", 2)
names(group.labels) <- row.names(x)

## One label slot per row of x.all, NA until filled in below
group.labels.all <- rep(NA, length.out = nrow(x.all))
names(group.labels.all) <- row.names(x.all)

## Row names shared by x.all and x, and those of x absent from x.all
common <- intersect(names(group.labels.all), names(group.labels))
missing <- setdiff(names(group.labels), names(group.labels.all))

## Copy the known labels over to the matching rows of x.all
group.labels.all[common] <- group.labels[common]

## Counts for all rows plus a trailing group.labels.all column
y.all <- cbind(x.all, group.labels.all)


## Distinct group names, as tabulated by table()
groups <- names(table(group.labels))


## ##############################################################
## Plot colour of each group, as a character vector of hexadecimal RGB
## codes named by group label
group.colors <- c(
  PHO  = "#00CC00",
  NIT  = "#0000FF",
  MET  = "#FF00FF",
  CTL  = "#000000",
  RAND = "#00BBBB"
)
