#!/usr/bin/env python

'''
NAME
        get-matrices-from-peakmotifs.py

VERSION
        %(version)s

AUTHOR
        Walter Santana-Garcia <santana@biologie.ens.fr>

DESCRIPTION
        Retrieve all the transfac files for each motif discovery
        algorithm result in a peak-motifs directory.

CATEGORY
        motif-discovery

ARGUMENTS
    GENERAL OPTIONS
        --version             show program's version number and exit
        -h, --help            show this help message and exit
        -v #, --verbosity=#   set verbosity to level #


        -i #, --input=#       Mandatory option. The name of an output directory
                              from RSAT peak-motifs.
        -o #, --output=#      Mandatory option. The name of the output directory
                              to store the results.
        -p #, --prefix=#      Mandatory option. Prefix of the output file name; the
                              result is written to <output>/<prefix>.tf.
'''

VERSION = '20200413'
USAGE = '''get-matrices-from-peakmotifs.py  -i input_dir
                                        -o output_dir
                                        -p prefix
                                        [-h | --help]
'''
#===============================================================================
# Imports
#===============================================================================
import os
import re
import sys
import glob
import time
import argparse
import subprocess


#===============================================================================
# Functions
#===============================================================================
def create_dir(dir_path, accession_rights = 0o755):
    """
    Create a directory, and any missing parent directories,
    from a path string with specific permission rights.

    A directory that already exists is not an error: it is
    reported with an INFO message. Any other failure is reported
    with an ERROR message on standard output; no exception is
    propagated to the caller.

    Parameters:
        dir_path          path of the directory to create.
        accession_rights  mode passed to os.makedirs (default 0o755).

    Returns:
        None
    """

    try:
        os.makedirs(dir_path, accession_rights)
    except OSError:
        # os.makedirs also raises when the leaf directory is already there;
        # only a path that is still not a directory is a real failure.
        if os.path.isdir(dir_path):
            print(";\tINFO: Directory %s already exists." % dir_path)
        else:
            print(";\tERROR:\n\t\tUnable to create directory %s." % dir_path)
    else:
        print(";\tINFO: Succesfull creation of directory %s." % dir_path)

    return
def file_exist(file_path, fatal = True):
    """
    Tell whether file_path is an existing regular file.

    With fatal set to False the answer is only returned and nothing
    is printed. With fatal set to True (the default) a missing file
    terminates the program through sys.exit with an error message,
    while a file that is present is reported with an INFO message
    before the result is returned.
    """
    exists = os.path.isfile(file_path)

    # Non-fatal mode: the return value is the only report
    if not fatal:
        return exists

    # Fatal mode: a missing file ends the program here
    if not exists:
        sys.exit(";\tERROR:\n\t\tUnable to found file %s !" % file_path)

    print(";\tINFO: File %s found!" % file_path)
    return exists
#===============================================================================
# Main of get-matrices-from-peakmotifs
#===============================================================================

def _copy_without_binding_sites(motif_file, out_handle):
    """
    Write every line of motif_file that does not start with "BS" to
    out_handle (a file object opened in binary mode).
    """
    with open(motif_file, "rb") as in_handle:
        for line in in_handle:
            if line.startswith(b"BS"):
                continue
            # Terminate a final unterminated line so that the next file
            # appended to the same output starts on a fresh line.
            if not line.endswith(b"\n"):
                line += b"\n"
            out_handle.write(line)


def main(argv = None):
    """
    Concatenate the matrix files of a peak-motifs run into one file.

    Every file matching results/oligos_*/*_pssm_count_matrices.tf under
    the input directory is appended, without the lines starting with
    "BS", to <output_dir>/<prefix>.tf. Files are processed in sorted
    path order.

    Parameters:
        argv  list of command-line arguments; None (the default) lets
              argparse read sys.argv[1:].

    Exits through sys.exit when a mandatory option is missing (argparse)
    or when the input directory or its "results" sub-directory does not
    exist.
    """
    # Start time (year-month-day; the day and month were previously swapped)
    time_format = "%Y-%m-%d.%H:%M:%S"
    time_start = time.strftime(time_format)
    seconds_start = time.time()

    #===========================================================================
    # Arguments definition
    #===========================================================================
    parse = argparse.ArgumentParser()
    parse.add_argument('-i', '--input', action = 'store', dest = 'input_dir', required = True,
                       help = "Mandatory option. The name of an output directory from RSAT peak-motifs.")
    parse.add_argument('-o', '--output', action = 'store', dest = 'output_dir', required = True,
                       help = "Mandatory option. The name of the output directory to store the result.")
    parse.add_argument('-p', '--prefix', action = 'store', dest = 'prefix', required = True,
                       help = "Mandatory option. Prefix of the output file name; the result is written to <output_dir>/<prefix>.tf.")
    parse.add_argument('-v', '--verbosity', action = 'store', dest = 'verbose', default = 1, type = int,
                       help = "level of verbosity. Supported: Integer = 1,2,3. By default : 1 = No message. Level 2 : moderately density of messages. Level 3 : High density")

    # Arguments extraction
    args = parse.parse_args(argv)

    input_dir  = args.input_dir
    output_dir = args.output_dir
    prefix     = args.prefix

    # Nothing can be retrieved without the input directory: stop here
    if not os.path.isdir(input_dir):
        sys.exit(";\tERROR\t:\tInput peak-motifs directory does not exist!")

    # os.path.join copes with a trailing "/" on the user-supplied paths
    result_dir = os.path.join(input_dir, "results")
    out_file   = os.path.join(output_dir, prefix + ".tf")

    if not os.path.isdir(result_dir):
        sys.exit(";\tERROR\t:\tInput peak-motifs results directory does not exist!")

    # The output file cannot be opened if its directory is missing
    if not os.path.isdir(output_dir):
        create_dir(output_dir)

    ###################################################
    # NOTE WSG: I can tune a regex or something to
    # selectively retrieve the desired motif algorithm
    # Get individual motif result files, sorted so that the order of the
    # concatenation does not depend on the file system listing order
    motif_files = sorted(glob.glob(os.path.join(result_dir, "oligos_*", "*_pssm_count_matrices.tf")))
    motif_nb = len(motif_files)

    # Print file info, one indented path per line
    motif_str = "\n".join("\t" + motif_file for motif_file in motif_files)
    print(";\tINFO\t:\tFound {0} files in current peak-motif directory!\n".format(motif_nb))
    print(";\tINFO\t:\tFiles:\n{0}".format(motif_str))

    # Append mode creates the file when missing and keeps previous content.
    # NOTE(review): a re-run with the same prefix therefore adds a second
    # copy of the matrices -- confirm whether truncating is preferable.
    with open(out_file, "ab") as out_handle:
        for motif_file in motif_files:
            _copy_without_binding_sites(motif_file, out_handle)

    # finished times
    time_end = time.strftime(time_format)

    # Write start time and finished time
    print(";\tJob started    %s" % time_start)
    print(";\tJob done       %s" % time_end)

    # finished time in second
    seconds_end = time.time()

    # Write time of the scan
    print(";\tSeconds        %f\n" % (seconds_end - seconds_start))


if __name__ == '__main__':
    main()
