#!/usr/bin/env python
#
# File: post_phase.py
#

import os
import argparse

def read_mapinfo(mapfile):
    """Load the variant annotation table from *mapfile*.

    The first line is treated as a header and skipped. Every other
    non-blank line is split on whitespace: the first two columns are joined
    with "_" to form the variant key, the third column is the gene and the
    fourth is kept verbatim as the MAF string. Variants whose gene column
    is "." are ignored. Blank or whitespace-only lines are skipped.

    Args:
        mapfile: Path to the whitespace-delimited annotation file.

    Returns:
        A tuple ``(var2gene, var2info)``: ``var2gene`` maps each variant key
        to its gene, ``var2info`` maps it to the list ``[gene, maf]``.

    Raises:
        IndexError: If a non-blank data line has fewer than four columns.
    """
    var2gene = {}
    var2info = {}  # variant key -> [gene, maf]
    with open(mapfile) as mf:
        next(mf, None)  # skip the header line (no-op on an empty file)
        for line in mf:
            # split() with no separator already discards surrounding
            # whitespace, so the tokens need no further stripping.
            items = line.split()
            if not items:
                # Blank line, e.g. a trailing newline at end of file.
                continue
            var = items[0] + "_" + items[1]
            gene = items[2]
            maf = items[3]

            if gene != ".":
                var2gene[var] = gene
                var2info[var] = [gene, maf]

    return var2gene, var2info


def append_line_to_file(file, dat):
    """Append the string *dat*, terminated by a newline, to the path *file*.

    The target is opened in append mode: it is created when missing and any
    existing content is left in place.
    """
    with open(file, 'a') as sink:
        sink.writelines((dat, "\n"))
    
# Split the phased TPED file into per-gene .tped/.map outputs.
def post_phase_tped(tpedfile, var2gene, var2info, outdir):
    """Split a phased TPED-like file into per-gene ``.tped``/``.map`` files.

    The first two lines of *tpedfile* are skipped, as are blank lines. On
    each remaining line the second column is the variant name; its first
    two "_"-separated parts form the lookup key. Lines whose key is missing
    from either mapping are dropped. For every kept line:

    * ``<outdir>/<gene>.map`` receives ``"<gene> <variant> <maf>"``;
    * ``<outdir>/<gene>.tped`` receives the variant name followed by the
      genotype tokens (column three onward), with every '2' rewritten as
      '0'. The first input column is not carried into the output.

    Output files are opened in append mode, so writing into an *outdir*
    that already holds results adds to those files instead of replacing
    them.

    Args:
        tpedfile: Path to the whitespace-delimited phased genotype file.
        var2gene: Mapping of variant key to gene name.
        var2info: Mapping of variant key to ``[gene, maf]``.
        outdir: Output directory; created if it does not exist.

    Raises:
        IndexError: If a non-blank data line has fewer than two columns.
    """
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    with open(tpedfile) as tf:
        for idx, line in enumerate(tf):
            if idx < 2:
                # The first two lines are skipped unconditionally.
                continue
            items = line.split()
            if not items:
                # Blank line, e.g. a trailing newline at end of file.
                continue
            var = items[1]
            varid = "_".join(var.split("_")[0:2])

            if varid not in var2gene or varid not in var2info:
                continue

            # change coding: '2'=wildtype -> '0'=wildtype
            postgeno = ['0' if g == '2' else g for g in items[2:]]
            gene = var2gene[varid]
            info = var2info[varid]

            mapout = os.path.join(outdir, gene + ".map")
            tpedout = os.path.join(outdir, gene + ".tped")
            append_line_to_file(mapout, info[0] + " " + var + " " + info[1])
            append_line_to_file(tpedout, " ".join([var] + postgeno))


def main():
    """Parse the command line and run the post-phase split.

    All three options are mandatory: without them the file-handling code
    would receive ``None`` as a path and fail with an unhelpful TypeError,
    so argparse is asked to reject the invocation up front instead.
    """
    parser = argparse.ArgumentParser(description='Post phase process for rvTDT')
    parser.add_argument("-a", "--annotate", required=True, help="annotate file")
    parser.add_argument("-b", "--beagle", required=True, help="output of BEAGLE")
    parser.add_argument("-o", "--output", required=True, help="output folder name")
    args = parser.parse_args()
    v2gene, v2info = read_mapinfo(args.annotate)
    post_phase_tped(args.beagle, v2gene, v2info, args.output)


if __name__ == '__main__':
    main()
