#!/usr/bin/env python
#
# File: pre_phase.py
#

### 
# For each variant:
# If any member of a trio has a missing genotype, mark every member of that trio at that site as wildtype '2'.
###

import os
import argparse

# in *.bgl file: 0 is missing
def bgl_mark_missing(bglfile, outfile):
    """Copy a ``*.bgl`` file, overwriting trios that contain a missing value.

    The first two lines of ``bglfile`` are copied to ``outfile`` unchanged.
    Every later line is split on whitespace; the first two fields are kept
    as they are and the remaining fields are processed in consecutive
    groups of six (one group per trio -- presumably two allele columns for
    each of the three members; confirm against the linkage2beagle output).
    If any field of a group equals ``'0'`` (missing), all six fields of that
    group are replaced by ``'2'``.  Processed lines are written back with
    fields separated by single spaces.

    A trailing group with fewer than six fields is left untouched instead of
    being indexed past the end of the line.

    Args:
        bglfile: Path of the input ``*.bgl`` file.
        outfile: Path of the file to write; overwritten if it exists.
    """
    header_lines = 2   # leading lines copied verbatim
    first_col = 2      # leading fields on each data line that are not genotypes
    trio_width = 6     # fields per trio

    # Both handles are managed by ``with`` so they are closed even when a
    # line fails to process.  The input is opened first so that a missing
    # input does not leave an empty output file behind.
    with open(bglfile) as bf, open(outfile, 'w') as outf:
        for idx, line in enumerate(bf):
            if idx < header_lines:
                outf.write(line)
                continue

            items = line.rstrip().split()
            # Stop early enough that every start index has a full group of
            # ``trio_width`` fields after it.
            last_start = len(items) - trio_width + 1
            for start in range(first_col, last_start, trio_width):
                stop = start + trio_width
                # Fields are strings, so only the string '0' can match.
                if '0' in items[start:stop]:
                    items[start:stop] = ['2'] * trio_width
            outf.write(" ".join(items) + "\n")


if __name__ == '__main__':
    # Command-line entry point: rewrite the input *.bgl file in place while
    # keeping the original under an alternative name.
    parser = argparse.ArgumentParser(description='Pre phase process for rvTDT')
    # Both options are needed by the steps below, so let argparse reject a
    # missing one up front instead of failing later with a TypeError.
    parser.add_argument("-i", "--input", required=True,
                        help="*.bgl file, output of linkage2beagle.jar")
    parser.add_argument("-a", "--alter", required=True,
                        help="alternative name of original *.bgl file")
    args = parser.parse_args()

    # Despite the ".backup" suffix, this temporary file holds the processed
    # output, not a copy of the original.
    tmp = args.input+".backup"
    bgl_mark_missing(args.input, tmp)
    # Move the original aside, then put the processed file in its place.
    os.rename(args.input, args.alter)
    os.rename(tmp, args.input)
