#file annotation.conf
seqspark {
  # Project name. It is used to create the output folder and as the
  # suffix of the default export path.
  project = demo

  # Number of partitions to use for large datasets.
  partitions = 10

  # Pipeline steps to run; this example runs only the annotation step.
  pipeline = ["annotation"]

  # Input genotype settings.
  input.genotype {
    # Path to the input genotype file.
    path = "demo.vcf.bz2"
  }

  # Output genotype settings.
  output.genotype {
    # Export the VCF after annotation.
    export = true

    # Do not include any sample/genotype columns, i.e. export only the
    # variant information.
    samples = none
  }

  # Annotation settings.
  annotation {
    # Entries to add to the INFO field of the input VCF.
    # Each key is the name of the new INFO entry; each value says where
    # the entry comes from.
    addInfo {
      # Add a "gnomAD" key if the variant is present in the database.
      gnomAD = "gnomad"

      # Take the AF value from gnomad and name it "Total_AF".
      Total_AF = "gnomad.AF"

      # Take the AC_NFE and AN_NFE counts from gnomad, compute their
      # ratio, and name it "SS_AF".
      SS_AF = "gnomad.AC_NFE/gnomad.AN_NFE"
    }

    # Databases referenced by the addInfo expressions above.
    db {
      # gnomad database settings.
      gnomad {
        # Location of the database. It can be on HDFS or on the local
        # file system; seqspark searches HDFS first.
        path = "seqspark/gnomad.exome.vcf.bz2"

        # Format of the database, here VCF. Can also be tsv or csv.
        format = "vcf"
      }
    }
  }
}