
# Export three tab-separated summary tables from the database TSV:
#   coordinates.txt          - genomic location of every record
#   potential_functions.txt  - predicted functions (only records that have any)
#   general_information.txt  - CPC status/score, peptide length, expression

# Short species key used in the database -> abbreviated binomial name.
dict_species = {'amborella':'A. trichopoda', 'arabidopsis':'A. thaliana', 'chlamydomonas':'C. reinhardtii', 'glycine':'G. max', 'oryza':'O. sativa', 'physcomitrella':'P. patens', 'potato':'S. tuberosum', 'selaginella':'S. moellendorffii', 'vitis':'V. vinifera', 'zea':'Z. mays'}

# The context manager guarantees every file is flushed and closed, even if a
# malformed row raises part-way through the conversion.
with open('../DATABASE/database.tsv') as database, \
        open('coordinates.txt', 'w') as coordinates_out, \
        open('potential_functions.txt', 'w') as functions_out, \
        open('general_information.txt', 'w') as general_out:

    coordinates_out.write('CANTATA_id\tspecies\tchr\tstart\tend\tstrand\n')
    # The header must end with a newline; otherwise the first data row is
    # appended to the header line.
    functions_out.write('CANTATA_id\tspecies\tpredicted functions\n')
    general_out.write('CANTATA_id\tspecies\tCPC status\tCPC_score\tmax_peptide_length\tmax_expression(RPKM)\n')

    for raw_line in database:
        # A blank line (e.g. at the end of the file) carries no record.
        if not raw_line.strip():
            continue

        # Strip only the line terminator: a bare strip() would also remove
        # leading/trailing tabs, dropping empty first/last columns and
        # shifting (or truncating) the column indices used below.
        fields = raw_line.rstrip('\r\n').split('\t')

        # Left-pad the numeric identifier with zeros to 7 characters and add
        # the 'CNT' prefix, e.g. '42' -> 'CNT0000042'.
        cantata_id = 'CNT' + fields[0].rjust(7, '0')
        # Raises KeyError for a species key missing from dict_species.
        species = dict_species[fields[1]]

        cpc_status = fields[6]
        cpc_score = fields[7]
        max_peptide_length = fields[15]
        events = fields[18]
        chromosome = fields[19]
        start = fields[20]
        end = fields[21]
        strand = fields[22]
        max_expression = fields[23]

        coordinates_out.write('\t'.join([cantata_id, species, chromosome, start, end, strand]) + '\n')

        # Only records with at least one predicted event get a functions row.
        if events:
            functions = events.replace('miRNA', 'masking miRNA binding sites').replace('splicing', 'splicing regulation')
            functions_out.write('\t'.join([cantata_id, species, functions]) + '\n')

        general_out.write('\t'.join([cantata_id, species, cpc_status, cpc_score, max_peptide_length, max_expression]) + '\n')
  
