BED格式转GTF格式

# -*- coding: utf-8 -*-
"""
Created on Fri Dec 25 19:43:00 2015
 
@author: biochen
"""
 
import sys, getopt, csv
 
# get parameter
opts, args = getopt.getopt(sys.argv[1:], "hi:o:s:")
input_file = ""
output_file = ""
source = ""
for op, value in opts:
    if op == "-i":
        input_file = value
    elif op == "-o":
        output_file = value
    elif op == "-s":
        source = value
    elif op == "-h":
        print("Usage: python3 BED2GTF.py -i input.bed -o output.gtf -s source")
        sys.exit()
 
 
BED_in_file = open(input_file, "r")
GTF_out_file = open(output_file, "w")
BED = csv.reader(BED_in_file, dialect = "excel-tab")
GTF = csv.writer(GTF_out_file, dialect = "excel-tab", quotechar = "'")
 
for item in BED:
    transcript = []
    transcript.append(item[0])
    transcript.append(source)
    transcript.append("transcript")
    transcript.append(int(item[1]) + 1)
    transcript.append(int(item[2]) + 1)
    transcript.append(item[4])
    transcript.append(item[5])
    transcript.append(".")
    transcript.append('transcript_id "' + item[3] + '";')
    GTF.writerow(transcript)
    exon_number = int(item[9])
    exon_size = list(map(int, (filter(lambda x:x and len(x.strip()) > 0, item[10].split(",")))))
    exon_start = list(map(int, (filter(lambda x:x and len(x.strip()) > 0, item[11].split(",")))))
    for i in range(exon_number):
        exon = []
        exon.append(item[0])
        exon.append(source)
        exon.append("exon")
        exon.append(int(item[1]) + exon_start[i] + 1)
        exon.append(exon[3] + exon_size[i] - 1)
        exon.append(item[4])
        exon.append(item[5])
        exon.append(".")
        exon.append('transcript_id "' + item[3] + '\";')
        GTF.writerow(exon)
 
BED_in_file.close()
GTF_out_file.close()

GTF格式转BED格式

 # -*- coding: utf-8 -*-
"""
Created on Wed Dec 16 11:03:05 2015
 
@author: biochen
"""
 
import sys, getopt, csv, re
 
opts, args = getopt.getopt(sys.argv[1:], "hi:o:")
for op, value in opts:
    if op == "-i":
        GTF_in_File_Name = value
    elif op == "-o":
        BED_in_File_Name = value
    elif op == "-h":
        print("Usage: python3 GTF2BED.py -i input.gtf -o output.bed")
        sys.exit()
 
GTF_in_file = open(GTF_in_File_Name, "r")
BED_out_file = open(BED_in_File_Name, "w")
GTFs = csv.reader(GTF_in_file, dialect = "excel-tab")
BEDs = csv.writer(BED_out_file, dialect = "excel-tab")
 
transcripts = {}
for GTF in GTFs:
    if GTF[2] == "exon":
        transcript_id = re.findall('transcript_id "([^;]*)"', GTF[8])[0]
        if transcript_id in transcripts:
            transcripts[transcript_id][6].append([int(GTF[3]), int(GTF[4])])
        else:
            transcripts[transcript_id] = []
            transcripts[transcript_id].append(GTF[0])
            transcripts[transcript_id].append(GTF[3])
            transcripts[transcript_id].append(GTF[4])
            transcripts[transcript_id].append(GTF[5])
            transcripts[transcript_id].append(GTF[6])
            transcripts[transcript_id].append(transcript_id)
            transcripts[transcript_id].append([[int(GTF[3]), int(GTF[4])]])
 
for transcript in transcripts:
    transcripts[transcript][6].sort()
    transcript_start = transcripts[transcript][6][0][0]
    transcript_end = transcripts[transcript][6][len(transcripts[transcript][6]) - 1][1]
    exon_sizes = ""
    exon_starts = ""
    for exon in transcripts[transcript][6]:
        exon_size = exon[1] - exon[0] + 1
        exon_start = exon[0] - transcript_start
        exon_sizes = exon_sizes + str(exon_size) + ","
        exon_starts = exon_starts + str(exon_start) + ","
    BED = []
    BED.append(transcripts[transcript][0])
    BED.append(transcript_start - 1)
    BED.append(transcript_end)
    BED.append(transcripts[transcript][5])
    BED.append(transcripts[transcript][3])
    BED.append(transcripts[transcript][4])
    BED.append(transcript_start - 1)
    BED.append(transcript_end)
    BED.append("0, 0, 0")
    BED.append(len(transcripts[transcript][6]))
    BED.append(exon_sizes)
    BED.append(exon_starts)
    BEDs.writerow(BED)
 
GTF_in_file.close()
BED_out_file.close()