用Python脚本筛选长度大于200 nt的转录本

lncRNA的长度大于200 nt,因此有时需要筛选出长度大于200 nt的转录本,用Python写一个脚本即可轻松实现这一功能。

import sys, getopt
 
USAGE = "Usage: python filter.py -i input.fasta -o output.fasta"


def _parse_args(argv):
    """Return ``(input_path, output_path)`` parsed from the ``-i``/``-o`` options.

    ``-h`` prints the usage line and exits with status 0.  An unknown option
    or a missing ``-i``/``-o`` exits with the usage line on stderr instead of
    failing later with an unrelated traceback from ``open("")``.
    """
    try:
        opts, _ = getopt.getopt(argv, "hi:o:")
    except getopt.GetoptError as err:
        sys.exit(str(err) + "\n" + USAGE)

    input_path = ""
    output_path = ""
    for op, value in opts:
        if op == "-i":
            input_path = value
        elif op == "-o":
            output_path = value
        elif op == "-h":
            print(USAGE)
            sys.exit()

    if not input_path or not output_path:
        sys.exit(USAGE)
    return input_path, output_path


def _write_record(dst, header, sequence, min_length, line_width):
    """Write one FASTA record if it is long enough; return 1 if written, else 0."""
    # ``header is None`` means no ">" line has been seen yet, so there is no
    # record to emit (sequence text before the first header is discarded).
    if header is None or len(sequence) < min_length:
        return 0
    dst.write(header + "\n")
    for start in range(0, len(sequence), line_width):
        dst.write(sequence[start:start + line_width] + "\n")
    return 1


def filter_fasta(input_path, output_path, min_length=200, line_width=60):
    """Copy the FASTA records of at least ``min_length`` residues to a new file.

    Args:
        input_path: Path of the FASTA file to read.
        output_path: Path of the FASTA file to create (overwritten if present).
        min_length: Minimum sequence length to keep.  The bound is inclusive,
            so a sequence of exactly ``min_length`` residues is kept.
        line_width: Number of residues per output sequence line.

    Returns:
        The number of records written to ``output_path``.

    Blank lines in the input are ignored.  The final record of the file is
    written too; it has no following header to trigger a flush, so it is
    flushed explicitly after the loop.
    """
    kept = 0
    # ``with`` guarantees both files are closed (and the output flushed) even
    # if reading fails part-way through.
    with open(input_path) as src, open(output_path, "w") as dst:
        header = None
        chunks = []
        # Iterate the file lazily instead of loading it with readlines().
        for raw in src:
            line = raw.rstrip()
            if not line:
                continue
            if line.startswith(">"):
                kept += _write_record(
                    dst, header, "".join(chunks), min_length, line_width
                )
                header = line
                chunks = []
            else:
                # Collect pieces and join once per record to avoid re-copying
                # the growing sequence on every line.
                chunks.append(line)
        # Flush the last record of the file.
        kept += _write_record(dst, header, "".join(chunks), min_length, line_width)
    return kept


def main(argv=None):
    """Command-line entry point: ``python filter.py -i in.fasta -o out.fasta``."""
    input_path, output_path = _parse_args(sys.argv[1:] if argv is None else argv)
    filter_fasta(input_path, output_path)


if __name__ == "__main__":
    main()