Question: ########################################################## #Add code to the findORF.py file so that the program reads in a FASTA-formatted sequence data file containing multiple fasta-formatted sequences from a file

##########################################################

#Add code to the findORF.py file so that the program reads a FASTA-formatted data file called genome.fasta, which contains multiple sequences, and then outputs a .txt file containing the header line from the original sequence on the first line, followed by the coding frame, the starting position, the end position, and the length on the next line. The output should be sorted by decreasing ORF size; where multiple ORFs have the same size, sort them by the left position of the gene. Use a minimum ORF length of 100 bases, ATG as the start codon, and TAG, TAA, and TGA as stop codons.

Import the fastareader module into the findORF.py program and use it to read genome.fasta.

##########################################################

########################### # File 1: fastareader.py ###########################

#!/usr/bin/env python3

import sys


class FastAreader:
    '''
    Define objects to read FastA files.

    instantiation:
        thisReader = FastAreader('testTiny.fa')
    usage:
        for head, seq in thisReader.readFasta():
            print(head, seq)
    '''

    def __init__(self, fname=''):
        '''Constructor: saves attribute fname (empty string means STDIN).'''
        self.fname = fname

    def doOpen(self):
        '''
        Handle file opens, allowing STDIN.

        Returns sys.stdin when fname is the empty string, otherwise an
        open text-mode file handle for fname.
        '''
        # Compare by value: identity against a string literal is not
        # guaranteed to hold and triggers a SyntaxWarning on Python 3.8+.
        if self.fname == '':
            return sys.stdin
        return open(self.fname)

    def readFasta(self):
        '''
        Read FastA records one at a time.

        Yields (header, sequence) tuples. The header is the text after '>'
        with trailing whitespace removed; the sequence is the concatenation
        of the record's lines with all whitespace removed and upper-cased.
        Lines before the first header are ignored. Input without any header
        line yields nothing.
        '''
        # NOTE: leaving this with-block closes the handle, including
        # sys.stdin when reading from STDIN.
        with self.doOpen() as fileH:
            # Skip to the first fasta header. Iterating (instead of looping
            # on readline) terminates at EOF when no header exists.
            header = None
            for line in fileH:
                if line.startswith('>'):
                    header = line[1:].rstrip()
                    break
            if header is None:
                return

            # Collect sequence pieces and join once per record.
            chunks = []
            for line in fileH:
                if line.startswith('>'):
                    yield header, ''.join(chunks)
                    header = line[1:].rstrip()
                    chunks = []
                else:
                    chunks.append(''.join(line.split()).upper())

            # Emit the final record, which has no following header.
            yield header, ''.join(chunks)

##########################
# File 2: findORF.py
##########################
class CommandLine():
    '''
    Handle the command line, usage and help requests.

    CommandLine uses argparse, now standard in 2.7 and beyond.
    It implements a standard command line argument parser with various
    argument options, a standard usage and help.

    attributes:
    all arguments received from the commandline using .add_argument will be
    available within the .args attribute of an object instantiated from
    CommandLine. For example, if myCommandLine is an object of the class, and
    requiredbool was set as an option using add_argument, then
    myCommandLine.args.requiredbool will name that option.
    '''

    def __init__(self, inOpts=None):
        '''
        Implement a parser to interpret the command line argv string using argparse.

        inOpts: optional list of argument strings to parse; when None,
        argparse reads the arguments from sys.argv.
        '''
        import argparse

        self.parser = argparse.ArgumentParser(
            description='Program prolog - a brief description of what this thing does',
            epilog='Program epilog - some other stuff you feel compelled to say',
            add_help=True,  # default is True
            prefix_chars='-',
            usage='%(prog)s [options] -option1[default] output',
        )
        self.parser.add_argument('inFile', action='store', help='input file name')
        self.parser.add_argument('outFile', action='store', help='output file name')
        # A plain boolean flag. Declaring it with nargs='?' let the option
        # swallow a following positional (e.g. '-lG in.fa out.txt' lost in.fa).
        self.parser.add_argument('-lG', '--longestGene', action='store_true',
                                 default=False, help='longest Gene in an ORF')
        self.parser.add_argument('-mG', '--minGene', type=int,
                                 choices=(100, 200, 300, 500, 1000), default=100,
                                 action='store', help='minimum Gene length')
        # action='append' with a non-empty default: values given on the
        # command line are appended after the default codons.
        self.parser.add_argument('-s', '--start', action='append', default=['ATG'],
                                 nargs='?', help='start Codon')  # allows multiple list options
        self.parser.add_argument('-t', '--stop', action='append',
                                 default=['TAG', 'TGA', 'TAA'],
                                 nargs='?', help='stop Codon')  # allows multiple list options
        self.parser.add_argument('-v', '--version', action='version',
                                 version='%(prog)s 0.1')
        # parse_args(None) falls back to sys.argv, so no separate branch is needed.
        self.args = self.parser.parse_args(inOpts)

######################################################################## # Main # Here is the main program # # ########################################################################

def main(inCL=None):
    '''
    Find some genes.

    inCL: optional list of command-line argument strings handed to
    CommandLine; when None, CommandLine is built without arguments.
    '''
    myCommandLine = CommandLine() if inCL is None else CommandLine(inCL)
    print(myCommandLine.args)
    ###### replace the code between comments.
    # myCommandLine.args.inFile has the input file name
    # myCommandLine.args.outFile has the output file name
    # myCommandLine.args.longestGene is True if only the longest Gene is desired
    # myCommandLine.args.start is a list of start codons
    # myCommandLine.args.minGene is the minimum Gene length to include
    #
    #######


if __name__ == "__main__":
    # delete the list when you want to run normally
    main(['genome.fa', 'genome.txt', '--longestGene'])

Step by Step Solution

There are 3 Steps involved in it

1 Expert Approved Answer
Step: 1 Unlock blur-text-image
Question Has Been Solved by an Expert!

Get step-by-step solutions from verified subject matter experts

Step: 2 Unlock
Step: 3 Unlock

Students Have Also Explored These Related Databases Questions!