Question: I need an explanation for this question: # You are provided with Python scripts that build a De Bruijn graph from example reads. Find the contig of the reads.

I need an explanation for this question:

# You are provided with Python scripts that build a De Bruijn graph from example reads. Find the contig of the reads. This time you will run the scripts in a slightly different way: the script requires that you provide the path to the reads file. How is that done on the command line? How can the output be written to a new file?

RunDB.py:

#!/usr/bin/env python
"""Assemble a contig from a file of short reads using a de Bruijn graph.

Usage:
    python RunDB.py READS_FILE
    python RunDB.py READS_FILE > output.txt

The path of the reads file is taken from the first command-line argument.
Everything is printed to standard output, so shell redirection (``>``) is
enough to store the results in a new file.
"""
import sys

import debruijn as db

# Word length handed to db.construct_graph.
KMER_LENGTH = 11


def main(argv):
    """Print the indexed reads and the contig assembled from them.

    argv is the full argument vector (normally ``sys.argv``); ``argv[1]``
    must be the path of the reads file.

    Returns the process exit status: 0 on success, 2 when the reads file
    argument is missing.
    """
    if len(argv) < 2:
        # Without this guard a missing argument surfaces as a bare IndexError.
        program = argv[0] if argv else 'RunDB.py'
        sys.stderr.write('usage: %s READS_FILE\n' % program)
        return 2

    reads = db.read_reads(argv[1])
    print('Index Reads:')
    for index, read in enumerate(reads):
        # A single pre-formatted string prints identically on Python 2 and 3.
        print('%d %s' % (index + 1, read))

    graph = db.construct_graph(reads, KMER_LENGTH)
    contig = db.output_contigs(graph)
    print('Assembled contig:')
    print(contig)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))

debruijn.py:

class Node(object):
    """A vertex of the de Bruijn graph.

    Attributes:
        label: the value given to the constructor.
        indegree: counter of incoming edges; starts at 0 and is expected to
            be incremented by whoever builds the graph.
        outdegree: counter of outgoing edges; starts at 0 and is expected to
            be incremented by whoever builds the graph.
    """

    # Deriving from object makes this a new-style class on Python 2 as well.

    def __init__(self, lab):
        self.label = lab
        self.indegree = 0
        self.outdegree = 0

    def __repr__(self):
        return 'Node(%r, indegree=%r, outdegree=%r)' % (
            self.label, self.indegree, self.outdegree)

class Edge(object):
    """A directed edge of the de Bruijn graph.

    Attributes:
        label: the value given to the constructor.
    """

    # Deriving from object makes this a new-style class on Python 2 as well.

    def __init__(self, lab):
        self.label = lab

    def __repr__(self):
        return 'Edge(%r)' % (self.label,)

def read_reads(fname):
    """Read short reads from a FASTA-style text file.

    Every line that does not start with '>' is treated as one complete read;
    multi-line sequences are not joined.  Trailing whitespace (including the
    line terminator) is stripped, and lines that are empty after stripping
    are skipped so they do not show up as empty reads.

    fname is the path of the file to read.  Returns the reads as a list of
    strings in file order.  Errors from open() propagate to the caller.
    """
    reads = []
    # The with-block closes the file even if reading raises.
    with open(fname, 'r') as handle:
        for line in handle:
            if line.startswith('>'):
                # Header line, carries no sequence data.
                continue
            read = line.rstrip()
            if read:
                reads.append(read)
    return reads

def construct_graph(reads, k):
    """Build a de Bruijn graph from short reads using words of length k.

    For every pair of consecutive k-length words (v1, v2) in a read, v1 and
    v2 become vertices and an Edge labelled v2 is appended to v1's edge
    list.  Repeated pairs add repeated edges.  Reads no longer than k
    contribute nothing.

    Returns a tuple (vertices, edges):
        vertices maps each word to its Node, with indegree/outdegree set to
            the number of edges entering/leaving it;
        edges maps each word to the list of Edge objects leaving it (an
            empty list for words with no outgoing edge).
    """
    edges = {}
    vertices = {}

    for read in reads:
        # Stopping at len(read) - k guarantees the second word, which starts
        # one position later, is still a full k characters long.
        for i in range(len(read) - k):
            v1 = read[i:i + k]
            v2 = read[i + 1:i + k + 1]

            # Membership is tested on the dict itself: a hash lookup rather
            # than a scan of a keys() list on Python 2.
            if v1 not in edges:
                vertices[v1] = Node(v1)
                edges[v1] = []
            vertices[v1].outdegree += 1
            edges[v1].append(Edge(v2))

            if v2 not in edges:
                vertices[v2] = Node(v2)
                edges[v2] = []
            vertices[v2].indegree += 1

    return (vertices, edges)

def output_contigs(g):
    """Walk the graph from a low in-degree vertex and spell out a contig.

    g is a (vertices, edges) tuple: vertices maps a label to an object with
    an ``indegree`` attribute, edges maps a label to a list of objects whose
    ``label`` attribute names the destination vertex.

    The walk starts at the vertex with the smallest in-degree (the first
    such vertex in iteration order on ties) and repeatedly follows the first
    not-yet-used outgoing edge of the current vertex, appending the last
    character of each destination label.  It stops at the first vertex with
    no unused outgoing edge, so edges elsewhere in the graph may remain
    unvisited.

    The graph is not modified.  Returns the contig string, or '' for a graph
    without vertices.  Raises KeyError if a visited label has no entry in
    the edge mapping.
    """
    V, E = g[0], g[1]
    if not V:
        return ''

    # min() yields the first minimal item, matching a strict '<' scan.
    start = min(V, key=lambda label: V[label].indegree)

    # Per-vertex cursor into its edge list; tracks used edges without
    # deleting them from the caller's graph.
    used = {}
    pieces = [start]
    current = start
    while True:
        outgoing = E[current]
        position = used.get(current, 0)
        if position >= len(outgoing):
            break
        used[current] = position + 1
        successor = outgoing[position].label
        pieces.append(successor[-1])
        current = successor

    return ''.join(pieces)

def print_graph(g):
    """Print every vertex of the graph with its degrees and outgoing edges.

    g is a (vertices, edges) tuple: vertices maps a key to an object with
    ``label``, ``indegree`` and ``outdegree`` attributes, edges maps the same
    key to a list of objects with a ``label`` attribute.

    For each vertex this writes one summary line, an "Edges: " line, one
    line per outgoing edge label, and a blank separator line to standard
    output.  Returns None.
    """
    V, E = g[0], g[1]
    for key in V:
        vertex = V[key]
        # Each line is formatted as one string so the text is identical
        # whether print is a statement (Python 2) or a function (Python 3).
        print('name:  %s . indegree:  %s . outdegree:  %s'
              % (vertex.label, vertex.indegree, vertex.outdegree))
        print('Edges: ')
        for edge in E[key]:
            print(edge.label)
        print('')

Step by Step Solution

There are 3 Steps involved in it

1 Expert Approved Answer
Step: 1 Unlock blur-text-image
Question Has Been Solved by an Expert!

Get step-by-step solutions from verified subject matter experts

Step: 2 Unlock
Step: 3 Unlock

Students Have Also Explored These Related Databases Questions!