Question: Consider the given frequencies for the letters of the alphabet provided below (add space with frqeuency 15%). a 8.167% h 6.094% o 7.507% v 0.978%
Consider the given frequencies for the letters of the alphabet provided below (add
space with frqeuency 15%).
| a 8.167% | h 6.094% | o 7.507% | v 0.978% |
| b 1.492% | i 6.966% | p 1.929% | w 2.360% |
| c 2.782% | j 0.153% | q 0.095% | x 0.150% |
| d 4.253% | k 0.772% | r 5.987% | y 1.974% |
| e 12.702% | l 4.025% | s 6.327% | z 0.074% |
| f 2.228% | m 2.406% | t 9.056% | |
| g 2.015% | n 6.749% | u 2.758% |
Frequencies of English letters
Source: Pavel Micka via Wikipedia (Letter Frequencies)
Use the provided Python program to create the Huffman code, encode the English
text provided (speech.txt) and decode it back. Note that in the code
provided, upper case is converted to lower case and all punctuation ignored except
spaces (newlines treated as spaces). This is done before encoding into a sequence
of 0s and 1s. Write separate functions to encode and decode.
We will check your code by encoding and decoding the text in speech.txt.
Compare the number of bits for the Huffman encoding compared to using one
byte (8 bits) per letter or space.
Partial Huffman Code:
from min_heap import MinHeap
def huffman (frequencies):
h = MinHeap(frequencies,[i for i in range(len(frequencies))])
n_nodes = len(frequencies)
# create parent array for Huffman tree
parent = [-1]*(2*len(frequencies)-1)
for i in range(len(frequencies)):
# print('heap =',h)
# print('parent =',parent)
f1,n1 = h.pull()
f2,n2 = h.pull()
# print('n1 =',n1,', f1 =',f1,', n2 =',n2,', f2 =',f2)
h.add(f1+f2,n_nodes)
parent[n1] = n_nodes
parent[n2] = n_nodes
n_nodes = n_nodes + 1
# print('complete parent array =',parent)
# now create lchild and rchild for the left and right children
# ... first initialize as arrays or -1's
lchild = [-1]*len(parent)
rchild = [-1]*len(parent)
for n in range(len(parent)): # for each node...
p = parent[n]
if p >= len(parent) or p < 0: # invalid parent, so n must be root
pass
else: # n is left child of p unless lchild[p] already set
if lchild[p] < 0:
lchild[p] = n
else:
rchild[p] = n
# print('lchild =',lchild)
# print('rchild =',rchild)
# now generate codes for each letter
codebook = [None]*len(frequencies)
for n in range(len(frequencies)):
# go from leaf up to root,
# using 0 if you are left child, 1 if a right child
# Then reverse the list.
n0 = n # save original letter number
code = []
while n < len(parent):
p = parent[n]
if p >= len(parent): # n is root
break # while loop
else: # add 0 or 1
if n == lchild[p]:
code.append(0)
else: # n == rchild[p]
code.append(1)
n = p # go to parent node
code.reverse()
codebook[n0] = code
# print('n0 =',n0,', code =',code)
return codebook,lchild,rchild
def decode(lchild,rchild,input):
# Decode a list of bits in input according to huffman tree
# as represented by the lchild (left child) & rchild (right child)
# arrays.
# Note that left children correspond to 0, and right children to 1.
# ... 15 lines of code ...
return output
def encode(codebook,input):
output = []
# ... two lines of code ...
return output
if __name__== '__main__':
# Create Huffman code for alphabet in text file. Frequencies given in
the pdf file.
# read in text and convert to indexes.
idxlist = []
file = open('textsample.txt','r')
for line in file:
line = line.lower() # convert to lower case
for letter in line:
if letter == ' ': # treat newline as a space
letter = ' '
if letter in letter_idx:
idx = letter_idx[letter]
idxlist.append(idx)
print(idxlist)
# Encode using the huffman (two line code written by student)
# Decode (15 line code written by student) gets indexes of letters
# Convert array of indexes to string of letters.
Min_Heap.py
class MinHeap:
def __init__(self,keys,vals):
assert len(keys) == len(vals) # if false, raise AssertionError
self.keys = keys
self.vals = vals
self.n_entries = len(keys)
self.buildheap()
def parent(self,k):
return (k-1) // 2 # == floor((k-1)/2)
def lchild(self,k):
return 2*k+1
def rchild(self,k):
return 2*k+2
def shift_up(self,k):
# restore heap structure after modification (increase) at index k
p = self.parent(k)
while k != 0 and self.keys[p] > self.keys[k]:
if self.keys[p] > self.keys[k]:
# swap entries for indexes k and p
t = self.keys[k]
self.keys[k] = self.keys[p]
self.keys[p] = t
t = self.vals[k]
self.vals[k] = self.vals[p]
self.vals[p] = t
# update k
k = p
else:
k = 0 # nothing more to do: jump to root & exit loop
# update p using k
p = self.parent(k)
def shift_down(self,k):
# restore heap structure after modification (decrease) at index k
c1 = self.lchild(k)
c2 = self.rchild(k)
n = self.n_entries
while (c1 < n and self.keys[c1] < self.keys[k]) \
or (c2 < n and self.keys[c2] < self.keys[k]):
# while heap condition fails...
if c2 < n and self.keys[c1] > self.keys[c2]:
c = c2
else:
c = c1
# swap entries with indexes c and k
t = self.keys[k]
self.keys[k] = self.keys[c]
self.keys[c] = t
t = self.vals[k]
self.vals[k] = self.vals[c]
self.vals[c] = t
# update k
k = c
# update c1 & c2 using k
c1 = self.lchild(k)
c2 = self.rchild(k)
def buildheap(self):
# turn arrays into a heap
n = self.n_entries
# print(self)
for k in range((n+1)//2,-1,-1):
self.shift_down(k)
def add(self,key,val):
n = self.n_entries
# increase size of arrays if needed
if len(self.keys) <= n:
currlen = len(self.keys)
newlen = max(2*n,10)
self.keys.extend([None]*(newlen-currlen))
self.vals.extend([None]*(newlen-currlen))
# Now add to end of list & shift up
self.keys[n] = key
self.vals[n] = val
self.n_entries = n+1
self.shift_up(n)
def delete(self,k): # delete the entry at index k
# move entry with indexes k and l=n_entries-1
l = self.n_entries - 1
self.keys[k] = self.keys[l]
self.vals[k] = self.vals[l]
self.shift_down(k)
self.n_entries = l
def pull(self):
key = self.keys[0]
val = self.vals[0]
self.delete(0)
return key,val
def is_empty(self):
return self.n_entries <= 0
def __str__(self):
n = self.n_entries
s = 'MinHeap: n_entries = ' + str(n) + \
', length = ' + str(len(self.keys)) + ' '
s = s + 'keys = ' + str([self.keys[i] for i in range(n)]) + ' '
s = s + 'vals = ' + str([self.vals[i] for i in range(n)])
return s
def check(self):
# returns true if heap condition satisfied; false otherwise
for k in range(1,self.n_entries):
if self.keys[k] < self.keys[self.parent(k)]:
return False
return True
if __name__ == '__main__':
keys = ['c', 'a', 'f', 'w', 'r', 'm']
vals = ['Charlie','Albert','Fred','Will','Rob','Mark']
h = MinHeap(keys,vals)
print(h)
print('OK? ',h.check())
h.add('g','George')
print(h)
print('OK? ',h.check())
h.add('z','Zoe')
print(h)
print('OK? ',h.check())
h.add('t','Tom')
print(h)
print('OK? ',h.check())
h.delete(7)
print(h)
print('OK? ',h.check())
h.delete(2)
print(h)
print('OK? ',h.check())
k,v = h.pull()
print('Key =',k,'; Val =',v)
print(h)
print('OK? ',h.check())
k,v = h.pull()
print('Key =',k,'; Val =',v)
print(h)
print('OK? ',h.check())
Speech.text
You may take it as my will. It was the message that I desired to impart to
you before starting on the march or for the jail. I wish that there should
be no suspension or abandonment of the war that commences tomorrow morning
or earlier, if I am arrested before that time. I shall eagerly await the
news that ten batches are ready as soon as my batch is arrested. I believe
there are men in India to complete the work our begun by me. I have faith
in the righteousness of our cause and the purity of our weapons. And where
the means are clean, there God is undoubtedly present with His blessings.
And where these three combine, there defeat is an impossibility. A
Satyagrahi, whether free or incarcerated, is ever victorious. He is
vanquished only, when he forsakes truth and nonviolence and turns a deaf
ear to the inner voice. If, therefore, there is such a thing as defeat for
even a Satyagrahi, he alone is the cause of it. God bless you all and keep
off all obstacles from the path in the struggle that begins tomorrow.
Step by Step Solution
There are 3 Steps involved in it
Get step-by-step solutions from verified subject matter experts
