Question: Fix this code so that it gives correct output: Conduct punctuation removal, stop word removal, casefolding, lemmatization, and stemming on the documents. import pandas as ...

Fix this code so that it gives correct output:

Conduct punctuation removal, stop word removal, casefolding, lemmatization, and stemming on the documents.

# Text-preprocessing walkthrough: punctuation removal, stop word removal,
# casefolding, lemmatization, and stemming on a small set of sentences.
# Each stage prints its intermediate result so the stages can be compared.

import re

import nltk
import pandas as pd
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import RegexpTokenizer

# The lemmatizer needs the WordNet corpus and the stop word filter needs the
# stopwords corpus; without the second download, stopwords.words() raises
# LookupError on a fresh NLTK install.
nltk.download("wordnet")
nltk.download("stopwords")

sentences = [
    "Can we go to Disney??!!!!!! Let's go on a plane!",
    "The New England Patriots won the Super Bowl..",
    "I HATE going to school so early",
    "When will I be considered an adult?",
    "I want to go to A&M, Baylor, or the University of Texas.",
]

# --- Punctuation removal ----------------------------------------------------
# r"\w+" keeps runs of word characters and drops everything else. The
# backslash matters: the pattern r"w+" would only match the literal letter "w".
tokenizer = RegexpTokenizer(r"\w+")
tokens = [tokenizer.tokenize(sentence) for sentence in sentences]

print("sentences after punctuation removal are :")
print(" ")
for words in tokens:
    print(" ".join(words))
print(" ")

# --- Stop word removal ------------------------------------------------------
# Build the stop word set once; calling stopwords.words() per token re-reads
# the corpus list each time and does an O(n) list membership test.
english_stop_words = set(stopwords.words("english"))

# NOTE: despite its name, `stop_words` holds the tokens that REMAIN after stop
# words are removed (one list per sentence). The name is kept from the
# original script. The comparison is done on the casefolded token because the
# NLTK list is lowercase; otherwise "I", "The", "When", "Can" would survive.
stop_words = [
    [word for word in words if word.casefold() not in english_stop_words]
    for words in tokens
]

print("sentences after stop word removal are :")
print(" ")
for words in stop_words:
    print(" ".join(words))
print(" ")

# --- Casefolding ------------------------------------------------------------
print("sentences after casefold are :")
for i, words in enumerate(stop_words):
    stop_words[i] = [word.casefold() for word in words]
    print(" ".join(stop_words[i]))
print(" ")

# --- Lemmatization ----------------------------------------------------------
# lemmatize() is called without a part-of-speech tag, so NLTK uses its
# default POS for every word.
print("lemmatization:")
lemmatizer = WordNetLemmatizer()
for words in stop_words:
    for j, word in enumerate(words):
        lemma = lemmatizer.lemmatize(word)
        print(word, ":", lemma)
        words[j] = lemma

# --- Stemming ---------------------------------------------------------------
# Stemming is applied to the lemmatized tokens, continuing the same pipeline.
print(" ")
print("steming:")
ps = PorterStemmer()
for words in stop_words:
    for j, word in enumerate(words):
        stem = ps.stem(word)
        print(word, ":", stem)
        words[j] = stem

# --- Final output -----------------------------------------------------------
print(" ")
print("final output:")
for words in stop_words:
    print(" ".join(words))

Step by Step Solution

There are 3 Steps involved in it

1 Expert Approved Answer
Step: 1 Unlock blur-text-image
Question Has Been Solved by an Expert!

Get step-by-step solutions from verified subject matter experts

Step: 2 Unlock
Step: 3 Unlock

Students Have Also Explored These Related Databases Questions!