Question: In this project, I will have to make a TextAnalyzer class. The methods of the class are described below. I will do my work in the Analyzing Text Jupyter notebook included in the project files.

In this project, I will have to make a TextAnalyzer class. The methods of the class are described below. I will do my work in the Analyzing Text Jupyter notebook included in the project files. Be sure to comment my code well.

import requests, re

from bs4 import BeautifulSoup

from collections import Counter

import statistics as stats

import string

# I must create my class here

import operator  # Part 1 complete

import matplotlib.pyplot as plt; plt.rcdefaults()

class TextAnalyzer:
    """Load text from a URL, a text file, or a literal string and count its words.

    Attributes:
        _src_type (str) - One of "url", "path" or "text".
        _orig_content (str) - The text exactly as it was loaded.
        _content (str) - The text the analysis methods work on: _orig_content
            (or one HTML element of it) with punctuation removed and runs of
            whitespace collapsed to a single space.
    """

    def __init__(self, src, src_type="discover"):
        """
        Loads the source text and prepares it for analysis.

        Keyword arguments:

        src (str) - A URL, a path to a .txt file, or the text itself
        src_type (str) - "url", "path", "text", or "discover" to infer the
            type from src

        Raises ValueError if src_type is none of the values above.
        """
        # Determine src_type if not specified.
        if src_type == "discover":
            if src.startswith("http"):
                src_type = "url"
            elif src.endswith(".txt"):
                src_type = "path"
            else:
                src_type = "text"

        # Load content based on src_type.
        if src_type == "url":
            orig_content = requests.get(src).text
        elif src_type == "path":
            with open(src, "r") as f:
                orig_content = f.read()
        elif src_type == "text":
            orig_content = src
        else:
            # Fail early with a clear message instead of letting None reach
            # _preprocess() and surface as an unrelated AttributeError.
            raise ValueError(
                f"src_type must be 'discover', 'url', 'path' or 'text', "
                f"not {src_type!r}"
            )

        self._src_type = src_type
        self._orig_content = orig_content
        self._content = self._preprocess(orig_content)

    def _preprocess(self, text):
        """
        Returns text with punctuation removed and whitespace normalized.

        Case is deliberately preserved here: lowercasing at this stage would
        make the casesensitive option of _words() and common_words()
        impossible to honor.

        Keyword arguments:

        text (str) - Text to clean
        """
        # Remove punctuation in a single pass.
        text = text.translate(str.maketrans("", "", string.punctuation))
        # Collapse every run of whitespace (including newlines) to one space.
        return re.sub(r"\s+", " ", text)

    # Part 2 complete

    def set_content_to_tag(self, tag, tag_id=None):
        """
        Changes _content to the text within a specific element of an HTML document.

        Prints an error message and leaves _content unchanged when no
        matching element is found.

        Keyword arguments:

        tag (str) - Tag to read
        tag_id (str) - ID of tag to read
        """
        # Always parse the original content so that earlier calls do not
        # limit what can be found now.
        soup = BeautifulSoup(self._orig_content, "html.parser")

        # Match on the tag name and, when given, on the id as well.
        if tag_id:
            element = soup.find(tag, id=tag_id)
        else:
            element = soup.find(tag)

        if element is None:
            print("Error: tag not found in HTML document.")
            return

        self._content = self._preprocess(element.get_text())

    # Part 3 complete

    def reset_content(self):
        """
        Resets _content to full text that was originally loaded.

        Useful after a call to set_content_to_tag().
        """
        self._content = self._preprocess(self._orig_content)

    # Part 4 complete

    def _words(self, casesensitive=False):
        """
        Returns the words in _content, in order and including repeats.

        Keyword arguments:

        casesensitive (bool) - If False makes all words uppercase.
        """
        stripped = (word.strip(string.punctuation) for word in self._content.split())
        # A token made only of punctuation strips down to "", which is not a word.
        words = [word for word in stripped if word]
        if not casesensitive:
            words = [word.upper() for word in words]
        return words

    # Part 5 complete

    def common_words(self, minlen=1, maxlen=100, count=10, casesensitive=False):
        """
        Returns a list of (word, num) tuples for the most common words,
        sorted by num in descending order.

        Keyword arguments:

        minlen (int) - Minimum length of words to include.
        maxlen (int) - Maximum length of words to include.
        count (int) - Number of words to include.
        casesensitive (bool) - If False makes all words uppercase.
        """
        word_counts = Counter(self._words(casesensitive))
        # most_common() with no argument yields every entry, highest count
        # first, with ties kept in first-seen order.
        in_range = [
            (word, num)
            for word, num in word_counts.most_common()
            if minlen <= len(word) <= maxlen
        ]
        return in_range[:count]

Part 6 (help please)

char_distribution(self, casesensitive=False, letters_only=False)

Returns a list of 2-element tuples of the format (char, num), where num is the number of times char shows up in _content. The list should be sorted by num in descending order.

Keyword arguments:

  • casesensitive(bool) - Consider case?
  • letters_only(bool) - Exclude non-letters?

Part 7 (help please)

plot_common_words(self, minlen=1, maxlen=100, count=10, casesensitive=False)

Plots most common words.

Keyword arguments:

  • minlen(int) - Minimum length of words to include.
  • maxlen(int) - Maximum length of words to include.
  • count(int) - Number of words to include.
  • casesensitive(bool) - If False makes all words uppercase.

Part 8 (help please)

plot_char_distribution(self, casesensitive=False, letters_only=False)

Plots character distribution.

Keyword arguments:

  • casesensitive(bool) - If False makes all words uppercase.
  • letters_only(bool) - Exclude non-letters

Part 9 (help please)

Properties

In addition, the class must include these properties:

avg_word_length(self)

The average word length in _content, rounded to the hundredths place (e.g., 3.82).

word_count(self)

The number of words in _content.

distinct_word_count(self)

The number of distinct words in _content. This should not be case sensitive: "You" and "you" should be considered the same word.

words(self)

A list of all words used in _content, including repeats, in all uppercase letters.

positivity(self)

A positivity score calculated as follows:

  1. Create a local tally variable with an initial value of 0.
  2. Increment tally by 1 for every word in self.words found in positive.txt (in the same directory).
  3. Decrement tally by 1 for every word in self.words found in negative.txt (in the same directory).
  4. Calculate score as follows:

round( tally / self.word_count * 1000)

Step by Step Solution

There are 3 Steps involved in it

1 Expert Approved Answer
Step: 1 Unlock blur-text-image
Question Has Been Solved by an Expert!

Get step-by-step solutions from verified subject matter experts

Step: 2 Unlock
Step: 3 Unlock

Students Have Also Explored These Related Mathematics Questions!