Question:

I have an assignment to work with a team on the following project:
In this assignment, you will work with your group on an application of your choice, doing data processing with the NumPy, Pandas, and plotting tools we have learned. Please select one or more dataset(s) and process them in a Jupyter Notebook, as in our class examples. In your notebook, you are required to provide clear descriptions of the dataset(s), your goal for this application, and the end result(s).
# -*- coding: utf-8 -*-
"""Group project: data processing on multilingual sentence and translation-link datasets."""
import pandas as pd
import json
import numpy as np
# Assign Dataset JSON File Paths
EN_FILE_PATH = "DataSets\Eng_sentences.json"
FRA_FILE_PATH = "DataSets\Fra_sentences.json"
MULTI_LANG_SENTENCE_FILE_PATH = "DataSets\CC0_sentences.json"
LINKS_FILE_PATH = "DataSets\Links.json" # Contains Translations to match the sentences from CC0_sentences.json
# Read each JSON Lines file into a DataFrame
en_data = pd.read_json(EN_FILE_PATH, lines=True) # English
fra_data = pd.read_json(FRA_FILE_PATH, lines=True) # French
sentences_data = pd.read_json(MULTI_LANG_SENTENCE_FILE_PATH, lines=True) # Multi Lang Sentences
link_data = []
with open(LINKS_FILE_PATH, 'r') as link_file:  # Link/Translation
    for line in link_file:
        link_data.append(json.loads(line))
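# Alternative (a sketch, assuming Links.json uses the same JSON Lines layout as the
# sentence files): pandas can read it directly and skip the manual loop above.
# link_df = pd.read_json(LINKS_FILE_PATH, lines=True)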
# Index each sentence DataFrame by 'Sentence id' and build the links DataFrame
en_data.set_index('Sentence id', inplace=True) # English
fra_data.set_index('Sentence id', inplace=True) # French
sentences_data.set_index('Sentence id', inplace=True) # Multi Lang Sentences
link_df = pd.DataFrame(link_data) # Link/Translation
# Reset index to include 'Sentence id' as a column
en_data.reset_index(inplace=True) # English
fra_data.reset_index(inplace=True) # French
sentences_data.reset_index(inplace=True) # Multi Lang Sentences
link_df.reset_index(drop=True, inplace=True) # Link/Translation
# Display the first few rows of each DataFrame
print(en_data.head())
print(fra_data.head())
print(sentences_data.head())
print(link_df.head())
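# A quick data-quality pass (a sketch of checks that could be added at this point):
# row counts, duplicate rows, and missing values for every DataFrame loaded above.
for name, df in [('English', en_data), ('French', fra_data),
                 ('All sentences', sentences_data), ('Links', link_df)]:
    print('\n{}: {} rows, {} duplicate rows, missing values per column:'.format(
        name, len(df), df.duplicated().sum()))
    print(df.isna().sum())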
# =============================================================================
# First person's part
# =============================================================================
# sentences_data contains all the sentences available on the website
# Count the number of sentences in each available language
lang_count = sentences_data.groupby(['Lang'])['Sentence id'].agg('count').reset_index()
lang_count = lang_count.rename(columns={'Sentence id':'lang_count'})
idx = lang_count.lang_count.idxmax()
print('\n{} language has {} available sentences, which is the most in the dataset'.format(lang_count.iloc[idx]['Lang'], lang_count.iloc[idx]['lang_count']))
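# The assignment also asks for plotting; a minimal sketch of a bar chart of the ten most
# common languages (assumes matplotlib is available in the notebook environment).
import matplotlib.pyplot as plt
top_langs = lang_count.sort_values('lang_count', ascending=False).head(10)
plt.figure(figsize=(8, 4))
plt.bar(top_langs['Lang'], top_langs['lang_count'])
plt.xlabel('Language')
plt.ylabel('Number of sentences')
plt.title('Ten most common languages in the dataset')
plt.tight_layout()
plt.show()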
# link_df contains the links between the sentences. A link (1, 77) means that sentence #77 is the translation of sentence #1.
# The reciprocal link is also present, so the file will also contain a line for (77, 1).
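# A quick sanity check of that reciprocity claim (a sketch, using only the two columns
# the code below relies on): reverse the link direction and count how many links also
# appear reversed.
reversed_links = link_df.rename(columns={'Sentence id': 'Translation id',
                                         'Translation id': 'Sentence id'})
reciprocal = link_df.merge(reversed_links, on=['Sentence id', 'Translation id'], how='inner')
print('\n{} of {} links have a reciprocal link'.format(len(reciprocal), len(link_df)))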
# Count the number of translation links for each sentence
link_translate = link_df.groupby(['Sentence id']).agg('count').reset_index()
link_translate = link_translate.rename(columns={'Translation id':'translation_count'})
# Get the translations
link_translate = link_translate.merge(sentences_data.iloc[:,:3], how = 'left', on = 'Sentence id')
# Keep only links whose source sentence exists in sentences_data (valid translations)
valid_translation = link_translate[~link_translate.Lang.isna()]
# Count the number of valid translated sentences for each language
valid_translation_count = valid_translation.groupby('Lang')['Sentence id'].agg('count').reset_index()
valid_translation_count = valid_translation_count.rename(columns={'Sentence id':'valid_count'})
idx = valid_translation_count.valid_count.idxmax()
print('\n{} language has {} sentences with valid translations, which is the most in the dataset'.format(valid_translation_count.iloc[idx]['Lang'], valid_translation_count.iloc[idx]['valid_count']))
# Most translated sentence (highest number of translation links)
idx = valid_translation.translation_count.idxmax()
print('\n"{}" is the most translated sentence'.format(valid_translation.loc[idx].iloc[-1]))
# Translation linked to the most distinct English sentences (i.e., it has the most different meanings in English)
en_data = en_data.merge(link_df, how='left', on='Sentence id')
en_translate_count = en_data['Translation id'].value_counts()
idx = en_translate_count.index[0]
print('\nTranslation that has the most different meanings in English:')
print(en_data[en_data['Translation id'] == idx][['Translation id', 'Text']])
# Translation linked to the most distinct French sentences (i.e., it has the most different meanings in French)
fra_data = fra_data.merge(link_df, how='left', on='Sentence id')
fra_translate_count = fra_data['Translation id'].value_counts()
idx = fra_translate_count.index[0]
print('\nTranslation that has the most different meanings in French:')
print(fra_data[fra_data['Translation id'] == idx][['Translation id', 'Text']])
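# A natural extension of the two merges above (a sketch): build English-French sentence
# pairs by joining the English sentences to the French ones through the translation links.
# Column names follow the ones already used in this script.
en_links = en_data.dropna(subset=['Translation id']).copy()
en_links['Translation id'] = en_links['Translation id'].astype('int64')
en_fr_pairs = en_links[['Sentence id', 'Text', 'Translation id']].merge(
    fra_data[['Sentence id', 'Text']].drop_duplicates(),
    left_on='Translation id', right_on='Sentence id',
    suffixes=('_en', '_fr'))
print('\n{} English-French sentence pairs found'.format(len(en_fr_pairs)))
print(en_fr_pairs[['Text_en', 'Text_fr']].head())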
What would be a good addition to this if I am assigned the data processing part?
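One direction that would fit a data-processing role (a sketch, not a definitive answer): the script imports NumPy but never uses it, so a small NumPy summary of sentence lengths per language would round out the processing side. The sketch below assumes the multilingual sentence file has the same 'Text' column as the English and French files, and reuses the lang_count table computed earlier.

# Sentence-length statistics (mean, median, 90th percentile) for the five most common
# languages, computed with NumPy on the 'Text' column (assumed to hold the sentence text).
top5 = lang_count.sort_values('lang_count', ascending=False)['Lang'].head(5)
for lang in top5:
    lengths = sentences_data.loc[sentences_data['Lang'] == lang, 'Text'].str.len().to_numpy()
    print('{}: mean length {:.1f}, median {:.1f}, 90th percentile {:.1f}'.format(
        lang, np.mean(lengths), np.median(lengths), np.percentile(lengths, 90)))

Other additions that stay within what the course covered: dropping duplicate sentences, handling the missing 'Lang' values already filtered out above, and saving the cleaned/merged tables to CSV for the rest of the group.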
