Question: I am working on a project using Python and Jupyter. This is what I have to work with: # - * - coding: utf -

I am working on a project using Python and Jupyter. This is what I have to work with:

# -*- coding: utf-8 -*-
"""
Created on Fri Sep 20 2024

@author: CS 634 Group 1, Fall 2024
"""

import pandas as pd

import json

import numpy as np

# Exploratory script over the sentence / translation-link datasets.
#
# It loads four JSON-lines files (English sentences, French sentences, the
# multi-language sentence dump, and the sentence-to-translation links) into
# pandas DataFrames, prints a preview of each, and then prints a few summary
# statistics: the language with the most sentences, the language with the most
# linked ("valid") sentences, the most-linked sentence, and the translation id
# shared by the most English / French sentences.

# Assign Dataset JSON File Paths
# Forward slashes are accepted on Windows as well as POSIX, and they avoid the
# invalid escape sequences ("\E", "\F", "\C", "\L") that the backslash form
# of these literals contains.
ENG_FILE_PATH = "DataSets/Eng_sentences.json"
FRA_FILE_PATH = "DataSets/Fra_sentences.json"
MULTI_LANG_SENTENCE_FILE_PATH = "DataSets/CC0_sentences.json"
# Contains Translations to match the sentences from CC0_sentences.json
LINKS_FILE_PATH = "DataSets/Links.json"

# Read the JSON-lines files into DataFrames
eng_data = pd.read_json(ENG_FILE_PATH, lines=True)  # English
fra_data = pd.read_json(FRA_FILE_PATH, lines=True)  # French
sentences_data = pd.read_json(MULTI_LANG_SENTENCE_FILE_PATH, lines=True)  # Multi Lang Sentences

# Link/Translation: one JSON object per line. Blank lines (e.g. a trailing
# newline at the end of the file) are skipped instead of crashing json.loads.
with open(LINKS_FILE_PATH, 'r', encoding='utf-8') as link_file:
    link_data = [json.loads(line) for line in link_file if line.strip()]

# Index by 'Sentence id' and immediately turn it back into a column. The round
# trip raises KeyError early if the column is missing and leaves 'Sentence id'
# as the first column, which the positional slice further down relies on.
eng_data = eng_data.set_index('Sentence id').reset_index()  # English
fra_data = fra_data.set_index('Sentence id').reset_index()  # French
sentences_data = sentences_data.set_index('Sentence id').reset_index()  # Multi Lang Sentences

# Link/Translation. A DataFrame built from a list of records already carries a
# fresh 0..n-1 index, so no index reset is needed here.
link_df = pd.DataFrame(link_data)

# Display the first few rows of each DataFrame
print(eng_data.head())
print(fra_data.head())
print(sentences_data.head())
print(link_df.head())

# =============================================================================
# Owen's part
# =============================================================================

# sentences_data contains all the sentences available on the website
# Count number of sentences in each available language
lang_count = (
    sentences_data.groupby('Lang')['Sentence id']
    .agg('count')
    .reset_index()
    .rename(columns={'Sentence id': 'lang_count'})
)
idx = lang_count['lang_count'].idxmax()
# Read the winning row by column label; integer keys on a label-indexed Series
# (row[0], row[1]) are deprecated positional lookups in recent pandas.
top_lang = lang_count.loc[idx]
print('{} language has {} available sentences which is the most in dataset'.format(
    top_lang['Lang'], top_lang['lang_count']))

# link_df contains the links between the sentences. 1 77 means that sentence #77
# is the translation of sentence #1.
# The reciprocal link is also present, so the file will also contain a line
# that says 77 1.

# Count, for every sentence id, how many link rows it appears in
link_translate = link_df.groupby('Sentence id').agg('count').reset_index()
link_translate = link_translate.rename(columns={'Translation id': 'translation_count'})

# Get the translations
# NOTE(review): the positional slice assumes the first three columns of
# sentences_data are 'Sentence id', 'Lang' and the sentence text -- confirm
# against the dataset schema.
link_translate = link_translate.merge(sentences_data.iloc[:, :3], how='left', on='Sentence id')

# Keep only the valid translation: rows whose sentence id was found in
# sentences_data (the left merge leaves Lang as NaN otherwise).
valid_translation = link_translate[~link_translate['Lang'].isna()]

# Count number of each language valid translations
valid_translation_count = (
    valid_translation.groupby('Lang')['Sentence id']
    .agg('count')
    .reset_index()
    .rename(columns={'Sentence id': 'valid_count'})
)
idx = valid_translation_count['valid_count'].idxmax()
top_valid = valid_translation_count.loc[idx]
print('{} language has {} valid sentences which is the most in dataset'.format(
    top_valid['Lang'], top_valid['valid_count']))

# Most popular translated sentence
idx = valid_translation['translation_count'].idxmax()
# The reported value is whatever the last merged-in column holds
# (presumably the sentence text -- see the NOTE on the slice above).
print('{} is the most translated sentences'.format(valid_translation.loc[idx].iloc[-1]))

# Translation that has the most different meaning in English
eng_data = eng_data.merge(link_df, how='left', on='Sentence id')
eng_translate_count = eng_data['Translation id'].value_counts()
idx = eng_translate_count.index[0]  # value_counts sorts descending, so this is the most frequent id
print('Translation that has the most different meaning in English:')
# Printed explicitly: a bare expression is only rendered when it is the last
# statement of a notebook cell and is discarded when run as a script.
print(eng_data[eng_data['Translation id'] == idx][['Translation id', 'Text']])

# Translation that has the most different meaning in French
fra_data = fra_data.merge(link_df, how='left', on='Sentence id')
fra_translate_count = fra_data['Translation id'].value_counts()
idx = fra_translate_count.index[0]  # most frequent translation id among French sentences
print('Translation that has the most different meaning in French:')
print(fra_data[fra_data['Translation id'] == idx][['Translation id', 'Text']])

Step by Step Solution

There are 3 Steps involved in it

1 Expert Approved Answer

Step: 1 Unlock blur-text-image

Question Has Been Solved by an Expert!

Get step-by-step solutions from verified subject matter experts

Step: 2 Unlock

Step: 3 Unlock

Students Have Also Explored These Related Programming Questions!

Instructions Perform the tasks and answer the questions using Python code (Jupyter Notebook). Take screenshots of your work and paste them Task 1 Write a program to calculate the monthly EMI amount...

Louis is currently working on a project using Python. In doing so she would like to use more than one command at a time so that she may increase her work performance. What would allow her to execute...

In this part, you need to prepare a business - style response to a hypothetical but realistic situation and make recommendations on the proposed stock investment project. You should develop a busines...

For your final project, you will write a Python program which does the following: Prompts the user for a keyword Create a new jumbled alphabet which starts with the keyword but does not duplicate any...

For the below project, please write an executive summary covering the points below. 1. Project Description: Our group project will focus on visualizing the impact of COVID-19 on the US Stock Market....

29022021 OSCI GOS Assignment Assignment 5 Goals The goal of this assignment is to work with scripts and packages in Python Instructions You will be doing your work in Python for this assignment You...

You are doing a fantastic job at Chada Tech in your new role as a junior developer, and you exceeded expectations in your last assignment for Airgead Banking. Since your team is impressed with your...

Note: we are not concerned with rounding at the calculation level, however, please take note of the decimal precision represented in the output below Output Once you have calculated the necessary...

Please finish four scripts described in the homework, Thank you! Homework: Find improperly marked files Warning: it will be difficult to do this homework without attending the lab session for hints...

Please implement this exercise using python code in jupyter notebook. The information is below. Can you show the answer/example with a plot as well for the 2 robots? %matplotlib inline import...

Alanine, an amino acid found in proteins, is chiral. Draw the two enantiomers of alanine using the standard convention of solid, wedged, and dashed lines. NH2 Alanine CHO2H

Several years ago, the Wisconsin Institute of Charitable Giving (WICG) conducted a comprehensive study on the grant-making activities of the nearly 1,300 charitable foundations in the state. The data...

The financial statements of the Ivanhoe Manufacturing Supply Company report net credit sales of $312000 and net accounts receivable of $48000 and $21600 at the beginning of the year and the end of...

QUESTION 1 (25) With the aid of examples from different organisations, discuss what you understand by the term full risk models and the criticisms of full models. QUESTION 2 (25) 4.1 With the aid of...