Question: import pandas as pd import numpy as np from scipy.stats import pearsonr import matplotlib.pyplot as plt # Read data files house _ prices _ df

import pandas as pd
import numpy as np
from scipy.stats import pearsonr
import matplotlib.pyplot as plt
# Read data files
house_prices_df = pd.read_excel("MeanHousePricesClean-1.xlsx")
crime_df = pd.read_excel("CrimeClean-1-1.xlsx")
population_df = pd.read_excel("PopulationClean.xlsx")
area_df = pd.read_excel("SuburbAreas-1.xlsx")
# Step B: Clean and prepare data
def prepare_data(df, columns):
df = df.dropna(subset=columns) # Remove rows with missing values in key columns
return df
# Rename columns for consistency
house_prices_df = house_prices_df.rename(columns={'Year': 'year'})
crime_df = crime_df.rename(columns={
'Year': 'year',
'Crime rate per 100,000 population': 'crime_rate',
'Local Government Area': 'local_government_area'
})
population_df = population_df.rename(columns={'Year': 'year'})
area_df = area_df.rename(columns={'Property': 'local_government_area'})
# Clean the area DataFrame to remove non-relevant rows
area_df = area_df[area_df['local_government_area']!= 'Area sq Km']
# Step C: Analysis functions
def analyze_correlation(df, col1, col2):
df = df.dropna(subset=[col1, col2])
if len(df)2:
return np.nan
correlation, _= pearsonr(df[col1], df[col2])
return correlation
# Reshape house_prices_df to long format
house_prices_long = house_prices_df.melt(id_vars=['year'], var_name='local_government_area', value_name='mean_house_price')
# Reshape population_df to long format
population_long = population_df.melt(id_vars=['year'], var_name='local_government_area', value_name='population')
# Reshape area_df to long format
area_long = area_df.melt(id_vars=['local_government_area'], var_name='year', value_name='area')
# Merge the datasets on 'year' and 'local_government_area'
merged_df = pd.merge(crime_df, house_prices_long, on=['year', 'local_government_area'], how='inner')
merged_df = pd.merge(merged_df, population_long, on=['year', 'local_government_area'], how='inner')
merged_df = pd.merge(merged_df, area_long, on='local_government_area', how='inner')
# Calculate population density
merged_df['population_density']= merged_df['population']/ merged_df['area']
# Step D: Prepare the data by cleaning
merged_df = prepare_data(merged_df,['mean_house_price', 'crime_rate', 'population_density'])
# Step E: Perform correlation analysis
house_price_population_corr = analyze_correlation(merged_df, 'mean_house_price', 'population_density')
crime_house_price_corr = analyze_correlation(merged_df, 'crime_rate', 'mean_house_price')
crime_population_density_corr = analyze_correlation(merged_df, 'crime_rate', 'population_density')
# Step F: Print the results
print(f"Correlation between house prices and population density: {house_price_population_corr}")
print(f"Correlation between crime rate and house prices: {crime_house_price_corr}")
print(f"Correlation between crime rate and population density: {crime_population_density_corr}")
# Plotting for visual analysis
plt.figure(figsize=(10,6))
plt.scatter(merged_df['population_density'], merged_df['mean_house_price'])
plt.title('House Price vs Population Density')
plt.xlabel('Population Density (people per square km)')
plt.ylabel('Mean House Price')
plt.grid(True)
plt.show()
plt.figure(figsize=(10,6))
plt.scatter(merged_df['mean_house_price'], merged_df['crime_rate'])
plt.title('Crime Rate vs House Price')
plt.xlabel('Mean House Price')
plt.ylabel('Crime Rate (per 100,000 population)')
plt.grid(True)
plt.show()
plt.figure(figsize=(10,6))
plt.scatter(merged_df['population_density'], merged_df['crime_rate'])
plt.title('Crime Rate vs Population Density')
plt.xlabel('Population Density (people per square km)')
plt.ylabel('Crime Rate (per 100,000 population)')
plt.grid(True)
plt.show()
The above python code is run in google colab and it produces nil output as nan . modify and correct the code and add some more correlation if it helps to achieve my output. chegg expert gave me the code above, it is running free of errors but my plots are nil and the corelation is shown as Nan. help me. i have access to all the 4 excel sheets of data as attached. something is wrong since it keeps on producing Nan

Step by Step Solution

There are 3 Steps involved in it

1 Expert Approved Answer
Step: 1 Unlock blur-text-image
Question Has Been Solved by an Expert!

Get step-by-step solutions from verified subject matter experts

Step: 2 Unlock
Step: 3 Unlock

Students Have Also Explored These Related Databases Questions!