Question: I am going to provide you one code. Please check and fix this. import pandas as pd import numpy as np from datetime import datetime

I am going to provide you one code. Please check and fix this.
import pandas as pd
import numpy as np
from datetime import datetime
def convert_to_int(value):
return int(value)
def replace_negatives_with_averages(df):
rows, cols = df.shape
for row in range(rows):
row_values =[]
for col in range(cols):
value = df[col][row]
if isinstance(value,(int)) and value !=-1:
row_values.append(value)
for col in range(cols):
if df[col][row]==-1:
next_value =7
valid_values =[]
new_col = col + next_value
while new_col < cols:
value = df[new_col][row]
if isinstance(value,(int, float)) and value !=-1:
valid_values.append(value)
new_col = new_col + next_value
avg = sum(valid_values)/ len(valid_values)
for col_1 in range(col, cols, next_value):
if df[col_1][row]==-1:
df[col_1][row]= avg
def process_csv(file_path, output_path='output.csv'):
processed_data ={}
df = pd.read_csv(file_path, header=None)
data = df.copy()
for col in data.columns:
processed_column = data[col][1:]
processed_1=pd.DataFrame(processed_column)
date = str(processed_column[1])
if date =="---":
processed_column[1]=-2
# blank_date = processed_column.iloc[0]
processed_column[2:]=-1
processed_data[col]= processed_column.tolist()
else:
day_index = datetime.strptime(date,'%d/%m/%Y').weekday()
processed_column[1]= day_index
processed_data[col]= processed_column.tolist()
processed_df = pd.DataFrame(processed_data)
processed_df.fillna(-1, inplace=True)
processed_df = processed_df.map(convert_to_int)
pd.set_option('display.max_columns', None)
processed_df.to_csv('first-pre.csv', index=False, header=False)
new_df = processed_df.transpose()
# data_dict=[]
for idx in range(len(new_df)):
temp=new_df[0][idx]
if temp ==-2:
prec = new_df[0][idx -1]
if prec <6:
missing_days = list(range(prec +1,7))
for i in missing_days:
new_rows = pd.DataFrame({0: i}, index=[0])
# Split the DataFrame into two parts
part_before = new_df[:idx]
part_after = new_df[idx:]
new_df = pd.concat([part_before, new_rows, part_after]).reset_index(drop=True)
# Move the index forward to account for the new row
idx = idx +1
next = new_df[0][idx +1]
if next >0: # Fill days from week start until next
missing_days_1= list(range(0, next))
for i in missing_days_1:
# Split the DataFrame into two parts: before and after the current position
part_before = new_df[:idx +1]
part_after = new_df[idx +1:]
# Create a new DataFrame with the missing day
new_row = pd.DataFrame({0: i}, index=[0])
new_df = pd.concat([part_before, new_row, part_after]).reset_index(drop=True)
# Move the index forward to account for the new row
idx =idx +1
idx = idx +1
for i in range(len(new_df)):
last_value = new_df[0][len(new_df)-1]
for i in range(last_value +1,7):
new_row = pd.DataFrame({0: i},index=[0])
new_df = pd.concat([new_df, new_row]).reset_index(drop=True)
print(new_df)
# new_df.to_csv('second_pre.csv')
new_df = pd.DataFrame(new_df).transpose()
new_df.fillna(-1, inplace=True)
new_df = new_df.map(convert_to_int)
new_df.to_csv('final.csv')
if (new_df[0]==-2).any():
new_df = new_df.drop(columns=[-2])
week_labels =[]
week_counter =1
for i, col in enumerate(new_df.columns):
day = new_df[col][0]
if i !=0 and day ==0:
week_counter +=1
week_label = f'week_{week_counter}'
week_labels.append(week_label)
week_labels_df = pd.DataFrame([week_labels], columns=new_df.columns)
combined_df = pd.concat([week_labels_df, new_df])
combined_df.reset_index(drop=True, inplace=True)
# for row in combined_df[1:]:
# combined_df[row]= combined_df[row].apply(convert_to_int)
replace_negatives_with_averages(combined_df)
final_df = combined_df
print("final_df is:")
final_df[1:]= final_df[1:].astype(int)
print("final df[1] is:", final_df[1:])
exit(0)
final_df.to_csv('output.csv', index=None, header= None)
if __name__=='__main__':
input_file = 'centova_dir_stadio.csv' # Input file path
output_path = 'output.csv' # Output file path
result = process_csv(input_file, output_path)
print("Processed DataFrame:")
print(result)
Now if you understand the logic you will know that I am trying to replace the negatives (-1) with the average o same hours for same days in weekday values ( ex, take all monday 7 pm data and make the average and if there is -1 in any monday 7 pm, replace the -1 with the average).
in the replace negatives with average function, I want to create two empty matrix of sum and count and then I am going to provide you one pseudo code. Can you please adjust the code by using this?
First create these two matrixes..
then,
for row in (2,26):
for col in cols:
if df[col][row]!=-1 then
sum[col%7][row]<---sum[col%7][row]+df[col][row]
count[col%7][row]++
for row
for col
if df[col][row]==1 then
df[col][row]<----sum[col%7

Step by Step Solution

There are 3 Steps involved in it

1 Expert Approved Answer
Step: 1 Unlock blur-text-image
Question Has Been Solved by an Expert!

Get step-by-step solutions from verified subject matter experts

Step: 2 Unlock
Step: 3 Unlock

Students Have Also Explored These Related Programming Questions!