Question: I am going to provide you one code. Please check and fix this. In the csv file, the rows represent the hours and the columns

I am going to provide you one code. Please check and fix this. In the csv file, the rows represent the hours and the columns represent the dates...
import pandas as pd
import numpy as np
from datetime import datetime
def convert_to_int(value):
return int(value)
def replace_negatives_with_averages(df):
rows, cols = df.shape
for row in range(rows):
row_values =[]
for col in range(cols):
value = df[col][row]
if isinstance(value,(int)) and value !=-1:
row_values.append(value)
for col in range(cols):
if df[col][row]==-1:
next_value =7
valid_values =[]
new_col = col + next_value
while new_col < cols:
value = df[new_col][row]
if isinstance(value,(int, float)) and value !=-1:
valid_values.append(value)
new_col = new_col + next_value
avg = sum(valid_values)/ len(valid_values)
for col_1 in range(col, cols, next_value):
if df[col_1][row]==-1:
df[col_1][row]= avg
def process_csv(file_path, output_path='output.csv'):
processed_data ={}
df = pd.read_csv(file_path, header=None)
data = df.copy()
for col in data.columns:
processed_column = data[col][1:]
processed_1=pd.DataFrame(processed_column)
date = str(processed_column[1])
if date =="---":
processed_column[1]=-2
# blank_date = processed_column.iloc[0]
processed_column[2:]=-1
processed_data[col]= processed_column.tolist()
else:
day_index = datetime.strptime(date,'%d/%m/%Y').weekday()
processed_column[1]= day_index
processed_data[col]= processed_column.tolist()
processed_df = pd.DataFrame(processed_data)
processed_df.fillna(-1, inplace=True)
processed_df = processed_df.map(convert_to_int)
pd.set_option('display.max_columns', None)
processed_df.to_csv('first-pre.csv', index=False, header=False)
new_df = processed_df.transpose()
# data_dict=[]
for idx in range(len(new_df)):
temp=new_df[0][idx]
if temp ==-2:
prec = new_df[0][idx -1]
if prec <6:
missing_days = list(range(prec +1,7))
for i in missing_days:
new_rows = pd.DataFrame({0: i}, index=[0])
# Split the DataFrame into two parts
part_before = new_df[:idx]
part_after = new_df[idx:]
new_df = pd.concat([part_before, new_rows, part_after]).reset_index(drop=True)
# Move the index forward to account for the new row
idx = idx +1
next = new_df[0][idx +1]
if next >0: # Fill days from week start until next
missing_days_1= list(range(0, next))
for i in missing_days_1:
# Split the DataFrame into two parts: before and after the current position
part_before = new_df[:idx +1]
part_after = new_df[idx +1:]
# Create a new DataFrame with the missing day
new_row = pd.DataFrame({0: i}, index=[0])
new_df = pd.concat([part_before, new_row, part_after]).reset_index(drop=True)
# Move the index forward to account for the new row
idx =idx +1
idx = idx +1
for i in range(len(new_df)):
last_value = new_df[0][len(new_df)-1]
for i in range(last_value +1,7):
new_row = pd.DataFrame({0: i},index=[0])
new_df = pd.concat([new_df, new_row]).reset_index(drop=True)
print(new_df)
# new_df.to_csv('second_pre.csv')
new_df = pd.DataFrame(new_df).transpose()
new_df.fillna(-1, inplace=True)
new_df = new_df.map(convert_to_int)
new_df.to_csv('final.csv')
if (new_df[0]==-2).any():
new_df = new_df.drop(columns=[-2])
week_labels =[]
week_counter =1
for i, col in enumerate(new_df.columns):
day = new_df[col][0]
if i !=0 and day ==0:
week_counter +=1
week_label = f'week_{week_counter}'
week_labels.append(week_label)
week_labels_df = pd.DataFrame([week_labels], columns=new_df.columns)
combined_df = pd.concat([week_labels_df, new_df])
combined_df.reset_index(drop=True, inplace=True)
# for row in combined_df[1:]:
# combined_df[row]= combined_df[row].apply(convert_to_int)
replace_negatives_with_averages(combined_df)
final_df = combined_df
print("final_df is:")
final_df[1:]= final_df[1:].astype(int)
print("final df[1] is:", final_df[1:])
exit(0)
final_df.to_csv('output.csv', index=None, header= None)
if __name__=='__main__':
input_file = 'centova_dir_stadio.csv' # Input file path
output_path = 'output.csv' # Output file path
result = process_csv(input_file, output_path)
print("Processed DataFrame:")
print(result)
Now if you understand the logic you will know that I am trying to replace the negatives (-1) with the average o same hours for same days in weekday values ( ex, take all monday 7 pm data and make the average and if there is -1 in any monday 7 pm, replace the -1 with the average).
in the replace negatives with average function, I want to create two empty matrix of sum and count and then I am going to provide you one pseudo code. Can you please adjust the code by using this?
First create these two matrixes..
then,
for row in (2,26):
for col in cols:
if df[col][row]!=-1 then
sum[col%7][row]<---sum[col%7][row]+df[col][row]
count[col%7][row]++
for row
for col
if df[col][row]==1 then
df[col][row]<----sum[col<

Step by Step Solution

There are 3 Steps involved in it

1 Expert Approved Answer
Step: 1 Unlock blur-text-image
Question Has Been Solved by an Expert!

Get step-by-step solutions from verified subject matter experts

Step: 2 Unlock
Step: 3 Unlock

Students Have Also Explored These Related Programming Questions!