Question: import pandas as pd from google.cloud import bigquery def create_time_dimension(df): Creates a time dimension

import pandas as pd
from google.cloud import bigquery
def create_time_dimension(df: pd.DataFrame) -> pd.DataFrame:
    """Build the time-of-day dimension from ``df['created_date']``.

    Args:
        df: Raw complaint data with a ``created_date`` column holding either
            datetimes or strings formatted as ``%Y-%m-%dT%H:%M:%S.%f``.

    Returns:
        A DataFrame with the columns ``id``, ``hour`` and ``minute`` and one
        row per distinct (hour, minute) pair. ``id`` is the integer read from
        the ``HHMM`` rendering of the timestamp (09:05 becomes 905).

    Errors raised by ``pd.to_datetime`` for unparseable values propagate to
    the caller.
    """
    # Parse into a local Series so the caller's DataFrame is left untouched.
    created = pd.to_datetime(df['created_date'], format='%Y-%m-%dT%H:%M:%S.%f')
    # A missing timestamp has no hour/minute; dropping it also keeps the
    # integer cast below from failing on NaN.
    created = created.dropna()
    time_dim = pd.DataFrame({
        'id': created.dt.strftime('%H%M').astype(int),
        'hour': created.dt.hour,
        'minute': created.dt.minute,
    })
    return time_dim.drop_duplicates()
def create_date_dimension(df: pd.DataFrame) -> pd.DataFrame:
    """Build the calendar-date dimension from ``df['created_date']``.

    Args:
        df: Raw complaint data with a ``created_date`` column holding either
            datetimes or strings formatted as ``%Y-%m-%dT%H:%M:%S.%f``.

    Returns:
        A DataFrame with the columns ``id``, ``date``, ``year``, ``month``
        and ``day`` and one row per distinct calendar date. ``id`` is the
        integer read from the ``YYYYMMDD`` rendering of the timestamp.

    Errors raised by ``pd.to_datetime`` for unparseable values propagate to
    the caller.
    """
    # Parse into a local Series so the caller's DataFrame is left untouched.
    created = pd.to_datetime(df['created_date'], format='%Y-%m-%dT%H:%M:%S.%f')
    # A missing timestamp has no calendar date; dropping it also keeps the
    # integer cast below from failing on NaN.
    created = created.dropna()
    date_dim = pd.DataFrame({
        'id': created.dt.strftime('%Y%m%d').astype(int),
        'date': created.dt.date,
        'year': created.dt.year,
        'month': created.dt.month,
        'day': created.dt.day,
    })
    return date_dim.drop_duplicates()
def create_complaint_type_dim(df: pd.DataFrame) -> pd.DataFrame:
    """Build the complaint-type dimension.

    Args:
        df: Raw complaint data containing the ``complaint_type`` and
            ``descriptor`` columns.

    Returns:
        A DataFrame with the columns ``id``, ``complaint_type`` and
        ``descriptor``: one row per distinct (complaint_type, descriptor)
        pair, in order of first appearance, with ``id`` counting up from 1.
    """
    complaint_types = (
        df[['complaint_type', 'descriptor']]
        .drop_duplicates()
        .reset_index(drop=True)
    )
    # The fresh 0-based index shifted by one becomes the surrogate key.
    complaint_types.insert(0, 'id', complaint_types.index + 1)
    return complaint_types
def create_location_type_dim(df: pd.DataFrame) -> pd.DataFrame:
    """Build the location-type dimension.

    Args:
        df: Raw complaint data containing the ``location_type`` and
            ``borough`` columns.

    Returns:
        A DataFrame with the columns ``id``, ``location_type`` and
        ``borough``: one row per distinct (location_type, borough) pair, in
        order of first appearance, with ``id`` counting up from 1.
    """
    location_types = (
        df[['location_type', 'borough']]
        .drop_duplicates()
        .reset_index(drop=True)
    )
    # The fresh 0-based index shifted by one becomes the surrogate key.
    location_types.insert(0, 'id', location_types.index + 1)
    return location_types
def create_agency_dim(df: pd.DataFrame) -> pd.DataFrame:
    """Build the agency dimension.

    Args:
        df: Raw complaint data containing the ``agency`` and ``agency_name``
            columns.

    Returns:
        A DataFrame with the columns ``id``, ``agency`` and ``agency_name``:
        one row per distinct (agency, agency_name) pair, in order of first
        appearance, with ``id`` counting up from 1.
    """
    agencies = (
        df[['agency', 'agency_name']]
        .drop_duplicates()
        .reset_index(drop=True)
    )
    # The fresh 0-based index shifted by one becomes the surrogate key.
    agencies.insert(0, 'id', agencies.index + 1)
    return agencies
def create_complaint_status_dim(df: pd.DataFrame) -> pd.DataFrame:
    """Build the complaint-status dimension.

    Args:
        df: Raw complaint data containing the ``status`` column.

    Returns:
        A DataFrame with the columns ``id`` and ``status``: one row per
        distinct status, in order of first appearance, with ``id`` counting
        up from 1.
    """
    statuses = df[['status']].drop_duplicates().reset_index(drop=True)
    # The fresh 0-based index shifted by one becomes the surrogate key.
    statuses.insert(0, 'id', statuses.index + 1)
    return statuses
def create_complaint_fact_table(df, bq_client, project_id, dataset_id):
    """Build the complaint star schema and load every table into BigQuery.

    The dimension tables are produced by the ``create_*`` builders in this
    module. The fact table has one row per row of ``df`` and holds only the
    surrogate keys pointing at those dimensions: ``date_id``, ``time_id``,
    ``complaint_type_id``, ``location_type_id``, ``agency_id`` and
    ``complaint_status_id``.

    Args:
        df (pandas.DataFrame): The raw complaint data DataFrame. Must contain
            ``created_date``, ``complaint_type``, ``descriptor``,
            ``location_type``, ``borough``, ``agency``, ``agency_name`` and
            ``status``.
        bq_client (bigquery.Client): A BigQuery client object.
        project_id (str): The Google Cloud Project ID.
        dataset_id (str): The BigQuery dataset ID.

    Returns:
        None. Each table is written to
        ``{project_id}.{dataset_id}.{table_name}`` with ``WRITE_TRUNCATE``,
        i.e. existing contents are replaced.
    """
    # Create dimension tables
    complaint_type_dim = create_complaint_type_dim(df)
    location_type_dim = create_location_type_dim(df)
    time_dim = create_time_dimension(df)
    date_dim = create_date_dimension(df)
    agency_dim = create_agency_dim(df)
    complaint_status_dim = create_complaint_status_dim(df)

    # Each lookup dimension is matched on the full set of natural-key columns
    # it was de-duplicated on, so every fact row finds exactly one dimension
    # row and the merge cannot multiply rows.
    lookups = [
        (complaint_type_dim, ['complaint_type', 'descriptor'], 'complaint_type_id'),
        (location_type_dim, ['location_type', 'borough'], 'location_type_id'),
        (agency_dim, ['agency', 'agency_name'], 'agency_id'),
        (complaint_status_dim, ['status'], 'complaint_status_id'),
    ]

    # Create the fact table
    natural_keys = [column for _, keys, _ in lookups for column in keys]
    fact_table = df[natural_keys].copy()

    # Date/time keys use the same YYYYMMDD / HHMM encoding as the dimension
    # builders. The nullable Int64 dtype lets a missing timestamp become a
    # NULL key instead of failing the integer cast.
    created = pd.to_datetime(df['created_date'], format='%Y-%m-%dT%H:%M:%S.%f')
    fact_table['date_id'] = (
        created.dt.year * 10000 + created.dt.month * 100 + created.dt.day
    ).astype('Int64')
    fact_table['time_id'] = (
        created.dt.hour * 100 + created.dt.minute
    ).astype('Int64')

    for dim, keys, fk_name in lookups:
        # Renaming ``id`` first avoids the id_x / id_y suffix collisions that
        # repeated merges would otherwise create.
        fact_table = fact_table.merge(
            dim.rename(columns={'id': fk_name}), on=keys, how='left'
        )

    fact_table = fact_table[
        ['date_id', 'time_id'] + [fk_name for _, _, fk_name in lookups]
    ]

    # Load dimension tables and fact table to BigQuery
    tables = [
        ('complaint_type_dim', complaint_type_dim),
        ('location_type_dim', location_type_dim),
        ('time_dim', time_dim),
        ('date_dim', date_dim),
        ('agency_dim', agency_dim),
        ('complaint_status_dim', complaint_status_dim),
        ('complaint_fact', fact_table),
    ]
    for table_name, table_data in tables:
        table_id = f"{project_id}.{dataset_id}.{table_name}"
        job_config = bigquery.LoadJobConfig(
            write_disposition="WRITE_TRUNCATE"
        )
        job = bq_client.load_table_from_dataframe(
            table_data, table_id, job_config=job_config
        )
        job.result()  # Wait for the job to complete
# Example usage
if __name__=="__main__":

Step by Step Solution

There are 3 Steps involved in it

1 Expert Approved Answer
Step: 1 Unlock blur-text-image
Question Has Been Solved by an Expert!

Get step-by-step solutions from verified subject matter experts

Step: 2 Unlock
Step: 3 Unlock

Students Have Also Explored These Related Programming Questions!