Question: import pandas as pd from google.cloud import bigquery def create_time_dimension(df): Creates a time dimension
import pandas as pd
from google.cloud import bigquery
def create_time_dimension(df):
    """Create a time-of-day dimension table from ``df['created_date']``.

    Args:
        df: DataFrame with a ``created_date`` column holding timestamps,
            either already parsed or as strings in
            ``%Y-%m-%dT%H:%M:%S.%f`` format.

    Returns:
        A DataFrame with columns ``id``, ``hour`` and ``minute`` containing
        one row per distinct (hour, minute) pair. ``id`` is
        ``hour * 100 + minute`` (13:45 -> 1345).

    Raises:
        KeyError: If ``df`` has no ``created_date`` column.
        ValueError: If a string value does not match the expected format.
    """
    # Parse into a local Series so the caller's DataFrame is left untouched.
    created = pd.to_datetime(df["created_date"], format="%Y-%m-%dT%H:%M:%S.%f")
    # A missing timestamp has no hour/minute and cannot receive an integer id.
    created = created.dropna()

    time_dim = pd.DataFrame({"hour": created.dt.hour, "minute": created.dt.minute})
    # Same value as int(strftime("%H%M")) without the string round-trip.
    time_id = (time_dim["hour"] * 100 + time_dim["minute"]).astype("int64")
    time_dim.insert(0, "id", time_id)
    return time_dim.drop_duplicates()
def create_date_dimension(df):
    """Create a calendar-date dimension table from ``df['created_date']``.

    Args:
        df: DataFrame with a ``created_date`` column holding timestamps,
            either already parsed or as strings in
            ``%Y-%m-%dT%H:%M:%S.%f`` format.

    Returns:
        A DataFrame with columns ``id``, ``date``, ``year``, ``month`` and
        ``day`` containing one row per distinct calendar date. ``id`` is
        the date written as the integer ``YYYYMMDD``.

    Raises:
        KeyError: If ``df`` has no ``created_date`` column.
        ValueError: If a string value does not match the expected format.
    """
    # Parse into a local Series so the caller's DataFrame is left untouched.
    created = pd.to_datetime(df["created_date"], format="%Y-%m-%dT%H:%M:%S.%f")
    # A missing timestamp has no calendar date and cannot receive an integer id.
    created = created.dropna()

    date_dim = pd.DataFrame(
        {
            "date": created.dt.date,
            "year": created.dt.year,
            "month": created.dt.month,
            "day": created.dt.day,
        }
    )
    # Same value as int(strftime("%Y%m%d")) without the string round-trip.
    date_id = date_dim["year"] * 10000 + date_dim["month"] * 100 + date_dim["day"]
    date_dim.insert(0, "id", date_id.astype("int64"))
    return date_dim.drop_duplicates()
def create_complaint_type_dim(df):
    """Create the complaint-type dimension table.

    Args:
        df: DataFrame containing ``complaint_type`` and ``descriptor`` columns.

    Returns:
        A DataFrame with columns ``id``, ``complaint_type`` and
        ``descriptor``: one row per distinct (complaint_type, descriptor)
        pair in first-seen order, with ``id`` numbered from 0.

    Raises:
        KeyError: If either source column is missing from ``df``.
    """
    complaint_types = df[["complaint_type", "descriptor"]].drop_duplicates()
    # Renumber the rows so the index can double as a dense surrogate key.
    complaint_types = complaint_types.reset_index(drop=True)
    complaint_types.insert(0, "id", list(range(len(complaint_types))))
    return complaint_types
def create_location_type_dim(df):
    """Create the location-type dimension table.

    Args:
        df: DataFrame containing ``location_type`` and ``borough`` columns.

    Returns:
        A DataFrame with columns ``id``, ``location_type`` and ``borough``:
        one row per distinct (location_type, borough) pair in first-seen
        order, with ``id`` numbered from 0.

    Raises:
        KeyError: If either source column is missing from ``df``.
    """
    location_types = df[["location_type", "borough"]].drop_duplicates()
    # Renumber the rows so the index can double as a dense surrogate key.
    location_types = location_types.reset_index(drop=True)
    location_types.insert(0, "id", list(range(len(location_types))))
    return location_types
def create_agency_dim(df):
    """Create the agency dimension table.

    Args:
        df: DataFrame containing ``agency`` and ``agency_name`` columns.

    Returns:
        A DataFrame with columns ``id``, ``agency`` and ``agency_name``:
        one row per distinct (agency, agency_name) pair in first-seen
        order, with ``id`` numbered from 0.

    Raises:
        KeyError: If either source column is missing from ``df``.
    """
    agencies = df[["agency", "agency_name"]].drop_duplicates()
    # Renumber the rows so the index can double as a dense surrogate key.
    agencies = agencies.reset_index(drop=True)
    agencies.insert(0, "id", list(range(len(agencies))))
    return agencies
def create_complaint_status_dim(df):
    """Create the complaint-status dimension table.

    Args:
        df: DataFrame containing a ``status`` column.

    Returns:
        A DataFrame with columns ``id`` and ``status``: one row per
        distinct status in first-seen order, with ``id`` numbered from 0.

    Raises:
        KeyError: If ``df`` has no ``status`` column.
    """
    # Double brackets keep a DataFrame (not a Series) so a column can be inserted.
    statuses = df[["status"]].drop_duplicates()
    # Renumber the rows so the index can double as a dense surrogate key.
    statuses = statuses.reset_index(drop=True)
    statuses.insert(0, "id", list(range(len(statuses))))
    return statuses
def create_complaint_fact_table(df, bq_client, project_id, dataset_id):
    """Create the complaint fact table and load it, with its dimensions, to BigQuery.

    The fact table has one row per row of ``df``. It holds the parsed
    ``created_date`` plus one foreign-key column per dimension
    (``time_id``, ``complaint_type_id``, ``location_type_id``,
    ``agency_id``, ``complaint_status_id``). Each key references the ``id``
    column of the matching dimension table.

    Args:
        df (pandas.DataFrame): The raw complaint data DataFrame. Must
            contain ``created_date``, ``complaint_type``, ``descriptor``,
            ``location_type``, ``borough``, ``agency``, ``agency_name`` and
            ``status``.
        bq_client (bigquery.Client): A BigQuery client object.
        project_id (str): The Google Cloud Project ID.
        dataset_id (str): The BigQuery dataset ID.

    Raises:
        KeyError: If a required column is missing from ``df``.
        ValueError: If ``created_date`` holds strings in an unexpected format.
    """
    # NOTE(review): this block was rebuilt from a text dump that had lost its
    # punctuation. The dump selected 'Time', 'date' and 'complaint_status'
    # from ``df``; no other function in this file reads those columns, so the
    # fact table is built only from the columns the dimension builders use.
    # Confirm against the real schema.

    # Create dimension tables
    complaint_type_dim = create_complaint_type_dim(df)
    location_type_dim = create_location_type_dim(df)
    time_dim = create_time_dimension(df)
    agency_dim = create_agency_dim(df)
    complaint_status_dim = create_complaint_status_dim(df)

    # Create the fact table. Parse timestamps locally so ``df`` is not modified.
    created = pd.to_datetime(df["created_date"], format="%Y-%m-%dT%H:%M:%S.%f")
    fact_table = df[
        [
            "complaint_type",
            "descriptor",
            "location_type",
            "borough",
            "agency",
            "agency_name",
            "status",
        ]
    ].copy()
    fact_table.insert(0, "created_date", created)
    # The time dimension is keyed by (hour, minute), not by the full timestamp.
    fact_table["hour"] = created.dt.hour
    fact_table["minute"] = created.dt.minute

    # (foreign-key column, dimension table, natural-key columns)
    dimensions = [
        ("time_id", time_dim, ["hour", "minute"]),
        ("complaint_type_id", complaint_type_dim, ["complaint_type", "descriptor"]),
        ("location_type_id", location_type_dim, ["location_type", "borough"]),
        ("agency_id", agency_dim, ["agency", "agency_name"]),
        ("complaint_status_id", complaint_status_dim, ["status"]),
    ]
    natural_columns = []
    for key_column, dim, natural_key in dimensions:
        # Rename ``id`` per dimension so successive merges never collide, and
        # join on the full natural key so a fact row matches at most one
        # dimension row. A left join keeps every complaint, matched or not.
        lookup = dim[["id"] + natural_key].rename(columns={"id": key_column})
        fact_table = fact_table.merge(lookup, on=natural_key, how="left")
        # Nullable integer keeps the key integral even for unmatched rows.
        fact_table[key_column] = fact_table[key_column].astype("Int64")
        natural_columns.extend(natural_key)
    # The attributes live in the dimension tables; the fact keeps only the keys.
    fact_table = fact_table.drop(columns=natural_columns)

    # Load dimension tables and fact table to BigQuery
    tables = [
        ("complaint_type_dim", complaint_type_dim),
        ("location_type_dim", location_type_dim),
        ("time_dim", time_dim),
        ("agency_dim", agency_dim),
        ("complaint_status_dim", complaint_status_dim),
        ("complaint_fact", fact_table),
    ]
    # Every load replaces the destination table's contents.
    job_config = bigquery.LoadJobConfig(write_disposition="WRITE_TRUNCATE")
    for table_name, table_data in tables:
        table_id = f"{project_id}.{dataset_id}.{table_name}"
        job = bq_client.load_table_from_dataframe(
            table_data, table_id, job_config=job_config
        )
        job.result()  # Wait for the job to complete
# Example usage
if namemain:
Step by Step Solution
There are 3 Steps involved in it
1 Expert Approved Answer
Step: 1 Unlock
Question Has Been Solved by an Expert!
Get step-by-step solutions from verified subject matter experts
Step: 2 Unlock
Step: 3 Unlock
