Question:

import os
import pickle
import numpy as np
import librosa
import soundfile as sf
import sounddevice as sd
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_classif

# Paths to audio files
audio_files = {
    "Ifrah": "C:/software test/voices/Ifra5.wav",
    "Sharonne": "C:/software test/voices/Sharonne1.wav",
    "Talha": "C:/software test/voices/Talha_converted.wav"
}

# Define a function to load an audio file and extract features
def extract_features(audio_path):
    y, sr = librosa.load(audio_path)
    return extract_features_from_array(y, sr)

# Extract features from an audio array
def extract_features_from_array(y, sr):
    mfccs = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13).T, axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(y=y, sr=sr).T, axis=0)
    spectral_contrast = np.mean(librosa.feature.spectral_contrast(y=y, sr=sr).T, axis=0)
    zero_crossings = np.mean(librosa.feature.zero_crossing_rate(y=y).T, axis=0)
    spectral_rolloff = np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr).T, axis=0)
    rms = np.mean(librosa.feature.rms(y=y).T, axis=0)
    mel_spectrogram = np.mean(librosa.feature.melspectrogram(y=y, sr=sr).T, axis=0)
    spectral_bandwidth = np.mean(librosa.feature.spectral_bandwidth(y=y, sr=sr).T, axis=0)
    mfcc_delta = np.mean(librosa.feature.delta(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)).T, axis=0)
    features = np.hstack([mfccs, chroma, spectral_contrast, zero_crossings,
                          spectral_rolloff, rms, mel_spectrogram,
                          spectral_bandwidth, mfcc_delta])
    return features

# Augment audio data
def augment_audio(y, sr):
    noise = np.random.normal(0, 0.005, len(y))  # Reduced noise level
    y_noisy = y + noise
    y_pitched = librosa.effects.pitch_shift(y, sr=sr, n_steps=1)  # Smaller pitch shift
    y_speed = librosa.effects.time_stretch(y.astype('float32'), rate=1.05)  # Less aggressive time stretch
    return [y_noisy, y_pitched, y_speed]

# Function to record and save audio for a speaker
def record_and_save():
    duration = 10  # seconds
    print("Please record your audio for speaker identification...")
    recording = sd.rec(int(duration * 16000), samplerate=16000, channels=1, dtype='float32')
    sd.wait()
    filename = "speaker_recorded.wav"
    sf.write(filename, recording, 16000)
    print(f"Recording saved as {filename}")
    return filename

# Extract features from uploaded files
data = []
labels = []
for label, audio_path in audio_files.items():
    y, sr = librosa.load(audio_path)
    # Original features
    features = extract_features(audio_path)
    data.append(features)
    labels.append(label)
    # Augmented features
    augmented_audios = augment_audio(y, sr)
    for aug_y in augmented_audios:
        aug_features = extract_features_from_array(aug_y, sr)
        data.append(aug_features)
        labels.append(label)

# Convert data and labels to NumPy arrays
data = np.array(data)
labels = np.array(labels)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.3, random_state=42)

# Normalize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Feature selection to reduce dimensionality and remove noise
selector = SelectKBest(score_func=f_classif, k=50)  # Increased k to retain more features
X_train = selector.fit_transform(X_train, y_train)
X_test = selector.transform(X_test)

# Train a Random Forest Classifier with GridSearch for hyperparameter tuning
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20],
    'min_samples_split': [5, 10],  # Increased to prevent overfitting
    'min_samples_leaf': [2, 4]     # Increased to prevent overfitting
}
kfold = StratifiedKFold(n_splits=3)
grid = GridSearchCV(RandomForestClassifier(random_state=42), param_grid, cv=kfold, refit=True, verbose=3)
grid.fit(X_train, y_train)

# Use the best model from grid search
rf_classifier = grid.best_estimator_

# Train a Gradient Boosting Classifier for comparison
gb_classifier = GradientBoostingClassifier(random_state=42)
gb_classifier.fit(X_train, y_train)

# Predict on the test set with Random Forest
y_pred_rf = rf_classifier.predict(X_test)
# Predict on the test set with Gradient Boosting
y_pred_gb = gb_classifier.predict(X_test)

# Evaluate the Random Forest model
conf_matrix_rf = confusion_matrix(y_test, y_pred_rf)
class_report_rf = classification_report(y_test, y_pred_rf)
print("Random Forest Confusion Matrix:")
print(conf_matrix_rf)
print("\nRandom Forest Classification Report:")
print(class_report_rf)

# Evaluate the Gradient Boosting model
conf_matrix_gb = confusion_matrix(y_test, y_pred_gb)
class_report_gb = classification_report(y_test, y_pred_gb)
print("Gradient Boosting Confusion Matrix:")
print(conf_matrix_gb)
print("\nGradient Boosting Classification Report:")
print(class_report_gb)

# Save the trained models and scaler
with open("rf_classifier_model.pkl", "wb") as model_file:
    pickle.dump(rf_classifier, model_file)
with open("gb_classifier_model.pkl", "wb") as model_file:
    pickle.dump(gb_classifier, model_file)
with open("scaler.pkl", "wb") as scaler_file:
    pickle.dump(scaler, scaler_file)
# Save the feature selector
with open("selector.pkl", "wb") as selector_file:
    pickle.dump(selector, selector_file)

# Function to record and predict the speaker
def record_and_predict():
    # Record audio
    filename = record_and_save()
    # Extract features from the recorded audio
    features = extract_features(filename).reshape(1, -1)
    # Load scaler, selector, and model for prediction
    with open("scaler.pkl", "rb") as scaler_file:
        scaler = pickle.load(scaler_file)
    features = scaler.transform(features)
    with open("selector.pkl", "rb") as selector_file:
        selector = pickle.load(selector_file)
    features = selector.transform(features)
    with open("rf_classifier_model.pkl", "rb") as model_file:
        model = pickle.load(model_file)
    prediction = model.predict(features)
    print(f"The speaker is predicted to be: {prediction[0]}\n")

# Record and predict speakers until the user chooses to stop
def main():
    while True:
        record_and_predict()
        cont = input("Do you want to identify another speaker? (yes/no): ").strip().lower()
        if cont != 'yes':
            break

# Run the main function
if __name__ == "__main__":
    main()
Can you correct this code so that the machine correctly identifies the person who records the audio? Using the audio files we put in the code to teach the machine our voices, it must let the user record a clip and identify whether the speaker is Ifrah, Sharonne, or Talha.
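The main reason a script like this misidentifies recorded speakers is data scarcity rather than the classifiers: each speaker is represented by a single file, so after augmentation there are only four samples per class, the unstratified 70/30 split can leave a speaker out of the training set entirely, and StratifiedKFold with n_splits=3 can fail outright with so few samples per class. Below is a minimal sketch of one way to rebuild the data pipeline, assuming the audio_files dictionary, extract_features_from_array, and augment_audio defined above; the segment_signal helper and the 2-second/1-second window sizes are illustrative choices, not part of the original code.

import numpy as np
import librosa
from sklearn.model_selection import train_test_split

# Hypothetical helper: slice a long enrollment recording into overlapping
# fixed-length segments so each speaker contributes many training samples.
def segment_signal(y, sr, seg_seconds=2.0, hop_seconds=1.0):
    seg = int(seg_seconds * sr)
    hop = int(hop_seconds * sr)
    if len(y) <= seg:
        return [y]
    return [y[i:i + seg] for i in range(0, len(y) - seg + 1, hop)]

data, labels = [], []
for label, audio_path in audio_files.items():
    # Load at 16 kHz so enrollment audio matches the rate used by record_and_save()
    y, sr = librosa.load(audio_path, sr=16000)
    for seg_y in segment_signal(y, sr):
        data.append(extract_features_from_array(seg_y, sr))
        labels.append(label)
        # Augment each segment rather than the whole file
        for aug_y in augment_audio(seg_y, sr):
            data.append(extract_features_from_array(aug_y, sr))
            labels.append(label)

data = np.array(data)
labels = np.array(labels)

# stratify= guarantees every speaker appears in both the training and test sets
X_train, X_test, y_train, y_test = train_test_split(
    data, labels, test_size=0.3, random_state=42, stratify=labels)

With this change the rest of the pipeline (scaling, SelectKBest, grid search, model saving) can stay as written. For consistency, record_and_predict should then load the recorded clip at the same rate, e.g. y, sr = librosa.load(filename, sr=16000) followed by extract_features_from_array(y, sr); the default librosa.load resampling to 22,050 Hz would also work, but only if it is applied identically at training and prediction time.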
