Building a Music Recommender System Using Machine Learning Classifier Models
jupyter_notebook
machine_learning
classifier
audio_data
In [20]:
# Usual Libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import IPython.display as ipd
import sklearn
from sklearn import preprocessing
from sklearn.decomposition import PCA
from sklearn.metrics.pairwise import cosine_similarity
#Import Classifier Models
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import SGDClassifier, LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier, XGBRFClassifier
from xgboost import plot_tree, plot_importance
from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score, roc_curve
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import RFE
# Librosa (the mother of audio files)
import librosa
import librosa.display
import warnings
warnings.filterwarnings('ignore')
#Debug machine learning classifiers and explain their predictions
import eli5
from eli5.sklearn import PermutationImportance
In [8]:
import os
general_path = 'gtzan'
print(list(os.listdir(f'{general_path}/genres_original/')))
['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', 'pop', 'reggae', 'rock']
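As a quick sanity check on the dataset layout, a sketch like the following counts the clips per genre (assuming each genre folder directly contains its .wav files; the standard GTZAN release has 100 per genre):
In [ ]:
# Illustrative sanity check: count the clips in each genre folder
for genre in sorted(os.listdir(f'{general_path}/genres_original/')):
    n_clips = len(os.listdir(f'{general_path}/genres_original/{genre}'))
    print(genre, ':', n_clips)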
In [4]:
# Importing file from metal genre
y, sr = librosa.load(f'{general_path}/genres_original/metal/metal.00017.wav')
print('y:', y, '\n')
print('y shape:', np.shape(y), '\n')
print('Sample Rate (Hz):', sr, '\n')
# Verify the length of the audio in seconds (samples / sample rate)
print('Check Len of Audio:', len(y)/sr)
y: [ 0.05142212 0.07095337 0.0239563 ... 0.00317383 -0.03045654 0.02932739] 
y shape: (661504,) 
Sample Rate (Hz): 22050 
Check Len of Audio: 30.000181405895692
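librosa also provides a helper for this; a minimal equivalent check:
In [ ]:
# Equivalent duration check using librosa's built-in helper
print('Duration (s):', librosa.get_duration(y=y, sr=sr))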
In [5]:
# Trim leading and trailing silence from an audio signal (silence before and after the actual audio)
metal_file, _ = librosa.effects.trim(y)
# the result is a numpy ndarray
print('Audio File:', metal_file, '\n')
print('Audio File shape:', np.shape(metal_file))
Audio File: [ 0.05142212 0.07095337 0.0239563 ... 0.00317383 -0.03045654 0.02932739] 
Audio File shape: (661504,)
In [6]:
plt.figure(figsize = (16, 6))
librosa.display.waveshow(y = metal_file, sr = sr, color = "#A300F9");
plt.title("Sound Waves in Reggae 36", fontsize = 23);
In [7]:
# Default FFT window size
n_fft = 2048 # FFT window size
hop_length = 512 # number of audio samples between successive STFT columns (a common default)
# Short-time Fourier transform (STFT)
D = np.abs(librosa.stft(metal_file, n_fft = n_fft, hop_length = hop_length))
print('Shape of D object:', np.shape(D))
Shape of D object: (1025, 1293)
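The shape follows directly from the parameters: the rows are the n_fft/2 + 1 frequency bins, and the columns are the centered analysis frames, 1 + len(signal) // hop_length. A quick verification:
In [ ]:
# Why D is (1025, 1293): frequency bins x centered frames
print('Frequency bins:', n_fft // 2 + 1)              # 1025
print('Frames:', 1 + len(metal_file) // hop_length)   # 1293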
In [8]:
# Plot the raw linear-amplitude STFT matrix (hard to read; the dB-scaled spectrogram below is much clearer)
plt.figure(figsize = (16, 6))
plt.plot(D);
In [9]:
# Convert the amplitude spectrogram to a decibel-scaled spectrogram
DB = librosa.amplitude_to_db(D, ref = np.max)
# Creating the Spectrogram
plt.figure(figsize = (16, 6))
librosa.display.specshow(DB, sr = sr, hop_length = hop_length, x_axis = 'time', y_axis = 'log',
cmap = 'cool')
plt.colorbar();
In [10]:
y, sr = librosa.load(f'{general_path}/genres_original/metal/metal.00017.wav')
y, _ = librosa.effects.trim(y)
S = librosa.feature.melspectrogram(y=y, sr=sr)
# melspectrogram returns a power spectrogram, so convert with power_to_db
S_DB = librosa.power_to_db(S, ref=np.max)
plt.figure(figsize = (16, 6))
librosa.display.specshow(S_DB, sr=sr, hop_length=hop_length, x_axis = 'time', y_axis = 'mel',
                         cmap = 'cool');
plt.colorbar();
plt.title("Metal Mel Spectrogram", fontsize = 23);
In [11]:
# Total zero crossings in this one song
zero_crossings = librosa.zero_crossings(metal_file, pad=False)
print(sum(zero_crossings))
105180
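librosa also exposes this as a framewise feature; a sketch using zero_crossing_rate, which returns a rate per frame rather than a raw count:
In [ ]:
# Framewise zero-crossing rate (fraction of crossings per frame, not a total count)
zcr = librosa.feature.zero_crossing_rate(y=metal_file)
print('ZCR shape:', zcr.shape)
print('Mean ZCR:', zcr.mean())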
In [12]:
y_harm, y_perc = librosa.effects.hpss(metal_file)
plt.figure(figsize = (16, 6))
plt.plot(y_harm, color = '#A300F9');
plt.plot(y_perc, color = '#FFB100');
In [13]:
# Estimate the global tempo (BPM)
tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
tempo
Out[13]:
107.666015625
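Beat tracking often behaves better on the percussive component alone; an illustrative variant (not part of the original run, so the value may differ from the one above):
In [ ]:
# Tempo estimated from the percussive component only (illustrative)
tempo_perc, _ = librosa.beat.beat_track(y=y_perc, sr=sr)
print('Percussive tempo estimate:', tempo_perc)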
In [14]:
# Calculate the Spectral Centroids
spectral_centroids = librosa.feature.spectral_centroid(y=metal_file, sr=sr)[0]
# Shape is a vector
print('Centroids:', spectral_centroids, '\n')
print('Shape of Spectral Centroids:', spectral_centroids.shape, '\n')
# Computing the time variable for visualization
frames = range(len(spectral_centroids))
# Converts frame counts to time (seconds)
t = librosa.frames_to_time(frames)
print('frames:', frames, '\n')
print('t:', t)
# Function that normalizes the Sound Data
def normalize(x, axis=0):
    return sklearn.preprocessing.minmax_scale(x, axis=axis)
Centroids: [3068.15111516 3128.41194354 3123.48928657 ... 2558.13730833 2960.96376713 3665.36126394] 
Shape of Spectral Centroids: (1293,) 
frames: range(0, 1293) 
t: [0.00000000e+00 2.32199546e-02 4.64399093e-02 ... 2.99537415e+01 2.99769615e+01 3.00001814e+01]
In [15]:
#Plotting the Spectral Centroid along the waveform
plt.figure(figsize = (16, 6))
librosa.display.waveshow(metal_file, sr=sr, alpha=0.4, color = '#A300F9')
plt.plot(t, normalize(spectral_centroids), color='#FFB100');
In [16]:
# Spectral RollOff Vector
spectral_rolloff = librosa.feature.spectral_rolloff(y=metal_file, sr=sr)[0]
# The plot
plt.figure(figsize = (16, 6))
librosa.display.waveshow(metal_file, sr=sr, alpha=0.4, color = '#A300F9');
plt.plot(t, normalize(spectral_rolloff), color='#FFB100');
In [17]:
mfccs = librosa.feature.mfcc(y=metal_file, sr=sr)
print('mfccs shape:', mfccs.shape)
#Displaying the MFCCs:
plt.figure(figsize = (16, 6))
librosa.display.specshow(mfccs, sr=sr, x_axis='time', cmap = 'cool');
mfccs shape: (20, 1293)
In [18]:
# Perform Feature Scaling
mfccs = sklearn.preprocessing.scale(mfccs, axis=1)
print('Mean:', mfccs.mean(), '\n')
print('Var:', mfccs.var())
plt.figure(figsize = (16, 6))
librosa.display.specshow(mfccs, sr=sr, x_axis='time', cmap = 'cool');
Mean: -1.1801075e-09 
Var: 1.0
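Since scale(axis=1) standardizes each coefficient independently across time, every row of the matrix should land at mean ~0 and variance ~1; a quick spot-check of the first coefficient:
In [ ]:
# Spot-check: row 0 of the scaled MFCC matrix should have mean ~0 and var ~1
print('Row 0 mean:', mfccs[0].mean(), '| Row 0 var:', mfccs[0].var())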
In [19]:
# Increase or decrease hop_length to change how granular you want your data to be
hop_length = 5000
# Chromagram
chromagram = librosa.feature.chroma_stft(y=metal_file, sr=sr, hop_length=hop_length)
print('Chromagram shape:', chromagram.shape)
plt.figure(figsize=(16, 6))
librosa.display.specshow(chromagram, x_axis='time', y_axis='chroma',
                         hop_length=hop_length, cmap='coolwarm');
Chromagram shape: (12, 133)
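The 12 rows are the pitch classes (C, C#, ..., B); the 133 columns are the frames the signal yields at this coarser hop. A quick check of the frame count:
In [ ]:
# Why the chromagram has 133 columns at hop_length=5000
print('Frames at hop 5000:', 1 + len(metal_file) // hop_length)   # 133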
In [29]:
data = pd.read_csv(f'{general_path}/features_30_sec.csv')
data
Out[29]:
 | filename | length | chroma_stft_mean | chroma_stft_var | rms_mean | rms_var | spectral_centroid_mean | spectral_centroid_var | spectral_bandwidth_mean | spectral_bandwidth_var | ... | mfcc16_var | mfcc17_mean | mfcc17_var | mfcc18_mean | mfcc18_var | mfcc19_mean | mfcc19_var | mfcc20_mean | mfcc20_var | label |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | blues.00000.wav | 661794 | 0.350088 | 0.088757 | 0.130228 | 0.002827 | 1784.165850 | 129774.064525 | 2002.449060 | 85882.761315 | ... | 52.420910 | -1.690215 | 36.524071 | -0.408979 | 41.597103 | -2.303523 | 55.062923 | 1.221291 | 46.936035 | blues |
1 | blues.00001.wav | 661794 | 0.340914 | 0.094980 | 0.095948 | 0.002373 | 1530.176679 | 375850.073649 | 2039.036516 | 213843.755497 | ... | 55.356403 | -0.731125 | 60.314529 | 0.295073 | 48.120598 | -0.283518 | 51.106190 | 0.531217 | 45.786282 | blues |
2 | blues.00002.wav | 661794 | 0.363637 | 0.085275 | 0.175570 | 0.002746 | 1552.811865 | 156467.643368 | 1747.702312 | 76254.192257 | ... | 40.598766 | -7.729093 | 47.639427 | -1.816407 | 52.382141 | -3.439720 | 46.639660 | -2.231258 | 30.573025 | blues |
3 | blues.00003.wav | 661794 | 0.404785 | 0.093999 | 0.141093 | 0.006346 | 1070.106615 | 184355.942417 | 1596.412872 | 166441.494769 | ... | 44.427753 | -3.319597 | 50.206673 | 0.636965 | 37.319130 | -0.619121 | 37.259739 | -3.407448 | 31.949339 | blues |
4 | blues.00004.wav | 661794 | 0.308526 | 0.087841 | 0.091529 | 0.002303 | 1835.004266 | 343399.939274 | 1748.172116 | 88445.209036 | ... | 86.099236 | -5.454034 | 75.269707 | -0.916874 | 53.613918 | -4.404827 | 62.910812 | -11.703234 | 55.195160 | blues |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
995 | rock.00095.wav | 661794 | 0.352063 | 0.080487 | 0.079486 | 0.000345 | 2008.149458 | 282174.689224 | 2106.541053 | 88609.749506 | ... | 45.050526 | -13.289984 | 41.754955 | 2.484145 | 36.778877 | -6.713265 | 54.866825 | -1.193787 | 49.950665 | rock |
996 | rock.00096.wav | 661794 | 0.398687 | 0.075086 | 0.076458 | 0.000588 | 2006.843354 | 182114.709510 | 2068.942009 | 82426.016726 | ... | 33.851742 | -10.848309 | 39.395096 | 1.881229 | 32.010040 | -7.461491 | 39.196327 | -2.795338 | 31.773624 | rock |
997 | rock.00097.wav | 661794 | 0.432142 | 0.075268 | 0.081651 | 0.000322 | 2077.526598 | 231657.968040 | 1927.293153 | 74717.124394 | ... | 33.597008 | -12.845291 | 36.367264 | 3.440978 | 36.001110 | -12.588070 | 42.502201 | -2.106337 | 29.865515 | rock |
998 | rock.00098.wav | 661794 | 0.362485 | 0.091506 | 0.083860 | 0.001211 | 1398.699344 | 240318.731073 | 1818.450280 | 109090.207161 | ... | 46.324894 | -4.416050 | 43.583942 | 1.556207 | 34.331261 | -5.041897 | 47.227180 | -3.590644 | 41.299088 | rock |
999 | rock.00099.wav | 661794 | 0.358401 | 0.085884 | 0.054454 | 0.000336 | 1609.795082 | 422203.216152 | 1797.213044 | 120115.632927 | ... | 59.167755 | -7.069775 | 73.760391 | 0.028346 | 76.504326 | -2.025783 | 72.189316 | 1.155239 | 49.662510 | rock |
1000 rows × 60 columns
In [10]:
# Computing the Correlation Matrix
spike_cols = [col for col in data.columns if 'mean' in col]
corr = data[spike_cols].corr()
# Generate a mask for the upper triangle
mask = np.triu(np.ones_like(corr, dtype=bool))
# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(16, 11));
# Generate a custom diverging colormap
cmap = sns.diverging_palette(0, 25, as_cmap=True, s = 90, l = 45, n = 5)
# Draw the heatmap with the mask and correct aspect ratio
sns.heatmap(corr, mask=mask, cmap=cmap, vmax=.3, center=0,
square=True, linewidths=.5, cbar_kws={"shrink": .5})
plt.title('Correlation Heatmap (for the MEAN variables)', fontsize = 25)
plt.xticks(fontsize = 10)
plt.yticks(fontsize = 10);
plt.savefig("Corr Heatmap.jpg")
In [11]:
x = data[["label", "tempo"]]
f, ax = plt.subplots(figsize=(16, 9));
sns.boxplot(x = "label", y = "tempo", data = x, palette = 'husl');
plt.title('BPM Boxplot for Genres', fontsize = 25)
plt.xticks(fontsize = 14)
plt.yticks(fontsize = 10);
plt.xlabel("Genre", fontsize = 15)
plt.ylabel("BPM", fontsize = 15)
plt.savefig("BPM Boxplot.jpg")
In [12]:
# Drop the filename column
data = data.iloc[0:, 1:]
y = data['label']
X = data.loc[:, data.columns != 'label']
#### NORMALIZE X ####
cols = X.columns
min_max_scaler = preprocessing.MinMaxScaler()
np_scaled = min_max_scaler.fit_transform(X)
X = pd.DataFrame(np_scaled, columns = cols)
#### PCA 2 COMPONENTS ####
pca = PCA(n_components=2)
principalComponents = pca.fit_transform(X)
principalDf = pd.DataFrame(data = principalComponents, columns = ['principal component 1', 'principal component 2'])
# concatenate with target label
finalDf = pd.concat([principalDf, y], axis = 1)
pca.explained_variance_ratio_
Out[12]:
array([0.2439355 , 0.21781804])
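Together the two components retain only about 46% of the variance, so the scatter plot below is a rough projection rather than a faithful map of the genres; a one-line check:
In [ ]:
# Total variance captured by the 2-D projection (~0.46)
print('Explained variance (2 components):', pca.explained_variance_ratio_.sum())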
In [13]:
plt.figure(figsize = (16, 9))
sns.scatterplot(x = "principal component 1", y = "principal component 2", data = finalDf, hue = "label", alpha = 0.7,
s = 100);
plt.title('PCA on Genres', fontsize = 25)
plt.xticks(fontsize = 14)
plt.yticks(fontsize = 10);
plt.xlabel("Principal Component 1", fontsize = 15)
plt.ylabel("Principal Component 2", fontsize = 15)
plt.savefig("PCA Scattert.jpg")
In [14]:
# Load the 3-second feature set (each 30-second clip split into 3-second chunks)
data = pd.read_csv(f'{general_path}/features_3_sec.csv')
data = data.iloc[0:, 1:]  # drop the filename column
data.head()
Out[14]:
 | length | chroma_stft_mean | chroma_stft_var | rms_mean | rms_var | spectral_centroid_mean | spectral_centroid_var | spectral_bandwidth_mean | spectral_bandwidth_var | rolloff_mean | ... | mfcc16_var | mfcc17_mean | mfcc17_var | mfcc18_mean | mfcc18_var | mfcc19_mean | mfcc19_var | mfcc20_mean | mfcc20_var | label |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 66149 | 0.335406 | 0.091048 | 0.130405 | 0.003521 | 1773.065032 | 167541.630869 | 1972.744388 | 117335.771563 | 3714.560359 | ... | 39.687145 | -3.241280 | 36.488243 | 0.722209 | 38.099152 | -5.050335 | 33.618073 | -0.243027 | 43.771767 | blues |
1 | 66149 | 0.343065 | 0.086147 | 0.112699 | 0.001450 | 1816.693777 | 90525.690866 | 2010.051501 | 65671.875673 | 3869.682242 | ... | 64.748276 | -6.055294 | 40.677654 | 0.159015 | 51.264091 | -2.837699 | 97.030830 | 5.784063 | 59.943081 | blues |
2 | 66149 | 0.346815 | 0.092243 | 0.132003 | 0.004620 | 1788.539719 | 111407.437613 | 2084.565132 | 75124.921716 | 3997.639160 | ... | 67.336563 | -1.768610 | 28.348579 | 2.378768 | 45.717648 | -1.938424 | 53.050835 | 2.517375 | 33.105122 | blues |
3 | 66149 | 0.363639 | 0.086856 | 0.132565 | 0.002448 | 1655.289045 | 111952.284517 | 1960.039988 | 82913.639269 | 3568.300218 | ... | 47.739452 | -3.841155 | 28.337118 | 1.218588 | 34.770935 | -3.580352 | 50.836224 | 3.630866 | 32.023678 | blues |
4 | 66149 | 0.335579 | 0.088129 | 0.143289 | 0.001701 | 1630.656199 | 79667.267654 | 1948.503884 | 60204.020268 | 3469.992864 | ... | 30.336359 | 0.664582 | 45.880913 | 1.689446 | 51.363583 | -3.392489 | 26.738789 | 0.536961 | 29.146694 | blues |
5 rows × 59 columns
In [15]:
y = data['label'] # genre variable.
X = data.loc[:, data.columns != 'label'] #select all columns but not the labels
#### NORMALIZE X ####
# Normalize so everything is on the same scale.
cols = X.columns
min_max_scaler = preprocessing.MinMaxScaler()
np_scaled = min_max_scaler.fit_transform(X)
# new data frame with the new scaled data.
X = pd.DataFrame(np_scaled, columns = cols)
In [16]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
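The genres are balanced, so a plain random split works here; if you want to guarantee each genre keeps the same share in train and test, a stratified split is a drop-in alternative (not what the reported scores below were produced with):
In [ ]:
# Optional: stratified split, keeping each genre's proportion identical in train and test
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)
print(y_tr.value_counts().head())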
In [17]:
def model_assess(model, title = "Default"):
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    #print(confusion_matrix(y_test, preds))
    print('Accuracy', title, ':', round(accuracy_score(y_test, preds), 5), '\n')

# XGBoost cannot take the genre strings as class labels directly,
# so y_train and y_test need to be label-encoded first
def model_assessXGB(model, y_train, y_test, title = "Default"):
    # Encode the genre labels as integers with a single LabelEncoder
    le = preprocessing.LabelEncoder()
    ytrain2 = le.fit_transform(y_train)
    y_test2 = le.transform(y_test)
    model.fit(X_train, ytrain2)
    preds = model.predict(X_test)
    #print(confusion_matrix(y_test2, preds))
    print('Accuracy', title, ':', round(accuracy_score(y_test2, preds), 5), '\n')
In [83]:
# Naive Bayes
nb = GaussianNB()
model_assess(nb, "Naive Bayes")
# Stochastic Gradient Descent
sgd = SGDClassifier(max_iter=5000, random_state=0)
model_assess(sgd, "Stochastic Gradient Descent")
# KNN
knn = KNeighborsClassifier(n_neighbors=19)
model_assess(knn, "KNN")
# Decision Trees
tree = DecisionTreeClassifier()
model_assess(tree, "Decision Trees")
# Random Forest
rforest = RandomForestClassifier(n_estimators=1000, max_depth=10, random_state=0)
model_assess(rforest, "Random Forest")
# Support Vector Machine
svm = SVC(decision_function_shape="ovo")
model_assess(svm, "Support Vector Machine")
# Logistic Regression
lg = LogisticRegression(random_state=0, solver='lbfgs', multi_class='multinomial')
model_assess(lg, "Logistic Regression")
# Neural Nets
nn = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5000, 10), random_state=1)
model_assess(nn, "Neural Nets")
# XGBoost
xgb = XGBClassifier(n_estimators=1000, learning_rate=0.05)
model_assessXGB(xgb, y_train, y_test, "XGBoost")
# XGBoost (Random Forest)
xgbrf = XGBRFClassifier(objective= 'multi:softmax')
model_assessXGB(xgbrf, y_train, y_test, "XGBoost (Random Forest)")
Accuracy Naive Bayes : 0.51952
Accuracy Stochastic Gradient Descent : 0.65532
Accuracy KNN : 0.80581
Accuracy Decision Trees : 0.64364
Accuracy Random Forest : 0.81415
Accuracy Support Vector Machine : 0.75409
Accuracy Logistic Regression : 0.6977
Accuracy Neural Nets : 0.67668
Accuracy XGBoost : 0.90224
Accuracy XGBoost (Random Forest) : 0.74575
In [18]:
# Final model --> XGBoost
xgb = XGBClassifier(n_estimators=1000, learning_rate=0.05)
# Encode the genre labels with a single LabelEncoder
le = preprocessing.LabelEncoder()
ytrain2 = le.fit_transform(y_train)
y_test2 = le.transform(y_test)
xgb.fit(X_train, ytrain2)
preds = xgb.predict(X_test)
#print(confusion_matrix(y_test2, preds))
print('Accuracy', "XGBoost", ':', round(accuracy_score(y_test2, preds), 5), '\n')
Accuracy XGBoost : 0.90224
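Accuracy alone hides per-genre behavior; sklearn's classification_report adds per-class precision and recall (an illustrative addition run on the predictions above, not part of the original output):
In [ ]:
# Per-genre precision/recall for the final model (illustrative)
from sklearn.metrics import classification_report
print(classification_report(y_test2, preds, target_names=le.classes_))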
In [101]:
# Confusion Matrix
confusion_matr = confusion_matrix(y_test2, preds) #normalize = 'true'
plt.figure( figsize = (16, 9))
sns.heatmap(confusion_matr, cmap="mako", annot=False,
xticklabels = ["blues", "classical", "country", "disco", "hiphop",
"jazz", "metal", "pop", "reggae", "rock"],
yticklabels = ["blues", "classical", "country", "disco", "hiphop",
"jazz", "metal", "pop", "reggae", "rock"]);
#plt.savefig("conf matrix")
In [19]:
perm = PermutationImportance(estimator=xgb, random_state=1)
perm.fit(X_test, y_test2)
eli5.show_weights(estimator=perm, feature_names = X_test.columns.tolist())
Out[19]:
Weight | Feature |
---|---|
0.1205 ± 0.0095 | perceptr_var |
0.0416 ± 0.0031 | perceptr_mean |
0.0390 ± 0.0049 | mfcc4_mean |
0.0345 ± 0.0044 | chroma_stft_mean |
0.0339 ± 0.0062 | harmony_mean |
0.0280 ± 0.0065 | harmony_var |
0.0228 ± 0.0049 | mfcc9_mean |
0.0208 ± 0.0049 | mfcc6_mean |
0.0181 ± 0.0024 | rms_var |
0.0174 ± 0.0026 | mfcc3_mean |
0.0148 ± 0.0031 | spectral_bandwidth_mean |
0.0147 ± 0.0056 | mfcc11_mean |
0.0137 ± 0.0046 | tempo |
0.0116 ± 0.0036 | chroma_stft_var |
0.0113 ± 0.0026 | mfcc7_mean |
0.0109 ± 0.0038 | mfcc1_var |
0.0101 ± 0.0029 | mfcc3_var |
0.0089 ± 0.0057 | mfcc8_mean |
0.0089 ± 0.0020 | mfcc5_mean |
0.0072 ± 0.0038 | mfcc18_mean |
… 38 more … |
In [21]:
# Read data
data = pd.read_csv(f'{general_path}/features_30_sec.csv', index_col='filename')
# Extract labels
labels = data[['label']]
# Drop labels from original dataframe
data = data.drop(columns=['length','label'])
data.head()
# Scale the data
data_scaled=preprocessing.scale(data)
print('Scaled data type:', type(data_scaled))
Scaled data type: <class 'numpy.ndarray'>
In [22]:
# Cosine similarity
similarity = cosine_similarity(data_scaled)
print("Similarity shape:", similarity.shape)
# Convert into a dataframe and then set the row index and column names as labels
sim_df_labels = pd.DataFrame(similarity)
sim_df_names = sim_df_labels.set_index(labels.index)
sim_df_names.columns = labels.index
sim_df_names.head()
Similarity shape: (1000, 1000)
Out[22]:
filename | blues.00000.wav | blues.00001.wav | blues.00002.wav | blues.00003.wav | blues.00004.wav | blues.00005.wav | blues.00006.wav | blues.00007.wav | blues.00008.wav | blues.00009.wav | ... | rock.00090.wav | rock.00091.wav | rock.00092.wav | rock.00093.wav | rock.00094.wav | rock.00095.wav | rock.00096.wav | rock.00097.wav | rock.00098.wav | rock.00099.wav |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
blues.00000.wav | 1.000000 | 0.049231 | 0.589618 | 0.284862 | 0.025561 | -0.346688 | -0.219483 | -0.167626 | 0.641877 | -0.097889 | ... | -0.082829 | 0.546169 | 0.578558 | 0.662590 | 0.571629 | 0.610942 | 0.640835 | 0.496294 | 0.284958 | 0.304098 |
blues.00001.wav | 0.049231 | 1.000000 | -0.096834 | 0.520903 | 0.080749 | 0.307856 | 0.318286 | 0.415258 | 0.120649 | 0.404168 | ... | -0.098111 | -0.325126 | -0.370792 | -0.191698 | -0.330834 | -0.077301 | -0.222119 | -0.302573 | 0.499562 | 0.311723 |
blues.00002.wav | 0.589618 | -0.096834 | 1.000000 | 0.210411 | 0.400266 | -0.082019 | -0.028061 | 0.104446 | 0.468113 | -0.132532 | ... | -0.032408 | 0.561074 | 0.590779 | 0.583293 | 0.514537 | 0.495707 | 0.566837 | 0.589983 | 0.216378 | 0.321069 |
blues.00003.wav | 0.284862 | 0.520903 | 0.210411 | 1.000000 | 0.126437 | 0.134796 | 0.300746 | 0.324566 | 0.352758 | 0.295184 | ... | -0.320107 | -0.206516 | -0.151132 | 0.041986 | -0.172515 | -0.000287 | 0.020515 | -0.107821 | 0.502279 | 0.183210 |
blues.00004.wav | 0.025561 | 0.080749 | 0.400266 | 0.126437 | 1.000000 | 0.556066 | 0.482195 | 0.623455 | 0.029703 | 0.471657 | ... | 0.087605 | 0.017366 | 0.138035 | 0.104684 | -0.034594 | 0.063454 | 0.063546 | 0.172944 | 0.153192 | 0.061785 |
5 rows × 1000 columns
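Each entry is the cosine of the angle between two scaled feature vectors; a quick sketch verifying one off-diagonal entry by hand against the matrix:
In [ ]:
# Manual check of one cosine-similarity entry
a, b = data_scaled[0], data_scaled[1]
manual = np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
print('Manual cosine(0, 1):', manual)
print('Matrix cosine(0, 1):', similarity[0, 1])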
In [23]:
def find_similar_songs(name):
    # Find the songs most similar to the given song
    series = sim_df_names[name].sort_values(ascending = False)
    # Remove cosine similarity == 1 (a song always matches itself perfectly)
    series = series.drop(name)
    # Display the top 5 matches
    print("\n*******\nSimilar songs to ", name)
    print(series.head(5))
In [27]:
# metal.00002 - Iron Maiden "Flight of Icarus"
find_similar_songs('metal.00002.wav')
ipd.Audio(f'{general_path}/genres_original/metal/metal.00002.wav')
*******
Similar songs to  metal.00002.wav
filename
metal.00028.wav    0.904367
metal.00059.wav    0.896096
rock.00018.wav     0.891910
rock.00017.wav     0.886526
rock.00016.wav     0.867508
Name: metal.00002.wav, dtype: float64
Out[27]: [audio player for metal.00002.wav]
In [28]:
ipd.Audio(f'{general_path}/genres_original/metal/metal.00028.wav')
Out[28]: [audio player for metal.00028.wav]