Building Music Recommender System Using Machine Learning Classifier Models
jupyter_notebook machine_learning classifier audio_data In [20]:
# Usual Libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import IPython.display as ipd
import sklearn
from sklearn import preprocessing
from sklearn.decomposition import PCA
from sklearn.metrics.pairwise import cosine_similarity
#Import Classifier Models
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import SGDClassifier, LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier, XGBRFClassifier
from xgboost import plot_tree, plot_importance
from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score, roc_curve
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import RFE
# Librosa (the mother of audio files)
import librosa
import librosa.display
import IPython.display as ipd
import warnings
warnings.filterwarnings('ignore')
#Debug machine learning classifiers and explain their predictions
import eli5
from eli5.sklearn import PermutationImportance
In [8]:
import os
general_path = 'gtzan'
print(list(os.listdir(f'{general_path}/genres_original/')))
['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', 'pop', 'reggae', 'rock']
In [4]:
# Importing file from metal genre
y, sr = librosa.load(f'{general_path}/genres_original/metal/metal.00017.wav')
print('y:', y, '\n')
print('y shape:', np.shape(y), '\n')
print('Sample Rate (KHz):', sr, '\n')
# Verify length of the audio
print('Check Len of Audio:', 661794/22050)
y: [ 0.05142212 0.07095337 0.0239563 ... 0.00317383 -0.03045654 0.02932739] y shape: (661504,) Sample Rate (KHz): 22050 Check Len of Audio: 30.013333333333332
In [5]:
# Trim leading and trailing silence from an audio signal (silence before and after the actual audio)
metal_file, _ = librosa.effects.trim(y)
# the result is an numpy ndarray
print('Audio File:', metal_file, '\n')
print('Audio File shape:', np.shape(metal_file))
Audio File: [ 0.05142212 0.07095337 0.0239563 ... 0.00317383 -0.03045654 0.02932739] Audio File shape: (661504,)
In [6]:
plt.figure(figsize = (16, 6))
librosa.display.waveshow(y = metal_file, sr = sr, color = "#A300F9");
plt.title("Sound Waves in Reggae 36", fontsize = 23);
In [7]:
# Default FFT window size
n_fft = 2048 # FFT window size
hop_length = 512 # number audio of frames between STFT columns (looks like a good default)
# Short-time Fourier transform (STFT)
D = np.abs(librosa.stft(metal_file, n_fft = n_fft, hop_length = hop_length))
print('Shape of D object:', np.shape(D))
Shape of D object: (1025, 1293)
In [8]:
plt.figure(figsize = (16, 6))
plt.plot(D);
In [9]:
# Convert an amplitude spectrogram to Decibels-scaled spectrogram.
DB = librosa.amplitude_to_db(D, ref = np.max)
# Creating the Spectogram
plt.figure(figsize = (16, 6))
librosa.display.specshow(DB, sr = sr, hop_length = hop_length, x_axis = 'time', y_axis = 'log',
                        cmap = 'cool')
plt.colorbar();
In [10]:
y, sr = librosa.load(f'{general_path}/genres_original/metal/metal.00017.wav')
y, _ = librosa.effects.trim(y)
S = librosa.feature.melspectrogram(y, sr=sr)
S_DB = librosa.amplitude_to_db(S, ref=np.max)
plt.figure(figsize = (16, 6))
librosa.display.specshow(S_DB, sr=sr, hop_length=hop_length, x_axis = 'time', y_axis = 'log',
                        cmap = 'cool');
plt.colorbar();
plt.title("Metal Mel Spectrogram", fontsize = 23);
In [11]:
# Total zero_crossings in our 1 song
zero_crossings = librosa.zero_crossings(metal_file, pad=False)
print(sum(zero_crossings))
105180
In [12]:
y_harm, y_perc = librosa.effects.hpss(metal_file)
plt.figure(figsize = (16, 6))
plt.plot(y_harm, color = '#A300F9');
plt.plot(y_perc, color = '#FFB100');
In [13]:
tempo, _ = librosa.beat.beat_track(y, sr = sr)
tempo
Out[13]:
107.666015625
In [14]:
# Calculate the Spectral Centroids
spectral_centroids = librosa.feature.spectral_centroid(metal_file, sr=sr)[0]
# Shape is a vector
print('Centroids:', spectral_centroids, '\n')
print('Shape of Spectral Centroids:', spectral_centroids.shape, '\n')
# Computing the time variable for visualization
frames = range(len(spectral_centroids))
# Converts frame counts to time (seconds)
t = librosa.frames_to_time(frames)
print('frames:', frames, '\n')
print('t:', t)
# Function that normalizes the Sound Data
def normalize(x, axis=0):
    return sklearn.preprocessing.minmax_scale(x, axis=axis)
Centroids: [3068.15111516 3128.41194354 3123.48928657 ... 2558.13730833 2960.96376713 3665.36126394] Shape of Spectral Centroids: (1293,) frames: range(0, 1293) t: [0.00000000e+00 2.32199546e-02 4.64399093e-02 ... 2.99537415e+01 2.99769615e+01 3.00001814e+01]
In [15]:
#Plotting the Spectral Centroid along the waveform
plt.figure(figsize = (16, 6))
librosa.display.waveshow(metal_file, sr=sr, alpha=0.4, color = '#A300F9')
plt.plot(t, normalize(spectral_centroids), color='#FFB100')
Out[15]:
[<matplotlib.lines.Line2D at 0x1b83f17cdc8>]
In [16]:
# Spectral RollOff Vector
spectral_rolloff = librosa.feature.spectral_rolloff(metal_file, sr=sr)[0]
# The plot
plt.figure(figsize = (16, 6))
librosa.display.waveshow(metal_file, sr=sr, alpha=0.4, color = '#A300F9');
plt.plot(t, normalize(spectral_rolloff), color='#FFB100');
In [17]:
mfccs = librosa.feature.mfcc(metal_file, sr=sr)
print('mfccs shape:', mfccs.shape)
#Displaying  the MFCCs:
plt.figure(figsize = (16, 6))
librosa.display.specshow(mfccs, sr=sr, x_axis='time', cmap = 'cool');
mfccs shape: (20, 1293)
In [18]:
# Perform Feature Scaling
mfccs = sklearn.preprocessing.scale(mfccs, axis=1)
print('Mean:', mfccs.mean(), '\n')
print('Var:', mfccs.var())
plt.figure(figsize = (16, 6))
librosa.display.specshow(mfccs, sr=sr, x_axis='time', cmap = 'cool');
Mean: -1.1801075e-09 Var: 1.0
In [19]:
# Increase or decrease hop_length to change how granular you want your data to be
hop_length = 5000
# Chromogram
chromagram = librosa.feature.chroma_stft(metal_file, sr=sr, hop_length=hop_length)
print('Chromogram shape:', chromagram.shape)
plt.figure(figsize=(16, 6))
librosa.display.specshow(chromagram, x_axis='time', y_axis='chroma', 
                         hop_length=hop_length, cmap='coolwarm');
Chromogram shape: (12, 133)
In [29]:
data = pd.read_csv(f'{general_path}/features_30_sec.csv')
data
Out[29]:
| filename | length | chroma_stft_mean | chroma_stft_var | rms_mean | rms_var | spectral_centroid_mean | spectral_centroid_var | spectral_bandwidth_mean | spectral_bandwidth_var | ... | mfcc16_var | mfcc17_mean | mfcc17_var | mfcc18_mean | mfcc18_var | mfcc19_mean | mfcc19_var | mfcc20_mean | mfcc20_var | label | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | blues.00000.wav | 661794 | 0.350088 | 0.088757 | 0.130228 | 0.002827 | 1784.165850 | 129774.064525 | 2002.449060 | 85882.761315 | ... | 52.420910 | -1.690215 | 36.524071 | -0.408979 | 41.597103 | -2.303523 | 55.062923 | 1.221291 | 46.936035 | blues | 
| 1 | blues.00001.wav | 661794 | 0.340914 | 0.094980 | 0.095948 | 0.002373 | 1530.176679 | 375850.073649 | 2039.036516 | 213843.755497 | ... | 55.356403 | -0.731125 | 60.314529 | 0.295073 | 48.120598 | -0.283518 | 51.106190 | 0.531217 | 45.786282 | blues | 
| 2 | blues.00002.wav | 661794 | 0.363637 | 0.085275 | 0.175570 | 0.002746 | 1552.811865 | 156467.643368 | 1747.702312 | 76254.192257 | ... | 40.598766 | -7.729093 | 47.639427 | -1.816407 | 52.382141 | -3.439720 | 46.639660 | -2.231258 | 30.573025 | blues | 
| 3 | blues.00003.wav | 661794 | 0.404785 | 0.093999 | 0.141093 | 0.006346 | 1070.106615 | 184355.942417 | 1596.412872 | 166441.494769 | ... | 44.427753 | -3.319597 | 50.206673 | 0.636965 | 37.319130 | -0.619121 | 37.259739 | -3.407448 | 31.949339 | blues | 
| 4 | blues.00004.wav | 661794 | 0.308526 | 0.087841 | 0.091529 | 0.002303 | 1835.004266 | 343399.939274 | 1748.172116 | 88445.209036 | ... | 86.099236 | -5.454034 | 75.269707 | -0.916874 | 53.613918 | -4.404827 | 62.910812 | -11.703234 | 55.195160 | blues | 
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | 
| 995 | rock.00095.wav | 661794 | 0.352063 | 0.080487 | 0.079486 | 0.000345 | 2008.149458 | 282174.689224 | 2106.541053 | 88609.749506 | ... | 45.050526 | -13.289984 | 41.754955 | 2.484145 | 36.778877 | -6.713265 | 54.866825 | -1.193787 | 49.950665 | rock | 
| 996 | rock.00096.wav | 661794 | 0.398687 | 0.075086 | 0.076458 | 0.000588 | 2006.843354 | 182114.709510 | 2068.942009 | 82426.016726 | ... | 33.851742 | -10.848309 | 39.395096 | 1.881229 | 32.010040 | -7.461491 | 39.196327 | -2.795338 | 31.773624 | rock | 
| 997 | rock.00097.wav | 661794 | 0.432142 | 0.075268 | 0.081651 | 0.000322 | 2077.526598 | 231657.968040 | 1927.293153 | 74717.124394 | ... | 33.597008 | -12.845291 | 36.367264 | 3.440978 | 36.001110 | -12.588070 | 42.502201 | -2.106337 | 29.865515 | rock | 
| 998 | rock.00098.wav | 661794 | 0.362485 | 0.091506 | 0.083860 | 0.001211 | 1398.699344 | 240318.731073 | 1818.450280 | 109090.207161 | ... | 46.324894 | -4.416050 | 43.583942 | 1.556207 | 34.331261 | -5.041897 | 47.227180 | -3.590644 | 41.299088 | rock | 
| 999 | rock.00099.wav | 661794 | 0.358401 | 0.085884 | 0.054454 | 0.000336 | 1609.795082 | 422203.216152 | 1797.213044 | 120115.632927 | ... | 59.167755 | -7.069775 | 73.760391 | 0.028346 | 76.504326 | -2.025783 | 72.189316 | 1.155239 | 49.662510 | rock | 
1000 rows × 60 columns
In [10]:
# Computing the Correlation Matrix
spike_cols = [col for col in data.columns if 'mean' in col]
corr = data[spike_cols].corr()
# Generate a mask for the upper triangle
mask = np.triu(np.ones_like(corr, dtype=np.bool))
# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(16, 11));
# Generate a custom diverging colormap
cmap = sns.diverging_palette(0, 25, as_cmap=True, s = 90, l = 45, n = 5)
# Draw the heatmap with the mask and correct aspect ratio
sns.heatmap(corr, mask=mask, cmap=cmap, vmax=.3, center=0,
            square=True, linewidths=.5, cbar_kws={"shrink": .5})
plt.title('Correlation Heatmap (for the MEAN variables)', fontsize = 25)
plt.xticks(fontsize = 10)
plt.yticks(fontsize = 10);
plt.savefig("Corr Heatmap.jpg")
In [11]:
x = data[["label", "tempo"]]
f, ax = plt.subplots(figsize=(16, 9));
sns.boxplot(x = "label", y = "tempo", data = x, palette = 'husl');
plt.title('BPM Boxplot for Genres', fontsize = 25)
plt.xticks(fontsize = 14)
plt.yticks(fontsize = 10);
plt.xlabel("Genre", fontsize = 15)
plt.ylabel("BPM", fontsize = 15)
plt.savefig("BPM Boxplot.jpg")
In [12]:
data = data.iloc[0:, 1:]
y = data['label']
X = data.loc[:, data.columns != 'label']
#### NORMALIZE X ####
cols = X.columns
min_max_scaler = preprocessing.MinMaxScaler()
np_scaled = min_max_scaler.fit_transform(X)
X = pd.DataFrame(np_scaled, columns = cols)
#### PCA 2 COMPONENTS ####
pca = PCA(n_components=2)
principalComponents = pca.fit_transform(X)
principalDf = pd.DataFrame(data = principalComponents, columns = ['principal component 1', 'principal component 2'])
# concatenate with target label
finalDf = pd.concat([principalDf, y], axis = 1)
pca.explained_variance_ratio_
Out[12]:
array([0.2439355 , 0.21781804])
In [13]:
plt.figure(figsize = (16, 9))
sns.scatterplot(x = "principal component 1", y = "principal component 2", data = finalDf, hue = "label", alpha = 0.7,
               s = 100);
plt.title('PCA on Genres', fontsize = 25)
plt.xticks(fontsize = 14)
plt.yticks(fontsize = 10);
plt.xlabel("Principal Component 1", fontsize = 15)
plt.ylabel("Principal Component 2", fontsize = 15)
plt.savefig("PCA Scattert.jpg")
In [14]:
data = pd.read_csv(f'{general_path}/features_3_sec.csv')
data = data.iloc[0:, 1:] 
data.head()
Out[14]:
| length | chroma_stft_mean | chroma_stft_var | rms_mean | rms_var | spectral_centroid_mean | spectral_centroid_var | spectral_bandwidth_mean | spectral_bandwidth_var | rolloff_mean | ... | mfcc16_var | mfcc17_mean | mfcc17_var | mfcc18_mean | mfcc18_var | mfcc19_mean | mfcc19_var | mfcc20_mean | mfcc20_var | label | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 66149 | 0.335406 | 0.091048 | 0.130405 | 0.003521 | 1773.065032 | 167541.630869 | 1972.744388 | 117335.771563 | 3714.560359 | ... | 39.687145 | -3.241280 | 36.488243 | 0.722209 | 38.099152 | -5.050335 | 33.618073 | -0.243027 | 43.771767 | blues | 
| 1 | 66149 | 0.343065 | 0.086147 | 0.112699 | 0.001450 | 1816.693777 | 90525.690866 | 2010.051501 | 65671.875673 | 3869.682242 | ... | 64.748276 | -6.055294 | 40.677654 | 0.159015 | 51.264091 | -2.837699 | 97.030830 | 5.784063 | 59.943081 | blues | 
| 2 | 66149 | 0.346815 | 0.092243 | 0.132003 | 0.004620 | 1788.539719 | 111407.437613 | 2084.565132 | 75124.921716 | 3997.639160 | ... | 67.336563 | -1.768610 | 28.348579 | 2.378768 | 45.717648 | -1.938424 | 53.050835 | 2.517375 | 33.105122 | blues | 
| 3 | 66149 | 0.363639 | 0.086856 | 0.132565 | 0.002448 | 1655.289045 | 111952.284517 | 1960.039988 | 82913.639269 | 3568.300218 | ... | 47.739452 | -3.841155 | 28.337118 | 1.218588 | 34.770935 | -3.580352 | 50.836224 | 3.630866 | 32.023678 | blues | 
| 4 | 66149 | 0.335579 | 0.088129 | 0.143289 | 0.001701 | 1630.656199 | 79667.267654 | 1948.503884 | 60204.020268 | 3469.992864 | ... | 30.336359 | 0.664582 | 45.880913 | 1.689446 | 51.363583 | -3.392489 | 26.738789 | 0.536961 | 29.146694 | blues | 
5 rows × 59 columns
In [15]:
y = data['label'] # genre variable.
X = data.loc[:, data.columns != 'label'] #select all columns but not the labels
#### NORMALIZE X ####
# Normalize so everything is on the same scale. 
cols = X.columns
min_max_scaler = preprocessing.MinMaxScaler()
np_scaled = min_max_scaler.fit_transform(X)
# new data frame with the new scaled data. 
X = pd.DataFrame(np_scaled, columns = cols)
In [16]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
In [17]:
def model_assess(model, title = "Default"):
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    #print(confusion_matrix(y_test, preds))
    print('Accuracy', title, ':', round(accuracy_score(y_test, preds), 5), '\n')
#xgb is having trouble processing the genres as categorical variables
#Needed to transform y_test and y_train using label encoder
def model_assessXGB(model, y_train, y_test, title = "Default"):
    #Encode categorical variables into array using LabelEncoder
    le = preprocessing.LabelEncoder()
    le.fit(y_train)
    ytrain2 = le.transform(y_train)
    le.fit(y_test)
    y_test2 = le.transform(y_test)
    
    model.fit(X_train, ytrain2)
    preds = model.predict(X_test)
    #print(confusion_matrix(y_test, preds))
    print('Accuracy', title, ':', round(accuracy_score(y_test2, preds), 5), '\n')
In [83]:
# Naive Bayes
nb = GaussianNB()
model_assess(nb, "Naive Bayes")
# Stochastic Gradient Descent
sgd = SGDClassifier(max_iter=5000, random_state=0)
model_assess(sgd, "Stochastic Gradient Descent")
# KNN
knn = KNeighborsClassifier(n_neighbors=19)
model_assess(knn, "KNN")
# Decission trees
tree = DecisionTreeClassifier()
model_assess(tree, "Decission trees")
# Random Forest
rforest = RandomForestClassifier(n_estimators=1000, max_depth=10, random_state=0)
model_assess(rforest, "Random Forest")
# Support Vector Machine
svm = SVC(decision_function_shape="ovo")
model_assess(svm, "Support Vector Machine")
# Logistic Regression
lg = LogisticRegression(random_state=0, solver='lbfgs', multi_class='multinomial')
model_assess(lg, "Logistic Regression")
# Neural Nets
nn = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5000, 10), random_state=1)
model_assess(nn, "Neural Nets")
# Cross Gradient Booster
xgb = XGBClassifier(n_estimators=1000, learning_rate=0.05)
model_assessXGB(xgb, y_train, y_test, "Cross Gradient Booster")
# Cross Gradient Booster (Random Forest)
xgbrf = XGBRFClassifier(objective= 'multi:softmax')
model_assessXGB(xgbrf, y_train, y_test, "Cross Gradient Booster (Random Forest)")
Accuracy Naive Bayes : 0.51952 Accuracy Stochastic Gradient Descent : 0.65532 Accuracy KNN : 0.80581 Accuracy Decission trees : 0.64364 Accuracy Random Forest : 0.81415 Accuracy Support Vector Machine : 0.75409 Accuracy Logistic Regression : 0.6977 Accuracy Neural Nets : 0.67668 Accuracy Cross Gradient Booster : 0.90224 Accuracy Cross Gradient Booster (Random Forest) : 0.74575
In [18]:
# Final model --> Cross Gradient Booster
xgb = XGBClassifier(n_estimators=1000, learning_rate=0.05)
#Encode categorical variables into array using LabelEncoder
le = preprocessing.LabelEncoder()
le.fit(y_train)
ytrain2 = le.transform(y_train)
le.fit(y_test)
y_test2 = le.transform(y_test)
xgb.fit(X_train, ytrain2)
preds = xgb.predict(X_test)
#print(confusion_matrix(y_test, preds))
print('Accuracy', "Cross Gradient Booster", ':', round(accuracy_score(y_test2, preds), 5), '\n')
Accuracy Cross Gradient Booster : 0.90224
In [101]:
# Confusion Matrix
confusion_matr = confusion_matrix(y_test2, preds) #normalize = 'true'
plt.figure( figsize = (16, 9))
sns.heatmap(confusion_matr, cmap="mako", annot=False, 
            xticklabels = ["blues", "classical", "country", "disco", "hiphop",
                           "jazz", "metal", "pop", "reggae", "rock"],
            yticklabels = ["blues", "classical", "country", "disco", "hiphop",
                           "jazz", "metal", "pop", "reggae", "rock"]);
#plt.savefig("conf matrix")
In [19]:
import eli5
from eli5.sklearn import PermutationImportance
perm = PermutationImportance(estimator=xgb, random_state=1)
perm.fit(X_test, y_test2)
eli5.show_weights(estimator=perm, feature_names = X_test.columns.tolist())
Using TensorFlow backend.
Out[19]:
| Weight | Feature | 
|---|---|
| 0.1205 ± 0.0095 | perceptr_var | 
| 0.0416 ± 0.0031 | perceptr_mean | 
| 0.0390 ± 0.0049 | mfcc4_mean | 
| 0.0345 ± 0.0044 | chroma_stft_mean | 
| 0.0339 ± 0.0062 | harmony_mean | 
| 0.0280 ± 0.0065 | harmony_var | 
| 0.0228 ± 0.0049 | mfcc9_mean | 
| 0.0208 ± 0.0049 | mfcc6_mean | 
| 0.0181 ± 0.0024 | rms_var | 
| 0.0174 ± 0.0026 | mfcc3_mean | 
| 0.0148 ± 0.0031 | spectral_bandwidth_mean | 
| 0.0147 ± 0.0056 | mfcc11_mean | 
| 0.0137 ± 0.0046 | tempo | 
| 0.0116 ± 0.0036 | chroma_stft_var | 
| 0.0113 ± 0.0026 | mfcc7_mean | 
| 0.0109 ± 0.0038 | mfcc1_var | 
| 0.0101 ± 0.0029 | mfcc3_var | 
| 0.0089 ± 0.0057 | mfcc8_mean | 
| 0.0089 ± 0.0020 | mfcc5_mean | 
| 0.0072 ± 0.0038 | mfcc18_mean | 
| … 38 more … | |
In [21]:
# Read data
data = pd.read_csv(f'{general_path}/features_30_sec.csv', index_col='filename')
# Extract labels
labels = data[['label']]
# Drop labels from original dataframe
data = data.drop(columns=['length','label'])
data.head()
# Scale the data
data_scaled=preprocessing.scale(data)
print('Scaled data type:', type(data_scaled))
Scaled data type: <class 'numpy.ndarray'>
In [22]:
# Cosine similarity
similarity = cosine_similarity(data_scaled)
print("Similarity shape:", similarity.shape)
# Convert into a dataframe and then set the row index and column names as labels
sim_df_labels = pd.DataFrame(similarity)
sim_df_names = sim_df_labels.set_index(labels.index)
sim_df_names.columns = labels.index
sim_df_names.head()
Similarity shape: (1000, 1000)
Out[22]:
| filename | blues.00000.wav | blues.00001.wav | blues.00002.wav | blues.00003.wav | blues.00004.wav | blues.00005.wav | blues.00006.wav | blues.00007.wav | blues.00008.wav | blues.00009.wav | ... | rock.00090.wav | rock.00091.wav | rock.00092.wav | rock.00093.wav | rock.00094.wav | rock.00095.wav | rock.00096.wav | rock.00097.wav | rock.00098.wav | rock.00099.wav | 
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| filename | |||||||||||||||||||||
| blues.00000.wav | 1.000000 | 0.049231 | 0.589618 | 0.284862 | 0.025561 | -0.346688 | -0.219483 | -0.167626 | 0.641877 | -0.097889 | ... | -0.082829 | 0.546169 | 0.578558 | 0.662590 | 0.571629 | 0.610942 | 0.640835 | 0.496294 | 0.284958 | 0.304098 | 
| blues.00001.wav | 0.049231 | 1.000000 | -0.096834 | 0.520903 | 0.080749 | 0.307856 | 0.318286 | 0.415258 | 0.120649 | 0.404168 | ... | -0.098111 | -0.325126 | -0.370792 | -0.191698 | -0.330834 | -0.077301 | -0.222119 | -0.302573 | 0.499562 | 0.311723 | 
| blues.00002.wav | 0.589618 | -0.096834 | 1.000000 | 0.210411 | 0.400266 | -0.082019 | -0.028061 | 0.104446 | 0.468113 | -0.132532 | ... | -0.032408 | 0.561074 | 0.590779 | 0.583293 | 0.514537 | 0.495707 | 0.566837 | 0.589983 | 0.216378 | 0.321069 | 
| blues.00003.wav | 0.284862 | 0.520903 | 0.210411 | 1.000000 | 0.126437 | 0.134796 | 0.300746 | 0.324566 | 0.352758 | 0.295184 | ... | -0.320107 | -0.206516 | -0.151132 | 0.041986 | -0.172515 | -0.000287 | 0.020515 | -0.107821 | 0.502279 | 0.183210 | 
| blues.00004.wav | 0.025561 | 0.080749 | 0.400266 | 0.126437 | 1.000000 | 0.556066 | 0.482195 | 0.623455 | 0.029703 | 0.471657 | ... | 0.087605 | 0.017366 | 0.138035 | 0.104684 | -0.034594 | 0.063454 | 0.063546 | 0.172944 | 0.153192 | 0.061785 | 
5 rows × 1000 columns
In [23]:
def find_similar_songs(name):
    # Find songs most similar to another song
    series = sim_df_names[name].sort_values(ascending = False)
    
    # Remove cosine similarity == 1 (songs will always have the best match with themselves)
    series = series.drop(name)
    
    # Display the 5 top matches 
    print("\n*******\nSimilar songs to ", name)
    print(series.head(5))
In [27]:
# metal.00002 - Iron Maiden "Flight of Icarus"
find_similar_songs('metal.00002.wav') 
ipd.Audio(f'{general_path}/genres_original/metal/metal.00002.wav')
******* Similar songs to metal.00002.wav filename metal.00028.wav 0.904367 metal.00059.wav 0.896096 rock.00018.wav 0.891910 rock.00017.wav 0.886526 rock.00016.wav 0.867508 Name: metal.00002.wav, dtype: float64
Out[27]:
In [28]:
ipd.Audio(f'{general_path}/genres_original/metal/metal.00028.wav')
Out[28]:
In [ ]: