# NPRSpotifySearch.py
import re
import json
import requests
import time
from urllib import parse
from collections import Counter
from unidecode import unidecode
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
from difflib import SequenceMatcher
from ratelimit import limits, RateLimitException
from backoff import on_exception, expo
import Secrets
# Client-side rate limit passed to the @limits decorator on each Spotify
# search method: at most NUMBER_OF_CALLS calls per IN_SECONDS-second window.
NUMBER_OF_CALLS = 5
IN_SECONDS = 1
# TODO create a way to make corrections and updates
# TODO listener sends track correction (incorrect song, incorrect rendition), same as above
# TODO replace current track with correct?
# TODO missing, replace, add, found, duplicate entry (but show had unique uncaptured)
class NPRSpotifySearch:
    """Search Spotify for a track listed by NPR and pick the closest result.

    ``SearchSpotify`` issues several differently phrased queries for every
    artist term and hands all responses to ``ChooseBestMatch``, which scores
    the returned tracks against the NPR title and artist credits.
    """

    # Whole words dropped from track titles before searching.
    _TRACK_STOP_WORDS = frozenset(['feat.', 'feat', 'original', 'edit', 'featuring', 'feature'])
    # Whole words dropped from artist credits before searching and scoring.
    _ARTIST_STOP_WORDS = frozenset(['and', 'various', 'artists', 'conducted', 'by', 'et', 'al', 'et.', 'al.'])
    # Every character that is not a letter, digit or underscore separates words.
    _NON_WORD = re.compile(r'[^\w]')
    # Search endpoint; the placeholders are the quoted query and the result types.
    _SEARCH_URL = "https://api.spotify.com/v1/search?q={}&type={}&market=US&limit=5"
    # Seconds to wait on a stalled connection before giving up on a request.
    _REQUEST_TIMEOUT = 30

    def __init__(self):
        """Create the retrying HTTP session and the token provider."""
        self.requestSession = requests.Session()
        self.secrets = Secrets.Secrets()
        # 429 is the status Spotify uses for rate limiting, so it is retried
        # alongside the statuses the session already retried.
        self.retries = Retry(
            total=10,
            backoff_factor=1,
            status_forcelist=[204, 304, 400, 401, 403, 404, 429, 500, 502, 503, 504],
        )
        self.requestSession.mount(
            'https://api.spotify.com/',
            HTTPAdapter(max_retries=self.retries, pool_maxsize=25),
        )
        # Kept as a separate instance for any external code that reads it.
        self.secretsSession = Secrets.Secrets()
        self.nprTrackName = None
        self.nprArtistsName = list()
        self.track = None
        self.artists = list()

    def SearchSpotify(self, track, artists, user_id):
        """Query Spotify for ``track`` in many phrasings and return the best hit.

        Args:
            track: Track title as listed by NPR.
            artists: List of artist credit strings, or ``None``.
            user_id: Passed to ``Secrets.GiveToken`` to obtain the bearer token.

        Returns:
            The result dictionary produced by ``ChooseBestMatch``.
        """
        if artists is None:
            artists = list()

        # Search terms: each cleaned artist word plus every full credit,
        # without duplicates, in first-seen order.
        artistTerms = list()
        for artist in artists:
            cleaned = self.RemoveCommonPhrasesArtists(
                self.RemoveParenthesis(self.RemoveBrackets(artist)))
            candidates = cleaned.split()
            candidates.extend(artists)
            for candidate in candidates:
                if candidate not in artistTerms:
                    artistTerms.append(candidate)
        # An empty artist term covers results whose title matches well but
        # whose artist credit does not match at all.
        artistTerms.append("")

        # Title variants are derived once from the original title so every
        # artist term is searched with the same set of phrasings.
        # TODO could probably drop all non-alphanumeric instead of an incremental hunt and drop approach
        title = unidecode(track)
        noBrackets = self.RemoveBrackets(title)
        noParens = self.RemoveParenthesis(title)
        noPhrases = self.RemoveCommonPhrasesTracks(noParens)
        reduced = self.RemoveNumbers(noPhrases)
        # Text before the first "(" must come from the original title: the
        # reduced title no longer contains parentheses to split on.
        beforeParen = self.RemoveNumbers(
            self.RemoveCommonPhrasesTracks(title.split("(")[0]))
        beforeBracket = reduced.split("[")[0]
        afterColon = reduced.partition(":")[2]

        trackResponses = list()
        for artist in artistTerms:
            plainArtist = unidecode(artist)
            looseArtist = unidecode(self.ReplaceAmpersand(artist))
            artistResponses = [
                self.SearchExplicitTrackAndArtist(title, plainArtist, user_id),
                self.SearchExplicitTrackAndArtist(noBrackets, plainArtist, user_id),
                self.SearchExplicitTrackAndArtist(noParens, plainArtist, user_id),
                self.SearchExplicitTrackAndArtist(noPhrases, plainArtist, user_id),
                self.SearchExplicitTrackAndArtist(reduced, plainArtist, user_id),
                self.SearchImplicitTrackExplicitArtist(reduced, plainArtist, user_id),
                self.SearchExplicitTrackAndArtist(beforeParen, plainArtist, user_id),
                self.SearchExplicitTrackAndArtist(beforeBracket, plainArtist, user_id),
                self.SearchExplicitTrackAndArtist(afterColon, plainArtist, user_id),
                # Hail Marys: untyped queries on the original title.
                self.SearchImplicitTrackImplicitArtist(title, looseArtist, user_id),
                self.SearchImplicitTrackAndArtistCombined(title, looseArtist, user_id),
            ]
            trackResponses.append(artistResponses)

        print("\"{0}\" by \"{1}\" searched.".format(track, str(artists)))
        return self.ChooseBestMatch(trackResponses, track, artists)

    @classmethod
    def _Tokenize(cls, text):
        """Return the lower-cased words of ``text``, splitting on non-word characters."""
        return cls._NON_WORD.sub(' ', text).lower().split()

    @staticmethod
    def _RecordMatch(bestMatch, item, artistNames, trackScore, artistsScore):
        """Store ``item`` and its scores in ``bestMatch`` (modified in place)."""
        bestMatch["Result Track Name"] = item["name"]
        bestMatch["Result Artist Names"] = artistNames
        bestMatch["Result Album Name"] = item["album"]["name"]
        bestMatch["Result Track-Match Percent"] = trackScore
        bestMatch["Result Artists-Match Percent"] = artistsScore
        bestMatch["Result Track URI"] = item["uri"]

    # Uses difflib to create a match threshold of sorts.
    def ChooseBestMatch(self, responses, nprTrack, nprArtists):
        """Pick the search result that best matches the NPR title and artists.

        Args:
            responses: List (one entry per artist term) of lists of decoded
                search responses; each response is read at ``["tracks"]["items"]``.
            nprTrack: Track title as listed by NPR.
            nprArtists: List of NPR artist credit strings (``None`` is treated
                as empty).

        Returns:
            A dictionary with the keys "Result Track Name",
            "Result Artist Names", "Result Album Name",
            "Result Track-Match Percent", "Result Artists-Match Percent" and
            "Result Track URI". The names and URI stay ``None`` / empty and
            the percentages stay 0.0 when nothing qualifies.
        """
        bestMatch = {
            "Result Track Name": None,
            "Result Artist Names": list(),
            "Result Album Name": None,
            "Result Track-Match Percent": 0.0,
            "Result Artists-Match Percent": 0.0,
            "Result Track URI": None,
        }
        if not responses:
            print("hmmm...")
            return bestMatch

        # The NPR side is the same for every result, so tokenize it once.
        nprTrackWords = self._Tokenize(nprTrack)
        nprArtistWords = list()
        for artist in nprArtists or []:
            nprArtistWords.extend(self._Tokenize(self.RemoveCommonPhrasesArtists(artist)))
        nprTrackWordSet = set(nprTrackWords)
        nprArtistWordSet = set(nprArtistWords)

        for response in responses:
            for result in response:
                for item in result["tracks"]["items"]:
                    if item is None:
                        # The result list has been seen to contain a null entry.
                        continue
                    resultTrackWords = self._Tokenize(item["name"])
                    # Original spellings are kept for the returned record;
                    # the response itself is left unmodified.
                    resultArtistNames = [artist["name"] for artist in item["artists"]]
                    resultArtistWords = list()
                    for name in resultArtistNames:
                        resultArtistWords.extend(self._Tokenize(name))

                    # First, a quick containment check: every NPR title word
                    # and every NPR artist word occurs in the result. A title
                    # without any words must not match everything.
                    if (nprTrackWordSet
                            and nprArtistWordSet.issubset(resultArtistWords)
                            and nprTrackWordSet.issubset(resultTrackWords)):
                        self._RecordMatch(bestMatch, item, resultArtistNames, 1.0, 1.0)
                        return bestMatch

                    # Otherwise weigh word-sequence similarity.
                    trackMatchScore = SequenceMatcher(a=nprTrackWords, b=resultTrackWords).ratio()
                    artistsMatchScore = SequenceMatcher(a=nprArtistWords, b=resultArtistWords).ratio()
                    # A candidate only replaces the current best when neither
                    # of its scores is lower than the best so far.
                    if (trackMatchScore < bestMatch["Result Track-Match Percent"]
                            or artistsMatchScore < bestMatch["Result Artists-Match Percent"]):
                        continue
                    strongTrack = trackMatchScore >= 0.75 and artistsMatchScore >= 0.5
                    strongArtists = trackMatchScore >= 0.5 and artistsMatchScore >= 0.65
                    if strongTrack or strongArtists:
                        self._RecordMatch(bestMatch, item, resultArtistNames,
                                          trackMatchScore, artistsMatchScore)
        return bestMatch

    def RemoveBrackets(self, track):
        """Return ``track`` without any "[" or "]" characters."""
        return track.translate(str.maketrans('', '', '[]'))

    def RemoveParenthesis(self, track):
        """Return ``track`` without any "(" or ")" characters."""
        return track.translate(str.maketrans('', '', '()'))

    def RemoveNumbers(self, track):
        """Return ``track`` without any ASCII digits."""
        # Stripping only "0" would turn "No. 40" into "No. 4", a different number.
        return track.translate(str.maketrans('', '', '0123456789'))

    def RemoveCommonPhrasesTracks(self, track):
        """Lower-case ``track`` and drop track stop words such as "feat."."""
        kept = [word for word in track.lower().split() if word not in self._TRACK_STOP_WORDS]
        return ' '.join(kept)

    def RemoveCommonPhrasesArtists(self, track):
        """Lower-case an artist credit and drop stop words such as "and"."""
        kept = [word for word in track.lower().split() if word not in self._ARTIST_STOP_WORDS]
        return ' '.join(kept)

    def ReplaceAmpersand(self, track):
        """Return ``track`` with every "&" replaced by "and"."""
        return track.replace('&', 'and')

    def _Search(self, query, user_id, resultTypes="track%2Cartist"):
        """Run one search request and return the decoded JSON body.

        Args:
            query: Unquoted search string; it is URL-quoted here.
            user_id: Passed to ``Secrets.GiveToken`` for the bearer token.
            resultTypes: Already URL-encoded value for the ``type`` parameter.

        Raises:
            Exception: If the response status is not 200, 201 or 202.
        """
        url = self._SEARCH_URL.format(parse.quote(query), resultTypes)
        headers = {
            "Content-Type": "application/json",
            "Authorization": "Bearer {}".format(self.secrets.GiveToken(user_id)),
        }
        response = self.requestSession.get(url, headers=headers, timeout=self._REQUEST_TIMEOUT)
        if response.status_code not in [200, 201, 202]:
            raise Exception('API response: {}'.format(response.status_code))
        return response.json()

    # Explicit Track or Artist means a field type is encoded in the query,
    # e.g. track:"Smells like teen spirit" artist:"Nirvana".
    # Without those the search can return different results.
    # TODO better names
    @on_exception(expo, RateLimitException, max_tries=8)
    @limits(calls=NUMBER_OF_CALLS, period=IN_SECONDS)
    def SearchExplicitTrackAndArtist(self, track, artist, user_id):
        """Search with both fields typed: track:"..." artist:"..."."""
        return self._Search('track:"' + track + '" artist:"' + artist + '"', user_id)

    @on_exception(expo, RateLimitException, max_tries=8)
    @limits(calls=NUMBER_OF_CALLS, period=IN_SECONDS)
    def SearchImplicitTrackExplicitArtist(self, track, artist, user_id):
        """Search with an untyped quoted title and a typed artist:"..."."""
        return self._Search('"' + track + '" artist:"' + artist + '"', user_id)

    @on_exception(expo, RateLimitException, max_tries=8)
    @limits(calls=NUMBER_OF_CALLS, period=IN_SECONDS)
    def SearchImplicitTrackImplicitArtist(self, track, artist, user_id):
        """Search with the title and the artist as two untyped quoted phrases."""
        return self._Search('"' + track + '" "' + artist + '"', user_id)

    @on_exception(expo, RateLimitException, max_tries=8)
    @limits(calls=NUMBER_OF_CALLS, period=IN_SECONDS)
    def SearchImplicitTrackAndArtistCombined(self, track, artist, user_id):
        """Search tracks only with the unquoted query "<track> AND <artist>"."""
        return self._Search(track + " AND " + artist, user_id, resultTypes="track")