adding latest working version
This commit is contained in:
BIN
src/.DS_Store
vendored
BIN
src/.DS_Store
vendored
Binary file not shown.
BIN
src/musicbrainzapi/.DS_Store
vendored
BIN
src/musicbrainzapi/.DS_Store
vendored
Binary file not shown.
@@ -1 +0,0 @@
|
||||
from . import lyrics
|
||||
@@ -1,758 +0,0 @@
|
||||
from __future__ import annotations
|
||||
from abc import ABC, abstractmethod, abstractstaticmethod
|
||||
from dataclasses import dataclass
|
||||
|
||||
from pprint import pprint
|
||||
from typing import Union, List, Dict
|
||||
from collections import Counter
|
||||
import html
|
||||
import json
|
||||
import os
|
||||
import string
|
||||
import math
|
||||
|
||||
from beautifultable import BeautifulTable
|
||||
import musicbrainzngs
|
||||
import click
|
||||
import addict
|
||||
import requests
|
||||
import numpy as np
|
||||
|
||||
from musicbrainzapi.api import authenticate
|
||||
|
||||
|
||||
class LyricsConcreteBuilder(ABC):
    """Abstract interface that every lyrics builder has to implement.

    Every member is abstract, so this class cannot be instantiated; a
    subclass must override all properties and build steps listed here.
    """

    @property
    @abstractmethod
    def product(self) -> None:
        """Object that the builder is assembling."""

    @property
    @abstractmethod
    def artist(self) -> str:
        """Name of the artist the builder works on."""

    @artist.setter
    @abstractmethod
    def artist(self, artist: str) -> None:
        """Set the artist name."""

    @property
    @abstractmethod
    def country(self) -> Union[str, None]:
        """Country used to narrow the artist search, or ``None``."""

    @country.setter
    @abstractmethod
    def country(self, country: Union[str, None]) -> None:
        """Set the country filter."""

    @property
    @abstractmethod
    def artist_id(self) -> str:
        """Identifier of the chosen artist."""

    @artist_id.setter
    @abstractmethod
    def artist_id(self, artist_id: str) -> None:
        """Set the artist identifier."""

    # ``abc.abstractstaticmethod`` is deprecated; stacking ``staticmethod``
    # on top of ``abstractmethod`` is the supported spelling.
    @staticmethod
    @abstractmethod
    def set_useragent():
        """Set the user agent through ``authenticate.set_useragent``.

        The method is abstract, so subclasses still have to override it.
        """
        authenticate.set_useragent()

    @abstractmethod
    def __init__(self) -> None:
        """Initialise the builder."""

    @abstractmethod
    def reset(self) -> None:
        """Reset the builder state."""

    @abstractmethod
    def find_artists(self) -> None:
        """Build step: find artists."""

    @abstractmethod
    def sort_artists(self) -> None:
        """Build step: sort the artists that were found."""

    @abstractmethod
    def get_accuracy_scores(self) -> None:
        """Build step: collect accuracy scores for the artists."""

    @abstractmethod
    def get_top_five_results(self) -> None:
        """Build step: keep the top five results."""

    @abstractmethod
    def find_all_albums(self) -> None:
        """Build step: find all albums."""

    @abstractmethod
    def find_all_tracks(self) -> None:
        """Build step: find all tracks."""
|
||||
|
||||
|
||||
class LyricsBuilder(LyricsConcreteBuilder):
    """Concrete builder that assembles a ``Lyrics`` product for one artist.

    The build steps query Musicbrainz for artists, release groups and
    tracks, request lyrics from https://api.lyrics.ovh and derive word-count
    statistics. Steps that return the builder can be chained. Several steps
    read or write JSON files in the current working directory.
    """

    @property
    def product(self) -> Lyrics:
        """The ``Lyrics`` instance currently being built."""
        return self._product

    @property
    def artist(self) -> str:
        """Artist name; assigning it also updates the product."""
        return self._artist

    @artist.setter
    def artist(self, artist: str) -> None:
        self._artist = artist
        self._product.artist = artist

    @property
    def country(self) -> Union[str, None]:
        """Country filter; assigning it also updates the product."""
        return self._country

    @country.setter
    def country(self, country: Union[str, None]) -> None:
        self._country = country
        self._product.country = country

    @property
    def artist_id(self) -> str:
        """Musicbrainz artist id; assigning it also updates the product."""
        return self._artist_id

    @artist_id.setter
    def artist_id(self, artist_id: str) -> None:
        self._artist_id = artist_id
        self._product.artist_id = artist_id

    @property
    def all_albums_with_tracks(self) -> list:
        """Albums with their tracks; assigning also updates the product."""
        return self._all_albums_with_tracks

    @all_albums_with_tracks.setter
    def all_albums_with_tracks(self, all_albums_with_tracks: list) -> None:
        self._all_albums_with_tracks = all_albums_with_tracks
        self._product.all_albums_with_tracks = all_albums_with_tracks

    @staticmethod
    def set_useragent() -> None:
        """Set the user agent through ``authenticate.set_useragent``."""
        authenticate.set_useragent()

    def __init__(self) -> None:
        """Create the builder with a fresh product."""
        self.reset()

    def reset(self) -> None:
        """Replace the current product with a new ``Lyrics`` instance."""
        self._product = Lyrics()

    def find_artists(self) -> LyricsBuilder:
        """Search Musicbrainz for ``artist`` (optionally within ``country``).

        The raw response is stored in ``musicbrainz_artists``.
        """
        self.musicbrainz_artists = musicbrainzngs.search_artists(
            artist=self.artist, country=self.country
        )
        return self

    def sort_artists(self) -> LyricsBuilder:
        """Map each found artist id to a display name.

        The name is ``"<name> | <disambiguation>"`` when a disambiguation is
        present and ``"<name>"`` otherwise; stored in ``_sort_names``.
        """
        names = {}
        for entry in self.musicbrainz_artists['artist-list']:
            label = f'{entry.get("name")}'
            disambiguation = entry.get('disambiguation')
            if disambiguation is not None:
                label = f'{label} | {disambiguation}'
            names[entry.get('id')] = label
        self._sort_names = names
        return self

    def get_accuracy_scores(self) -> LyricsBuilder:
        """Map each found artist id to its integer ``ext:score`` (default 0)."""
        self._accuracy_scores = {
            entry.get('id'): int(entry.get('ext:score', '0'))
            for entry in self.musicbrainz_artists['artist-list']
        }
        return self

    def get_top_five_results(self) -> LyricsBuilder:
        """Keep the five artist ids with the highest accuracy score.

        Ties keep their original order because the sort is stable.
        """
        ranked = sorted(
            self._accuracy_scores.items(),
            key=lambda pair: pair[1],
            reverse=True,
        )
        self._top_five_results = dict(ranked[:5])
        return self

    def find_all_albums(self) -> LyricsBuilder:
        """Collect all release groups of type ``Album`` for ``artist_id``.

        Pages through ``musicbrainzngs.browse_release_groups`` 100 entries
        at a time until an empty page comes back. The result is stored in
        ``release_group_ids`` (release-group id -> title).
        """
        limit = 100
        offset = 0
        release_group_ids = addict.Dict()

        response = addict.Dict(
            musicbrainzngs.browse_release_groups(
                artist=self.artist_id, release_type=['album'], limit=limit
            )
        )

        with click.progressbar(
            length=response['release-group-count'],
            label=f'Searching Musicbrainz for all albums from {self.artist}',
        ) as bar:
            while True:
                release_groups = response['release-group-list']
                if not release_groups:
                    break
                for group in release_groups:
                    if group.type == 'Album':
                        release_group_ids[group.id] = group.title
                bar.update(len(release_groups))

                # Request the next page.
                offset += limit
                response = addict.Dict(
                    musicbrainzngs.browse_release_groups(
                        artist=self.artist_id,
                        release_type=['album'],
                        limit=limit,
                        offset=offset,
                    )
                )

        self.release_group_ids = release_group_ids
        click.echo(f'Found {len(release_group_ids)} albums for {self.artist}.')
        return self

    def find_all_tracks(self) -> LyricsBuilder:
        """Collect the track titles of every album in ``release_group_ids``.

        For each release group the release whose first medium has the most
        tracks is used (the first one on ties). Results go to ``all_albums``
        as ``{"<title> [<year>]": [track titles]}`` entries and the running
        total to ``total_track_count``. Only the first medium of a release
        is read.
        """
        self.all_albums = []
        self.total_track_count = 0

        with click.progressbar(
            length=len(self.release_group_ids),
            label=(
                'Searching Musicbrainz for all tracks in all albums for '
                f'{self.artist}'
            ),
        ) as bar:
            for group_id, album_title in self.release_group_ids.items():
                response = addict.Dict(
                    musicbrainzngs.browse_releases(
                        release_group=group_id,
                        release_type=['album'],
                        includes=['recordings'],
                        limit=100,
                    )
                )
                releases = response['release-list']
                if not releases:
                    # No release to inspect; skip rather than fail on the
                    # maximum of an empty sequence.
                    bar.update(1)
                    continue

                release = max(
                    releases,
                    key=lambda rel: rel['medium-list'][0]['track-count'],
                )
                first_medium = release['medium-list'][0]
                self.total_track_count += first_medium['track-count']

                # Releases without a date are labelled 'Missing'.
                release_date = release.get('date')
                if isinstance(release_date, str):
                    album_year = release_date.split('-')[0]
                else:
                    album_year = 'Missing'

                track_titles = [
                    track.recording.title
                    for track in first_medium['track-list']
                ]
                self.all_albums.append(
                    addict.Dict(
                        (album_title + f' [{album_year}]', track_titles)
                    )
                )
                bar.update(1)

        click.echo(
            f'Found {self.total_track_count} tracks across'
            f' {len(self.release_group_ids)} albums for {self.artist}'
        )
        return self

    def find_lyrics_urls(self) -> LyricsBuilder:
        """Build the lyrics API URL for every track in ``all_albums``.

        Results go to ``all_albums_lyrics_url`` as one
        ``{album: [urls]}`` entry per album.
        """
        self.all_albums_lyrics_url = []
        for album in self.all_albums:
            for title, tracks in album.items():
                urls = [
                    self.construct_lyrics_url(self.artist, track)
                    for track in tracks
                ]
                self.all_albums_lyrics_url.append(addict.Dict((title, urls)))
        return self

    def find_all_lyrics(self) -> LyricsBuilder:
        """Request the lyrics behind every URL in ``all_albums_lyrics_url``.

        Results go to ``all_albums_lyrics`` and are also written to
        ``all_albums_lyrics.json`` in the current working directory.
        """
        self.all_albums_lyrics = []

        with click.progressbar(
            length=self.total_track_count,
            label=f'Finding lyrics for {self.total_track_count}'
            f' tracks for {self.artist}. This may take some time! ☕️',
        ) as bar:
            for album in self.all_albums_lyrics_url:
                for title, urls in album.items():
                    lyrics = [
                        self.request_lyrics_from_url(url) for url in urls
                    ]
                    self.all_albums_lyrics.append(
                        addict.Dict((title, lyrics))
                    )
                    bar.update(len(urls))

        with open(f'{os.getcwd()}/all_albums_lyrics.json', 'w') as f:
            json.dump(self.all_albums_lyrics, f, indent=2)
        return self

    def count_words_in_lyrics(self) -> LyricsBuilder:
        """Count word occurrences for every track in ``all_albums_lyrics``.

        Each track becomes the ``Counter.most_common()`` list of its
        whitespace-separated words, or the string ``'No Lyrics'`` when no
        lyrics were found. Results go to ``all_albums_lyrics_count``.
        """
        self.all_albums_lyrics_count = []
        with click.progressbar(
            length=self.total_track_count, label='Processing lyrics'
        ) as bar:
            for album in self.all_albums_lyrics:
                for title, lyrics in album.items():
                    counts = [
                        Counter(text.split()).most_common()
                        if text is not None
                        else 'No Lyrics'
                        for text in lyrics
                    ]
                    self.all_albums_lyrics_count.append(
                        addict.Dict((title, counts))
                    )
                    bar.update(len(lyrics))
        click.echo(f'Processed lyrics for {self.total_track_count} tracks.')
        return self

    def calculate_average_all_albums(self) -> LyricsBuilder:
        """Sum the word counts of every track, grouped by album.

        Reads ``lyrics_count.json`` from the current working directory,
        stores one ``{album: [totals]}`` entry per album in
        ``all_albums_lyrics_sum`` (tracks without lyrics keep the string
        ``'No Lyrics'``) and writes the result to
        ``lyrics_sum_all_album.json``.
        """
        self.all_albums_lyrics_sum = []
        with open(f'{os.getcwd()}/lyrics_count.json', 'r') as f:
            album_lyrics = json.load(f)

        for entry in album_lyrics:
            for album, lyrics_list in entry.items():
                totals = []
                for song in lyrics_list:
                    if song != 'No Lyrics':
                        # Each item is a [word, occurrences] pair.
                        totals.append(sum(pair[1] for pair in song))
                    else:
                        totals.append('No Lyrics')
                self.all_albums_lyrics_sum.append(
                    addict.Dict((album, totals))
                )

        with open(f'{os.getcwd()}/lyrics_sum_all_album.json', 'w+') as f:
            json.dump(self.all_albums_lyrics_sum, f)
        return self

    def calculate_final_average_by_album(self) -> None:
        """Compute descriptive statistics per album.

        Reads ``lyrics_sum_all_album.json``, keeps only integer totals
        greater than 1, stores the statistics in ``album_statistics`` and
        writes them to ``album_statistics.json``.
        """
        self.album_statistics = addict.Dict()
        with open(f'{os.getcwd()}/lyrics_sum_all_album.json', 'r') as f:
            album_lyrics = json.load(f)

        for entry in album_lyrics:
            for album, counts in entry.items():
                # Drop 'No Lyrics' markers and totals of 1 or less.
                word_counts = [
                    c for c in counts if isinstance(c, int) and c > 1
                ]
                self.album_statistics[album] = (
                    self.get_descriptive_statistics(word_counts)
                )

        with open(f'{os.getcwd()}/album_statistics.json', 'w') as f:
            json.dump(self.album_statistics, f, indent=2)

    def calculate_final_average_by_year(self) -> None:
        """Compute descriptive statistics per release year.

        Reads ``lyrics_sum_all_album.json``, merges the totals of albums
        that share the year found in the ``[year]`` suffix of the album key,
        keeps only integer totals greater than 1 and stores the statistics
        in ``year_statistics``.
        """
        self.year_statistics = addict.Dict()
        with open(f'{os.getcwd()}/lyrics_sum_all_album.json', 'r') as f:
            album_lyrics = json.load(f)

        # Merge the totals of all albums released in the same year.
        counts_by_year = {}
        for entry in album_lyrics:
            for album, counts in entry.items():
                year = album.split('[')[-1].strip(']')
                counts_by_year.setdefault(year, []).extend(counts)

        for year, counts in counts_by_year.items():
            word_counts = [c for c in counts if isinstance(c, int) and c > 1]
            self.year_statistics[year] = self.get_descriptive_statistics(
                word_counts
            )

    @staticmethod
    def construct_lyrics_url(artist: str, song: str) -> str:
        """Return the lyrics.ovh URL for ``song`` by ``artist``.

        Both values are percent-encoded as single path segments, so names
        containing ``/``, ``?``, ``#``, ``&`` or quotes stay inside their
        segment.
        """
        from urllib.parse import quote

        lyrics_api_base = 'https://api.lyrics.ovh/v1'
        artist_segment = quote(artist, safe='')
        song_segment = quote(song, safe='')
        return f'{lyrics_api_base}/{artist_segment}/{song_segment}'

    @staticmethod
    def request_lyrics_from_url(url: str) -> Union[str, None]:
        """Fetch ``url`` and return its lyrics without punctuation.

        Returns ``None`` when the response is not valid JSON or has no
        ``'lyrics'`` key. Network errors raised by ``requests`` propagate.
        """
        # NOTE(review): no timeout is passed, so a stalled server blocks the
        # run; consider adding one once an acceptable limit is agreed.
        resp = requests.get(url)

        try:
            return LyricsBuilder.strip_punctuation(resp.json()['lyrics'])
        except (KeyError, ValueError):
            # ValueError also covers json.JSONDecodeError and the decode
            # errors raised by requests itself.
            return None

    @staticmethod
    def strip_punctuation(word: str) -> str:
        """Return ``word`` with every ``string.punctuation`` char removed."""
        return word.translate(str.maketrans('', '', string.punctuation))

    @staticmethod
    def get_descriptive_statistics(nums: list) -> Union[Dict[str, int], None]:
        """Return rounded-up descriptive statistics for ``nums``.

        Returns ``None`` for an empty input. Every statistic except
        ``count`` is passed through ``math.ceil``.
        """
        if len(nums) == 0:
            return None

        def rounded_percentile(q: int) -> int:
            return math.ceil(np.percentile(nums, q))

        return addict.Dict(
            {
                'avg': math.ceil(np.mean(nums)),
                'median': math.ceil(np.median(nums)),
                'std': math.ceil(np.std(nums)),
                'max': math.ceil(np.max(nums)),
                'min': math.ceil(np.min(nums)),
                'p_10': rounded_percentile(10),
                'p_25': rounded_percentile(25),
                'p_75': rounded_percentile(75),
                'p_90': rounded_percentile(90),
                'count': len(nums),
            }
        )
|
||||
|
||||
|
||||
class LyricsClickDirector:
    """Director that runs the ``LyricsBuilder`` steps for the click CLI.

    A builder has to be assigned to ``builder`` before any step is called.
    Every step returns the director so calls can be chained.
    """

    def __init__(self) -> None:
        self._builder = None

    @property
    def builder(self) -> LyricsBuilder:
        """Builder driven by this director (``None`` until assigned)."""
        return self._builder

    @builder.setter
    def builder(self, builder: LyricsBuilder) -> None:
        self._builder = builder

    @staticmethod
    def _display_name(artist) -> str:
        """Return the artist name to show in messages.

        A plain string is shown in full; for any other sequence the first
        element is shown, as the original ``artist[0]`` lookup did.
        """
        return artist if isinstance(artist, str) else artist[0]

    def _get_initial_artists(
        self, artist: str, country: str
    ) -> LyricsClickDirector:
        """Search for ``artist`` and rank the five best matches."""
        self.builder.artist = artist
        self.builder.country = country
        self.builder.set_useragent()
        self.builder.find_artists()
        self.builder.sort_artists()
        self.builder.get_accuracy_scores()
        self.builder.get_top_five_results()
        return self

    def _confirm_final_artist(self) -> LyricsClickDirector:
        """Ask the user which of the top results is the wanted artist.

        Stores the chosen name and id on the director and the builder.

        Raises
        ------
        SystemExit
            If there are no results (or the last score is above 100).
        """
        top_results = self.builder._top_five_results
        scores = list(top_results.values())

        # Same outcome as the former loop: only the last score decided.
        if not scores or scores[-1] > 100:
            click.echo(
                f'Musicbrainz did not find any results for '
                f'{self._display_name(self.builder.artist)}. Check the '
                'spelling or consider '
                'alternative names that the artist/group may go by.'
            )
            raise SystemExit()

        click.echo(
            click.style(
                f'Musicbrainz found several results for '
                f'{self._display_name(self.builder.artist)}. Which '
                'artist/group do you want'
                '?',
                fg='green',
            )
        )
        candidates = list(top_results)
        for position, candidate_id in enumerate(candidates, start=1):
            click.echo(
                f'[{position}] {self.builder._sort_names.get(candidate_id)}'
                f' ({self.builder._accuracy_scores.get(candidate_id)}% match)'
            )
        chosen = int(
            click.prompt(
                click.style('Enter choice, default is', blink=True),
                default=1,
                type=click.IntRange(1, len(top_results)),
            )
        )
        choice = candidates[chosen - 1]
        click.echo(f'You chose {self.builder._sort_names.get(choice)}')

        # Display names look like "<name> | <disambiguation>"; strip the
        # space left in front of the separator so the lyrics lookup gets
        # the bare name.
        self._artist = self.builder._sort_names.get(choice).split('|')[0].strip()
        self._artist_id = choice

        # Set artist and artist id on builder + product.
        self.builder.artist_id = self._artist_id
        self.builder.artist = self._artist
        return self

    def _query_for_data(self) -> LyricsClickDirector:
        """Fetch all albums and tracks and store them on the product."""
        self.builder.find_all_albums()
        self.builder.find_all_tracks()
        self.builder._product.all_albums_with_tracks = self.builder.all_albums
        return self

    def _get_lyrics(self) -> LyricsClickDirector:
        """Fetch and count lyrics; also writes ``lyrics_count.json``."""
        self.builder.find_lyrics_urls()
        self.builder.find_all_lyrics()
        self.builder._product.all_albums_with_lyrics = (
            self.builder.all_albums_lyrics
        )
        self.builder.count_words_in_lyrics()
        with open(f'{os.getcwd()}/lyrics_count.json', 'w+') as file:
            json.dump(
                self.builder.all_albums_lyrics_count,
                file,
                indent=2,
                sort_keys=True,
            )
        self.builder._product.all_albums_lyrics_count = (
            self.builder.all_albums_lyrics_count
        )
        return self

    def _calculate_basic_statistics(self) -> LyricsClickDirector:
        """Sum the word counts per track and store them on the product."""
        self.builder.calculate_average_all_albums()
        self.builder._product.all_albums_lyrics_sum = (
            self.builder.all_albums_lyrics_sum
        )
        return self

    def _calculate_descriptive_statistics(self) -> LyricsClickDirector:
        """Compute per-album and per-year statistics for the product."""
        self.builder.calculate_final_average_by_album()
        self.builder.calculate_final_average_by_year()
        self.builder._product.album_statistics = self.builder.album_statistics
        self.builder._product.year_statistics = self.builder.year_statistics
        return self

    def _dev(self) -> LyricsClickDirector:
        """Development shortcut: recompute statistics and print reports.

        Uses the JSON files already present in the working directory and a
        hard-coded artist name.
        """
        self._calculate_descriptive_statistics()
        self.builder._product.artist_id = None
        self.builder._product.artist = 'Katzenjammer'
        self.builder._product.show_summary()
        self.builder._product.show_summary_statistics(group_by='year')
        return self

    @staticmethod
    def _get_product(builder_inst: LyricsBuilder) -> Lyrics:
        """Return the product held by ``builder_inst``."""
        return builder_inst._product
|
||||
|
||||
|
||||
@dataclass
class Lyrics:
    """Lyrics data and summary reports for one artist.

    The explicit no-argument ``__init__`` below is kept by ``@dataclass``,
    so the fields are unset until something assigns them.
    """

    artist_id: str
    artist: str
    country: Union[str, None]
    all_albums_with_tracks: List[Dict[str, List[str]]]
    all_albums_with_lyrics: List[Dict[str, List[str]]]
    # Per track: [word, occurrences] pairs.
    all_albums_lyrics_count: List[Dict[str, List[List[Union[str, int]]]]]
    # Per track: total word count, or the string 'No Lyrics'.
    all_albums_lyrics_sum: List[Dict[str, List[Union[int, str]]]]
    album_statistics: Dict[str, Dict[str, int]]
    year_statistics: Dict[str, Dict[str, int]]

    __attributes = [
        'all_albums_with_tracks',
        'all_albums_with_lyrics',
        'all_albums_lyrics_count',
        'all_albums_lyrics_sum',
        'album_statistics',
        'year_statistics',
    ]

    def __init__(self) -> None:
        pass

    def show_summary(self) -> 'Lyrics':
        """Print the average of the per-album average word counts.

        Raises
        ------
        SystemExit
            If no album has an average (no lyrics were found at all).
        """
        all_averages = []
        for album_stats in self.album_statistics.values():
            # Albums without usable lyrics have no statistics object.
            try:
                all_averages.append(album_stats['avg'])
            except (TypeError, ValueError):
                pass

        # Check explicitly instead of relying on numpy/math failing on an
        # empty list.
        if not all_averages:
            click.echo(
                'Oops! https://lyrics.ovh couldn\'t find any lyrics across any'
                ' album. This is caused by inconsistent Artist names from'
                ' Musicbrainz and lyrics.ovh. Try another artist.'
            )
            raise SystemExit

        final_average = math.ceil(np.mean(all_averages))

        output = BeautifulTable(max_width=200)
        output.set_style(BeautifulTable.STYLE_BOX_ROUNDED)
        output.column_headers = [
            'Average number of words in tracks across all albums\n'
            f'for {self.artist}'
        ]
        output.append_row([final_average])
        click.echo(output)
        return self

    def show_summary_statistics(self, group_by: str) -> 'Lyrics':
        """Print a table of descriptive statistics.

        ``group_by`` selects the ``<group_by>_statistics`` attribute; this
        class defines ``album`` and ``year``.
        """
        stats_obj = getattr(self, f'{group_by}_statistics')
        stats = [
            'avg',
            'std',
            'min',
            'max',
            'median',
            'count',
            'p_10',
            'p_25',
            'p_75',
            'p_90',
        ]
        output_0 = BeautifulTable(max_width=200)
        output_0.set_style(BeautifulTable.STYLE_BOX_ROUNDED)
        output_0.column_headers = [
            'Descriptive statistics for number of words in tracks across all'
            f' {group_by}s\nfor {self.artist}'
        ]
        output_1 = BeautifulTable(max_width=200)
        output_1.set_style(BeautifulTable.STYLE_BOX_ROUNDED)
        output_1.column_headers = [group_by, *stats]

        for group, group_stats in stats_obj.items():
            # Groups without statistics (no ``get``) are left out.
            try:
                row = [group, *(group_stats.get(name) for name in stats)]
            except AttributeError:
                continue
            output_1.append_row(row)

        output_0.append_row([output_1])
        click.echo(output_0)
        return self
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,140 @@
|
||||
from __future__ import annotations
|
||||
from typing import Union, Dict, List
|
||||
from dataclasses import dataclass
|
||||
import math
|
||||
|
||||
from beautifultable import BeautifulTable
|
||||
import click
|
||||
import numpy as np
|
||||
|
||||
|
||||
@dataclass
class Lyrics:
    """Lyrics object for an artist.

    The explicit no-argument ``__init__`` below is kept by ``@dataclass``,
    so the fields are unset until something assigns them.
    """

    artist_id: str
    artist: str
    country: Union[str, None]
    all_albums_with_tracks: List[Dict[str, List[str]]]
    all_albums_with_lyrics: List[Dict[str, List[str]]]
    # Per track: [word, occurrences] pairs.
    all_albums_lyrics_count: List[Dict[str, List[List[Union[str, int]]]]]
    # Per track: total word count, or the string 'No Lyrics'.
    all_albums_lyrics_sum: List[Dict[str, List[Union[int, str]]]]
    album_statistics: Dict[str, Dict[str, int]]
    year_statistics: Dict[str, Dict[str, int]]

    _attributes = [
        'all_albums_with_tracks',
        'all_albums_with_lyrics',
        'all_albums_lyrics_count',
        'all_albums_lyrics_sum',
        'album_statistics',
        'year_statistics',
    ]

    def __init__(self) -> None:
        pass

    def show_summary(self) -> 'Lyrics':
        """Show the average word count for all lyrics.

        Returns
        -------
        Lyrics
            The instance itself.

        Raises
        ------
        SystemExit
            If no album has an average (no lyrics were found at all).
        """
        all_averages = []
        for album_stats in self.album_statistics.values():
            # Albums without usable lyrics have no statistics object.
            try:
                all_averages.append(album_stats['avg'])
            except (TypeError, ValueError):
                pass

        # Check explicitly instead of relying on numpy/math failing on an
        # empty list.
        if not all_averages:
            click.echo(
                'Oops! https://lyrics.ovh couldn\'t find any lyrics across any'
                ' album. This is caused by inconsistent Artist names from'
                ' Musicbrainz and lyrics.ovh. Try another artist.'
            )
            raise SystemExit

        final_average = math.ceil(np.mean(all_averages))

        output = BeautifulTable(max_width=200)
        output.set_style(BeautifulTable.STYLE_BOX_ROUNDED)
        output.column_headers = [
            'Average number of words in tracks across all albums\n'
            f'for {self.artist}'
        ]
        output.append_row([final_average])
        click.echo(output)
        return self

    def show_summary_statistics(self, group_by: str) -> 'Lyrics':
        """Show a table of descriptive statistics.

        Parameters
        ----------
        group_by : str
            Parameter to group statistics by. Valid options are album or
            year.

        Returns
        -------
        Lyrics
            The instance itself.
        """
        stats_obj = getattr(self, f'{group_by}_statistics')
        stats = [
            'avg',
            'std',
            'min',
            'max',
            'median',
            'count',
            'p_10',
            'p_25',
            'p_75',
            'p_90',
        ]
        output_0 = BeautifulTable(max_width=200)
        output_0.set_style(BeautifulTable.STYLE_BOX_ROUNDED)
        output_0.column_headers = [
            'Descriptive statistics for number of words in tracks across all'
            f' {group_by}s\nfor {self.artist}'
        ]
        output_1 = BeautifulTable(max_width=200)
        output_1.set_style(BeautifulTable.STYLE_BOX_ROUNDED)
        output_1.column_headers = [group_by, *stats]

        for group, group_stats in stats_obj.items():
            # Groups without statistics (no ``get``) are left out.
            try:
                row = [group, *(group_stats.get(name) for name in stats)]
            except AttributeError:
                continue
            output_1.append_row(row)

        output_0.append_row([output_1])
        click.echo(output_0)
        return self
|
||||
|
||||
@@ -0,0 +1,568 @@
|
||||
from __future__ import annotations
|
||||
from collections import Counter
|
||||
import html
|
||||
import json
|
||||
import math
|
||||
import os
|
||||
import string
|
||||
from typing import Union, Dict
|
||||
|
||||
import addict
|
||||
import click
|
||||
import musicbrainzngs
|
||||
import numpy as np
|
||||
import requests
|
||||
|
||||
from musicbrainzapi.api.lyrics.concrete_builder import LyricsConcreteBuilder
|
||||
from musicbrainzapi.api.lyrics import Lyrics
|
||||
from musicbrainzapi.api import authenticate
|
||||
|
||||
|
||||
class LyricsBuilder(LyricsConcreteBuilder):
|
||||
"""docstring for LyricsBuilder
|
||||
|
||||
Attributes
|
||||
----------
|
||||
album_statistics : addict.Dict
|
||||
Dictionary containing album statistics
|
||||
all_albums : list
|
||||
List of all albums + track titles
|
||||
all_albums_lyrics : list
|
||||
List of all albums + track lyrics
|
||||
all_albums_lyrics_count : list
|
||||
List of all albums + track lyrics counted by each word
|
||||
all_albums_lyrics_sum : list
|
||||
List of all albums + track lyrics counted and summed up.
|
||||
all_albums_lyrics_url : list
|
||||
List of all albums + link to lyrics api for each track.
|
||||
musicbrainz_artists : addict.Dict
|
||||
Dictionary of response from Musicbrainzapi
|
||||
release_group_ids : addict.Dict
|
||||
Dictionary of Musicbrainz release-group ids
|
||||
total_track_count : int
|
||||
Total number of tracks across all albums
|
||||
year_statistics : addict.Dict
|
||||
Dictionary containing album statistics
|
||||
"""
|
||||
|
||||
@property
def product(self) -> Lyrics:
    """Return the ``Lyrics`` instance stored in ``_product``."""
    return self._product
|
||||
|
||||
@property
def artist(self) -> str:
    """Return the artist name stored on the builder."""
    return self._artist


@artist.setter
def artist(self, artist: str) -> None:
    """Store ``artist`` on the builder, then on the product."""
    self._artist = self._product.artist = artist
|
||||
|
||||
@property
def country(self) -> Union[str, None]:
    """Return the country stored on the builder."""
    return self._country


@country.setter
def country(self, country: Union[str, None]) -> None:
    """Store ``country`` on the builder, then on the product."""
    self._country = self._product.country = country
|
||||
|
||||
@property
def artist_id(self) -> str:
    """Return the artist id stored on the builder."""
    return self._artist_id


@artist_id.setter
def artist_id(self, artist_id: str) -> None:
    """Store ``artist_id`` on the builder, then on the product."""
    self._artist_id = self._product.artist_id = artist_id
|
||||
|
||||
@property
def all_albums_with_tracks(self) -> list:
    """Return the album/track list stored on the builder."""
    return self._all_albums_with_tracks


@all_albums_with_tracks.setter
def all_albums_with_tracks(self, all_albums_with_tracks: list) -> None:
    """Store the list on the builder, then on the product."""
    self._all_albums_with_tracks = all_albums_with_tracks
    product = self._product
    product.all_albums_with_tracks = all_albums_with_tracks
|
||||
|
||||
@staticmethod
def set_useragent() -> None:
    """Delegate to ``authenticate.set_useragent``."""
    authenticate.set_useragent()
|
||||
|
||||
def __init__(self) -> None:
    """Initialise the builder by calling ``reset``."""
    self.reset()
|
||||
|
||||
def reset(self) -> None:
    """Replace the current product with a newly created ``Lyrics``."""
    fresh_product = Lyrics()
    self._product = fresh_product
|
||||
|
||||
def find_artists(self) -> None:
    """Search Musicbrainz for the builder's artist and country.

    The response of ``musicbrainzngs.search_artists`` is stored in
    ``musicbrainz_artists``.

    Returns
    -------
    LyricsBuilder
        The builder itself.
    """
    search_criteria = {'artist': self.artist, 'country': self.country}
    self.musicbrainz_artists = musicbrainzngs.search_artists(
        **search_criteria
    )
    return self
|
||||
|
||||
def sort_artists(self) -> None:
    """Map every artist id in the search response to a display name.

    The name is ``"<name> | <disambiguation>"`` when the entry has a
    disambiguation and ``"<name>"`` otherwise. The mapping is stored in
    ``_sort_names``.

    Returns
    -------
    LyricsBuilder
        The builder itself.
    """
    names = {}
    for entry in self.musicbrainz_artists['artist-list']:
        label = f'{entry.get("name")}'
        disambiguation = entry.get('disambiguation')
        if disambiguation is not None:
            label = f'{label} | {disambiguation}'
        names[entry.get('id')] = label
    self._sort_names = names
    return self
|
||||
|
||||
def get_accuracy_scores(self) -> None:
    """Map every artist id in the search response to its integer score.

    The score is read from ``ext:score`` and defaults to 0 when the key is
    missing. The mapping is stored in ``_accuracy_scores``.

    Returns
    -------
    LyricsBuilder
        The builder itself.
    """
    self._accuracy_scores = {
        entry.get('id'): int(entry.get('ext:score', '0'))
        for entry in self.musicbrainz_artists['artist-list']
    }
    return self
|
||||
|
||||
def get_top_five_results(self) -> None:
    """Keep the five artist ids with the highest accuracy score.

    Entries are ordered from highest to lowest score; equal scores keep
    their original order. The mapping is stored in ``_top_five_results``.

    Returns
    -------
    LyricsBuilder
        The builder itself.
    """
    ranked = sorted(
        self._accuracy_scores.items(),
        key=lambda pair: pair[1],
        reverse=True,
    )
    self._top_five_results = dict(ranked[:5])
    return self
|
||||
|
||||
def find_all_albums(self) -> None:
    """Find all albums for the chosen artist.

    Pages through ``musicbrainzngs.browse_release_groups`` for
    ``self.artist_id`` (100 release groups per request) until a page
    comes back empty, keeping every release group whose ``type`` is
    ``'Album'``.  The resulting ``id -> title`` mapping is stored on
    ``self.release_group_ids`` and a summary line is echoed.

    Returns
    -------
    LyricsBuilder
        The builder itself (the method ends with ``return self`` even
        though it is annotated ``None``).
    """
    limit = 100
    offset = 0

    first_page = addict.Dict(
        musicbrainzngs.browse_release_groups(
            artist=self.artist_id, release_type=['album'], limit=limit
        )
    )

    total_releases = first_page['release-group-count']
    response_releases = len(first_page['release-group-list'])

    with click.progressbar(
        length=total_releases,
        label=f'Searching Musicbrainz for all albums from {self.artist}',
    ) as bar:

        release_group_ids = addict.Dict(
            (i.id, i.title)
            for i in first_page['release-group-list']
            if i.type == 'Album'
        )

        bar.update(response_releases)

        # Keep requesting pages until one comes back empty.
        while response_releases > 0:
            offset += limit

            next_page = addict.Dict(
                musicbrainzngs.browse_release_groups(
                    artist=self.artist_id,
                    release_type=['album'],
                    limit=limit,
                    offset=offset,
                )
            )
            groups = next_page['release-group-list']
            response_releases = len(groups)

            for group in groups:
                if group.type == 'Album':
                    release_group_ids[group.id] = group.title
            bar.update(response_releases)

    # No explicit ``del`` of the responses: the second-page variable is
    # never bound when the first page is empty, so deleting it raised
    # UnboundLocalError.  The locals are released on return anyway.
    self.release_group_ids = release_group_ids
    click.echo(f'Found {len(release_group_ids)} albums for {self.artist}.')

    return self
|
||||
|
||||
def find_all_tracks(self) -> None:
    """Find all tracks from all albums.

    For every release group in ``self.release_group_ids`` the releases
    are fetched with ``musicbrainzngs.browse_releases`` and the release
    whose first medium has the most tracks is used.  Its track titles
    are stored under ``'<album title> [<year>]'`` (``'Missing'`` when
    the release has no date) and appended to ``self.all_albums``;
    ``self.total_track_count`` accumulates the track counts.

    A release group for which no releases are returned is skipped
    instead of aborting the whole search.

    Returns
    -------
    LyricsBuilder
        The builder itself (the method ends with ``return self`` even
        though it is annotated ``None``).
    """
    self.all_albums = list()
    total_albums = len(self.release_group_ids)
    self.total_track_count = 0

    with click.progressbar(
        length=total_albums,
        label=(
            'Searching Musicbrainz for all tracks in all albums for '
            f'{self.artist}'
        ),
    ) as bar:
        for group_id, alb in self.release_group_ids.items():
            response = addict.Dict(
                musicbrainzngs.browse_releases(
                    release_group=group_id,
                    release_type=['album'],
                    includes=['recordings'],
                    limit=100,
                )
            )

            releases = response['release-list']
            if not releases:
                # max() over an empty list would raise ValueError.
                bar.update(1)
                continue

            # Only the first medium of each release is inspected.
            album_track_count = [
                i['medium-list'][0]['track-count'] for i in releases
            ]
            most_tracks = max(album_track_count)
            self.total_track_count += most_tracks

            max_track_pos = album_track_count.index(most_tracks)
            fullest_release = releases[max_track_pos]

            try:
                album_year = fullest_release.date.split('-')[0]
            # A missing date is an empty addict.Dict, which cannot be
            # called, so ``.split(...)`` raises TypeError.
            except TypeError:
                album_year = 'Missing'

            album_tracks = addict.Dict(
                (
                    alb + f' [{album_year}]',
                    [
                        i.recording.title
                        for i in fullest_release['medium-list'][0][
                            'track-list'
                        ]
                    ],
                )
            )

            self.all_albums.append(album_tracks)

            bar.update(1)

    click.echo(
        f'Found {self.total_track_count} tracks across'
        f' {len(self.release_group_ids)} albums for {self.artist}'
    )
    return self
|
||||
|
||||
def find_lyrics_urls(self) -> None:
    """Construct the lyrics api URL of every track of every album.

    Mirrors ``self.all_albums``: each ``album -> [track titles]`` entry
    becomes an ``album -> [urls]`` entry, built with
    ``construct_lyrics_url`` and ``self.artist``, and is appended to
    ``self.all_albums_lyrics_url``.

    Returns
    -------
    LyricsBuilder
        The builder itself (the method ends with ``return self`` even
        though it is annotated ``None``).
    """
    self.all_albums_lyrics_url = list()
    for album_entry in self.all_albums:
        for alb, tracks in album_entry.items():
            track_urls = []
            for title in tracks:
                track_urls.append(
                    self.construct_lyrics_url(self.artist, title)
                )
            self.all_albums_lyrics_url.append(
                addict.Dict((alb, track_urls))
            )
    return self
|
||||
|
||||
def find_all_lyrics(self) -> None:
    """Get lyrics for each track from the lyrics api.

    Requests every URL in ``self.all_albums_lyrics_url`` through
    ``request_lyrics_from_url`` and appends one ``album -> [lyrics]``
    entry per album to ``self.all_albums_lyrics``.  The collected data
    is then written to ``all_albums_lyrics.json`` in the current
    working directory.

    Returns
    -------
    LyricsBuilder
        The builder itself (the method ends with ``return self`` even
        though it is annotated ``None``).
    """
    self.all_albums_lyrics = list()

    progress = click.progressbar(
        length=self.total_track_count,
        label=f'Finding lyrics for {self.total_track_count}'
        f' tracks for {self.artist}. This may take some time! ☕️',
    )
    with progress as bar:
        bar.update(5)
        for album_entry in self.all_albums_lyrics_url:
            for alb, urls in album_entry.items():
                track_total = len(urls)
                fetched = []
                for url in urls:
                    fetched.append(self.request_lyrics_from_url(url))
                self.all_albums_lyrics.append(addict.Dict((alb, fetched)))
                bar.update(track_total)

    with open(f'{os.getcwd()}/all_albums_lyrics.json', 'w') as f:
        json.dump(self.all_albums_lyrics, f, indent=2)
    return self
|
||||
|
||||
def count_words_in_lyrics(self) -> None:
    """Count all words in each track.

    For every album in ``self.all_albums_lyrics`` each track's lyrics
    are split on whitespace and counted with ``Counter.most_common``;
    a track whose lyrics are ``None`` is recorded as ``'No Lyrics'``.
    One ``album -> [counts]`` entry per album is appended to
    ``self.all_albums_lyrics_count``.

    Returns
    -------
    LyricsBuilder
        The builder itself (the method ends with ``return self`` even
        though it is annotated ``None``).
    """
    self.all_albums_lyrics_count = list()
    with click.progressbar(
        length=self.total_track_count, label='Processing lyrics'
    ) as bar:
        for album_entry in self.all_albums_lyrics:
            for alb, lyrics in album_entry.items():
                track_total = len(lyrics)
                bar.update(1)
                word_counts = []
                for text in lyrics:
                    if text is None:
                        word_counts.append('No Lyrics')
                    else:
                        word_counts.append(
                            Counter(text.split()).most_common()
                        )
                self.all_albums_lyrics_count.append(
                    addict.Dict((alb, word_counts))
                )
                bar.update(track_total - 1)
    click.echo(f'Processed lyrics for {self.total_track_count} tracks.')
    return self
|
||||
|
||||
def calculate_average_all_albums(self) -> None:
    """Total the number of words of every track, album by album.

    Reads ``self.all_albums_lyrics_count`` (``album -> [word counts]``)
    and appends one ``album -> [totals]`` entry per album to
    ``self.all_albums_lyrics_sum``.  A track total is the sum of the
    counts of its ``(word, count)`` pairs; tracks recorded as
    ``'No Lyrics'`` keep that marker in place of a number.

    Returns
    -------
    None
    """
    self.all_albums_lyrics_sum = list()
    for album_entry in self.all_albums_lyrics_count:
        for album, lyrics_list in album_entry.items():
            track_totals = [
                'No Lyrics'
                if word_counts == 'No Lyrics'
                else sum(pair[1] for pair in word_counts)
                for word_counts in lyrics_list
            ]
            # Build the single-album mapping directly.  Passing the
            # title and the list as two positional arguments to
            # addict.Dict always raised ValueError, so the result only
            # ever came from the exception handler.
            self.all_albums_lyrics_sum.append(
                addict.Dict((album, track_totals))
            )
|
||||
# print(count)
|
||||
# with open(f'{os.getcwd()}/lyrics_sum_all_album.json', 'w+') as f:
|
||||
# json.dump(self.all_albums_lyrics_sum, f)
|
||||
# return self
|
||||
|
||||
def calculate_final_average_by_album(self) -> None:
    """Calculate descriptive statistics by album.

    For every album in ``self.all_albums_lyrics_sum`` the integer track
    totals greater than 1 are passed to ``get_descriptive_statistics``
    and the result is stored under the album key in
    ``self.album_statistics``.
    """
    self.album_statistics = addict.Dict()
    for album_entry in self.all_albums_lyrics_sum:
        for album, count in album_entry.items():
            # Drop the string markers and the totals of 1 or less.
            usable = [n for n in count if isinstance(n, int) and n > 1]
            stats = self.get_descriptive_statistics(usable)
            self.album_statistics = addict.Dict(
                **self.album_statistics, **addict.Dict((album, stats))
            )
|
||||
# with open(f'{os.getcwd()}/album_statistics.json', 'w') as f:
|
||||
# json.dump(self.album_statistics, f, indent=2)
|
||||
# pprint(self.album_statistics)
|
||||
|
||||
def calculate_final_average_by_year(self) -> None:
    """Calculate descriptive statistics by year.

    The year is taken from the ``[year]`` suffix of each album key in
    ``self.all_albums_lyrics_sum``.  Track totals of albums sharing a
    year are merged, then the integer totals greater than 1 of each
    year are passed to ``get_descriptive_statistics``.  The results are
    stored on ``self.year_statistics``.
    """
    group_by_years = dict()
    self.year_statistics = addict.Dict()

    # Merge years together.  Grouping explicitly replaces the previous
    # approach of rebuilding the mapping and catching the TypeError
    # raised for a repeated year.
    for album_entry in self.all_albums_lyrics_sum:
        for album, count in album_entry.items():
            year = album.split('[')[-1].strip(']')
            # Extend a per-year list so the per-album lists are left
            # untouched.
            group_by_years.setdefault(year, []).extend(count)

    for year, y_count in group_by_years.items():
        # Drop the string markers and the totals of 1 or less.
        usable = [n for n in y_count if isinstance(n, int) and n > 1]
        self.year_statistics[year] = self.get_descriptive_statistics(usable)
|
||||
# pprint(self.year_statistics)
|
||||
|
||||
@staticmethod
|
||||
def construct_lyrics_url(artist: str, song: str) -> str:
|
||||
"""Builds the URL for the lyrics api.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
artist : str
|
||||
Artist
|
||||
song : str
|
||||
Track title
|
||||
|
||||
Returns
|
||||
-------
|
||||
str
|
||||
URL for lyrics from the lyrics api.
|
||||
"""
|
||||
lyrics_api_base = 'https://api.lyrics.ovh/v1'
|
||||
lyrics_api_url = html.escape(f'{lyrics_api_base}/{artist}/{song}')
|
||||
return lyrics_api_url
|
||||
|
||||
@staticmethod
def request_lyrics_from_url(url: str) -> Union[str, None]:
    """Get lyrics from the lyrics api.

    Parameters
    ----------
    url : str
        URL of the track for the lyrics api.

    Returns
    -------
    Union[str, None]
        Lyrics of the track with punctuation removed, or ``None`` when
        the response has no ``'lyrics'`` key or is not valid JSON.
    """
    resp = requests.get(url)

    # No lyrics for a song will return a key of 'error', we pass on this.
    # JSON decode errors are caught as ValueError, the common base of
    # the json and simplejson decode errors.
    try:
        return LyricsBuilder.strip_punctuation(resp.json()['lyrics'])
    except (KeyError, ValueError):
        return None
|
||||
|
||||
@staticmethod
|
||||
def strip_punctuation(word: str) -> str:
|
||||
"""Removes punctuation from lyrics.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
word : str
|
||||
Word to remove punctuation from.
|
||||
|
||||
Returns
|
||||
-------
|
||||
str
|
||||
Same word without any punctuation.
|
||||
"""
|
||||
_strip = word.translate(str.maketrans('', '', string.punctuation))
|
||||
return _strip
|
||||
|
||||
@staticmethod
def get_descriptive_statistics(nums: list) -> Dict[str, int]:
    """Calculate descriptive statistics.

    Every statistic except ``count`` is rounded up with ``math.ceil``.

    Parameters
    ----------
    nums : list
        A list containing total number of words from a track.

    Returns
    -------
    Dict[str, int]
        Dictionary of statistic and value with the keys ``avg``,
        ``median``, ``std``, ``max``, ``min``, ``p_10``, ``p_25``,
        ``p_75``, ``p_90`` and ``count``.  ``None`` is returned when
        ``nums`` is empty.
    """
    if len(nums) == 0:
        return

    def rounded_up(value):
        return math.ceil(value)

    return addict.Dict(
        ('avg', rounded_up(np.mean(nums))),
        ('median', rounded_up(np.median(nums))),
        ('std', rounded_up(np.std(nums))),
        ('max', rounded_up(np.max(nums))),
        ('min', rounded_up(np.min(nums))),
        ('p_10', rounded_up(np.percentile(nums, 10))),
        ('p_25', rounded_up(np.percentile(nums, 25))),
        ('p_75', rounded_up(np.percentile(nums, 75))),
        ('p_90', rounded_up(np.percentile(nums, 90))),
        ('count', len(nums)),
    )
|
||||
|
||||
@@ -0,0 +1,81 @@
|
||||
from __future__ import annotations
|
||||
from abc import ABC, abstractstaticmethod, abstractmethod
|
||||
from typing import Union
|
||||
|
||||
from musicbrainzapi.api import authenticate
|
||||
|
||||
|
||||
class LyricsConcreteBuilder(ABC):
    """Abstract concrete builder for Lyrics.

    Declares the properties and build steps a Lyrics builder has to
    provide.  Every member is abstract, so the class cannot be
    instantiated directly.
    """

    @property
    @abstractmethod
    def product(self) -> None:
        pass

    @property
    @abstractmethod
    def artist(self) -> str:
        pass

    @artist.setter
    @abstractmethod
    def artist(self, artist: str) -> None:
        pass

    @property
    @abstractmethod
    def country(self) -> Union[str, None]:
        pass

    @country.setter
    @abstractmethod
    def country(self, country: Union[str, None]) -> None:
        pass

    @property
    @abstractmethod
    def artist_id(self) -> str:
        pass

    @artist_id.setter
    @abstractmethod
    def artist_id(self, artist_id: str) -> None:
        pass

    # ``abstractstaticmethod`` is deprecated since Python 3.3; stacking
    # ``staticmethod`` on ``abstractmethod`` is the supported spelling.
    @staticmethod
    @abstractmethod
    def set_useragent():
        authenticate.set_useragent()

    @abstractmethod
    def __init__(self) -> None:
        pass

    @abstractmethod
    def reset(self) -> None:
        pass

    @abstractmethod
    def find_artists(self) -> None:
        pass

    @abstractmethod
    def sort_artists(self) -> None:
        pass

    @abstractmethod
    def get_accuracy_scores(self) -> None:
        pass

    @abstractmethod
    def get_top_five_results(self) -> None:
        pass

    @abstractmethod
    def find_all_albums(self) -> None:
        pass

    @abstractmethod
    def find_all_tracks(self) -> None:
        pass
|
||||
|
||||
@@ -0,0 +1,202 @@
|
||||
from __future__ import annotations
|
||||
import json
|
||||
import os
|
||||
|
||||
import click
|
||||
|
||||
from musicbrainzapi.api.lyrics.builder import LyricsBuilder
|
||||
from musicbrainzapi.api.lyrics import Lyrics
|
||||
|
||||
|
||||
class LyricsClickDirector:
    """Director for Lyrics builder.

    Drives a ``LyricsBuilder`` through its steps in order and copies the
    intermediate results onto the builder's product.  The builder has to
    be assigned through the ``builder`` property before any step runs.
    """

    def __init__(self) -> None:
        self._builder = None

    @property
    def builder(self) -> LyricsBuilder:
        return self._builder

    @builder.setter
    def builder(self, builder: LyricsBuilder) -> None:
        self._builder = builder

    def _get_initial_artists(
        self, artist: str, country: str
    ) -> LyricsClickDirector:
        """Search Musicbrainz api for an artist

        Parameters
        ----------
        artist : str
            Artist to search for
        country : str
            Country artist comes from.

        Returns
        -------
        LyricsClickDirector
            The director itself.
        """
        self.builder.artist = artist
        self.builder.country = country
        self.builder.set_useragent()
        self.builder.find_artists()
        self.builder.sort_artists()
        self.builder.get_accuracy_scores()
        self.builder.get_top_five_results()
        return self

    def _confirm_final_artist(self) -> LyricsClickDirector:
        """Confirm the artist from the user.

        Lists the builder's top results, prompts for a choice and sets
        the chosen artist name and id on the builder.

        Returns
        -------
        LyricsClickDirector
            The director itself.

        Raises
        ------
        SystemExit
            If no artist is found will cleanly quit.
        """
        artist_meta = None
        for i, j in self.builder._top_five_results.items():
            artist_meta = 'Multiple' if j <= 100 else None

        if artist_meta == 'Multiple':
            _position = []
            click.echo(
                click.style(
                    f'Musicbrainz found several results for '
                    f'{self.builder.artist[0]}. Which artist/group do you want'
                    '?',
                    fg='green',
                )
            )
            for i, j in zip(self.builder._top_five_results, range(1, 6)):
                click.echo(
                    f'[{j}] {self.builder._sort_names.get(i)}'
                    f' ({self.builder._accuracy_scores.get(i)}% match)'
                )
                _position.append(i)
            # IntRange already validates and converts the answer.
            chosen = click.prompt(
                click.style('Enter choice, default is', blink=True),
                default=1,
                type=click.IntRange(1, len(self.builder._top_five_results)),
            )
            choice = _position[chosen - 1]
            click.echo(f'You chose {self.builder._sort_names.get(choice)}')
            # Labels look like 'name | disambiguation'; strip the space
            # left in front of the separator so it does not leak into
            # the artist name used for later lookups.
            self._artist = (
                self.builder._sort_names.get(choice).split('|')[0].strip()
            )
            self._artist_id = choice

            # Set artist and artistID on builder + product
            self.builder.artist_id = self._artist_id
            self.builder.artist = self._artist

        elif artist_meta is None:
            click.echo(
                f'Musicbrainz did not find any results for '
                f'{self.builder.artist[0]}. Check the spelling or consider '
                'alternative names that the artist/group may go by.'
            )
            raise SystemExit()
        return self

    def _query_for_data(self) -> LyricsClickDirector:
        """Query Musicbrainz api for albums + track data.

        Returns
        -------
        LyricsClickDirector
            The director itself.
        """
        self.builder.find_all_albums()
        self.builder.find_all_tracks()
        self.builder._product.all_albums_with_tracks = self.builder.all_albums
        return self

    def _get_lyrics(self) -> LyricsClickDirector:
        """Get Lyrics for each track

        Also writes the word counts to ``lyrics_count.json`` in the
        current working directory.

        Returns
        -------
        LyricsClickDirector
            The director itself.
        """
        self.builder.find_lyrics_urls()
        self.builder.find_all_lyrics()
        self.builder._product.all_albums_with_lyrics = (
            self.builder.all_albums_lyrics
        )
        self.builder.count_words_in_lyrics()
        with open(f'{os.getcwd()}/lyrics_count.json', 'w+') as file:
            json.dump(
                self.builder.all_albums_lyrics_count,
                file,
                indent=2,
                sort_keys=True,
            )
        self.builder._product.all_albums_lyrics_count = (
            self.builder.all_albums_lyrics_count
        )
        return self

    def _calculate_basic_statistics(self) -> LyricsClickDirector:
        """Calculate a basic average for all tracks.

        Returns
        -------
        LyricsClickDirector
            The director itself.
        """
        self.builder.calculate_average_all_albums()
        self.builder._product.all_albums_lyrics_sum = (
            self.builder.all_albums_lyrics_sum
        )
        return self

    def _calculate_descriptive_statistics(self) -> LyricsClickDirector:
        """Calculate descriptive statistics for album and/or year.

        Returns
        -------
        LyricsClickDirector
            The director itself.
        """
        self.builder.calculate_final_average_by_album()
        self.builder.calculate_final_average_by_year()
        self.builder._product.album_statistics = self.builder.album_statistics
        self.builder._product.year_statistics = self.builder.year_statistics
        return self

    def _dev(self) -> LyricsClickDirector:
        """Dev function - used for testing

        Returns
        -------
        LyricsClickDirector
            The director itself.
        """
        self.builder.calculate_final_average_by_album()
        self.builder.calculate_final_average_by_year()
        self.builder._product.album_statistics = self.builder.album_statistics
        self.builder._product.year_statistics = self.builder.year_statistics
        self.builder._product.artist_id = None
        self.builder._product.artist = 'Katzenjammer'
        self.builder._product.show_summary()
        self.builder._product.show_summary_statistics(group_by='year')
        return self

    @staticmethod
    def _get_product(builder_inst: LyricsBuilder) -> Lyrics:
        """Returns the constructed Lyrics object

        Parameters
        ----------
        builder_inst : LyricsBuilder
            Builder class for Lyrics object

        Returns
        -------
        Lyrics
            Lyrics object
        """
        return builder_inst._product
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import os
|
||||
import sys
|
||||
from importlib import import_module
|
||||
|
||||
import click
|
||||
@@ -31,12 +30,16 @@ class ComplexCLI(click.MultiCommand):
|
||||
rv.sort()
|
||||
return rv
|
||||
|
||||
# def get_command(self, ctx, name):
|
||||
# try:
|
||||
# mod = import_module(f'musicbrainzapi.cli.commands.cmd_{name}')
|
||||
# except ImportError as e:
|
||||
# print(e)
|
||||
# return
|
||||
# return mod.cli
|
||||
|
||||
def get_command(self, ctx, name):
|
||||
try:
|
||||
mod = import_module(f'musicbrainzapi.cli.commands.cmd_{name}')
|
||||
except ImportError as e:
|
||||
print(e)
|
||||
return
|
||||
mod = import_module(f'musicbrainzapi.cli.commands.cmd_{name}')
|
||||
return mod.cli
|
||||
|
||||
|
||||
@@ -61,9 +64,5 @@ def cli(ctx, path):
|
||||
"""A complex command line interface."""
|
||||
# ctx.verbose = verbose
|
||||
if path is not None:
|
||||
# click.echo(f'Path set to {os.path.expanduser(path)}')
|
||||
click.echo(f'Path set to {os.path.expanduser(path)}')
|
||||
ctx.path = os.path.expanduser(path)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
cli()
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import json
|
||||
from typing import Union
|
||||
|
||||
import click
|
||||
@@ -5,20 +6,18 @@ import click
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
from musicbrainzapi.cli.cli import pass_environment
|
||||
from musicbrainzapi.api.command_builders import lyrics
|
||||
|
||||
import musicbrainzapi.wordcloud
|
||||
from musicbrainzapi.api.lyrics.builder import LyricsBuilder
|
||||
from musicbrainzapi.api.lyrics.director import LyricsClickDirector
|
||||
|
||||
|
||||
# @click.argument('path', required=False, type=click.Path(resolve_path=True))
|
||||
# @click.command(short_help='a test command')
|
||||
|
||||
|
||||
@click.option('--dev', is_flag=True)
|
||||
@click.option('--dev', is_flag=True, help='Development flag. Do not use.')
|
||||
@click.option(
|
||||
'--save-output',
|
||||
required=False,
|
||||
help='Save the output to json files locally. Will use the path parameter if'
|
||||
' provided else defaults to current working directory.',
|
||||
help='Save the output to json files locally. Will use the path parameter'
|
||||
' if provided else defaults to current working directory.',
|
||||
is_flag=True,
|
||||
default=False,
|
||||
)
|
||||
@@ -64,12 +63,27 @@ def cli(
|
||||
wordcloud: bool,
|
||||
save_output: bool,
|
||||
) -> None:
|
||||
"""Search for lyrics statistics of an Artist/Group."""
|
||||
path = ctx.path
|
||||
print(f'home={ctx.home}')
|
||||
# lyrics_obj = list()
|
||||
director = lyrics.LyricsClickDirector()
|
||||
builder = lyrics.LyricsBuilder()
|
||||
"""Search for lyrics statistics of an Artist/Group.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
ctx : musicbrainzapi.cli.cli.Environment
|
||||
click environment class
|
||||
artist : str
|
||||
artist
|
||||
country : Union[str, None]
|
||||
country
|
||||
dev : bool
|
||||
dev flag - not to be used
|
||||
show_summary : str
|
||||
summary flag - used to display descriptive statistics
|
||||
wordcloud : bool
|
||||
wordcloud flag - used to create a wordcloud from lyrics
|
||||
save_output : bool
|
||||
save output flag - used to save output locally to disk
|
||||
"""
|
||||
director = LyricsClickDirector()
|
||||
builder = LyricsBuilder()
|
||||
director.builder = builder
|
||||
if dev:
|
||||
director._dev()
|
||||
@@ -105,7 +119,9 @@ def cli(
|
||||
lyrics_0.all_albums_lyrics_count
|
||||
)
|
||||
cloud.create_word_cloud()
|
||||
click.confirm('Wordcloud ready - press enter to show.', default=True)
|
||||
show = click.confirm(
|
||||
'Wordcloud ready - press enter to show.', default=True
|
||||
)
|
||||
plt.imshow(
|
||||
cloud.wc.recolor(
|
||||
color_func=cloud.generate_grey_colours, random_state=3
|
||||
@@ -113,4 +129,12 @@ def cli(
|
||||
interpolation='bilinear',
|
||||
)
|
||||
plt.axis('off')
|
||||
plt.show()
|
||||
if show:
|
||||
plt.show()
|
||||
if save_output:
|
||||
click.echo(f'Saving output to {ctx.path}')
|
||||
path = ctx.path if ctx.path[-1] == '/' else ctx.path + '/'
|
||||
attr = lyrics_0._attributes
|
||||
for a in attr:
|
||||
with open(f'{path}{a}.json', 'w') as f:
|
||||
json.dump(getattr(lyrics_0, a), f, indent=2)
|
||||
|
||||
Binary file not shown.
@@ -10,21 +10,44 @@ from PIL import Image
|
||||
from wordcloud import STOPWORDS, WordCloud
|
||||
import numpy as np
|
||||
|
||||
from musicbrainzapi.api.command_builders.lyrics import Lyrics
|
||||
from musicbrainzapi.api.lyrics import Lyrics
|
||||
|
||||
if typing.TYPE_CHECKING:
|
||||
import PIL.PngImagePlugin.PngImageFile
|
||||
|
||||
|
||||
class LyricsWordcloud:
|
||||
|
||||
"""docstring for LyricsWordcloud"""
|
||||
"""Create a word cloud from Lyrics.
|
||||
|
||||
Attributes
|
||||
----------
|
||||
all_albums_lyrics_count : list
|
||||
List of all albums + track lyrics counted by each word
|
||||
char_mask : np.array
|
||||
numpy array containing data for the word cloud image
|
||||
freq : collections.Counter
|
||||
Counter object containing counts for all words across all tracks
|
||||
lyrics_list : list
|
||||
List of all words from all lyrics across all tracks.
|
||||
pillow_img : PIL.PngImagePlugin.PngImageFile
|
||||
pillow image of the word cloud base
|
||||
wc : wordcloud.WordCloud
|
||||
WordCloud object
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
pillow_img: 'PIL.PngImagePlugin.PngImageFile',
|
||||
all_albums_lyrics_count: 'Lyrics.all_albums_lyrics_count',
|
||||
):
|
||||
"""
|
||||
Parameters
|
||||
----------
|
||||
pillow_img : PIL.PngImagePlugin.PngImageFile
|
||||
pillow image of the word cloud base
|
||||
all_albums_lyrics_count : Lyrics.all_albums_lyrics_count
|
||||
List of all albums + track lyrics counted by each word
|
||||
"""
|
||||
self.pillow_img = pillow_img
|
||||
self.all_albums_lyrics_count = all_albums_lyrics_count
|
||||
|
||||
@@ -32,6 +55,13 @@ class LyricsWordcloud:
|
||||
def use_microphone(
|
||||
cls, all_albums_lyrics_count: 'Lyrics.all_albums_lyrics_count',
|
||||
) -> LyricsWordcloud:
|
||||
"""Class method to instantiate with a microphone base image.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
all_albums_lyrics_count : Lyrics.all_albums_lyrics_count
|
||||
List of all albums + track lyrics counted by each word
|
||||
"""
|
||||
mic_resource = resources.path(
|
||||
'musicbrainzapi.wordcloud.resources', 'mic.png'
|
||||
)
|
||||
@@ -41,6 +71,12 @@ class LyricsWordcloud:
|
||||
return cls(mic_img, all_albums_lyrics_count)
|
||||
|
||||
def _get_lyrics_list(self) -> None:
|
||||
"""Gets all words from lyrics in a single list + cleans them.
|
||||
|
||||
Returns
|
||||
-------
|
||||
None
|
||||
"""
|
||||
self.lyrics_list = list()
|
||||
for i in self.all_albums_lyrics_count:
|
||||
for album, lyric in i.items():
|
||||
@@ -64,9 +100,13 @@ class LyricsWordcloud:
|
||||
return self
|
||||
|
||||
def _get_frequencies(self) -> None:
|
||||
"""Get frequencies of words from a list.
|
||||
"""
|
||||
self.freq = collections.Counter(self.lyrics_list)
|
||||
|
||||
def _get_char_mask(self) -> None:
|
||||
"""Gets a numpy array for the image file.
|
||||
"""
|
||||
self.char_mask = np.array(self.pillow_img)
|
||||
|
||||
@staticmethod
|
||||
@@ -81,16 +121,28 @@ class LyricsWordcloud:
|
||||
return colour
|
||||
|
||||
def _generate_word_cloud(self) -> None:
|
||||
"""Generates a word cloud
|
||||
|
||||
Returns
|
||||
-------
|
||||
None
|
||||
"""
|
||||
self.wc = WordCloud(
|
||||
max_words=50,
|
||||
width=500,
|
||||
height=500,
|
||||
max_words=150,
|
||||
width=1500,
|
||||
height=1500,
|
||||
mask=self.char_mask,
|
||||
random_state=1,
|
||||
).generate_from_frequencies(self.freq)
|
||||
return self
|
||||
|
||||
def _generate_plot(self) -> None:
|
||||
"""Plots the wordcloud and sets matplotlib options.
|
||||
|
||||
Returns
|
||||
-------
|
||||
None
|
||||
"""
|
||||
plt.imshow(
|
||||
self.wc.recolor(
|
||||
color_func=self.generate_grey_colours, random_state=3
|
||||
@@ -100,13 +152,18 @@ class LyricsWordcloud:
|
||||
plt.axis('off')
|
||||
return self
|
||||
|
||||
def save_to_disk(self, path: str):
|
||||
pass
|
||||
|
||||
def show_word_cloud(self):
|
||||
"""Shows the word cloud.
|
||||
"""
|
||||
plt.show()
|
||||
|
||||
def create_word_cloud(self) -> None:
|
||||
"""Creates a word cloud
|
||||
|
||||
Returns
|
||||
-------
|
||||
None
|
||||
"""
|
||||
self._get_lyrics_list()
|
||||
self._get_frequencies()
|
||||
self._get_char_mask()
|
||||
|
||||
BIN
src/musicbrainzapi/wordcloud/resources/.DS_Store
vendored
Normal file
BIN
src/musicbrainzapi/wordcloud/resources/.DS_Store
vendored
Normal file
Binary file not shown.
Binary file not shown.
|
Before Width: | Height: | Size: 186 KiB After Width: | Height: | Size: 220 KiB |
Reference in New Issue
Block a user