adding latest working version

This commit is contained in:
2020-03-07 20:36:42 +00:00
parent 3d1aeaed3c
commit 7900da7299
22 changed files with 1107 additions and 173464 deletions

BIN
.DS_Store vendored

Binary file not shown.

View File

@@ -1,461 +0,0 @@
{
"Please Please Me [1963]": {
"avg": 158,
"median": 160,
"std": 53,
"max": 288,
"min": 103,
"p_10": 106,
"p_25": 108,
"p_75": 185,
"p_90": 200,
"count": 12
},
"With the Beatles [1963]": {
"avg": 174,
"median": 170,
"std": 60,
"max": 274,
"min": 59,
"p_10": 92,
"p_25": 143,
"p_75": 215,
"p_90": 241,
"count": 14
},
"Introducing\u2026 The Beatles [1964]": {
"avg": 167,
"median": 151,
"std": 59,
"max": 288,
"min": 105,
"p_10": 108,
"p_25": 114,
"p_75": 193,
"p_90": 257,
"count": 11
},
"Meet the Beatles! [2014]": {
"avg": 160,
"median": 158,
"std": 33,
"max": 215,
"min": 91,
"p_10": 130,
"p_25": 141,
"p_75": 179,
"p_90": 200,
"count": 24
},
"Twist and Shout [1964]": {
"avg": 170,
"median": 176,
"std": 44,
"max": 257,
"min": 105,
"p_10": 109,
"p_25": 141,
"p_75": 198,
"p_90": 204,
"count": 12
},
"The Beatles\u2019 Second Album [2004]": {
"avg": 205,
"median": 199,
"std": 59,
"max": 339,
"min": 135,
"p_10": 139,
"p_25": 155,
"p_75": 240,
"p_90": 251,
"count": 20
},
"The Beatles\u2019 Long Tall Sally [1964]": {
"avg": 182,
"median": 180,
"std": 70,
"max": 339,
"min": 82,
"p_10": 94,
"p_25": 134,
"p_75": 221,
"p_90": 241,
"count": 12
},
"Something New [2014]": {
"avg": 172,
"median": 178,
"std": 28,
"max": 220,
"min": 103,
"p_10": 146,
"p_25": 165,
"p_75": 185,
"p_90": 187,
"count": 22
},
"Beatles for Sale [1964]": {
"avg": 147,
"median": 146,
"std": 76,
"max": 358,
"min": 53,
"p_10": 74,
"p_25": 102,
"p_75": 159,
"p_90": 202,
"count": 12
},
"Beatles \u201965 [2004]": {
"avg": 161,
"median": 147,
"std": 69,
"max": 358,
"min": 71,
"p_10": 104,
"p_25": 141,
"p_75": 155,
"p_90": 179,
"count": 21
},
"Beatles VI [2014]": {
"avg": 159,
"median": 177,
"std": 52,
"max": 220,
"min": 53,
"p_10": 84,
"p_25": 137,
"p_75": 190,
"p_90": 210,
"count": 18
},
"Rubber Soul [1999]": {
"avg": 175,
"median": 168,
"std": 42,
"max": 239,
"min": 89,
"p_10": 133,
"p_25": 147,
"p_75": 217,
"p_90": 227,
"count": 20
},
"Revolver [2014]": {
"avg": 159,
"median": 160,
"std": 48,
"max": 233,
"min": 90,
"p_10": 102,
"p_25": 113,
"p_75": 205,
"p_90": 214,
"count": 22
},
"Sgt. Pepper\u2019s Lonely Hearts Club Band [1994]": {
"avg": 194,
"median": 176,
"std": 69,
"max": 395,
"min": 87,
"p_10": 136,
"p_25": 151,
"p_75": 217,
"p_90": 276,
"count": 19
},
"The Beatles [1968]": {
"avg": 139,
"median": 131,
"std": 91,
"max": 324,
"min": 9,
"p_10": 26,
"p_25": 72,
"p_75": 198,
"p_90": 255,
"count": 15
},
"Abbey Road [2009]": {
"avg": 91,
"median": 76,
"std": 55,
"max": 234,
"min": 28,
"p_10": 35,
"p_25": 52,
"p_75": 127,
"p_90": 141,
"count": 14
},
"Let It Be [2010]": {
"avg": 173,
"median": 177,
"std": 70,
"max": 268,
"min": 57,
"p_10": 69,
"p_25": 117,
"p_75": 230,
"p_90": 264,
"count": 13
},
"Decca Sessions 1.1.62 [1987]": {
"avg": 182,
"median": 167,
"std": 55,
"max": 289,
"min": 107,
"p_10": 126,
"p_25": 130,
"p_75": 223,
"p_90": 241,
"count": 9
},
"The Unreleased Tracks, Volume 2 [1990]": {
"avg": 141,
"median": 119,
"std": 45,
"max": 241,
"min": 105,
"p_10": 108,
"p_25": 110,
"p_75": 153,
"p_90": 201,
"count": 8
},
"Reunions 74 & 92 [1992]": {
"avg": 149,
"median": 170,
"std": 65,
"max": 240,
"min": 32,
"p_10": 71,
"p_25": 125,
"p_75": 170,
"p_90": 205,
"count": 6
},
"Studio 2 Sessions at Abbey Road, Vol. 1 [1995]": {
"avg": 132,
"median": 108,
"std": 36,
"max": 185,
"min": 105,
"p_10": 105,
"p_25": 105,
"p_75": 185,
"p_90": 185,
"count": 20
},
"Studio 2 Sessions at Abbey Road, Vol. 2 [1995]": null,
"Studio 2 Sessions at Abbey Road, Vol. 3 [1995]": {
"avg": 179,
"median": 163,
"std": 51,
"max": 248,
"min": 117,
"p_10": 117,
"p_25": 146,
"p_75": 248,
"p_90": 248,
"count": 16
},
"Studio 2 Sessions at Abbey Road, Vol. 4 [1995]": {
"avg": 222,
"median": 233,
"std": 46,
"max": 269,
"min": 145,
"p_10": 150,
"p_25": 188,
"p_75": 269,
"p_90": 269,
"count": 18
},
"The Alternate Abbey Road [2000]": {
"avg": 100,
"median": 76,
"std": 59,
"max": 234,
"min": 32,
"p_10": 37,
"p_25": 57,
"p_75": 146,
"p_90": 178,
"count": 16
},
"Alf Together Now [2001]": {
"avg": 193,
"median": 170,
"std": 68,
"max": 339,
"min": 111,
"p_10": 164,
"p_25": 168,
"p_75": 170,
"p_90": 323,
"count": 12
},
"Collectors Items [2002]": {
"avg": 241,
"median": 204,
"std": 160,
"max": 639,
"min": 89,
"p_10": 101,
"p_25": 154,
"p_75": 243,
"p_90": 457,
"count": 16
},
"Complete Home Recordings 1967\u20131968 [2002]": {
"avg": 197,
"median": 204,
"std": 74,
"max": 346,
"min": 70,
"p_10": 111,
"p_25": 111,
"p_75": 270,
"p_90": 270,
"count": 20
},
"Complete Home Recordings 1968 [2002]": {
"avg": 160,
"median": 153,
"std": 88,
"max": 351,
"min": 32,
"p_10": 41,
"p_25": 94,
"p_75": 209,
"p_90": 272,
"count": 24
},
"The Seven Years of Christmas [2002]": {
"avg": 790,
"median": 659,
"std": 202,
"max": 1122,
"min": 589,
"p_10": 617,
"p_25": 659,
"p_75": 921,
"p_90": 1042,
"count": 5
},
"20 X 4 Remastered Edition [2005]": {
"avg": 176,
"median": 178,
"std": 69,
"max": 269,
"min": 66,
"p_10": 98,
"p_25": 145,
"p_75": 220,
"p_90": 250,
"count": 5
},
"Inside Sgt. Pepper (Part Three) [2007]": {
"avg": 235,
"median": 235,
"std": 0,
"max": 235,
"min": 235,
"p_10": 235,
"p_25": 235,
"p_75": 235,
"p_90": 235,
"count": 9
},
"Carnival of Light - Fakes, Frauds, and Phonies (Million Volt Laugh and Sound Rave) [2009]": null,
"Transcending Time [2010]": {
"avg": 130,
"median": 115,
"std": 68,
"max": 243,
"min": 57,
"p_10": 58,
"p_25": 72,
"p_75": 174,
"p_90": 218,
"count": 6
},
"Hot as Sun [2010]": {
"avg": 141,
"median": 156,
"std": 43,
"max": 182,
"min": 70,
"p_10": 95,
"p_25": 132,
"p_75": 166,
"p_90": 176,
"count": 4
},
"From the Basement to the Boardroom [2011]": {
"avg": 179,
"median": 173,
"std": 161,
"max": 589,
"min": 32,
"p_10": 32,
"p_25": 54,
"p_75": 194,
"p_90": 297,
"count": 9
},
"The Decca Tapes [2013]": {
"avg": 172,
"median": 167,
"std": 56,
"max": 289,
"min": 101,
"p_10": 108,
"p_25": 127,
"p_75": 211,
"p_90": 229,
"count": 12
},
"Kinfauns Demos [Missing]": {
"avg": 153,
"median": 150,
"std": 84,
"max": 324,
"min": 32,
"p_10": 32,
"p_25": 82,
"p_75": 208,
"p_90": 271,
"count": 28
},
"Rare Tracks [Missing]": {
"avg": 235,
"median": 222,
"std": 67,
"max": 395,
"min": 171,
"p_10": 176,
"p_25": 190,
"p_75": 244,
"p_90": 299,
"count": 8
},
"The Lost Album (Two and a Half) [Missing]": {
"avg": 168,
"median": 163,
"std": 67,
"max": 282,
"min": 32,
"p_10": 112,
"p_25": 122,
"p_75": 210,
"p_90": 265,
"count": 22
},
"8 Mile and Abbey: Eminem Meets the Beatles [2014]": null
}

File diff suppressed because one or more lines are too long

Binary file not shown.

Before

Width:  |  Height:  |  Size: 47 KiB

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

BIN
src/.DS_Store vendored

Binary file not shown.

Binary file not shown.

View File

@@ -1 +0,0 @@
from . import lyrics

View File

@@ -1,758 +0,0 @@
from __future__ import annotations
from abc import ABC, abstractmethod, abstractstaticmethod
from dataclasses import dataclass
from pprint import pprint
from typing import Union, List, Dict
from collections import Counter
import html
import json
import os
import string
import math
from beautifultable import BeautifulTable
import musicbrainzngs
import click
import addict
import requests
import numpy as np
from musicbrainzapi.api import authenticate
class LyricsConcreteBuilder(ABC):
    """Abstract builder interface for assembling lyrics data for an artist.

    Concrete builders must provide the ``product``, ``artist``, ``country``
    and ``artist_id`` properties, the ``set_useragent`` static method and
    every build step declared below.
    """

    @property
    @abstractmethod
    def product(self) -> None:
        """Return the product being assembled by the builder."""

    @property
    @abstractmethod
    def artist(self) -> str:
        """Return the artist name."""

    @artist.setter
    @abstractmethod
    def artist(self, artist: str) -> None:
        """Set the artist name."""

    @property
    @abstractmethod
    def country(self) -> Union[str, None]:
        """Return the country used to narrow the artist search, if any."""

    @country.setter
    @abstractmethod
    def country(self, country: Union[str, None]) -> None:
        """Set the country used to narrow the artist search."""

    @property
    @abstractmethod
    def artist_id(self) -> str:
        """Return the id of the chosen artist."""

    @artist_id.setter
    @abstractmethod
    def artist_id(self, artist_id: str) -> None:
        """Set the id of the chosen artist."""

    # ``abc.abstractstaticmethod`` has been deprecated since Python 3.3;
    # stacking ``staticmethod`` on ``abstractmethod`` is the supported form.
    @staticmethod
    @abstractmethod
    def set_useragent():
        """Set the user agent through ``authenticate.set_useragent()``."""
        authenticate.set_useragent()

    @abstractmethod
    def __init__(self) -> None:
        """Initialise the builder."""

    @abstractmethod
    def reset(self) -> None:
        """Reset the builder."""

    @abstractmethod
    def find_artists(self) -> None:
        """Find candidate artists."""

    @abstractmethod
    def sort_artists(self) -> None:
        """Sort the candidate artists."""

    @abstractmethod
    def get_accuracy_scores(self) -> None:
        """Collect the accuracy score of each candidate artist."""

    @abstractmethod
    def get_top_five_results(self) -> None:
        """Keep the five best candidate artists."""

    @abstractmethod
    def find_all_albums(self) -> None:
        """Find all albums of the chosen artist."""

    @abstractmethod
    def find_all_tracks(self) -> None:
        """Find all tracks of all albums."""
class LyricsBuilder(LyricsConcreteBuilder):
"""docstring for LyricsBuilder"""
@property
def product(self) -> Lyrics:
    """Lyrics: The product object this builder is currently assembling."""
    return self._product
@property
def artist(self) -> str:
    """str: Name of the artist the builder is working on."""
    return self._artist


@artist.setter
def artist(self, artist: str) -> None:
    # Targets are assigned left to right: the builder's own copy first,
    # then the copy on the product being assembled.
    self._artist = self._product.artist = artist
@property
def country(self) -> Union[str, None]:
    """Union[str, None]: Country passed to the artist search, if any."""
    return self._country


@country.setter
def country(self, country: Union[str, None]) -> None:
    # Targets are assigned left to right: the builder's own copy first,
    # then the copy on the product being assembled.
    self._country = self._product.country = country
@property
def artist_id(self) -> str:
    """str: Id of the artist chosen for the builder."""
    return self._artist_id


@artist_id.setter
def artist_id(self, artist_id: str) -> None:
    # Targets are assigned left to right: the builder's own copy first,
    # then the copy on the product being assembled.
    self._artist_id = self._product.artist_id = artist_id
@property
def all_albums_with_tracks(self) -> list:
    """list: Albums with their track titles, as stored on the builder."""
    return self._all_albums_with_tracks


@all_albums_with_tracks.setter
def all_albums_with_tracks(self, all_albums_with_tracks: list) -> None:
    # Targets are assigned left to right: the builder's own reference
    # first, then the reference on the product being assembled.
    self._all_albums_with_tracks = (
        self._product.all_albums_with_tracks
    ) = all_albums_with_tracks
@staticmethod
def set_useragent() -> None:
    """Set the user agent by delegating to ``authenticate.set_useragent()``."""
    authenticate.set_useragent()
def __init__(self) -> None:
    """Create the builder; all setup is done by ``reset``."""
    self.reset()
def reset(self) -> None:
    """Replace the builder's product with a new, empty ``Lyrics`` object."""
    self._product = Lyrics()
def find_artists(self) -> LyricsBuilder:
    """Search Musicbrainz for artists matching the builder's settings.

    The search uses ``self.artist`` and ``self.country``; the raw response
    is stored on ``self.musicbrainz_artists``.

    Returns
    -------
    LyricsBuilder
        The builder itself, so calls can be chained.
    """
    self.musicbrainz_artists = musicbrainzngs.search_artists(
        artist=self.artist, country=self.country
    )
    return self
def sort_artists(self) -> LyricsBuilder:
    """Build a display name for every artist in the search response.

    ``self._sort_names`` maps each artist id to ``"<name>"`` or, when the
    entry carries a disambiguation, to ``"<name> | <disambiguation>"``.

    Returns
    -------
    LyricsBuilder
        The builder itself, so calls can be chained.
    """
    names = {}
    for entry in self.musicbrainz_artists['artist-list']:
        label = f'{entry.get("name")}'
        disambiguation = entry.get('disambiguation')
        if disambiguation is not None:
            label = f'{label} | {disambiguation}'
        names[entry.get('id')] = label
    self._sort_names = names
    return self
def get_accuracy_scores(self) -> LyricsBuilder:
    """Collect the match score of every artist in the search response.

    ``self._accuracy_scores`` maps each artist id to its ``ext:score``
    converted to ``int``; entries without a score get ``0``.

    Returns
    -------
    LyricsBuilder
        The builder itself, so calls can be chained.
    """
    self._accuracy_scores = {
        entry.get('id'): int(entry.get('ext:score', '0'))
        for entry in self.musicbrainz_artists['artist-list']
    }
    return self
def get_top_five_results(self) -> LyricsBuilder:
    """Keep the five artists with the highest accuracy score.

    ``self._top_five_results`` maps artist id to score, ordered from the
    best score down. Artists with equal scores keep their original order.

    Returns
    -------
    LyricsBuilder
        The builder itself, so calls can be chained.
    """
    ranked = sorted(
        self._accuracy_scores.items(),
        key=lambda pair: pair[1],
        reverse=True,
    )
    self._top_five_results = dict(ranked[:5])
    return self
def find_all_albums(self) -> LyricsBuilder:
    """Find all album release groups for the chosen artist.

    Release groups are requested from Musicbrainz 100 at a time until an
    empty page comes back. Groups whose ``type`` is ``'Album'`` are stored
    on ``self.release_group_ids`` as ``{release_group_id: title}``.

    Returns
    -------
    LyricsBuilder
        The builder itself, so calls can be chained.
    """
    limit = 100
    offset = 0
    release_group_ids = addict.Dict()

    def fetch_page(page_offset: int) -> addict.Dict:
        return addict.Dict(
            musicbrainzngs.browse_release_groups(
                artist=self.artist_id,
                release_type=['album'],
                limit=limit,
                offset=page_offset,
            )
        )

    page = fetch_page(offset)
    with click.progressbar(
        length=page['release-group-count'],
        label=f'Searching Musicbrainz for all albums from {self.artist}',
    ) as bar:
        # A single loop handles every page, so an artist without any
        # release groups no longer trips over an unbound variable.
        while page['release-group-list']:
            groups = page['release-group-list']
            for group in groups:
                if group.type == 'Album':
                    # Plain assignment: an id repeated on a later page
                    # simply overwrites the earlier entry.
                    release_group_ids[group.id] = group.title
            bar.update(len(groups))
            offset += limit
            page = fetch_page(offset)
    self.release_group_ids = release_group_ids
    click.echo(f'Found {len(release_group_ids)} albums for {self.artist}.')
    return self
def find_all_tracks(self) -> LyricsBuilder:
    """Find the track titles of every album in ``self.release_group_ids``.

    For each release group the release whose first medium has the most
    tracks is used. The result is appended to ``self.all_albums`` as
    ``{"<title> [<year>]": [track titles]}`` and the number of tracks is
    added to ``self.total_track_count``. Release groups for which
    Musicbrainz returns no releases are skipped.

    Returns
    -------
    LyricsBuilder
        The builder itself, so calls can be chained.
    """
    self.all_albums = []
    self.total_track_count = 0
    with click.progressbar(
        length=len(self.release_group_ids),
        label=(
            'Searching Musicbrainz for all tracks in all albums for '
            f'{self.artist}'
        ),
    ) as bar:
        for group_id, title in self.release_group_ids.items():
            response = addict.Dict(
                musicbrainzngs.browse_releases(
                    release_group=group_id,
                    release_type=['album'],
                    includes=['recordings'],
                    limit=100,
                )
            )
            releases = response['release-list']
            if not releases:
                # Nothing to pick from; ``max`` would raise on an empty list.
                bar.update(1)
                continue
            # ``max`` returns the first release with the highest count.
            # NOTE(review): only the first medium of each release is
            # considered, as in the original implementation.
            fullest = max(
                releases, key=lambda rel: rel['medium-list'][0]['track-count']
            )
            first_medium = fullest['medium-list'][0]
            self.total_track_count += first_medium['track-count']
            try:
                album_year = fullest.date.split('-')[0]
            except TypeError:
                # presumably addict yields an empty, non-callable Dict for a
                # release without ``date``, which surfaces as TypeError here
                album_year = 'Missing'
            self.all_albums.append(
                addict.Dict(
                    (
                        f'{title} [{album_year}]',
                        [
                            track.recording.title
                            for track in first_medium['track-list']
                        ],
                    )
                )
            )
            bar.update(1)
    click.echo(
        f'Found {self.total_track_count} tracks across'
        f' {len(self.release_group_ids)} albums for {self.artist}'
    )
    return self
def find_lyrics_urls(self) -> LyricsBuilder:
    """Build the lyrics API URL for every track of every album.

    ``self.all_albums_lyrics_url`` receives one ``{album: [urls]}`` entry
    per album in ``self.all_albums``, in the same order.

    Returns
    -------
    LyricsBuilder
        The builder itself, so calls can be chained.
    """
    self.all_albums_lyrics_url = []
    for album in self.all_albums:
        for title, tracks in album.items():
            urls = [
                self.construct_lyrics_url(self.artist, track)
                for track in tracks
            ]
            self.all_albums_lyrics_url.append(addict.Dict((title, urls)))
    return self
def find_all_lyrics(self) -> LyricsBuilder:
    """Request the lyrics of every track and save them to disk.

    ``self.all_albums_lyrics`` receives one ``{album: [lyrics]}`` entry per
    album in ``self.all_albums_lyrics_url``. The progress bar advances by
    one for every requested track. The collected lyrics are also written to
    ``all_albums_lyrics.json`` in the current working directory.

    Returns
    -------
    LyricsBuilder
        The builder itself, so calls can be chained.
    """
    self.all_albums_lyrics = []
    with click.progressbar(
        length=self.total_track_count,
        label=f'Finding lyrics for {self.total_track_count}'
        f' tracks for {self.artist}. This may take some time! ☕️',
    ) as bar:
        for album in self.all_albums_lyrics_url:
            for title, urls in album.items():
                fetched = []
                for url in urls:
                    fetched.append(self.request_lyrics_from_url(url))
                    bar.update(1)
                self.all_albums_lyrics.append(addict.Dict((title, fetched)))
    with open(f'{os.getcwd()}/all_albums_lyrics.json', 'w') as f:
        json.dump(self.all_albums_lyrics, f, indent=2)
    return self
def count_words_in_lyrics(self) -> LyricsBuilder:
    """Count how often each word occurs in the lyrics of every track.

    ``self.all_albums_lyrics_count`` receives one ``{album: [counts]}``
    entry per album, where each track is either the ``most_common()``
    list of ``(word, occurrences)`` pairs or the string ``'No Lyrics'``
    when no lyrics were found.

    Returns
    -------
    LyricsBuilder
        The builder itself, so calls can be chained.
    """
    self.all_albums_lyrics_count = []
    with click.progressbar(
        length=self.total_track_count, label='Processing lyrics'
    ) as bar:
        for album in self.all_albums_lyrics:
            for title, lyrics in album.items():
                counted = [
                    Counter(text.split()).most_common()
                    if text is not None
                    else 'No Lyrics'
                    for text in lyrics
                ]
                self.all_albums_lyrics_count.append(
                    addict.Dict((title, counted))
                )
                # One step per track; never a negative step for an album
                # without tracks.
                bar.update(len(lyrics))
    click.echo(f'Processed lyrics for {self.total_track_count} tracks.')
    return self
# rename this
def calculate_average_all_albums(self) -> LyricsBuilder:
    """Sum the word counts of every track, album by album.

    The counts are read from ``lyrics_count.json`` in the current working
    directory. ``self.all_albums_lyrics_sum`` receives one
    ``{album: [totals]}`` entry per album, where each total is the number
    of words in a track or the string ``'No Lyrics'``. The result is also
    written to ``lyrics_sum_all_album.json``.

    Returns
    -------
    LyricsBuilder
        The builder itself, so calls can be chained.
    """
    self.all_albums_lyrics_sum = []
    with open(f'{os.getcwd()}/lyrics_count.json', 'r') as f:
        album_lyrics = json.load(f)
    for entry in album_lyrics:
        for album, songs in entry.items():
            totals = [
                sum(pair[1] for pair in song)
                if song != 'No Lyrics'
                else 'No Lyrics'
                for song in songs
            ]
            # Each album gets its own single-key mapping; the (key, value)
            # tuple is built directly rather than via a failing merge.
            self.all_albums_lyrics_sum.append(addict.Dict((album, totals)))
    with open(f'{os.getcwd()}/lyrics_sum_all_album.json', 'w+') as f:
        json.dump(self.all_albums_lyrics_sum, f)
    return self
def calculate_final_average_by_album(self) -> None:
    """Calculate descriptive statistics of the word counts per album.

    The per-track totals are read from ``lyrics_sum_all_album.json`` in
    the current working directory. Only integer totals greater than 1 are
    used. ``self.album_statistics`` maps each album to the result of
    ``get_descriptive_statistics`` and is also written to
    ``album_statistics.json``.

    Returns
    -------
    None
    """
    self.album_statistics = addict.Dict()
    with open(f'{os.getcwd()}/lyrics_sum_all_album.json', 'r') as f:
        album_lyrics = json.load(f)
    for entry in album_lyrics:
        for album, totals in entry.items():
            # Drop the 'No Lyrics' strings and totals of 1 or less.
            word_counts = [
                total
                for total in totals
                if isinstance(total, int) and total > 1
            ]
            # Assign in place: no rebuild of the whole mapping per album,
            # and a repeated album key overwrites instead of raising.
            self.album_statistics[album] = self.get_descriptive_statistics(
                word_counts
            )
    with open(f'{os.getcwd()}/album_statistics.json', 'w') as f:
        json.dump(self.album_statistics, f, indent=2)
# implement above in this
def calculate_final_average_by_year(self) -> None:
    """Calculate descriptive statistics of the word counts per year.

    The per-track totals are read from ``lyrics_sum_all_album.json`` in
    the current working directory. The year is taken from the trailing
    ``[year]`` of each album key and the totals of all albums of a year
    are merged. Only integer totals greater than 1 are used.
    ``self.year_statistics`` maps each year to the result of
    ``get_descriptive_statistics``.

    Returns
    -------
    None
    """
    self.year_statistics = addict.Dict()
    with open(f'{os.getcwd()}/lyrics_sum_all_album.json', 'r') as f:
        album_lyrics = json.load(f)
    # Group the totals of all albums released in the same year.
    totals_by_year = {}
    for entry in album_lyrics:
        for album, totals in entry.items():
            year = album.split('[')[-1].strip(']')
            totals_by_year.setdefault(year, []).extend(totals)
    for year, totals in totals_by_year.items():
        # Drop the 'No Lyrics' strings and totals of 1 or less.
        word_counts = [
            total for total in totals if isinstance(total, int) and total > 1
        ]
        self.year_statistics[year] = self.get_descriptive_statistics(
            word_counts
        )
@staticmethod
def construct_lyrics_url(artist: str, song: str) -> str:
lyrics_api_base = 'https://api.lyrics.ovh/v1'
lyrics_api_url = html.escape(f'{lyrics_api_base}/{artist}/{song}')
return lyrics_api_url
@staticmethod
def request_lyrics_from_url(url: str, timeout: float = 30) -> Union[str, None]:
    """Request the lyrics behind ``url`` and strip their punctuation.

    Parameters
    ----------
    url : str
        Lyrics API URL of a single song.
    timeout : float, optional
        Seconds to wait for the server before ``requests`` gives up, so a
        stalled connection cannot block the run forever.

    Returns
    -------
    Union[str, None]
        The lyrics without punctuation, or ``None`` when the response has
        no ``'lyrics'`` key or is not valid JSON.
    """
    resp = requests.get(url, timeout=timeout)
    # No lyrics for a song will return a key of 'error', we pass on this.
    # JSON decode errors are subclasses of ValueError.
    try:
        return LyricsBuilder.strip_punctuation(resp.json()['lyrics'])
    except (KeyError, ValueError):
        return None
@staticmethod
def strip_punctuation(word: str) -> str:
_strip = word.translate(str.maketrans('', '', string.punctuation))
return _strip
@staticmethod
def get_descriptive_statistics(nums: list) -> Union[Dict[str, int], None]:
    """Calculate descriptive statistics for a list of numbers.

    Every statistic is rounded up to the next integer with ``math.ceil``.

    Parameters
    ----------
    nums : list
        Numbers to describe.

    Returns
    -------
    Union[Dict[str, int], None]
        An ``addict.Dict`` with the keys ``avg``, ``median``, ``std``,
        ``max``, ``min``, ``p_10``, ``p_25``, ``p_75``, ``p_90`` and
        ``count``, or ``None`` when ``nums`` is empty.
    """
    if len(nums) == 0:
        return None
    # Local names avoid shadowing the ``max``/``min`` builtins.
    p_10, p_25, p_75, p_90 = (
        math.ceil(value) for value in np.percentile(nums, [10, 25, 75, 90])
    )
    return addict.Dict(
        ('avg', math.ceil(np.mean(nums))),
        ('median', math.ceil(np.median(nums))),
        ('std', math.ceil(np.std(nums))),
        ('max', math.ceil(np.max(nums))),
        ('min', math.ceil(np.min(nums))),
        ('p_10', p_10),
        ('p_25', p_25),
        ('p_75', p_75),
        ('p_90', p_90),
        ('count', len(nums)),
    )
class LyricsClickDirector:
    """Drive a ``LyricsBuilder`` through the steps of the click command.

    The director holds no data of its own: each step calls methods on the
    builder assigned to ``builder`` and copies the results onto the
    builder's product. Every step returns the director for chaining.
    """

    def __init__(self) -> None:
        self._builder = None

    @property
    def builder(self) -> LyricsBuilder:
        """LyricsBuilder: The builder this director operates on."""
        return self._builder

    @builder.setter
    def builder(self, builder: LyricsBuilder) -> None:
        self._builder = builder

    def _get_initial_artists(
        self, artist: str, country: str
    ) -> LyricsClickDirector:
        """Search for ``artist`` and prepare the five best candidates."""
        self.builder.artist = artist
        self.builder.country = country
        self.builder.set_useragent()
        self.builder.find_artists()
        self.builder.sort_artists()
        self.builder.get_accuracy_scores()
        self.builder.get_top_five_results()
        return self

    def _confirm_final_artist(self) -> LyricsClickDirector:
        """Let the user pick one of the candidate artists.

        The chosen name and id are stored on the director and on the
        builder (and so on its product).

        Raises
        ------
        SystemExit
            When the search returned no candidates.
        """
        top_five = self.builder._top_five_results
        # The original looped over the scores and kept only the last
        # ``score <= 100`` result; assuming Musicbrainz scores never exceed
        # 100, that is simply a check for "are there any candidates".
        if not top_five:
            # NOTE(review): ``artist[0]`` prints only the first character
            # if ``artist`` is a plain str - confirm what the CLI passes in.
            click.echo(
                f'Musicbrainz did not find any results for '
                f'{self.builder.artist[0]}. Check the spelling or consider '
                'alternative names that the artist/group may go by.'
            )
            raise SystemExit()
        click.echo(
            click.style(
                f'Musicbrainz found several results for '
                f'{self.builder.artist[0]}. Which artist/group do you want'
                '?',
                fg='green',
            )
        )
        candidates = list(top_five)
        for number, candidate in enumerate(candidates, start=1):
            click.echo(
                f'[{number}] {self.builder._sort_names.get(candidate)}'
                f' ({self.builder._accuracy_scores.get(candidate)}% match)'
            )
        chosen = int(
            click.prompt(
                click.style('Enter choice, default is', blink=True),
                default=1,
                type=click.IntRange(1, len(candidates)),
            )
        )
        choice = candidates[chosen - 1]
        display_name = self.builder._sort_names.get(choice)
        click.echo(f'You chose {display_name}')
        # Display names look like "<name> | <disambiguation>"; strip the
        # space left in front of the separator so it does not end up in
        # messages or lyrics URLs.
        self._artist = display_name.split('|')[0].strip()
        self._artist_id = choice
        # Set artist and artistID on builder + product
        self.builder.artist_id = self._artist_id
        self.builder.artist = self._artist
        return self

    def _query_for_data(self) -> LyricsClickDirector:
        """Find all albums and tracks and copy them onto the product."""
        self.builder.find_all_albums()
        self.builder.find_all_tracks()
        self.builder._product.all_albums_with_tracks = self.builder.all_albums
        return self

    def _get_lyrics(self) -> LyricsClickDirector:
        """Fetch and count the lyrics and copy the results onto the product.

        The word counts are also written to ``lyrics_count.json`` in the
        current working directory.
        """
        self.builder.find_lyrics_urls()
        self.builder.find_all_lyrics()
        self.builder._product.all_albums_with_lyrics = (
            self.builder.all_albums_lyrics
        )
        self.builder.count_words_in_lyrics()
        with open(f'{os.getcwd()}/lyrics_count.json', 'w+') as file:
            json.dump(
                self.builder.all_albums_lyrics_count,
                file,
                indent=2,
                sort_keys=True,
            )
        self.builder._product.all_albums_lyrics_count = (
            self.builder.all_albums_lyrics_count
        )
        return self

    def _calculate_basic_statistics(self) -> LyricsClickDirector:
        """Sum the word counts per track and copy them onto the product."""
        self.builder.calculate_average_all_albums()
        self.builder._product.all_albums_lyrics_sum = (
            self.builder.all_albums_lyrics_sum
        )
        return self

    def _calculate_descriptive_statistics(self) -> LyricsClickDirector:
        """Calculate album/year statistics and copy them onto the product."""
        self.builder.calculate_final_average_by_album()
        self.builder.calculate_final_average_by_year()
        self.builder._product.album_statistics = self.builder.album_statistics
        self.builder._product.year_statistics = self.builder.year_statistics
        return self

    def _dev(self) -> LyricsClickDirector:
        """Development helper: rebuild and print statistics from saved files.

        Uses a hard-coded artist name and no artist id.
        """
        self.builder.calculate_final_average_by_album()
        self.builder.calculate_final_average_by_year()
        self.builder._product.album_statistics = self.builder.album_statistics
        self.builder._product.year_statistics = self.builder.year_statistics
        self.builder._product.artist_id = None
        self.builder._product.artist = 'Katzenjammer'
        self.builder._product.show_summary()
        self.builder._product.show_summary_statistics(group_by='year')
        return self

    @staticmethod
    def _get_product(builder_inst: LyricsBuilder) -> Lyrics:
        """Return the product held by ``builder_inst``."""
        return builder_inst._product
@dataclass
class Lyrics:
    """Lyrics data collected for one artist.

    Every field defaults to ``None`` so an empty product can be created
    with ``Lyrics()`` and filled in step by step, while the generated
    ``__repr__`` and ``__eq__`` keep working on a partially filled object.
    """

    artist_id: Union[str, None] = None
    artist: Union[str, None] = None
    country: Union[str, None] = None
    all_albums_with_tracks: Union[List[Dict[str, List[str]]], None] = None
    all_albums_with_lyrics: Union[List[Dict[str, List[str]]], None] = None
    all_albums_lyrics_count: Union[
        List[Dict[str, List[List[Union[str, int]]]]], None
    ] = None
    all_albums_lyrics_sum: Union[
        List[Dict[str, List[Union[int, str]]]], None
    ] = None
    album_statistics: Union[Dict[str, Dict[str, int]], None] = None
    year_statistics: Union[Dict[str, Dict[str, int]], None] = None

    # Not annotated, so this stays a plain class attribute (not a field).
    __attributes = [
        'all_albums_with_tracks',
        'all_albums_with_lyrics',
        'all_albums_lyrics_count',
        'all_albums_lyrics_sum',
        'album_statistics',
        'year_statistics',
    ]

    def show_summary(self) -> Lyrics:
        """Print the average of the per-album average word counts.

        Returns
        -------
        Lyrics
            The object itself, so calls can be chained.

        Raises
        ------
        SystemExit
            When no album has an average, i.e. no lyrics were found.
        """
        all_averages = []
        for album_stats in self.album_statistics.values():
            try:
                all_averages.append(album_stats['avg'])
            except (TypeError, ValueError):
                # Entries that cannot be subscripted (e.g. ``None``) carry
                # no statistics and are left out.
                continue
        # Check for "nothing found" explicitly instead of relying on the
        # mean of an empty list turning into NaN.
        if not all_averages:
            click.echo(
                'Oops! https://lyrics.ovh couldn\'t find any lyrics across any'
                ' album. This is caused by inconsistent Artist names from'
                ' Musicbrainz and lyrics.ovh. Try another artist.'
            )
            raise SystemExit
        final_average = math.ceil(np.mean(all_averages))
        output = BeautifulTable(max_width=200)
        output.set_style(BeautifulTable.STYLE_BOX_ROUNDED)
        output.column_headers = [
            'Average number of words in tracks across all albums\n'
            f'for {self.artist}'
        ]
        output.append_row([final_average])
        click.echo(output)
        return self

    def show_summary_statistics(self, group_by: str) -> Lyrics:
        """Print a table of descriptive statistics per group.

        Parameters
        ----------
        group_by : str
            Prefix of the ``<group_by>_statistics`` attribute to print; the
            class defines ``album_statistics`` and ``year_statistics``.

        Returns
        -------
        Lyrics
            The object itself, so calls can be chained.
        """
        stats_obj = getattr(self, f'{group_by}_statistics')
        stats = (
            'avg',
            'std',
            'min',
            'max',
            'median',
            'count',
            'p_10',
            'p_25',
            'p_75',
            'p_90',
        )
        output_0 = BeautifulTable(max_width=200)
        output_0.set_style(BeautifulTable.STYLE_BOX_ROUNDED)
        output_0.column_headers = [
            'Descriptive statistics for number of words in tracks across all'
            f' {group_by}s\nfor {self.artist}'
        ]
        output_1 = BeautifulTable(max_width=200)
        output_1.set_style(BeautifulTable.STYLE_BOX_ROUNDED)
        output_1.column_headers = [group_by, *stats]
        for group, values in stats_obj.items():
            try:
                row = [group, *[values.get(key) for key in stats]]
            except AttributeError:
                # Groups without statistics (no ``get``) are skipped.
                continue
            output_1.append_row(row)
        output_0.append_row([output_1])
        click.echo(output_0)
        return self

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,140 @@
from __future__ import annotations
from typing import Union, Dict, List
from dataclasses import dataclass
import math
from beautifultable import BeautifulTable
import click
import numpy as np
@dataclass
class Lyrics:
    """Lyrics object for an artist.

    Every field defaults to ``None`` so an empty product can be created
    with ``Lyrics()`` and filled in step by step, while the generated
    ``__repr__`` and ``__eq__`` keep working on a partially filled object.
    """

    artist_id: Union[str, None] = None
    artist: Union[str, None] = None
    country: Union[str, None] = None
    all_albums_with_tracks: Union[List[Dict[str, List[str]]], None] = None
    all_albums_with_lyrics: Union[List[Dict[str, List[str]]], None] = None
    all_albums_lyrics_count: Union[
        List[Dict[str, List[List[Union[str, int]]]]], None
    ] = None
    all_albums_lyrics_sum: Union[
        List[Dict[str, List[Union[int, str]]]], None
    ] = None
    album_statistics: Union[Dict[str, Dict[str, int]], None] = None
    year_statistics: Union[Dict[str, Dict[str, int]], None] = None

    # Not annotated, so this stays a plain class attribute (not a field).
    _attributes = [
        'all_albums_with_tracks',
        'all_albums_with_lyrics',
        'all_albums_lyrics_count',
        'all_albums_lyrics_sum',
        'album_statistics',
        'year_statistics',
    ]

    def show_summary(self) -> Lyrics:
        """Show the average word count for all lyrics.

        The value printed is the mean of the per-album averages, rounded
        up to the next integer.

        Returns
        -------
        Lyrics
            The object itself, so calls can be chained.

        Raises
        ------
        SystemExit
            When no album has an average, i.e. no lyrics were found.
        """
        all_averages = []
        for album_stats in self.album_statistics.values():
            try:
                all_averages.append(album_stats['avg'])
            except (TypeError, ValueError):
                # Entries that cannot be subscripted (e.g. ``None``) carry
                # no statistics and are left out.
                continue
        # Check for "nothing found" explicitly instead of relying on the
        # mean of an empty list turning into NaN.
        if not all_averages:
            click.echo(
                'Oops! https://lyrics.ovh couldn\'t find any lyrics across any'
                ' album. This is caused by inconsistent Artist names from'
                ' Musicbrainz and lyrics.ovh. Try another artist.'
            )
            raise SystemExit
        final_average = math.ceil(np.mean(all_averages))
        output = BeautifulTable(max_width=200)
        output.set_style(BeautifulTable.STYLE_BOX_ROUNDED)
        output.column_headers = [
            'Average number of words in tracks across all albums\n'
            f'for {self.artist}'
        ]
        output.append_row([final_average])
        click.echo(output)
        return self

    def show_summary_statistics(self, group_by: str) -> Lyrics:
        """Show a table of descriptive statistics per group.

        Parameters
        ----------
        group_by : str
            Parameter to group statistics by. Valid options are album or
            year.

        Returns
        -------
        Lyrics
            The object itself, so calls can be chained.
        """
        stats_obj = getattr(self, f'{group_by}_statistics')
        stats = (
            'avg',
            'std',
            'min',
            'max',
            'median',
            'count',
            'p_10',
            'p_25',
            'p_75',
            'p_90',
        )
        output_0 = BeautifulTable(max_width=200)
        output_0.set_style(BeautifulTable.STYLE_BOX_ROUNDED)
        output_0.column_headers = [
            'Descriptive statistics for number of words in tracks across all'
            f' {group_by}s\nfor {self.artist}'
        ]
        output_1 = BeautifulTable(max_width=200)
        output_1.set_style(BeautifulTable.STYLE_BOX_ROUNDED)
        output_1.column_headers = [group_by, *stats]
        for group, values in stats_obj.items():
            try:
                row = [group, *[values.get(key) for key in stats]]
            except AttributeError:
                # Groups without statistics (no ``get``) are skipped.
                continue
            output_1.append_row(row)
        output_0.append_row([output_1])
        click.echo(output_0)
        return self

View File

@@ -0,0 +1,568 @@
from __future__ import annotations
from collections import Counter
import html
import json
import math
import os
import string
from typing import Union, Dict
import addict
import click
import musicbrainzngs
import numpy as np
import requests
from musicbrainzapi.api.lyrics.concrete_builder import LyricsConcreteBuilder
from musicbrainzapi.api.lyrics import Lyrics
from musicbrainzapi.api import authenticate
class LyricsBuilder(LyricsConcreteBuilder):
"""docstring for LyricsBuilder
Attributes
----------
album_statistics : addict.Dict
Dictionary containing album statistics
all_albums : list
List of all albums + track titles
all_albums_lyrics : list
List of all albums + track lyrics
all_albums_lyrics_count : list
List of all albums + track lyrics counted by each word
all_albums_lyrics_sum : list
List of all albums + track lyrics counted and summed up.
all_albums_lyrics_url : list
List of all albums + link to lyrics api for each track.
musicbrainz_artists : addict.Dict
Dictionary of response from Musicbrainzapi
release_group_ids : addict.Dict
Dictionary of Musicbrainz release-group ids
total_track_count : int
Total number of tracks across all albums
year_statistics : addict.Dict
Dictionary containing album statistics
"""
@property
def product(self) -> Lyrics:
    """Lyrics: The product object this builder is currently assembling."""
    return self._product
@property
def artist(self) -> str:
    """str: Name of the artist the builder is working on."""
    return self._artist


@artist.setter
def artist(self, artist: str) -> None:
    # Targets are assigned left to right: the builder's own copy first,
    # then the copy on the product being assembled.
    self._artist = self._product.artist = artist
@property
def country(self) -> Union[str, None]:
    """Union[str, None]: Country passed to the artist search, if any."""
    return self._country


@country.setter
def country(self, country: Union[str, None]) -> None:
    # Targets are assigned left to right: the builder's own copy first,
    # then the copy on the product being assembled.
    self._country = self._product.country = country
@property
def artist_id(self) -> str:
    """str: Id of the artist chosen for the builder."""
    return self._artist_id


@artist_id.setter
def artist_id(self, artist_id: str) -> None:
    # Targets are assigned left to right: the builder's own copy first,
    # then the copy on the product being assembled.
    self._artist_id = self._product.artist_id = artist_id
@property
def all_albums_with_tracks(self) -> list:
    """list: Albums with their track titles, as stored on the builder."""
    return self._all_albums_with_tracks


@all_albums_with_tracks.setter
def all_albums_with_tracks(self, all_albums_with_tracks: list) -> None:
    # Targets are assigned left to right: the builder's own reference
    # first, then the reference on the product being assembled.
    self._all_albums_with_tracks = (
        self._product.all_albums_with_tracks
    ) = all_albums_with_tracks
@staticmethod
def set_useragent() -> None:
    """Set the user agent by delegating to ``authenticate.set_useragent()``."""
    authenticate.set_useragent()
def __init__(self) -> None:
    """Create the builder; all setup is done by ``reset``."""
    self.reset()
def reset(self) -> None:
    """Reset the builder by giving it a new, empty ``Lyrics`` product."""
    self._product = Lyrics()
def find_artists(self) -> LyricsBuilder:
    """Find artists from the musicbrainz api.

    The search uses ``self.artist`` and ``self.country``; the raw response
    is stored on ``self.musicbrainz_artists``.

    Returns
    -------
    LyricsBuilder
        The builder itself, so calls can be chained.
    """
    self.musicbrainz_artists = musicbrainzngs.search_artists(
        artist=self.artist, country=self.country
    )
    return self
def sort_artists(self) -> LyricsBuilder:
    """Build a display name for every artist in the search response.

    ``self._sort_names`` maps each artist id to ``"<name>"`` or, when the
    entry carries a disambiguation, to ``"<name> | <disambiguation>"``.

    Returns
    -------
    LyricsBuilder
        The builder itself, so calls can be chained.
    """
    names = {}
    for entry in self.musicbrainz_artists['artist-list']:
        label = f'{entry.get("name")}'
        disambiguation = entry.get('disambiguation')
        if disambiguation is not None:
            label = f'{label} | {disambiguation}'
        names[entry.get('id')] = label
    self._sort_names = names
    return self
def get_accuracy_scores(self) -> LyricsBuilder:
    """Get accuracy scores from the Musicbrainz search response.

    ``self._accuracy_scores`` maps each artist id to its ``ext:score``
    converted to ``int``; entries without a score get ``0``.

    Returns
    -------
    LyricsBuilder
        The builder itself, so calls can be chained.
    """
    self._accuracy_scores = {
        entry.get('id'): int(entry.get('ext:score', '0'))
        for entry in self.musicbrainz_artists['artist-list']
    }
    return self
def get_top_five_results(self) -> LyricsBuilder:
    """Get the top five artists from the Musicbrainz search response.

    ``self._top_five_results`` maps artist id to score, ordered from the
    best score down. Artists with equal scores keep their original order.

    Returns
    -------
    LyricsBuilder
        The builder itself, so calls can be chained.
    """
    ranked = sorted(
        self._accuracy_scores.items(),
        key=lambda pair: pair[1],
        reverse=True,
    )
    self._top_five_results = dict(ranked[:5])
    return self
def find_all_albums(self) -> LyricsBuilder:
    """Find all albums for the chosen artist.

    Release groups are requested from Musicbrainz 100 at a time until an
    empty page comes back. Groups whose ``type`` is ``'Album'`` are stored
    on ``self.release_group_ids`` as ``{release_group_id: title}``.

    Returns
    -------
    LyricsBuilder
        The builder itself, so calls can be chained.
    """
    limit = 100
    offset = 0
    release_group_ids = addict.Dict()

    def fetch_page(page_offset: int) -> addict.Dict:
        return addict.Dict(
            musicbrainzngs.browse_release_groups(
                artist=self.artist_id,
                release_type=['album'],
                limit=limit,
                offset=page_offset,
            )
        )

    page = fetch_page(offset)
    with click.progressbar(
        length=page['release-group-count'],
        label=f'Searching Musicbrainz for all albums from {self.artist}',
    ) as bar:
        # A single loop handles every page, so an artist without any
        # release groups no longer trips over an unbound variable.
        while page['release-group-list']:
            groups = page['release-group-list']
            for group in groups:
                if group.type == 'Album':
                    # Plain assignment: an id repeated on a later page
                    # simply overwrites the earlier entry.
                    release_group_ids[group.id] = group.title
            bar.update(len(groups))
            offset += limit
            page = fetch_page(offset)
    self.release_group_ids = release_group_ids
    click.echo(f'Found {len(release_group_ids)} albums for {self.artist}.')
    return self
def find_all_tracks(self) -> 'LyricsBuilder':
    """Find all tracks from all albums.

    For every release group in ``self.release_group_ids`` the releases are
    browsed (at most 100) and the release whose first medium reports the
    highest track count is used. Only the first medium of that release is
    read. Each album is appended to ``self.all_albums`` as a single-key
    mapping of ``'<title> [<year>]'`` to the list of recording titles,
    where ``<year>`` is the part of the release date before the first
    ``-``, or ``'Missing'`` when reading the date raises a ``TypeError``.
    ``self.total_track_count`` accumulates the chosen track counts.

    A release group for which no releases are returned is skipped.

    Returns
    -------
    LyricsBuilder
        This builder, so that calls can be chained.
    """
    self.all_albums = list()
    total_albums = len(self.release_group_ids)
    self.total_track_count = 0
    with click.progressbar(
        length=total_albums,
        label=(
            'Searching Musicbrainz for all tracks in all albums for '
            f'{self.artist}'
        ),
    ) as bar:
        for group_id, alb in self.release_group_ids.items():
            resp = addict.Dict(
                musicbrainzngs.browse_releases(
                    release_group=group_id,
                    release_type=['album'],
                    includes=['recordings'],
                    limit=100,
                )
            )
            releases = resp['release-list']
            if not releases:
                # Nothing to pick a track list from; max() of an empty
                # list would raise.
                bar.update(1)
                continue
            album_track_count = [
                release['medium-list'][0]['track-count']
                for release in releases
            ]
            most_tracks = max(album_track_count)
            self.total_track_count += most_tracks
            # index() returns the first release with the highest count.
            chosen = releases[album_track_count.index(most_tracks)]
            try:
                album_year = chosen.date.split('-')[0]
            except TypeError:
                album_year = 'Missing'
            album_tracks = addict.Dict(
                (
                    alb + f' [{album_year}]',
                    [
                        track.recording.title
                        for track in chosen['medium-list'][0]['track-list']
                    ],
                )
            )
            self.all_albums.append(album_tracks)
            bar.update(1)
    click.echo(
        f'Found {self.total_track_count} tracks across'
        f' {len(self.release_group_ids)} albums for {self.artist}'
    )
    return self
def find_lyrics_urls(self) -> 'LyricsBuilder':
    """Construct the lyrics api URL for every track.

    Walks ``self.all_albums`` and, for each album, appends to
    ``self.all_albums_lyrics_url`` a single-key mapping of the album name
    to the list of URLs produced by ``construct_lyrics_url`` for
    ``self.artist`` and each track title.

    Returns
    -------
    LyricsBuilder
        This builder, so that calls can be chained.
    """
    self.all_albums_lyrics_url = list()
    for album in self.all_albums:
        for alb, tracks in album.items():
            urls = [
                self.construct_lyrics_url(self.artist, title)
                for title in tracks
            ]
            self.all_albums_lyrics_url.append(addict.Dict((alb, urls)))
    return self
def find_all_lyrics(self) -> 'LyricsBuilder':
    """Get lyrics for each track from the lyrics api.

    Requests every URL in ``self.all_albums_lyrics_url`` through
    ``request_lyrics_from_url`` and appends, per album, a single-key
    mapping of album name to the list of results to
    ``self.all_albums_lyrics``. The collected lyrics are also written to
    ``all_albums_lyrics.json`` in the current working directory.

    Returns
    -------
    LyricsBuilder
        This builder, so that calls can be chained.
    """
    self.all_albums_lyrics = list()
    with click.progressbar(
        length=self.total_track_count,
        label=f'Finding lyrics for {self.total_track_count}'
        f' tracks for {self.artist}. This may take some time! ☕️',
    ) as bar:
        for album in self.all_albums_lyrics_url:
            for alb, urls in album.items():
                lyrics = addict.Dict(
                    (alb, [self.request_lyrics_from_url(url) for url in urls])
                )
                self.all_albums_lyrics.append(lyrics)
                # Advance only by the tracks actually requested so the
                # bar does not run ahead of the work done.
                bar.update(len(urls))
    with open(f'{os.getcwd()}/all_albums_lyrics.json', 'w') as f:
        json.dump(self.all_albums_lyrics, f, indent=2)
    return self
def count_words_in_lyrics(self) -> 'LyricsBuilder':
    """Count all words in each track.

    For every album in ``self.all_albums_lyrics`` each track's lyrics are
    split on whitespace and counted; the ``most_common()`` list of
    ``(word, count)`` pairs is kept per track. A track whose lyrics are
    ``None`` is recorded as the string ``'No Lyrics'``. The per-album
    results are appended to ``self.all_albums_lyrics_count``.

    Returns
    -------
    LyricsBuilder
        This builder, so that calls can be chained.
    """
    self.all_albums_lyrics_count = list()
    with click.progressbar(
        length=self.total_track_count, label='Processing lyrics'
    ) as bar:
        for album in self.all_albums_lyrics:
            for alb, lyrics in album.items():
                word_counts = addict.Dict(
                    (
                        alb,
                        [
                            Counter(text.split()).most_common()
                            if text is not None
                            else 'No Lyrics'
                            for text in lyrics
                        ],
                    )
                )
                self.all_albums_lyrics_count.append(word_counts)
                bar.update(len(lyrics))
    click.echo(f'Processed lyrics for {self.total_track_count} tracks.')
    return self
def calculate_average_all_albums(self) -> None:
    """Total the number of words in every track of every album.

    Reads ``self.all_albums_lyrics_count``, where each track is either a
    list of ``(word, count)`` pairs or the string ``'No Lyrics'``. For each
    album a single-key mapping of album name to a list of per-track totals
    is appended to ``self.all_albums_lyrics_sum``; a track without lyrics
    keeps the marker ``'No Lyrics'`` instead of a number.

    Returns
    -------
    None
    """
    self.all_albums_lyrics_sum = list()
    for album_counts in self.all_albums_lyrics_count:
        for album, lyrics_list in album_counts.items():
            track_totals = [
                'No Lyrics'
                if word_counts == 'No Lyrics'
                else sum(pair[1] for pair in word_counts)
                for word_counts in lyrics_list
            ]
            self.all_albums_lyrics_sum.append(
                addict.Dict((album, track_totals))
            )
def calculate_final_average_by_album(self) -> None:
    """Calculate descriptive statistics by album.

    For each album in ``self.all_albums_lyrics_sum`` the per-track totals
    are reduced to the integers greater than 1 (this drops the
    ``'No Lyrics'`` markers as well as totals of 0 and 1) and passed to
    ``get_descriptive_statistics``. Results are stored in
    ``self.album_statistics`` keyed by album name; if the same album name
    occurs more than once the later entry replaces the earlier one.

    Returns
    -------
    None
    """
    self.album_statistics = addict.Dict()
    for album_totals in self.all_albums_lyrics_sum:
        for album, totals in album_totals.items():
            word_counts = [
                total
                for total in totals
                if isinstance(total, int) and total > 1
            ]
            self.album_statistics[album] = self.get_descriptive_statistics(
                word_counts
            )
def calculate_final_average_by_year(self) -> None:
    """Calculate descriptive statistics by year.

    The year is taken from the text after the last ``[`` of each album key
    in ``self.all_albums_lyrics_sum`` (with ``]`` stripped). Per-track
    totals of all albums sharing a year are pooled, reduced to the integers
    greater than 1 and passed to ``get_descriptive_statistics``. Results
    are stored in ``self.year_statistics`` keyed by year.

    Returns
    -------
    None
    """
    group_by_years = {}
    for album_totals in self.all_albums_lyrics_sum:
        for album, totals in album_totals.items():
            year = album.split('[')[-1].strip(']')
            # extend() copies the values into a list owned by this method,
            # so the lists in all_albums_lyrics_sum are left untouched.
            group_by_years.setdefault(year, []).extend(totals)
    self.year_statistics = addict.Dict()
    for year, year_totals in group_by_years.items():
        word_counts = [
            total
            for total in year_totals
            if isinstance(total, int) and total > 1
        ]
        self.year_statistics[year] = self.get_descriptive_statistics(
            word_counts
        )
@staticmethod
def construct_lyrics_url(artist: str, song: str) -> str:
"""Builds the URL for the lyrics api.
Parameters
----------
artist : str
Artist
song : str
Track title
Returns
-------
str
URL for lyrics from the lyrics api.
"""
lyrics_api_base = 'https://api.lyrics.ovh/v1'
lyrics_api_url = html.escape(f'{lyrics_api_base}/{artist}/{song}')
return lyrics_api_url
@staticmethod
def request_lyrics_from_url(url: str, timeout: float = 30.0) -> 'str | None':
    """Get lyrics from the lyrics api.

    Parameters
    ----------
    url : str
        URL of the track for the lyrics api.
    timeout : float, optional
        Seconds to wait for the api before giving up on this track.

    Returns
    -------
    str or None
        Lyrics of the track with punctuation removed, or ``None`` when the
        request timed out, the reply is not valid JSON or the reply has no
        ``'lyrics'`` key.
    """
    try:
        resp = requests.get(url, timeout=timeout)
    except requests.exceptions.Timeout:
        # One unresponsive request is treated like a track without lyrics
        # instead of stalling the whole run.
        return None
    # No lyrics for a song will return a key of 'error', we pass on this.
    try:
        return LyricsBuilder.strip_punctuation(resp.json()['lyrics'])
    except (KeyError, ValueError):
        # ValueError covers every JSON decode error requests can raise.
        return None
@staticmethod
def strip_punctuation(word: str) -> str:
"""Removes punctuation from lyrics.
Parameters
----------
word : str
Word to remove punctuation from.
Returns
-------
str
Same word without any punctuation.
"""
_strip = word.translate(str.maketrans('', '', string.punctuation))
return _strip
@staticmethod
def get_descriptive_statistics(nums: list) -> Dict[str, int]:
    """Calculate descriptive statistics.

    Parameters
    ----------
    nums : list
        A list containing total number of words from a track.

    Returns
    -------
    Dict[str, int]
        Mapping with the keys ``avg``, ``median``, ``std``, ``max``,
        ``min``, ``p_10``, ``p_25``, ``p_75``, ``p_90`` and ``count``.
        Every value except ``count`` is rounded up with ``math.ceil``.
        ``None`` is returned instead when ``nums`` is empty.
    """
    if len(nums) == 0:
        return None
    measures = (
        ('avg', np.mean(nums)),
        ('median', np.median(nums)),
        ('std', np.std(nums)),
        ('max', np.max(nums)),
        ('min', np.min(nums)),
        ('p_10', np.percentile(nums, 10)),
        ('p_25', np.percentile(nums, 25)),
        ('p_75', np.percentile(nums, 75)),
        ('p_90', np.percentile(nums, 90)),
    )
    summary = addict.Dict(
        (name, math.ceil(value)) for name, value in measures
    )
    summary['count'] = len(nums)
    return summary

View File

@@ -0,0 +1,81 @@
from __future__ import annotations
from abc import ABC, abstractstaticmethod, abstractmethod
from typing import Union
from musicbrainzapi.api import authenticate
class LyricsConcreteBuilder(ABC):
    """Abstract base class describing the interface of a Lyrics builder.

    Every member is abstract, so a subclass has to provide all of them
    before it can be instantiated.
    """

    @property
    @abstractmethod
    def product(self) -> None:
        pass

    @property
    @abstractmethod
    def artist(self) -> str:
        pass

    @artist.setter
    @abstractmethod
    def artist(self, artist: str) -> None:
        pass

    @property
    @abstractmethod
    def country(self) -> Union[str, None]:
        pass

    @country.setter
    @abstractmethod
    def country(self, country: Union[str, None]) -> None:
        pass

    @property
    @abstractmethod
    def artist_id(self) -> str:
        pass

    @artist_id.setter
    @abstractmethod
    def artist_id(self, artist_id: str) -> None:
        pass

    # staticmethod stacked on abstractmethod is the supported spelling;
    # abc.abstractstaticmethod is deprecated.
    @staticmethod
    @abstractmethod
    def set_useragent() -> None:
        """Set the Musicbrainz user agent via ``authenticate``."""
        authenticate.set_useragent()

    @abstractmethod
    def __init__(self) -> None:
        pass

    @abstractmethod
    def reset(self) -> None:
        pass

    @abstractmethod
    def find_artists(self) -> None:
        pass

    @abstractmethod
    def sort_artists(self) -> None:
        pass

    @abstractmethod
    def get_accuracy_scores(self) -> None:
        pass

    @abstractmethod
    def get_top_five_results(self) -> None:
        pass

    @abstractmethod
    def find_all_albums(self) -> None:
        pass

    @abstractmethod
    def find_all_tracks(self) -> None:
        pass

View File

@@ -0,0 +1,202 @@
from __future__ import annotations
import json
import os
import click
from musicbrainzapi.api.lyrics.builder import LyricsBuilder
from musicbrainzapi.api.lyrics import Lyrics
class LyricsClickDirector:
    """Director for Lyrics builder.

    Runs the steps of a ``LyricsBuilder`` in order, prompts the user through
    click where a choice is needed and copies the builder's results onto
    the builder's product. The step methods return the director itself so
    that they can be chained.
    """

    def __init__(self) -> None:
        self._builder = None

    @property
    def builder(self) -> LyricsBuilder:
        return self._builder

    @builder.setter
    def builder(self, builder: LyricsBuilder) -> None:
        self._builder = builder

    def _get_initial_artists(
        self, artist: str, country: str
    ) -> LyricsClickDirector:
        """Search Musicbrainz api for an artist

        Parameters
        ----------
        artist : str
            Artist to search for
        country : str
            Country artist comes from.

        Returns
        -------
        LyricsClickDirector
            This director.
        """
        self.builder.artist = artist
        self.builder.country = country
        self.builder.set_useragent()
        self.builder.find_artists()
        self.builder.sort_artists()
        self.builder.get_accuracy_scores()
        self.builder.get_top_five_results()
        return self

    def _confirm_final_artist(self) -> LyricsClickDirector:
        """Confirm the artist from the user.

        Lists the builder's top results, prompts for a choice and sets the
        chosen artist name and id on both the director and the builder.

        Returns
        -------
        LyricsClickDirector
            This director.

        Raises
        ------
        SystemExit
            If no artist is found will cleanly quit.
        """
        # Only the score of the last result decides; with no results at all
        # artist_meta stays None.
        artist_meta = None
        for score in self.builder._top_five_results.values():
            artist_meta = 'Multiple' if score <= 100 else None
        if artist_meta is None:
            click.echo(
                'Musicbrainz did not find any results for '
                f'{self.builder.artist[0]}. Check the spelling or consider '
                'alternative names that the artist/group may go by.'
            )
            raise SystemExit()
        _position = []
        click.echo(
            click.style(
                'Musicbrainz found several results for '
                f'{self.builder.artist[0]}. Which artist/group do you want'
                '?',
                fg='green',
            )
        )
        for number, artist_id in zip(
            range(1, 6), self.builder._top_five_results
        ):
            click.echo(
                f'[{number}] {self.builder._sort_names.get(artist_id)}'
                f' ({self.builder._accuracy_scores.get(artist_id)}% match)'
            )
            _position.append(artist_id)
        chosen = int(
            click.prompt(
                click.style('Enter choice, default is', blink=True),
                default=1,
                type=click.IntRange(1, len(self.builder._top_five_results)),
            )
        )
        choice = _position[chosen - 1]
        click.echo(f'You chose {self.builder._sort_names.get(choice)}')
        # Names look like 'name | disambiguation'; strip() drops the space
        # left in front of the separator.
        self._artist = (
            self.builder._sort_names.get(choice).split('|')[0].strip()
        )
        self._artist_id = choice
        # Set artist and artistID on builder + product
        self.builder.artist_id = self._artist_id
        self.builder.artist = self._artist
        return self

    def _query_for_data(self) -> LyricsClickDirector:
        """Query Musicbrainz api for albums + track data.

        Returns
        -------
        LyricsClickDirector
            This director.
        """
        self.builder.find_all_albums()
        self.builder.find_all_tracks()
        self.builder._product.all_albums_with_tracks = self.builder.all_albums
        return self

    def _get_lyrics(self) -> LyricsClickDirector:
        """Get Lyrics for each track

        Also counts the words of every track and writes the counts to
        ``lyrics_count.json`` in the current working directory.

        Returns
        -------
        LyricsClickDirector
            This director.
        """
        self.builder.find_lyrics_urls()
        self.builder.find_all_lyrics()
        self.builder._product.all_albums_with_lyrics = (
            self.builder.all_albums_lyrics
        )
        self.builder.count_words_in_lyrics()
        with open(f'{os.getcwd()}/lyrics_count.json', 'w+') as file:
            json.dump(
                self.builder.all_albums_lyrics_count,
                file,
                indent=2,
                sort_keys=True,
            )
        self.builder._product.all_albums_lyrics_count = (
            self.builder.all_albums_lyrics_count
        )
        return self

    def _calculate_basic_statistics(self) -> LyricsClickDirector:
        """Calculate the word total of every track.

        Returns
        -------
        LyricsClickDirector
            This director.
        """
        self.builder.calculate_average_all_albums()
        self.builder._product.all_albums_lyrics_sum = (
            self.builder.all_albums_lyrics_sum
        )
        return self

    def _calculate_descriptive_statistics(self) -> LyricsClickDirector:
        """Calculate descriptive statistics for album and year.

        Returns
        -------
        LyricsClickDirector
            This director.
        """
        self.builder.calculate_final_average_by_album()
        self.builder.calculate_final_average_by_year()
        self.builder._product.album_statistics = self.builder.album_statistics
        self.builder._product.year_statistics = self.builder.year_statistics
        return self

    def _dev(self) -> LyricsClickDirector:
        """Dev function - used for testing

        Computes the descriptive statistics, then shows the summaries of
        the product for a hard-coded artist name.

        Returns
        -------
        LyricsClickDirector
            This director.
        """
        self._calculate_descriptive_statistics()
        self.builder._product.artist_id = None
        self.builder._product.artist = 'Katzenjammer'
        self.builder._product.show_summary()
        self.builder._product.show_summary_statistics(group_by='year')
        return self

    @staticmethod
    def _get_product(builder_inst: LyricsBuilder) -> Lyrics:
        """Returns the constructed Lyrics object

        Parameters
        ----------
        builder_inst : LyricsBuilder
            Builder class for Lyrics object

        Returns
        -------
        Lyrics
            Lyrics object
        """
        return builder_inst._product

View File

@@ -1,5 +1,4 @@
import os
import sys
from importlib import import_module
import click
@@ -31,12 +30,16 @@ class ComplexCLI(click.MultiCommand):
rv.sort()
return rv
# def get_command(self, ctx, name):
# try:
# mod = import_module(f'musicbrainzapi.cli.commands.cmd_{name}')
# except ImportError as e:
# print(e)
# return
# return mod.cli
def get_command(self, ctx, name):
    """Import and return the click command for the subcommand ``name``.

    The command is the ``cli`` attribute of the module
    ``musicbrainzapi.cli.commands.cmd_<name>``. When that module cannot be
    imported the error is printed and ``None`` is returned. ``ctx`` is not
    used here.
    """
    try:
        mod = import_module(f'musicbrainzapi.cli.commands.cmd_{name}')
    except ImportError as e:
        print(e)
        return
    return mod.cli
@@ -61,9 +64,5 @@ def cli(ctx, path):
"""A complex command line interface."""
# ctx.verbose = verbose
if path is not None:
# click.echo(f'Path set to {os.path.expanduser(path)}')
click.echo(f'Path set to {os.path.expanduser(path)}')
ctx.path = os.path.expanduser(path)
if __name__ == '__main__':
cli()

View File

@@ -1,3 +1,4 @@
import json
from typing import Union
import click
@@ -5,20 +6,18 @@ import click
import matplotlib.pyplot as plt
from musicbrainzapi.cli.cli import pass_environment
from musicbrainzapi.api.command_builders import lyrics
import musicbrainzapi.wordcloud
from musicbrainzapi.api.lyrics.builder import LyricsBuilder
from musicbrainzapi.api.lyrics.director import LyricsClickDirector
# @click.argument('path', required=False, type=click.Path(resolve_path=True))
# @click.command(short_help='a test command')
@click.option('--dev', is_flag=True)
@click.option('--dev', is_flag=True, help='Development flag. Do not use.')
@click.option(
'--save-output',
required=False,
help='Save the output to json files locally. Will use the path parameter if'
' provided else defaults to current working directory.',
help='Save the output to json files locally. Will use the path parameter'
' if provided else defaults to current working directory.',
is_flag=True,
default=False,
)
@@ -64,12 +63,27 @@ def cli(
wordcloud: bool,
save_output: bool,
) -> None:
"""Search for lyrics statistics of an Artist/Group."""
path = ctx.path
print(f'home={ctx.home}')
# lyrics_obj = list()
director = lyrics.LyricsClickDirector()
builder = lyrics.LyricsBuilder()
"""Search for lyrics statistics of an Artist/Group.
Parameters
----------
ctx : musicbrainzapi.cli.cli.Environment
click environment class
artist : str
artist
country : Union[str, None]
country
dev : bool
dev flag - not to be used
show_summary : str
summary flag - used to display descriptive statistics
wordcloud : bool
wordcloud flag - used to create a wordcloud from lyrics
save_output : bool
save output flag - used to save output locally to disk
"""
director = LyricsClickDirector()
builder = LyricsBuilder()
director.builder = builder
if dev:
director._dev()
@@ -105,7 +119,9 @@ def cli(
lyrics_0.all_albums_lyrics_count
)
cloud.create_word_cloud()
click.confirm('Wordcloud ready - press enter to show.', default=True)
show = click.confirm(
'Wordcloud ready - press enter to show.', default=True
)
plt.imshow(
cloud.wc.recolor(
color_func=cloud.generate_grey_colours, random_state=3
@@ -113,4 +129,12 @@ def cli(
interpolation='bilinear',
)
plt.axis('off')
if show:
plt.show()
if save_output:
click.echo(f'Saving output to {ctx.path}')
path = ctx.path if ctx.path[-1] == '/' else ctx.path + '/'
attr = lyrics_0._attributes
for a in attr:
with open(f'{path}{a}.json', 'w') as f:
json.dump(getattr(lyrics_0, a), f, indent=2)

View File

@@ -10,21 +10,44 @@ from PIL import Image
from wordcloud import STOPWORDS, WordCloud
import numpy as np
from musicbrainzapi.api.command_builders.lyrics import Lyrics
from musicbrainzapi.api.lyrics import Lyrics
if typing.TYPE_CHECKING:
import PIL.PngImagePlugin.PngImageFile
class LyricsWordcloud:
"""Create a word cloud from Lyrics.
"""docstring for LyricsWordcloud"""
Attributes
----------
all_albums_lyrics_count : list
List of all albums + track lyrics counted by each word
char_mask : np.array
numpy array containing data for the word cloud image
freq : collections.Counter
Counter object containing counts for all words across all tracks
lyrics_list : list
List of all words from all lyrics across all tracks.
pillow_img : PIL.PngImagePlugin.PngImageFile
pillow image of the word cloud base
wc : wordcloud.WordCloud
WordCloud object
"""
def __init__(
self,
pillow_img: 'PIL.PngImagePlugin.PngImageFile',
all_albums_lyrics_count: 'Lyrics.all_albums_lyrics_count',
):
"""
Parameters
----------
pillow_img : PIL.PngImagePlugin.PngImageFile
pillow image of the word cloud base
all_albums_lyrics_count : Lyrics.all_albums_lyrics_count
List of all albums + track lyrics counted by each word
"""
self.pillow_img = pillow_img
self.all_albums_lyrics_count = all_albums_lyrics_count
@@ -32,6 +55,13 @@ class LyricsWordcloud:
def use_microphone(
cls, all_albums_lyrics_count: 'Lyrics.all_albums_lyrics_count',
) -> LyricsWordcloud:
"""Class method to instantiate with a microphone base image.
Parameters
----------
all_albums_lyrics_count : Lyrics.all_albums_lyrics_count
List of all albums + track lyrics counted by each word
"""
mic_resource = resources.path(
'musicbrainzapi.wordcloud.resources', 'mic.png'
)
@@ -41,6 +71,12 @@ class LyricsWordcloud:
return cls(mic_img, all_albums_lyrics_count)
def _get_lyrics_list(self) -> None:
"""Gets all words from lyrics in a single list + cleans them.
Returns
-------
None
"""
self.lyrics_list = list()
for i in self.all_albums_lyrics_count:
for album, lyric in i.items():
@@ -64,9 +100,13 @@ class LyricsWordcloud:
return self
def _get_frequencies(self) -> None:
    """Count how often each entry of ``self.lyrics_list`` occurs.

    The resulting ``collections.Counter`` is stored in ``self.freq``.
    """
    self.freq = collections.Counter(self.lyrics_list)
def _get_char_mask(self) -> None:
    """Convert ``self.pillow_img`` to a numpy array.

    The array is stored in ``self.char_mask``.
    """
    self.char_mask = np.array(self.pillow_img)
@staticmethod
@@ -81,16 +121,28 @@ class LyricsWordcloud:
return colour
def _generate_word_cloud(self) -> None:
"""Generates a word cloud
Returns
-------
None
"""
self.wc = WordCloud(
max_words=50,
width=500,
height=500,
max_words=150,
width=1500,
height=1500,
mask=self.char_mask,
random_state=1,
).generate_from_frequencies(self.freq)
return self
def _generate_plot(self) -> None:
"""Plots the wordcloud and sets matplotlib options.
Returns
-------
None
"""
plt.imshow(
self.wc.recolor(
color_func=self.generate_grey_colours, random_state=3
@@ -100,13 +152,18 @@ class LyricsWordcloud:
plt.axis('off')
return self
def save_to_disk(self, path: str):
pass
def show_word_cloud(self):
    """Show the word cloud by calling matplotlib's ``plt.show()``.
    """
    plt.show()
def create_word_cloud(self) -> None:
"""Creates a word cloud
Returns
-------
None
"""
self._get_lyrics_list()
self._get_frequencies()
self._get_char_mask()

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 186 KiB

After

Width:  |  Height:  |  Size: 220 KiB