adding latest working version

This commit is contained in:
2020-03-07 01:24:24 +00:00
parent 94476113da
commit 19e706eb50
8 changed files with 432 additions and 138028 deletions

8
album_statistics.json Normal file
View File

@@ -0,0 +1,8 @@
{
"The All\u2010American Rejects [2003]": null,
"Move Along [2005]": null,
"B-Sides & Rarities [2007]": null,
"When the World Comes Down [2009]": null,
"Kids in the Street [2012]": null,
"The All\u2010American Rejects [2000]": null
}

111
all_albums_lyrics.json Normal file
View File

@@ -0,0 +1,111 @@
[
{
"The All\u2010American Rejects [2003]": [
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null
]
},
{
"Move Along [2005]": [
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null
]
},
{
"B-Sides & Rarities [2007]": [
null,
null,
null,
null,
null,
null,
null,
null
]
},
{
"When the World Comes Down [2009]": [
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null
]
},
{
"Kids in the Street [2012]": [
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null
]
},
{
"The All\u2010American Rejects [2000]": [
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null
]
}
]

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

14
poetry.lock generated
View File

@@ -48,6 +48,14 @@ version = "1.5"
[package.dependencies] [package.dependencies]
pycodestyle = ">=2.5.0" pycodestyle = ">=2.5.0"
[[package]]
category = "main"
description = "Print ASCII tables for terminals"
name = "beautifultable"
optional = false
python-versions = "*"
version = "0.8.0"
[[package]] [[package]]
category = "dev" category = "dev"
description = "The uncompromising code formatter." description = "The uncompromising code formatter."
@@ -501,7 +509,7 @@ docs = ["sphinx", "jaraco.packaging (>=3.2)", "rst.linker (>=1.9)"]
testing = ["jaraco.itertools", "func-timeout"] testing = ["jaraco.itertools", "func-timeout"]
[metadata] [metadata]
content-hash = "16e37a137dfee91b8c49b120f770dc04b5361fb91d73ce60b0eae19f64ac2ce5" content-hash = "b3e363ce109826fbe16ba73b6102a62254689ead837a371aecd2857fa5ed9f45"
python-versions = "^3.7" python-versions = "^3.7"
[metadata.files] [metadata.files]
@@ -524,6 +532,10 @@ attrs = [
autopep8 = [ autopep8 = [
{file = "autopep8-1.5.tar.gz", hash = "sha256:0f592a0447acea0c2b0a9602be1e4e3d86db52badd2e3c84f0193bfd89fd3a43"}, {file = "autopep8-1.5.tar.gz", hash = "sha256:0f592a0447acea0c2b0a9602be1e4e3d86db52badd2e3c84f0193bfd89fd3a43"},
] ]
beautifultable = [
{file = "beautifultable-0.8.0-py2.py3-none-any.whl", hash = "sha256:28e2e93d44a4e84511c4869da4b907345435a06728925e295790f24e1d57300c"},
{file = "beautifultable-0.8.0.tar.gz", hash = "sha256:d44d9551bbed7bfa88675324f84efb9aa857384d44e9fb21eb530f0a0badb815"},
]
black = [ black = [
{file = "black-19.10b0-py36-none-any.whl", hash = "sha256:1b30e59be925fafc1ee4565e5e08abef6b03fe455102883820fe5ee2e4734e0b"}, {file = "black-19.10b0-py36-none-any.whl", hash = "sha256:1b30e59be925fafc1ee4565e5e08abef6b03fe455102883820fe5ee2e4734e0b"},
{file = "black-19.10b0.tar.gz", hash = "sha256:c2edb73a08e9e0e6f65a0e6af18b059b8b1cdd5bef997d7a0b181df93dc81539"}, {file = "black-19.10b0.tar.gz", hash = "sha256:c2edb73a08e9e0e6f65a0e6af18b059b8b1cdd5bef997d7a0b181df93dc81539"},

View File

@@ -11,6 +11,7 @@ musicbrainzngs = "^0.7.1"
addict = "^2.2.1" addict = "^2.2.1"
progress = "^1.5" progress = "^1.5"
numpy = "^1.18.1" numpy = "^1.18.1"
beautifultable = "^0.8.0"
[tool.poetry.dev-dependencies] [tool.poetry.dev-dependencies]
pytest = "^5.2" pytest = "^5.2"

View File

@@ -1,21 +1,22 @@
from __future__ import annotations from __future__ import annotations
from abc import ABC, abstractmethod, abstractstaticmethod from abc import ABC, abstractmethod, abstractstaticmethod
from dataclasses import dataclass from dataclasses import dataclass
from pprint import pprint
# from pprint import pprint
from typing import Union, List, Dict from typing import Union, List, Dict
from collections import Counter
import html import html
import json import json
import os import os
from collections import Counter
import string import string
import math import math
import statistics
from beautifultable import BeautifulTable
import musicbrainzngs import musicbrainzngs
import click import click
import addict import addict
import requests import requests
import numpy import numpy as np
from musicbrainzapi.api import authenticate from musicbrainzapi.api import authenticate
@@ -151,7 +152,7 @@ class LyricsBuilder(LyricsConcreteBuilder):
self.reset() self.reset()
def reset(self) -> None: def reset(self) -> None:
self._product = Lyrics self._product = Lyrics()
def find_artists(self) -> None: def find_artists(self) -> None:
self.musicbrainz_artists = musicbrainzngs.search_artists( self.musicbrainz_artists = musicbrainzngs.search_artists(
@@ -325,7 +326,7 @@ class LyricsBuilder(LyricsConcreteBuilder):
# pprint(self.all_albums_lyrics_url) # pprint(self.all_albums_lyrics_url)
return self return self
# change this for progressbar for i loop # change this for progressbar for i loop
def find_all_lyrics(self) -> None: def find_all_lyrics(self) -> None:
self.all_albums_lyrics = list() self.all_albums_lyrics = list()
@@ -334,7 +335,6 @@ class LyricsBuilder(LyricsConcreteBuilder):
label=f'Finding lyrics for {self.total_track_count}' label=f'Finding lyrics for {self.total_track_count}'
f' tracks for {self.artist}. This may take some time! ☕️', f' tracks for {self.artist}. This may take some time! ☕️',
) as bar: ) as bar:
for x in self.all_albums_lyrics_url: for x in self.all_albums_lyrics_url:
for alb, urls in x.items(): for alb, urls in x.items():
bar.update(1) bar.update(1)
@@ -344,12 +344,15 @@ class LyricsBuilder(LyricsConcreteBuilder):
) )
self.all_albums_lyrics.append(lyrics) self.all_albums_lyrics.append(lyrics)
bar.update(update - 1) bar.update(update - 1)
with open(f'{os.getcwd()}/all_albums_lyrics.json', 'w') as f:
json.dump(self.all_albums_lyrics, f, indent=2)
return self return self
def count_words_in_lyrics(self) -> None: def count_words_in_lyrics(self) -> None:
# remove punctuation, fix click bar # remove punctuation, fix click bar
self.all_albums_lyrics_count = list() self.all_albums_lyrics_count = list()
print(self.total_track_count) # print(self.total_track_count)
with click.progressbar( with click.progressbar(
length=self.total_track_count, label=f'Processing lyrics' length=self.total_track_count, label=f'Processing lyrics'
) as bar: ) as bar:
@@ -376,16 +379,16 @@ class LyricsBuilder(LyricsConcreteBuilder):
# rename this # rename this
def calculate_average_all_albums(self) -> None: def calculate_average_all_albums(self) -> None:
self.all_albums_lyrics_sum = list() self.all_albums_lyrics_sum = list()
# with open(f'{os.getcwd()}/lyrics_count.json', 'r') as f: # album_lyrics = self.all_albums_lyrics_count
# album_lyrics = json.load(f) with open(f'{os.getcwd()}/lyrics_count.json', 'r') as f:
album_lyrics = self.all_albums_lyrics_count album_lyrics = json.load(f)
count = 0 count = 0
for i in album_lyrics: for i in album_lyrics:
count += len(i) count += len(i)
for album, lyrics_list in i.items(): for album, lyrics_list in i.items():
album_avg = list() album_avg = list()
d = addict.Dict() d = addict.Dict()
print(album) # print(album)
for j in lyrics_list: for j in lyrics_list:
if j != 'No Lyrics': if j != 'No Lyrics':
song_total = 0 song_total = 0
@@ -409,9 +412,9 @@ class LyricsBuilder(LyricsConcreteBuilder):
def calculate_final_average_by_album(self) -> None: def calculate_final_average_by_album(self) -> None:
self.album_statistics = addict.Dict() self.album_statistics = addict.Dict()
album_lyrics = self.all_albums_lyrics_sum # album_lyrics = self.all_albums_lyrics_sum
# with open(f'{os.getcwd()}/lyrics_sum_all_album.json', 'r') as f: with open(f'{os.getcwd()}/lyrics_sum_all_album.json', 'r') as f:
# album_lyrics = json.load(f) album_lyrics = json.load(f)
for i in album_lyrics: for i in album_lyrics:
for album, count in i.items(): for album, count in i.items():
@@ -423,15 +426,17 @@ class LyricsBuilder(LyricsConcreteBuilder):
self.album_statistics = addict.Dict( self.album_statistics = addict.Dict(
**self.album_statistics, **addict.Dict((album, _d)) **self.album_statistics, **addict.Dict((album, _d))
) )
pprint(self.album_statistics) with open(f'{os.getcwd()}/album_statistics.json', 'w') as f:
json.dump(self.album_statistics, f, indent=2)
# pprint(self.album_statistics)
# implement above in this # implement above in this
def calculate_final_average_by_year(self) -> None: def calculate_final_average_by_year(self) -> None:
group_by_years = addict.Dict() group_by_years = addict.Dict()
self.year_statistics = addict.Dict() self.year_statistics = addict.Dict()
album_lyrics = self.all_albums_lyrics_sum # album_lyrics = self.all_albums_lyrics_sum
# with open(f'{os.getcwd()}/lyrics_sum_all_album.json', 'r') as f: with open(f'{os.getcwd()}/lyrics_sum_all_album.json', 'r') as f:
# album_lyrics = json.load(f) album_lyrics = json.load(f)
# Merge years together # Merge years together
for i in album_lyrics: for i in album_lyrics:
@@ -455,7 +460,7 @@ class LyricsBuilder(LyricsConcreteBuilder):
self.year_statistics = addict.Dict( self.year_statistics = addict.Dict(
**self.year_statistics, **addict.Dict((year, _d)) **self.year_statistics, **addict.Dict((year, _d))
) )
pprint(self.year_statistics) # pprint(self.year_statistics)
@staticmethod @staticmethod
def construct_lyrics_url(artist: str, song: str) -> str: def construct_lyrics_url(artist: str, song: str) -> str:
@@ -481,15 +486,17 @@ class LyricsBuilder(LyricsConcreteBuilder):
@staticmethod @staticmethod
def get_descriptive_statistics(nums: list) -> Dict[str, int]: def get_descriptive_statistics(nums: list) -> Dict[str, int]:
avg = math.ceil(numpy.mean(nums)) if len(nums) == 0:
median = math.ceil(numpy.median(nums)) return
std = math.ceil(numpy.std(nums)) avg = math.ceil(np.mean(nums))
max = math.ceil(numpy.max(nums)) median = math.ceil(np.median(nums))
min = math.ceil(numpy.min(nums)) std = math.ceil(np.std(nums))
p_10 = math.ceil(numpy.percentile(nums, 10)) max = math.ceil(np.max(nums))
p_25 = math.ceil(numpy.percentile(nums, 25)) min = math.ceil(np.min(nums))
p_75 = math.ceil(numpy.percentile(nums, 75)) p_10 = math.ceil(np.percentile(nums, 10))
p_90 = math.ceil(numpy.percentile(nums, 90)) p_25 = math.ceil(np.percentile(nums, 25))
p_75 = math.ceil(np.percentile(nums, 75))
p_90 = math.ceil(np.percentile(nums, 90))
count = len(nums) count = len(nums)
_d = addict.Dict( _d = addict.Dict(
('avg', avg), ('avg', avg),
@@ -501,7 +508,7 @@ class LyricsBuilder(LyricsConcreteBuilder):
('p_25', p_25), ('p_25', p_25),
('p_75', p_75), ('p_75', p_75),
('p_90', p_90), ('p_90', p_90),
('count', count) ('count', count),
) )
return _d return _d
@@ -595,7 +602,7 @@ class LyricsClickDirector:
json.dump( json.dump(
self.builder.all_albums_lyrics_count, self.builder.all_albums_lyrics_count,
file, file,
indent=4, indent=2,
sort_keys=True, sort_keys=True,
) )
self.builder._product.all_albums_lyrics_count = ( self.builder._product.all_albums_lyrics_count = (
@@ -603,33 +610,40 @@ class LyricsClickDirector:
) )
return self return self
def _calculate_average(self) -> None: def _calculate_basic_statistics(self) -> None:
self.builder.calculate_average_all_albums() self.builder.calculate_average_all_albums()
self.builder._product.all_albums_lyrics_sum = ( self.builder._product.all_albums_lyrics_sum = (
self.builder.all_albums_lyrics_sum self.builder.all_albums_lyrics_sum
) )
pprint(self.builder._product.all_albums_lyrics_sum) return self
def _calculate_descriptive_statistics(self) -> None:
self.builder.calculate_final_average_by_album() self.builder.calculate_final_average_by_album()
self.builder.calculate_final_average_by_year() self.builder.calculate_final_average_by_year()
self.builder._product.album_statistics = self.builder.album_statistics
self.builder._product.year_statistics = self.builder.year_statistics
return self
def _dev(self) -> None: def _dev(self) -> None:
self.builder.calculate_final_average_by_album() self.builder.calculate_final_average_by_album()
self.builder.calculate_final_average_by_year() self.builder.calculate_final_average_by_year()
self.builder._product.album_statistics = self.builder.album_statistics
self.builder._product.year_statistics = self.builder.year_statistics
self.builder._product.artist_id = None
self.builder._product.artist = 'Katzenjammer'
self.builder._product.show_summary()
self.builder._product.show_summary_statistics(group_by='year')
return self
@staticmethod
def _get_product(builder_inst: LyricsBuilder) -> Lyrics:
    """Return the ``Lyrics`` object stored on ``builder_inst._product``."""
    return builder_inst._product
@dataclass @dataclass
class Lyrics: class Lyrics:
"""docstring for Lyrics""" """docstring for Lyrics"""
__slots__ = [
'artist_id',
'artist',
'country',
'all_albums_with_tracks',
'all_albums_with_lyrics',
'all_albums_lyrics_count',
'all_albums_lyrics_sum',
]
artist_id: str artist_id: str
artist: str artist: str
country: Union[str, None] country: Union[str, None]
@@ -637,3 +651,92 @@ class Lyrics:
all_albums_with_lyrics: List[Dict[str, List[str]]] all_albums_with_lyrics: List[Dict[str, List[str]]]
all_albums_lyrics_count: List[Dict[str, List[List[str, int]]]] all_albums_lyrics_count: List[Dict[str, List[List[str, int]]]]
all_albums_lyrics_sum: List[Dict[str, List[int, str]]] all_albums_lyrics_sum: List[Dict[str, List[int, str]]]
album_statistics: Dict[str, Dict[str, int]]
year_statistics: Dict[str, Dict[str, int]]
def __init__(self) -> None:
    """Create an instance without setting any of the declared fields.

    Nothing is initialised here; elsewhere in this file the attributes
    are assigned one by one after construction.
    """
    # NOTE(review): an explicit __init__ on a @dataclass replaces the
    # generated one, which is presumably why ``Lyrics()`` can be called
    # with no arguments -- confirm this is intended.
    pass
def show_summary(self) -> 'Lyrics':
    """Print the average number of words per track across all albums.

    Collects the ``avg`` entry of every value in ``self.album_statistics``,
    takes the mean of those values, rounds it up and echoes it inside a
    single-cell table headed with ``self.artist``.

    Returns:
        This instance, so that calls can be chained.

    Raises:
        SystemExit: if no album has statistics to average.
    """
    # Albums for which no statistics could be computed are stored as None;
    # they are left out of the overall average.
    all_averages = [
        album_stats['avg']
        for album_stats in self.album_statistics.values()
        if album_stats is not None
    ]

    # Check for "nothing to average" explicitly rather than relying on
    # np.mean([]) returning NaN and math.ceil(NaN) raising ValueError.
    if not all_averages:
        click.echo(
            'Oops! https://lyrics.ovh couldn\'t find any lyrics across all'
            ' albums. This is caused by inconsistent Artist names from'
            ' Musicbrainz and lyrics.ovh. Try another artist.'
        )
        raise SystemExit

    final_average = math.ceil(np.mean(all_averages))

    output = BeautifulTable(max_width=200)
    output.set_style(BeautifulTable.STYLE_BOX_ROUNDED)
    output.column_headers = [
        'Average number of words in tracks across all albums\n'
        f'for {self.artist}'
    ]
    output.append_row([final_average])
    click.echo(output)
    return self
def show_summary_statistics(self, group_by: str) -> 'Lyrics':
    """Print a table of descriptive statistics, one row per group.

    Args:
        group_by: Prefix of the attribute to read; the statistics are taken
            from ``self.<group_by>_statistics`` (the CLI passes ``'album'``
            or ``'year'``).

    Returns:
        This instance, so that calls can be chained.

    Raises:
        AttributeError: if ``self`` has no ``<group_by>_statistics``
            attribute.
    """
    stats_obj = getattr(self, f'{group_by}_statistics')
    # Column order of the inner table; each name is also the key looked up
    # in a group's statistics mapping.
    stats = [
        'avg',
        'std',
        'min',
        'max',
        'median',
        'count',
        'p_10',
        'p_25',
        'p_75',
        'p_90',
    ]

    # Outer single-cell table: carries the title and wraps the inner table.
    output_0 = BeautifulTable(max_width=200)
    output_0.set_style(BeautifulTable.STYLE_BOX_ROUNDED)
    output_0.column_headers = [
        'Descriptive statistics for number of words in tracks across all'
        f' {group_by}s\nfor {self.artist}'
    ]

    # Inner table: the group name followed by one column per statistic.
    output_1 = BeautifulTable(max_width=200)
    output_1.set_style(BeautifulTable.STYLE_BOX_ROUNDED)
    output_1.column_headers = [group_by, *stats]

    for group, group_stats in stats_obj.items():
        # A group whose statistics could not be computed is stored as None;
        # calling .get() on it would raise AttributeError, so skip it.
        if group_stats is None:
            continue
        output_1.append_row(
            [group, *(group_stats.get(stat) for stat in stats)]
        )

    output_0.append_row([output_1])
    click.echo(output_0)
    return self

View File

@@ -8,6 +8,23 @@ from musicbrainzapi.api.command_builders import lyrics
# @click.argument('path', required=False, type=click.Path(resolve_path=True)) # @click.argument('path', required=False, type=click.Path(resolve_path=True))
# @click.command(short_help='a test command') # @click.command(short_help='a test command')
@click.option('--dev', is_flag=True)
@click.option(
'--save-output',
required=False,
help='Save the output to json files locally. Will use the path parameter if'
' provided else defaults to current working directory.',
is_flag=True,
default=False
)
@click.option(
'--show-summary',
required=False,
help='Show summary statistics for the artist.',
type=click.Choice(['album', 'year', 'all']),
)
@click.option( @click.option(
'--country', '--country',
'-c', '-c',
@@ -24,32 +41,45 @@ from musicbrainzapi.api.command_builders import lyrics
required=True, required=True,
multiple=True, multiple=True,
type=str, type=str,
help='Artist/Group to search lyrics for.', help='Artist/Group to search.',
) )
@click.option(
'--save-lyrics', required=False, is_flag=True, help='Save the lyrics '
)
@click.option('--dev', is_flag=True)
@click.command() @click.command()
@pass_environment @pass_environment
def cli( def cli(
ctx, artist: str, country: Union[str, None], save_lyrics, dev: bool ctx,
artist: str,
country: Union[str, None],
dev: bool,
show_summary: str,
save_output: bool
) -> None: ) -> None:
""" """Search for lyrics statistics of an Artist/Group."""
Search for lyrics of an Artist/Group. # lyrics_obj = list()
"""
print(f'save_lyrics={save_lyrics}')
director = lyrics.LyricsClickDirector() director = lyrics.LyricsClickDirector()
builder = lyrics.LyricsBuilder() builder = lyrics.LyricsBuilder()
director.builder = builder director.builder = builder
if dev: if dev:
director._dev() director._dev()
raise(SystemExit) raise (SystemExit)
# build the Lyrics object
director._get_initial_artists(artist, country) director._get_initial_artists(artist, country)
director._confirm_final_artist() director._confirm_final_artist()
director._query_for_data() director._query_for_data()
director._get_lyrics() director._get_lyrics()
director._calculate_average() director._calculate_basic_statistics()
if show_summary is not None:
director._calculate_descriptive_statistics()
def dev(): # Get the Lyrics object
pass lyrics_0 = director.builder.product
# lyrics_obj.append(lyrics_0)
# Show basic count
lyrics_0.show_summary()
# Show summary statistics
if show_summary == 'all':
lyrics_0.show_summary_statistics(group_by='album')
lyrics_0.show_summary_statistics(group_by='year')
elif show_summary in ['album', 'year']:
lyrics_0.show_summary_statistics(group_by=show_summary)