diff --git a/lyrics_count.json b/lyrics_count.json index d7492a9..cd630a3 100644 --- a/lyrics_count.json +++ b/lyrics_count.json @@ -4083,74 +4083,22 @@ [ [ "I", - 38 + 18 ], [ "Im", - 14 + 8 ], [ "when", - 12 + 6 ], [ "pray", - 11 - ], - [ - "will", - 10 - ], - [ - "dance", - 10 - ], - [ - "walk", - 10 - ], - [ - "away", - 10 - ], - [ - "gonna", - 10 - ], - [ - "sing", - 10 - ], - [ - "wont", - 10 - ], - [ - "beg", - 10 - ], - [ - "or", - 10 - ], - [ - "oh", - 9 - ], - [ - "the", - 7 - ], - [ - "no", 5 ], [ - "to", - 4 - ], - [ - "me", + "the", 4 ], [ @@ -4158,47 +4106,71 @@ 4 ], [ - "and", + "will", + 4 + ], + [ + "dance", + 4 + ], + [ + "walk", + 4 + ], + [ + "away", + 4 + ], + [ + "Oh", + 4 + ], + [ + "gonna", + 4 + ], + [ + "sing", + 4 + ], + [ + "wont", + 4 + ], + [ + "beg", + 4 + ], + [ + "or", + 4 + ], + [ + "me", 3 ], [ - "now", + "to", 3 ], [ "gone", 3 ], - [ - "more", - 3 - ], [ "of", 3 ], [ - "Ive", + "and", 2 ], [ - "for", + "now", 2 ], [ - "with", - 2 - ], - [ - "a", - 2 - ], - [ - "This", - 2 - ], - [ - "you", + "The", 2 ], [ @@ -4206,7 +4178,7 @@ 2 ], [ - "never", + "Cause", 2 ], [ @@ -4233,6 +4205,14 @@ "on", 2 ], + [ + "No", + 2 + ], + [ + "more", + 2 + ], [ "full", 2 @@ -4241,18 +4221,14 @@ "been", 2 ], - [ - "remember", - 2 - ], - [ - "Oh", - 2 - ], [ "In", 1 ], + [ + "our", + 1 + ], [ "cold", 1 @@ -4269,6 +4245,10 @@ "lingered", 1 ], + [ + "for", + 1 + ], [ "far", 1 @@ -4293,6 +4273,10 @@ "rosary", 1 ], + [ + "To", + 1 + ], [ "youd", 1 @@ -4301,6 +4285,14 @@ "stay", 1 ], + [ + "with", + 1 + ], + [ + "a", + 1 + ], [ "while", 1 @@ -4345,6 +4337,10 @@ "before", 1 ], + [ + "This", + 1 + ], [ "time", 1 @@ -4361,6 +4357,14 @@ "leaving", 1 ], + [ + "you", + 1 + ], + [ + "never", + 1 + ], [ "turn", 1 @@ -4369,10 +4373,6 @@ "around", 1 ], - [ - "Cause", - 1 - ], [ "standing", 1 @@ -4414,7 +4414,7 @@ 1 ], [ - "bridges", + "brigdes", 1 ], [ @@ -4449,10 +4449,22 @@ "earned", 1 ], + [ + "remember", + 1 + ], [ "why", 1 ], + [ + "Ive", + 1 + ], + [ + "Remember", + 1 + ], [ "where", 1 @@ -4464,70 +4476,6 @@ [ "from", 1 - ], - [ - "ooo", - 1 - ], - [ - "leave", - 1 - ], - [ - "my", - 1 - ], - [ - "mask", - 1 - ], - [ - "The", - 1 - ], - [ - "part", - 1 - ], - [ - "played", - 1 - ], - [ - "knew", - 1 - ], - [ - "Tonight", - 1 - ], - [ - "youre", - 1 - ], - [ - "alone", - 1 - ], - [ - "No", - 1 - ], - [ - "is", - 1 - ], - [ - "not", - 1 - ], - [ - "home", - 1 - ], - [ - "oooo", - 1 ] ], [ diff --git a/lyrics_sum_all_album.json b/lyrics_sum_all_album.json index 25cd38e..1ffa66c 100644 --- a/lyrics_sum_all_album.json +++ b/lyrics_sum_all_album.json @@ -1 +1 @@ -[{"Le Pop [2009]": [1, 244, 203, 158, "No Lyrics", 129, 108, 1, 122, 163, 251, 176, 183]}, {"A Kiss Before You Go [2011]": [40, 319, 203, 205, 197, 188, 188, 177, 201, 323, 1, 101]}, {"Rockland [2015]": [187, 192, 313, 278, 290, 325, 264, 316, 200, 362, 237, 422, 194, 304]}] \ No newline at end of file +[{"Le Pop [2009]": [1, 244, 203, 158, "No Lyrics", 129, 108, 1, 122, 163, 251, 176, 183]}, {"A Kiss Before You Go [2011]": [40, 191, 203, 205, 197, 188, 188, 177, 201, 323, 1, 101]}, {"Rockland [2015]": [187, 192, 313, 278, 290, 325, 264, 316, 200, 362, 237, 422, 194, 304]}] \ No newline at end of file diff --git a/poetry.lock b/poetry.lock index fe03720..80e5914 100644 --- a/poetry.lock +++ b/poetry.lock @@ -163,6 +163,14 @@ optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" version = "0.7.1" +[[package]] +category = "main" +description = "NumPy is the fundamental package for array computing with Python." +name = "numpy" +optional = false +python-versions = ">=3.5" +version = "1.18.1" + [[package]] category = "dev" description = "Core utilities for Python packages" @@ -493,7 +501,7 @@ docs = ["sphinx", "jaraco.packaging (>=3.2)", "rst.linker (>=1.9)"] testing = ["jaraco.itertools", "func-timeout"] [metadata] -content-hash = "50cff0758d9f4bfa77596b28f6f5f0c7f1db897b7c0d615071379c288b1d110d" +content-hash = "16e37a137dfee91b8c49b120f770dc04b5361fb91d73ce60b0eae19f64ac2ce5" python-versions = "^3.7" [metadata.files] @@ -560,6 +568,29 @@ musicbrainzngs = [ {file = "musicbrainzngs-0.7.1-py2.py3-none-any.whl", hash = "sha256:e841a8f975104c0a72290b09f59326050194081a5ae62ee512f41915090e1a10"}, {file = "musicbrainzngs-0.7.1.tar.gz", hash = "sha256:ab1c0100fd0b305852e65f2ed4113c6de12e68afd55186987b8ed97e0f98e627"}, ] +numpy = [ + {file = "numpy-1.18.1-cp35-cp35m-macosx_10_6_intel.whl", hash = "sha256:20b26aaa5b3da029942cdcce719b363dbe58696ad182aff0e5dcb1687ec946dc"}, + {file = "numpy-1.18.1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:70a840a26f4e61defa7bdf811d7498a284ced303dfbc35acb7be12a39b2aa121"}, + {file = "numpy-1.18.1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:17aa7a81fe7599a10f2b7d95856dc5cf84a4eefa45bc96123cbbc3ebc568994e"}, + {file = "numpy-1.18.1-cp35-cp35m-win32.whl", hash = "sha256:f3d0a94ad151870978fb93538e95411c83899c9dc63e6fb65542f769568ecfa5"}, + {file = "numpy-1.18.1-cp35-cp35m-win_amd64.whl", hash = "sha256:1786a08236f2c92ae0e70423c45e1e62788ed33028f94ca99c4df03f5be6b3c6"}, + {file = "numpy-1.18.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:ae0975f42ab1f28364dcda3dde3cf6c1ddab3e1d4b2909da0cb0191fa9ca0480"}, + {file = "numpy-1.18.1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:cf7eb6b1025d3e169989416b1adcd676624c2dbed9e3bcb7137f51bfc8cc2572"}, + {file = "numpy-1.18.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:b765ed3930b92812aa698a455847141869ef755a87e099fddd4ccf9d81fffb57"}, + {file = "numpy-1.18.1-cp36-cp36m-win32.whl", hash = "sha256:2d75908ab3ced4223ccba595b48e538afa5ecc37405923d1fea6906d7c3a50bc"}, + {file = "numpy-1.18.1-cp36-cp36m-win_amd64.whl", hash = "sha256:9acdf933c1fd263c513a2df3dceecea6f3ff4419d80bf238510976bf9bcb26cd"}, + {file = "numpy-1.18.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:56bc8ded6fcd9adea90f65377438f9fea8c05fcf7c5ba766bef258d0da1554aa"}, + {file = "numpy-1.18.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:e422c3152921cece8b6a2fb6b0b4d73b6579bd20ae075e7d15143e711f3ca2ca"}, + {file = "numpy-1.18.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:b3af02ecc999c8003e538e60c89a2b37646b39b688d4e44d7373e11c2debabec"}, + {file = "numpy-1.18.1-cp37-cp37m-win32.whl", hash = "sha256:d92350c22b150c1cae7ebb0ee8b5670cc84848f6359cf6b5d8f86617098a9b73"}, + {file = "numpy-1.18.1-cp37-cp37m-win_amd64.whl", hash = "sha256:77c3bfe65d8560487052ad55c6998a04b654c2fbc36d546aef2b2e511e760971"}, + {file = "numpy-1.18.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c98c5ffd7d41611407a1103ae11c8b634ad6a43606eca3e2a5a269e5d6e8eb07"}, + {file = "numpy-1.18.1-cp38-cp38-manylinux1_i686.whl", hash = "sha256:9537eecf179f566fd1c160a2e912ca0b8e02d773af0a7a1120ad4f7507cd0d26"}, + {file = "numpy-1.18.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:e840f552a509e3380b0f0ec977e8124d0dc34dc0e68289ca28f4d7c1d0d79474"}, + {file = "numpy-1.18.1-cp38-cp38-win32.whl", hash = "sha256:590355aeade1a2eaba17617c19edccb7db8d78760175256e3cf94590a1a964f3"}, + {file = "numpy-1.18.1-cp38-cp38-win_amd64.whl", hash = "sha256:39d2c685af15d3ce682c99ce5925cc66efc824652e10990d2462dfe9b8918c6a"}, + {file = "numpy-1.18.1.zip", hash = "sha256:b6ff59cee96b454516e47e7721098e6ceebef435e3e21ac2d6c3b8b02628eb77"}, +] packaging = [ {file = "packaging-20.1-py2.py3-none-any.whl", hash = "sha256:170748228214b70b672c581a3dd610ee51f733018650740e98c7df862a583f73"}, {file = "packaging-20.1.tar.gz", hash = "sha256:e665345f9eef0c621aa0bf2f8d78cf6d21904eef16a93f020240b704a57f1334"}, diff --git a/pyproject.toml b/pyproject.toml index 592b54e..7d1e7ec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,6 +10,7 @@ requests = "^2.23.0" musicbrainzngs = "^0.7.1" addict = "^2.2.1" progress = "^1.5" +numpy = "^1.18.1" [tool.poetry.dev-dependencies] pytest = "^5.2" @@ -24,9 +25,9 @@ YAPF = "^0.29.0" pudb = "^2019.2" pyls-black = "^0.4.4" +[tool.poetry.plugins."console_scripts"] +"musicbrainzapi" = "musicbrainzapi.cli.cli:cli" [build-system] requires = ["poetry>=0.12"] build-backend = "poetry.masonry.api" -[tool.poetry.plugins."console_scripts"] -"musicbrainzapi" = "musicbrainzapi.cli.cli:cli" diff --git a/src/musicbrainzapi/api/command_builders/lyrics.py b/src/musicbrainzapi/api/command_builders/lyrics.py index 45726ca..11b48ba 100644 --- a/src/musicbrainzapi/api/command_builders/lyrics.py +++ b/src/musicbrainzapi/api/command_builders/lyrics.py @@ -9,11 +9,13 @@ import os from collections import Counter import string import math +import statistics import musicbrainzngs import click import addict import requests +import numpy from musicbrainzapi.api import authenticate @@ -396,63 +398,66 @@ class LyricsBuilder(LyricsConcreteBuilder): d = addict.Dict(**d, **addict.Dict(album, album_avg)) except ValueError: d = addict.Dict((album, album_avg)) - print(d) + # print(d) self.all_albums_lyrics_sum.append(d) - print(count) + # print(count) with open(f'{os.getcwd()}/lyrics_sum_all_album.json', 'w+') as f: json.dump(self.all_albums_lyrics_sum, f) return self def calculate_final_average_by_album(self) -> None: - self.album_averages = addict.Dict() + self.album_statistics = addict.Dict() + # album_lyrics = self.all_albums_lyrics_sum with open(f'{os.getcwd()}/lyrics_sum_all_album.json', 'r') as f: album_lyrics = json.load(f) - for i in album_lyrics: - for album, count in i.items(): - album_total, album_running = (0, 0) - for c in count: - if isinstance(c, int): - album_running += c - album_total += 1 - else: - pass - avg = math.ceil(album_running / album_total) - self.album_averages = addict.Dict( - **self.album_averages, **addict.Dict((album, avg)) - ) - print(self.album_averages) + for i in album_lyrics: + for album, count in i.items(): + _count = [d for d in count if isinstance(d, int)] + avg = math.ceil(statistics.mean(_count)) + stdev = math.ceil(statistics.stdev(_count)) + # self.album_statistics = addict.Dict( + # **self.album_statistics, **addict.Dict((album, avg)) + # ) + self.album_statistics = addict.Dict( + **self.album_statistics, + **addict.Dict( + (album, addict.Dict(('avg', avg), ('std', stdev))) + ), + ) + print(self.album_statistics) def calculate_final_average_by_year(self) -> None: group_by_years = addict.Dict() self.year_averages = addict.Dict() + # album_lyrics = self.all_albums_lyrics_sum with open(f'{os.getcwd()}/lyrics_sum_all_album.json', 'r') as f: album_lyrics = json.load(f) - for i in album_lyrics: - for album, count in i.items(): - year = album.split('[')[-1].strip(']') - try: - group_by_years = addict.Dict( - **group_by_years, **addict.Dict((year, count)) - ) - # First loop returns value error for empty dict - except ValueError: - group_by_years = addict.Dict((year, count)) - # Multiple years raise a TypeError - we append - except TypeError: - group_by_years.get(year).extend(count) - for year, y_count in group_by_years.items(): - year_total, year_running = (0, 0) - for y in y_count: - if isinstance(y, int): - year_running += y - year_total += 1 - else: - pass - avg = math.ceil(year_running / year_total) - print(year, avg) - self.year_averages = addict.Dict( - **self.year_averages, **addict.Dict((year, avg)) - ) + for i in album_lyrics: + for album, count in i.items(): + year = album.split('[')[-1].strip(']') + try: + group_by_years = addict.Dict( + **group_by_years, **addict.Dict((year, count)) + ) + # First loop returns value error for empty dict + except ValueError: + group_by_years = addict.Dict((year, count)) + # Multiple years raise a TypeError - we append + except TypeError: + group_by_years.get(year).extend(count) + for year, y_count in group_by_years.items(): + year_total, year_running = (0, 0) + for y in y_count: + if isinstance(y, int): + year_running += y + year_total += 1 + else: + pass + avg = math.ceil(year_running / year_total) + # print(year, avg) + self.year_averages = addict.Dict( + **self.year_averages, **addict.Dict((year, avg)) + ) print(self.year_averages) @staticmethod @@ -477,6 +482,26 @@ class LyricsBuilder(LyricsConcreteBuilder): _strip = word.translate(str.maketrans('', '', string.punctuation)) return _strip + @staticmethod + def get_descriptive_statistics(nums: list) -> Dict[str, int]: + avg = math.ceil(numpy.mean(nums)) + median = math.ceil(numpy.median(nums)) + std = math.ceil(numpy.std(nums)) + max = math.ceil(numpy.max(nums)) + min = math.ceil(numpy.min(nums)) + p_25 = math.ceil(numpy.percentile(nums, 25)) + p_75 = math.ceil(numpy.percentile(nums, 75)) + _d = addict.Dict( + ('avg', avg), + ('median', median), + ('std', std), + ('max', max), + ('min', min), + ('p_25', p_25), + ('p_75', p_75), + ) + return _d + class LyricsClickDirector: """docstring for LyricsClickDirector""" @@ -581,6 +606,8 @@ class LyricsClickDirector: self.builder.all_albums_lyrics_sum ) pprint(self.builder._product.all_albums_lyrics_sum) + self.builder.calculate_final_average_by_album() + self.builder.calculate_final_average_by_year() def _dev(self) -> None: self.builder.calculate_final_average_by_album()