mirror of
https://github.com/dtomlinson91/street_group_tech_test
synced 2025-12-22 03:55:43 +00:00
Merge final release (#1)
* adding initial skeleton * updating .gitignore * updating dev dependencies * adding report.py * updating notes * adding prospector.yaml * updating beam to install gcp extras * adding documentation * adding data exploration report + code * adding latest beam pipeline code * adding latest beam pipeline code * adding debug.py * adding latesty beam pipeline code * adding latest beam pipeline code * adding latest beam pipeline code * updating .gitignore * updating folder structure for data input/output * updating prospector.yaml * adding latest beam pipeline code * updating prospector.yaml * migrate beam pipeline to main.py * updating .gitignore * updating .gitignore * adding download script for data set * adding initial docs * moving inputs/outputs to use pathlib * removing shard_name_template from output file * adding pyenv 3.7.9 * removing requirements.txt for documentation * updating README.md * updating download data script for new location in GCS * adding latest beam pipeline code for dataflow * adding latest beam pipeline code for dataflow * adding latest beam pipeline code for dataflow * moving dataflow notes * updating prospector.yaml * adding latest beam pipeline code for dataflow * updating beam pipeline to use GroupByKey * updating download_data script with new bucket * update prospector.yaml * update dataflow documentation with new commands for vpc * adding latest beam pipeline code for dataflow with group optimisation * updating dataflow documentation * adding latest beam pipeline code for dataflow with group optimisation * updating download_data script with pp-2020 dataset * adding temporary notes * updating dataflow notes * adding latest beam pipeline code * updating dataflow notes * adding latest beam pipeline code for dataflow * adding debug print * moving panda-profiling report into docs * updating report.py * adding entrypoint command * adding initial docs * adding commands.md to notes * commenting out debug imports * updating documentation * updating latest beam pipeline with default inputs * updating poetry * adding requirements.txt * updating documentation
This commit is contained in:
38
exploration/report.py
Normal file
38
exploration/report.py
Normal file
@@ -0,0 +1,38 @@
|
||||
import pathlib
|
||||
|
||||
import pandas as pd
|
||||
from pandas_profiling import ProfileReport
|
||||
|
||||
|
||||
def main():
|
||||
input_file = (
|
||||
pathlib.Path(__file__).parents[1] / "data" / "input" / "pp-complete.csv"
|
||||
)
|
||||
with input_file.open() as csv:
|
||||
df_report = pd.read_csv(
|
||||
csv,
|
||||
names=[
|
||||
"transaction_id",
|
||||
"price",
|
||||
"date_of_transfer",
|
||||
"postcode",
|
||||
"property_type",
|
||||
"old_new",
|
||||
"duration",
|
||||
"paon",
|
||||
"saon",
|
||||
"street",
|
||||
"locality",
|
||||
"town_city",
|
||||
"district",
|
||||
"county",
|
||||
"ppd_category",
|
||||
"record_status",
|
||||
],
|
||||
)
|
||||
profile = ProfileReport(df_report, title="Price Paid Data", minimal=True)
|
||||
profile.to_file("price_paid_data_report.html")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user