mirror of
https://github.com/dtomlinson91/street_group_tech_test
synced 2025-12-22 03:55:43 +00:00
Merge final release (#1)
* adding initial skeleton * updating .gitignore * updating dev dependencies * adding report.py * updating notes * adding prospector.yaml * updating beam to install gcp extras * adding documentation * adding data exploration report + code * adding latest beam pipeline code * adding latest beam pipeline code * adding debug.py * adding latest beam pipeline code * adding latest beam pipeline code * adding latest beam pipeline code * updating .gitignore * updating folder structure for data input/output * updating prospector.yaml * adding latest beam pipeline code * updating prospector.yaml * migrate beam pipeline to main.py * updating .gitignore * updating .gitignore * adding download script for data set * adding initial docs * moving inputs/outputs to use pathlib * removing shard_name_template from output file * adding pyenv 3.7.9 * removing requirements.txt for documentation * updating README.md * updating download data script for new location in GCS * adding latest beam pipeline code for dataflow * adding latest beam pipeline code for dataflow * adding latest beam pipeline code for dataflow * moving dataflow notes * updating prospector.yaml * adding latest beam pipeline code for dataflow * updating beam pipeline to use GroupByKey * updating download_data script with new bucket * update prospector.yaml * update dataflow documentation with new commands for vpc * adding latest beam pipeline code for dataflow with group optimisation * updating dataflow documentation * adding latest beam pipeline code for dataflow with group optimisation * updating download_data script with pp-2020 dataset * adding temporary notes * updating dataflow notes * adding latest beam pipeline code * updating dataflow notes * adding latest beam pipeline code for dataflow * adding debug print * moving pandas-profiling report into docs * updating report.py * adding entrypoint command * adding initial docs * adding commands.md to notes * commenting out debug imports * updating documentation * 
updating latest beam pipeline with default inputs * updating poetry * adding requirements.txt * updating documentation
This commit is contained in:
30
analyse_properties/debug.py
Normal file
30
analyse_properties/debug.py
Normal file
@@ -0,0 +1,30 @@
|
||||
import apache_beam as beam
|
||||
|
||||
|
||||
class DebugShowEmptyColumn(beam.DoFn):
    """Debug DoFn that passes through only elements with an empty column.

    An element is emitted when the item at position ``index`` has a
    length of zero; every other element is dropped.
    """

    def __init__(self, index):
        """Store the position of the column to inspect.

        Args:
            index: Key or position used to look up the column in each
                element (``element[index]``).
        """
        self.index = index

    def process(self, element):
        """Yield ``element`` if its selected column is empty.

        Args:
            element: An indexable record; ``element[self.index]`` must
                support ``len()``.

        Yields:
            The unchanged element when the selected column has length 0.
        """
        selected = element[self.index]
        # Guard clause: anything with content is filtered out.
        if len(selected) != 0:
            return
        yield element
|
||||
|
||||
|
||||
class DebugShowColumnWithValueIn(beam.DoFn):
    """Debug DoFn that passes through only elements whose column contains a value.

    An element is emitted when ``value in element[index]`` is true;
    every other element is dropped.
    """

    def __init__(self, index, value):
        """Store the column position and the value to search for.

        Args:
            index: Key or position used to look up the column in each
                element (``element[index]``).
            value: Value tested for membership in the selected column.
        """
        self.value = value
        self.index = index

    def process(self, element):
        """Yield ``element`` if the selected column contains ``self.value``.

        Args:
            element: An indexable record; ``element[self.index]`` must
                support the ``in`` operator.

        Yields:
            The unchanged element when the membership test succeeds.
        """
        # Guard clause: skip elements that do not contain the value.
        if self.value not in element[self.index]:
            return
        yield element
|
||||
|
||||
|
||||
class DebugPrint(beam.DoFn):
    """Debug DoFn that prints every element and passes it through unchanged."""

    def process(self, element):
        """Print ``element`` to standard output, then re-emit it.

        Args:
            element: Any value; it is printed with ``print()``.

        Yields:
            The same element, unmodified.
        """
        print(element)
        yield element
|
||||
Reference in New Issue
Block a user