diff --git a/notes/tmp/errordata b/notes/tmp/errordata new file mode 100644 index 0000000..2df356d --- /dev/null +++ b/notes/tmp/errordata @@ -0,0 +1,27 @@ +"Error message from worker: Traceback (most recent call last): + File "/usr/local/lib/python3.7/site-packages/dataflow_worker/batchworker.py", line 651, in do_work + work_executor.execute() + File "/usr/local/lib/python3.7/site-packages/dataflow_worker/executor.py", line 181, in execute + op.finish() + File "dataflow_worker/native_operations.py", line 93, in dataflow_worker.native_operations.NativeWriteOperation.finish + File "dataflow_worker/native_operations.py", line 94, in dataflow_worker.native_operations.NativeWriteOperation.finish + File "dataflow_worker/native_operations.py", line 95, in dataflow_worker.native_operations.NativeWriteOperation.finish + File "/usr/local/lib/python3.7/site-packages/dataflow_worker/nativeavroio.py", line 308, in __exit__ + self._data_file_writer.flush() + File "fastavro/_write.pyx", line 664, in fastavro._write.Writer.flush + File "fastavro/_write.pyx", line 639, in fastavro._write.Writer.dump + File "fastavro/_write.pyx", line 451, in fastavro._write.snappy_write_block + File "fastavro/_write.pyx", line 458, in fastavro._write.snappy_write_block + File "/usr/local/lib/python3.7/site-packages/apache_beam/io/filesystemio.py", line 200, in write + self._uploader.put(b) + File "/usr/local/lib/python3.7/site-packages/apache_beam/io/gcp/gcsio.py", line 720, in put + self._conn.send_bytes(data.tobytes()) + File "/usr/local/lib/python3.7/multiprocessing/connection.py", line 200, in send_bytes + self._send_bytes(m[offset:offset + size]) + File "/usr/local/lib/python3.7/multiprocessing/connection.py", line 393, in _send_bytes + header = struct.pack("!i", n) +struct.error: 'i' format requires -2147483648 <= number <= 2147483647 +" + + +"Out of memory: Killed process 2042 (python) total-vm:28616496kB, anon-rss:25684136kB, file-rss:0kB, shmem-rss:0kB, UID:0 pgtables:51284kB oom_score_adj:900" diff --git a/notes/tmp/exampledata b/notes/tmp/exampledata new file mode 100644 index 0000000..97c9248 --- /dev/null +++ b/notes/tmp/exampledata @@ -0,0 +1,44 @@ +[{ + "property_id": "3cf3c06632c46754696f2017933702f3", + "flat_appartment": "", + "builing": "", + "number": "63", + "street": "ROTTON PARK STREET", + "locality": "", + "town": "BIRMINGHAM", + "district": "BIRMINGHAM", + "county": "WEST MIDLANDS", + "postcode": "B16 0AE", + "property_transactions": [ + { "price": "385000", "transaction_date": "2021-01-08", "year": "2021" }, + { "price": "701985", "transaction_date": "2019-03-28", "year": "2019" }, + { "price": "1748761", "transaction_date": "2020-05-27", "year": "2020" } + ], + "latest_transaction_year": "2021" +}, +{ + "property_id": "c650d5d7bb0daf0a19bb2cacabbee74e", + "readable_address": "16 STATION ROAD\nPARKGATE\nNESTON\nCHESHIRE WEST AND CHESTER\nCH64 6QJ", + "flat_appartment": "", + "builing": "", + "number": "16", + "street": "STATION ROAD", + "locality": "PARKGATE", + "town": "NESTON", + "district": "CHESHIRE WEST AND CHESTER", + "county": "CHESHIRE WEST AND CHESTER", + "postcode": "CH64 6QJ", + "property_transactions": [ + { + "price": "280000", + "transaction_date": "2020-11-30", + "year": "2020" + }, + { + "price": "265000", + "transaction_date": "2020-05-29", + "year": "2020" + } + ], + "latest_transaction_year": "2020" +}] diff --git a/notes/tmp/runningdata b/notes/tmp/runningdata new file mode 100644 index 0000000..0929edb --- /dev/null +++ b/notes/tmp/runningdata @@ -0,0 +1,16 @@ + + +Create Mapping table +('fd4634faec47c29de40bbf7840723b41', ['317500', '2020-11-13 00:00', 'B90 3LA', '1', '', 'VERSTONE ROAD', 'SHIRLEY', 'SOLIHULL', 'SOLIHULL', 'WEST MIDLANDS', '']) +('fd4634faec47c29de40bbf7840723b41', ['317500', '2020-11-13 00:00', 'B90 3LA', '1', '', 'VERSTONE ROAD', 'SHIRLEY', 'SOLIHULL', 'SOLIHULL', 'WEST MIDLANDS', '']) + +Condensing +{'fd4634faec47c29de40bbf7840723b41': ['317500', '2020-11-13 00:00', 'B90 3LA', '1', '', 'VERSTONE ROAD', 'SHIRLEY', 'SOLIHULL', 'SOLIHULL', 'WEST MIDLANDS', '']} + + +Prepared +GroupByKey +('fe205bfe66bc7f18c50c8f3d77ec3e30', ['fd4634faec47c29de40bbf7840723b41', 'fd4634faec47c29de40bbf7840723b41']) + +deduplicated +('fe205bfe66bc7f18c50c8f3d77ec3e30', ['fd4634faec47c29de40bbf7840723b41'])