Reader comments on an old post about the ijson parser prompted me to check out the project’s more recent releases. The latest pre-release (v3.0rc1) added a coroutine interface, which allows users to supply their own file readers and have more control over when the parser is called. It looked like a fun feature to explore, and here is a bit of code to try it out.
Below is some test code for parsing the large San Francisco City Lots JSON dataset using ijson v3.0rc1 coroutines. I’m also using a generator function to lazily read the JSON file line by line. As an alternative, the example in the ijson documentation shows reading from a file object in chunks. The parser_coroutine function gets the output generated by the low-level parser and prints it:
All python code is Python 3.8+.
import ijson


@ijson.coroutine
def parser_coroutine():
    """Sink coroutine: print each (prefix, event, value) tuple it is sent."""
    while True:
        prefix, event, value = (yield)
        print(f"prefix: {prefix}, event: {event}, value: {value}")


def read_in_line(file_object):
    """Lazily yield lines from *file_object*.

    Stops as soon as ``readline()`` returns an empty (falsy) value, which is
    how file objects report end of file.
    """
    while line := file_object.readline():
        yield line


parse_coro_impl = ijson.parse_coro(parser_coroutine())

# Text mode keeps the "line:" trace readable. The codec is pinned to UTF-8 so
# decoding does not depend on the platform locale and matches the UTF-8
# default used by ``str.encode()`` below.
with open('citylots.json', 'r', encoding='utf-8') as file_reader:
    for line in read_in_line(file_reader):
        print("line:", line)
        # Each text line is re-encoded to bytes before being pushed in.
        parse_coro_impl.send(line.encode())

# Tell the parser that no more input is coming, as the ijson coroutine
# examples do; without this the parser never learns the document ended.
parse_coro_impl.close()
The dataset contains an array of JSON objects with the city lots property information. The low-level ijson parser iterates over the JSON elements and breaks them down into three-element tuples that describe where the element fits in the JSON structure, its type, and its value. Here is the beginning of the dataset file and the first JSON object in the features array:
$ head -n 5 citylots.json { "type": "FeatureCollection", "features": [ { "type": "Feature", "properties": { "MAPBLKLOT": "0001001", "BLKLOT": "0001001", "BLOCK_NUM": "0001", "LOT_NUM": "001", "FROM_ST": "0", "TO_ST": "0", "STREET": "UNKNOWN", "ST_TYPE": null, "ODD_EVEN": "E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -122.422003528252475, 37.808480096967251, 0.0 ], [ -122.422076013325281, 37.808835019815085, 0.0 ], [ -122.421102174348633, 37.808803534992904, 0.0 ], [ -122.421062569067274, 37.808601056818148, 0.0 ], [ -122.422003528252475, 37.808480096967251, 0.0 ] ] ] } } ,
line: { prefix: , event: start_map, value: None line: "type": "FeatureCollection", prefix: , event: map_key, value: type prefix: type, event: string, value: FeatureCollection line: "features": [ prefix: , event: map_key, value: features prefix: features, event: start_array, value: None line: { "type": "Feature", "properties": { "MAPBLKLOT": "0001001", "BLKLOT": "0001001", "BLOCK_NUM": "0001", "LOT_NUM": "001", "FROM_ST": "0", "TO_ST": "0", "STREET": "UNKNOWN", "ST_TYPE": null, "ODD_EVEN": "E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -122.422003528252475, 37.808480096967251, 0.0 ], [ -122.422076013325281, 37.808835019815085, 0.0 ], [ -122.421102174348633, 37.808803534992904, 0.0 ], [ -122.421062569067274, 37.808601056818148, 0.0 ], [ -122.422003528252475, 37.808480096967251, 0.0 ] ] ] } } prefix: features.item, event: start_map, value: None prefix: features.item, event: map_key, value: type prefix: features.item.type, event: string, value: Feature prefix: features.item, event: map_key, value: properties prefix: features.item.properties, event: start_map, value: None prefix: features.item.properties, event: map_key, value: MAPBLKLOT prefix: features.item.properties.MAPBLKLOT, event: string, value: 0001001 prefix: features.item.properties, event: map_key, value: BLKLOT prefix: features.item.properties.BLKLOT, event: string, value: 0001001 prefix: features.item.properties, event: map_key, value: BLOCK_NUM prefix: features.item.properties.BLOCK_NUM, event: string, value: 0001 prefix: features.item.properties, event: map_key, value: LOT_NUM prefix: features.item.properties.LOT_NUM, event: string, value: 001 prefix: features.item.properties, event: map_key, value: FROM_ST prefix: features.item.properties.FROM_ST, event: string, value: 0 prefix: features.item.properties, event: map_key, value: TO_ST prefix: features.item.properties.TO_ST, event: string, value: 0 prefix: features.item.properties, event: map_key, value: STREET prefix: 
features.item.properties.STREET, event: string, value: UNKNOWN prefix: features.item.properties, event: map_key, value: ST_TYPE prefix: features.item.properties.ST_TYPE, event: null, value: None prefix: features.item.properties, event: map_key, value: ODD_EVEN prefix: features.item.properties.ODD_EVEN, event: string, value: E prefix: features.item.properties, event: end_map, value: None prefix: features.item, event: map_key, value: geometry prefix: features.item.geometry, event: start_map, value: None prefix: features.item.geometry, event: map_key, value: type prefix: features.item.geometry.type, event: string, value: Polygon prefix: features.item.geometry, event: map_key, value: coordinates prefix: features.item.geometry.coordinates, event: start_array, value: None prefix: features.item.geometry.coordinates.item, event: start_array, value: None prefix: features.item.geometry.coordinates.item.item, event: start_array, value: None prefix: features.item.geometry.coordinates.item.item.item, event: number, value: -122.422003528252475 prefix: features.item.geometry.coordinates.item.item.item, event: number, value: 37.808480096967251 prefix: features.item.geometry.coordinates.item.item.item, event: number, value: 0.0 prefix: features.item.geometry.coordinates.item.item, event: end_array, value: None prefix: features.item.geometry.coordinates.item.item, event: start_array, value: None prefix: features.item.geometry.coordinates.item.item.item, event: number, value: -122.422076013325281 prefix: features.item.geometry.coordinates.item.item.item, event: number, value: 37.808835019815085 prefix: features.item.geometry.coordinates.item.item.item, event: number, value: 0.0 prefix: features.item.geometry.coordinates.item.item, event: end_array, value: None prefix: features.item.geometry.coordinates.item.item, event: start_array, value: None prefix: features.item.geometry.coordinates.item.item.item, event: number, value: -122.421102174348633 prefix: 
features.item.geometry.coordinates.item.item.item, event: number, value: 37.808803534992904 prefix: features.item.geometry.coordinates.item.item.item, event: number, value: 0.0 prefix: features.item.geometry.coordinates.item.item, event: end_array, value: None prefix: features.item.geometry.coordinates.item.item, event: start_array, value: None prefix: features.item.geometry.coordinates.item.item.item, event: number, value: -122.421062569067274 prefix: features.item.geometry.coordinates.item.item.item, event: number, value: 37.808601056818148 prefix: features.item.geometry.coordinates.item.item.item, event: number, value: 0.0 prefix: features.item.geometry.coordinates.item.item, event: end_array, value: None prefix: features.item.geometry.coordinates.item.item, event: start_array, value: None prefix: features.item.geometry.coordinates.item.item.item, event: number, value: -122.422003528252475 prefix: features.item.geometry.coordinates.item.item.item, event: number, value: 37.808480096967251 prefix: features.item.geometry.coordinates.item.item.item, event: number, value: 0.0 prefix: features.item.geometry.coordinates.item.item, event: end_array, value: None prefix: features.item.geometry.coordinates.item, event: end_array, value: None prefix: features.item.geometry.coordinates, event: end_array, value: None prefix: features.item.geometry, event: end_map, value: None prefix: features.item, event: end_map, value: None line: ,
This is the last JSON object in the features array and the end of the file:
$ tail -n 5 citylots.json , { "type": "Feature", "properties": { "MAPBLKLOT": "VACSTWIL", "BLKLOT": "VACSTWIL", "BLOCK_NUM": "VACST", "LOT_NUM": "WIL", "FROM_ST": null, "TO_ST": null, "STREET": null, "ST_TYPE": null, "ODD_EVEN": null }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -122.424075168366173, 37.782909438426415, 0.0 ], [ -122.424094360186615, 37.783004345097652, 0.0 ], [ -122.423873989053888, 37.783032415674377, 0.0 ], [ -122.423624577385425, 37.783064185117638, 0.0 ], [ -122.422685009512293, 37.783183859540742, 0.0 ], [ -122.42266581942296, 37.783088951742364, 0.0 ], [ -122.422930605732873, 37.783055226671081, 0.0 ], [ -122.423605385014795, 37.782969278389494, 0.0 ], [ -122.423714717181866, 37.782955351504917, 0.0 ], [ -122.423841131273619, 37.782939249620277, 0.0 ], [ -122.424075168366173, 37.782909438426415, 0.0 ] ] ] } } ] }
line: , line: { "type": "Feature", "properties": { "MAPBLKLOT": "VACSTWIL", "BLKLOT": "VACSTWIL", "BLOCK_NUM": "VACST", "LOT_NUM": "WIL", "FROM_ST": null, "TO_ST": null, "STREE T": null, "ST_TYPE": null, "ODD_EVEN": null }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -122.424075168366173, 37.782909438426415, 0.0 ], [ -122.42409436018 6615, 37.783004345097652, 0.0 ], [ -122.423873989053888, 37.783032415674377, 0.0 ], [ -122.423624577385425, 37.783064185117638, 0.0 ], [ -122.422685009512293, 37.783183 859540742, 0.0 ], [ -122.42266581942296, 37.783088951742364, 0.0 ], [ -122.422930605732873, 37.783055226671081, 0.0 ], [ -122.423605385014795, 37.782969278389494, 0.0 ] , [ -122.423714717181866, 37.782955351504917, 0.0 ], [ -122.423841131273619, 37.782939249620277, 0.0 ], [ -122.424075168366173, 37.782909438426415, 0.0 ] ] ] } } prefix: features.item, event: start_map, value: None prefix: features.item, event: map_key, value: type prefix: features.item.type, event: string, value: Feature prefix: features.item, event: map_key, value: properties prefix: features.item.properties, event: start_map, value: None prefix: features.item.properties, event: map_key, value: MAPBLKLOT prefix: features.item.properties.MAPBLKLOT, event: string, value: VACSTWIL prefix: features.item.properties, event: map_key, value: BLKLOT prefix: features.item.properties.BLKLOT, event: string, value: VACSTWIL prefix: features.item.properties, event: map_key, value: BLOCK_NUM prefix: features.item.properties.BLOCK_NUM, event: string, value: VACST prefix: features.item.properties, event: map_key, value: LOT_NUM prefix: features.item.properties.LOT_NUM, event: string, value: WIL prefix: features.item.properties, event: map_key, value: FROM_ST prefix: features.item.properties.FROM_ST, event: null, value: None prefix: features.item.properties, event: map_key, value: TO_ST prefix: features.item.properties.TO_ST, event: null, value: None prefix: features.item.properties, event: map_key, value: 
STREET prefix: features.item.properties.STREET, event: null, value: None prefix: features.item.properties, event: map_key, value: ST_TYPE prefix: features.item.properties.ST_TYPE, event: null, value: None prefix: features.item.properties, event: map_key, value: ODD_EVEN prefix: features.item.properties.ODD_EVEN, event: null, value: None prefix: features.item.properties, event: end_map, value: None prefix: features.item, event: map_key, value: geometry prefix: features.item.geometry, event: start_map, value: None prefix: features.item.geometry, event: map_key, value: type prefix: features.item.geometry.type, event: string, value: Polygon prefix: features.item.geometry, event: map_key, value: coordinates prefix: features.item.geometry.coordinates, event: start_array, value: None prefix: features.item.geometry.coordinates.item, event: start_array, value: None prefix: features.item.geometry.coordinates.item.item, event: start_array, value: None prefix: features.item.geometry.coordinates.item.item.item, event: number, value: -122.424075168366173 prefix: features.item.geometry.coordinates.item.item.item, event: number, value: 37.782909438426415 prefix: features.item.geometry.coordinates.item.item.item, event: number, value: 0.0 prefix: features.item.geometry.coordinates.item.item, event: end_array, value: None prefix: features.item.geometry.coordinates.item.item, event: start_array, value: None prefix: features.item.geometry.coordinates.item.item.item, event: number, value: -122.424094360186615 prefix: features.item.geometry.coordinates.item.item.item, event: number, value: 37.783004345097652 prefix: features.item.geometry.coordinates.item.item.item, event: number, value: 0.0 prefix: features.item.geometry.coordinates.item.item, event: end_array, value: None prefix: features.item.geometry.coordinates.item.item, event: start_array, value: None prefix: features.item.geometry.coordinates.item.item.item, event: number, value: -122.423873989053888 prefix: 
features.item.geometry.coordinates.item.item.item, event: number, value: 37.783032415674377 prefix: features.item.geometry.coordinates.item.item.item, event: number, value: 0.0 prefix: features.item.geometry.coordinates.item.item, event: end_array, value: None prefix: features.item.geometry.coordinates.item.item, event: start_array, value: None prefix: features.item.geometry.coordinates.item.item.item, event: number, value: -122.423624577385425 prefix: features.item.geometry.coordinates.item.item.item, event: number, value: 37.783064185117638 prefix: features.item.geometry.coordinates.item.item.item, event: number, value: 0.0 prefix: features.item.geometry.coordinates.item.item, event: end_array, value: None prefix: features.item.geometry.coordinates.item.item, event: start_array, value: None prefix: features.item.geometry.coordinates.item.item.item, event: number, value: -122.422685009512293 prefix: features.item.geometry.coordinates.item.item.item, event: number, value: 37.783183859540742 prefix: features.item.geometry.coordinates.item.item.item, event: number, value: 0.0 prefix: features.item.geometry.coordinates.item.item, event: end_array, value: None prefix: features.item.geometry.coordinates.item.item, event: start_array, value: None prefix: features.item.geometry.coordinates.item.item.item, event: number, value: -122.42266581942296 prefix: features.item.geometry.coordinates.item.item.item, event: number, value: 37.783088951742364 prefix: features.item.geometry.coordinates.item.item.item, event: number, value: 0.0 prefix: features.item.geometry.coordinates.item.item, event: end_array, value: None prefix: features.item.geometry.coordinates.item.item, event: start_array, value: None prefix: features.item.geometry.coordinates.item.item.item, event: number, value: -122.422930605732873 prefix: features.item.geometry.coordinates.item.item.item, event: number, value: 37.783055226671081 prefix: features.item.geometry.coordinates.item.item.item, event: number, value: 
0.0 prefix: features.item.geometry.coordinates.item.item, event: end_array, value: None prefix: features.item.geometry.coordinates.item.item, event: start_array, value: None prefix: features.item.geometry.coordinates.item.item.item, event: number, value: -122.423605385014795 prefix: features.item.geometry.coordinates.item.item.item, event: number, value: 37.782969278389494 prefix: features.item.geometry.coordinates.item.item.item, event: number, value: 0.0 prefix: features.item.geometry.coordinates.item.item, event: end_array, value: None prefix: features.item.geometry.coordinates.item.item, event: start_array, value: None prefix: features.item.geometry.coordinates.item.item.item, event: number, value: -122.423714717181866 prefix: features.item.geometry.coordinates.item.item.item, event: number, value: 37.782955351504917 prefix: features.item.geometry.coordinates.item.item.item, event: number, value: 0.0 prefix: features.item.geometry.coordinates.item.item, event: end_array, value: None prefix: features.item.geometry.coordinates.item.item, event: start_array, value: None prefix: features.item.geometry.coordinates.item.item.item, event: number, value: -122.423841131273619 prefix: features.item.geometry.coordinates.item.item.item, event: number, value: 37.782939249620277 prefix: features.item.geometry.coordinates.item.item.item, event: number, value: 0.0 prefix: features.item.geometry.coordinates.item.item, event: end_array, value: None prefix: features.item.geometry.coordinates.item.item, event: start_array, value: None prefix: features.item.geometry.coordinates.item.item.item, event: number, value: -122.424075168366173 prefix: features.item.geometry.coordinates.item.item.item, event: number, value: 37.782909438426415 prefix: features.item.geometry.coordinates.item.item.item, event: number, value: 0.0 prefix: features.item.geometry.coordinates.item.item, event: end_array, value: None prefix: features.item.geometry.coordinates.item, event: end_array, value: None 
prefix: features.item.geometry.coordinates, event: end_array, value: None prefix: features.item.geometry, event: end_map, value: None prefix: features.item, event: end_map, value: None line: line: ] prefix: features, event: end_array, value: None line: } prefix: , event: end_map, value: None
Here is a similar example with the higher-level interface, where we can just parse the properties objects as Python dicts:
import ijson


@ijson.coroutine
def items_coroutine():
    """Sink coroutine: print the keys of each object it is sent."""
    while True:
        items_obj = (yield)
        print(items_obj.keys())


def read_in_line(file_object):
    """Lazily yield lines from *file_object*.

    Stops as soon as ``readline()`` returns an empty (falsy) value, which is
    how file objects report end of file.
    """
    while line := file_object.readline():
        yield line


# Only objects found under this prefix are forwarded to items_coroutine().
items_coro_impl = ijson.items_coro(items_coroutine(), 'features.item.properties')

# Text mode keeps the "line:" trace readable. The codec is pinned to UTF-8 so
# decoding does not depend on the platform locale and matches the UTF-8
# default used by ``str.encode()`` below.
with open('citylots.json', 'r', encoding='utf-8') as file_reader:
    for line in read_in_line(file_reader):
        print("line:", line)
        # Each text line is re-encoded to bytes before being pushed in.
        items_coro_impl.send(line.encode())

# Tell the parser that no more input is coming, as the ijson coroutine
# examples do; without this the parser never learns the document ended.
items_coro_impl.close()
$ head -n 10 citylots.json { "type": "FeatureCollection", "features": [ { "type": "Feature", "properties": { "MAPBLKLOT": "0001001", "BLKLOT": "0001001", "BLOCK_NUM": "0001", "LOT_NUM": "001", "FROM_ST": "0", "TO_ST": "0", "STREET": "UNKNOWN", "ST_TYPE": null, "ODD_EVEN": "E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -122.422003528252475, 37.808480096967251, 0.0 ], [ -122.422076013325281, 37.808835019815085, 0.0 ], [ -122.421102174348633, 37.808803534992904, 0.0 ], [ -122.421062569067274, 37.808601056818148, 0.0 ], [ -122.422003528252475, 37.808480096967251, 0.0 ] ] ] } } , { "type": "Feature", "properties": { "MAPBLKLOT": "0002001", "BLKLOT": "0002001", "BLOCK_NUM": "0002", "LOT_NUM": "001", "FROM_ST": "0", "TO_ST": "0", "STREET": "UNKNOWN", "ST_TYPE": null, "ODD_EVEN": "E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -122.42082593937107, 37.808631474146033, 0.0 ], [ -122.420858049679694, 37.808795641369592, 0.0 ], [ -122.419811958704301, 37.808761809714007, 0.0 ], [ -122.42082593937107, 37.808631474146033, 0.0 ] ] ] } } ,
line: { line: "type": "FeatureCollection", line: "features": [ line: { "type": "Feature", "properties": { "MAPBLKLOT": "0001001", "BLKLOT": "0001001", "BLOCK_NUM": "0001", "LOT_NUM": "001", "FROM_ST": "0", "TO_ST": "0", "STREET": "UNKNOWN", "ST_TYPE": null, "ODD_EVEN": "E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -122.422003528252475, 37.808480096967251, 0.0 ], [ -122.422076013325281, 37.808835019815085, 0.0 ], [ -122.421102174348633, 37.808803534992904, 0.0 ], [ -122.421062569067274, 37.808601056818148, 0.0 ], [ -122.422003528252475, 37.808480096967251, 0.0 ] ] ] } } dict_keys(['MAPBLKLOT', 'BLKLOT', 'BLOCK_NUM', 'LOT_NUM', 'FROM_ST', 'TO_ST', 'STREET', 'ST_TYPE', 'ODD_EVEN']) line: , line: { "type": "Feature", "properties": { "MAPBLKLOT": "0002001", "BLKLOT": "0002001", "BLOCK_NUM": "0002", "LOT_NUM": "001", "FROM_ST": "0", "TO_ST": "0", "STREET": "UNKNOWN", "ST_TYPE": null, "ODD_EVEN": "E" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -122.42082593937107, 37.808631474146033, 0.0 ], [ -122.420858049679694, 37.808795641369592, 0.0 ], [ -122.419811958704301, 37.808761809714007, 0.0 ], [ -122.42082593937107, 37.808631474146033, 0.0 ] ] ] } } dict_keys(['MAPBLKLOT', 'BLKLOT', 'BLOCK_NUM', 'LOT_NUM', 'FROM_ST', 'TO_ST', 'STREET', 'ST_TYPE', 'ODD_EVEN']) line: ,