We have a json file as structure below. .
sample.json:
{
"new hires": [
{"name": "Interpol Lundquist", "age": "50", "sex": "male", "accounts": "interpol_lundquist", "join_date": "2010-08-12 01:42:28"},
{"name": "Hebrides Adair", "age": "47", "sex": "male", "accounts": "hebrides_adair", "join_date": "2013-07-16 20:47:08"},
{"name": "Cantabrigian Gilchrist", "age": "21", "sex": "male", "accounts": "cantabrigian_gilchrist", "join_date": "2010-02-18 02:46:07"},
{"name": "Missy Chesapeake", "age": "42", "sex": "male", "accounts": "missy_chesapeake", "join_date": "2015-09-17 08:17:45"}
]
}
we need to saving the above json data in to the mongodb with BSON encoded format as below format. We need to save the id with binary format and subtype.
[{
"_id": {
"$binary": {
"base64": "xraIZaUFinO8IOoY5cqI0A==",
"subType": "03"
}
},
"name": "name",
"age": "52",
"sex": null,
"accounts": ""
}]
sample code:
#!/usr/bin/env python3
#-*- coding: utf-8 -*-
# import the built-in JSON library
import json
# import the BSON library from PyMongo's bson
from bson import BSON
# here's an example of an invalid JSON string
bad_json = '{"this is": "missing the closing bracket"'
# json.loads() will throw a ValueError if JSON is invalid
try:
json.loads(bad_json)
except ValueError as error:
print ("json.loads() ValueError for BSON object:", error)
# declare an empty string object
json_string = ""
# use Python's open() function to load a JSON file
with open("data.json", 'r', encoding='utf-8') as json_data:
print ("data.json TYPE:", type(json_data))
# iterate over the _io.TextIOWrapper returned by open() using enumerate()
for i, line in enumerate(json_data):
# append the parsed IO string to the JSON string
json_string += line
# make sure the string is a valid JSON object first
try:
# use json.loads() to validate the string and create JSON dict
json_docs = json.loads(json_string)
# loads() method returns a Python dict
print ("json_docs TYPE:", type(json_docs))
# return a list of all of the JSON document keys
print ("MongoDB collections:", list(json_docs.keys()))
except ValueError as error:
# quit the script if string is not a valid JSON
print ("json.loads() ValueError for BSON object:", error)
quit()
# iterate the json_docs dict keys (use iteritems() for Python 2.7)
for key, val in json_docs.items():
# iterate each JSON document in the list
for i, doc in enumerate(json_docs[key]):
# bytearray([source[, encoding[, errors]]])
try:
# print the original JSON document
print ("\ndoc:", doc)
# encode the document using the BSON library
data = BSON.encode(doc)
print ("BSON encoded data:", type(data))
myclient = MongoClient("mongodb://localhost:27017/")
mydb = myclient["test"]
mycol = mydb["BSON"]
data = BSON.encode({'a': 1})
mycol.insert_many(data)
# print the result of the BSON encoding
print ("data:", data)
# decode the BSON document back to a Python dict object
decode_doc = BSON.decode(data)
print ("decode_doc:", type(decode_doc))
except Exception as error:
# catch any BSON encoding or decoding errors
print ("enumerate() JSON documents ERROR:", error)
# # decode the BSON document back to a Python dict object
# decode_doc = BSON.decode(data)
# print ("decode_doc:", type(decode_doc))
except Exception as error:
# catch any BSON encoding or decoding errors
print ("enumerate() JSON documents ERROR:", error)
When trying to run the above code, facing the below error:
enumerate() JSON documents ERROR: document must be an instance of dict, bson.son.SON, bson.raw_bson.RawBSONDocument, or a type that inherits from collections.MutableMapping