У меня есть CSV-файл data.csv. С помощью Python я преобразую его во вложенный JSON и вставляю в MongoDB. Ниже приведён мой код; я хочу поместить FirstName и LastName под общий родительский ключ "Name". Может ли кто-нибудь помочь?
import json
import pandas as pd
from pymongo import MongoClient
from pymongo.errors import ConnectionFailure

# MongoClient() connects lazily and does not raise when the server is
# unreachable, so force one cheap round trip ("ping") before reporting success.
try:
    conn = MongoClient()
    conn.admin.command("ping")
except ConnectionFailure:
    # Nothing below can work without a connection; stop here instead of
    # failing later with a NameError on `conn`.
    print("Could not connect to MongoDB")
    raise SystemExit(1)
else:
    print("Connected successfully!!!")

# database
db = conn.database
collection = db.collection3

# Source data: the CSV that is grouped into nested records further down.
df = pd.read_csv(r'C:\Users\swetha1\Desktop\data.csv')
def get_nested_rec(key, grp):
    """Build one nested record for a single group of CSV rows.

    Args:
        key: Group key, indexed positionally as
            ``(PrimaryId, FirstName, LastName, City)``.
        grp: Rows belonging to the group; must support
            ``grp[field].unique()`` for the ``CarName`` and ``DogName``
            columns (e.g. a pandas DataFrame).

    Returns:
        A dict holding the four scalar key fields plus ``CarName`` and
        ``DogName`` lists that contain each distinct value once, in order
        of first appearance.
    """
    rec = {
        'PrimaryId': key[0],
        'FirstName': key[1],
        'LastName': key[2],
        'City': key[3],
    }
    # The multi-valued columns are collapsed to their distinct values.
    for field in ['CarName', 'DogName']:
        rec[field] = list(grp[field].unique())
    return rec
# Build one nested record per distinct
# (PrimaryId, FirstName, LastName, City) combination in the CSV.
records = [
    get_nested_rec(key, grp)
    for key, grp in df.groupby(['PrimaryId', 'FirstName', 'LastName', 'City'])
]
records = dict(data=records)

# Round-trip through JSON so that values the json module cannot serialize
# natively (e.g. NumPy integers coming from the DataFrame) are coerced with
# int() and the resulting document holds only plain Python types.
r = json.dumps(records, default=int, indent=4)
my_dict = json.loads(r)
print(my_dict)

# Collection.insert() was deprecated in PyMongo 3 and removed in PyMongo 4;
# insert_one() stores the single wrapper document.
collection.insert_one(my_dict)
print('inserted')
Приведённый выше код преобразует CSV во вложенный JSON.
Результат преобразования CSV во вложенный JSON выглядит следующим образом:
Connected successfully!!!
{
"data": [
{
"PrimaryId": 100,
"FirstName": "John",
"LastName": "Smith",
"City": "NewYork",
"CarName": [
"Toyota",
"BMW"
],
"DogName": [
"Spike",
"Rusty"
]
},
{
"PrimaryId": 101,
"FirstName": "Ben",
"LastName": "Swan",
"City": "Sydney",
"CarName": [
"Volkswagen",
"Ford",
"Audi"
],
"DogName": [
"Buddy",
"Max"
]
},
{
"PrimaryId": 102,
"FirstName": "Julia",
"LastName": "Brown",
"City": "London",
"CarName": [
"Mini"
],
"DogName": [
"Lucy"
]
}
]
}
Желаемая структура вывода должна быть примерно такой (P.S.: пустые значения приведены здесь только для примера):
"info":[
{ "primaryId":" "
"City":" "
"Name":
{ "FirstName":" "
"LastName" :" "
}
"CarName":
{ "car1": " "
"car2": " "
}
"DogName":
{ "Dog1": " "
"Dog2": " "
}
}]
data.csv
PrimaryId,FirstName,LastName,City,CarName,DogName
100,John,Smith,NewYork,Toyota,Spike
100,John,Smith,NewYork,BMW,Spike
100,John,Smith,NewYork,Toyota,Rusty
100,John,Smith,NewYork,BMW,Rusty
101,Ben,Swan,Sydney,Volkswagen,Buddy
101,Ben,Swan,Sydney,Ford,Buddy
101,Ben,Swan,Sydney,Audi,Buddy
101,Ben,Swan,Sydney,Volkswagen,Max
101,Ben,Swan,Sydney,Ford,Max
101,Ben,Swan,Sydney,Audi,Max
102,Julia,Brown,London,Mini,Lucy
Вот один из возможных подходов.
Демонстрационный пример:
import pandas as pd
# NOTE(review): `filename` is not defined in this snippet — presumably the
# path to data.csv; assign it before running.
df = pd.read_csv(filename)
def get_nested_rec(key, grp):
    """Build one nested record, grouping the name fields under ``Name``.

    Args:
        key: Group key, indexed positionally as
            ``(PrimaryId, FirstName, LastName, City)``.
        grp: Rows belonging to the group; must support
            ``grp[field].unique()`` for the ``CarName`` and ``DogName``
            columns (e.g. a pandas DataFrame).

    Returns:
        A dict with ``PrimaryId``, ``City``, a ``Name`` entry and the
        ``CarName`` / ``DogName`` lists of distinct values in order of
        first appearance.
    """
    rec = {
        'PrimaryId': key[0],
        'City': key[3],
        # FirstName and LastName are nested under the parent key "Name";
        # the value is a one-element list wrapping the dict.
        'Name': [{'FirstName': key[1], 'LastName': key[2]}],
    }
    # The multi-valued columns are collapsed to their distinct values.
    for field in ['CarName', 'DogName']:
        rec[field] = list(grp[field].unique())
    return rec
# One nested record per distinct (PrimaryId, FirstName, LastName, City)
# combination, wrapped under a single top-level "data" key.
records = [
    get_nested_rec(key, grp)
    for key, grp in df.groupby(['PrimaryId', 'FirstName', 'LastName', 'City'])
]
records = dict(data=records)
print(records)
Вывод:
{'data': [{'CarName': ['Toyota', 'BMW'],
'City': 'NewYork',
'DogName': ['Spike', 'Rusty'],
'Name': [{'FirstName': 'John', 'LastName': 'Smith'}],
'PrimaryId': 100},
{'CarName': ['Volkswagen', 'Ford', 'Audi'],
'City': 'Sydney',
'DogName': ['Buddy', 'Max'],
'Name': [{'FirstName': 'Ben', 'LastName': 'Swan'}],
'PrimaryId': 101},
{'CarName': ['Mini'],
'City': 'London',
'DogName': ['Lucy'],
'Name': [{'FirstName': 'Julia', 'LastName': 'Brown'}],
'PrimaryId': 102}]}