Я пытаюсь написать функцию, которая читает CSV файл студентов-добровольцев с разной степенью. Целью этой функции является создание словаря, где ключи - это градусы, а значения - это частота градусов.
Данные организованы следующим образом;
name degree email
ABC PhD. [email protected]
CDE Ph.D. [email protected]
FGH MD,PHD [email protected]
Стремитесь получить словарь следующим образом:
#degree_count{'phd':3,'md':1}
def degree_frequency(csv_file):
f = open('csv_file')
csv_f = csv.reader(f)
#Creating a list to store all the degrees from the csv file
student_degree_list=[]
#Creating an empty dictionary to count the frequency
degree_count={}
for row in csv_f:
student_degree_list.append(row[1])
#Replacing fullstops to account for variations in writing degrees ( eg JD vs J.D)
[word.replace(".", "") for word in student_degree_list]
[word.lower() for word in student_degree_list]
for ele in student_degree_list:
if ele in degree_count:
degree_count[ele]=degree_count[ele]+1
else:
degree_count[ele]=0
return degree_count
import csv
from collections import Counter
columns = defaultdict(list) # each value in each column is appended to a list
with open('csv_file.csv') as f:
reader = csv.DictReader(f) # read rows into a dictionary format
for row in reader: # read a row as {column1: value1, column2: value2,...}
for (k,v) in row.items(): # go over each column name and value
columns[k].append(v) # append the value into the appropriate list
# based on column name k
кредит для кода чтения csv
degree_list = columns['degree']
degree_list_clean = []
for cad_degrees in degree_list:
cad_degrees_lst = cad_degrees.split()
for degree in cad_degrees_lst:
degree_clean = degree.strip().replace('.','').lower()
degree_list_clean.append(degree_clean)
output_dict_counter_version = dict(Counter(degree_list_clean))
print(output_dict_counter_version)
degree_frequency_dict = {}
for deg in degree_list_clean:
if deg in degree_frequency_dict:
degree_frequency_dict[deg] += 1
else:
degree_frequency_dict[deg] = 1
print(degree_frequency_dict)
import pandas as pd
from collections import Counter
data = pd.read_csv("csv_file.csv")
degree_list = data['degree'].tolist()
degree_list_clean = []
for cad_degrees in degree_list:
cad_degrees_lst = cad_degrees.split()
for degree in cad_degrees_lst:
degree_clean = degree.strip().replace('.','').lower()
degree_list_clean.append(degree_clean)
print(dict(Counter(degree_list_clean)))
'''
------------------ Input
name,degree,email
ABC,PhD. ,[email protected]
CDE,Ph.D. ,[email protected]
FGH, MD PHD ,[email protected]
-------------------- Output
{'phd': 3, 'md': 1}
'''
Я считаю, что ваша проблема в том, что приведенный ниже код не имеет эффекта, если вы не назначили его переменной.
[word.replace(".", "") for word in student_degree_list]
[word.lower() for word in student_degree_list]
Кроме того, если степень имеет 1 случай, не должна быть установлена в 1, а не 0?
Рабочий код:
#degree_count{'phd':3,'md':1}
def degree_frequency():
f = open('csv_file')
csv_f = csv.reader(f)
# Creating a list to store all the degrees from the csv file
student_degree_list = []
# Creating an empty dictionary to count the frequency
degree_count = {}
for row in csv_f:
student_degree_list.append(row[1])
#Replacing fullstops to account for variations in writing degrees ( eg JD vs J.D)
student_degree_list = [word.replace('.','').lower() for word in student_degree_list]
for ele in student_degree_list:
if ele in degree_count:
degree_count[ele] += 1
else:
# Supposed to be 1?
degree_count[ele]=0
return degree_count
[word.replace(".", "").lower() for word in student_degree_list]