Python 使用For循环生成数据帧
我试图生成500个数据帧,每个学生一个。每个数据框将成为他们的考勤记录 (奖金)如何将每个数据框的名称设置为Python 使用For循环生成数据帧,python,python-3.x,pandas,dataframe,for-loop,Python,Python 3.x,Pandas,Dataframe,For Loop,我试图生成500个数据帧,每个学生一个。每个数据框将成为他们的考勤记录 (奖金)如何将每个数据框的名称设置为考勤\u I。其中我是学生的ID。所以,学生1的数据框是出勤率,学生2的数据框是出勤率,等等 代码如下: import pandas as pd import numpy as np from numpy import random from datetime import datetime from datetime import timedelta from dateutil.rrul
考勤\u I
。其中我是学生的ID。所以,学生1的数据框是出勤率,学生2的数据框是出勤率,等等
代码如下:
import pandas as pd
import numpy as np
from numpy import random
from datetime import datetime
from datetime import timedelta
from dateutil.rrule import DAILY, rrule, MO, TU, WE, TH, FR, SA, SU
import calendar
# DEFINE GLOBAL VARIABLES
student_count = 500
# DECLARE START AND END DATES
start = '2013-03-01'
end = '2015-07-31'
# GET NUMBER OF WORKDAYS
weekmask = 'Wed Thu Fri Sat Sun'
customworkweek = pd.offsets.CustomBusinessDay(weekmask=weekmask)
workdates = pd.bdate_range(start = start, end = end, freq = customworkweek).to_pydatetime().tolist()
# GET NUMBER OF WEEKS AND MONTHS
x = pd.to_datetime(end) - pd.to_datetime(start)
workweeks_int = int(x / np.timedelta64(1, 'W'))
workmonths_int = int(x / np.timedelta64(1, 'M'))
# MORE GLOBAL VARAIBLES
wed = pd.offsets.CustomBusinessDay(weekmask='Wed')
thu = pd.offsets.CustomBusinessDay(weekmask='Thu')
fri = pd.offsets.CustomBusinessDay(weekmask='Fri')
sat = pd.offsets.CustomBusinessDay(weekmask='Sat')
sun = pd.offsets.CustomBusinessDay(weekmask='Sun')
wed_classes = pd.bdate_range(start = start, end = end, freq = wed).to_pydatetime().tolist()
thu_classes = pd.bdate_range(start = start, end = end, freq = thu).to_pydatetime().tolist()
fri_classes = pd.bdate_range(start = start, end = end, freq = fri).to_pydatetime().tolist()
sat_classes = pd.bdate_range(start = start, end = end, freq = sat).to_pydatetime().tolist()
sun_classes = pd.bdate_range(start = start, end = end, freq = sun).to_pydatetime().tolist()
# SET UP STUDENT DATAFRAME
student_age = random.randint(4, 13, size = (student_count))
student_sex = random.choice(["male", "female"], size = (student_count))
student_id = list(range(0,student_count))
student_name_local = pd.read_csv('Database\student_name_local.csv')
student_name_english = pd.read_csv('Database\student_name_english.csv')
class_id = random.randint(1, 27, size = (student_count))
column_names = ['STUDENT_ID', 'STUDENT_NAME_LOCAL', 'STUDENT_NAME_ENGLISH', 'STUDENT_AGE', 'STUDENT_SEX']
students = pd.DataFrame(columns = column_names)
students['STUDENT_ID'] = student_id
students['STUDENT_NAME_LOCAL'] = student_name_local
students['STUDENT_NAME_ENGLISH'] = student_name_english
students['STUDENT_AGE'] = student_age
students['STUDENT_SEX'] = student_sex
students['CLASS_ID'] = class_id
# SET UP CLASSES DATAFRAME
data = [['0', 'Wednesday', '16:30:00'], ['1', 'Wednesday', '17:30:00'], ['2', 'Wednesday', '18:30:00'], ['3', 'Thursday', '16:30:00'], ['4', 'Thursday', '17:30:00'], ['5', 'Thursday', '18:30:00'], ['6', 'Friday', '16:30:00'], ['7', 'Friday', '17:30:00'], ['8', 'Friday', '18:30:00'], ['9', 'Saturday', '8:30:00'], ['10', 'Saturday', '9:30:00'], ['11', 'Saturday', '10:30:00'], ['12', 'Saturday', '11:30:00'], ['13', 'Saturday', '13:30:00'], ['14', 'Saturday', '14:30:00'], ['15', 'Saturday', '15:30:00'], ['16', 'Saturday', '16:30:00'], ['17', 'Saturday', '17:30:00'], ['18', 'Sunday', '8:30:00'], ['19', 'Sunday', '9:30:00'], ['20', 'Sunday', '10:30:00'], ['21', 'Sunday', '11:30:00'], ['22', 'Sunday', '13:30:00'], ['23', 'Sunday', '14:30:00'], ['24', 'Sunday', '15:30:00'], ['25', 'Sunday', '16:30:00'], ['26', 'Sunday', '17:30:00']]
column_names = ['CLASS_ID', 'CLASS_DAY', 'CLASS_TIME']
classes = pd.DataFrame(data = data, columns = column_names)
classes['CLASS_ID'] = classes['CLASS_ID'].astype(int)
# FOR LOOP VARIABLES
attendance_status = random.choice(["present", "absent"], p = [0.95, 0.05], size = workweeks_int)
# THE FOR LOOP
for i in student_id:
ids = list([i])*workweeks_int
column_names = ['STUDENT_ID', 'CLASS_DATE', 'ATTENDANCE_STATUS']
i = pd.DataFrame(columns = column_names)
i['STUDENT_ID'] = ids
i['ATTENDANCE_STATUS'] = attendance_status
x = students['CLASS_ID'].loc[students['STUDENT_ID'] == i].to_list()
x = x[-1]
c = classes['CLASS_DAY'].loc[classes['CLASS_ID'] == x].to_list()
c = c[-1]
if c == 'Wednesday':
i['CLASS_DATE'] = wed_classes
elif c == 'Thursday':
i['CLASS_DATE'] = thu_classes
elif c == 'Friday':
i['CLASS_DATE'] = fri_classes
elif c == 'Saturday':
i['CLASS_DATE'] = sat_classes
elif c == 'Sunday':
i['CLASS_DATE'] = sun_classes
下面是for循环期间发生的错误:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-4-c04451cf4f9b> in <module>
69 i['ATTENDANCE_STATUS'] = attendance_status
70
---> 71 x = students['CLASS_ID'].loc[students['STUDENT_ID'] == i].to_list()
72 x = x[-1]
73 c = classes['CLASS_DAY'].loc[classes['CLASS_ID'] == x].to_list()
~\anaconda3\lib\site-packages\pandas\core\indexing.py in __getitem__(self, key)
1766
1767 maybe_callable = com.apply_if_callable(key, self.obj)
-> 1768 return self._getitem_axis(maybe_callable, axis=axis)
1769
1770 def _is_scalar_access(self, key: Tuple):
~\anaconda3\lib\site-packages\pandas\core\indexing.py in _getitem_axis(self, key, axis)
1950
1951 if hasattr(key, "ndim") and key.ndim > 1:
-> 1952 raise ValueError("Cannot index with multidimensional key")
1953
1954 return self._getitem_iterable(key, axis=axis)
ValueError: Cannot index with multidimensional key
attendance_status = random.choice(["present", "absent"], p = [0.95, 0.05], size = workweeks_int)
id = 326
i = list([id])*workweeks_int
column_names = ['STUDENT_ID', 'CLASS_DATE', 'ATTENDANCE_STATUS']
att1 = pd.DataFrame(columns = column_names)
att1['STUDENT_ID'] = i
att1['ATTENDANCE_STATUS'] = attendance_status
x = students['CLASS_ID'].loc[students['STUDENT_ID'] == id].to_list()
x = x[-1]
c = classes['CLASS_DAY'].loc[classes['CLASS_ID'] == x].to_list()
c = c[-1]
if c == 'Wednesday':
att1['CLASS_DATE'] = wed_classes
elif c == 'Thursday':
att1['CLASS_DATE'] = thu_classes
elif c == 'Friday':
att1['CLASS_DATE'] = fri_classes
elif c == 'Saturday':
att1['CLASS_DATE'] = sat_classes
elif c == 'Sunday':
att1['CLASS_DATE'] = sun_classes
att1
您是否能够编辑帖子以便其他人可以运行代码请,
NameError:name'start'未定义
NameError:name'students'未定义(请尝试清除内存并重新运行)抱歉-我真的鼓励您擦除当前会话(重新启动笔记本/无论什么),然后从Scratch运行此操作请提供一个新的选项。你能在一个块中包含所有相关代码吗?另外,如何将每个数据帧的名称设置为attention\u I
,这是什么意思?