Python 如果值有新数据,则为dataframe迭代列填充
标签:python, pandas, dataframe
我一直在使用 pandas DataFrame,但卡在了某个逻辑上。有人能帮忙吗?这是我的输入 DataFrame:
**df4**
Data1 Description Associated Data
F104 TO F5334
F104 TO F105
F105 TO F1212
F105 TO F5332
F105 IS F104
F5332 IS F105
F5334 IS F104
F1000 IS F1212
F1015 IS F1242
F1015B IS F1242B
F1015B IS F1242
F1212 TO F1000
F1212 TO F1242B
F1212 IS F105
F1242 TO F1015
F1242 TO F1015B
F1242B TO F1015B
F1242B IS F1212
这里有3列:
Data1(键)、Description(用作条件)、Associated Data(值)
预期产出:
F104 F5334
F104 F105 F1212
F104 F105 F5332
F104 F105 F1212 F1000
F104 F105 F1212 F1242B
...............
我的代码:
# Build df6 from df4: each "TO" row contributes a (Data1, Upgrade_use) pair at
# its own row label, and each "IS" row writes its associated value into the
# Is_Upgrade column of the rows already collected for the same Data1 key.
#
# All three columns are declared up front (in the order the original code ended
# up with) so that an "IS" row appearing before the first "TO" row no longer
# raises KeyError on df6["Data1"].
df6 = pd.DataFrame(columns=["Is_Upgrade", "Data1", "Upgrade_use"])
df4 = df.copy()
# assumes df4 carries the default 0..n-1 index, since rows are addressed by
# label through .loc — TODO confirm against how df is loaded
for i in range(len(df4)):
    print(i)
    bas_part = str(df4.loc[i, "Data1"])
    type_part = str(df4.loc[i, "Description"])
    associate = df4.loc[i, "Associated Data"]
    if type_part == "TO":
        df6.loc[i, "Data1"] = bas_part
        df6.loc[i, "Upgrade_use"] = associate
    elif type_part == "IS":
        # Boolean-mask assignment: only rows already added for this key are
        # updated; when none exist yet, nothing is written.
        df6.loc[df6["Data1"] == bas_part, "Is_Upgrade"] = associate
取得的结果:
F104 F105 F1212
F104 F105 F5332
F105 F1212 F1000
F105 F1212 F1242B
nan F104 F5334
nan F104 F105
F1212 F1242B F1015B
nan F1242 F1015
nan F1242 F1015B
详细说明:
如果描述列值为“TO”,则将关联数据添加到实际数据的右侧
迭代1:
F104, F5334
迭代2:因为我们有条件再次创建具有相同值的新行
F104 F5334
F104 F105
F104 F5334
F104 F105 F1212
F104 F5334
F104 F105 F1212
F104 F105 F5332
迭代3:因为我们有条件再次创建具有相同值的新行
F104 F5334
F104 F105
F104 F5334
F104 F105 F1212
F104 F5334
F104 F105 F1212
F104 F105 F5332
迭代4:因为我们有条件再次创建具有相同值的新行
F104 F5334
F104 F105
F104 F5334
F104 F105 F1212
F104 F5334
F104 F105 F1212
F104 F105 F5332
迭代5:由于条件为“IS”,现在检查 Data1 左侧的值是否与关联数据(associate)相同:如果相同则跳过;否则添加新值,并将现有数据移到新列
F104 F5334
F104 F105 F1212
F104 F105 F5332
F104 F5334
F104 F105 F1212
F104 F105 F5332
迭代7:由于条件为“IS”,现在检查 Data1 左侧的值是否与关联数据(associate)相同:如果相同则跳过;否则添加新值,并将现有数据移到新列
F104 F5334
F104 F105 F1212
F104 F105 F5332
F104 F5334
F104 F105 F1212
F104 F105 F5332
等等
有人能帮忙吗?

回答:一种做法是先创建一个函数,用来检查 DataFrame 中的索引值:
def right_tree_pn(df5, base_partnum):
    """Report whether a part number occurs in a stringified list of parts.

    Args:
        df5: Despite its name, this is not a DataFrame but a string holding
            the ``str()`` form of a list of part numbers, for example
            ``"['F104', 'F105']"``. Any value of length 0 is treated as an
            empty list.
        base_partnum: The part number to look for.

    Returns:
        The string ``"Yes"`` when ``base_partnum`` is one of the parts,
        otherwise ``"No"``.
    """
    if len(df5) == 0:
        parts_list = []
    else:
        # Undo str(list): strip brackets and quotes, then split on ", ".
        parts_list = (
            df5.replace("[", "").replace("]", "").replace("'", "").split(", ")
        )
    return "Yes" if base_partnum in parts_list else "No"
现在创建一个for循环:
# NOTE(review): the indentation of this block was lost when it was pasted; the
# nesting has to be restored before the code can run.
#
# Walks df4 row by row. For rows whose Description is "TO" it builds lists of
# part numbers and stores them in df5 as stringified lists (column "Data"),
# together with a "Base_PN" column.
# df5 and OrderedDict are not defined in this snippet - presumably df5 starts
# out as an empty DataFrame and OrderedDict comes from collections; confirm.
for i in range(len(df4)):
print(i)
base = df4.loc[i]["Data1"]
type_part = df4.loc[i]["Description"]
associate_data = df4.loc[i]["Associated Data"]
# base and associate_data are joined and re-split on ","; the resulting list
# is used below to skip rows whose values are all already stored.
test_parts2 = base + "," + associate_data
test_parts =test_parts2.split(",")
#check for condition
if((str(type_part) == "TO") == True):
#initially the data frame is empty
if(len(df5) == 0):
parts_list = []
## Check for value . it is 0 for first step
if(len(parts_list) > 0):
index = parts_list.index(base)
#index_val = len(df5)
else:
index = 0
index_val = len(df5)
# Insert to first Position as index i.e base value
parts_list.insert(index, base)
if(associate_data in parts_list):
associate_index = parts_list.index(associate_data)
else:
associate_index = index
#########################################
# The condition below is equivalent to associate_index != index.
if((associate_index < index) or (associate_index > index) == True):
#df5.loc[new_index,"Data"] = str(parts_list)
pass
else:
#add Associate data to next index value i.1 1 for first step
parts_list.insert(index + 1,associate_data)
# The list is persisted as its str() form, not as a list object.
df5.loc[index_val ,"Data"] = str(parts_list)
df5.loc[index_val ,"Base_PN"] = str(parts_list[0])
######### Add to new line if dataframe is not empty
else:
df_index = []
###############Get the index values from the dataframe if the base data exists
for x in range(len(df5)):
# NOTE: df6 here holds the "Yes"/"No" string returned by right_tree_pn.
df6 = right_tree_pn(df5.loc[x]["Data"],base)
if(str(df6) == "Yes"):
df_index1 = x
df_index.append(df_index1)
#Check length of the index if it is 1 perform following operations
if(len(df_index) == 1):
df_index = df_index[0]
new_pn_list = df5.loc[df_index]["Data"]
# Parses the stored string back into a list: spaces are removed and the
# text is split on ",".
parts_list1 = new_pn_list.replace("[","").replace("]","").replace("'","").replace(" ","").split(",")
######List compare
if(all(w in parts_list1 for w in test_parts) == True):
pass
else:
index = parts_list1.index(base)
if((index == 0 and len(parts_list1) != 0)) == True:
ass_pasrt2 = parts_list1[index + 1]
if(index == 0 and associate_data != ass_pasrt2) == True:
parts_list = []
index_val = len(df5)
else:
parts_list = parts_list1
index_val = df_index
elif(((index == 0 and len(parts_list1) == 0)) ==True):
parts_list = []
index_val = len(df5)
else:
index2 = len(parts_list1) - 1
if(index2 != index):
ass_pasrt2 = parts_list1[index + 1]
else:
ass_pasrt2 = ""
if((associate_data == ass_pasrt2)== True):
parts_list = parts_list1
index_val = df_index
elif((ass_pasrt2 == "") == True):
parts_list = parts_list1
index_val = df_index
else:
parts_list = parts_list1
if(ass_pasrt2 in parts_list):
parts_list.remove(ass_pasrt2)
index_val = len(df5)
if(len(parts_list) > 0):
index = parts_list.index(base)
else:
index = 0
parts_list.insert(index, base)
if(associate_data in parts_list):
associate_index = parts_list.index(associate_data)
else:
associate_index = index
#########################################
# The condition below is equivalent to associate_index != index.
if((associate_index < index) or (associate_index > index) == True):
#df5.loc[new_index,"Data"] = str(parts_list)
pass
else:
parts_list.insert(index + 1,associate_data)
# OrderedDict.fromkeys drops repeated part numbers while keeping first-seen order.
parts_list2 = list(OrderedDict.fromkeys(parts_list))
df5.loc[index_val,"Data"] = str(parts_list2)
df5.loc[index_val ,"Base_PN"] = str(parts_list2[0])
#### If the base is not present in the data frame create new row similar to first iteration
elif(len(df_index) == 0):
parts_list = []
if(len(parts_list) > 0):
index = parts_list.index(base)
#index_val = len(df5)
else:
index = 0
parts_list.insert(index, base)
index_val = len(df5)
if(associate_data in parts_list):
associate_index = parts_list.index(associate_data)
else:
associate_index = index
#########################################
# The condition below is equivalent to associate_index != index.
if((associate_index < index) or (associate_index > index) == True):
#df5.loc[new_index,"Data"] = str(parts_list)
pass
else:
parts_list.insert(index + 1,associate_data)
df5.loc[index_val ,"Data"] = str(parts_list)
df5.loc[index_val ,"Base_PN"] = str(base)
else:
#Code for multiple index data
for z in range(len(df_index)):
index_value = df_index[z]
new_pn_list = df5.loc[index_value]["Data"]
# Here the stored string is split on ", " and spaces are not stripped.
parts_list1 = new_pn_list.replace("[","").replace("]","").replace("'","").split(", ")
if(all(w in parts_list1 for w in test_parts) == True):
pass
else:
index = parts_list1.index(base)
if((index == 0 and len(parts_list1) != 0)) ==True:
parts_list = parts_list1
elif(((index == 0 and len(parts_list1) == 0)) ==True):
parts_list = []
else:
parts_list = parts_list1
# parts_list = parts_list1
if(len(parts_list) > 0):
index = parts_list.index(base)
else:
index = 0
parts_list.insert(index, base)
if(associate_data in parts_list):
associate_index = parts_list.index(associate_data)
else:
associate_index = index
#########################################
# The condition below is equivalent to associate_index != index.
if((associate_index < index) or (associate_index > index) == True):
#df5.loc[new_index,"Data"] = str(parts_list)
pass
else:
parts_list.insert(index + 1,associate_data)
# OrderedDict.fromkeys drops repeated part numbers while keeping first-seen order.
parts_list2 = list(OrderedDict.fromkeys(parts_list))
df5.loc[index_value ,"Data"] = str(parts_list2)
df5.loc[index_value ,"Base_PN"] = str(parts_list2[0])
输出:
Level_0 Level_1 Level_2 Level_3 Level_4
----------------------------------------
F104 F5334
F104 F105 F1212
F104 F105 F5332
F104 F105 F1212 F1000
F104 F105 F1212 F1242B F1015B
评论:
预期输出的结构有点不清楚:是一个包含两列的 df(第一列为索引,第二列为所有已添加值的列表),还是一个每次添加都会多出一列的 df?
@Racooneer:是的,它随每次添加而增长——前提是条件“TO”成立,正如迭代 3 和 8 中所解释的。
迭代 6 发生了什么?
好吧,你的代码似乎没有遵循这个思路:你为 df6 定义了 3 列:`Is_Upgrade`、`Data1` 和 `Upgrade_use`。你认为动态增长的新列应该从哪里来……顺便问一下,df5 在这一切中起什么作用?
@Racooneer:df5 在这里是笔误。已将 df4 的值存为副本,用于测试。