I'm trying to work with a big dataframe (1M rows) where I need to set up high (1) and low (0) profiles. I built this function, but it takes quite a long time to go through all the columns and rows. How could I improve it? I've heard about vectorisation but don't know how to set it up.
Many thanks
# x is a single row of the dataframe (apply with axis=1 passes each row as a Series)
def _low_rules():
    """Return the low-profile rules, highest priority first.

    Each rule is a tuple
    ``(members, last_night_hour, first_evening_hour, half_hour_edges,
    holidays_low)``:

    * ``members`` -- collection of EAN values the rule applies to.
    * a reading is low when ``local_hour <= last_night_hour`` or
      ``local_hour >= first_evening_hour``.
    * ``half_hour_edges`` -- additionally low before 07:30 and from 22:30.
    * ``holidays_low`` -- additionally low when ``local_time`` is in
      ``be_holidays``.

    Built on every call so the module-level lists are looked up lazily.
    """
    return (
        (list1, 6, 23, True, False),
        (list2, 6, 23, False, False),
        (list3, 6, 22, False, False),
        (list4, 6, 22, False, False),
        (list5, 6, 22, False, False),
        (list6, 5, 21, False, True),
        (list7, 6, 22, False, True),
        (list8, 6, 22, False, True),
        (list9, 6, 22, False, True),
    )


def _flag_low_frame(frame):
    """Column-wise version of ``flag_low`` for a whole DataFrame."""
    import numpy as np
    import pandas as pd

    def as_mask(condition):
        # Positional boolean arrays avoid any index-alignment surprises
        # (e.g. duplicated index labels).
        return np.asarray(condition, dtype=bool)

    size = len(frame)
    ean = frame['EAN']
    hour = frame['local_hour']
    weekend = as_mask(frame['local_weekday'] >= 5)

    low = np.zeros(size, dtype=bool)
    unclaimed = np.ones(size, dtype=bool)
    for members, last_night_hour, first_evening_hour, half_hour_edges, \
            holidays_low in _low_rules():
        # A row belongs to the first list containing its EAN, exactly like
        # the if/elif chain of the row-wise version.
        group = unclaimed & as_mask(ean.isin(list(members)))
        unclaimed = unclaimed & ~group
        if not group.any():
            continue

        is_low = (
            weekend
            | as_mask(hour <= last_night_hour)
            | as_mask(hour >= first_evening_hour)
        )
        if half_hour_edges:
            minute = frame['local_minute']
            is_low = (
                is_low
                | as_mask((hour == 7) & (minute < 30))
                | as_mask((hour == 22) & (minute >= 30))
            )
        if holidays_low:
            # Only rows still undecided need the holiday lookup. It keeps
            # the ``in`` operator so be_holidays behaves exactly as in the
            # row-wise path, whatever kind of container it is.
            pending = group & ~is_low
            if pending.any():
                times = frame.loc[pending, 'local_time']
                is_low[pending] = [t in be_holidays for t in times]

        low |= group & is_low

    return pd.Series(low.astype(int), index=frame.index)


def flag_low(x):
    """Flag readings as low profile (1) or not (0).

    The rule set is selected by the first of ``list1`` .. ``list9`` that
    contains the reading's ``'EAN'`` value. A reading is low when
    ``local_weekday >= 5`` or when it matches that list's rule:

    ==========  =========================  ==========================
    list        low hours                  ``local_time`` in holidays
    ==========  =========================  ==========================
    list1       before 07:30, from 22:30   not checked
    list2       hour <= 6 or hour >= 23    not checked
    list3-5     hour <= 6 or hour >= 22    not checked
    list6       hour <= 5 or hour >= 21    low
    list7-9     hour <= 6 or hour >= 22    low
    ==========  =========================  ==========================

    Every other reading gets 0. This includes a reading whose EAN is in a
    list but which matches none of that list's conditions; such rows
    previously fell through without a return value and produced ``None``.

    Parameters
    ----------
    x : mapping-like row or DataFrame
        Either one row exposing ``'EAN'``, ``'local_weekday'``,
        ``'local_hour'``, ``'local_minute'`` and ``'local_time'`` (what
        ``DataFrame.apply(flag_low, axis=1)`` passes), or the whole
        DataFrame with those columns.

    Returns
    -------
    int or pandas.Series
        ``1``/``0`` for a single row. For a DataFrame, an integer Series
        aligned on the frame's index, computed column-wise, which is far
        faster than ``apply`` on large frames:
        ``dataframe['BinLow'] = flag_low(dataframe)``.
    """
    if getattr(x, "ndim", 1) == 2:
        # Two-dimensional input is a whole frame: take the vectorised path.
        return _flag_low_frame(x)

    ean = x['EAN']
    for members, last_night_hour, first_evening_hour, half_hour_edges, \
            holidays_low in _low_rules():
        if ean not in members:
            continue
        if x['local_weekday'] >= 5:
            return 1
        if holidays_low and x['local_time'] in be_holidays:
            return 1
        hour = x['local_hour']
        if hour <= last_night_hour or hour >= first_evening_hour:
            return 1
        if half_hour_edges:
            minute = x['local_minute']
            if (hour == 7 and minute < 30) or (hour == 22 and minute >= 30):
                return 1
        # EAN matched this list but no low condition applied.
        return 0
    return 0
# Evaluate flag_low once per row and store the results in the BinLow column.
dataframe['BinLow'] = dataframe.apply(func=flag_low, axis="columns")
Aucun commentaire:
Enregistrer un commentaire