lundi 1 mars 2021

Iterating through pandas df with multiple row conditions

I'm trying to work with a big dataframe (1M rows) where I need to set up high (1) and low (0) profiles. I built this function, but it takes quite long to go through all the columns and rows. How could I improve it? I've heard about vectorisation but don't know how to set it up.

Many thanks

def flag_low(x):
    """Return 1 if the row matches the "low" rule of its EAN group, else 0.

    Meant to be used row-wise, e.g. ``dataframe.apply(flag_low, axis=1)``,
    so ``x`` is a single row (anything indexable by column name), not the
    whole dataframe.

    The EAN is looked up in ``list1`` .. ``list9`` in that order and the
    first list containing it selects the rule set:

    * list1: hour <= 6 or hour >= 23, plus 07:00-07:29 and 22:30-22:59
    * list2: hour <= 6 or hour >= 23
    * list3, list4, list5: hour <= 6 or hour >= 22
    * list6: hour <= 5 or hour >= 21, plus ``be_holidays``
    * list7, list8, list9: hour <= 6 or hour >= 22, plus ``be_holidays``

    For every group ``local_weekday >= 5`` also yields 1 (presumably
    Saturday/Sunday with Monday == 0 -- confirm against how the column is
    built).

    Parameters
    ----------
    x : row with the keys ``'EAN'``, ``'local_weekday'``, ``'local_hour'``
        and, depending on the group, ``'local_minute'`` and ``'local_time'``.

    Returns
    -------
    int
        1 when a rule matches; 0 otherwise, including when the EAN is in
        none of the lists.  (The previous version returned ``None`` for a
        known EAN with no matching rule, leaving missing values in the
        resulting column.)

    Notes
    -----
    ``list1`` .. ``list9`` and ``be_holidays`` are module-level names defined
    elsewhere.  NOTE(review): if they are plain lists, turning them into
    sets once would make each ``in`` test constant-time.
    """
    ean = x['EAN']

    # Pick the rule parameters from the first list that contains the EAN.
    # The lookup order is significant when an EAN appears in several lists.
    if ean in list1:
        morning_end, evening_start = 6, 23
        check_holidays, half_hour_edges = False, True
    elif ean in list2:
        morning_end, evening_start = 6, 23
        check_holidays, half_hour_edges = False, False
    elif ean in list3 or ean in list4 or ean in list5:
        morning_end, evening_start = 6, 22
        check_holidays, half_hour_edges = False, False
    elif ean in list6:
        morning_end, evening_start = 5, 21
        check_holidays, half_hour_edges = True, False
    elif ean in list7 or ean in list8 or ean in list9:
        morning_end, evening_start = 6, 22
        check_holidays, half_hour_edges = True, False
    else:
        # Unknown EAN: never flagged.
        return 0

    if x['local_weekday'] >= 5:
        return 1

    if check_holidays and x['local_time'] in be_holidays:
        return 1

    hour = x['local_hour']
    if hour <= morning_end or hour >= evening_start:
        return 1

    if half_hour_edges:
        # Extra half-hour margins around the day window (list1 only).
        minute = x['local_minute']
        if (hour == 7 and minute < 30) or (hour == 22 and minute >= 30):
            return 1

    # Known EAN but no rule matched: explicit 0 instead of an implicit None.
    return 0
# Call flag_low once per row (axis=1 hands each row to the function) and
# store the returned flag in the 'BinLow' column.
dataframe['BinLow'] = dataframe.apply(flag_low, axis = 1)

Aucun commentaire:

Enregistrer un commentaire