associationRules.csv = #I'm only displaying some lines here for my case
,antecedents,consequents,confidence
19,"(LM = 20, SMOK = y)",(DIAB = n),0.5
20,(LM = 20),"(DIAB = n, SMOK = y)",0.5
21,"(DIAB = n, RCA = 85, LM = 15)",(SMOK = y),1.0
175,(RCA = 85),(LAD = 40),0.6666666666666667
176,(LAD = 40),(RCA = 85),1.0
177,"(DIAB = y, CHOL = 200, SMOK = y)",(LAD = 90),0.6666666666666667
178,"(DIAB = y, CHOL = 200, LAD = 90)",(SMOK = y),1.0
200,(LM = 20),"(RCA = 75, DIAB = n)",0.5
203,"(SEX = F, DIAB = y, SMOK = y)",(LM = 20),1.0
239,(CHOL = 200),"(DIAB = y, SMOK = y)",1.0
I am iterating through association rules rows and would like to extract only the rows if: column " antecedent" has datasets belongs to g1 or g2 only. and DOES NOT belong to y. Meaning, only lines (175, 176, 203) should be extracted.
y = ['CHOL = 200', 'LM = 20', 'LM = 25', 'LM = 30', 'LM = 15', 'LM = 35' ]
#g1 and g2 are the rest of other values of antecedents s.a: DIAB, RCA, LAD..etc
My code only works if len(antecedents)==1 and fails when len(antecedents)>1.
antecedents_list = []
for i, row in associationRules.iterrows():
antecedents = row.iloc[0]
flag1 = False
flag2 = False
single_antecedent = False
for j, v in enumerate(antecedents):
if len(antecedents) == 1 and (v not in y): #print single items
single_antecedent = True
elif len(antecedents) > 1 and (v not in y):
if v in g1:
flag1 = True
if v in g2:
flag2 = True
if single_antecedent or (flag1 and flag2):
antecedents_list.append(antecedents)
rules['antecedents'] = antecedents_list
What am I doing wrong? Can anyone help
Aucun commentaire:
Enregistrer un commentaire