I read csv data from df2 file and then checked to df1 if there is then the centroid value increases and so on
import numpy as np
import pandas as pd
MAX_CENTROIDS =5
df2 = pd.read_csv('IMDB-Word2Vec/IMDBDataset.cleaned.2000.ver1.csv',encoding = "ISO-8859-1")
print(df2)
df1 = pd.read_csv('kata/kata5.csv')
def get_centroids(row):
centroids = np.zeros(MAX_CENTROIDS, dtype=int)
for word in row.split(' '):
if word in df1['word'].values:
print(word)
centroids[df1[df1['word']==word]['centroid'].values]+=1
return centroids
df2['centroid'] = df2['kalimat'].apply(get_centroids)
pd.DataFrame(df2['centroid'].tolist()).to_csv('belum_label/data5.csv', header=False, index=False)
and the word has entered if but an error in the part
centroids[df1[df1['word']==word]['centroid'].values]+=1
IndexError: arrays used as indices must be of integer (or boolean) type
Aucun commentaire:
Enregistrer un commentaire