I am following an online python course and the exercise is to summarize an article from a website using nltk. In the following code i am setting up a class with an inbuilt function to summarize the text i have extracted from the url.
class FrequencySummarizer:
def __init__(self,min_cut=0.1,max_cut=0.9):
self._min_cut = min_cut
self._max_cut = max_cut
self._stopwords = set(stopwords.words('english')+
def __compute__frequencies(self,word_sent,customStopWords=None):
freq = defaultdict(int)
if customStopWords is None:
stopwords = set(self._stopwords)
stopwords = set(customStopWords).union(self._stopwords)
for sentence in word_sent:
for word in sentence:
if word not in stopwords:
freq[word] += 1
m = float(max(freq.values()))
for word in freq.keys():
if freq[word] >= self._max_cut or freq[word] <= self._min_cut:
del freq[word]
return freq
def summarize(self, text ,n):
sentences= sent_tokenize(text)
assert n<=len(sentences)
word_sent= [word_tokenize(s.lower()) for s in sentences]
self._freq = self.__compute__frequencies(word_sent)
ranking = defaultdict(int)
for i,sentences in enumerate(word_sent):
for word in sentences:
if word in self._freq:
ranking[i] += self._freq[word]
sentences_index = nlargest(n,ranking, key=rankings.get)
return [sents[j] for j in sentences_index]
The error thrown looks like this:
TypeError Traceback (most recent call last)
<ipython-input-44-a60e7a2fe76f> in <module>
1 fs = FrequencySummarizer()
----> 2 summary = fs.summarize(textOfUrl[0],3)
<ipython-input-43-9a4a271838d4> in summarize(self, text, n)
33 for i,sentences in enumerate(word_sent):
34 for word in sentences:
---> 35 if word in self._freq:
36 ranking[i] += self._freq[word]
37 sentences_index = nlargest(n,ranking, key=rankings.get)
TypeError: argument of type 'NoneType' is not iterable
I believe the error occurs because I have incorrectly set up the self._freq variable, this could be in the line where i define the word_sent variable or in the __compute__frequencies function, I can't tell.
Aucun commentaire:
Enregistrer un commentaire