Extract the most frequent words from a text in Python

To extract the most frequent words from a piece of text, we can count the occurrences of each word in a Python dictionary (dict) and then sort the entries by count.

If you need to exclude additional words from the result, add them to the `stopwords` string.

# prints 40 top keywords for the text

def PrintTopKeywords(text, top_n=40, stopwords=None):
    """Print the most frequent keywords found in *text*.

    The text is lower-cased and split on whitespace. Trailing punctuation
    (any run of ``, . ! ? ;``) is removed from each token. Tokens that are
    one character or shorter, or that appear in the stop list, are ignored.
    The remaining words are counted and printed most frequent first, after
    a "Top keywords" header, one ``word count`` entry per printed line.

    Args:
        text: The text to analyse.
        top_n: Maximum number of keywords to print (defaults to 40).
            ``None`` prints every keyword.
        stopwords: Optional whitespace-separated string of words to ignore.
            When omitted, a built-in list of common English words is used.

    Returns:
        None. The result is written to standard output.
    """
    # Function-scope import keeps the snippet self-contained.
    from collections import Counter

    if stopwords is None:
        stopwords = ('I we he she a about an and are as at be by com for from '
                     'how if in is it me more my myself of on or that the '
                     'this to was what when where who will with you your')

    # Tokens are lower-cased before the lookup, so the stop list must be
    # lower-cased as well; a set makes each membership test O(1).
    stoplist = set(stopwords.lower().split())

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("\nTop keywords\n")

    counts = Counter()
    for token in text.lower().split():
        # Strip the whole run of trailing punctuation so that "cat?!" and
        # "cat" are counted as the same word.
        word = token.rstrip(",.!?;")
        if len(word) > 1 and word not in stoplist:
            counts[word] += 1

    for word, count in counts.most_common(top_n):
        # Only a Python 2 ``unicode`` object fails this check; encode it so
        # printing does not depend on the terminal encoding. Native ``str``
        # values are printed unchanged.
        if not isinstance(word, str):
            word = word.encode('utf-8')
        print("\n%s %d \n" % (word, count))

############################################

tags: & category: -