preview

Sample Assignment

Decent Essays

#!/usr/bin/python

import sys import operator from decimal import *

def get_top_most_frequent_ngrams(n_grams, f):
    """Return the ``f`` most frequent n-grams, most frequent first.

    Args:
        n_grams: Mapping of n-gram -> frequency count.
        f: Number of entries to return. Values <= 0 yield an empty list;
            values larger than the number of n-grams return all of them.

    Returns:
        A list of ``(n_gram, frequency)`` tuples in descending frequency
        order.
    """
    # Stable ascending sort followed by a full reversal: among n-grams with
    # equal frequency, the one inserted later into the mapping comes first.
    ranked = sorted(n_grams.items(), key=operator.itemgetter(1))
    ranked.reverse()

    # A slice end is exclusive, so [:f] yields exactly f items. Clamp at 0 so
    # that a zero or negative f returns nothing instead of slicing from the
    # end of the list.
    return ranked[:max(f, 0)]

def extract_character_n_grams(doctext, n): """ Parse a document text and get all the character n-grams along with their frequencies as a dictionary """ n_gram_dict = {}

i = 0 while (i + n) < len(doctext): n_gram = doctext[i:n + i] n_gram = n_gram.replace(' ', '_') n_gram = n_gram.replace('\n', '__') # print(n_gram) if n_gram in n_gram_dict: …show more content…

Input: author_text_file - input filename of author n - length of n-grams f - count of top most frequent n-grams data_dir - folder containing sample text files of both authors Return: 'A' if input text is evaluated to be from author 'A' 'B' if input text is evaluated to be from author 'B' """ author_text = load_document_text(data_dir + '/' + author_text_file) author_n_grams = extract_word_n_grams(author_text, n)

A_author_file_scores = [] B_author_file_scores = []

print("") print("------------- Intermediate Results -------------") # evaluate

Get Access