apriori_templete.py

School

University of Michigan *

*We aren’t endorsed by this school

Course

505

Subject

Computer Science

Date

Dec 6, 2023

Type

Pages

Uploaded by DeanDragonflyPerson763

from __future__ import print_function

import sys


def apriori(dataset, min_support=0.5, verbose=False):
    """Implements the Apriori algorithm.

    The Apriori algorithm will iteratively generate new candidate
    k-itemsets using the frequent (k-1)-itemsets found in the previous
    iteration.

    Parameters
    ----------
    dataset : list
        The dataset (a list of transactions) from which to generate
        candidate itemsets.

    min_support : float
        The minimum support threshold. Defaults to 0.5.

    verbose : bool
        When true, print every frequent itemset together with its support
        (rounded to 3 decimals) once mining has finished.

    Returns
    -------
    F : list
        The list of frequent itemsets; ``F[k - 1]`` holds the frequent
        k-itemsets. The final entry is always an empty list because the
        loop stops only after a level with no frequent itemsets.

    support_data : dict
        The support data for all candidate itemsets.

    References
    ----------
    .. [1] R. Agrawal, R. Srikant, "Fast Algorithms for Mining Association
           Rules", 1994.
    """
    C1 = create_candidates(dataset)
    D = list(map(set, dataset))
    # Get the frequent 1-itemsets.
    F1, support_data = get_freq(D, C1, min_support, verbose=False)
    F = [F1]  # list of frequent itemsets; initialized to frequent 1-itemsets
    k = 2  # the itemset cardinality
    while len(F[k - 2]) > 0:
        Ck = apriori_gen(F[k - 2], k)  # generate candidate itemsets
        Fk, supK = get_freq(D, Ck, min_support)  # get frequent itemsets
        support_data.update(supK)  # record supports of the new candidates
        F.append(Fk)  # add the frequent k-itemsets to the result
        k += 1

    if verbose:
        # Print a list of all the frequent itemsets.
        for kset in F:
            for item in kset:
                # Joining with ", " avoids the old rstrip(', ') trick, which
                # could also eat trailing commas/spaces of the last item.
                itemset_text = ", ".join(str(i) for i in item)
                print("{" + itemset_text + "}: sup = "
                      + str(round(support_data[item], 3)))

    return F, support_data


def create_candidates(dataset, verbose=False):
    """Creates a list of candidate 1-itemsets from a list of transactions.

    Parameters
    ----------
    dataset : list
        The dataset (a list of transactions) from which to generate
        candidate itemsets.

    verbose : bool
        When true, print the sorted list of distinct items.

    Returns
    -------
    The list of candidate 1-itemsets, in sorted item order, each passed as
    a frozenset (a set that is immutable and hashable).
    """
    # Deduplicate with a set (O(1) membership) instead of scanning a list
    # for every item; items must be hashable anyway to go into a frozenset.
    items = sorted({item for transaction in dataset for item in transaction})

    if verbose:
        # Print a list of all the candidate items.
        print("{" + ", ".join(str(item) for item in items) + "}")

    # Each candidate is a frozenset because it will be a dictionary key.
    return [frozenset([item]) for item in items]


def get_freq(dataset, candidates, min_support, verbose=False):
    """Splits candidate itemsets into frequent and infrequent ones.

    This function separates the candidate itemsets into frequent and
    infrequent itemsets based on ``min_support``, and returns all candidate
    itemsets that meet the minimum support threshold.

    Parameters
    ----------
    dataset : list
        The dataset (a list of transactions, each usable as the argument
        of ``frozenset.issubset``) against which candidates are counted.

    candidates : list of frozenset
        The list of candidate itemsets.

    min_support : float
        The minimum support threshold.

    verbose : bool
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    freq_list : list
        The list of frequent itemsets.

    support_data : dict
        The support data for all candidate itemsets.
    """
    freq_list = []
    support_data = dict()
    num_transactions = len(dataset)
    for candidate in candidates:
        support_count = sum(
            1 for transaction in dataset if candidate.issubset(transaction)
        )
        # float() keeps true division on Python 2 (the module only enables
        # print_function); an empty dataset gives support 0.0 instead of
        # raising ZeroDivisionError.
        if num_transactions:
            support = float(support_count) / num_transactions
        else:
            support = 0.0
        if support >= min_support:
            freq_list.append(candidate)
        # Support is recorded for every candidate, frequent or not.
        support_data[candidate] = support
    return freq_list, support_data


def apriori_gen(freq_sets, k):
    """Generates candidate itemsets (via the F_k-1 x F_k-1 method).

    This part generates new candidate k-itemsets based on the frequent
    (k-1)-itemsets found in the previous iteration.

    The apriori_gen function performs two operations:
    (1) Generate length k candidate itemsets from length k-1 frequent
        itemsets
    (2) Prune candidate itemsets containing subsets of length k-1 that are
        infrequent

    Parameters
    ----------
    freq_sets : list
        The list of frequent (k-1)-itemsets.

    k : integer
        The cardinality of the current itemsets being evaluated.

    Returns
    -------
    candidate_list : list
        The list of candidate itemsets.
    """
    n = len(freq_sets)
    if n<2:  # Minimum 2 frequent itemsets needed to generate candidates
        return []

    # generate all possible candidate itemsets
    candidate_set = set()
    for i in range(0, n-1):  # iterate through each element
        for j in range(i+1, n):  # and try to combine it with every element after it
            commonElems = freq_sets[i].intersection(freq_sets[j])
            if len(commonElems) >= k-2:  # if k-2 of the items in the sets match
                # combine the sets to make a length k itemset
                newCandidate = freq_sets[i].union(freq_sets[j])
                candidate_set.add(newCandidate)  # add that itemset to the candidates

    # find candidate itemsets which have k-1 length subsets that are infrequent
    invalidCandidates = set()
    for candidate in candidate_set:
        for elem in candidate:
            k1subset = candidate.difference({elem})  # generate every possible k-1 subset
            subsetIsFrequent = False
            for freqItemset in freq_sets:  # check that every k-1 subset is frequent
                if k1subset == freqItemset:
                    subsetIsFrequent = True
            # NOTE(review): the available source is truncated at this point.
            # The statements that presumably use subsetIsFrequent to fill
            # invalidCandidates and return the pruned candidate list are not
            # visible and have deliberately not been reconstructed here.
            # Restore them from the complete file; as shown, this function
            # returns None whenever n >= 2.

Your preview ends here

Eager to read the complete document? Join bartleby learn and gain access to the full version.