{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Utility-weighted Apriori\n",
    "\n",
    "Mines itemsets and association rules from a comma-separated transaction file (`DATA_FILE`).\n",
    "\n",
    "- Every item has a utility weight (`ITEM_UTILITIES`); items without one weigh 0.\n",
    "- An itemset is scored by its *weighted support*: support x mean utility of its items.\n",
    "- The mining threshold is derived from the data by `getminsup`.\n",
    "- Rule confidence and lift are computed from plain (unweighted) support, so confidence stays in [0, 1].\n",
    "\n",
    "Run with **Restart Kernel -> Run All**; stored outputs were cleared because they predate these fixes."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys  # unused here; retained from the original notebook\n",
    "\n",
    "from collections import defaultdict\n",
    "from itertools import chain, combinations\n",
    "from operator import itemgetter\n",
    "from optparse import OptionParser  # unused here; retained from the original notebook"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "DATA_FILE = 'zoo.csv'  # one comma-separated transaction per line\n",
    "minConfidence = 0.5    # rules below this confidence are dropped\n",
    "\n",
    "# Utility weight of each item; an item with no entry weighs 0.\n",
    "ITEM_UTILITIES = {\n",
    "    '1': 2, '2': 5, '3': 3, '4': 5, '5': 0.5,\n",
    "    '6': 1, '7': 0.1, '8': 8, '9': 4,\n",
    "}\n",
    "utilSet = defaultdict(\n",
    "    int, {frozenset([item]): weight for item, weight in ITEM_UTILITIES.items()}\n",
    ")\n",
    "\n",
    "# Tables filled in while mining; re-running this cell resets them.\n",
    "freqSet = defaultdict(int)   # itemset -> number of transactions containing it\n",
    "utilSet2 = defaultdict(int)  # itemset -> support * mean item utility"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Helpers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def dataFromFile(fname):\n",
    "    \"\"\"Yield one transaction (a frozenset of item strings) per non-blank line of `fname`.\n",
    "\n",
    "    Fields are comma-separated. Whitespace around a field is stripped, and empty\n",
    "    fields (for example from a trailing comma) and empty lines are skipped so that\n",
    "    the empty string never becomes an item.\n",
    "    \"\"\"\n",
    "    with open(fname, 'r') as file_iter:\n",
    "        for line in file_iter:\n",
    "            fields = (field.strip() for field in line.split(','))\n",
    "            record = frozenset(field for field in fields if field)\n",
    "            if record:\n",
    "                yield record\n",
    "\n",
    "\n",
    "def getItemSetTransactionList(data_iterator):\n",
    "    \"\"\"Return `(itemSet, transactionList)` for an iterable of records.\n",
    "\n",
    "    `transactionList` holds each record as a frozenset, in input order;\n",
    "    `itemSet` is the set of 1-itemsets (frozensets of one item) seen in them.\n",
    "    \"\"\"\n",
    "    transactionList = [frozenset(record) for record in data_iterator]\n",
    "    itemSet = {\n",
    "        frozenset([item]) for transaction in transactionList for item in transaction\n",
    "    }\n",
    "    return itemSet, transactionList\n",
    "\n",
    "\n",
    "def subsets(arr):\n",
    "    \"\"\"Return an iterator over every non-empty subset of `arr`, as tuples.\"\"\"\n",
    "    items = list(arr)\n",
    "    return chain.from_iterable(\n",
    "        combinations(items, size) for size in range(1, len(items) + 1)\n",
    "    )\n",
    "\n",
    "\n",
    "def joinSet(itemSet, length):\n",
    "    \"\"\"Return every union of two members of `itemSet` that has exactly `length` items.\"\"\"\n",
    "    members = frozenset(itemSet)\n",
    "    unions = (left | right for left in members for right in members)\n",
    "    return {union for union in unions if len(union) == length}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def _countOccurrences(itemSet, transactionList):\n",
    "    \"\"\"Return {itemset: count of transactions containing it}; zero counts are omitted.\"\"\"\n",
    "    counts = {}\n",
    "    for item in itemSet:\n",
    "        count = sum(1 for transaction in transactionList if item.issubset(transaction))\n",
    "        if count:\n",
    "            counts[item] = count\n",
    "    return counts\n",
    "\n",
    "\n",
    "def _totalUtility(item):\n",
    "    \"\"\"Sum the utilSet weights of every utilSet key that is contained in `item`.\"\"\"\n",
    "    return sum(float(util) for key, util in utilSet.items() if key.issubset(item))\n",
    "\n",
    "\n",
    "def getminsup(itemSet, transactionList):\n",
    "    \"\"\"Derive the mining threshold from the 1-itemsets.\n",
    "\n",
    "    Records each item's transaction count in `freqSet` and its weighted support\n",
    "    (support * utility) in `utilSet2`, then returns the mean weighted support over\n",
    "    `itemSet` divided by the number of transactions.\n",
    "\n",
    "    Raises ValueError when either argument is empty.\n",
    "    \"\"\"\n",
    "    if not itemSet or not transactionList:\n",
    "        raise ValueError('getminsup needs at least one item and one transaction')\n",
    "    n = len(transactionList)\n",
    "    total = 0.0\n",
    "    for item, count in _countOccurrences(itemSet, transactionList).items():\n",
    "        freqSet[item] = count  # assigned, not incremented: a later recount cannot inflate it\n",
    "        weighted = count / n * utilSet.get(item, 0)  # .get avoids inserting missing items\n",
    "        utilSet2[item] = weighted\n",
    "        total += weighted\n",
    "    # NOTE(review): the mean is divided by the transaction count a second time;\n",
    "    # kept as in the original heuristic - confirm this is intended.\n",
    "    return total / len(itemSet) / n\n",
    "\n",
    "\n",
    "def getfreqitem2(itemSet, transactionList, minimumSupport, freqSet, k):\n",
    "    \"\"\"Return the members of `itemSet` whose weighted support is >= `minimumSupport`.\n",
    "\n",
    "    For every candidate that occurs in at least one transaction, its count is stored\n",
    "    in `freqSet` and its weighted support (support * total utility / k) in `utilSet2`.\n",
    "    `k` is the candidate size used to average the utility. Candidates that occur in\n",
    "    no transaction are never returned, whatever the threshold.\n",
    "    \"\"\"\n",
    "    n = len(transactionList)\n",
    "    kept = set()\n",
    "    for item, count in _countOccurrences(itemSet, transactionList).items():\n",
    "        freqSet[item] = count\n",
    "        weighted = count / n * (_totalUtility(item) / k)\n",
    "        utilSet2[item] = weighted\n",
    "        if weighted >= minimumSupport:\n",
    "            kept.add(item)\n",
    "    return kept\n",
    "\n",
    "\n",
    "def getSupport(item):\n",
    "    \"\"\"Return the weighted support recorded for `item` in `utilSet2` (0.0 if none).\n",
    "\n",
    "    Despite the name this is the utility-weighted score, not the plain support.\n",
    "    \"\"\"\n",
    "    return float(utilSet2.get(item, 0))\n",
    "\n",
    "\n",
    "def mineFrequentItemsets(itemSet, transactionList, minsup):\n",
    "    \"\"\"Level-wise search; return {k: set of k-itemsets that passed `minsup`}.\"\"\"\n",
    "    largeSet = {}\n",
    "    k = 1\n",
    "    currentLSet = getfreqitem2(itemSet, transactionList, minsup, freqSet, k)\n",
    "    while currentLSet:\n",
    "        largeSet[k] = currentLSet\n",
    "        k += 1\n",
    "        candidates = joinSet(currentLSet, k)\n",
    "        currentLSet = getfreqitem2(candidates, transactionList, minsup, freqSet, k)\n",
    "    return largeSet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def generateRules(largeSet, transactionList, minConfidence):\n",
    "    \"\"\"Return `((antecedent, consequent), confidence, lift)` for every qualifying rule.\n",
    "\n",
    "    Rules are built from each itemset of size >= 2 in `largeSet`. Confidence and lift\n",
    "    use plain relative support counted from `transactionList` (not the weighted score),\n",
    "    so confidence is always within [0, 1]. Antecedent and consequent are sorted tuples.\n",
    "    \"\"\"\n",
    "    n = len(transactionList)\n",
    "    supportCache = {}\n",
    "\n",
    "    def support(itemset):\n",
    "        # Counted on demand: a subset may never have been a candidate at its own level.\n",
    "        if itemset not in supportCache:\n",
    "            hits = sum(1 for transaction in transactionList if itemset.issubset(transaction))\n",
    "            supportCache[itemset] = hits / n\n",
    "        return supportCache[itemset]\n",
    "\n",
    "    rules = []\n",
    "    for size, itemsets in largeSet.items():\n",
    "        if size < 2:\n",
    "            continue\n",
    "        for item in itemsets:\n",
    "            for element in map(frozenset, subsets(item)):\n",
    "                remain = item.difference(element)\n",
    "                if not remain:\n",
    "                    continue\n",
    "                # support(element) >= support(item) > 0, so no division by zero here.\n",
    "                confidence = support(item) / support(element)\n",
    "                if confidence >= minConfidence:\n",
    "                    lift = confidence / support(remain)\n",
    "                    rule = (tuple(sorted(element)), tuple(sorted(remain)))\n",
    "                    rules.append((rule, confidence, lift))\n",
    "    return rules"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "itemSet, transactionList = getItemSetTransactionList(dataFromFile(DATA_FILE))\n",
    "assert transactionList, 'no transactions read from ' + DATA_FILE\n",
    "\n",
    "print('------ITEMSET--------------')\n",
    "print(sorted(tuple(item) for item in itemSet))\n",
    "print('---------------TRANSACTION LIST----------------')\n",
    "print('%d transactions; first 5:' % len(transactionList))\n",
    "for transaction in transactionList[:5]:\n",
    "    print(sorted(transaction))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Mine frequent itemsets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "minsup = getminsup(itemSet, transactionList)\n",
    "print('MINIMUM SUPPORT', minsup)\n",
    "\n",
    "# {k: k-itemsets whose weighted support reached minsup}\n",
    "largeSet = mineFrequentItemsets(itemSet, transactionList, minsup)\n",
    "\n",
    "toRetItems = [\n",
    "    (tuple(sorted(item)), freqSet[item] / len(transactionList), getSupport(item))\n",
    "    for itemsets in largeSet.values()\n",
    "    for item in itemsets\n",
    "]\n",
    "\n",
    "print('\\n------------------------ FREQUENT ITEMSET:')\n",
    "for item, support, weighted in sorted(toRetItems, key=itemgetter(2)):\n",
    "    print('item: %s , support %.3f, weighted %.3f' % (str(item), support, weighted))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Association rules\n",
    "\n",
    "Columns are confidence and lift; rules are listed by decreasing lift."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "toRetRules = generateRules(largeSet, transactionList, minConfidence)\n",
    "res = sorted(toRetRules, key=itemgetter(2), reverse=True)\n",
    "\n",
    "print('\\n------------------------ RULES:')\n",
    "for (pre, post), confidence, lift in res:\n",
    "    print('Rule: %s ==> %s , %.3f, %.3f' % (str(pre), str(post), confidence, lift))\n",
    "\n",
    "print(len(res))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}