|
1 | 1 | { |
2 | 2 | "metadata": { |
3 | 3 | "name": "", |
4 | | - "signature": "sha256:0d06f7c0dcbe740f33bc07e6b2f3b75bd99464160080eeb0dc89c4b381ba494a" |
| 4 | + "signature": "sha256:cfdf2e2f87626e61a47356de271a32958896c29bce6ec4af4456a82dc25b3c9b" |
5 | 5 | }, |
6 | 6 | "nbformat": 3, |
7 | 7 | "nbformat_minor": 0, |
|
208 | 208 | " def fit(self, X, y):\n", |
209 | 209 | " \"Find that label in the training data that occurs most often.\"\n", |
210 | 210 | " # insert your code here\n", |
211 | | - " self.most_popular = max(set(y), key=y.count)\n", |
212 | 211 | " \n", |
213 | 212 | " def predict(self, x):\n", |
214 | 213 | " \"Always predict that `x`'s label is equal to the most popular one.\"\n", |
|
498 | 497 | " \"\"\"Fit or train the naive bayes classifier. X must be an\n", |
499 | 498 | " iterable of examples where each example is an iterable as\n", |
500 | 499 | " well.\"\"\"\n", |
501 | | - " self.C = Counter(y) # insert your code here (class counts)\n", |
502 | | - " self.N = defaultdict(Counter) # insert your code here (feature counts per class)\n", |
503 | | - " for x, y_x in zip(X, y):\n", |
504 | | - " self.N[y_x] += Counter(x)\n", |
| 500 | + " self.C = # insert your code here (class counts)\n", |
| 501 | + " self.N = # insert your code here (feature counts per class)\n", |
| 502 | + " # add the feature counts per class here\n", |
505 | 503 | " \n", |
506 | 504 | " def predict(self, x):\n", |
507 | 505 | " \"\"\"Predict the outcome for example x. Choose the most\n", |
|
575 | 573 | "\n", |
576 | 574 | " def prior(self, y):\n", |
577 | 575 | " \"\"\"Return the prior probability of class y.\"\"\"\n", |
578 | | - " # insert your code here\n", |
579 | | - " # pass\n", |
580 | | - " return self.C[y] / sum(self.C.values())\n", |
| 576 | + " pass\n", |
581 | 577 | " \n", |
582 | 578 | " def probability(self, x, y):\n", |
583 | 579 | " \"\"\"Apply Laplace Smoothing to give a probability\n", |
584 | 580 | " estimate of feature x given y.\"\"\"\n", |
585 | 581 | " # insert your code here\n", |
586 | | - " return (self.N[y][x] + 1.0) / (sum(self.N[y].values()) + len(self.N))\n", |
587 | 582 | "\n", |
588 | 583 | "# these tests should return True if your code is correct\n", |
589 | 584 | "nb = NaiveBayesLearner()\n", |
|
720 | 715 | " \"\"\"Predict the outcome for example x. Choose the most\n", |
721 | 716 | " likely outcome out of all possible outcomes.\"\"\"\n", |
722 | 717 | " # insert your code here\n", |
723 | | - " x_counts = Counter(x)\n", |
724 | | - " def score(y):\n", |
725 | | - " return log(self.prior(y)) + sum(log(self.probability(x_i, y) * c) \n", |
726 | | - " for x_i, c in x_counts.items())\n", |
727 | | - " return max(self.N.keys(), key=score)\n", |
728 | 718 | "\n", |
729 | 719 | "# these tests should return True if your code is correct\n", |
730 | 720 | "nb = NaiveBayesLearner()\n", |
|
863 | 853 | " iterable = tuple(iterable)\n", |
864 | 854 | " ngrams = []\n", |
865 | 855 | " # insert your code here\n", |
866 | | - " for n in range(min_n, min(max_n + 1, len(iterable) + 1)):\n", |
867 | | - " for i in range(len(iterable) - n + 1):\n", |
868 | | - " ngrams.append(iterable[i: i + n])\n", |
869 | | - " return ngrams # leave this in students version\n", |
| 856 | + " return ngrams\n", |
870 | 857 | "\n", |
871 | 858 | "# these tests should return True if your code is correct\n", |
872 | 859 | "print(ngrams(\"humanities\", ngram_range=(2, 3)) == ['hu', 'um', 'ma', 'an', 'ni', 'it', \n", |
|
985 | 972 | " \"\"\"Fit or train the Authorship Learner. X must be an\n", |
986 | 973 | " iterable of examples where each example is an iterable as\n", |
987 | 974 | " well.\"\"\"\n", |
988 | | - " # insert your code here\n", |
989 | 975 | " self.C = Counter(y)\n", |
990 | 976 | " self.N = defaultdict(Counter)\n", |
991 | | - " for x, y_x in zip(X, y):\n", |
992 | | - " self.N[y_x] += Counter(x)\n", |
993 | | - " for y_x, counts in self.N.items():\n", |
994 | | - " self.N[y_x] = Counter(dict(counts.most_common(self.n_most_frequent)))\n", |
| 977 | + " # insert your code here\n", |
995 | 978 | " \n", |
996 | 979 | "# these tests should return True if your code is correct\n", |
997 | 980 | "X = [('a', 'a', 'a', 'b', 'b', 'c'), ('a', 'c', 'b', 'b'), ('b', 'b', 'b', 'c', 'c', 'a')]\n", |
|
1075 | 1058 | " \"\"\"Return the accuracy, defined as the proportion of\n", |
1076 | 1059 | " correctly predicted labels.\"\"\"\n", |
1077 | 1060 | " # insert your code here\n", |
1078 | | - " return sum(pred == true for pred, true in zip(pred_labels, true_labels)) / len(true_labels)\n", |
1079 | 1061 | "\n", |
1080 | 1062 | "# these tests should return True if your code is correct\n", |
1081 | 1063 | "predictions = [1, 2, 0, 0, 1]\n", |
|
1206 | 1188 | " if matrix is None:\n", |
1207 | 1189 | " matrix = error_statistics(pred_labels, true_labels)\n", |
1208 | 1190 | " # insert your code here\n", |
1209 | | - " scores = {}\n", |
1210 | | - " for label in set(true_labels):\n", |
1211 | | - " try:\n", |
1212 | | - " scores[label] = matrix[label]['tp'] / (matrix[label]['tp'] + matrix[label]['fn'])\n", |
1213 | | - " except ZeroDivisionError:\n", |
1214 | | - " scores[label] = 0.0\n", |
1215 | | - " return scores if not average else mean(scores.values())\n", |
1216 | 1191 | " \n", |
1217 | 1192 | "# these tests should return True if your code is correct\n", |
1218 | 1193 | "predictions = [1, 2, 0, 0, 1]\n", |
|
1238 | 1213 | " if matrix is None:\n", |
1239 | 1214 | " matrix = error_statistics(pred_labels, true_labels)\n", |
1240 | 1215 | " # insert your code here\n", |
1241 | | - " p = precision(pred_labels, true_labels, average=False, matrix=matrix)\n", |
1242 | | - " r = recall(pred_labels, true_labels, average=False, matrix=matrix)\n", |
1243 | | - " scores = {}\n", |
1244 | | - " for label in set(true_labels):\n", |
1245 | | - " try:\n", |
1246 | | - " scores[label] = 2 * (p[label] * r[label]) / (p[label] + r[label])\n", |
1247 | | - " except ZeroDivisionError:\n", |
1248 | | - " scores[label] = 0.0\n", |
1249 | | - " return scores if not average else mean(scores.values())\n", |
1250 | 1216 | "\n", |
1251 | 1217 | "# these tests should return True if your code is correct\n", |
1252 | 1218 | "predictions = [1, 2, 0, 0, 1]\n", |
|
1451 | 1417 | "collapsed": false, |
1452 | 1418 | "input": [ |
1453 | 1419 | "scores = {}\n", |
1454 | | - "# insert your code here\n", |
1455 | | - "for sample in range(100, 5000, 500):\n", |
1456 | | - " documents = [make_document(f, sample=sample) for f in glob('data/british-novels/*.txt')]\n", |
1457 | | - " authors, titles, texts = zip(*documents)\n", |
1458 | | - " scores[sample] = cross_validate(AuthorshipLearner(), texts, authors, k=None, score_fn=f_score)" |
| 1420 | + "# insert your code here" |
1459 | 1421 | ], |
1460 | 1422 | "language": "python", |
1461 | 1423 | "metadata": {}, |
|
1493 | 1455 | "collapsed": false, |
1494 | 1456 | "input": [ |
1495 | 1457 | "scores = {}\n", |
1496 | | - "# insert your code here\n", |
1497 | | - "for n_most_frequent in range(50, 500, 100):\n", |
1498 | | - " documents = [make_document(f) for f in glob('data/british-novels/*.txt')]\n", |
1499 | | - " authors, titles, texts = zip(*documents)\n", |
1500 | | - " scores[n_most_frequent] = cross_validate(AuthorshipLearner(n_most_frequent=n_most_frequent), \n", |
1501 | | - " texts, authors, k=None, score_fn=f_score)" |
| 1458 | + "# insert your code here" |
1502 | 1459 | ], |
1503 | 1460 | "language": "python", |
1504 | 1461 | "metadata": {}, |
|
0 commit comments