remove answers

fbkarsdorp · fbkarsdorp · commit e17ff1f9fc85 · 2014-07-18T14:07:58.000+02:00
diff --git a/Chapter 9 - Learning from Examples.ipynb b/Chapter 9 - Learning from Examples.ipynb
@@ -1,7 +1,7 @@
 {
  "metadata": {
   "name": "",
-  "signature": "sha256:0d06f7c0dcbe740f33bc07e6b2f3b75bd99464160080eeb0dc89c4b381ba494a"
+  "signature": "sha256:cfdf2e2f87626e61a47356de271a32958896c29bce6ec4af4456a82dc25b3c9b"
  },
  "nbformat": 3,
  "nbformat_minor": 0,
@@ -208,7 +208,6 @@
       "    def fit(self, X, y):\n",
       "        \"Find that label in the training data that occurs most often.\"\n",
       "        # insert your code here\n",
-      "        self.most_popular = max(set(y), key=y.count)\n",
       "        \n",
       "    def predict(self, x):\n",
       "        \"Always predict that `x`'s label is equal to the most popular one.\"\n",
@@ -498,10 +497,9 @@
       "        \"\"\"Fit or train the naive bayes classifier. X must be an\n",
       "        iterable of examples where each example is an iterable as\n",
       "        well.\"\"\"\n",
-      "        self.C = Counter(y) # insert your code here (class counts)\n",
-      "        self.N = defaultdict(Counter) # insert your code here (feature counts per class)\n",
-      "        for x, y_x in zip(X, y):\n",
-      "            self.N[y_x] += Counter(x)\n",
+      "        self.C = # insert your code here (class counts)\n",
+      "        self.N = # insert your code here (feature counts per class)\n",
+      "        # add the feature counts per class here\n",
       "       \n",
       "    def predict(self, x):\n",
       "        \"\"\"Predict the outcome for example x. Choose the most\n",
@@ -575,15 +573,12 @@
       "\n",
       "    def prior(self, y):\n",
       "        \"\"\"Return the prior probability of class y.\"\"\"\n",
-      "        # insert your code here\n",
-      "        # pass\n",
-      "        return self.C[y] / sum(self.C.values())\n",
+      "        pass\n",
       "            \n",
       "    def probability(self, x, y):\n",
       "        \"\"\"Apply Laplace Smoothing to give a probability\n",
       "        estimate of feature x given y.\"\"\"\n",
       "        # insert your code here\n",
-      "        return (self.N[y][x] + 1.0) / (sum(self.N[y].values()) + len(self.N))\n",
       "\n",
       "# these tests should return True if your code is correct\n",
       "nb = NaiveBayesLearner()\n",
@@ -720,11 +715,6 @@
       "        \"\"\"Predict the outcome for example x. Choose the most\n",
       "        likely outcome out of all possible outcomes.\"\"\"\n",
       "        # insert your code here\n",
-      "        x_counts = Counter(x)\n",
-      "        def score(y):\n",
-      "            return log(self.prior(y)) + sum(log(self.probability(x_i, y) * c) \n",
-      "                                            for x_i, c in x_counts.items())\n",
-      "        return max(self.N.keys(), key=score)\n",
       "\n",
       "# these tests should return True if your code is correct\n",
       "nb = NaiveBayesLearner()\n",
@@ -863,10 +853,7 @@
       "        iterable = tuple(iterable)\n",
       "    ngrams = []\n",
       "    # insert your code here\n",
-      "    for n in range(min_n, min(max_n + 1, len(iterable) + 1)):\n",
-      "        for i in range(len(iterable) - n + 1):\n",
-      "            ngrams.append(iterable[i: i + n])\n",
-      "    return ngrams # leave this in students version\n",
+      "    return ngrams\n",
       "\n",
       "# these tests should return True if your code is correct\n",
       "print(ngrams(\"humanities\", ngram_range=(2, 3)) == ['hu', 'um', 'ma', 'an', 'ni', 'it', \n",
@@ -985,13 +972,9 @@
       "        \"\"\"Fit or train the Authorship Learner. X must be an\n",
       "        iterable of examples where each example is an iterable as\n",
       "        well.\"\"\"\n",
-      "        # insert your code here\n",
       "        self.C = Counter(y)\n",
       "        self.N = defaultdict(Counter)\n",
-      "        for x, y_x in zip(X, y):\n",
-      "            self.N[y_x] += Counter(x)\n",
-      "        for y_x, counts in self.N.items():\n",
-      "            self.N[y_x] = Counter(dict(counts.most_common(self.n_most_frequent)))\n",
+      "        # insert your code here\n",
       "            \n",
       "# these tests should return True if your code is correct\n",
       "X = [('a', 'a', 'a', 'b', 'b', 'c'), ('a', 'c', 'b', 'b'), ('b', 'b', 'b', 'c', 'c', 'a')]\n",
@@ -1075,7 +1058,6 @@
       "    \"\"\"Return the accuracy, defined as the proportion of\n",
       "    correctly predicted labels.\"\"\"\n",
       "    # insert your code here\n",
-      "    return sum(pred == true for pred, true in zip(pred_labels, true_labels)) / len(true_labels)\n",
       "\n",
       "# these tests should return True if your code is correct\n",
       "predictions = [1, 2, 0, 0, 1]\n",
@@ -1206,13 +1188,6 @@
       "    if matrix is None:\n",
       "        matrix = error_statistics(pred_labels, true_labels)\n",
       "    # insert your code here\n",
-      "    scores = {}\n",
-      "    for label in set(true_labels):\n",
-      "        try:\n",
-      "            scores[label] = matrix[label]['tp'] / (matrix[label]['tp'] + matrix[label]['fn'])\n",
-      "        except ZeroDivisionError:\n",
-      "            scores[label] = 0.0\n",
-      "    return scores if not average else mean(scores.values())\n",
       "    \n",
       "# these tests should return True if your code is correct\n",
       "predictions = [1, 2, 0, 0, 1]\n",
@@ -1238,15 +1213,6 @@
       "    if matrix is None:\n",
       "        matrix = error_statistics(pred_labels, true_labels)\n",
       "    # insert your code here\n",
-      "    p = precision(pred_labels, true_labels, average=False, matrix=matrix)\n",
-      "    r = recall(pred_labels, true_labels, average=False, matrix=matrix)\n",
-      "    scores = {}\n",
-      "    for label in set(true_labels):\n",
-      "        try:\n",
-      "            scores[label] = 2 * (p[label] * r[label]) / (p[label] + r[label])\n",
-      "        except ZeroDivisionError:\n",
-      "            scores[label] = 0.0\n",
-      "    return scores if not average else mean(scores.values())\n",
       "\n",
       "# these tests should return True if your code is correct\n",
       "predictions = [1, 2, 0, 0, 1]\n",
@@ -1451,11 +1417,7 @@
      "collapsed": false,
      "input": [
       "scores = {}\n",
-      "# insert your code here\n",
-      "for sample in range(100, 5000, 500):\n",
-      "    documents = [make_document(f, sample=sample) for f in glob('data/british-novels/*.txt')]\n",
-      "    authors, titles, texts = zip(*documents)\n",
-      "    scores[sample] = cross_validate(AuthorshipLearner(), texts, authors, k=None, score_fn=f_score)"
+      "# insert your code here"
      ],
      "language": "python",
      "metadata": {},
@@ -1493,12 +1455,7 @@
      "collapsed": false,
      "input": [
       "scores = {}\n",
-      "# insert your code here\n",
-      "for n_most_frequent in range(50, 500, 100):\n",
-      "    documents = [make_document(f) for f in glob('data/british-novels/*.txt')]\n",
-      "    authors, titles, texts = zip(*documents)\n",
-      "    scores[n_most_frequent] = cross_validate(AuthorshipLearner(n_most_frequent=n_most_frequent), \n",
-      "                                    texts, authors, k=None, score_fn=f_score)"
+      "# insert your code here"
      ],
      "language": "python",
      "metadata": {},