|
1 | 1 | { |
2 | 2 | "metadata": { |
3 | 3 | "name": "", |
4 | | - "signature": "sha256:4a735510c93747b11ebb86855c4f985cce0023c63086b07a69dc1e0d784b3667" |
| 4 | + "signature": "sha256:d0a528bc10b3bb6717a054cfee3f7306cf4daa84f567a233c234843c5cfe34dd" |
5 | 5 | }, |
6 | 6 | "nbformat": 3, |
7 | 7 | "nbformat_minor": 0, |
|
333 | 333 | "input": [ |
334 | 334 | "def jaccard_distance(a, b):\n", |
335 | 335 | " # insert your code here\n", |
336 | | - " union_len = len(a.union(b))\n", |
337 | | - " return (union_len - len(a.intersection(b))) / union_len\n", |
338 | 336 | "\n", |
339 | 337 | "# these tests should return True if your code is correct\n", |
340 | 338 | "print(jaccard_distance({'a', 'b', 'c'}, {'b', 'c', 'a'}) == 0.0)\n", |
|
357 | 355 | "input": [ |
358 | 356 | "def pairwise_distances(X, distance_fn=jaccard_distance):\n", |
359 | 357 | " # insert your code here\n", |
360 | | - " n = len(X)\n", |
361 | | - " distances = [[0 for i in range(n)] for _ in range(n)]\n", |
362 | | - " for i in range(n):\n", |
363 | | - " for j in range(i):\n", |
364 | | - " distances[i][j] = distance = distance_fn(X[i], X[j])\n", |
365 | | - " distances[j][i] = distance\n", |
366 | | - " return distances\n", |
367 | 358 | "\n", |
368 | 359 | "# these tests should return True if your code is correct\n", |
369 | 360 | "X = [{'a', 'f', 'c'}, {'b', 'd', 'a'}, {'a', 'b', 'c'}, {'f', 'b', 'c'}]\n", |
|
391 | 382 | "\n", |
392 | 383 | "def smallest_distance(dm):\n", |
393 | 384 | " # insert your code here\n", |
394 | | - " return min(combinations(range(len(dm)), 2), key=lambda i: dm[i[0]][i[1]])\n", |
395 | 385 | "\n", |
396 | 386 | "# these tests should return True if your code is correct\n", |
397 | 387 | "distances = [[0, 1, 2, 3, 3, 2],\n", |
|
540 | 530 | " \n", |
541 | 531 | " def merge(self, i, j):\n", |
542 | 532 | " # insert your code here\n", |
543 | | - " self._clusters[i] = Cluster(self._n, self._clusters[i], self._clusters[j])\n", |
544 | | - " self._n += 1\n", |
545 | | - " del self._clusters[j]\n", |
546 | 533 | " \n", |
547 | 534 | " def __str__(self):\n", |
548 | 535 | " return '%s' % self._clusters[0]\n", |
|
659 | 646 | "input": [ |
660 | 647 | "def single_linkage(dm, i, j):\n", |
661 | 648 | " # insert your code here\n", |
662 | | - " for k in range(len(dm)):\n", |
663 | | - " if k != i and k != j:\n", |
664 | | - " dm[i][k] = distance = min(dm[i][k], dm[j][k])\n", |
665 | | - " dm[k][i] = distance\n", |
666 | | - " dm = [[val for c, val in enumerate(row) if c != j] \n", |
667 | | - " for r, row in enumerate(dm) if r != j]\n", |
668 | | - " return dm\n", |
669 | 649 | "\n", |
670 | 650 | "# these tests should return True if your code is correct\n", |
671 | 651 | "\n", |
|
699 | 679 | "input": [ |
700 | 680 | "def cluster(data_points, labels=None, linkage=single_linkage, distance_fn=jaccard_distance):\n", |
701 | 681 | " # initialize a `ClusterTree` with n=len(data_points)\n", |
702 | | - " tree = ClusterTree(len(data_points), labels=labels)\n", |
| 682 | + " tree = # insert your code here\n", |
703 | 683 | " # compute the pairwise distances between all data points \n", |
704 | 684 | " # using the provided distance function\n", |
705 | | - " dm = pairwise_distances(data_points, distance_fn=distance_fn) # insert your code here\n", |
| 685 | + " dm = # insert your code here\n", |
706 | 686 | " while len(dm) > 1:\n", |
707 | 687 | " # extract the indices of the clusters corresponding to the \n", |
708 | 688 | " # two closest clusters in the distance matrix\n", |
709 | | - " i, j = smallest_distance(dm) # insert your code here\n", |
| 689 | + " i, j = # insert your code here\n", |
710 | 690 | " # update the distance matrix using the provided linkage function\n", |
711 | | - " dm = linkage(dm, i, j) # insert your code here\n", |
| 691 | + " dm = # insert your code here\n", |
712 | 692 | " # merge the two clusters in the ClusterTree:\n", |
713 | | - " tree.merge(i, j) # insert your code here\n", |
| 693 | + " # insert your code here\n", |
714 | 694 | " return tree\n", |
715 | 695 | "\n", |
716 | 696 | "# these tests should return True if your code is correct\n", |
|
807 | 787 | "input": [ |
808 | 788 | "def complete_linkage(dm, i, j):\n", |
809 | 789 | " # insert your code here\n", |
810 | | - " return general_linkage(dm, i, j, max)\n", |
811 | 790 | "\n", |
812 | 791 | "# these tests should return True if your code is correct\n", |
813 | 792 | "\n", |
|
926 | 905 | "input": [ |
927 | 906 | "def summed_jaccard_distance(A, B):\n", |
928 | 907 | " # insert your code here\n", |
929 | | - " return sum(jaccard_distance(set(list(a)), set(list(b))) for a, b in zip(A, B))\n", |
930 | 908 | "\n", |
931 | 909 | "# these tests should return True if your code is correct\n", |
932 | 910 | "print(round(summed_jaccard_distance(numerals[0], numerals[4]), 2) == 5.57)\n", |
|
947 | 925 | "cell_type": "code", |
948 | 926 | "collapsed": false, |
949 | 927 | "input": [ |
950 | | - "solution = cluster(numerals, labels=languages, \n", |
951 | | - " distance_fn=summed_jaccard_distance) # insert your code here\n", |
| 928 | + "solution = # insert your code here\n", |
952 | 929 | "print(solution)" |
953 | 930 | ], |
954 | 931 | "language": "python", |
|
0 commit comments