|
33 | 33 | "name": "stdout", |
34 | 34 | "output_type": "stream", |
35 | 35 | "text": [ |
36 | | - "Config saved to: /tmp/tmp60wlr_qk/vdb_config.yaml\n" |
| 36 | + "Config saved to: /tmp/tmpnx5tjaud/vdb_config.yaml\n" |
37 | 37 | ] |
38 | 38 | } |
39 | 39 | ], |
40 | 40 | "source": [ |
41 | 41 | "config_yaml = \"\"\"\n", |
42 | 42 | "repositories:\n", |
43 | 43 | " BrentLab/harbison_2004:\n", |
| 44 | + " tags:\n", |
| 45 | + " assay: binding\n", |
| 46 | + " method: chip-chip\n", |
| 47 | + " organism: yeast\n", |
44 | 48 | " dataset:\n", |
45 | 49 | " harbison_2004:\n", |
46 | 50 | " db_name: harbison\n", |
|
59 | 63 | " field: regulator_symbol\n", |
60 | 64 | "\n", |
61 | 65 | " BrentLab/kemmeren_2014:\n", |
| 66 | + " tags:\n", |
| 67 | + " assay: perturbation\n", |
| 68 | + " method: microarray\n", |
| 69 | + " organism: yeast\n", |
62 | 70 | " dataset:\n", |
63 | 71 | " kemmeren_2014:\n", |
64 | 72 | " db_name: kemmeren\n", |
|
75 | 83 | " field: regulator_symbol\n", |
76 | 84 | "\n", |
77 | 85 | " BrentLab/hackett_2020:\n", |
| 86 | + " # Repo-level tags apply to all datasets in this repository\n", |
| 87 | + " tags:\n", |
| 88 | + " method: test_overwrite\n", |
| 89 | + " organism: yeast\n", |
78 | 90 | " dataset:\n", |
79 | 91 | " hackett_2020:\n", |
| 92 | + " # Dataset-level tags: 'assay' is new,\n", |
| 93 | + " # 'method' overrides the repo-level value\n", |
| 94 | + " tags:\n", |
| 95 | + " assay: perturbation\n", |
| 96 | + " method: overexpression\n", |
80 | 97 | " db_name: hackett\n", |
81 | 98 | " sample_id:\n", |
82 | 99 | " field: sample_id\n", |
|
126 | 143 | "print(f\"Config saved to: {temp_config}\")" |
127 | 144 | ] |
128 | 145 | }, |
| 146 | + { |
| 147 | + "cell_type": "markdown", |
| 148 | + "id": "ftysbe3fwz4", |
| 149 | + "metadata": {}, |
| 150 | + "source": [ |
| 151 | + "## Tags\n", |
| 152 | + "\n", |
| 153 | + "Tags are arbitrary key/value annotations defined in the configuration. They\n", |
| 154 | + "follow the same hierarchy as property mappings: repo-level tags apply to all\n", |
| 155 | + "datasets in that repository, and dataset-level tags override repo-level tags\n", |
| 156 | + "with the same key.\n", |
| 157 | + "\n", |
| 158 | + "Use `vdb.get_tags(db_name)` on the `VirtualDB` instance to retrieve the\n", |
| 159 | + "merged tags for any dataset; no `repo_id` or config name is needed." |
| 160 | + ] |
| 161 | + }, |
| 162 | + { |
| 163 | + "cell_type": "code", |
| 164 | + "execution_count": 2, |
| 165 | + "id": "d0ulj4mzmf7", |
| 166 | + "metadata": {}, |
| 167 | + "outputs": [ |
| 168 | + { |
| 169 | + "ename": "NameError", |
| 170 | + "evalue": "name 'vdb' is not defined", |
| 171 | + "output_type": "error", |
| 172 | + "traceback": [ |
| 173 | + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", |
| 174 | + "\u001b[31mNameError\u001b[39m Traceback (most recent call last)", |
| 175 | + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[2]\u001b[39m\u001b[32m, line 3\u001b[39m\n\u001b[32m 1\u001b[39m \u001b[38;5;66;03m# Tags are accessible directly from the VirtualDB instance using the db_name.\u001b[39;00m\n\u001b[32m 2\u001b[39m \u001b[38;5;66;03m# No need to import MetadataConfig or specify repo_id.\u001b[39;00m\n\u001b[32m----> \u001b[39m\u001b[32m3\u001b[39m \u001b[38;5;28mprint\u001b[39m(\u001b[33m\"\u001b[39m\u001b[33mharbison tags:\u001b[39m\u001b[33m\"\u001b[39m, \u001b[43mvdb\u001b[49m.get_tags(\u001b[33m\"\u001b[39m\u001b[33mharbison\u001b[39m\u001b[33m\"\u001b[39m))\n\u001b[32m 4\u001b[39m \u001b[38;5;28mprint\u001b[39m(\u001b[33m\"\u001b[39m\u001b[33mkemmeren tags:\u001b[39m\u001b[33m\"\u001b[39m, vdb.get_tags(\u001b[33m\"\u001b[39m\u001b[33mkemmeren\u001b[39m\u001b[33m\"\u001b[39m))\n\u001b[32m 6\u001b[39m \u001b[38;5;66;03m# Hackett has tags at both levels:\u001b[39;00m\n\u001b[32m 7\u001b[39m \u001b[38;5;66;03m# 'organism' comes from the repo level only,\u001b[39;00m\n\u001b[32m 8\u001b[39m \u001b[38;5;66;03m# 'assay' is added at the dataset level only,\u001b[39;00m\n\u001b[32m 9\u001b[39m \u001b[38;5;66;03m# 'method' is defined at both levels -- the dataset value wins.\u001b[39;00m\n", |
| 176 | + "\u001b[31mNameError\u001b[39m: name 'vdb' is not defined" |
| 177 | + ] |
| 178 | + } |
| 179 | + ], |
| 180 | + "source": [ |
| 181 | + "# Tags are accessible directly from the VirtualDB instance using the db_name.\n", |
| 182 | + "# No need to import MetadataConfig or specify repo_id.\n", |
| 183 | + "print(\"harbison tags:\", vdb.get_tags(\"harbison\"))\n", |
| 184 | + "print(\"kemmeren tags:\", vdb.get_tags(\"kemmeren\"))\n", |
| 185 | + "\n", |
| 186 | + "# Hackett has tags at both levels:\n", |
| 187 | + "# 'organism' comes from the repo level only,\n", |
| 188 | + "# 'assay' is added at the dataset level only,\n", |
| 189 | + "# 'method' is defined at both levels -- the dataset value wins.\n", |
| 190 | + "print(\"hackett tags:\", vdb.get_tags(\"hackett\"))\n", |
| 191 | + "\n", |
| 192 | + "# Dataset with no tags returns empty dict\n", |
| 193 | + "print(\"dto tags:\", vdb.get_tags(\"dto\"))" |
| 194 | + ] |
| 195 | + }, |
129 | 196 | { |
130 | 197 | "cell_type": "markdown", |
131 | 198 | "id": "cell-3", |
|
140 | 207 | }, |
141 | 208 | { |
142 | 209 | "cell_type": "code", |
143 | | - "execution_count": 2, |
| 210 | + "execution_count": null, |
144 | 211 | "id": "cell-4", |
145 | 212 | "metadata": {}, |
146 | 213 | "outputs": [ |
|
187 | 254 | }, |
188 | 255 | { |
189 | 256 | "cell_type": "code", |
190 | | - "execution_count": 3, |
| 257 | + "execution_count": null, |
191 | 258 | "id": "cell-6", |
192 | 259 | "metadata": {}, |
193 | 260 | "outputs": [ |
|
236 | 303 | }, |
237 | 304 | { |
238 | 305 | "cell_type": "code", |
239 | | - "execution_count": 4, |
| 306 | + "execution_count": null, |
240 | 307 | "id": "pdebujnqb9q", |
241 | 308 | "metadata": {}, |
242 | 309 | "outputs": [ |
|
469 | 536 | }, |
470 | 537 | { |
471 | 538 | "cell_type": "code", |
472 | | - "execution_count": 5, |
| 539 | + "execution_count": null, |
473 | 540 | "id": "9deee334", |
474 | 541 | "metadata": {}, |
475 | 542 | "outputs": [ |
|
806 | 873 | }, |
807 | 874 | { |
808 | 875 | "cell_type": "code", |
809 | | - "execution_count": 6, |
| 876 | + "execution_count": null, |
810 | 877 | "id": "cell-9", |
811 | 878 | "metadata": {}, |
812 | 879 | "outputs": [ |
|
840 | 907 | }, |
841 | 908 | { |
842 | 909 | "cell_type": "code", |
843 | | - "execution_count": 7, |
| 910 | + "execution_count": null, |
844 | 911 | "id": "1a705f1c", |
845 | 912 | "metadata": {}, |
846 | 913 | "outputs": [ |
|
1056 | 1123 | }, |
1057 | 1124 | { |
1058 | 1125 | "cell_type": "code", |
1059 | | - "execution_count": 8, |
| 1126 | + "execution_count": null, |
1060 | 1127 | "id": "cell-17", |
1061 | 1128 | "metadata": {}, |
1062 | 1129 | "outputs": [ |
|
1362 | 1429 | }, |
1363 | 1430 | { |
1364 | 1431 | "cell_type": "code", |
1365 | | - "execution_count": 9, |
| 1432 | + "execution_count": null, |
1366 | 1433 | "id": "cell-19", |
1367 | 1434 | "metadata": {}, |
1368 | 1435 | "outputs": [ |
|
1786 | 1853 | }, |
1787 | 1854 | { |
1788 | 1855 | "cell_type": "code", |
1789 | | - "execution_count": 10, |
| 1856 | + "execution_count": null, |
1790 | 1857 | "id": "cell-21", |
1791 | 1858 | "metadata": {}, |
1792 | 1859 | "outputs": [ |
|
2064 | 2131 | }, |
2065 | 2132 | { |
2066 | 2133 | "cell_type": "code", |
2067 | | - "execution_count": 11, |
| 2134 | + "execution_count": null, |
2068 | 2135 | "id": "cell-22", |
2069 | 2136 | "metadata": {}, |
2070 | 2137 | "outputs": [ |
|
2427 | 2494 | }, |
2428 | 2495 | { |
2429 | 2496 | "cell_type": "code", |
2430 | | - "execution_count": 12, |
| 2497 | + "execution_count": null, |
2431 | 2498 | "id": "cell-23", |
2432 | 2499 | "metadata": {}, |
2433 | 2500 | "outputs": [ |
|
2691 | 2758 | }, |
2692 | 2759 | { |
2693 | 2760 | "cell_type": "code", |
2694 | | - "execution_count": 13, |
| 2761 | + "execution_count": null, |
2695 | 2762 | "id": "f03e942a", |
2696 | 2763 | "metadata": {}, |
2697 | 2764 | "outputs": [ |
|
3387 | 3454 | }, |
3388 | 3455 | { |
3389 | 3456 | "cell_type": "code", |
3390 | | - "execution_count": 14, |
| 3457 | + "execution_count": null, |
3391 | 3458 | "id": "4d869036", |
3392 | 3459 | "metadata": {}, |
3393 | 3460 | "outputs": [ |
|
3603 | 3670 | }, |
3604 | 3671 | { |
3605 | 3672 | "cell_type": "code", |
3606 | | - "execution_count": 15, |
| 3673 | + "execution_count": null, |
3607 | 3674 | "id": "89408d2b", |
3608 | 3675 | "metadata": {}, |
3609 | 3676 | "outputs": [ |
|
3631 | 3698 | }, |
3632 | 3699 | { |
3633 | 3700 | "cell_type": "code", |
3634 | | - "execution_count": 16, |
| 3701 | + "execution_count": null, |
3635 | 3702 | "id": "5a3b802b", |
3636 | 3703 | "metadata": {}, |
3637 | 3704 | "outputs": [ |
|
3651 | 3718 | }, |
3652 | 3719 | { |
3653 | 3720 | "cell_type": "code", |
3654 | | - "execution_count": 17, |
| 3721 | + "execution_count": null, |
3655 | 3722 | "id": "abed8bc2", |
3656 | 3723 | "metadata": {}, |
3657 | 3724 | "outputs": [ |
|
4927 | 4994 | }, |
4928 | 4995 | { |
4929 | 4996 | "cell_type": "code", |
4930 | | - "execution_count": 18, |
| 4997 | + "execution_count": null, |
4931 | 4998 | "id": "cell-25", |
4932 | 4999 | "metadata": {}, |
4933 | 5000 | "outputs": [ |
|
5017 | 5084 | }, |
5018 | 5085 | { |
5019 | 5086 | "cell_type": "code", |
5020 | | - "execution_count": 19, |
| 5087 | + "execution_count": null, |
5021 | 5088 | "id": "cell-26", |
5022 | 5089 | "metadata": {}, |
5023 | 5090 | "outputs": [], |
|
0 commit comments