|
2 | 2 | "cells": [ |
3 | 3 | { |
4 | 4 | "cell_type": "markdown", |
| 5 | + "id": "0b7b320c", |
5 | 6 | "metadata": {}, |
6 | 7 | "source": [ |
7 | 8 | "# OpenRouter sample annotation with Leiden clusters\n", |
|
13 | 14 | "- annotate clusters across samples with `CellAnnotator`.\n", |
14 | 15 | "\n", |
15 | 16 | "It follows the same high-level workflow as `100_heart_atlas.ipynb`, but is parameterized for OpenRouter." |
16 | | - ], |
17 | | - "id": "0b7b320c" |
| 17 | + ] |
18 | 18 | }, |
19 | 19 | { |
20 | 20 | "cell_type": "code", |
| 21 | + "execution_count": null, |
| 22 | + "id": "add3858e", |
21 | 23 | "metadata": {}, |
| 24 | + "outputs": [], |
22 | 25 | "source": [ |
23 | 26 | "import scanpy as sc\n", |
24 | | - "import pandas as pd\n", |
25 | | - "from cell_annotator import CellAnnotator\n", |
26 | 27 | "\n", |
27 | 28 | "print(\"scanpy:\", sc.__version__)" |
28 | | - ], |
29 | | - "execution_count": null, |
30 | | - "outputs": [], |
31 | | - "id": "add3858e" |
| 29 | + ] |
32 | 30 | }, |
33 | 31 | { |
34 | 32 | "cell_type": "markdown", |
| 33 | + "id": "a2337a8d", |
35 | 34 | "metadata": {}, |
36 | 35 | "source": [ |
37 | 36 | "## Configure your run\n", |
|
41 | 40 | "- `OPENROUTER_MODEL`: an OpenRouter model ID (for example: `openai/gpt-4o-mini`)\n", |
42 | 41 | "- `LEIDEN_KEY`: the Leiden column in `adata.obs`\n", |
43 | 42 | "- `SAMPLE_KEY`: sample/batch column in `adata.obs` (or `None` for a single-sample dataset)" |
44 | | - ], |
45 | | - "id": "a2337a8d" |
| 43 | + ] |
46 | 44 | }, |
47 | 45 | { |
48 | 46 | "cell_type": "code", |
| 47 | + "execution_count": null, |
| 48 | + "id": "70164ce3", |
49 | 49 | "metadata": {}, |
| 50 | + "outputs": [], |
50 | 51 | "source": [ |
51 | 52 | "# Required\n", |
52 | 53 | "OPENROUTER_API_KEY = \"\" # e.g. sk-or-v1-...\n", |
|
66 | 67 | " raise ValueError(\"Set OPENROUTER_API_KEY before continuing.\")\n", |
67 | 68 | "if not OPENROUTER_MODEL:\n", |
68 | 69 | " raise ValueError(\"Set OPENROUTER_MODEL before continuing.\")" |
69 | | - ], |
70 | | - "execution_count": null, |
71 | | - "outputs": [], |
72 | | - "id": "70164ce3" |
| 70 | + ] |
73 | 71 | }, |
74 | 72 | { |
75 | 73 | "cell_type": "code", |
| 74 | + "execution_count": null, |
| 75 | + "id": "2428de31", |
76 | 76 | "metadata": {}, |
| 77 | + "outputs": [], |
77 | 78 | "source": [ |
78 | 79 | "adata = sc.read_h5ad(ADATA_PATH)\n", |
79 | 80 | "\n", |
|
86 | 87 | "print(adata)\n", |
87 | 88 | "print(\"Leiden key:\", LEIDEN_KEY)\n", |
88 | 89 | "print(\"Sample key:\", SAMPLE_KEY)" |
89 | | - ], |
90 | | - "execution_count": null, |
91 | | - "outputs": [], |
92 | | - "id": "2428de31" |
| 90 | + ] |
93 | 91 | }, |
94 | 92 | { |
95 | 93 | "cell_type": "code", |
| 94 | + "execution_count": null, |
| 95 | + "id": "ffe7a22b-8d3c-4ad2-9dfd-13bc01cb7f6a", |
96 | 96 | "metadata": {}, |
| 97 | + "outputs": [], |
97 | 98 | "source": [ |
98 | 99 | "adata.obs" |
99 | | - ], |
100 | | - "execution_count": null, |
101 | | - "outputs": [], |
102 | | - "id": "ffe7a22b-8d3c-4ad2-9dfd-13bc01cb7f6a" |
| 100 | + ] |
103 | 101 | }, |
104 | 102 | { |
105 | 103 | "cell_type": "code", |
106 | | - "metadata": {}, |
107 | | - "source": [ |
108 | | - "adata.obsm['spatial']" |
109 | | - ], |
110 | 104 | "execution_count": null, |
| 105 | + "id": "bd6282fc-2140-4e9f-a81c-1bc0f4200261", |
| 106 | + "metadata": {}, |
111 | 107 | "outputs": [], |
112 | | - "id": "bd6282fc-2140-4e9f-a81c-1bc0f4200261" |
| 108 | + "source": [ |
| 109 | + "adata.obsm[\"spatial\"]" |
| 110 | + ] |
113 | 111 | }, |
114 | 112 | { |
115 | 113 | "cell_type": "code", |
| 114 | + "execution_count": null, |
| 115 | + "id": "6245b49a", |
116 | 116 | "metadata": {}, |
| 117 | + "outputs": [], |
117 | 118 | "source": [ |
118 | 119 | "# 1) Ask the model for expected cell types and marker genes\n", |
119 | 120 | "cell_ann.get_expected_cell_type_markers()\n", |
|
123 | 124 | "\n", |
124 | 125 | "# 3) Annotate clusters and write predictions to adata.obs\n", |
125 | 126 | "cell_ann.annotate_clusters(key_added=\"cell_type_predicted\")" |
126 | | - ], |
127 | | - "execution_count": null, |
128 | | - "outputs": [], |
129 | | - "id": "6245b49a" |
| 127 | + ] |
130 | 128 | }, |
131 | 129 | { |
132 | 130 | "cell_type": "code", |
| 131 | + "execution_count": null, |
| 132 | + "id": "df316877", |
133 | 133 | "metadata": {}, |
| 134 | + "outputs": [], |
134 | 135 | "source": [ |
135 | 136 | "if \"X_umap\" in adata.obsm:\n", |
136 | 137 | " sc.pl.umap(adata, color=[LEIDEN_KEY, \"cell_type_predicted\"], wspace=0.35)\n", |
|
140 | 141 | "output_path = ADATA_PATH.replace(\".h5ad\", \"_annotated.h5ad\")\n", |
141 | 142 | "adata.write(output_path)\n", |
142 | 143 | "print(f\"Saved annotated object to: {output_path}\")" |
143 | | - ], |
144 | | - "execution_count": null, |
145 | | - "outputs": [], |
146 | | - "id": "df316877" |
| 144 | + ] |
147 | 145 | }, |
148 | 146 | { |
149 | 147 | "cell_type": "code", |
150 | | - "metadata": {}, |
151 | | - "source": [ |
152 | | - "fig = sc.pl.embedding(adata, basis='X_umap_drvi', color='cell_type_predicted', title='s', )" |
153 | | - ], |
154 | 148 | "execution_count": null, |
| 149 | + "id": "294208db-4c90-4855-b875-2841b23b8a20", |
| 150 | + "metadata": {}, |
155 | 151 | "outputs": [], |
156 | | - "id": "294208db-4c90-4855-b875-2841b23b8a20" |
| 152 | + "source": [ |
| 153 | + "fig = sc.pl.embedding(\n", |
| 154 | + " adata,\n", |
| 155 | + " basis=\"X_umap_drvi\",\n", |
| 156 | + " color=\"cell_type_predicted\",\n", |
| 157 | + " title=\"s\",\n", |
| 158 | + ")" |
| 159 | + ] |
157 | 160 | }, |
158 | 161 | { |
159 | 162 | "cell_type": "code", |
| 163 | + "execution_count": null, |
| 164 | + "id": "a9e74b69-ca17-44c4-a010-6cf149cccb73", |
160 | 165 | "metadata": {}, |
| 166 | + "outputs": [], |
161 | 167 | "source": [ |
162 | 168 | "fig" |
163 | | - ], |
164 | | - "execution_count": null, |
165 | | - "outputs": [], |
166 | | - "id": "a9e74b69-ca17-44c4-a010-6cf149cccb73" |
| 169 | + ] |
167 | 170 | } |
168 | 171 | ], |
169 | 172 | "metadata": { |
|
0 commit comments