AntonioLonga
diff --git a/‎README.md‎
Lines changed: 8 additions & 0 deletions b/‎README.md‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎Tutorial12/Tutorial12 GAE for link prediction.ipynb‎
Lines changed: 290 additions & 0 deletions b/‎Tutorial12/Tutorial12 GAE for link prediction.ipynb‎
Lines changed: 290 additions & 0 deletions
@@ -25,3 +25,11 @@ Feel free to join our weekly online tutorial, for more details, have a look at t
 
 * Tutorial8: Graph Generation.
 
+* Tutorial9: Recurrent Graph Neural Networks.
+
+* Tutorial10: DeepWalk and Node2Vec (Theory).
+
+* Tutorial11: DeepWalk and Node2Vec (Practice).
+
+* Tutorial12: Edge analysis.
+
@@ -0,0 +1,290 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os.path as osp\n",
+    "\n",
+    "import torch\n",
+    "import torch.nn.functional as F\n",
+    "from sklearn.metrics import roc_auc_score\n",
+    "\n",
+    "from torch_geometric.utils import negative_sampling\n",
+    "from torch_geometric.datasets import Planetoid\n",
+    "import torch_geometric.transforms as T\n",
+    "from torch_geometric.nn import GCNConv\n",
+    "from torch_geometric.utils import train_test_split_edges"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# GAE for link prediction\n",
+    "\n",
+    "[code](https://github.com/rusty1s/pytorch_geometric/blob/master/examples/link_pred.py)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "\n",
+    "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
+    "device = \"cpu\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Data(edge_index=[2, 10556], test_mask=[2708], train_mask=[2708], val_mask=[2708], x=[2708, 1433], y=[2708])\n"
+     ]
+    }
+   ],
+   "source": [
+    "# load the Cora dataset\n",
+    "# The name string gets its own variable so `dataset` only ever refers to the\n",
+    "# Planetoid object that later cells read (e.g. dataset.num_features).\n",
+    "dataset_name = 'Cora'\n",
+    "path = osp.join('.', 'data', dataset_name)\n",
+    "dataset = Planetoid(path, dataset_name, transform=T.NormalizeFeatures())\n",
+    "data = dataset[0]\n",
+    "# Print the graph object used by the following cells instead of reaching into\n",
+    "# the dataset's internal `.data` storage.\n",
+    "print(data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Data(test_neg_edge_index=[2, 527], test_pos_edge_index=[2, 527], train_neg_adj_mask=[2708, 2708], train_pos_edge_index=[2, 8976], val_neg_edge_index=[2, 263], val_pos_edge_index=[2, 263], x=[2708, 1433])\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Clear the node masks and labels on the graph, then use train_test_split_edges\n",
+    "# to create the positive and negative edge sets shown in the printed summary.\n",
+    "data.train_mask = None\n",
+    "data.val_mask = None\n",
+    "data.test_mask = None\n",
+    "data.y = None\n",
+    "data = train_test_split_edges(data)\n",
+    "print(data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Simple autoencoder model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class Net(torch.nn.Module):\n",
+    "    def __init__(self):\n",
+    "        super(Net, self).__init__()\n",
+    "        self.conv1 = GCNConv(dataset.num_features, 128)\n",
+    "        self.conv2 = GCNConv(128, 64)\n",
+    "\n",
+    "    def encode(self):\n",
+    "        x = self.conv1(data.x, data.train_pos_edge_index) # convolution 1\n",
+    "        x = x.relu()\n",
+    "        return self.conv2(x, data.train_pos_edge_index) # convolution 2\n",
+    "\n",
+    "    def decode(self, z, pos_edge_index, neg_edge_index): # only pos and neg edges\n",
+    "        edge_index = torch.cat([pos_edge_index, neg_edge_index], dim=-1) # concatenate pos and neg edges\n",
+    "        logits = (z[edge_index[0]] * z[edge_index[1]]).sum(dim=-1)  # dot product \n",
+    "        return logits\n",
+    "\n",
+    "    def decode_all(self, z): \n",
+    "        prob_adj = z @ z.t() # get adj NxN\n",
+    "        return (prob_adj > 0).nonzero(as_tuple=False).t() # get predicted edge_list "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "# Build the model, then move the model and the graph to the selected device.\n",
+    "model = Net().to(device)\n",
+    "data = data.to(device)\n",
+    "# Adam over all model parameters, learning rate 0.01.\n",
+    "optimizer = torch.optim.Adam(model.parameters(), lr=0.01)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "def get_link_labels(pos_edge_index, neg_edge_index):\n",
+    "    # returns a tensor:\n",
+    "    # [1,1,1,1,...,0,0,0,0,0,..] with the number of ones is equel to the lenght of pos_edge_index\n",
+    "    # and the number of zeros is equal to the length of neg_edge_index\n",
+    "    E = pos_edge_index.size(1) + neg_edge_index.size(1)\n",
+    "    link_labels = torch.zeros(E, dtype=torch.float, device=device)\n",
+    "    link_labels[:pos_edge_index.size(1)] = 1.\n",
+    "    return link_labels\n",
+    "\n",
+    "\n",
+    "def train():\n",
+    "    \"\"\"Run a single optimisation step on the training edges and return the loss.\n",
+    "\n",
+    "    Uses the notebook-level `model`, `data` and `optimizer`. The returned value is\n",
+    "    the loss tensor produced by `F.binary_cross_entropy_with_logits`.\n",
+    "    \"\"\"\n",
+    "    model.train()\n",
+    "\n",
+    "    train_pos = data.train_pos_edge_index\n",
+    "    # Sample as many negative edges as there are positive training edges.\n",
+    "    sampled_neg = negative_sampling(\n",
+    "        edge_index=train_pos,\n",
+    "        num_nodes=data.num_nodes,\n",
+    "        num_neg_samples=train_pos.size(1))\n",
+    "\n",
+    "    optimizer.zero_grad()\n",
+    "\n",
+    "    embeddings = model.encode()\n",
+    "    scores = model.decode(embeddings, train_pos, sampled_neg)\n",
+    "    targets = get_link_labels(train_pos, sampled_neg)\n",
+    "\n",
+    "    loss = F.binary_cross_entropy_with_logits(scores, targets)\n",
+    "    loss.backward()\n",
+    "    optimizer.step()\n",
+    "\n",
+    "    return loss\n",
+    "\n",
+    "\n",
+    "@torch.no_grad()\n",
+    "def test():\n",
+    "    \"\"\"Return [validation ROC-AUC, test ROC-AUC] for the notebook-level `model`.\n",
+    "\n",
+    "    Runs in eval mode with gradient tracking disabled.\n",
+    "    \"\"\"\n",
+    "    model.eval()\n",
+    "    # encode() takes no arguments, so the embeddings do not depend on the split\n",
+    "    # being scored: compute them once instead of once per split.\n",
+    "    z = model.encode()\n",
+    "    perfs = []\n",
+    "    for prefix in [\"val\", \"test\"]:\n",
+    "        pos_edge_index = data[f'{prefix}_pos_edge_index']\n",
+    "        neg_edge_index = data[f'{prefix}_neg_edge_index']\n",
+    "\n",
+    "        link_logits = model.decode(z, pos_edge_index, neg_edge_index)  # score the val/test edges\n",
+    "        link_probs = link_logits.sigmoid()  # logits -> probabilities\n",
+    "\n",
+    "        link_labels = get_link_labels(pos_edge_index, neg_edge_index)  # 1 = positive, 0 = negative\n",
+    "\n",
+    "        # roc_auc_score comes from scikit-learn, so both tensors are moved to the CPU first.\n",
+    "        perfs.append(roc_auc_score(link_labels.cpu(), link_probs.cpu()))\n",
+    "    return perfs\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch: 010, Loss: 0.6837, Val: 0.7552, Test: 0.7562\n",
+      "Epoch: 020, Loss: 0.6423, Val: 0.7552, Test: 0.7562\n",
+      "Epoch: 030, Loss: 0.5490, Val: 0.7935, Test: 0.8021\n",
+      "Epoch: 040, Loss: 0.5108, Val: 0.8210, Test: 0.8486\n",
+      "Epoch: 050, Loss: 0.4894, Val: 0.8455, Test: 0.8712\n",
+      "Epoch: 060, Loss: 0.4656, Val: 0.8637, Test: 0.8966\n",
+      "Epoch: 070, Loss: 0.4585, Val: 0.8808, Test: 0.9000\n",
+      "Epoch: 080, Loss: 0.4518, Val: 0.8864, Test: 0.9084\n",
+      "Epoch: 090, Loss: 0.4458, Val: 0.8905, Test: 0.9093\n",
+      "Epoch: 100, Loss: 0.4501, Val: 0.8920, Test: 0.9111\n"
+     ]
+    }
+   ],
+   "source": [
+    "\n",
+    "# Train for 100 epochs. Track the best validation score seen so far together with\n",
+    "# the test score from that same epoch, and print a progress line every 10 epochs.\n",
+    "log = 'Epoch: {:03d}, Loss: {:.4f}, Val: {:.4f}, Test: {:.4f}'\n",
+    "best_val_perf = 0\n",
+    "test_perf = 0\n",
+    "for epoch in range(1, 101):\n",
+    "    train_loss = train()\n",
+    "    val_perf, tmp_test_perf = test()\n",
+    "    if val_perf > best_val_perf:\n",
+    "        # New best validation score: remember it and the matching test score.\n",
+    "        best_val_perf, test_perf = val_perf, tmp_test_perf\n",
+    "    if epoch % 10 == 0:\n",
+    "        print(log.format(epoch, train_loss, best_val_perf, test_perf))\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Inference only: use eval mode and disable gradient tracking so that no autograd\n",
+    "# graph is built for the embeddings or for the N x N score matrix.\n",
+    "model.eval()\n",
+    "with torch.no_grad():\n",
+    "    z = model.encode()\n",
+    "    final_edge_index = model.decode_all(z)  # 2 x K tensor of predicted edges"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}