mirror of
https://github.com/NVIDIA/dgx-spark-playbooks.git
synced 2026-04-24 02:43:55 +00:00
1479 lines
3.8 MiB
Plaintext
1479 lines
3.8 MiB
Plaintext
|
|
{
|
|||
|
|
"cells": [
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "commercial-typing",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"scRNA Analysis Preprocessing Workflow\n",
|
|||
|
|
"\n",
|
|||
|
|
"**Author:** [Severin Dicks](https://github.com/Intron7)\n",
|
|||
|
|
"**Copyright** [scverse](https://scverse.org)\n",
|
|||
|
|
"\n",
|
|||
|
|
"This notebook is the a demo End-to-End workflow, using RAPIDS-singlecell to accelerate \n",
|
|||
|
|
"1. Load and Preprocess the data\n",
|
|||
|
|
"1. Preprocess the data by:\n",
|
|||
|
|
" 1. QC cells visually to understand the data\n",
|
|||
|
|
" 1. Filter unusual cells\n",
|
|||
|
|
" 1. Remove unwanted sources of variation\n",
|
|||
|
|
"1. Cluster and visualize the data by:\n",
|
|||
|
|
" 1. Batch Correction and analysis\n",
|
|||
|
|
" 1. Explore the biological information from the data\n",
|
|||
|
|
"\n",
|
|||
|
|
"**Let's begin!**"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 1,
|
|||
|
|
"id": "fabulous-ultimate",
|
|||
|
|
"metadata": {
|
|||
|
|
"tags": []
|
|||
|
|
},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stderr",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"/opt/conda/lib/python3.13/site-packages/scanpy/_utils/__init__.py:33: FutureWarning: `__version__` is deprecated, use `importlib.metadata.version('anndata')` instead.\n",
|
|||
|
|
" from anndata import __version__ as anndata_version\n",
|
|||
|
|
"/opt/conda/lib/python3.13/site-packages/scanpy/__init__.py:24: FutureWarning: `__version__` is deprecated, use `importlib.metadata.version('anndata')` instead.\n",
|
|||
|
|
" if Version(anndata.__version__) >= Version(\"0.11.0rc2\"):\n",
|
|||
|
|
"/opt/conda/lib/python3.13/site-packages/scanpy/readwrite.py:16: FutureWarning: `__version__` is deprecated, use `importlib.metadata.version('anndata')` instead.\n",
|
|||
|
|
" if Version(anndata.__version__) >= Version(\"0.11.0rc2\"):\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"import scanpy as sc\n",
|
|||
|
|
"import cupy as cp\n",
|
|||
|
|
"\n",
|
|||
|
|
"import time\n",
|
|||
|
|
"import rapids_singlecell as rsc\n",
|
|||
|
|
"\n",
|
|||
|
|
"import warnings\n",
|
|||
|
|
"\n",
|
|||
|
|
"warnings.filterwarnings(\"ignore\")"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 2,
|
|||
|
|
"id": "twenty-orbit",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"import rmm\n",
|
|||
|
|
"from rmm.allocators.cupy import rmm_cupy_allocator\n",
|
|||
|
|
"\n",
|
|||
|
|
"rmm.reinitialize(\n",
|
|||
|
|
" managed_memory=False, # Allows oversubscription\n",
|
|||
|
|
" pool_allocator=True, # default is False\n",
|
|||
|
|
" devices=0, # GPU device IDs to register. By default registers only GPU 0.\n",
|
|||
|
|
")\n",
|
|||
|
|
"cp.cuda.set_allocator(rmm_cupy_allocator)"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "scenic-overview",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"## Load and Prepare Data"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "742c6b09-41cf-4add-bf49-89cd44d77b17",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"Let's start by creating our dataset."
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 3,
|
|||
|
|
"id": "6f2940f0-a569-4b49-8c56-c1f8a7e09ef1",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"./h5 directory found\n",
|
|||
|
|
"./h5/dli_census.h5ad dataset found\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"import os\n",
|
|||
|
|
"import anndata as ad\n",
|
|||
|
|
"import wget\n",
|
|||
|
|
"\n",
|
|||
|
|
"url = 'https://scverse-exampledata.s3.eu-west-1.amazonaws.com/rapids-singlecell/dli_census.h5ad'\n",
|
|||
|
|
"data_dir = \"./h5\"\n",
|
|||
|
|
"output = data_dir+'/dli_census.h5ad'\n",
|
|||
|
|
"if not os.path.exists(data_dir): # Check if h5 directory exists\n",
|
|||
|
|
" print('creating data directory')\n",
|
|||
|
|
" os.system('mkdir ./h5')\n",
|
|||
|
|
"else:\n",
|
|||
|
|
" print(f'{data_dir} directory found')\n",
|
|||
|
|
"\n",
|
|||
|
|
"if not os.path.isfile(output): # Check to see if we have our final output. If it is there, get to the analysis!\n",
|
|||
|
|
" if not os.path.isfile(output): # as it's not there, let's see if we have our downloaded file. If not, let's get it!\n",
|
|||
|
|
" print('Downloading cell data..')\n",
|
|||
|
|
" wget.download(url, output)\n",
|
|||
|
|
" \n",
|
|||
|
|
" adata = ad.read_h5ad(output)\n",
|
|||
|
|
" adata= adata[adata.obs[\"assay\"].isin([\"10x 3' v3\", \"10x 5' v1\", \"10x 5' v2\"])].copy()\n",
|
|||
|
|
" adata.write(output) \n",
|
|||
|
|
"else:\n",
|
|||
|
|
" print(f'{output} dataset found')"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "saved-plain",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"We load the sparse count matrix from an `h5ad` file using Scanpy. The sparse count matrix will then be placed on the GPU. "
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 4,
|
|||
|
|
"id": "original-election",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"data_load_start = time.time()"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 5,
|
|||
|
|
"id": "comic-fundamental",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"CPU times: user 353 ms, sys: 837 ms, total: 1.19 s\n",
|
|||
|
|
"Wall time: 1.19 s\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"%%time\n",
|
|||
|
|
"adata = sc.read(output)"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 6,
|
|||
|
|
"id": "assured-premiere",
|
|||
|
|
"metadata": {
|
|||
|
|
"tags": []
|
|||
|
|
},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"CPU times: user 115 ms, sys: 784 ms, total: 899 ms\n",
|
|||
|
|
"Wall time: 897 ms\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"%%time\n",
|
|||
|
|
"rsc.get.anndata_to_GPU(adata)"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 7,
|
|||
|
|
"id": "major-disability",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"Total data load and format time: 2.1036038398742676\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"data_load_time = time.time()\n",
|
|||
|
|
"print(\"Total data load and format time: %s\" % (data_load_time - data_load_start))"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "hawaiian-cooperation",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"## Preprocessing"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 8,
|
|||
|
|
"id": "united-benchmark",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"preprocess_start = time.time()"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 9,
|
|||
|
|
"id": "72d4309d-6a95-4193-9ac8-8687b81b7698",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"adata.var_names = adata.var.feature_name.astype(str).to_numpy()"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "peripheral-senate",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"### Quality Control"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "presidential-parts",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"We calculate quality control (QC) metrics to assess cell and gene quality. These include: \n",
|
|||
|
|
"\n",
|
|||
|
|
"- **Per cell metrics**: \n",
|
|||
|
|
" - Total counts per cell (library size) \n",
|
|||
|
|
" - Number of detected genes per cell \n",
|
|||
|
|
" - Percentage of counts from mitochondrial (`MT`) and ribosomal (`RIBO`) genes \n",
|
|||
|
|
"\n",
|
|||
|
|
"- **Per gene metrics** (gene space): \n",
|
|||
|
|
" - Total counts per gene \n",
|
|||
|
|
" - Number of cells expressing each gene \n",
|
|||
|
|
"\n",
|
|||
|
|
"These metrics help identify low-quality or stressed cells and ensure a meaningful feature set for downstream analysis."
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 10,
|
|||
|
|
"id": "greatest-artwork",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"CPU times: user 2.78 ms, sys: 0 ns, total: 2.78 ms\n",
|
|||
|
|
"Wall time: 2.57 ms\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"%%time\n",
|
|||
|
|
"rsc.pp.flag_gene_family(adata, gene_family_name=\"MT\", gene_family_prefix=\"MT\")"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 11,
|
|||
|
|
"id": "major-brisbane",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"CPU times: user 2.14 ms, sys: 89 μs, total: 2.23 ms\n",
|
|||
|
|
"Wall time: 2.21 ms\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"%%time\n",
|
|||
|
|
"rsc.pp.flag_gene_family(adata, gene_family_name=\"RIBO\", gene_family_prefix=\"RPS\")"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 12,
|
|||
|
|
"id": "weighted-pound",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"CPU times: user 453 ms, sys: 24.4 ms, total: 477 ms\n",
|
|||
|
|
"Wall time: 556 ms\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"%%time\n",
|
|||
|
|
"rsc.pp.calculate_qc_metrics(adata, qc_vars=[\"MT\", \"RIBO\"])"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "d32205f0",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"To visualize the quality control (QC) metrics, we generate the following plots:\n",
|
|||
|
|
"\n",
|
|||
|
|
"1. **Scatter plot: Total counts vs. Mitochondrial percentage** \n",
|
|||
|
|
" - This plot shows the relationship between the total UMI counts per cell and the percentage of mitochondrial (`MT`) gene expression. \n",
|
|||
|
|
" - Cells with high mitochondrial percentages may indicate stressed or dying cells.\n",
|
|||
|
|
"\n",
|
|||
|
|
"2. **Scatter plot: Total counts vs. Number of detected genes** \n",
|
|||
|
|
" - This plot displays the correlation between the total UMI counts per cell and the number of detected genes. \n",
|
|||
|
|
" - A strong correlation is expected, but outliers with low gene counts might indicate empty droplets or dead cells.\n",
|
|||
|
|
"\n",
|
|||
|
|
"3. **Violin plot: Number of detected genes per cell** \n",
|
|||
|
|
" - This violin plot visualizes the distribution of the number of detected genes per cell. \n",
|
|||
|
|
" - It helps identify cells with abnormally low or high gene counts, which could be filtered out.\n",
|
|||
|
|
"\n",
|
|||
|
|
"4. **Violin plot: Total counts per cell** \n",
|
|||
|
|
" - This plot shows the distribution of total counts per cell, indicating library size variation. \n",
|
|||
|
|
" - Extreme values may suggest low-quality or overly dominant cells.\n",
|
|||
|
|
"\n",
|
|||
|
|
"5. **Violin plot: Percentage of mitochondrial counts per cell** \n",
|
|||
|
|
" - This plot illustrates the distribution of mitochondrial gene expression across all cells. \n",
|
|||
|
|
" - High mitochondrial content could be a sign of cell stress or apoptosis.\n",
|
|||
|
|
"\n",
|
|||
|
|
"These visualizations help assess dataset quality and guide decisions on filtering low-quality cells."
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 13,
|
|||
|
|
"id": "70932e9c-6ce6-4de0-808f-fc88ce7ea512",
|
|||
|
|
"metadata": {
|
|||
|
|
"scrolled": true
|
|||
|
|
},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlgAAAGdCAYAAADOqw1GAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjYsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvq6yFwwAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzs3WdwXOl1J/z/7ZxzDsiJAAmCYcghJ2tmnGTJZXvllZxmPfa6dldlydYG23KotVfySF+0rt2qlWyvNbZKK9nWeseyHBQmz3CYSSQi50bnnOPt+37Aex+jCZAEQJAYcs6vijVEo9G4bPSwD5/nPP/DCYIggBBCCCGE7BvJQV8AIYQQQsjDhgosQgghhJB9RgUWIYQQQsg+owKLEEIIIWSfUYFFCCGEELLPqMAihBBCCNlnVGARQgghhOwz2UFfwN1oNpsIhULQ6/XgOO6gL4cQQgghDzFBEJDP5+HxeCCR3H6N6oEusEKhEPx+/0FfBiGEEEI+QAKBAHw+323v80AXWHq9HsDGH9RgMBzw1RBCCCHkYZbL5eD3+1n9cTsPdIElbgsaDAYqsAghhBByX+ykLYma3AkhhBBC9hkVWIQQQggh++xAC6xGo4Hf/d3fRWdnJ9RqNbq6uvCHf/iHaDabB3lZhBBCCCF35UB7sL74xS/iK1/5Cv7yL/8SQ0NDuHLlCn7pl34JRqMRn/70pw/y0gghhBBC9uxAC6zz58/jJ37iJ/DhD38YANDR0YFvfvObuHLlyrb3r1arqFar7ONcLndfrpMQQgghZDcOdIvw8ccfx2uvvYa5uTkAwNjYGN5991382I/92Lb3f+mll2A0GtkvysAihBBCyPsRJwiCcFDfXBAEfPazn8UXv/hFSKVS8DyPz3/+8/jt3/7tbe+/3QqW3+9HNpulmAZCCCGE3FO5XA5Go3FHdceBbhH+9V//Nb7+9a/jG9/4BoaGhjA6Oopf//Vfh8fjwQsvvLDl/kqlEkql8gCulBBCCCFk5w60wPrP//k/47d+67fw8Y9/HABw5MgRrK6u4qWXXtq2wCKEEEIIeRAcaA9WqVTaMixRKpVSTAMhhBBCHmgHuoL1kY98BJ///OfR1taGoaEhXL9+HV/60pfw4osvHuRlEUIIIYTclQNtcs/n8/i93/s9vPLKK4jFYvB4PPjEJz6B3//934dCobjj1++m2YwQQggh5G7spu440ALrbt2PAqtWq+2o2COEEELIw203dQfNIryDfD6PB7gGJYQQQsgBONAerAeB1Wo96EsghBBCyAOGVrAIIYQQQvYZFViEEEIIIfuMCixCCCGEkH1GBRYhhBBCyD6jAosQQgghZJ9RgUUIIYQQss+owCKEEEII2WdUYBFCCCGE7DMqsAghhBBC9hkVWIQQQggh+4wKrHssEomg2Wwe9GUQQggh5D6iWYT3mMvlOuhLIIQQQsh9RitYhBBCCCH7jAosQgghhJB9RgUWIYQQQsg+owKLEEIIIWSfUYG1zwRBOOhLIIQQQsgBowJrn0UikYO+BEIIIYQcMCqw9pnb7T7oSyCEEELIAaMCixBCCCFkn1GBRQghhBCyz6jAIoQQQgjZZ1RgEUIIIYTsMyqw7pFwOHzQl0AIIYSQA0IF1j3idDoP+hIIIYQQckCowLpHJBJ6agkhhJAPKqoCCCGEEEL22YEWWB0dHeA4bsuvT37ykwd5WYQQQgghd0V2kN/88uXL4HmefTw5OYnnn38eH/vYxw7wqgghhBBC7s6BFlh2u73l4y984Qvo7u7GU089dUBXRAghhBBy9w60wNqsVqvh61//Oj7zmc+A47ht71OtVlGtVtnHuVzufl0eIYQQQsiOvW+a3P/u7/4OmUwG/+bf/Jtb3uell16C0Whkv/x+//27QEIIIYSQHeIEQRAO+iIA4Id/+IehUCjwne9855b32W4Fy+/3I5vNwmAw3I/LJIQQQsgHVC6Xg9Fo3FHd8b7YIlxdXcWrr76K//f//t9t76dUKqFUKu/TVRFCCCGE7M37Yovw5ZdfhsPhwIc//OGDvhRCCCGEkLt24AVWs9nEyy+/jBdeeAEy2ftiQY0QQggh5K4ceIH16quvYm1tDS+++OJBXwohhBBCyL448CWjH/qhH8L7pM/+fSMcDsPpdN5xnmEymYRWq4VKpbpPV0YIIYSQnTjwAots5Xa7d3Q/q9V6j6+EEEIIIXtx4FuEhBBCCCEPGyqwHlLhcBjNZvOgL4MQQgj5QKItwofUTrcZCSGEELL/aAWLEEIIIWSfUYFFCCGEELLPqMAihBBCCNlnVGARQgghhOwzKrA+gCjYlRBCCLm3qMD6AIrH4+B5/qAvgxBCCHloUUzDB5DD4TjoSyCEEEIearSCdZ+lUimUy+WDvgxCCCGE3EO0gnWfWSyWg74EQgghhNxjtIJFCCGEELLPqMAid5RMJlGpVA76MgghhJAHBm0RkjuyWq0HfQmEEELIA4VWsAghhBBC9hkVWIQQQggh+4wKrIdMMBg86EsghBBCPvCowHpANJtNRCKRO97P6/Xeh6shhBBCyO1QgfWAkEgkcDqdB30ZhBBCCNkBKrAOUKFQQL1e3/H9OY67h1dDCCGEkP1CBdYBksvlkEqlB30ZhBBCCNlnlIN1gJRK5UFfAiGEEELuAVrBIoQQQgjZZ1RgEUIIIYTsMyqw7rP7kVNVKpWQTqfv+fchhBBCyPaowLrP9junqlqtbrlNo9HAbDbv6/chhBBCyM5RgfWAuFXIaD6f39XjxONxNJvN/bgkQgghhNwCFVgPCLvdvu3tNpttV49jNpshkdCPnRBCCLmXDvydNhgM4ud//udhtVqh0WgwMjKCq1evHvRlve/sZEzOTshklMxBCCGE3GsH+m6bTqfx2GOP4ZlnnsE///M/w+FwYHFxESaT6SAv631pL71bjUYDxWIRRqNx288Hg0GaXUgIIYTcAwdaYH3xi1+E3+/Hyy+/zG7r6Og4uAt6yEilUqhUqlt+noorQggh5N440C3Cv//7v8fJkyfxsY99DA6HA8eOHcOf/dmf3fL+1WoVuVyu5Re5NY7jKC2eEEIIOQAHWmAtLS3hy1/+Mnp7e/G9730P/+7f/Tt86lOfwte+9rVt7//SSy/BaDSyX36//z5fMSGEEELInXGCIAgH9c0VCgVOnjyJ9957j932qU99CpcvX8b58+e33L9arbbkPuVyOfj9fmSzWRgMhvtyzR8kPM8jHo/D5XId9KUQQgghBy6Xy8FoNO6o7jjQFSy3243BwcGW2w4dOoS1tbVt769UKmEwGFp+kX9RqVSQSqX27fGkUikVV4QQQsgeHGiB9dhjj2F2drbltrm5ObS3tx/QFb1/7WTEjkqlgsViuQ9XQwghhJDbOdAC6zd+4zdw4cIF/NEf/REWFhbwjW98A3/6p3+KT37ykwd5We9L9/LEXzwev2ePTQghhHwQHWiB9cgjj+CVV17BN7/5TRw+fBj/7b/9N/zxH/8xfu7nfu4gL+uhEIvFUK/Xd3TfW+VkEUIIIWRvDrTJ/W7tptmMEEIIIeRuPDBN7mRn8vn8A5X5tZN+MUIIIeRhRgXWA0Cv19/XFbrdFkihUKjlY0qIJ4QQ8kFHBdYH2K0KKbfbjWazuePH8Xg8+3VJhBBCyEOBCqwPmM1F1a1WmkqlEsrl8v26JEIIIeShc6DDnsn9t5PtO51Odx+uhBBCCHl40QrWB9CttgZvdftutgsJIYQQQgXWB0o4HAZw61Ws7W6vVCrIZrP39LoIIYSQhw0VWA+4SqVyy89Vq1UkEgn2scViAc/zu3p8lUoFs9m852t7gGPWCCGEkD2jAusBVyqV2O8LhULLapNSqYTNZmMf1+t1NBqNPX+v3cY31Go12l4khBDygUQF1gNu83BnnU5327E3Op0OSqVyy+3bFU43Z1sBG1uIuymyDAYDpFLpju9PCCGEPCyowPoAqdVq2w52lkhaXwbVahVyubzlNnEl6nanECnBnRBCCNlABdYDaK+FjEKhgN1u33I7x3EtHyuVStjt9pb+rmg0esfHpwR3QgghZAMVWA+g3RYym7f7tmt
|
|||
|
|
"text/plain": [
|
|||
|
|
"<Figure size 728.72x480 with 1 Axes>"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAnIAAAGdCAYAAACW1J5fAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjYsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvq6yFwwAAAAlwSFlzAAAPYQAAD2EBqD+naQAAzQ1JREFUeJzs3Xd0XPd1J/Dvm957wwwqAQIgSJCiKFrFiiRb9diy0u1YibzpzjobWXF34sROdi2XTWxno10nTpy1N257jks269iyZcUqlCiSIglWECA6pvfe33v7B/b9PAMMwAEIEAR5P+fgiJh5ePNmBkdzcX+/ey8niqIIQgghhBCy48i2+wIIIYQQQsjGUCBHCCGEELJDUSBHCCGEELJDUSBHCCGEELJDUSBHCCGEELJDUSBHCCGEELJDUSBHCCGEELJDKbb7AnYKQRAQDAZhNBrBcdx2Xw4hhBBCbmCiKCKXy8Hr9UImWz3vRoFcm4LBILq6urb7MgghhBByE1lcXERnZ+eq91Mg1yaj0Qhg6QU1mUzbfDWEEEIIuZFls1l0dXWx+GM1FMi1SVpONZlMFMgRQggh5Jq40nYuKnYghBBCCNmhKJAjhBBCCNmhKJAjhBBCCNmhKJAjhBBCCNmhKJAjhBBCCNmhKJAjhBBCCNmhKJAjhBBCCNmhKJAjhBBCCNmhKJAjhBBCCNmhKJAjhBBCCNmhKJAjhBBCCNmhKJAjhBBCCNmhKJAjhBBCyA1DEATUarXtvoxrhgI5QgghhNww6vU6SqXSdl/GNaPY7gsghBBCCNksKpUKKpVquy/jmqGMHCGEEELIDkWBHCGEEELIDkWBHCGEEELIDkWBHCGEEELIDkWBHCGEEELIDkWBHCGEEELIDkWBHCGEEELIDkWBHCGEEELIDkWBHCGEEELIDkWBHCGEEEJ2nFgstt2XcF2gQI4QQgghO47ZbN7uS7guUCBHCCGEkB3nZpqnuhYK5AghhBBCdigK5AghhBBCdigK5AghhBBCdigK5AghhBBCdigK5AghhBBCdigK5AghhBBCdigK5AghhBBCdigK5AghhBBCdigK5AghhBBCdigK5AghhBBCdigK5AghhBBCdigK5AghhBBCdigK5AghhBBCdigK5AghhBCyIwmCsN2XsO0okCOEEELIjhSNRle9r1argef5a3g124MCOUIIIYTsSB6PZ9X7qtUqarXaNbya7aHY7gsghBBCCNlser1+uy/hmqCMHCGEEELIDkWBHCGEEELIDkWBHCGEEEJ2tEAgsN2XsG0okCOEEELIjubz+bb7ErYNBXKEEEIIITsUBXKEEEIIITsUBXKEEEIIITsUBXKEEEIIITsUBXKEEEIIITvUtgZy9XodH/vYx9DX1wetVotdu3bhL//yL5uG4IqiiE984hPwer3QarW47777cOHChabzVCoV/NEf/REcDgf0ej0ee+wx+P3+pmNSqRSeeOIJmM1mmM1mPPHEE0in09fiaRJCCCGEbIltDeQ+85nP4O/+7u/wzDPPYHx8HJ/97GfxX//rf8Xf/u3fsmM++9nP4nOf+xyeeeYZnDhxAh6PBw8++CByuRw75qmnnsL3vvc9fOtb38KRI0eQz+fx6KOPNg3LffzxxzE2NoZnn30Wzz77LMbGxvDEE09c0+dLCCGE3OwEQUC1Wt3uy7hhcKIoitv14I8++ijcbje+/OUvs9t++Zd/GTqdDv/8z/8MURTh9Xrx1FNP4cMf/jCApeyb2+3GZz7zGbz73e9GJpOB0+nEP//zP+Md73gHACAYDKKrqws/+MEP8PDDD2N8fBwjIyN47bXXcPvttwMAXnvtNdx55524dOkShoaGrnit2WwWZrMZmUwGJpNpC14NQggh5MZXrVZRqVRgNBq3+1Kua+3GHduakbv77rvx/PPPY3JyEgBw5swZHDlyBG95y1sAALOzswiHw3jooYfYz6jVatx777149dVXAQAnT55ErVZrOsbr9WLfvn3smKNHj8JsNrMgDgDuuOMOmM1mdsxylUoF2Wy26YsQQgghV0elUlEQt4kU2/ngH/7wh5HJZDA8PAy5XA6e5/HJT34S73znOwEA4XAYAOB2u5t+zu12Y35+nh2jUqlgtVpXHCP9fDgchsvlWvH4LpeLHbPcpz71KfzFX/zF1T1BQgghhJAttK0Zuf/9v/83vva1r+Eb3/gGTp06ha9+9av4q7/6K3z1q19tOo7juKbvRVFccdtyy49pdfxa5/noRz+KTCbDvhYXF9t9WoQQQggh18S2ZuQ++MEP4iMf+Qh+7dd+DQAwOjqK+fl5fOpTn8J/+A//AR6PB8BSRq2jo4P9XDQaZVk6j8eDarWKVCrVlJWLRqO466672DGRSGTF48disRXZPolarYZard6cJ0oIIYQQsgW2NSNXLBYhkzVfglwuZ+1H+vr64PF48Nxzz7H7q9UqXnzxRRakHTp0CEqlsumYUCiE8+fPs2PuvPNOZDIZHD9+nB1z7NgxZDIZdgwhhBBCyE6zrRm5t73tbfjkJz+J7u5u7N27F6dPn8bnPvc5/PZv/zaApeXQp556Ck8//TR2796N3bt34+mnn4ZOp8Pjjz8OADCbzfid3/kdvP/974fdbofNZsMHPvABjI6O4oEHHgAA7NmzB4888gh+7/d+D3//938PAPj93/99PProo21VrBJCCCEEyOfzMBgM230ZpMG2BnJ/+7d/iz/7sz/De97zHkSjUXi9Xrz73e/Gn//5n7NjPvShD6FUKuE973kPUqkUbr/9dvz4xz9uqnj5/Oc/D4VCgbe//e0olUq4//778ZWvfAVyuZwd8/Wvfx1PPvkkq2597LHH8Mwzz1y7J0sIIYTscIIgtLVP/UaXz+fB8zzMZvN2X8r29pHbSaiPHCGEEEKulR3RR44QQgghhGwcBXKEEEIIITsUBXKEEEII2bBIJIJ6vb7dl3HT2tZiB0IIIYTsbKv1YyXXBmXkCCGEEEJ2KArkCCGEEEJ2KArkCCGEELKpAoHAdl9CS8lkcrsvYdNRIEcIIYSQTeXz+a7J45TL5XUdr9Vqt+hKtg8FcoQQQgjZcluRpSsUCus6ngI5QgghhJAN2Iosnd1u3/Rz7jQUyBFCCCEExWIRqVRq2x4/GAxu22PvZBTIEUIIIQQ6nQ5Wq3XbHt/r9W7o50Kh0CZfyc5CgRwhhBBCtoQgCFseaN3sDYkpkCOEEEJI23K5HLLZbFvHymQydHR0bOn1yGQ3dyhDI7oIIYQQ0jaj0bjdl0Aa3NxhLCGEEELIDkaBHCGEEELIDkWBHCGEEELIDkWBHCGEEELIDkWBHCGEEELIDkWBHCGEEELIDkWBHCGEEHITqFarKJVK230ZZJNRIEcIIYTcBDiOu+mb596IqCEwIYQQchNQKpXbfQkrFAoF1Go1WCyW7b6UHYtCc0IIIYSsSRRFBIPBTT+vXq9vCuKCwSBEUdz0x7mRUUaOEEIIIWviOA5er3fLH+daPMaNhjJyhBBCCNnRstkscrncdl/GtqBAjhBCCLmBBAKB7b6Ea85kMsFoNG73ZWwLCuQIIYSQG4jP57viMZFI5BpcCbkWKJAjhBBCbjJ2u327L6FJsVi8aZdGrxYVOxBCCCE3GYXi+vr412g0230JOxZl5AghhJAb1Fa0DNkKMplsQ82Kb8b9gMtRIEcIIYTcYKQAR2rncaP2ZmtnP+CNjgI5Qggh5AbTGOCUSiVkMpkNnaedjFe9Xkc2m93Uc5L2USBHCCGE3MC0Wu2GRmCJogiO4654nEwmg1qtZt8Xi8U1j6cs2uaiQI4QQgghK7Q7zSEUCjUFcpVKZVOvIxqNbur5bjQUyBFCCCFkBVEUUSgUrnjc8gyb1Wrd1OvY7PPdaCiQI4QQQkhLgiBs9yVAqVRu9yVc1yiQI4QQQsgKHMe1NfZqteKFSqWCWq222ZdFlqFAjhBCCCEbtlrxgiAIN2zbk+sJBXKEEELITSiTySCfz7d9/GotRlbLyGm1WqhUqg1dG2kfBXKEEELITchsNsNgMLR9fCQSaXk7tRPZXhTIEUIIIeSKdu/e3fL2Wq2
|
|||
|
|
"text/plain": [
|
|||
|
|
"<Figure size 728.72x480 with 1 Axes>"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"sc.pl.scatter(adata, x=\"total_counts\", y=\"pct_counts_MT\")\n",
|
|||
|
|
"sc.pl.scatter(adata, x=\"total_counts\", y=\"n_genes_by_counts\")"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 14,
|
|||
|
|
"id": "de9eccf2-26d1-4dda-8a11-89c9b4b49a08",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAnIAAAGdCAYAAACW1J5fAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjYsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvq6yFwwAAAAlwSFlzAAAPYQAAD2EBqD+naQAAoBdJREFUeJztnXmYFcXV/8+A7MuwCcOwOIAoKIyIjrhwGVxQUEBjwCiIEVzigjjExESTCJhXwSVq4vjExCQSg4q4kJ95E0nURJ0JirwIigsqeEd2MQQGIqtwfn+Qas49U9Vd3bf73r73ns/z9DNzu6tr66pT31q6uggREQRBEARBEISco1G2IyAIgiAIgiAEQ4ScIAiCIAhCjiJCThAEQRAEIUcRIScIgiAIgpCjiJATBEEQBEHIUUTICYIgCIIg5Cgi5ARBEARBEHKUI7IdgVzh4MGDsHHjRmjTpg0UFRVlOzqCIAiCIOQxiAg7d+6E0tJSaNTIPO4mQs6SjRs3Qo8ePbIdDUEQBEEQCoh169ZB9+7djddFyFnSpk0bADiUoW3bts1ybARBEARByGd27NgBPXr0cPSHCRFylqjp1LZt24qQEwRBEAQhI3gt55KXHQRBEARBEHIUEXKCIAiCIAg5igg5QRAEQRCEHEWEnCAIgiAIQo4iQk4QBEEQBCFHESEnCIIgCIKQo4iQEwRBEARByFFEyAmCIAiCIOQoIuQEQRAEQRByFBFygiAIgiAIOYoIOUEQBEEQhBxFhJwgCIIgCEKOIkJOiB179+4FRPR1T11dXTSREQRBEIQYI0JOiBX19fUwduxYeOyxx6zvqaurg169eomYEwRBEAoOEXJCrKirq4Pdu3fDvHnzrO8pKyuDZDIJZWVl0UVMEARBEGKICDkhLxARJwiCIBQiIuQEQRAEQRByFBFyQqwoKirKdhQEQRAEIWcQIScIgiAIgpCjiJATBEEQBEHIUUTICYIgCIIg5Cgi5ARBEARBEHIUEXKCIAiCIAg5igg5QRAEQRCEHEWEnJCXyOe6BEEQhEJAhJyQd8i3VwVBEIRCQYScECsQMW0/5NurgiAIQqEgQk7IS0TECYIgCIWACDlBEARBEIQcRYScIAiCIAhCjiJCTogVRUVF2Y6CIAiCIOQMIuQEQRAEQRByFBFygiAIgiAIOYoIOUEQBEEQhBxFhJwgCIIgCEKOIkJOEARBEAQhRxEhJwiCIAiCkKOIkBMEQRAEQchRRMgJgiAIgiDkKCLkBEEQBEEQchQRcoIQY+rq6rIdBUEQBCHGiJArAEQM5CZ1dXXQq1cveX6CIAiCERFyeY6IgdylrKwMkskklJWVZTsqgiAIQkwRIZfniBjIbeS5CYIgCG6IkCsAckkMIGK2oyAIgiAIOYMIOUEQBEEQhBxFhFyekqtr4oqKirIdBUEQBEHIGUTI5SHygoMgCIIgFAYi5PIQecFBEARBEAoDEXJ5iog4QRAEQch/RMgJgiAIgiDkKCLkBEEQBEEQchQRckKskH3kBEEQBMGerAq5r7/+Gn784x9Dr169oEWLFtC7d2+488474eDBg44bRISZM2dCaWkptGjRAoYPHw4ffPBBij979+6Fm266CTp16gStWrWCsWPHwvr161PcbNu2DSZNmgTFxcVQXFwMkyZNgu3bt2cimYIgCIIgCJGQVSF3zz33wKOPPgrV1dXw0Ucfwb333gv33XcfPPzww46be++9Fx544AGorq6GpUuXQklJCYwYMQJ27tzpuKmqqoKFCxfC/Pnzoba2Fv7zn//A6NGj4cCBA46bCRMmwIoVK2DRokWwaNEiWLFiBUyaNCmj6XVDtgoRBEEQBME3mEUuuOACnDJlSsq5iy++GC+//HJERDx48CCWlJTgnDlznOt79uzB4uJifPTRRxERcfv27dikSROcP3++42bDhg3YqFEjXLRoESIifvjhhwgA+NZbbzlu3nzzTQQAXLVqlVVc6+vrEQCwvr4+WGJdSCaTCACYTCZD9zvXWLFiBSYSCUwkEtmOiiAIgiBkDVvdkdURuaFDh8Krr74Kn3zyCQAAvPvuu1BbWwvnn38+AAAkk0nYvHkznHvuuc49zZo1g8rKSli8eDEAACxbtgz279+f4qa0tBQGDBjguHnzzTehuLgYhgwZ4rg59dRTobi42HHD2bt3L+zYsSPliArZ9+0w8mUHQRAEQbDniGwG/oMf/ADq6+uhX79+0LhxYzhw4ADcddddcNlllwEAwObNmwEAoEuXLin3denSBT7//HPHTdOmTaF9+/YN3Kj7N2/eDJ07d24QfufOnR03nNmzZ8OsWbPSS6APRMQJgiAIguCXrI7IPfPMMzBv3jx46qmn4J133oHf//73cP/998Pvf//7FHd8lAYRPUduuBudezd/brvtNqivr3eOdevW2SZLEARBEAQhI2R1RO773/8+/PCHP4RLL70UAAAGDhwIn3/+OcyePRu+/e1vQ0lJCQAcGlHr2rWrc9+WLVucUbqSkhLYt28fbNu2LWVUbsuWLXD66ac7br744osG4X/55ZcNRvsUzZo1g2bNmoWTUEEQBEEQhAjI6ojcrl27oFGj1Cg0btzY2X6kV69eUFJSAi+//LJzfd++ffD66687Iu2kk06CJk2apLjZtGkTvP/++46b0047Derr6+Htt9923CxZsgTq6+sdN4IgCIIgCLlGVkfkxowZA3fddRf07NkTjj/+eFi+fDk88MADMGXKFAA4NB1aVVUFd999N/Tt2xf69u0Ld999N7Rs2RImTJgAAADFxcVw1VVXwS233AIdO3aEDh06wPe+9z0YOHAgnHPOOQAA0L9/fxg5ciRcc8018Ktf/QoAAK699loYPXo0HHvssdlJvJDT1NXVybpGQRAEIetkVcg9/PDD8JOf/ARuuOEG2LJlC5SWlsJ3vvMduOOOOxw3t956K+zevRtuuOEG2LZtGwwZMgT+9re/QZs2bRw3Dz74IBxxxBFwySWXwO7du+Hss8+GuXPnQuPGjR03Tz75JEybNs15u3Xs2LFQXV2ducQKeUNdXR306tVL3jQWBCFvceus/vWvf4UvvvgCrrjiisxGStBShCjfRLJhx44dUFxcDPX19dC2bdtsRydvee+992Dq1KkAAPDGG29kOTZmZEROEIR8xauzOmzYMACIt43OB2x1h3xrVRACICJOEIR8RfY2zS1EyBUY8ikwQRAEwQsRcbmDCLkCQg2Xi5gTBEEQhPxAhFwBIcPlgiAIgpBfiJArMETECYIgCEL+IEJOEARBEAQhRxEhJxiJ61q6uMZLEARBEDKNCDlBS1xfjMh2vOKWH4IgCEJhI0JO0JKpFyP8CqNsvrCRbREpCIIgCBwRcoKRTIi4IMLIb7zCEl7y1q8gCIIQN0TICVlDJ4zC/mJc2KNo2RoJFARBEAQdIuTynLiLgKiFUa6Posl0bu4jz04QhCgRIZdFojbwIgIOkasiDiD3hWiho+rg+PHjYfPmzdmOjiAIeYgIuSyRCZElIiA/kOeXu5SVlcHFF18MX3zxBbzwwgvZjo4gCHmICLkskSmRlWsioKioKNtREIRQOXDgAAAA7N+/P8sxEQQhHxEhl0VyTWQJgiAIghAvRMgJsSLst1YFQRAEIZ8RIScIgiAIgpCjiJATBEHIADLaLAhCFIiQEwRBiBD1Ao+8yCMIQhSIkBMEQRAEQchRRMgJoVHoGw8LgiAIQqYRISeEgnxFQhAEQRAyjwg5IRQK/SsSYQtYEcSCIAiCDSLkhNAIQ8Tl4oLwsEcjdf6JsBMEQRB0iJArcEQgpE/Yo5HcP5m2zm3UtiOy/YggCFEgQq6AEYFgxm+ehD2lTP0r9GnrfCEXR5sFQYg/IuQKGBEIeuIocOUZCYIgCDpEyOUAUQoKEQgNEYErRIFMrQqCEAUi5GJOHEeHch2bvBQRJ4SNTK0KghAFIuRijowOhYsI48JDnrUg5B9Srw8jQi4HKCQRF/X0kwjjwiJOwl2mVgUhHOJUr+OACLkCIF8Le9B0pSPi8jUv85U4CXeZWhWEcIhTvY4DIuTynHzsudTV1aWVrqB5kY9
|
|||
|
|
"text/plain": [
|
|||
|
|
"<Figure size 804.6x480 with 1 Axes>"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAnIAAAGdCAYAAACW1J5fAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjYsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvq6yFwwAAAAlwSFlzAAAPYQAAD2EBqD+naQAA/klJREFUeJzsvX14VdWVP75BQUQhIu8EMIBBYkKIyCUauAZQkCiitkGtoBgUX1HC6HTUqWLaKtaxtnWwozNV2+/Yimhrx9bqaK0vQcH6q++1to5GpVpf6gtVK1Zl/f6I62Sdz117n3PuPTf3BM56nv0k95x99vte+7PXWnvtXkREJqWUUkoppZRSSimlHke9S12AlFJKKaWUUkoppZTyoxTIpZRSSimllFJKKfVQSoFcSimllFJKKaWUUg+lFMillFJKKaWUUkop9VBKgVxKKaWUUkoppZRSD6UUyKWUUkoppZRSSin1UEqBXEoppZRSSimllFIPpZ1LXYCeQtu2bTOvv/66GTBggOnVq1epi5NSSimllFJKKW3HRETmgw8+MKNGjTK9e9vlbimQC0mvv/66GTNmTKmLkVJKKaWUUkop7UC0efNmM3r0aOv7FMiFpAEDBhhjOht04MCBJS5NSimllFJKKaW0PdPf/vY3M2bMGA9/2CgFciGJ1akDBw5MgVxKKaWUUkoppdQtFGTOlR52SCmllFJKKaWUUuqhlAK5lFJKKaWUUkoppR5KKZBLKaWUUkoppZRS6qGUArmUUkoppZRSSimlHkopkEsppZRSSimllFLqoZQCuZRSSimllFJKKaUeSimQSymllFJKKaWUUuqhlAK5lFJKKaWUUkoppR5KKZBLKaWUUkoppZRS6qGUArmUUkoppZRSSimlHkopkEsppZRSSimllFLqoZQCuZRSSimllFJKKaUeSimQSymllFJKKaUdiF5++eWCvv/888/NJ598Ek9hegAV2l7FphTIpZRSSimllNIOQi+//LIZN25cQeDkggsuMIuXLI6vUAmmONqr2LRzqQuQUkoppZRSSil1D1VUVJiOjg5TUVGRdxqbNm2Kr0AJpzjaq9iUSuRSSimllFJKaQeiJIOSJFLS2ysFcimllFJKKaXUzZRkVV1KPYtSIJdSSimllFJK3Ug9we4qpZ5DKZBLyUdJYCxPPPGEef/990tdjJRSSimlolBPsLtKqedQCuRS8igJu8TXXnvNrFy50lx66aUlK0NKKaWUUrEpThCXhA14SqWjFMil5FESdol//etfjTHGPProoyUrQ0oppZRST6EkbMBTKi2lQC4lH6Wi/pRSSimlnkNJ2ICnVFpKgVxKKaWUUkop9WBKQdyOTSmQSymllFJKKaWUUuqhlAK5lBJFvXr1KnURUkoppZRSSqnHUArkEk47qgHr1q1bS12ElFJKKaWUAmhHXaOSRCmQSzDtiKeRiMhs3brVPPbYYztUvVNKKaWUehrtiGtUEikFcgmmqKeRtpfJ1K9fP5PJZFID3pRSSimlBNPYsWMTdWJ2e1kDo1IK5BJOUUCcbWfUEwd3v379Sl2ElFJKKaUdhvJZJ7Zt25YoELejSgdTILedkE16tyMP7qRSlL5I+y2llFKKi2z8JN91Ytu2bYUXKibakf3ppUBuOyJtAO/IgzuJFIVhpiA8pZRSiotc/CTfdYKI4ilcTLSjrnMpkEsQvfzyy6EX7SiL+442uEsBfILy5PdRGGYKwlNKKaVCSPKlIH6SD5/5/PPP8ytYSrFSYoDcmjVrTK9evUxra6v3jIjMJZdcYkaNGmV23XVXM2vWLPP73//e990nn3xizj77bDNkyBCz2267mYULF5o///nPvjjvvfeeOeGEE0xZWZkpKyszJ5xwgnn//fe7oVbhiXdL2o5J+10KSU0hILO7yhpX20RVf7ryxPdRGKamKk8ppZRSCiKNL8W9KUyKanVH54uJAHKPPfaY+c///E9TW1vre37FFVeYq666yqxdu9Y89thjZsSIEWbu3Lnmgw8+8OK0traa22+/3axbt85s2LDBfPjhh2bBggW+ncLxxx9vnnzySXP33Xebu+++2zz55JPmhBNO6Lb6haGKigrT3t6es2OyTcbultSEBUhavO4EnnG0TdTyhtnpxtFfqao1pZRSCkvdsU4kQSLXHXwx8TyXSkwffPABVVZW0r333kuNjY20cuVKIiLatm0bjRgxgi6//HIv7tatW6msrIyuvfZaIiJ6//33qU+fPrRu3TovzmuvvUa9e/emu+++m4iInnvuOTLG0KZNm7w4GzduJGMMPf/889Zybd26lbZs2eKFzZs3kzGGtmzZEmf1Pero6CBjDHV0dHi/5Tstvut9MShsPkHlddGTTz5J2WyWstls+IIVgTo6OrqtXaNQEsuUUkop7VjEPPqdd94pdVGIqHh8kdcBuTZ3J23ZsiUU7ii5RO6ss84yhx9+uDnkkEN8zzs6Oswbb7xh5s2b5z3bZZddTGNjo3nkkUeMMcb87ne/M59++qkvzqhRo0xNTY0XZ+PGjaasrMzU19d7cQ444ABTVlbmxdFozZo1niq2rKzMjBkzJpb62kjunoJUcfJ9d0u78o3XE+28kij96ontmFJKKcVDLvONQr7Pl5IgkTOmOHyR11ZjTOJtlUsK5NatW2cef/xxs2bNmpx3b7zxhjHGmOHDh/ueDx8+3Hv3xhtvmL59+5pBgwY54wwbNiwn/WHDhnlxNLrgggvMli1bvLB58+ZolcuDeKBEUdVtzwbxqJ7tTtqe2zWllFLqeWTbtBdi9lIoffbZZ7GllTTCdTbJVDIgt3nzZrNy5Upz0003OZ2/4iXqRBR4sTrG0eIHpbPLLruYgQMH+kJ3UtDAke+TPsjyoa1bt5ZE6ihpe2zXlFJKqWcSbi7xJHy+3xdC3QHkSqkV6SlrQMmA3O9+9zvz1ltvmf3339/svPPOZueddzYPPvigufrqq83OO+/sSeJQavbWW29570aMGGH+8Y9/mPfee88Z580338zJ/+23386R9qXUvaRNUPrCL1G/fv12CKljSiltT5Q0U4TtjSQIw81tmM2u6/uwRMJ3XLFVq+kBr3BUMiB38MEHm2eeecY8+eSTXpg2bZpZvHixefLJJ8348ePNiBEjzL333ut9849//MM8+OCDpqGhwRhjzP7772/69Onji/OXv/zFPPvss16cAw880GzZssX89re/9eI8+uijZsuWLV6clLqHop5k1aSO6YROqafR008/bU477TSnKcf2QDvSoqupN+NO00W4uY262S1kcyzBW7ElcukmPhyVDMgNGDDA1NTU+MJuu+1mBg8ebGpqajyfcpdddpm5/fbbzbPPPmtOOukk079/f3P88ccbY4wpKyszJ598sjn33HPNfffdZ5544gmzZMkSM3nyZO/wRFVVlZk/f75Zvny52bRpk9m0aZNZvny5WbBggdlnn31KVf1QxGrFnkou4GaboK4d3o60ULhoR69/T6PvfOc75g9/+IO5/fbbS12UotKOsugiH4qDL3EaGzZsiOT2KChN1/N8+0n6juuOww7b+3iKg0p+atVFX/3qV01ra6s588wzzbRp08xrr71m7rnnHjNgwAAvzne+8x1z1FFHmWOOOcbMmDHD9O/f3/ziF78wO+20kxfnxz/+sZk8ebKZN2+emTdvnqmtrTX//d//XYoqhSae2N0BXFzph915Bhngakxem6AuxhBmodjeQU4KZnsesenHP/7xjxKXpPi0Iyy6hUrDbGm2t7ebbDbrAbooFBZcxsE/JI+W/wetI0ngWd0tOe02Kr4nlO2DwvpziZO6w5eZy0eO5ttOi8vP29vbc57b8rTRhg0bKJvNUiaTiVALvbyuvJPqjy1MuZJa9pR0OvLIIymbzdJ3vvOdUhclpYRTR0cHtbe35+W3TOPLYeJFpQ8//NDzI8f+WsOsI3HUqRCKwx9cd/uU6zF+5FJyUzF3uC+//LJzNxl25yl3k3K3oqUZtCP89NNPzdatW81jjz0WWvpnK68t7w0bNnS7VCtMXrJtwtoOptRzKOi0fUopVVRUmJkzZ+Yl4dP
|
|||
|
|
"text/plain": [
|
|||
|
|
"<Figure size 804.6x480 with 1 Axes>"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlgAAAGdCAYAAADOqw1GAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjYsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvq6yFwwAAAAlwSFlzAAAPYQAAD2EBqD+naQAAtf1JREFUeJztfXmYlMXxfy3IoXIZELlZQBEEVgRG5BjAM+KdBDRiIq73z4vVHIZogqvGIyrxgCgSxXhgWE0w5qvGeLPEaNAg8cSIw+GdaAQ8QIT6/bHpoaamut9j3pn3ndn6PM/77M57dFdXV1d/urrffqsQEUGhUCgUCoVCERlaxC2AQqFQKBQKRaVBCZZCoVAoFApFxFCCpVAoFAqFQhExlGApFAqFQqFQRAwlWAqFQqFQKBQRQwmWQqFQKBQKRcRQgqVQKBQKhUIRMXaIW4BCsG3bNnjvvfegffv2UFVVFbc4CoVCoVAoKhiICBs3boQePXpAixbuGFVZE6z33nsPevfuHbcYCoVCoVAomhHWrVsHvXr1ct5T1gSrffv2ANBU0A4dOsQsjUKhUCgUikrGhg0boHfv3ln+4UJZEywzLdihQwclWAqFQqFQKEoCP8uSdJG7QqFQKBQKRcRQgqVQKBQKhUIRMWIlWF9//TVcfPHF0K9fP9hxxx2hf//+cOmll8K2bdviFEuhUCgUCoWiIMS6Buvqq6+GW265BX7729/CkCFD4IUXXoDa2lro2LEjzJgxI07RFAqFQqFQKEIjVoL1t7/9DY4++mg4/PDDAQCguroa7r33XnjhhRfiFEuhUCgUCoWiIMQ6RTh+/Hh44okn4M033wQAgBUrVsDSpUvhsMMOE+/fvHkzbNiwIedQKBQKhUKhSBpijWBdeOGFsH79ehg0aBC0bNkStm7dCr/4xS/g+OOPF++/8sorob6+vsRSKhQKhUKhUARDrBGsRYsWwd133w0LFy6Ef/zjH/Db3/4Wrr32Wvjtb38r3j9z5kxYv3599li3bl2JJVYoFAqFQqHwRhUiYlyZ9+7dG37yk5/A2WefnT13+eWXw9133w1vvPGG5/MbNmyAjh07wvr163WjUYVCoVAoFEVFEN4RawTriy++yPtYYsuWLXWbBoVCoVAoFGWNWNdgHXnkkfCLX/wC+vTpA0OGDIHly5fD7Nmz4eSTT45TLIVCoVAoFIqCEOsU4caNG+FnP/sZLF68GD766CPo0aMHHH/88fDzn/8cWrdu7fm8ThEqFAqFQqEoFYLwjlgJVqFQgqVQKBSK5o7Vq1dDdXU1bN68Gdq0aRO3OBWNslmDpVAoFAqFIjxWr14N/fr1gxtvvBEOP/xweP/99+MWSfE/KMFSKBQKhaJMUV1dDZlMBu6//3746quv4KWXXopbJMX/oARLoVAoFIoyRnV1ddwiKAQowVIoFAqFQqGIGEqwFAqFQqFQKCKGEiyFQqFQKBSKiKEES6FQKBQKhSJiKMFSKBQKhUKhiBhKsBQKhUKhUCgihhIshUKhUCgUioihBEuhUCgUCoUiYijBUigUCoVCoYgYSrAUCkVFYPXq1b7v3bp1KyxatAg+/PDD4gmkUCiaNZRgKRSKsof54K1fktXY2Ahz586Fq6++uriCKRSKZgslWAqFouxhPnjr95ts//73vwEA4IUXXiiiVAqFojlDCZZCoagI6AdvFQpFkqAES6FQKBQKhSJiKMFSKBQKhUKhiBhKsBQKRbNCkLcNFQqFIiyUYCkUimYD87ahbs+gUCiKDSVYCoWi2cC8bdi1a9e4RVEoFBUOJVgKhaJZQd82VCgUpYASLIVCoVAoFIqIoQRLoVAoFAqFImIowVIoFAqFQqGIGEqwFApFs0NVVVXcIigqELoFiIJCCZZCoWh2UIKliBpBPziuqHwowVIoFAqFokAE/eC4ovKhBEuhUCgUigig5EpBESvBqq6uhqqqqrzj7LPPjlMshUKhUCgUioKwQ5yZL1u2DLZu3Zr9/corr8DBBx8MU6dOjVEqhUKhUCgUisIQK8Haddddc35fddVVMGDAAJg4cWJMEikUiuYARIxbBIVCUeGIlWBRfPXVV3D33XfDBRdcYH3DZ/PmzbB58+bs7w0bNpRKPIVCoVAoFArfSMwi9wceeAA+/fRTOOmkk6z3XHnlldCxY8fs0bt379IJqFAoFAqFQuETiSFYt912G0yePBl69OhhvWfmzJmwfv367LFu3boSSqhQKCoFOkWoUCiKjURMEa5ZswYef/xx+MMf/uC8r02bNtCmTZsSSaVQKBQKhUIRDomIYC1YsAC6du0Khx9+eNyiKBQKhUKhUBSM2AnWtm3bYMGCBTB9+nTYYYdEBNQUCkWFQz+Vo1Aoio3YCdbjjz8Oa9euhZNPPjluURQKhUKhUCgiQewE65BDDgFEhIEDB8YtikKhUFQU9MPDyYfWUeUidoKlUCgUiuixevVq6Nevn3bgCYbWUWVDCZZCoWh2aA5rsKqrqyGTyegHiBMMraPKhhIshUJRVOjoPD5ox518aB1VLpRgKRSKoiGpUyC60ahCoSg2lGApFIqiQadAFApFc4USLIVCUVQouVIoFM0RSrAUCoVCoVAoIoYSLIVC0ezQHN4iVCgU8UIJlkKhUIC+7ahQKKKFEiyFQtHswN8iTOrbjgqFonyhBEuhUDR76NuOCoUiaijBUuTANoJ/4YUXYPny5aUVRqEoIZRcKRSKKLFD3AIokgMzTSKN5C+44AIAAFiyZEkMkikUCoVCUV7QCJYiC50mUSgUCoUiGijBUuRAyZWiXKAL0hUKRZKhBEuhUJQd9K0/hUKRdCjBUigUZQedzlYoFEmHEiyFQlGWUHKlUCiSDCVYCoWiLKHTgwqFIslQgqVQKMoOugZLUUyoXSmigBIshUJRdtA1WIpiQcm7IioowVIoFGUD2ukpuVIUA0reFVFBCZZCoSgLaGRBUSoouVJEASVYCoWiLKCRBYVCUU5QgqVQKMoGSq4UCkW5QAmWQqFodkBEX/fpdKRCoQgLJViKUNCOR1Hp0DVfCoWiECjBUgSGdjyK5gC65kttXaFQBIUSLEVg6GJjRbmjqqrK132GXOmAovxR7PpT+1BwKMFShIKSK0VzQdABhXa0yUOxSbKScIWE2AnWu+++C9/73vegc+fOsNNOO8Hw4cPhxRdfjFsshUIRA5LaQQUhV9rRJg/FjrprVF8hIVaC9d///hfGjRsHrVq1gkceeQRee+01uO6666BTp05xiqVQKIoMiYBUAjnRjja5KHadaJ0rOHaIM/Orr74aevfuDQsWLMieUyNVKCobhkhxIlJKcuJ3DVYYqA9TKBQAMUewHnzwQRg1ahRMnToVunbtCvvssw/Mnz/fev/mzZthw4YNOYdCoSgvuIiUkhOFQlEpiJVgvf3223DzzTfDHnvsAY8++iiceeaZcN5558Gdd94p3n/llVdCx44ds0fv3r1LLLFCoYgCSqTsKOcpUoVCsR2xEqxt27bBiBEj4IorroB99tkHzjjjDDjttNPg5ptvFu+fOXMmrF+/PnusW7euxBIrFApF8VAJ69AUCkUTYiVY3bt3h7322ivn3ODBg2Ht2rXi/W3atIEOHTrkHAqFonjQjr600EXyCkXlIFaCNW7cOFi5cmXOuTfffBP69u0bk0SKSoSShHCo5GiK328RxgElVwpFZSBWgnX++efDc889B1dccQW89dZbsHDhQrj11lvh7LPPjlMsRQWhkklCsaHRlMKhdqdQNF/ESrBSqRQsXrwY7r33Xhg6dChcdtllcP3118MJJ5wQp1iKCoKShMKgegsPv+ReSZhCUZmIdR8sAIAjjjgCjjjiiLjFUFQwlCQo4oAfcm/bE0yhUJQ/Yv9UjkKhKD00alIaeJEmjbAqFOGRdD+mBEtRNkh6YyoX6Lq0cIvci6UvJVcKRXCUgx9TgqUoC5RDYyoXNJeoSZS2Esb+Sm2r2jYUzQnl4MeUYCnKAuXQmMoJla5HL0IU9FuEQe2v1AMCHYAomiOS7seUYCnKBklvTIrkoBiEPEhapR4Q6ABEoXAjjsGHEiyFokLR3KMZcZO
|
|||
|
|
"text/plain": [
|
|||
|
|
"<Figure size 804.6x480 with 1 Axes>"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"sc.pl.violin(adata, \"n_genes_by_counts\", jitter=0.4, groupby=\"assay\")\n",
|
|||
|
|
"sc.pl.violin(adata, \"total_counts\", jitter=0.4, groupby=\"assay\")\n",
|
|||
|
|
"sc.pl.violin(adata, \"pct_counts_MT\", jitter=0.4, groupby=\"assay\")"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "adolescent-burlington",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"### Filter"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "developmental-fluid",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"We filter the count matrix to remove cells with an extreme number of genes expressed.\n",
|
|||
|
|
"We also filter out cells with a mitchondrial content of more than 20%."
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 15,
|
|||
|
|
"id": "apart-faith",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"CPU times: user 433 ms, sys: 107 ms, total: 540 ms\n",
|
|||
|
|
"Wall time: 539 ms\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"%%time\n",
|
|||
|
|
"adata = adata[adata.obs[\"n_genes_by_counts\"] < 5000]\n",
|
|||
|
|
"adata = adata[adata.obs[\"pct_counts_MT\"] < 20]"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "spiritual-filing",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"We also filter out genes that are expressed in less than 3 cells."
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 16,
|
|||
|
|
"id": "municipal-buyer",
|
|||
|
|
"metadata": {
|
|||
|
|
"scrolled": true
|
|||
|
|
},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"filtered out 8265 genes that are detected in less than 3 cells\n",
|
|||
|
|
"CPU times: user 2.09 s, sys: 316 ms, total: 2.41 s\n",
|
|||
|
|
"Wall time: 2.88 s\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"%%time\n",
|
|||
|
|
"rsc.pp.filter_genes(adata, min_cells=3)"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "juvenile-inside",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"The size of our count matrix is now reduced."
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 17,
|
|||
|
|
"id": "photographic-hobby",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"text/plain": [
|
|||
|
|
"(216382, 28133)"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"execution_count": 17,
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "execute_result"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"adata.shape"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "armed-scratch",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"### Normalize"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "wicked-whole",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"We normalize the count matrix so that the total counts in each cell sum to 1e4. This will help remove some technical variability due to sequencing depth."
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 18,
|
|||
|
|
"id": "chemical-chair",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"CPU times: user 31.1 ms, sys: 23 μs, total: 31.1 ms\n",
|
|||
|
|
"Wall time: 31.1 ms\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"%%time\n",
|
|||
|
|
"rsc.pp.normalize_total(adata, target_sum=1e4)"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "threaded-sweden",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"Next, we data transform the count matrix."
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 19,
|
|||
|
|
"id": "confirmed-rainbow",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"CPU times: user 1.05 ms, sys: 842 μs, total: 1.89 ms\n",
|
|||
|
|
"Wall time: 1.87 ms\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"%%time\n",
|
|||
|
|
"rsc.pp.log1p(adata)"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "celtic-presence",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"### Select Most Variable Genes"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "693e9276-100f-469c-a7f1-452c32c61bfb",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"We now search for highly variable genes, which help focus our analysis on the most informative genes while improving computational efficiency. This function supports the flavors `cell_ranger`, `seurat`, `seurat_v3`, and `pearson_residuals`. \n",
|
|||
|
|
"As in Scanpy, you can either filter based on variance cutoffs or select the `n_top_genes`. Additionally, you can use a `batch_key` to correct for batch effects. \n",
|
|||
|
|
"\n",
|
|||
|
|
"In this example, we use the `cell_ranger` method, which selects highly variable genes based on the log-normalized counts stored in `.X`.\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 20,
|
|||
|
|
"id": "satellite-criterion",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"CPU times: user 318 ms, sys: 10.2 ms, total: 328 ms\n",
|
|||
|
|
"Wall time: 350 ms\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"%%time\n",
|
|||
|
|
"rsc.pp.highly_variable_genes(adata, n_top_genes=5000, flavor=\"cell_ranger\")"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "arctic-upgrade",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"Now we safe this version of the AnnData as adata.raw. "
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 21,
|
|||
|
|
"id": "falling-soldier",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"CPU times: user 1.69 ms, sys: 132 ms, total: 134 ms\n",
|
|||
|
|
"Wall time: 183 ms\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"%%time\n",
|
|||
|
|
"adata.raw = adata"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "north-concept",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"We now restrict our AnnData object to only the highly variable genes. \n",
|
|||
|
|
"This step reduces the number of features, focusing the analysis on the most informative genes while improving computational efficiency."
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 22,
|
|||
|
|
"id": "peripheral-annotation",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"CPU times: user 302 ms, sys: 121 ms, total: 422 ms\n",
|
|||
|
|
"Wall time: 742 ms\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"%%time\n",
|
|||
|
|
"rsc.pp.filter_highly_variable(adata)"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "seventh-liquid",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"Next, we regress out the effects of total counts per cell and mitochondrial content. \n",
|
|||
|
|
"This helps remove technical variation that could bias downstream analyses. \n",
|
|||
|
|
"\n",
|
|||
|
|
"As in Scanpy, you can regress out any numerical column from `.obs`, allowing for the correction of batch effects, sequencing depth, or other confounding factors.\n",
|
|||
|
|
"\n",
|
|||
|
|
"For more complex data sets, these steps may overcorrect and remove potentially useful information and need to be carefully used."
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 23,
|
|||
|
|
"id": "organizational-judgment",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"CPU times: user 788 ms, sys: 141 ms, total: 929 ms\n",
|
|||
|
|
"Wall time: 1.6 s\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"%%time\n",
|
|||
|
|
"rsc.pp.regress_out(adata, keys=[\"total_counts\", \"pct_counts_MT\"])"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "simple-change",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"### Scale"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "monetary-burke",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"Finally, we scale the count matrix to obtain a z-score transformation, standardizing gene expression across cells. \n",
|
|||
|
|
"To prevent extreme values from dominating the analysis, we apply a cutoff of 10 standard deviations. \n",
|
|||
|
|
"This ensures that highly expressed genes do not disproportionately influence downstream computations."
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 24,
|
|||
|
|
"id": "visible-explanation",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"CPU times: user 1.12 s, sys: 189 ms, total: 1.31 s\n",
|
|||
|
|
"Wall time: 1.71 s\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"%%time\n",
|
|||
|
|
"rsc.pp.scale(adata, max_value=10)"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "delayed-combination",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"Now we move `.X` out of the GPU."
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 25,
|
|||
|
|
"id": "lightweight-breeding",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"Total Preprocessing time: 12.143767356872559\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"preprocess_time = time.time()\n",
|
|||
|
|
"print(\"Total Preprocessing time: %s\" % (preprocess_time - preprocess_start))"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "first-reggae",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"## Clustering and Visualization"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "early-feelings",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"### Principal component analysis"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "available-trauma",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"We use Principal Component Analysis (PCA) to reduce the dimensionality of the gene expression matrix to its top 100 principal components. \n",
|
|||
|
|
"This step captures the most important sources of variation in the data while reducing computational complexity. \n",
|
|||
|
|
"\n",
|
|||
|
|
"We use the GPU-accelerated PCA implementation from cuML, which significantly speeds up computation compared to CPU-based methods."
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 26,
|
|||
|
|
"id": "virtual-insertion",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"CPU times: user 618 ms, sys: 330 ms, total: 948 ms\n",
|
|||
|
|
"Wall time: 4.19 s\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"%%time\n",
|
|||
|
|
"rsc.tl.pca(adata, n_comps=100)"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "pretty-cheese",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"We can use scanpy `pca_variance_ratio` plot to inspect the contribution of single PCs to the total variance in the data."
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 27,
|
|||
|
|
"id": "statewide-disposal",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkEAAAHFCAYAAAD1zS3+AAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjYsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvq6yFwwAAAAlwSFlzAAAPYQAAD2EBqD+naQAAhTdJREFUeJzt3XdYVNf6NuBnkCIg2BABQZooIFWxoWbsDTWKscbYELvHmFhD/IlGBdQYPbEmGlFjjdiiRrGBEntHUVABO7HSxAxtfX/4sY8Do6IygMxzX9dch1mz98zas5Mzb9611rtkQggBIiIiIg2jVdIdICIiIioJDIKIiIhIIzEIIiIiIo3EIIiIiIg0EoMgIiIi0kgMgoiIiEgjMQgiIiIijcQgiIiIiDQSgyAiIiLSSAyCiKjIhIaGQiaTITExsaS7UuIePHiAwMBAXLx4scBrgYGBkMlkxd8pIlLCIIiIioyPjw9OnDgBc3Pzku5KiXvw4AFmzJihMggaOnQoTpw4UfydIiIl2iXdASL69L18+RLly5dHtWrVUK1atZLujlrkXWNRZHAsLS1haWlZBL0ioo/BTBBRGbZjxw7IZDIcOnSowGvLli2DTCbD5cuXAQBnz55Fnz59YGNjA319fdjY2KBv3764ffu20nl5Q17h4eEYMmQIqlWrBgMDAygUCpXDYQcOHMDnn38OS0tLlC9fHrVq1cLw4cPx5MkTpffNGyK6evUq+vbti4oVK6J69eoYMmQIUlJSlI7Nzc3Fzz//DA8PD+jr66NSpUpo3Lgxdu3apXTc5s2b0aRJExgaGqJChQpo3749Lly48M7v7W3XePPmTQwePBgODg4wMDBAjRo10KVLF0RHR0vnR0REoEGDBgCAwYMHQyaTQSaTITAwUOla81/T3Llz4ejoCD09PZiammLAgAG4d+/eO/tLRB+GQRBRGda5c2eYmppi9erVBV4LDQ1FvXr14ObmBgBITExEnTp1sHDhQuzfvx8hISF4+PAhGjRoUCBgAYAhQ4ZAR0cH69atw9atW6Gjo6OyD7du3UKTJk2wbNkyhIeH4//+7/9w6tQpNGvWDFlZWQWO79GjB2rXro2wsDBMmTIFGzZswPjx45WOGTRoEMaNG4cGDRpg8+bN2LRpE7p27aoUfM2ZMwd9+/aFs7MztmzZgnXr1iEtLQ3NmzdHTExMob4/Vdf44MEDVK1aFcHBwdi3bx+WLFkCbW1tNGrUCLGxsQCAevXqSd/5999/jxMnTuDEiRMYOnToGz9r5MiRmDx5Mtq2bYtdu3bhhx9+wL59++Dt7a3y+yeiIiCIqEz75ptvhL6+vkhOTpbaYmJiBADx888/v/G87OxskZ6eLgwNDcWiRYuk9tWrVwsAYsCAAQXOyXstISFB5Xvm5uaKrKwscfv2bQFA7Ny5U3pt+vTpAoCYO3eu0jmjRo0S5cuXF7m5uUIIIY4ePSoAiICAgDf2/c6dO0JbW1uMHTtWqT0tLU2YmZmJXr16vfHcd11jftnZ2SIzM1M4ODiI8ePHS+1nzpwRAMTq1asLnJN3rXmuXbsmAIhRo0YpHXfq1CkBQHz33Xfv7AcRvT9mgojKuCFDhuDly5fYvHmz1LZ69Wro6emhX79+Ult6ejomT56MWrVqQVtbG9ra2qhQoQJevHiBa9euFXjfHj16FOrzHz16hBEjRsDKygra2trQ0dGBtbU1AKh8365duyo9d3Nzw7///otHjx4BAP766y8AwOjRo9/4mfv370d2djYGDBiA7Oxs6VG+fHnI5XJEREQUqu+qrjE7Oxtz5syBs7MzdHV1oa2tDV1dXdy4cUPl9RTGkSNHALzKcL2uYcOGcHJyUjmcSUQfjxOjicq4unXrokGDBli9ejWGDRuGnJwc/P777/j8889RpUoV6bh+/frh0KFDmDZtGho0aABjY2PIZDJ06tQJL1++LPC+hVkBlpubi3bt2uHBgweYNm0aXF1dYWhoiNzcXDRu3Fjl+1atWlXpuZ6eHgBIxz5+/BjlypWDmZnZGz/3n3/+AQBpXk5+WlqF++8/Vdf4zTffYMmSJZg8eTLkcjkqV64MLS0tDB06VOX1FMbTp0/f+HkWFhYF5mURUdFgEESkAQYPHoxRo0bh2rVriI+Px8OHDzF48GDp9ZSUFOzevRvTp0/HlClTpHaFQoFnz56pfM/CrJK6cuUKLl26hNDQUAwcOFBqv3nz5gdfS7Vq1ZCTk4OkpKQ3BmImJiYAgK1bt0pZpw+h6hp///13DBgwAHPmzFFqf/LkCSpVqvRBn5MX+D18+LDAqrEHDx5I10NERYvDYUQaoG/fvihfvjxCQ0MRGhqKGjVqoF27dtLrMpkMQggp65Jn5cqVyMnJ+eDPzQsi8r/vihUrPvg9O3bsCODV6rY3ad++PbS1tXHr1i14eXmpfHwomUxW4Hr27NmD+/fvK7Xlz2C9TatWrQC8CrBed+bMGVy7dg2tW7f+4P4S0ZsxE0SkASpVqoTu3bsjNDQUycnJmDBhgtKQkLGxMT777DPMmzcPJiYmsLGxQWRkJFatWvXB2Q0AcHR0hL29PaZMmQIhBKpUqYI///wTBw4c+OD3bN68Ob766ivMmjUL//zzDzp37gw9PT1cuHABBgYGGDt2LGxsbDBz5kwEBAQgPj4eHTp0QOXKlfHPP//g9OnTMDQ0xIwZMz7o8zt37ozQ0FA4OjrCzc0N586dw7x58wpkcOzt7aGvr4/169fDyckJFSpUgIWFBSwsLAq8Z506dTBs2DD8/PPP0NLSQseOHZGYmIhp06bBysqqwOo4IioazAQRaYjBgwfj0aNHyMzMLDABFwA2bNiAli1bYtKkSfD19cXZs2dx4MABVKxY8YM/U0dHB3/++Sdq166N4cOHo2/fvnj06BEOHjz4EVfyann/ggULcPz4cXzxxRfo1asXdu7cCVtbW+mYqVOnYuvWrYiLi8PAgQPRvn17TJo0Cbdv38Znn332wZ+9aNEi9O/fH0FBQejSpQt27dqFbdu2wd7eXuk4AwMD/Pbbb3j69CnatWuHBg0a4Jdffnnj+y5btgzBwcHYu3cvOnfujICAALRr1w7Hjx8vME+KiIqGTAghSroTRERERMWNmSAiIiLSSAyCiIiISCMxCCIiIiKNxCCIiIiINBKDICIiItJIDIKIiIhII5W5Yom5ubl48OABjIyMClXWn4iIiD4dQgikpaXBwsKi0PsAvkmZC4IePHgAKyurku4GERERqdHdu3cLVGp/X2UuCDIyMgLw6ssxNjYu4d4QERFRUUpNTYWVlZX0e/8xylwQlDcEZmxszCCIiIiojCqKKS8aNzF6+fLlJd0FIiIiKgU0LgiaM2dOSXeBiIiISoEyNxyWZ+DAgdDR0VFqE0Lg2bNnJdQjIiIiKk3KbBDUsmVLVKxYUalNCIEjR46UUI+IiIioNCmzQZCbmxtatGhRoH3atGnF3xkiIiIqdcrsnCAzMzOV7ZGRkcXcEyIiIiqNymwQZGFhobLdzs6umHtCREREpVGZDYJWrFhRoC0kJARz584tgd4QERFRaSMTQoiS7kRRSk1NRcWKFZGUlITq1asrvZaRkQFvb29cvHixZDpHREREHyXvdz4lJeWjiyKX2UyQvr5+gTYDAwOUsZiPiIiIPlCZDYJUBTu5ublIS0srgd4QERFRaVNmg6B58+YVaAsKCoJcLi+B3hAREVFpU2brBG3duhUHDx5EkyZNAAAnT55EcnIyl8gTERERgDKcCfrtt9/g6+uLuLg4ZGZmwt/fH+fPn4epqWlJd42IiIhKgTKbCerUqRPq1KmD69evw8/PD76+viXdJSIiIipFymwm6MSJEzh16hSioqKwYMGCku4OERERlTJlNgiqUaMGAMDV1RUvXrwo4d4QERFRaVNmh8NiY2NhaGgIAFAoFLh27Zq0bN7Z2bkku0ZERESlQJmtGF2zZk1oaRVMdMlkMsTHx5dAz4iIiOhjFWXF6DKbCYqOjv7oL4eIiIjKrjI7J4iIiIjobRgEERERkUZiEEREREQaiUEQERERaSQGQURERKSRGAQRERGRRmI
|
|||
|
|
"text/plain": [
|
|||
|
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"sc.pl.pca_variance_ratio(adata, log=True, n_pcs=100)"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "92674c3f-82c7-4baa-8019-d19c3710de66",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"Now, we transfer the `.X` matrix back to host (CPU) memory to free up GPU resources. \n",
|
|||
|
|
"\n",
|
|||
|
|
"Since some downstream analyses or visualizations may not require GPU acceleration, this step helps optimize memory usage, \n",
|
|||
|
|
"preventing unnecessary GPU load while still keeping the processed data accessible for further analysis."
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 28,
|
|||
|
|
"id": "f62c276a-4530-4044-9427-a1a5e66e6094",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"rsc.get.anndata_to_CPU(adata)"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "imported-meeting",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"### Computing the neighborhood graph and UMAP"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "biological-phenomenon",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"Next, we compute the neighborhood graph using `rsc`. \n",
|
|||
|
|
"\n",
|
|||
|
|
"Scanpy’s CPU-based implementation of nearest neighbor search uses an approximation, while the GPU-accelerated RAPIDS implementation computes the exact graph. \n",
|
|||
|
|
"Both methods are valid, but small differences in results can occur. \n",
|
|||
|
|
"\n",
|
|||
|
|
"The following approximate nearest neighbor (ANN) algorithms are also supported: \n",
|
|||
|
|
"- **`ivfflat`**: Uses an inverted file index for fast approximate search, suitable for very large datasets. \n",
|
|||
|
|
"- **`ivfpq`**: A variant of `ivfflat` that compresses data for even more efficient memory usage, trading off some accuracy. \n",
|
|||
|
|
"- **`cagra`**: A graph-based ANN method optimized for fast, high-accuracy queries on GPUs. \n",
|
|||
|
|
"- **`nn_descent`**: A graph-based method that incrementally refines the nearest neighbor search and works well for large, high-dimensional datasets. \n",
|
|||
|
|
"\n",
|
|||
|
|
"Since our dataset is relatively small, we use **brute-force (`brute`)** search to ensure exact results."
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 29,
|
|||
|
|
"id": "transparent-major",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"CPU times: user 2.02 s, sys: 853 ms, total: 2.88 s\n",
|
|||
|
|
"Wall time: 3.14 s\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"%%time\n",
|
|||
|
|
"rsc.pp.neighbors(adata, n_neighbors=15, n_pcs=40)"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "photographic-script",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"Next, we calculate the UMAP embedding using RAPIDS. \n",
|
|||
|
|
"\n",
|
|||
|
|
"UMAP (Uniform Manifold Approximation and Projection) is a dimensionality reduction technique that preserves local and global structure in the data. \n",
|
|||
|
|
"The RAPIDS implementation accelerates UMAP computation on GPUs, making it significantly faster than the standard CPU version."
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 30,
|
|||
|
|
"id": "timely-actor",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"CPU times: user 448 ms, sys: 190 ms, total: 638 ms\n",
|
|||
|
|
"Wall time: 1.95 s\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"%%time\n",
|
|||
|
|
"rsc.tl.umap(adata)"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "linear-highlight",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"### Clustering"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "wooden-submission",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"Next, we use the **Louvain** and **Leiden** algorithms for graph-based clustering with RAPIDS. \n",
|
|||
|
|
"\n",
|
|||
|
|
"Both methods detect communities (clusters) in the neighborhood graph by optimizing modularity: \n",
|
|||
|
|
"- **Louvain**: A hierarchical clustering algorithm that iteratively refines clusters for optimal modularity. \n",
|
|||
|
|
"- **Leiden**: An improved version of Louvain that guarantees well-connected, more stable clusters. \n",
|
|||
|
|
"\n",
|
|||
|
|
"Using RAPIDS accelerates the clustering process on GPUs, making it significantly faster than traditional CPU implementations."
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 31,
|
|||
|
|
"id": "certified-mixer",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"CPU times: user 765 ms, sys: 372 ms, total: 1.14 s\n",
|
|||
|
|
"Wall time: 2.81 s\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"%%time\n",
|
|||
|
|
"rsc.tl.louvain(adata, resolution=0.6)"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 32,
|
|||
|
|
"id": "initial-ribbon",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"CPU times: user 511 ms, sys: 189 ms, total: 700 ms\n",
|
|||
|
|
"Wall time: 2.16 s\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"%%time\n",
|
|||
|
|
"rsc.tl.leiden(adata, resolution=0.6)"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 33,
|
|||
|
|
"id": "frozen-convertible",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABHwAAAGtCAYAAABk0mMCAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjYsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvq6yFwwAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzs3Xm8pFdd4P/POedZa7tL72s6nYSELMgmEILsCAIiIDCCiBDHcWMcf8gMjo4y6rjgjLszLsMqQUQGQUX2VZAQdpKQ0Fk6vffd763tWc/y++O5fbtvugMJJiTpPu+8+tWdqqeqnqpb3XXqe76LcM45PM/zPM/zPM/zPM/zvLOGvL9PwPM8z/M8z/M8z/M8z7t3+YCP53me53me53me53neWcYHfDzP8zzP8zzP8zzP884yPuDjeZ7neZ7neZ7neZ53lvEBH8/zPM/zPM/zPM/zvLOMD/h4nud5nud5nud5nuedZXzAx/M8z/M8z/M8z/M87yzjAz6e53me53me53me53lnGR/w8TzP8zzP8zzP8zzPO8v4gI/nnaXe+ta3IoTgwIED9/ep3G179uzhla985f19Gp7neZ7ned813+ma7Z7c7slPfjJPfvKTv6Pz8zzvwSu4v0/A8zzvhPe+9730er37+zQ8z/M8z/Me8J7znOdw7bXXsm3btvv7VDzPe4DyAR/P8x4wHvGIR9zfp+B5nud5nvegsGnTJjZt2nR/n4bneQ9gvqTL884hb37zm/me7/kekiRhenqaF7zgBdx8883rjrmrlN9XvvKV7NmzB4C6rtm8eTM/9mM/dtpxKysrpGnKa17zGgCKouAXf/EXefjDH87ExATT09NceeWV/MM//MNpt71zSdenPvUphBC8853v5Fd+5VfYvn07vV6Ppz/96ezbt+87fyE8z/M8z/MewD72sY/xtKc9jV6vR6vV4qqrruLjH//4umPOVNLlnOP3fu/3OO+880iShEc+8pF88IMfPONjDAYDXvva13L++ecTRRE7duzgF37hFxiPx+uOE0Lw6le/mre//e089KEPpdVq8T3f8z28//3vv9eft+d59y4f8PG8c8Tv/M7v8BM/8RNcdtll/P3f/z1//Md/zPXXX8+VV17Jrbfeeo/uKwxDXv7yl/Oe97yHwWCw7rp3vvOdFEXBq171KgDKsmRpaYnXvva1vO997+Od73wnT3jCE3jhC1/IX//1X9+tx/vlX/5lDh48yBvf+Eb+6q/+iltvvZUf/MEfxBhzj87b8zzP8zzvge6aa67h+7//++n1erztbW/j7/7u75ienuaZz3zmaUGfO/v1X/91Xve61/GMZzyD973vffzMz/wMP/mTP3naRlmWZTzpSU/ibW97Gz//8z/PBz/4QV73utfx1re+lec973k459Yd/8///M/82Z/9Gb/xG7/Be97znrWNw/3799/rz9/zvHuPL+nyvHPAysoKv/mbv8mzn/1s/uZv/mbt8ic/+clcdNFF/Pf//t95xzvecY/u81WvehV/+Id/yLve9S5+8id/cu3yt771rTzqUY/iiiuuAGBiYoK3vOUta9cbY3ja057G8vIyf/RHf8QrXvGKb/tYl156Kddcc83a/yuleMlLXsIXv/hFHve4x92j8/Y8z/M8z3ugyrKM//Sf/hPPfe5zee9737t2+bOf/Wwe+chH8su//Mtcd911Z7ztysoKb3jDG3jBC17AG9/4xrXLL7vsMq666iouvvjitcv+5E/+hOuvv57rrruORz/60QA87WlPY8eOHbzoRS/iQx/6ED/wAz+wdnye53zsYx+j2+0C8MhHPpLt27fzd3/3d/zSL/3SvfoaeJ537/EZPp53Drj22mvJ8/y0CVi7du3iqU996rfdLTqTK664gkc96lHrgjk333wzX/jCF7j66qvXHfvud7+bq666ik6nQxAEhGHIm970ptPKye7K8573vHX//7CHPQyAgwcP3uPz9jzP8zzPe6D63Oc+x9LSEj/+4z+O1nrtl7WWZz3rWXzxi188reTqhGuvvZaiKPjRH/3RdZc//vGP57zzzlt32fvf/34uv/xyHv7wh697nGc+85kIIfjUpz617vinPOUpa8EegC1btrB582a/FvO8Bzgf8PG8c8Di4iLAGac4bN++fe36e+rqq6/m2muv5Zvf/CYAb3nLW4jjmJe+9KVrx/z93/89L3nJS9ixYwfXXHMN1157LV/84he5+uqrKYribj3Ohg0b1v1/HMdAs9vkeZ7neZ53tpidnQXgRS96EWEYrvv1hje8AeccS0tLZ7ztifXc1q1bT7vuzpfNzs5y/fXXn/YY3W4X5xwLCwvrjr/zWgya9Zhfi3neA5sv6fK8c8CJD+njx4+fdt2xY8fYuHHj2v8nSUK/3z/tuDt/8AO89KUv5TWveQ1vfetb+a3f+i3e/va38/znP5+pqam1Y6655hrOP/983vWudyGEWLu8LMt/03PyPM/zPM8725xYk/3pn/7pXZatb9my5YyXn1jvzczMnHbdzMzM2vCNE4+TpilvfvObv+V5eJ734OYDPp53DrjyyitJ05RrrrmGF7/4xWuXHzlyhE984hO86EUvWrtsz549vPvd76Ysy7VMmsXFRT73uc/R6/XW3e/U1BTPf/7z+eu//muuvPJKZmZmTivnEkIQRdG6YM/MzMwZp3R5nud5nuedy6666iomJye56aabePWrX32Pbvu4xz2OJEl4xzvewQ//8A+vXf65z32OgwcPrgv4PPe5z+W3f/u32bBhA+eff/69dfqe5z3A+JIuzzsHTE5O8qu/+qv84z/+I694xSv44Ac/yDXXXMNTnvIUkiTh9a9//dqxP/ZjP8bS0hIvf/nL+chHPsI73/lOnv70p58W7Dnh6quv5vjx47z61a9m586dPP3pT193/XOf+1z27dvHz/7sz/KJT3yCt73tbTzhCU84Y3mZ53me53neuazT6fCnf/qn/OVf/iU/8iM/wv/7f/+Pf/mXf+E973kPv/Zrv8bP/MzP3OVtp6ameO1rX8t73/te/v2///d8+MMf5o1vfCMveclLTivp+oVf+AUuvvhinvjEJ/IHf/AHfOxjH+MjH/nI2vF31Rja87wHF5/h43nniP/6X/8rmzdv5k/+5E9417veRZqmPPnJT+a3f/u3ueiii9aOu+qqq3jb297G7/7u7/JDP/RD7N27l9e//vV84AMfOK2BH8DTn/50du3axeHDh/mVX/kVpFwfR37Vq17F3Nwcf/EXf8Gb3/xm9u7dyy/90i9x5MgRfv3Xf/2+ftqe53me53kPKi9/+cvZvXs3v/d7v8dP/dRPMRwO2bx5Mw9/+MNPG8BxZ7/xG79Bu93m//yf/8Pb3/52LrnkEv7iL/6C//W//te649rtNp/5zGf43d/9Xf7qr/6KO+64gzRN2b17N09/+tPXZQN5nvfgJZxz7v4+Cc/zPM/zPM/zPM/zPO/e40u6PM/zPM/zPM/zPM/zzjI+4ON5nud5nud5nud5nneW8QEfz/M8z/M8z/M8z/O8s4wP+Hie53me53me53me551lfMDH8zzP8zzP8zzP8zzvLOMDPp7neZ7neZ7neZ7neWeZ4P54UGstx44do9vtIoS4P07B8zzP87xvwznHcDhk+/btSOn3iM5mfm3meZ7neQ8O92R9dr8EfI4dO8auXbvuj4f2PM/zPO8eOnz4MDt37ry/T8O7D/m1med5nuc9uNyd9dn9EvDpdrtAc4K9Xu/+OAXP8zzP876NwWDArl271j63vbOXX5t5nud53oPDPVmf3S8BnxOpwr1ezy8qPM/zPO8Bzpf4nP382szzPM/zHlzuzvrMF+R7nud5nud5nud5nuedZXzAx/M8z/M8z/M8z/M87yzjAz6e53me53me53me53lnGR/w8TzP8zzP8zzP8zzPO8v4gI/neZ7neZ7neZ7ned5Zxgd8PM/zPM/zPM/zPM/zzjI+4ON5nud5nud5nud5nneW8QEfz/M8z/M8z/M8z/O8s4wP+Hie53me53me53me551lfMDH8zzP8zzP8zzP8zzvLOMDPp7neZ7
|
|||
|
|
"text/plain": [
|
|||
|
|
"<Figure size 1455.6x480 with 2 Axes>"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"CPU times: user 1.04 s, sys: 13.9 ms, total: 1.05 s\n",
|
|||
|
|
"Wall time: 1.05 s\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"%%time\n",
|
|||
|
|
"sc.pl.umap(adata, color=[\"louvain\", \"leiden\"], legend_loc=\"on data\")"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 34,
|
|||
|
|
"id": "amateur-equality",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABT0AAAKOCAYAAABk9L1ZAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjYsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvq6yFwwAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzs/XeYXWd56P1/V929T68aSaMuq9iWO7axjTHYYCfU5ATCe5IQSN6QkHMI4QeHElLOSd6c5ARyIBAMIXQMsY2NsY17lWRJo15mRtP77F5X/f0xtmxhGUwsW7a4P9ely7PXrLX386y1x/vZ93qe+1Z83/cRQgghhBBCCCGEEEKIs4R6phsghBBCCCGEEEIIIYQQp5MEPYUQQgghhBBCCCGEEGcVCXoKIYQQQgghhBBCCCHOKhL0FEIIIYQQQgghhBBCnFUk6CmEEEIIIYQQQgghhDirSNBTCCGEEEIIIYQQQghxVpGgpxBCCCGEEEIIIYQQ4qwiQU8hhBBCCCGEEEIIIcRZRYKeQgghhBBCCCGEEEKIs4oEPYUQ4kVSFIVPfepTJx4/8MADKIrCAw888KKfY2pqik996lPs2bPntLdPCCGEEEIIIYQQSyToKYQQr6CpqSk+/elPS9BTCCGEEEIIIYR4GUnQUwghhBBCCCGEEEIIcVaRoKcQ4qx3+PBh3v3ud9Pa2kogEKCnp4f3vOc9NBoNAGZmZnj/+99PV1cXpmnS19fHpz/9aRzHOa3teOCBBzj//PMBeN/73oeiKCeWzH/9619HURQef/zx5x33mc98BsMwmJqaAuCKK65gw4YNPPzww1x44YWEQiE6Ozv5xCc+geu6Jx1rWRaf/exnWbNmDYFAgObmZt73vvcxPz9/WvsmhBBCCPFyGRwc5H3vex/9/f2Ew2E6Ozu54YYb2Ldv30n7eZ7HZz/7WVavXk0oFCKZTHLOOefwj//4jyf2mZ+f5/d+7/fo7u4+MTa65JJLuPfee0/sc8899/DWt76Vrq4ugsEgK1eu5P3vfz8LCwsn9nn44YdRFIVvfetbz2vvv/3bv6EoCjt27HgZzoYQQogXSz/TDRBCiJfTwMAAl156KU1NTXzmM5+hv7+f6elpbrvtNizLIpfLsW3bNlRV5X/8j//BihUrePzxx/nsZz/LyMgIN99882lry9atW7n55pt53/vex8c//nHe/OY3A9DV1UVLSwsf+chH+PznP89FF1104hjHcfjiF7/ITTfdREdHx4ntMzMzvOtd7+KjH/0on/nMZ7jjjjv47Gc/Sy6X43Of+xywNPB/61vfysMPP8xHPvIRLr74YkZHR/nkJz/JFVdcwc6dOwmFQqetf0IIIYQQL4epqSkymQx/8zd/Q3NzM9lslq997WtccMEF7N69m9WrVwPwv/7X/+JTn/oUH//4x3nd616HbdscPnyYfD5/4rl+67d+i127dvGXf/mXrFq1inw+z65du1hcXDyxz9DQEBdddBG/8zu/QyKRYGRkhL//+7/n0ksvZd++fRiGwWWXXcaWLVv4/Oc/z7vf/e6T2vu5z32O888//8TNbiGEEGeIL4QQZ7HXv/71fjKZ9Ofm5k75+/e///1+NBr1R0dHT9r+d3/3dz7gHzhw4MQ2wP/kJz954vH999/vA/7999//otuzY8cOH/Bvvvnm5/3uk5/8pG+apj87O3ti23e+8x0f8B988MET2y6//HIf8G+99daTjv/d3/1dX1XVE3351re+5QP+Lbfccso2/PM///OLbrcQQgghxKuF4zi+ZVl+f3+//yd/8icntl9//fX+5s2bf+6x0WjU/+M//uMX/Vqe5/m2bfujo6PPG3/dfPPNPuDv3r37xLbt27f7gP+1r33txXdICCHEy0KWtwshzlrVapUHH3yQd7zjHTQ3N59ynx/96EdceeWVdHR04DjOiX/XXXcdAA8++OAr1t4PfOADAHzpS186se1zn/scGzdu5HWve91J+8ZiMd7ylrectO03fuM38DyPhx56CFjqWzKZ5IYbbjipb5s3b6atre2XqjovhBBCCHGmOI7DX/3VX7Fu3TpM00TXdUzT5NixYxw6dOjEftu2bWNgYIAPfvCD/OQnP6FYLD7vubZt28ZXv/pVPvvZz/LEE09g2/bz9pmbm+P3f//36e7uRtd1DMOgt7cX4KTXe/e7301LSwuf//znT2z7p3/6J5qbm3nnO995Ok+BEEKI/wQJegohzlq5XA7Xdenq6nrBfWZnZ7n99tsxDOOkf+vXrwc4KXfTy621tZV3vvOdfPGLX8R1Xfbu3cvDDz/MH/7hH55y35/V1tYGcGJ51uzsLPl8HtM0n9e/mZmZV7RvQgghhBD/WR/+8If5xCc+wY033sjtt9/Ok08+yY4dO9i0aRO1Wu3Efn/+53/O3/3d3/HEE09w3XXXkclkuOqqq9i5c+eJfb7zne/w3ve+ly9/+ctcdNFFpNNp3vOe9zAzMwMspQd6wxvewA9+8AM+8pGP8NOf/pTt27fzxBNPAJz0eoFAgPe///1885vfJJ/PMz8/z3e/+11+53d+h0Ag8AqdHSGEEC9EcnoKIc5a6XQaTdOYmJh4wX2ampo455xz+Mu//MtT/v65eTRfCR/60If4+te/zq233spdd91FMpnkN3/zN5+33+zs7PO2PTNYz2QywFLfMpkMd9111ylfKxaLncaWCyGEEEK8PP793/+d97znPfzVX/3VSdsXFhZIJpMnHuu6zoc//GE+/OEPk8/nuffee/nYxz7Gtddey/j4OOFwmKamJv7hH/6Bf/iHf2BsbIzbbruNj370o8zNzXHXXXexf/9+BgYG+OpXv8p73/veE889ODh4yrZ94AMf4G/+5m/4yle+Qr1ex3Ecfv/3f/9lOQ9CCCF+ORL0FEKctUKhEJdffjnf+973+Mu//Euampqet8/111/PnXfeyYoVK0ilUi97m5656//cWQLPde6553LxxRfzP//n/2T//v383u/9HpFI5Hn7lUolbrvttpOWuH/zm99EVdUTS+Gvv/56vv3tb+O6LhdccMHL0BshhBBCiJefoijPmzl5xx13MDk5ycqVK095TDKZ5G1vexuTk5P88R//MSMjI6xbt+6kfXp6evjDP/xDfvrTn/Loo4+eeC3gea/3xS9+8ZSv097eztvf/nb++Z//GcuyuOGGG+jp6flP9VMIIcTpJUFPIcRZ7ZlKmxdccAEf/ehHWblyJbOzs9x222188Ytf5DOf+Qz33HMPF198MX/0R3/E6tWrqdfrjIyMcOedd/KFL3zh5y6P/2WtWLGCUCjEN77xDdauXUs0GqWjo+OkGaUf+tCHeOc734miKHzwgx885fNkMhk+8IEPMDY2xqpVq7jzzjv50pe+xAc+8IETA+13vetdfOMb3+BNb3oTH/rQh9i2bRuGYTAxMcH999/PW9/6Vm666abT1jchhBBCiJfD9ddfz1e/+lXWrFnDOeecw1NPPcXf/u3fPm+MdsMNN7BhwwbOO+88mpubGR0d5R/+4R/o7e2lv7+fQqHAlVdeyW/8xm+wZs0aYrEYO3bs4K677uLXfu3XAFizZg0rVqzgox/9KL7vk06nuf3227nnnntesH0f+tCHTtxgvvnmm1++EyGEEOKXIkFPIcRZbdOmTWzfvp1PfvKT/Pmf/zmlUom2tjZe//rXY5om7e3t7Ny5k7/4i7/gb//2b5mYmCAWi9HX18cb3/jG0z77MxwO85WvfIVPf/rTvOENb8C2bT75yU/yqU996sQ+N954I4FAgCuvvJL+/v5TPk9bWxuf//zn+W//7b+xb98+0uk0H/vYx/j0pz99Yh9N07jtttv4x3/8R77+9a/z13/91+i6TldXF5dffjkbN248rX0TQgghhHg5/OM//iOGYfDXf/3XlMtltm7dyg9+8AM+/vGPn7TflVdeyS233MKXv/xlisUibW1tXHPNNXziE5/AMAyCwSAXXHABX//61xkZGcG2bXp6evizP/szPvKRjwBgGAa33347H/rQh3j/+9+PrutcffXV3HvvvS84g3Pbtm0sW7aMUCjEVVdd9bK
|
|||
|
|
"text/plain": [
|
|||
|
|
"<Figure size 1455.6x480 with 2 Axes>"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"fig = sc.pl.umap(adata, color=[\"cell_type\", \"assay\"], return_fig=True)\n",
|
|||
|
|
"ax = fig.axes[0]\n",
|
|||
|
|
"ax.legend_.set_title(\"cell_type\")\n",
|
|||
|
|
"ax.legend_.set_bbox_to_anchor((-0.2, -0.4))\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "8fbe8181-285f-4ff1-9d1a-28621762b92b",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"### Batch Correction \n",
|
|||
|
|
"\n",
|
|||
|
|
"In the previous UMAP, we observed strong batch effects between assay types. \n",
|
|||
|
|
"To correct for this, we apply **Harmony**, a method that aligns different batches while preserving biological variation. \n",
|
|||
|
|
"\n",
|
|||
|
|
"After applying Harmony, we will redo: \n",
|
|||
|
|
"- **Neighborhood search** to recompute the k-nearest neighbor graph \n",
|
|||
|
|
"- **UMAP** to visualize the corrected embedding \n",
|
|||
|
|
"- **Graph-based clustering** to identify cell populations without batch-driven artifacts \n",
|
|||
|
|
"\n",
|
|||
|
|
"This ensures that batch effects do not drive clustering results while retaining meaningful biological structure.\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 35,
|
|||
|
|
"id": "351529fa-3ebc-432e-b370-71283a0a45b8",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"CPU times: user 5.15 s, sys: 856 ms, total: 6 s\n",
|
|||
|
|
"Wall time: 8.85 s\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"%%time\n",
|
|||
|
|
"rsc.pp.harmony_integrate(adata, key=\"assay\", dtype=cp.float32)"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 36,
|
|||
|
|
"id": "413c0f53-cd12-499d-9bb0-90606e15c6ff",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"CPU times: user 1.96 s, sys: 1.16 s, total: 3.12 s\n",
|
|||
|
|
"Wall time: 7.84 s\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"%%time\n",
|
|||
|
|
"rsc.pp.neighbors(adata, n_neighbors=15, n_pcs=40,use_rep=\"X_pca_harmony\",key_added=\"harmony\")\n",
|
|||
|
|
"rsc.tl.umap(adata,neighbors_key=\"harmony\",key_added=\"X_umap_harmony\")\n",
|
|||
|
|
"rsc.tl.louvain(adata, resolution=0.6,neighbors_key=\"harmony\",key_added=\"louvain_harmony\")\n",
|
|||
|
|
"rsc.tl.leiden(adata, resolution=0.6,neighbors_key=\"harmony\",key_added=\"leiden_harmony\")"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 37,
|
|||
|
|
"id": "e07cbbbb-cc5c-4e1c-9295-863f062f96cf",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABHwAAAGtCAYAAABk0mMCAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjYsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvq6yFwwAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzs3Xe8XHWd+P/Xmd7r7f3mJje9JyTU0JGOZVUURbGv7Oqu7lr2t19ldW3brLi6KggIglJEeksoqTe93t7LzJ3e6znn98cnBGNARSkSPs/H4z5y58yZmc85kzCH97yLouu6jiRJkiRJkiRJkiRJknTSMLzeC5AkSZIkSZIkSZIkSZJeWTLgI0mSJEmSJEmSJEmSdJKRAR9JkiRJkiRJkiRJkqSTjAz4SJIkSZIkSZIkSZIknWRkwEeSJEmSJEmSJEmSJOkkIwM+kiRJkiRJkiRJkiRJJxkZ8JEkSZIkSZIkSZIkSTrJyICPJEmSJEmSJEmSJEnSSUYGfCRJkiRJkiRJkiRJkk4yMuAjSX+Gm2++GUVRGB0dfb2X8ifr6OjgAx/4wKvy3M+fj507d74qzy9JkiRJkvTn+HOv2V7O484++2zOPvvsP2t9r5TR0VEUReE///M/X9d1SJL018X0ei9AkqTXxr333ovH43m9lyFJkiRJkvRX79JLL2Xr1q00Nja+3kuRJEn6s8mAjyS9SaxcufL1XsJfLJ/P43A4Xu9lSJIkSZJ0kqutraW2tvb1XsZfnUKhgM1mQ1GU13spkiT9CWRJlyS9Qn72s5+xfPlybDYbgUCAt771rRw5cuS4fV4q5fcDH/gAHR0dAFQqFerq6njf+953wn7JZBK73c4//uM/AlAsFvnMZz7DihUr8Hq9BAIBTj31VH7zm9+c8NjfL+natGkTiqJwxx138C//8i80NTXh8Xg4//zz6evr+7POQSaT4ROf+AQ1NTUEg0He9ra3MT09fdw+d955JxdeeCGNjY3Y7XYWLlzI5z//eXK53AnnxOVyceDAAS688ELcbjfnnXceAIqicP3113PTTTcxf/587HY7a9asYdu2bei6zn/8x3/Q2dmJy+Xi3HPPZXBw8IS1/inv1/NrGBwc5JJLLsHlctHa2spnPvMZSqUSALquM2/ePC666KITXiObzeL1evnkJz/5Z51PSZIkSZJeeU888QTnnXceHo8Hh8PB6aefzpNPPnncPi9W0qXrOt/61rdob2/HZrOxatUqHn744Rd9jXQ6zWc/+1k6OzuxWCw0Nzfz6U9/+oTrneevaW699VYWLlyIw+Fg+fLlPPDAA3/28f33f//3seugU089lW3bth13/86dO3n3u99NR0cHdrudjo4Orr76asbGxl70HDz22GNcd9111NbW4nA4KJVKnH322SxZsoStW7dy2mmnHXuem266CYAHH3yQVatW4XA4WLp0KY888sgJ63zuuec477zzcLvdOBwOTjvtNB588MEXXcPGjRv/4DXmhz70IQKBAPl8/oTXOffcc1m8ePGffT4l6Y1MBnwk6RXw9a9/nQ996EMsXryYe+65h+985zvs37+fU089lYGBgZf1XGazmWuuuYa7776bdDp93H133HEHxWKRD37wgwCUSiXi8Tif/exnue+++7jjjjs444wzeNvb3sYtt9zyJ73eF7/4RcbGxvjJT37Cj3/8YwYGBrj88stRVfVlrRvgwx/+MGazmdtvv51vfetbbNq0iWuuuea4fQYGBrjkkkv46U9/yiOPPMKnP/1p7rrrLi6//PITnq9cLnPFFVdw7rnn8pvf/IYbbrjh2H0PPPAAP/nJT/jGN77BHXfcQSaT4dJLL+Uzn/kMmzdv5vvf/z4//vGPOXz4MG9/+9vRdf3YY1/O+1WpVLjiiis477zz+M1vfsN1113H//zP//DNb34TEBdqf/d3f8fjjz9+wmNvueUW0um0DPhIkiRJ0l+J2267jQsvvBCPx8PPf/5z7rrrLgKBABdddNEJQZ/fd8MNN/C5z32OCy64gPvuu49PfOITfOQjHznhi7J8Ps+GDRv4+c9/zt///d/z8MMP87nPfY6bb76ZK6644rhrEhDBke9///v827/9G3ffffexL6KGh4df9vH94Ac/4PHHH+fb3/42v/jFL8jlclxyySWkUqlj+4yOjjJ//ny+/e1v8+ijj/LNb36TmZkZ1q5dSzQaPeE5r7vuOsxmM7feeiu//vWvMZvNAIRCIT74wQ/y4Q9/mN/85jcsXbqU6667jn/7t3/jC1/4Av/8z//M3Xffjcvl4qqrrjouQPP0009z7rnnkkql+OlPf8odd9yB2+3m8ssv58477zxhDX/sGvNTn/oUiUSC22+//bjHHT58mI0bN8prMenNS5ck6WW76aabdEAfGRnRE4mEbrfb9UsuueS4fcbHx3Wr1aq/5z3vObZtw4YN+oYNG054vmuvvVZvb28/dnv//v06oP/4xz8+br9TTjlFX7169Uuuq1qt6pVKRf/Qhz6kr1y58rj72tvb9WuvvfbY7Y0bN+rACeu+6667dEDfunXrS77O73v+fPzt3/7tcdu/9a1v6YA+MzPzoo/TNE2vVCr6008/rQP6vn37jt137bXX6oD+s5/97ITHAXpDQ4OezWaPbbvvvvt0QF+xYoWuadqx7d/+9rd1QN+/f7+u6/rLer+eX8Ndd9113L6XXHKJPn/+/GO30+m07na79U996lPH7bdo0SL9nHPOedFjlyRJkiTp1fe712y5XE4PBAL65Zdfftw+qqrqy5cv10855ZQXfZyui+sHm82mv/Wtbz3usZs3b9aB467vvv71r+sGg0Hv6ek5bt9f//rXOqA/9NBDx7YBen19vZ5Op49tC4VCusFg0L/+9a//ycc5MjKiA/rSpUv1arV6bPuOHTt0QL/jjjte8rHValXPZrO60+nUv/Od75xwDt7//vef8JgNGzbogL5z585j22KxmG40GnW73a5PTU0d2753714d0L/73e8e27Z+/Xq9rq5Oz2Qyx61jyZIlektLy7FruZdzjblhwwZ9xYoVx+33iU98Qvd4PMe9jiS9mcgMH0n6C23dupVCoXDCBKzW1lbOPffcP/pt0YtZunQpq1evPpYWC3DkyBF27NjBddddd9y+v/rVrzj99NNxuVyYTCbMZjM//elPTyhPeilXXHHFcbeXLVsGcEJa7yv1XMPDw7znPe+hoaEBo9GI2Wxmw4YNAC+65re//e0v+lrnnHMOTqfz2O2FCxcCcPHFFx9XV/789ufX8HLfL0VRTsg+WrZs2XHH5Ha7+eAHP8jNN998LFX7qaee4vDhw1x//fUvun5JkiRJkl5bW7ZsIR6Pc+2111KtVo/9aJrGW97yFnp6ek4ouXre1q1bKRaLvPe97z1u+2mnnUZ7e/tx2x544AGWLFnCihUrjnudiy66CEVR2LRp03H7n3POObjd7mO36+vrqaur+7OuxS699FKMRuOx2y92LZbNZvnc5z7H3LlzMZlMmEwmXC4XuVzuZV2LNTY2snr16mO3A4EAdXV1rFixgqampmPbf/9aLJfLsX37dt7xjnfgcrmO7Wc0Gnnf+97H5OTkCVlTf8o15qc+9Sn27t3L5s2bAVFWd+utt3Lttdce9zqS9GYiAz6S9BeKxWIALzrFoamp6dj9L9d1113H1q1b6e3tBeCmm27CarVy9dVXH9vnnnvu4Z3vfCfNzc3cdtttbN26lZ6eHq677jqKxeKf9DrBYPC421arFRBN+V6uP/Zc2WyWM888k+3bt/PVr36VTZs20dPTwz333POir+lwOF5yslggEDjutsVi+YPbnz8fL/f9cjgc2Gy2E47r98/v3/3d35HJZPjFL34BwPe//31aWlq48sorX3T9kiRJkiS9tsLhMADveMc7MJvNx/1885vfRNd14vH4iz72+euDhoaGE+77/W3hcJj9+/ef8Bputxtd108om/r96ycQ1xqvxrUYwHve8x6+//3v8+EPf5hHH32UHTt20NPTQ21t7Yu+5ktNKvv9ay4Q111/7FoskUig6/pLXosBJ1yP/SnHdeWVV9L
|
|||
|
|
"text/plain": [
|
|||
|
|
"<Figure size 1455.6x480 with 2 Axes>"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"CPU times: user 1.07 s, sys: 5.37 ms, total: 1.07 s\n",
|
|||
|
|
"Wall time: 1.07 s\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"%%time\n",
|
|||
|
|
"sc.pl.embedding(adata,basis=\"X_umap_harmony\", color=[\"louvain_harmony\", \"leiden_harmony\"], legend_loc=\"on data\")"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 38,
|
|||
|
|
"id": "85fc1084-ce41-4f82-a1a0-661af80bccee",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABT0AAAKOCAYAAABk9L1ZAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjYsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvq6yFwwAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzs3XeUJFl94PtvZqT3tirL+65q76a7Z3q8Y5hhMJKQMNpF4slgVm9htftYxJEWI5DXPlgx7CIhBiOB8DAexndPe1fdVd3lbWZlpfc+3PujoJl5MyCQxtHczzl1OjPyZsbvZkR23POLawy6rusIgiAIgiAIgiAIgiAIgiBcIYyvdACCIAiCIAiCIAiCIAiCIAgvJpH0FARBEARBEARBEARBEAThiiKSnoIgCIIgCIIgCIIgCIIgXFFE0lMQBEEQBEEQBEEQBEEQhCuKSHoKgiAIgiAIgiAIgiAIgnBFEUlPQRAEQRAEQRAEQRAEQRCuKCLpKQiCIAiCIAiCIAiCIAjCFUUkPQVBEARBEARBEARBEARBuKKIpKcgCIIgCIIgCIIgCIIgCFcUkfQUBEH4GRkMBj7ykY9cfv7UU09hMBh46qmnfubPiMfjfOQjH2F8fPxFj08QBEEQBEEQBEEQhA0i6SkIgvAyisfjfPSjHxVJT0EQBEEQBEEQBEF4CYmkpyAIgiAIgiAIgiAIgiAIVxSR9BQE4Yo3PT3N2972Ntrb27FarfT29vKOd7yDZrMJQCKR4F3vehfd3d1YLBYGBgb46Ec/iqIoL2ocTz31FPv27QPgne98JwaD4fKQ+S9/+csYDAaOHTv2vPd97GMfw2w2E4/HAbjpppvYtm0bhw8f5uqrr8Zut9PV1cWf/MmfoKrqc97barX4+Mc/ztjYGFarlXA4zDvf+U7S6fSLWjdBEARBEISXyvz8PO985zsZGRnB4XDQ1dXF61//eiYmJp5TTtM0Pv7xjzM6Oordbsfn87Fjxw4+9alPXS6TTqf5/d//fXp6ei63ja699loee+yxy2UeffRR3vjGN9Ld3Y3NZmN4eJh3vetdZDKZy2UOHz6MwWDgq1/96vPi/dKXvoTBYODUqVMvwbchCIIg/KxMr3QAgiAIL6Xz589z3XXXEQqF+NjHPsbIyAjr6+vcd999tFot8vk8+/fvx2g08j/+x/9gaGiIY8eO8fGPf5zl5WXuvffeFy2WPXv2cO+99/LOd76TP/7jP+Z1r3sdAN3d3bS1tfGBD3yAe+65h2uuuebyexRF4bOf/Sy/8iu/Qmdn5+XtiUSCt771rXzwgx/kYx/7GA8++CAf//jHyefzfPrTnwY2Gv5vfOMbOXz4MB/4wAc4ePAgKysrfPjDH+amm27i9OnT2O32F61+giAIgiAIL4V4PE4wGOQv/uIvCIfD5HI5vvjFL3LgwAHOnTvH6OgoAH/1V3/FRz7yEf74j/+YG264AVmWmZ6eplAoXP6s//gf/yNnz57lE5/4BJs2baJQKHD27Fmy2ezlMgsLC1xzzTX87u/+Ll6vl+XlZf7n//yfXHfddUxMTGA2m7n++uvZvXs399xzD29729ueE++nP/1p9u3bd/lmtyAIgvAK0QVBEK5gt9xyi+7z+fRUKvWCr7/rXe/SXS6XvrKy8pztf/M3f6MD+sWLFy9vA/QPf/jDl58/+eSTOqA/+eSTP3M8p06d0gH93nvvfd5rH/7wh3WLxaInk8nL2772ta/pgP70009f3nbjjTfqgP69733vOe//vd/7Pd1oNF6uy1e/+lUd0L/1rW+9YAyf+cxnfua4BUEQBEEQXi0URdFbrZY+MjKi/5f/8l8ub7/77rv1Xbt2/dT3ulwu/f3vf//PvC9N03RZlvWVlZXntb/uvfdeHdDPnTt3edvJkyd1QP/iF7/4s1dIEARBeEmI4e2CIFyxarUaTz/9NL/xG79BOBx+wTIPPPAAN998M52dnSiKcvnvzjvvBODpp59+2eJ9z3veA8A//MM/XN726U9/mu3bt3PDDTc8p6zb7eYNb3jDc7a9/e1vR9M0Dh06BGzUzefz8frXv/45ddu1axeRSOTnWnVeEARBEAThlaIoCn/2Z3/Gli1bsFgsmEwmLBYLc3NzTE1NXS63f/9+zp8/z3vf+16+//3vUyqVnvdZ+/fv5wtf+AIf//jHOX78OLIsP69MKpXi3e9+Nz09PZhMJsxmM319fQDP2d/b3vY22trauOeeey5v+7u/+zvC4TBvectbXsyvQBAEQfg3EElPQRCuWPl8HlVV6e7u/ollkskk999/P2az+Tl/W7duBXjO3E0vtfb2dt7ylrfw2c9+FlVVuXDhAocPH+YP/uAPXrDs/18kEgG4PDwrmUxSKBSwWCzPq18ikXhZ6yYIgiAIgvBv9Yd/+If8yZ/8CW9605u4//77OXHiBKdOnWLnzp3U6/XL5f7oj/6Iv/mbv+H48ePceeedBINBbr31Vk6fPn25zNe+9jV+67d+i8997nNcc801BAIB3vGOd5BIJICN6YFe85rX8O1vf5sPfOADPP7445w8eZLjx48DPGd/VquVd73rXXzlK1+hUCiQTqf5+te/zu/+7u9itVpfpm9HEARB+EnEnJ6CIFyxAoEAkiQRi8V+YplQKMSOHTv4xCc+8YKvP3sezZfD+973Pr785S/zve99j0ceeQSfz8dv/uZvPq9cMpl83rYfNdaDwSCwUbdgMMgjjzzygvtyu90vYuSCIAiCIAgvjX/6p3/iHe94B3/2Z3/2nO2ZTAafz3f5uclk4g//8A/5wz/8QwqFAo899hgf+tCHuOOOO4hGozgcDkKhEJ/85Cf55Cc/yerqKvfddx8f/OAHSaVSPPLII0xOTnL+/Hm+8IUv8Fu/9VuXP3t+fv4FY3vPe97DX/zFX/D5z3+eRqOBoii8+93vfkm+B0EQBOHnI5KegiBcsex2OzfeeCPf+MY3+MQnPkEoFHpembvvvpuHHnqIoaEh/H7/Sx7Tj+76P7uXwLPt3buXgwcP8pd/+ZdMTk7y+7//+zidzueVK5fL3Hfffc8Z4v6Vr3wFo9F4eSj83Xffzb/8y7+gqioHDhx4CWojCIIgCILw0jMYDM/rOfnggw+ytrbG8PDwC77H5/Px5je/mbW1Nd7//vezvLzMli1bnlOmt7eXP/iDP+Dxxx/nyJEjl/cFPG9/n/3sZ19wPx0dHfz6r/86n/nMZ2i1Wrz+9a+nt7f331RPQRAE4cUlkp6CIFzRfrTS5oEDB/jgBz/I8PAwyWSS++67j89+9rN87GMf49FHH+XgwYP85//8nxkdHaXRaLC8vMxDDz3E//k//+enDo//eQ0NDWG32/nnf/5nNm/ejMvlorOz8zk9St/3vvfxlre8BYPBwHvf+94X/JxgMMh73vMeVldX2bRpEw899BD/8A//wHve857LDe23vvWt/PM//zN33XUX73vf+9i/fz9ms5lYLMaTTz7JG9/4Rn7lV37lRaubIAiCIAjCS+Huu+/mC1/4AmNjY+zYsYMzZ87w13/9189ro73+9a9n27ZtXHXVVYTDYVZWVvjkJz9JX18fIyMjFItFbr75Zt7+9rczNjaG2+3m1KlTPPLII/zqr/4qAGNjYwwNDfHBD34QXdcJBALcf//9PProoz8xvve9732XbzDfe++9L90XIQiCIPxcRNJTEIQr2s6dOzl58iQf/vCH+aM/+iPK5TKRSIRbbrkFi8VCR0cHp0+f5k//9E/567/+a2KxGG63m4GBAV772te+6L0/HQ4Hn//85/noRz/Ka17zGmRZ5sMf/jAf+chHLpd505vehNVq5eabb2ZkZOQFPycSiXDPPffw3/7bf2NiYoJAIMCHPvQhPvrRj14uI0kS9913H5/61Kf48pe/zJ//+Z9jMpno7u7mxhtvZPv27S9q3QRBEARBEF4Kn/rUpzCbzfz5n/85lUqFPXv28O1vf5s//uM/fk65m2++mW9961t87nOfo1QqEYlEuP322/mTP/kTzGYzNpuNAwcO8OUvf5nl5WVkWaa3t5f//t//Ox/4wAcAMJvN3H///bzvfe/jXe9
|
|||
|
|
"text/plain": [
|
|||
|
|
"<Figure size 1455.6x480 with 2 Axes>"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"fig = sc.pl.embedding(adata,basis=\"X_umap_harmony\", color=[\"cell_type\", \"assay\"], return_fig=True)\n",
|
|||
|
|
"ax = fig.axes[0]\n",
|
|||
|
|
"ax.legend_.set_title(\"cell_type\")\n",
|
|||
|
|
"ax.legend_.set_bbox_to_anchor((-0.2, -0.4))"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "boring-bulgarian",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"### TSNE"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "appreciated-designer",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"Next, we use **t-distributed Stochastic Neighbor Embedding (t-SNE)** to visualize cells in two dimensions. \n",
|
|||
|
|
"t-SNE is a non-linear dimensionality reduction technique that preserves local structure and reveals complex patterns in the data. \n",
|
|||
|
|
"\n",
|
|||
|
|
"We leverage the RAPIDS GPU implementation of t-SNE, which is significantly faster than CPU-based methods. \n",
|
|||
|
|
"This allows us to efficiently process large datasets while maintaining high-quality embeddings. \n",
|
|||
|
|
"\n"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 39,
|
|||
|
|
"id": "touched-hollywood",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"[2025-12-04 23:31:40.093] [CUML] [warning] # of Nearest Neighbors should be at least 3 * perplexity. Your results might be a bit strange...\n",
|
|||
|
|
"CPU times: user 3.1 s, sys: 4.84 s, total: 7.95 s\n",
|
|||
|
|
"Wall time: 11.5 s\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"%%time\n",
|
|||
|
|
"rsc.tl.tsne(adata, n_pcs=40, perplexity=30, early_exaggeration=12, learning_rate=200, use_rep=\"X_pca_harmony\")"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 40,
|
|||
|
|
"id": "statutory-supplement",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABT0AAAKOCAYAAABk9L1ZAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjYsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvq6yFwwAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzs3XeYXFd5+PHvnd7b9r6rbSqrbjUXyb03wMEGfpheQ2ICCRACoSckkAQSSmg2xgZcwMaWbdxkyZZtyeplpZW0RdvrzE6vd+69vz/WCCvuRs3y+3kePdLcueWcM6u5Z9973nMUwzAMhBBCCCGEEEIIIYQQ4jRhOtkFEEIIIYQQQgghhBBCiGNJgp5CCCGEEEIIIYQQQojTigQ9hRBCCCGEEEIIIYQQpxUJegohhBBCCCGEEEIIIU4rEvQUQgghhBBCCCGEEEKcViToKYQQQgghhBBCCCGEOK1I0FMIIYQQQgghhBBCCHFakaCnEEIIIYQQQgghhBDitCJBTyGEEEIIIYQQQgghxGlFgp5CCPEaKYrCV7/61SOvN2zYgKIobNiw4TWfY3R0lK9+9avs2rXrmJdPCCGEEEIIIYQQMyToKYQQJ9Do6Chf+9rXJOgphBBCCCGEEEIcRxL0FEIIIYQQQgghhBBCnFYk6CmEOO0dOHCAd73rXVRUVGC326mvr+fGG28kn88DMD4+zsc+9jFqa2ux2Ww0NTXxta99jWKxeEzLsWHDBpYtWwbABz7wARRFOZIyf9ttt6EoCps2bXrRcV//+texWq2Mjo4CcO6559LR0cHGjRtZuXIlTqeTmpoavvzlL6Np2lHHFgoFvvnNbzJ79mzsdjtlZWV84AMfYGpq6pjWTQghhBDieOnp6eEDH/gAra2tuFwuampquOqqq9i7d+9R++m6zje/+U3a29txOp0EAgEWLFjA97///SP7TE1N8dGPfpS6urojfaOzzjqLxx9//Mg+jz32GNdccw21tbU4HA5aWlr42Mc+RjgcPrLPxo0bURSF3/72ty8q769+9SsURWHr1q3HoTWEEEK8VpaTXQAhhDiedu/ezdlnn01paSlf//rXaW1tZWxsjPvvv59CoUA0GmX58uWYTCb++Z//mebmZjZt2sQ3v/lN+vv7ueWWW45ZWZYsWcItt9zCBz7wAb70pS9xxRVXAFBbW0t5eTmf+9zn+OEPf8iqVauOHFMsFvnJT37C2972Nqqrq49sHx8f54YbbuALX/gCX//613nwwQf55je/STQa5Qc/+AEw0/G/5ppr2LhxI5/73Oc488wzGRgY4Ctf+Qrnnnsu27Ztw+l0HrP6CSGEEEIcD6Ojo5SUlPDtb3+bsrIypqenufXWW1mxYgU7d+6kvb0dgH//93/nq1/9Kl/60pdYvXo1qqpy4MABYrHYkXO9973vZceOHXzrW9+ira2NWCzGjh07iEQiR/bp7e1l1apVfPjDH8bv99Pf389//ud/cvbZZ7N3716sVivnnHMOixcv5oc//CHvete7jirvD37wA5YtW3bkYbcQQoiTxBBCiNPY+eefbwQCAWNycvIl3//Yxz5meDweY2Bg4Kjt3/3udw3A2Ldv35FtgPGVr3zlyOv169cbgLF+/frXXJ6tW7cagHHLLbe86L2vfOUrhs1mMyYmJo5su/POOw3AePLJJ49sW7NmjQEY991331HHf+QjHzFMJtORuvz2t781AOP3v//9S5bhRz/60WsutxBCCCHEqaJYLBqFQsFobW01/u7v/u7I9iuvvNJYtGjRKx7r8XiMT3/606/5WrquG6qqGgMDAy/qf91yyy0GYOzcufPIti1bthiAceutt772CgkhhDguJL1dCHHaymQyPPnkk7zzne+krKzsJfd54IEHOO+886iurqZYLB75c9lllwHw5JNPnrDyfuITnwDgZz/72ZFtP/jBD5g/fz6rV68+al+v18vVV1991LZ3v/vd6LrOU089BczULRAIcNVVVx1Vt0WLFlFZWfm6Vp0XQgghhDhZisUi//Iv/8LcuXOx2WxYLBZsNhvd3d10dXUd2W/58uXs3r2bT37ykzzyyCMkEokXnWv58uX88pe/5Jvf/CabN29GVdUX7TM5OcnHP/5x6urqsFgsWK1WGhoaAI663rve9S7Ky8v54Q9/eGTb//zP/1BWVsb1119/LJtACCHEGyBBTyHEaSsajaJpGrW1tS+7z8TEBGvXrsVqtR71Z968eQBHzd10vFVUVHD99dfzk5/8BE3T2LNnDxs3buRTn/rUS+77f1VWVgIcSc+amJggFoths9leVL/x8fETWjchhBBCiDfqM5/5DF/+8pe59tprWbt2Lc899xxbt25l4cKFZLPZI/v94z/+I9/97nfZvHkzl112GSUlJVxwwQVs27btyD533nkn73vf+/j5z3/OqlWrCIVC3HjjjYyPjwMz0wNdfPHF3HPPPXzuc59j3bp1bNmyhc2bNwMcdT273c7HPvYxfvOb3xCLxZiamuKuu+7iwx/+MHa7/QS1jhBCiJcjc3oKIU5boVAIs9nM8PDwy+5TWlrKggUL+Na3vvWS779wHs0T4aabbuK2227jvvvu4+GHHyYQCPCe97znRftNTEy8aNufOuslJSXATN1KSkp4+OGHX/JaXq/3GJZcCCGEEOL4uP3227nxxhv5l3/5l6O2h8NhAoHAkdcWi4XPfOYzfOYznyEWi/H444/zxS9+kUsuuYShoSFcLhelpaV873vf43vf+x6Dg4Pcf//9fOELX2BycpKHH36Yzs5Odu/ezS9/+Uve9773HTl3T0/PS5btE5/4BN/+9re5+eabyeVyFItFPv7xjx+XdhBCCPH6SNBTCHHacjqdrFmzhrvvvptvfetblJaWvmifK6+8koceeojm5maCweBxL9Ofnvq/cJTACy1dupQzzzyTf/u3f6Ozs5OPfvSjuN3uF+2XTCa5//77j0px/81vfoPJZDqSCn/llVdyxx13oGkaK1asOA61EUIIIYQ4/hRFedHIyQcffJCRkRFaWlpe8phAIMB1113HyMgIn/70p+nv72fu3LlH7VNfX8+nPvUp1q1bxzPPPHPkWsCLrveTn/zkJa9TVVXFX/3VX/GjH/2IQqHAVVddRX19/RuqpxBCiGNLgp5CiNPan1baXLFiBV/4whdoaWlhYmKC+++/n5/85Cd8/etf57HHHuPMM8/kb//2b2lvbyeXy9Hf389DDz3E//7v/75ievzr1dzcjNPp5Ne//jVz5szB4/FQXV191IjSm266ieuvvx5FUfjkJz/5kucpKSnhE5/4BIODg7S1tfHQQw/xs5/9jE984hNHOto33HADv/71r7n88su56aabWL58OVarleHhYdavX88111zD2972tmNWNyGEEEKI4+HKK6/kl7/8JbNnz2bBggVs376d73znOy/qo1111VV0dHRwxhlnUFZWxsDAAN/73vdoaGigtbWVeDzOeeedx7vf/W5mz56N1+tl69atPPzww7z97W8HYPbs2TQ3N/OFL3wBwzAIhUKsXbuWxx577GXLd9NNNx15wHzLLbccv4YQQgjxukjQUwhxWlu4cCFbtmzhK1/5Cv/4j/9IMpmksrKS888/H5vNRlVVFdu2beMb3/gG3/nOdxgeHsbr9dLU1MSll156zEd/ulwubr75Zr72ta9x8cUXo6oqX/nKV/jqV796ZJ9rr70Wu93OeeedR2tr60uep7Kykh/+8If8/d//PXv37iUUCvHFL36Rr33ta0f2MZvN3H///Xz/+9/ntttu41//9V+xWCzU1tayZs0a5s+ff0zrJoQQQghxPHz/+9/HarXyr//6r6RSKZYsWcI999zDl770paP2O++88/j973/Pz3/+cxKJBJWVlVx00UV8+ctfxmq14nA4WLFiBbfddhv9/f2oqkp9fT2f//zn+dznPgeA1Wpl7dq13HTTTXzsYx/DYrFw4YUX8vjjj7/sCM7ly5fT2NiI0+nkggsuOO7tIYQQ4rVRDMMwTnYhhBBC/NnatWu5+uqrefDBB7n88stf9P65555LOByms7PzJJROCCGEEEK
|
|||
|
|
"text/plain": [
|
|||
|
|
"<Figure size 1455.6x480 with 2 Axes>"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"fig = sc.pl.tsne(adata, color=[\"cell_type\", \"assay\"], return_fig=True)\n",
|
|||
|
|
"ax = fig.axes[0]\n",
|
|||
|
|
"ax.legend_.set_title(\"cell_type\")\n",
|
|||
|
|
"ax.legend_.set_bbox_to_anchor((-0.2, -0.4))"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "sexual-delaware",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"## Differential expression analysis"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "fallen-exposure",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"To identify key marker genes for each cell type, we use **logistic regression** to compute a ranking of highly differential genes. \n",
|
|||
|
|
"Unlike traditional statistical tests, logistic regression models the probability of a gene being highly expressed in a given cluster \n",
|
|||
|
|
"while accounting for all genes simultaneously, making it more robust for complex datasets. \n",
|
|||
|
|
"\n",
|
|||
|
|
"We rank the top 50 genes that best distinguish each **cell type**, helping to characterize biological differences between clusters. "
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 41,
|
|||
|
|
"id": "naked-treasury",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"CPU times: user 16.4 s, sys: 14.9 s, total: 31.3 s\n",
|
|||
|
|
"Wall time: 49.3 s\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"%%time\n",
|
|||
|
|
"rsc.tl.rank_genes_groups_logreg(adata, groupby=\"cell_type\", use_raw=False)"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 42,
|
|||
|
|
"id": "082233ed-f984-40ee-a000-f8354105f542",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAACGoAAAYaCAYAAACGEB0pAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjYsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvq6yFwwAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzs3XdUFNffBvBn6Ui3gIhSBLGhiNgbChobYu9GECxYEytqYtdobLGhaFSwxIIdG1GJGo0YO0axC7aAgtLEiAjz/uHL/liKAi47u/B8zuFE7szOPjsB5s7Md+6VCIIggIiIiIiIiIiIiIiIiIiIiIiKnZrYAYiIiIiIiIiIiIiIiIiIiIhKCxZqEBERERERERERERERERERESkICzWIiIiIiIiIiIiIiIiIiIiIFISFGkREREREREREREREREREREQKwkINIiIiIiIiIiIiIiIiIiIiIgVhoQYRERERERERERERERERERGRgrBQg4iIiIiIiIiIiIiIiIiIiEhBWKhBREREREREREREREREREREpCAs1CAiIiIiIiIiIiIiIiIiIiJSEBZqEBWDmzdvYsiQIbCxsYGOjg709fVRv359LF68GG/evJGu17p1a0gkEkgkEqipqcHAwAB2dnbo3bs39u7di8zMzM++z3///Qd7e3tIJBIsXbq0uD/WV/Hy8oK1tbVM208//YSDBw/mWvfMmTOQSCQ4c+aMQrLlfN+9e/fKbZtBQUGQSCSIjo6W2za/RtZnLMiXorRu3RqtW7eWaZNIJJg9e7bCMiibCxcuYPbs2UhMTBQ7ChFRgSnbMS8/EokEY8aMETtGniIjIzF79myl34fK5NixY6W6z0BEpGjR0dGQSCQICgoq0uutra3h7u4u31D5kPcx/927d5g9e7Zcr1XMnj27QOfnOc+Zi1PO83GxrtEokx07dmDFihVixyAiIhFYW1vDy8urSK/NeUwtjnP+7PeYPvelqPPmvPoNWf2d0iy/e2FEGmIHICppfv31V4waNQrVq1fH5MmTUatWLaSnp+PKlSsICAhAeHg4Dhw4IF2/atWq+O233wAAqampiIqKwsGDB9G7d2+0bNkShw8fhpGRUZ7vNWPGDKSmpirkc32tGTNm4LvvvpNp++mnn9CrVy9069ZNpr1+/foIDw9HrVq1FJiwdMjat9l1794dtra2Sl/sU5pcuHABc+bMgZeXF4yNjcWOQ0REChIZGYk5c+agdevWuQpcKW/Hjh2Dv78/izWIiKjYvXv3DnPmzAEAuRVODB06FB06dJB+HxMTgx49emDs2LEYMGCAtN3Q0FAu70dFs2PHDty6dQvff/+92FGIiEiFFcc5/9q1a5GcnCz9/ujRo5g/fz4CAwNRo0YNaXvlypXl8n5UNPndCyNioQaRHIWHh2PkyJFo164dDh48CG1tbemydu3aYeLEiQgNDZV5ja6uLpo0aSLTNnToUAQGBsLb2xvDhw/H7t27c73XpUuXsHr1avz222/o3bt38XwgObK1tS3wuoaGhrn2CRXef//9B11dXZm2vPattrY2jI2Nuc+LSBAEvH//Pte+JiIiIlkZGRn4+PGjTB+ZiIioNKtcubLMjZOsJ2wtLS15jl5E6enpkEgk0NDgZW8iIir5cj7sevfuXQCAg4MDGjRoIEYklffu3TuUKVNG7BhUSnDqEyI5+umnnyCRSLBhw4Y8L0BraWnBw8OjQNsaMmQIOnXqhD179uDJkycyyz58+ABvb2+MHj260AfbrKGntm/fjgkTJqBixYrQ1dWFi4sLrl+/nmv9kJAQNG3aFGXKlIGBgQHatWuXa0SGuLg4DB8+HFWqVIG2tjYqVKiA5s2b49SpU9J1ck59IpFIkJqaii1btuQayjPn8FgrVqyARCLBw4cPc+Xz8/ODlpYW4uPjpW2nTp2Cm5sbDA0NUaZMGTRv3hxhYWEF3kfp6en44YcfUKlSJRgaGqJt27a4d+9ervWK+j6tW7eGg4MDzp07hyZNmkBXVxcWFhaYMWMGMjIyZNb98OED5s+fjxo1akj37ZAhQxAXFyezXtbwsfv374eTkxN0dHSkT/oUh8TEREycOBFVq1aFtrY2TE1N0alTJ2lHsDDZiyI9PR2mpqb49ttv88ymq6uLCRMmAAAyMzMxf/58VK9eHbq6ujA2NkbdunWxcuXKIr131vC5AQEBqFmzJrS1tbFlyxYAwIMHDzBgwACYmppCW1sbNWvWhL+/v8zrv5Rn9uzZmDx5MgDAxsZG+vtRmoeZJSLVNG/ePGhoaODZs2e5lnl7e6NcuXJ4//49gP8dx44cOQInJyfo6uqiZs2aOHLkCIBP06rUrFkTenp6aNSoEa5cuSKzPS8vL+jr6+P27dtwc3ODnp4eKlSogDFjxuDdu3d55tu2bRtq1qyJMmXKwNHRUfpe2Z0/fx5ubm4wMDBAmTJl0KxZMxw9ejTXei9evJD2hbS0tFCpUiX06tULL1++xNu3b2FsbIwRI0bkel10dDTU1dWxZMkSBAUFSYtv27RpI/37n31o+aL0PeLi4qClpYUZM2bkWnb37l1IJBKsWrUKwKeLEZMmTZJO31e2bFk0aNAAO3fu/Ox75CVraPzFixdj/vz5sLGxgba2Nk6fPg0AuHLlCjw8PFC2bFno6OjAyckJwcHBMtv4Uh4vLy/pcTb7kK6cOoaIqPAePnyIIUOGoFq1aihTpgwsLCzQpUsX/PPPP198bdZw1tevX0ePHj1gaGgIIyMjDBo0KN/zv9DQUNSvXx+6urqoUaMGNm/eLLM8Li4Oo0aNQq1ataCvrw9TU1O4urri3Llzhf5s69evh729PbS1tVGrVi3s2rUr1zqxsbEYMWIEKleuDC0tLdjY2GDOnDn4+PEjgE/HtQoVKgAA5syZIz3mZA2F/jX7Tx7u3r2L/v37w8zMDNra2rC0tMTgwYORlpZW4M/4NSIiIiCRSLBp06Zcy44fPw6JRIKQkBAABbuGVFBZ14+2bduGiRMnwsLCAtra2tLrRwXpO30pT+vWrXH06FE8efJElOliiYiKw6FDh1C3bl1oa2ujatWqWLlyZZ7TU/j7+6NVq1YwNTWFnp4e6tSpg8WLFyM9PV1mvazr3eHh4WjWrBl0dXVhbW2NwMBAAJ9Geahfvz7KlCmDOnXq5HqYNeu9b968id69e8PIyAhly5bFhAkT8PHjR9y7dw8dOnSAgYEBrK2tsXjxYpnXv3//HhMnTkS9evWkr23atCkOHTpU4H2Snp6OKVOmoGLFiihTpgxatGiBS5cu5bluUY6pXzrnP3nyJLp27YrKlStDR0cHdnZ2GDFihMx9j+IUGhoKNzc3GBkZoUyZMqhZsyYWLlwos05BzuOLqjD3gK5fvw53d3fpNfhKlSqhc+fOeP78eaHfN+tn79q1a+jVqxdMTEykDx0LgoC1a9eiXr160NXVhYmJCXr16oXHjx/LbONLeT53L4yIpcVEcpKRkYE//vgDzs7OqFKlily26eHhgWPHjuHcuXOwsrKSts+dOxepqamYN29ekW96T58+HfXr18fGjRuRlJSE2bNno3Xr1rh+/TqqVq0K4NPQjgMHDsQ333yDnTt3Ii0tDYsXL0br1q0RFhaGFi1aAAC+/fZbXLt2DQsWLIC9vT0SExNx7do1vH79Ot/3Dw8Ph6urK9q0aSO9cZDfUJ6DBg2Cn58fgoKCMH/+fGl7RkYGtm/fji5duqB8+fIAgO3bt2Pw4MHo2rUrtmzZAk1NTaxfvx7t27fH77//Djc3twLtm+bNm2Pjxo1ITk6Gn58funTpgjt37kBdXV0u7xMbG4t+/fph6tSpmDt3rnRIsoSEBKxZswbApxv6Xbt2xblz5zBlyhQ0a9YMT548waxZs9C6dWtcuXJFZhSHa9eu4c6dO/jxxx9hY2MDPT29L37WokhJSUGLFi0QHR0NPz8/NG7cGG/fvsWff/6JmJgY1KhRo9DZC0tTUxODBg1CQEAA/P39ZX52du7ciffv32PIkCEAgMWLF2P27Nn48ccf0apVK6Snp+Pu3btITEws8vsfPHgQ586dw8yZM1GxYkW
|
|||
|
|
"text/plain": [
|
|||
|
|
"<Figure size 2560x1920 with 16 Axes>"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"sc.pl.rank_genes_groups(adata)"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 43,
|
|||
|
|
"id": "imposed-waterproof",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"post_time = time.time()"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 44,
|
|||
|
|
"id": "radio-combat",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"Total Postprocessing time: 100.55777049064636\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"print(\"Total Postprocessing time: %s\" % (post_time - preprocess_time))"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "verbal-programming",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"### Diffusion Map Embedding \n",
|
|||
|
|
"\n",
|
|||
|
|
"Next, we compute **Diffusion Maps**, a nonlinear dimensionality reduction method that preserves the continuous structure of the data. \n",
|
|||
|
|
"Unlike UMAP and t-SNE, Diffusion Maps are well-suited for capturing **trajectory-like** relationships in single-cell data. \n",
|
|||
|
|
"\n",
|
|||
|
|
"We use the batch-corrected neighborhood graph (`neighbors_key=\"harmony\"`) to ensure that the embedding is not influenced by batch effects. "
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 45,
|
|||
|
|
"id": "controlling-portugal",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"CPU times: user 2.4 s, sys: 165 ms, total: 2.57 s\n",
|
|||
|
|
"Wall time: 3.35 s\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"%%time\n",
|
|||
|
|
"rsc.tl.diffmap(adata,neighbors_key=\"harmony\")\n",
|
|||
|
|
"#Due to an issue with plotting in Scanpy, we need to shift the first component of the diffusion map by removing the first column:\n",
|
|||
|
|
"adata.obsm[\"X_diffmap\"] = adata.obsm[\"X_diffmap\"][:, 1:]"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 46,
|
|||
|
|
"id": "affiliated-excess",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"data": {
|
|||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABgYAAAGtCAYAAADd1c+SAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjYsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvq6yFwwAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzs3Xd8leX9//HXffbJDiMDCCQBAiGEIQiCyqgDRQG1dde9RaHOai0q1lGtWrHf1ipWsM6frRMHVgVcgGwJJARkjzBCQubZ5/79EXLkkABBwn4/H4884NzXdV/3dd3nJPd97s81DNM0TURERERERERERERE5LhgOdwVEBERERERERERERGRQ0eBARERERERERERERGR44gCAyIiIiIiIiIiIiIixxEFBkREREREREREREREjiMKDIiIiIiIiIiIiIiIHEcUGBAREREREREREREROY4oMCAiIiIiIiIiIiIichxRYEBERERERERERERE5DiiwICIiIiIiIiIiIiIyHFEgQERiWIYBg8//HDk9YwZMzAMgxkzZjS5jE2bNvHwww+zaNGiZq+fiIiIiIiIiIiIHBgFBkSk2W3atInx48crMCAiIiIiIiIiInIEUmBAREREREREREREROQ4osCAyDFi2bJlXHrppaSmpuJ0Omnfvj1XXnklPp8PgM2bN3PTTTfRrl07HA4HWVlZjB8/nmAw2Kz1mDFjBieeeCIA11xzDYZhRKYneu211zAMg1mzZjXY75FHHsFut7Np0yYAhgwZQvfu3fn222856aSTcLvdtG3blnHjxhEKhaL29fv9PProo3Tt2hWn00nr1q255ppr2LZtW7O2TURERERERERE5FhgO9wVEJED9+OPP3LKKafQqlUrHnnkETp37kxJSQkfffQRfr+f8vJy+vXrh8Vi4cEHH6Rjx47MmjWLRx99lDVr1jBp0qRmq8sJJ5zApEmTuOaaa/jjH//IOeecA0C7du1ISUnh3nvv5e9//zsDBgyI7BMMBnnxxRc5//zzadOmTWT75s2bueSSS7jvvvt45JFH+OSTT3j00UcpLy/n//7v/wAIh8OMGjWKb7/9lnvvvZeBAweydu1aHnroIYYMGcK8efNwu93N1j4REREREREREZGjnQIDIseAO++8E5vNxpw5c2jdunVk++WXXw7APffcQ3l5OUuXLqV9+/YAnHbaabjdbu6++27uueceunXr1ix1SUhIoHv37gB07NiRk046KSr9pptu4oknnuDZZ58lJSUFgPfee49NmzZx2223ReXdvn07H374ISNHjgTgzDPPxOPx8MILL3DvvffSvn173nnnHaZOncq7777LBRdcENm3Z8+enHjiiUyePJlbbrmlWdomIiIiIiIiIiJyLNBUQiJHudraWr7++msuuuiiqKDArj7++GOGDh1KmzZtCAaDkZ+zzz4bgK+//vqQ1bf+If3EiRMj2/7v//6P/Px8Bg0aFJU3Pj4+EhSod9lllxEOh/nmm2+AurYlJSUxYsSIqLb16tWLtLQ0ZsyYcXAbJCIiIiIiIiIicpRRYEDkKFdeXk4oFKJdu3Z7zLNlyxamTJmC3W6P+snLywOgtLT0UFWX1NRULr74Yl588UVCoRCLFy/m22+/bTBaoD7v7tLS0oC60QRQ17YdO3bgcDgatG/z5s2HtG0iIiIiIiIiIiJHA00lJHKUa9GiBVarlQ0bNuwxT6tWrejRowePPfZYo+m7zut/KIwdO5bXXnuNDz/8kKlTp5KUlBSZ9mhXW7ZsabBt8+bNALRs2RKoa1vLli2ZOnVqo8eKj49vxpqLiIiIiIiIiIgc/RQYEDnKud1uBg8ezH/+8x8ee+wxWrVq1SDPueeey6effkrHjh1JTk4+6HVyOp0AeDyeRtP79OnDwIEDefLJJ1myZAk33ngjsbGxDfJVVVXx0UcfRU0n9Oabb2KxWCLTDp177rm8/fbbhEIh+vfvfxBaIyIiIiIiIiIicmxRYEDkGPDss89yyimn0L9/f+677z46derEli1b+Oijj3jxxRd55JFH+OKLLxg4cCBjxoyhS5cueL1e1qxZw6effso///nPvU5FtL86duyI2+3mjTfeIDc3l7i4ONq0aRM1MmHs2LFcfPHFGIbBrbfe2mg5LVu25JZbbmHdunXk5OTw6aefMnHiRG655ZbIIsqXXHIJb7zxBsOHD2fs2LH069cPu93Ohg0bmD59OqNGjeL8889vtraJiIiIiIiIiIgc7RQYEDkG9OzZkzlz5vDQQw9x//33U1VVRVpaGr/61a9wOBykp6czb948/vSnP/GXv/yFDRs2EB8fT1ZWFmeddVazjyKIiYnhlVdeYfz48Zx55pkEAgEeeughHn744Uie8847D6fTydChQ+ncuXOj5aSlpfH3v/+du+++m4KCAlq0aMEf/vAHxo8fH8ljtVr56KOPmDBhAq+99hpPPPEENpuNdu3aMXjwYPLz85u1bSIiIiIiIiIiIkc7wzRN83BXQkSOP1OmTGHkyJF88sknDB8+vEH6kCFDKC0tZcmSJYehdiIiIiIiIiIiIscujRgQkUOqsLCQtWvXctddd9GrVy/OPvvsw10lERERERERERGR44oCAyLSZKZpEgqF9prHarViGMYe02+99Va+//57TjjhBF599dW95hUREREREREREZHmp6mERKTJZsyYwdChQ/eaZ9KkSVx99dWHpkIiIiIiIiIiIiKy3xQYEJEmq6qqori4eK95srKyaNmy5SGqkYiIiIiIiIiIiOwvBQZERERERERERERERI4jlsNdAREREREREREREREROXQOy+LD4XCYTZs2ER8fr4VHRUREjhKmaVJVVUWbNm2wWNS3QERERERERORodVgCA5s2bSIjI+NwHFpEREQO0Pr162nXrt3hroaIiIiIiIiI/EKHJTAQHx8P1D1YSEhIOBxVEBERkf1UWVlJRkZG5DouIiIiIiIiIkenwxIYqJ8+KCEhQYEBERGRo4ymARQRERERERE5ummCYBERERERERERERGR44gCAyIiIiIiIiIiIiIixxEFBkREREREREREREREjiMKDIiIiIiIiIiIiIiIHEcUGBAREREREREREREROY4oMCAiIiIiIiIiIiIichxRYEBERERERERERERE5DiiwICIiIiIiIiIiIiIyHFEgQERERERERERERERkeOIAgMiIiIiIiIiIiIiIscRBQZERERERERERERERI4jCgyIiIiIiIiIiIiIiBxHFBgQERERERERERERETmOKDAg0gS+VasIVdcc7mqIiIiIiIiIiIiIHDDb4a6AyNHAmZ19uKsgIiIiIiIiIiIi0iw0YkCkEf716wmWlx/uaoiIiIiIiIiIiIg0O40YEGmEIyPjcFdBRERERERERERE5KDQiAERERERERERERERkeOIAgMiIiIiIiIiIiIiIscRBQZE9iKwZSuBLVsPdzVEREREREREREREmo0CAyJ7YU9NIbi1LjAQLC0l7PUe5hqJiIiIiIiIiIiIHBgFBkT2wTTDeAoKCPv9BDZuPNzVERERERERERERETkgtsNdAZEjnb1NG8JVVTjatDncVRERERERERERERE5YBoxILIXZjhMuLISZ1bW4a6KiIiIiIiIiIiISLNQYEBkLwyLBWd29uGuhoiIiIiIiIiIiEizUWBAZC9ClZWEqqoOdzVEREREREREREREmo0CAyJ7UDN7NoGSEgybjR0ffHi4qyMiIiIiIiIiIiLSLLT4sMgexPTvT6isDIvbTeLIEYe7OiIiIiIiIiIiIiLNQiMGRPbAMAw8BQV1/7dYCJaXH+YaiYiIiIiIiIiIiBw4BQZE9iDs92Nr0eLn19XVh7E2IiIiIiIiIiIiIs1DUwmJNCKweTPB7WXYMzMj2xwZGYevQiIiIiIiIiIiIiLNRIEBkUbY09IwAwECW7ZgcTiwuFyHu0oiIiIiIiIiIiIizUKBATluBcvLCdfU4mjXtkGap6CAwNZtBMrLscXGYmnT5jDUUERERERERERERKT5KTAgxy1bcjIkJzfYbvr9GA4nrpz
|
|||
|
|
"text/plain": [
|
|||
|
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
"metadata": {},
|
|||
|
|
"output_type": "display_data"
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"sc.pl.diffmap(adata, color=\"cell_type\")"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 47,
|
|||
|
|
"id": "ranking-brazilian",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [
|
|||
|
|
{
|
|||
|
|
"name": "stdout",
|
|||
|
|
"output_type": "stream",
|
|||
|
|
"text": [
|
|||
|
|
"Total Processing time: 116.74001026153564\n"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"source": [
|
|||
|
|
"print(\"Total Processing time: %s\" % (time.time() - preprocess_start))"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": 48,
|
|||
|
|
"id": "23354b8e-009b-481c-8d9d-7e9736bdb121",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"adata.write(\"h5/dli_decoupler.h5ad\")"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "markdown",
|
|||
|
|
"id": "59987984-5ca7-45a8-9369-ec8fabb39d65",
|
|||
|
|
"metadata": {},
|
|||
|
|
"source": [
|
|||
|
|
"## Next Steps:\n",
|
|||
|
|
"\n",
|
|||
|
|
"We can now take this output file, `dli_decoupler.h5ad`, and use it further analysis, like **[Transcriptional Regulatory Analysis](https://github.com/NVIDIA-AI-Blueprints/single-cell-analysis-blueprint/blob/main/notebooks/02_scRNA_analysis_extended.ipynb)**. You can get it quickly and continue your journey uncommenting and running the command below:"
|
|||
|
|
]
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"cell_type": "code",
|
|||
|
|
"execution_count": null,
|
|||
|
|
"id": "e1c5c0cb-4110-43c9-beba-131ddb9cf895",
|
|||
|
|
"metadata": {},
|
|||
|
|
"outputs": [],
|
|||
|
|
"source": [
|
|||
|
|
"# !wget https://raw.githubusercontent.com/NVIDIA-AI-Blueprints/single-cell-analysis-blueprint/refs/heads/main/notebooks/02_scRNA_analysis_extended.ipynb"
|
|||
|
|
]
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"metadata": {
|
|||
|
|
"kernelspec": {
|
|||
|
|
"display_name": "Python 3 (ipykernel)",
|
|||
|
|
"language": "python",
|
|||
|
|
"name": "python3"
|
|||
|
|
},
|
|||
|
|
"language_info": {
|
|||
|
|
"codemirror_mode": {
|
|||
|
|
"name": "ipython",
|
|||
|
|
"version": 3
|
|||
|
|
},
|
|||
|
|
"file_extension": ".py",
|
|||
|
|
"mimetype": "text/x-python",
|
|||
|
|
"name": "python",
|
|||
|
|
"nbconvert_exporter": "python",
|
|||
|
|
"pygments_lexer": "ipython3",
|
|||
|
|
"version": "3.13.7"
|
|||
|
|
}
|
|||
|
|
},
|
|||
|
|
"nbformat": 4,
|
|||
|
|
"nbformat_minor": 5
|
|||
|
|
}
|