from functools import wraps
from pathlib import Path
import anndata as ad
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import polars as pl
import scanpy as sc
import seaborn as sns
# Plotting defaults for the whole script: 200-dpi figures, then seaborn's theme.
plt.rcParams.update({"figure.dpi": 200})
sns.set_theme()

# Directory of the unfiltered count output.
# NOTE(review): `path` is not referenced again in the visible part of this
# script; the AnnData file below is opened relative to the working directory.
path = Path("/mnt/morbo/Users/chaichontat/rawSeq/lgladul1_196667/combi/counts_unfiltered")

# Load the count matrix and its annotations.
adata = ad.read_h5ad("countsfiltered.h5ad")

# Plot the 20 most highly expressed genes, labelled via var["gene_name"].
sc.pl.highest_expr_genes(adata, gene_symbols="gene_name", n_top=20)
# Scatter of total counts against number of non-zero genes, one point per
# row of adata.X, drawn on log-log axes.
fig, ax = plt.subplots(figsize=(7, 7))
x = np.asarray(adata.X.sum(axis=1))  # summed counts per row
y = np.asarray(np.sum(adata.X > 0, axis=1))  # count of strictly positive entries per row
ax.scatter(x, y, s=1, alpha=0.25, edgecolors="none")
ax.set(
    xlabel="UMI Counts",
    ylabel="Genes Detected",
    xscale="log",
    yscale="log",
)
expected_num_cells = 20000  # @param {type:"integer"}

# Knee plot: per-row count totals sorted from largest to smallest and drawn
# against their rank on log-log axes.
knee = np.sort(np.asarray(adata.X.sum(axis=1)).ravel())[::-1]
fig, ax = plt.subplots(figsize=(10, 7))
ax.loglog(np.arange(knee.size), knee, color="g", linewidth=5)
# Guide lines for `expected_num_cells` are currently disabled.
# ax.axvline(x=knee[expected_num_cells], linewidth=3, color="k")
# ax.axhline(y=expected_num_cells, linewidth=3, color="k")
ax.set_xlabel("Set of Barcodes")
ax.set_ylabel("UMI Counts")
ax.grid(True, which="both")
plt.show()
# Boolean mask over genes whose symbol starts with "mt-".  `na=False` makes
# genes with a missing symbol count as non-mitochondrial instead of yielding
# NaN entries in the mask.
mito_genes = adata.var["gene_name"].str.startswith("mt-", na=False)

# Per-cell QC metrics.  Summing a scipy sparse matrix along an axis returns a
# 2-D `np.matrix` of shape (n, 1), so every sum is converted to a flat 1-D
# ndarray before being stored as an obs column.  The same expression also
# works unchanged when X is a dense ndarray.

# Fraction (0-1, despite the column name) of each cell's counts that fall in
# mitochondrial genes.  Cells with zero total counts produce NaN.
adata.obs["percent_mito"] = (
    np.asarray(adata[:, mito_genes].X.sum(axis=1)).ravel()
    / np.asarray(adata.X.sum(axis=1)).ravel()
)
# Total counts per cell.
adata.obs["n_counts"] = np.asarray(adata.X.sum(axis=1)).ravel()
# Number of genes with a strictly positive count per cell.
adata.obs["n_genes"] = np.asarray((adata.X > 0).sum(axis=1)).ravel()

# Distribution of each metric, then pairwise relationships against depth.
sc.pl.violin(
    adata, ["n_genes", "n_counts", "percent_mito"], jitter=0.4, multi_panel=True
)
sc.pl.scatter(adata, x="n_counts", y="percent_mito")
sc.pl.scatter(adata, x="n_counts", y="n_genes")
# [notebook output] /home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/seaborn/axisgrid.py:118: UserWarning: The figure layout has changed to tight self._figure.tight_layout(*args, **kwargs)
# Flag the top 2000 genes using the Pearson-residuals flavor.  The columns
# read below ("highly_variable", "means", "residual_variances") are taken
# from adata.var after this call.
sc.experimental.pp.highly_variable_genes(
    adata, n_top_genes=2000, flavor="pearson_residuals"
)

# Mean expression against residual variance for every gene, with the
# selected genes drawn on top in red.
fig, ax = plt.subplots(figsize=(6, 6))
hvgs = adata.var["highly_variable"]
ax.scatter(
    adata.var["means"],
    adata.var["residual_variances"],
    s=3,
    edgecolor="none",
)
ax.scatter(
    adata.var.loc[hvgs, "means"],
    adata.var.loc[hvgs, "residual_variances"],
    s=3,
    c="tab:red",
    edgecolor="none",
    label="selected genes",
)
ax.set(
    xscale="log",
    xlabel="mean expression",
    yscale="log",
    ylabel="residual variance",
)
# Open frame: hide the top/right spines and keep ticks on the left/bottom.
for side in ("right", "top"):
    ax.spines[side].set_visible(False)
ax.yaxis.set_ticks_position("left")
ax.xaxis.set_ticks_position("bottom")
ax.legend()
# [notebook output] <matplotlib.legend.Legend at 0x7f5d5dd1a250>
# Keep the current X in a layer before it is overwritten further down.
adata.layers["raw"] = adata.X.copy()
# Square root of the total-count-normalised matrix, stored as its own layer.
# This must run before the Pearson-residual step so that it is computed from
# the counts still held in X.
adata.layers["sqrt_norm"] = np.sqrt(sc.pp.normalize_total(adata, inplace=False)["X"])
# Replace X with Pearson residuals (in place).
sc.experimental.pp.normalize_pearson_residuals(adata)
# adata.write_h5ad("normalized.h5ad")

# Restrict to the observations flagged in obs["rSAPed"].
# NOTE(review): presumably a boolean column; confirm its dtype and meaning.
# Indexing an AnnData returns a view; the tools below write their results
# into the object, so take an explicit copy rather than relying on the view
# being converted implicitly.
adata = adata[adata.obs["rSAPed"]].copy()

# PCA restricted to the highly variable genes, a scree plot on a log scale,
# and a 15-nearest-neighbour graph built from the first 30 components.
sc.tl.pca(adata, svd_solver="arpack", use_highly_variable=True)
sc.pl.pca_variance_ratio(adata, log=True)
sc.pp.neighbors(adata, n_neighbors=15, n_pcs=30)
/home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/umap/distances.py:1063: NumbaDeprecationWarning: The 'nopython' keyword argument was not supplied to the 'numba.jit' decorator. The implicit default value for this argument is currently False, but it will be changed to True in Numba 0.59.0. See https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit for details. @numba.jit() /home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/umap/distances.py:1071: NumbaDeprecationWarning: The 'nopython' keyword argument was not supplied to the 'numba.jit' decorator. The implicit default value for this argument is currently False, but it will be changed to True in Numba 0.59.0. See https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit for details. @numba.jit() /home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/umap/distances.py:1086: NumbaDeprecationWarning: The 'nopython' keyword argument was not supplied to the 'numba.jit' decorator. The implicit default value for this argument is currently False, but it will be changed to True in Numba 0.59.0. See https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit for details. @numba.jit() /home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/umap/umap_.py:660: NumbaDeprecationWarning: The 'nopython' keyword argument was not supplied to the 'numba.jit' decorator. The implicit default value for this argument is currently False, but it will be changed to True in Numba 0.59.0. See https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit for details. @numba.jit()
# Compute the UMAP embedding (follows the sc.pp.neighbors call earlier in
# this script).
sc.tl.umap(adata)
# Plot the embedding, colouring points by the "sample" annotation.
sc.pl.umap(adata, color="sample")
# [notebook output] /home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored cax = scatter(
# Louvain clustering at resolution 0.4 with a fixed seed; the resulting
# "louvain" labels are what the UMAP below is coloured by.
sc.tl.louvain(adata, random_state=42, resolution=0.4)
sc.pl.umap(adata, color="louvain")
# [notebook output] /home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored cax = scatter(
# Rank genes per "louvain" group with a t-test and Bonferroni correction.
# NOTE(review): the output recorded after this cell is a run of
# "invalid value encountered in log2" RuntimeWarnings from the
# logfoldchanges computation.  A likely cause is that X holds Pearson
# residuals at this point (which can be negative) -- confirm, and treat the
# reported log fold changes with care.
sc.tl.rank_genes_groups(
    adata, groupby="louvain", method="t-test", corr_method="bonferroni"
)
# Show the top 25 ranked genes per group, labelled via var["gene_name"].
sc.pl.rank_genes_groups(adata, gene_symbols="gene_name", n_genes=25)
/home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/scanpy/tools/_rank_genes_groups.py:420: RuntimeWarning: invalid value encountered in log2 self.stats[group_name, 'logfoldchanges'] = np.log2( /home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/scanpy/tools/_rank_genes_groups.py:420: RuntimeWarning: invalid value encountered in log2 self.stats[group_name, 'logfoldchanges'] = np.log2( /home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/scanpy/tools/_rank_genes_groups.py:420: RuntimeWarning: invalid value encountered in log2 self.stats[group_name, 'logfoldchanges'] = np.log2( /home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/scanpy/tools/_rank_genes_groups.py:420: RuntimeWarning: invalid value encountered in log2 self.stats[group_name, 'logfoldchanges'] = np.log2( /home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/scanpy/tools/_rank_genes_groups.py:420: RuntimeWarning: invalid value encountered in log2 self.stats[group_name, 'logfoldchanges'] = np.log2( /home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/scanpy/tools/_rank_genes_groups.py:420: RuntimeWarning: invalid value encountered in log2 self.stats[group_name, 'logfoldchanges'] = np.log2( /home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/scanpy/tools/_rank_genes_groups.py:420: RuntimeWarning: invalid value encountered in log2 self.stats[group_name, 'logfoldchanges'] = np.log2( /home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/scanpy/tools/_rank_genes_groups.py:420: RuntimeWarning: invalid value encountered in log2 self.stats[group_name, 'logfoldchanges'] = np.log2( /home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/scanpy/tools/_rank_genes_groups.py:420: RuntimeWarning: invalid value encountered in log2 self.stats[group_name, 'logfoldchanges'] = np.log2( /home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/scanpy/tools/_rank_genes_groups.py:420: 
RuntimeWarning: invalid value encountered in log2 self.stats[group_name, 'logfoldchanges'] = np.log2( /home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/scanpy/tools/_rank_genes_groups.py:420: RuntimeWarning: invalid value encountered in log2 self.stats[group_name, 'logfoldchanges'] = np.log2( /home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/scanpy/tools/_rank_genes_groups.py:420: RuntimeWarning: invalid value encountered in log2 self.stats[group_name, 'logfoldchanges'] = np.log2( /home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/scanpy/tools/_rank_genes_groups.py:420: RuntimeWarning: invalid value encountered in log2 self.stats[group_name, 'logfoldchanges'] = np.log2( /home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/scanpy/tools/_rank_genes_groups.py:420: RuntimeWarning: invalid value encountered in log2 self.stats[group_name, 'logfoldchanges'] = np.log2( /home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/scanpy/tools/_rank_genes_groups.py:420: RuntimeWarning: invalid value encountered in log2 self.stats[group_name, 'logfoldchanges'] = np.log2( /home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/scanpy/tools/_rank_genes_groups.py:420: RuntimeWarning: invalid value encountered in log2 self.stats[group_name, 'logfoldchanges'] = np.log2( /home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/scanpy/tools/_rank_genes_groups.py:420: RuntimeWarning: invalid value encountered in log2 self.stats[group_name, 'logfoldchanges'] = np.log2( /home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/scanpy/tools/_rank_genes_groups.py:420: RuntimeWarning: invalid value encountered in log2 self.stats[group_name, 'logfoldchanges'] = np.log2(