In [ ]:
from functools import wraps
from pathlib import Path

import anndata as ad
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import polars as pl
import scanpy as sc
import seaborn as sns

plt.rcParams["figure.dpi"] = 200
sns.set_theme()
path = Path(
    "/mnt/morbo/Users/chaichontat/rawSeq/lgladul1_196667/combi/counts_unfiltered"
)
In [ ]:
adata = ad.read_h5ad("countsfiltered.h5ad")
In [ ]:
sc.pl.highest_expr_genes(adata, n_top=20, gene_symbols="gene_name")
In [ ]:
fig, ax = plt.subplots(figsize=(7, 7))

x = np.asarray(adata.X.sum(axis=1))
y = np.asarray(np.sum(adata.X > 0, axis=1))

ax.scatter(x, y, alpha=0.25, s=1, edgecolors="none")
ax.set_xlabel("UMI Counts")
ax.set_ylabel("Genes Detected")
ax.set_xscale("log")
ax.set_yscale("log")
In [ ]:
expected_num_cells = 20000  # @param {type:"integer"}
knee = np.sort((np.array(adata.X.sum(axis=1))).flatten())[::-1]

fig, ax = plt.subplots(figsize=(10, 7))

ax.loglog(range(len(knee)), knee, linewidth=5, color="g")
# ax.axvline(x=knee[expected_num_cells], linewidth=3, color="k")
# ax.axhline(y=expected_num_cells, linewidth=3, color="k")

ax.set_ylabel("UMI Counts")
ax.set_xlabel("Set of Barcodes")

plt.grid(True, which="both")
plt.show()
In [ ]:
mito_genes = adata.var["gene_name"].str.startswith("mt-")
# for each cell compute fraction of counts in mito genes vs. all genes
# the `.A1` is only necessary as X is sparse (to transform to a dense array after summing)
adata.obs["percent_mito"] = np.sum(adata[:, mito_genes].X, axis=1) / np.sum(
    adata.X, axis=1
)
# add the total counts per cell as observations-annotation to adata
adata.obs["n_counts"] = adata.X.sum(axis=1)
adata.obs["n_genes"] = np.sum(adata.X > 0, axis=1)
In [ ]:
sc.pl.violin(
    adata, ["n_genes", "n_counts", "percent_mito"], jitter=0.4, multi_panel=True
)

sc.pl.scatter(adata, x="n_counts", y="percent_mito")
sc.pl.scatter(adata, x="n_counts", y="n_genes")
/home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/seaborn/axisgrid.py:118: UserWarning: The figure layout has changed to tight
  self._figure.tight_layout(*args, **kwargs)
In [ ]:
sc.experimental.pp.highly_variable_genes(
    adata, flavor="pearson_residuals", n_top_genes=2000
)
In [ ]:
fig, ax = plt.subplots(figsize=(6, 6))

hvgs = adata.var["highly_variable"]
ax.scatter(adata.var["means"], adata.var["residual_variances"], s=3, edgecolor="none")
ax.scatter(
    adata.var["means"][hvgs],
    adata.var["residual_variances"][hvgs],
    c="tab:red",
    label="selected genes",
    s=3,
    edgecolor="none",
)

ax.set_xscale("log")
ax.set_xlabel("mean expression")
ax.set_yscale("log")
ax.set_ylabel("residual variance")

ax.spines["right"].set_visible(False)
ax.spines["top"].set_visible(False)
ax.yaxis.set_ticks_position("left")
ax.xaxis.set_ticks_position("bottom")
plt.legend()
Out[ ]:
<matplotlib.legend.Legend at 0x7f5d5dd1a250>
In [ ]:
adata.layers["raw"] = adata.X.copy()
adata.layers["sqrt_norm"] = np.sqrt(sc.pp.normalize_total(adata, inplace=False)["X"])
sc.experimental.pp.normalize_pearson_residuals(adata)
# adata.write_h5ad("normalized.h5ad")
In [ ]:
adata = adata[adata.obs["rSAPed"]]
In [ ]:
sc.tl.pca(adata, svd_solver="arpack", use_highly_variable=True)
In [ ]:
sc.pl.pca_variance_ratio(adata, log=True)
In [ ]:
sc.pp.neighbors(adata, n_neighbors=15, n_pcs=30)
/home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/umap/distances.py:1063: NumbaDeprecationWarning: The 'nopython' keyword argument was not supplied to the 'numba.jit' decorator. The implicit default value for this argument is currently False, but it will be changed to True in Numba 0.59.0. See https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit for details.
  @numba.jit()
/home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/umap/distances.py:1071: NumbaDeprecationWarning: The 'nopython' keyword argument was not supplied to the 'numba.jit' decorator. The implicit default value for this argument is currently False, but it will be changed to True in Numba 0.59.0. See https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit for details.
  @numba.jit()
/home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/umap/distances.py:1086: NumbaDeprecationWarning: The 'nopython' keyword argument was not supplied to the 'numba.jit' decorator. The implicit default value for this argument is currently False, but it will be changed to True in Numba 0.59.0. See https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit for details.
  @numba.jit()
/home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/umap/umap_.py:660: NumbaDeprecationWarning: The 'nopython' keyword argument was not supplied to the 'numba.jit' decorator. The implicit default value for this argument is currently False, but it will be changed to True in Numba 0.59.0. See https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit for details.
  @numba.jit()
In [ ]:
sc.tl.umap(adata)
In [ ]:
sc.pl.umap(adata, color="sample")
/home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
In [ ]:
sc.tl.louvain(adata, resolution=0.4, random_state=42)
In [ ]:
sc.pl.umap(adata, color="louvain")
/home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/scanpy/plotting/_tools/scatterplots.py:392: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
In [ ]:
sc.tl.rank_genes_groups(
    adata,
    "louvain",
    method="t-test",
    corr_method="bonferroni",
)
sc.pl.rank_genes_groups(adata, n_genes=25, gene_symbols="gene_name")
/home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/scanpy/tools/_rank_genes_groups.py:420: RuntimeWarning: invalid value encountered in log2
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/scanpy/tools/_rank_genes_groups.py:420: RuntimeWarning: invalid value encountered in log2
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/scanpy/tools/_rank_genes_groups.py:420: RuntimeWarning: invalid value encountered in log2
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/scanpy/tools/_rank_genes_groups.py:420: RuntimeWarning: invalid value encountered in log2
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/scanpy/tools/_rank_genes_groups.py:420: RuntimeWarning: invalid value encountered in log2
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/scanpy/tools/_rank_genes_groups.py:420: RuntimeWarning: invalid value encountered in log2
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/scanpy/tools/_rank_genes_groups.py:420: RuntimeWarning: invalid value encountered in log2
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/scanpy/tools/_rank_genes_groups.py:420: RuntimeWarning: invalid value encountered in log2
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/scanpy/tools/_rank_genes_groups.py:420: RuntimeWarning: invalid value encountered in log2
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/scanpy/tools/_rank_genes_groups.py:420: RuntimeWarning: invalid value encountered in log2
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/scanpy/tools/_rank_genes_groups.py:420: RuntimeWarning: invalid value encountered in log2
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/scanpy/tools/_rank_genes_groups.py:420: RuntimeWarning: invalid value encountered in log2
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/scanpy/tools/_rank_genes_groups.py:420: RuntimeWarning: invalid value encountered in log2
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/scanpy/tools/_rank_genes_groups.py:420: RuntimeWarning: invalid value encountered in log2
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/scanpy/tools/_rank_genes_groups.py:420: RuntimeWarning: invalid value encountered in log2
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/scanpy/tools/_rank_genes_groups.py:420: RuntimeWarning: invalid value encountered in log2
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/scanpy/tools/_rank_genes_groups.py:420: RuntimeWarning: invalid value encountered in log2
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/chaichontat/mambaforge/envs/seq/lib/python3.11/site-packages/scanpy/tools/_rank_genes_groups.py:420: RuntimeWarning: invalid value encountered in log2
  self.stats[group_name, 'logfoldchanges'] = np.log2(