import pandas as pd
import matplotlib.pyplot as plt
import igraph as ig
import scvelo as scv
import loompy as lmp
import anndata
import os
import leidenalg
from scipy import io
from scipy.sparse import coo_matrix, csr_matrix
import scanpy as sc
# --- Scanpy configuration ---------------------------------------------------
sc.settings.verbosity = 3
sc.logging.print_header()
sc.settings.set_figure_params(dpi=80, facecolor="white")

# --- Load the 10x count matrix, indexing genes by symbol --------------------
# NOTE(review): read_10x_mtx is documented to take the *directory* holding the
# .mtx/barcodes/genes files; confirm that "matrix.mtx" really is a directory.
adata = sc.read_10x_mtx("matrix.mtx", var_names="gene_symbols", cache=True)
print(type(adata))

# Gene symbols may repeat; make them unique before using them as an index.
adata.var_names_make_unique()

# Show the gene index and the cell (barcode) index together with their types.
for axis_index in (adata.var_names, adata.obs_names):
    print(axis_index)
    print(type(axis_index))

# --- First look and coarse filtering ----------------------------------------
sc.pl.highest_expr_genes(adata, n_top=20)
sc.pp.filter_cells(adata, min_genes=200)  # keep cells with >= 200 genes
sc.pp.filter_genes(adata, min_cells=3)    # keep genes seen in >= 3 cells
print(adata)
# --- Quality control --------------------------------------------------------
# Flag genes whose symbol starts with "MT-" so QC metrics are also computed
# for that subset (yields the `pct_counts_mt` column used below).
adata.var["mt"] = adata.var_names.str.startswith("MT-")
sc.pp.calculate_qc_metrics(
    adata,
    qc_vars=["mt"],
    percent_top=None,
    log1p=False,
    inplace=True,
)
print(adata.obs)

# Inspect the QC distributions before choosing cut-offs.
sc.pl.violin(
    adata,
    ["n_genes_by_counts", "total_counts", "pct_counts_mt"],
    multi_panel=False,
    stripplot=False,
)
sc.pl.scatter(adata, x="total_counts", y="pct_counts_mt")
sc.pl.scatter(adata, x="total_counts", y="n_genes_by_counts")

# Keep cells with fewer than 2500 detected genes and less than 5 % "MT-"
# counts. Slicing an AnnData returns a view; take an explicit copy so the
# in-place preprocessing that follows works on a real object instead of
# triggering an implicit copy (and a warning) on first modification.
adata = adata[adata.obs.n_genes_by_counts < 2500, :]
adata = adata[adata.obs.pct_counts_mt < 5, :].copy()
# --- Normalisation and feature selection ------------------------------------
# Scale every cell to 10,000 total counts, then log1p-transform.
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)

# Annotate highly variable genes (stored in adata.var.highly_variable).
sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)

# Stash the normalised, log-transformed data for all genes in `.raw` before
# the matrix is reduced to the highly variable genes.
adata.raw = adata

# Subsetting returns a view; copy it explicitly because regress_out and scale
# below modify the data in place, which would otherwise force an implicit
# copy of the view (with a warning).
adata = adata[:, adata.var.highly_variable].copy()

# Regress out total counts and "MT-" percentage, then scale each gene and
# clip values at 10.
sc.pp.regress_out(adata, ["total_counts", "pct_counts_mt"])
sc.pp.scale(adata, zero_center=True, max_value=10)
# --- Dimensionality reduction -----------------------------------------------
sc.tl.pca(adata, svd_solver="arpack")
sc.pl.pca(adata, color=["PHF1", "SNHG7"])
sc.pl.pca_variance_ratio(adata, log=True)

# --- Neighbourhood graph and UMAP embedding (first 13 PCs, 10 neighbours) ---
sc.pp.neighbors(
    adata,
    n_neighbors=10,
    n_pcs=13,
)
sc.tl.umap(adata)
sc.pl.umap(
    adata,
    color=["TPSB2", "VWF", "NDUFA4L2"],
)

# --- Leiden clustering; the coloured UMAP is also saved to disk -------------
sc.tl.leiden(adata, resolution=0.5)
sc.pl.umap(
    adata,
    color=["leiden"],
    legend_fontsize=8,
    save="_leiden",
)
# --- Marker genes -----------------------------------------------------------
# Output path for the annotated object. The script referenced `results_file`
# without ever defining it (NameError at the write below); it is written to
# the current working directory. Adjust the path as needed.
results_file = "results.h5ad"

# Rank genes for every Leiden cluster against the rest using a t-test.
sc.tl.rank_genes_groups(adata, "leiden", method="t-test")
sc.pl.rank_genes_groups(adata, n_genes=25, sharey=False)

# Persist the object, including the t-test ranking computed above.
adata.write(results_file)
print(adata.obs)

# Top five ranked gene names per cluster.
print(pd.DataFrame(adata.uns["rank_genes_groups"]["names"]).head(5))

# Same table with p-values alongside the names: columns are "<cluster>_n"
# (names) and "<cluster>_p" (pvals).
result = adata.uns["rank_genes_groups"]
groups = result["names"].dtype.names
print(
    pd.DataFrame(
        {
            f"{group}_{key[:1]}": result[key][group]
            for group in groups
            for key in ["names", "pvals"]
        }
    ).head(5)
)

# Compare cluster 0 directly against cluster 1 with a Wilcoxon test; this
# overwrites the ranking stored in adata.uns["rank_genes_groups"].
sc.tl.rank_genes_groups(
    adata,
    "leiden",
    groups=["0"],
    reference="1",
    method="wilcoxon",
)
sc.pl.rank_genes_groups(adata, groups=["0"], n_genes=20)
# Bioinformatics
# Educational channel of Arash Safarzadeh (@GeneticsAS)