-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathquality_metrics.py
More file actions
118 lines (96 loc) · 3.63 KB
/
quality_metrics.py
File metadata and controls
118 lines (96 loc) · 3.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import numpy as np
import pandas as pd
import spatialdata as sd
def proportion_of_assigned_reads(
    sdata: sd.SpatialData,
) -> tuple[float, pd.Series]:
    """ Calculate the proportion of assigned reads

    The number of assigned reads is the sum of the count matrix
    ``sdata["counts"].layers["counts"]``; the total number of reads is the
    number of rows of ``sdata["transcripts"]``.

    Parameters
    ----------
    sdata : sd.SpatialData
        SpatialData object with sdata['transcripts'] including the column 'feature_name'
        and sdata['counts'] including the layer 'counts' (one column per entry of
        sdata['counts'].var_names)

    Returns
    -------
    float
        Proportion of assigned reads
    pd.Series
        Proportion of assigned reads per gene, indexed by gene name

    Raises
    ------
    KeyError
        If sdata['counts'].var_names contains a gene that does not occur in
        sdata['transcripts']['feature_name'] (only checked when the overall
        proportion is neither 0.0 nor 1.0).
    """
    transcripts = sdata["transcripts"]
    count_matrix = sdata["counts"].layers["counts"]

    # Proportion of assigned reads
    prop_of_assigned_reads = float(count_matrix.sum() / len(transcripts))

    # Proportion of assigned reads per gene
    if prop_of_assigned_reads in (0.0, 1.0):
        # Either all reads or no reads are assigned, so every gene gets the
        # overall value and the per-gene counting can be skipped.
        prop_of_assigned_reads_per_gene = pd.Series(
            index=transcripts["feature_name"].unique().compute().values,
            data=prop_of_assigned_reads,
        )
    else:
        genes, counts = np.unique(transcripts["feature_name"], return_counts=True)
        # Float columns so that count layers of any numeric dtype can be stored.
        df = pd.DataFrame(
            index=genes,
            data={"fraction": 0.0, "count": counts, "count_assigned": 0.0},
        )
        # `.sum(axis=0)` yields a (1, n_genes) np.matrix for scipy sparse matrices
        # but a 1-D array for dense arrays / sparse arrays; ravel() handles both.
        assigned_per_gene = np.asarray(count_matrix.sum(axis=0)).ravel()
        df.loc[sdata["counts"].var_names, "count_assigned"] = assigned_per_gene
        df["fraction"] = df["count_assigned"] / df["count"]
        prop_of_assigned_reads_per_gene = df["fraction"]

    return prop_of_assigned_reads, prop_of_assigned_reads_per_gene
# Previous version, based only on the transcripts table:
#
#def proportion_of_assigned_reads(
# sdata: sd.SpatialData,
#) -> [float, pd.Series]:
# """ Calculate the proportion of assigned reads
#
# Parameters
# ----------
# sdata : sd.SpatialData
# SpatialData object with sdata['transcripts'] including the column 'cell_id'
#
# Returns
# -------
# float
# Proportion of assigned reads
# pd.Series
# Proportion of assigned reads per gene
#
# """
#
# sdata['transcripts']['assigned'] = sdata['transcripts']['cell_id'] != 0
#
# # Proportion of assigned reads
# prop_of_assigned_reads = float(((sdata['transcripts']['assigned']).sum() / len(sdata['transcripts'])).compute())
#
# # Proportion of assigned reads per gene
# if prop_of_assigned_reads == 1.0:
# prop_of_assigned_reads_per_gene = pd.Series(
# index=sdata['transcripts']['feature_name'].unique().compute().values,
# data=1.0
# )
# elif prop_of_assigned_reads == 0.0:
# prop_of_assigned_reads_per_gene = pd.Series(
# index=sdata['transcripts']['feature_name'].unique().compute().values,
# data=0.0
# )
# else:
# df = pd.crosstab(sdata['transcripts']['feature_name'], sdata['transcripts']['assigned'])
# prop_of_assigned_reads_per_gene = df[True] / (df[False] + df[True])
#
# return prop_of_assigned_reads, prop_of_assigned_reads_per_gene
def proportion_of_annotated_cells(
    sdata: sd.SpatialData,
) -> float:
    """ Calculate the fraction of cells that carry a cell type annotation

    A cell counts as annotated when its 'cell_type' entry is not missing and
    is not the label "None_sp".

    Parameters
    ----------
    sdata : sd.SpatialData
        SpatialData object with sdata['counts'] including the obs column 'cell_type'

    Returns
    -------
    float
        Number of annotated cells divided by the total number of cells
    """
    cell_types = sdata['counts'].obs["cell_type"]

    has_label = cell_types.notna()
    is_placeholder = cell_types == "None_sp"
    annotated_mask = has_label & ~is_placeholder

    n_cells = len(cell_types)
    return float(annotated_mask.sum() / n_cells)