Skip to content

Commit bee19bc

Browse files
committed
Readme updates
1 parent b756b5d commit bee19bc

File tree

3 files changed

+38
-38
lines changed

3 files changed

+38
-38
lines changed

README.md

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -28,16 +28,16 @@ import scanpy as sc
2828
adata = sc.datasets.pbmc68k_reduced()
2929

3030
#instantiate the AnnSQL object (you may also pass an h5ad file to the adata parameter)
31-
adata_sql = AnnSQL(adata=adata)
31+
asql = AnnSQL(adata=adata)
3232

3333
#query the expression table. Returns a Pandas DataFrame by default.
34-
adata_sql.query("SELECT * FROM X")
34+
asql.query("SELECT * FROM X")
3535

3636
#query the observation table. Returns adata object.
37-
adata_sql.query("SELECT * FROM obs", return_type="adata")
37+
asql.query("SELECT * FROM obs", return_type="adata")
3838

3939
#query the join of the 'X' and 'obs' tables
40-
adata_sql.query("SELECT * FROM adata", return_type="parquet")
40+
asql.query("SELECT * FROM adata", return_type="parquet")
4141
```
4242

4343

@@ -54,10 +54,10 @@ adata = sc.datasets.pbmc68k_reduced()
5454
MakeDb(adata=adata, db_name="pbmc3k_reduced", db_path="db/")
5555

5656
#open the AnnSQL database
57-
adata_sql = AnnSQL(db="db/pbmc3k_reduced.asql")
57+
asql = AnnSQL(db="db/pbmc3k_reduced.asql")
5858

5959
#query the joined 'X' and 'obs' table (adata)
60-
adata_sql.query("SELECT * FROM adata")
60+
asql.query("SELECT * FROM adata")
6161
```
6262

6363
## Entity Relationship Diagram
@@ -83,19 +83,19 @@ import scanpy as sc
8383
adata = sc.datasets.pbmc68k_reduced()
8484

8585
#pass the AnnData object to the AnnSQL class
86-
adata_sql = AnnSQL(adata=adata)
86+
asql = AnnSQL(adata=adata)
8787

8888
#group and count all labels
89-
adata_sql.query("SELECT obs.bulk_labels, COUNT(*) FROM obs GROUP BY obs.bulk_labels")
89+
asql.query("SELECT obs.bulk_labels, COUNT(*) FROM obs GROUP BY obs.bulk_labels")
9090

9191
#take the log10 of a value
92-
adata_sql.query("SELECT LOG10(HES4) FROM X WHERE HES4 > 0")
92+
asql.query("SELECT LOG10(HES4) FROM X WHERE HES4 > 0")
9393

94-
#sum all gene counts
95-
adata_sql.query("SELECT SUM(COLUMNS(*)) FROM (SELECT * EXCLUDE (cell_id) FROM X)")
94+
#sum all gene counts | Memory intensive | See method calculate_gene_counts for chunked approach.
95+
asql.query("SELECT SUM(COLUMNS(*)) FROM (SELECT * EXCLUDE (cell_id) FROM X)")
9696

9797
#taking the correlation of genes ITGB2 and SSU72 in dendritic cells that express either gene > 0
98-
adata_sql.query("SELECT corr(ITGB2,SSU72) as correlation FROM adata WHERE bulk_labels = 'Dendritic' AND (ITGB2 > 0 OR SSU72 >0)")
98+
asql.query("SELECT corr(ITGB2,SSU72) as correlation FROM adata WHERE bulk_labels = 'Dendritic' AND (ITGB2 > 0 OR SSU72 >0)")
9999

100100
```
101101

@@ -261,23 +261,23 @@ adata = sc.read_h5ad("Macosko_Mouse_Atlas_Single_Nuclei.Use_Backed.h5ad", backed
261261
MakeDb(adata=adata, db_name="Macosko_Mouse_Atlas", db_path="../db/", layers=["X", "obs"])
262262

263263
#query example | Runtime: 0.24sec
264-
adata_sql.query("SELECT ENSMUSG00000070880 FROM X WHERE ENSMUSG00000070880 > 0")
264+
asql.query("SELECT ENSMUSG00000070880 FROM X WHERE ENSMUSG00000070880 > 0")
265265

266266
#count the number of cells in each cluster | Runtime: 0.35sec
267-
adata_sql.query("SELECT ClusterNm, COUNT(cell_id) AS num_cells FROM obs GROUP BY ClusterNm ORDER BY num_cells DESC")
267+
asql.query("SELECT ClusterNm, COUNT(cell_id) AS num_cells FROM obs GROUP BY ClusterNm ORDER BY num_cells DESC")
268268

269269
#determine the total counts per cell library | Runtime: 4min 30sec
270-
adata_sql.calculate_total_counts(chunk_size=950)
270+
asql.calculate_total_counts(chunk_size=950)
271271

272272
#normalize umi counts to 10k per cell | Runtime: 1hr 48mins
273-
adata_sql.expression_normalize(total_counts_per_cell=1e4, chunk_size=300)
273+
asql.expression_normalize(total_counts_per_cell=1e4, chunk_size=300)
274274

275275
#log scale the normalized counts | Runtime: 59mins 13sec
276-
adata_sql.expression_log(log_type="LOG2", chunk_size=250)
276+
asql.expression_log(log_type="LN", chunk_size=250)
277277

278278
```
279279

280-
## Laptop system details for runtime analyses displayed above.
280+
## Laptop system details for both runtime analyses displayed above.
281281
- **Memory:** 40.0 GiB
282282
- **Processor:** 12th Gen Intel® Core™ i7-1255U × 12
283283
- **Disk Capacity:** 1.0 TB

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
setuptools.setup(
77
name='AnnSQL',
8-
version='v0.9.4',
8+
version='v0.9.5',
99
author="Kenny Pavan",
1010
author_email="pavan@ohsu.edu",
1111
description="A Python SQL tool for converting Anndata objects to a relational DuckDb database. Methods are included for querying and basic single-cell preprocessing (experimental). ",

src/AnnSQL.egg-info/PKG-INFO

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Metadata-Version: 2.1
22
Name: AnnSQL
3-
Version: 0.9.4
3+
Version: 0.9.5
44
Summary: A Python SQL tool for converting Anndata objects to a relational DuckDb database. Methods are included for querying and basic single-cell preprocessing (experimental).
55
Home-page: https://github.com/kennypavan/AnnSQL
66
Author: Kenny Pavan
@@ -44,16 +44,16 @@ import scanpy as sc
4444
adata = sc.datasets.pbmc68k_reduced()
4545

4646
#instantiate the AnnSQL object (you may also pass an h5ad file to the adata parameter)
47-
adata_sql = AnnSQL(adata=adata)
47+
asql = AnnSQL(adata=adata)
4848

4949
#query the expression table. Returns a Pandas DataFrame by default.
50-
adata_sql.query("SELECT * FROM X")
50+
asql.query("SELECT * FROM X")
5151

5252
#query the observation table. Returns adata object.
53-
adata_sql.query("SELECT * FROM obs", return_type="adata")
53+
asql.query("SELECT * FROM obs", return_type="adata")
5454

5555
#query the join of the 'X' and 'obs' tables
56-
adata_sql.query("SELECT * FROM adata", return_type="parquet")
56+
asql.query("SELECT * FROM adata", return_type="parquet")
5757
```
5858

5959

@@ -70,10 +70,10 @@ adata = sc.datasets.pbmc68k_reduced()
7070
MakeDb(adata=adata, db_name="pbmc3k_reduced", db_path="db/")
7171

7272
#open the AnnSQL database
73-
adata_sql = AnnSQL(db="db/pbmc3k_reduced.asql")
73+
asql = AnnSQL(db="db/pbmc3k_reduced.asql")
7474

7575
#query the joined 'X' and 'obs' table (adata)
76-
adata_sql.query("SELECT * FROM adata")
76+
asql.query("SELECT * FROM adata")
7777
```
7878

7979
## Entity Relationship Diagram
@@ -99,19 +99,19 @@ import scanpy as sc
9999
adata = sc.datasets.pbmc68k_reduced()
100100

101101
#pass the AnnData object to the AnnSQL class
102-
adata_sql = AnnSQL(adata=adata)
102+
asql = AnnSQL(adata=adata)
103103

104104
#group and count all labels
105-
adata_sql.query("SELECT obs.bulk_labels, COUNT(*) FROM obs GROUP BY obs.bulk_labels")
105+
asql.query("SELECT obs.bulk_labels, COUNT(*) FROM obs GROUP BY obs.bulk_labels")
106106

107107
#take the log10 of a value
108-
adata_sql.query("SELECT LOG10(HES4) FROM X WHERE HES4 > 0")
108+
asql.query("SELECT LOG10(HES4) FROM X WHERE HES4 > 0")
109109

110-
#sum all gene counts
111-
adata_sql.query("SELECT SUM(COLUMNS(*)) FROM (SELECT * EXCLUDE (cell_id) FROM X)")
110+
#sum all gene counts | Memory intensive | See method calculate_gene_counts for chunked approach.
111+
asql.query("SELECT SUM(COLUMNS(*)) FROM (SELECT * EXCLUDE (cell_id) FROM X)")
112112

113113
#taking the correlation of genes ITGB2 and SSU72 in dendritic cells that express either gene > 0
114-
adata_sql.query("SELECT corr(ITGB2,SSU72) as correlation FROM adata WHERE bulk_labels = 'Dendritic' AND (ITGB2 > 0 OR SSU72 >0)")
114+
asql.query("SELECT corr(ITGB2,SSU72) as correlation FROM adata WHERE bulk_labels = 'Dendritic' AND (ITGB2 > 0 OR SSU72 >0)")
115115

116116
```
117117

@@ -277,23 +277,23 @@ adata = sc.read_h5ad("Macosko_Mouse_Atlas_Single_Nuclei.Use_Backed.h5ad", backed
277277
MakeDb(adata=adata, db_name="Macosko_Mouse_Atlas", db_path="../db/", layers=["X", "obs"])
278278

279279
#query example | Runtime: 0.24sec
280-
adata_sql.query("SELECT ENSMUSG00000070880 FROM X WHERE ENSMUSG00000070880 > 0")
280+
asql.query("SELECT ENSMUSG00000070880 FROM X WHERE ENSMUSG00000070880 > 0")
281281

282282
#count the number of cells in each cluster | Runtime: 0.35sec
283-
adata_sql.query("SELECT ClusterNm, COUNT(cell_id) AS num_cells FROM obs GROUP BY ClusterNm ORDER BY num_cells DESC")
283+
asql.query("SELECT ClusterNm, COUNT(cell_id) AS num_cells FROM obs GROUP BY ClusterNm ORDER BY num_cells DESC")
284284

285285
#determine the total counts per cell library | Runtime: 4min 30sec
286-
adata_sql.calculate_total_counts(chunk_size=950)
286+
asql.calculate_total_counts(chunk_size=950)
287287

288288
#normalize umi counts to 10k per cell | Runtime: 1hr 48mins
289-
adata_sql.expression_normalize(total_counts_per_cell=1e4, chunk_size=300)
289+
asql.expression_normalize(total_counts_per_cell=1e4, chunk_size=300)
290290

291291
#log scale the normalized counts | Runtime: 59mins 13sec
292-
adata_sql.expression_log(log_type="LOG2", chunk_size=250)
292+
asql.expression_log(log_type="LN", chunk_size=250)
293293

294294
```
295295

296-
## Laptop system details for runtime analyses displayed above.
296+
## Laptop system details for both runtime analyses displayed above.
297297
- **Memory:** 40.0 GiB
298298
- **Processor:** 12th Gen Intel® Core™ i7-1255U × 12
299299
- **Disk Capacity:** 1.0 TB

0 commit comments

Comments
 (0)