Skip to content

Commit 17bf045

Browse files
committed
Code cleanup
1 parent c66fb84 commit 17bf045

File tree

8 files changed

+93
-64
lines changed

8 files changed

+93
-64
lines changed

examples/advanced_usage.ipynb

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@
5757
"if os.path.exists(\"db/pbmc3k.asql\"):\n",
5858
"\tos.remove(\"db/pbmc3k.asql\")\n",
5959
"MakeDb(adata=adata, db_name=\"pbmc3k\", db_path=\"db/\")\n",
60-
"adata_sql = AnnSQL(db=\"db/pbmc3k.asql\")"
60+
"asql = AnnSQL(db=\"db/pbmc3k.asql\")"
6161
]
6262
},
6363
{
@@ -82,7 +82,7 @@
8282
}
8383
],
8484
"source": [
85-
"adata_sql.calculate_total_counts(chunk_size=950) #if system memory is low, lower chunks to <=200 "
85+
"asql.calculate_total_counts(chunk_size=950) #if system memory is low, lower chunks to <=200 "
8686
]
8787
},
8888
{
@@ -161,7 +161,7 @@
161161
}
162162
],
163163
"source": [
164-
"adata_sql.query(\"SELECT total_counts FROM X ORDER BY total_counts DESC LIMIT 5 \")"
164+
"asql.query(\"SELECT total_counts FROM X ORDER BY total_counts DESC LIMIT 5 \")"
165165
]
166166
},
167167
{
@@ -188,7 +188,7 @@
188188
}
189189
],
190190
"source": [
191-
"adata_sql.calculate_gene_counts(chunk_size=950)"
191+
"asql.calculate_gene_counts(chunk_size=950)"
192192
]
193193
},
194194
{
@@ -285,7 +285,7 @@
285285
}
286286
],
287287
"source": [
288-
"adata_sql.query(\"SELECT * FROM var ORDER BY gene_counts DESC LIMIT 5 \")"
288+
"asql.query(\"SELECT * FROM var ORDER BY gene_counts DESC LIMIT 5 \")"
289289
]
290290
},
291291
{
@@ -311,7 +311,7 @@
311311
],
312312
"source": [
313313
"#lower chunk_size if system memory is low. Max supported chunk_size is 950 (DuckDB limitation)\n",
314-
"adata_sql.expression_normalize(chunk_size=950) "
314+
"asql.expression_normalize(chunk_size=950) "
315315
]
316316
},
317317
{
@@ -390,7 +390,7 @@
390390
}
391391
],
392392
"source": [
393-
"adata_sql.query(\"SELECT RER1 FROM X ORDER BY RER1 DESC LIMIT 5 \")"
393+
"asql.query(\"SELECT RER1 FROM X ORDER BY RER1 DESC LIMIT 5 \")"
394394
]
395395
},
396396
{
@@ -415,7 +415,7 @@
415415
}
416416
],
417417
"source": [
418-
"adata_sql.expression_log(log_type=\"LOG2\", chunk_size=950) #LOG2 or LOG10"
418+
"asql.expression_log(log_type=\"LOG2\", chunk_size=950) #LOG2 or LOG10"
419419
]
420420
},
421421
{
@@ -494,7 +494,7 @@
494494
}
495495
],
496496
"source": [
497-
"adata_sql.query(\"SELECT RER1 FROM X ORDER BY RER1 DESC LIMIT 5 \")"
497+
"asql.query(\"SELECT RER1 FROM X ORDER BY RER1 DESC LIMIT 5 \")"
498498
]
499499
},
500500
{
@@ -520,7 +520,7 @@
520520
}
521521
],
522522
"source": [
523-
"adata_sql.calculate_variable_genes(chunk_size=950) "
523+
"asql.calculate_variable_genes(chunk_size=950) "
524524
]
525525
},
526526
{
@@ -1028,7 +1028,7 @@
10281028
}
10291029
],
10301030
"source": [
1031-
"adata_sql.query(\"SELECT * FROM var ORDER BY variance DESC LIMIT 50 \")"
1031+
"asql.query(\"SELECT * FROM var ORDER BY variance DESC LIMIT 50 \")"
10321032
]
10331033
}
10341034
],

examples/basic_usage.ipynb

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
},
2929
{
3030
"cell_type": "code",
31-
"execution_count": 1,
31+
"execution_count": 9,
3232
"metadata": {},
3333
"outputs": [],
3434
"source": [
@@ -46,7 +46,7 @@
4646
},
4747
{
4848
"cell_type": "code",
49-
"execution_count": 2,
49+
"execution_count": 10,
5050
"metadata": {},
5151
"outputs": [
5252
{
@@ -78,21 +78,21 @@
7878
},
7979
{
8080
"cell_type": "code",
81-
"execution_count": 3,
81+
"execution_count": 11,
8282
"metadata": {},
8383
"outputs": [
8484
{
8585
"name": "stdout",
8686
"output_type": "stream",
8787
"text": [
88-
"Time to make var_names unique: 0.05755782127380371\n",
89-
"Time to create X table structure: 0.0072100162506103516\n",
90-
"Time to insert X data: 0.12090826034545898\n"
88+
"Time to make var_names unique: 0.034673452377319336\n",
89+
"Time to create X table structure: 0.004136085510253906\n",
90+
"Time to insert X data: 0.08172154426574707\n"
9191
]
9292
}
9393
],
9494
"source": [
95-
"adata_sql = AnnSQL(adata=adata)"
95+
"asql = AnnSQL(adata=adata)"
9696
]
9797
},
9898
{
@@ -107,7 +107,7 @@
107107
},
108108
{
109109
"cell_type": "code",
110-
"execution_count": 4,
110+
"execution_count": 12,
111111
"metadata": {},
112112
"outputs": [
113113
{
@@ -305,13 +305,13 @@
305305
"[5 rows x 766 columns]"
306306
]
307307
},
308-
"execution_count": 4,
308+
"execution_count": 12,
309309
"metadata": {},
310310
"output_type": "execute_result"
311311
}
312312
],
313313
"source": [
314-
"adata_sql.query(\"SELECT * FROM X LIMIT 5\")"
314+
"asql.query(\"SELECT * FROM X LIMIT 5\")"
315315
]
316316
},
317317
{
@@ -323,7 +323,7 @@
323323
},
324324
{
325325
"cell_type": "code",
326-
"execution_count": 5,
326+
"execution_count": 13,
327327
"metadata": {},
328328
"outputs": [
329329
{
@@ -602,13 +602,13 @@
602602
"[8 rows x 766 columns]"
603603
]
604604
},
605-
"execution_count": 5,
605+
"execution_count": 13,
606606
"metadata": {},
607607
"output_type": "execute_result"
608608
}
609609
],
610610
"source": [
611-
"adata_sql.query(\"SELECT * FROM X WHERE HES4 > 4\")"
611+
"asql.query(\"SELECT * FROM X WHERE HES4 > 4\")"
612612
]
613613
},
614614
{
@@ -620,7 +620,7 @@
620620
},
621621
{
622622
"cell_type": "code",
623-
"execution_count": 6,
623+
"execution_count": 14,
624624
"metadata": {},
625625
"outputs": [
626626
{
@@ -953,13 +953,13 @@
953953
"[10 rows x 775 columns]"
954954
]
955955
},
956-
"execution_count": 6,
956+
"execution_count": 14,
957957
"metadata": {},
958958
"output_type": "execute_result"
959959
}
960960
],
961961
"source": [
962-
"adata_sql.query(\"SELECT * FROM adata WHERE bulk_labels = 'Dendritic' AND percent_mito > 0.025\")"
962+
"asql.query(\"SELECT * FROM adata WHERE bulk_labels = 'Dendritic' AND percent_mito > 0.025\")"
963963
]
964964
},
965965
{
@@ -971,7 +971,7 @@
971971
},
972972
{
973973
"cell_type": "code",
974-
"execution_count": 7,
974+
"execution_count": 15,
975975
"metadata": {},
976976
"outputs": [
977977
{
@@ -1068,18 +1068,18 @@
10681068
"9 CD4+/CD45RA+/CD25- Naive T 8"
10691069
]
10701070
},
1071-
"execution_count": 7,
1071+
"execution_count": 15,
10721072
"metadata": {},
10731073
"output_type": "execute_result"
10741074
}
10751075
],
10761076
"source": [
1077-
"adata_sql.query(\"SELECT bulk_labels, COUNT(*) as total FROM obs GROUP BY bulk_labels ORDER BY total DESC\")"
1077+
"asql.query(\"SELECT bulk_labels, COUNT(*) as total FROM obs GROUP BY bulk_labels ORDER BY total DESC\")"
10781078
]
10791079
},
10801080
{
10811081
"cell_type": "code",
1082-
"execution_count": 8,
1082+
"execution_count": 16,
10831083
"metadata": {},
10841084
"outputs": [
10851085
{
@@ -1170,13 +1170,13 @@
11701170
"10 varm_PCs"
11711171
]
11721172
},
1173-
"execution_count": 8,
1173+
"execution_count": 16,
11741174
"metadata": {},
11751175
"output_type": "execute_result"
11761176
}
11771177
],
11781178
"source": [
1179-
"adata_sql.query(\"SHOW tables;\")"
1179+
"asql.query(\"SHOW tables;\")"
11801180
]
11811181
}
11821182
],

examples/build_ondisk_database.ipynb

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@
109109
"source": [
110110
"adata = sc.read_h5ad(\"data/pbmc3k_raw.h5ad\", backed=\"r\")\n",
111111
"\n",
112-
"#this delete command is for testing purposes only. \n",
112+
"#Delete command. This is for testing purposes only. \n",
113113
"if os.path.exists(\"db/pbmc3k.asql\"):\n",
114114
"\tos.remove(\"db/pbmc3k.asql\")\n",
115115
"if os.path.exists(\"db/pbmc3k.asql.wal\"):\n",
@@ -121,7 +121,7 @@
121121
"# #medium system memory (12-24Gb)\n",
122122
"# MakeDb(adata=adata, db_name=\"pbmc3k\", db_path=\"db/\", chunk_size=2500)\n",
123123
"\n",
124-
"# #low system memory (<12Gb)\n",
124+
"# #low system memory (<=12Gb)\n",
125125
"# MakeDb(adata=adata, db_name=\"pbmc3k\", db_path=\"db/\", chunk_size=1000, make_buffer_file=True)"
126126
]
127127
},
@@ -139,7 +139,7 @@
139139
"metadata": {},
140140
"outputs": [],
141141
"source": [
142-
"adata_sql = AnnSQL(db=\"db/pbmc3k.asql\")"
142+
"asql = AnnSQL(db=\"db/pbmc3k.asql\")"
143143
]
144144
},
145145
{
@@ -362,7 +362,7 @@
362362
}
363363
],
364364
"source": [
365-
"adata_sql.query(\"SELECT * FROM X LIMIT 5\")"
365+
"asql.query(\"SELECT * FROM X LIMIT 5\")"
366366
]
367367
},
368368
{
@@ -474,7 +474,7 @@
474474
],
475475
"source": [
476476
"#total counts per gene \n",
477-
"adata_sql.query(\"SELECT SUM(COLUMNS(*)) FROM (SELECT * EXCLUDE (cell_id) FROM X)\")"
477+
"asql.query(\"SELECT SUM(COLUMNS(*)) FROM (SELECT * EXCLUDE (cell_id) FROM X)\")"
478478
]
479479
}
480480
],

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
setuptools.setup(
77
name='AnnSQL',
8-
version='v0.9.2',
8+
version='v0.9.3',
99
author="Kenny Pavan",
1010
author_email="pavan@ohsu.edu",
1111
description="A Python SQL tool for converting Anndata objects to a relational DuckDb database. Methods are included for querying and basic single-cell preprocessing (experimental). ",

src/AnnSQL.egg-info/PKG-INFO

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Metadata-Version: 2.1
22
Name: AnnSQL
3-
Version: 0.9.2
3+
Version: 0.9.3
44
Summary: A Python SQL tool for converting Anndata objects to a relational DuckDb database. Methods are included for querying and basic single-cell preprocessing (experimental).
55
Home-page: https://github.com/kennypavan/AnnSQL
66
Author: Kenny Pavan

src/AnnSQL/AnnSQL.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,21 @@
88

99
class AnnSQL:
1010
def __init__(self, adata=None, db=None, create_all_indexes=False, create_basic_indexes=False, layers=["X", "obs", "var", "var_names", "obsm", "varm", "obsp", "uns"]):
11+
"""
12+
Initializes an instance of the AnnSQL class. This class is used to query and update a database created from an AnnData object.
13+
It also provides methods for data normalization and transformation. The in-process database engine is DuckDB and the database is
14+
stored in memory by default. However, the database can be loaded from a file path by providing the db parameter. Databases can be
15+
built from an AnnData object by using the MakeDb class.
16+
17+
Parameters:
18+
- adata (AnnData or None): An AnnData object containing the data to be stored in the database. If None, an empty AnnData object will be created.
19+
- db (str or None): The path to an existing database file.
20+
- create_all_indexes (bool): Whether to create indexes for all columns in the database. Memory intensive. Default is False.
21+
- create_basic_indexes (bool): Whether to create indexes for basic columns. Default is False.
22+
- layers (list): A list of layer names to be stored in the database. Default is ["X", "obs", "var", "var_names", "obsm", "varm", "obsp", "uns"].
23+
Returns:
24+
None
25+
"""
1126
self.adata = self.open_anndata(adata)
1227
self.db = db
1328
self.create_basic_indexes = create_basic_indexes
@@ -168,9 +183,7 @@ def expression_log(self, log_type="LN", chunk_size=200, print_progress=False):
168183
for gene in chunk:
169184
if gene == 'total_counts':
170185
continue
171-
updates.append(f"{gene} = {log_type}({gene}+1)")
172-
#handle zero values
173-
#updates.append(f"{gene} = CASE WHEN {gene} = 0 OR {gene} = 0.0 THEN 0.0 ELSE {log_type}({gene}) END")
186+
updates.append(f"{gene} = {log_type}({gene}+1)") #handle zero values like scanpy
174187
update_query = f"UPDATE X SET {', '.join(updates)}"
175188
self.update_query(update_query, suppress_message=True)
176189
if print_progress == True:
@@ -286,7 +299,6 @@ def calculate_variable_genes(self, chunk_size=100, print_progress=False, gene_fi
286299
self.conn.execute("DROP VIEW IF EXISTS variance_df")
287300
print("Variance Calculation Complete")
288301

289-
290302
def check_chunk_size(self, chunk_size):
291303
if chunk_size > 999:
292304
raise ValueError('chunk_size must be less than 1000. DuckDb limitation')

0 commit comments

Comments
 (0)