Skip to content

Commit 9ff2e68

Browse files
committed
Added chunk size parameter to MakeDb for backed mode.
1 parent f70ac69 commit 9ff2e68

File tree

5 files changed

+17
-5
lines changed

5 files changed

+17
-5
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,7 @@ adata_sql.query("SELECT corr(ITGB2,SSU72) as correlation FROM adata WHERE bulk_l
221221
<li><code>adata</code>: AnnData object (required)</li>
222222
<li><code>db_name</code>: Name for the database (required)</li>
223223
<li><code>db_path</code>: Path to store the database (default: 'db/')</li>
224+
<li><code>chunk_size</code>: When opening AnnData in backed mode, the number of rows to insert per chunk. Lower the value for low-memory systems (default: 5000)</li>
224225
<li><code>layers</code>: List (optional. default: ["X", "obs", "var", "var_names", "obsm", "varm", "obsp", "uns"]). <i>The layers of the AnnData object to build into the database. For larger datasets, it may be beneficial to only include the layers you're interested in querying.</i></li>
225226
<li><code>create_basic_indexes</code>: Build indexes on cell_id (optional. default: False)</li>
226227
<li><code>create_all_indexes</code>: Boolean (optional. default: False). <i>Warning: Runtime can be significant when building.</i></li>

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
setuptools.setup(
77
name='AnnSQL',
8-
version='v0.9.1',
8+
version='v0.9.2',
99
author="Kenny Pavan",
1010
author_email="pavan@ohsu.edu",
1111
description="A Python SQL tool for converting Anndata objects to a relational DuckDb database. Methods are included for querying and basic single-cell preprocessing (experimental). ",

src/AnnSQL.egg-info/PKG-INFO

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Metadata-Version: 2.1
22
Name: AnnSQL
3-
Version: 0.9.1
3+
Version: 0.9.2
44
Summary: A Python SQL tool for converting Anndata objects to a relational DuckDb database. Methods are included for querying and basic single-cell preprocessing (experimental).
55
Home-page: https://github.com/kennypavan/AnnSQL
66
Author: Kenny Pavan
@@ -237,6 +237,7 @@ adata_sql.query("SELECT corr(ITGB2,SSU72) as correlation FROM adata WHERE bulk_l
237237
<li><code>adata</code>: AnnData object (required)</li>
238238
<li><code>db_name</code>: Name for the database (required)</li>
239239
<li><code>db_path</code>: Path to store the database (default: 'db/')</li>
240+
<li><code>chunk_size</code>: When opening AnnData in backed mode, the number of rows to insert per chunk. Lower the value for low-memory systems (default: 5000)</li>
240241
<li><code>layers</code>: List (optional. default: ["X", "obs", "var", "var_names", "obsm", "varm", "obsp", "uns"]). <i>The layers of the AnnData object to build into the database. For larger datasets, it may be beneficial to only include the layers you're interested in querying.</i></li>
241242
<li><code>create_basic_indexes</code>: Build indexes on cell_id (optional. default: False)</li>
242243
<li><code>create_all_indexes</code>: Boolean (optional. default: False). <i>Warning: Runtime can be significant when building.</i></li>

src/AnnSQL/BuildDb.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,15 @@ def __init__(self,
2525
create_all_indexes=False,
2626
create_basic_indexes=False,
2727
convenience_view=True,
28+
chunk_size=5000,
2829
layers=["X", "obs", "var", "var_names", "obsm", "varm", "obsp", "uns"]):
2930
self.adata = adata
3031
self.conn = conn
3132
self.create_all_indexes = create_all_indexes
3233
self.create_basic_indexes = create_basic_indexes
3334
self.convenience_view = convenience_view
3435
self.layers = layers
36+
self.chunk_size = chunk_size
3537
self.build()
3638
if "uns" in self.layers: #not recommended for large datasets
3739
self.build_uns_layer()
@@ -76,7 +78,7 @@ def build(self):
7678
cell_id_df = pd.DataFrame(obs_df['cell_id'][:1]).reset_index(drop=True)
7779
X_df = pd.concat([cell_id_df, X_df], axis=1)
7880
X_df.columns = ['cell_id'] + list(X_df.columns[1:])
79-
chunk_size = 5000
81+
chunk_size = self.chunk_size
8082
print(f"Starting backed mode X table data insert. Total rows: {self.adata.shape[0]}")
8183
for start in range(0, self.adata.shape[0], chunk_size):
8284
start_time = time.time()

src/AnnSQL/MakeDb.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,22 @@
55
import os
66

77
class MakeDb:
8-
def __init__(self, adata=None, db_name=None, db_path="db/", create_all_indexes=False, create_basic_indexes=False, convenience_view=True, layers=["X", "obs", "var", "var_names", "obsm", "varm", "obsp", "uns"]):
8+
def __init__(self, adata=None,
9+
db_name=None,
10+
db_path="db/",
11+
create_all_indexes=False,
12+
create_basic_indexes=False,
13+
convenience_view=True,
14+
chunk_size=5000,
15+
layers=["X", "obs", "var", "var_names", "obsm", "varm", "obsp", "uns"]):
916
self.adata = adata
1017
self.db_name = db_name
1118
self.db_path = db_path
1219
self.layers = layers
1320
self.create_all_indexes = create_all_indexes
1421
self.create_basic_indexes = create_basic_indexes
1522
self.convenience_view = convenience_view
23+
self.chunk_size = chunk_size
1624
self.validate_params()
1725
self.build_db()
1826

@@ -35,5 +43,5 @@ def create_db(self):
3543

3644
def build_db(self):
3745
self.create_db()
38-
BuildDb(adata=self.adata, conn=self.conn, create_all_indexes=self.create_all_indexes, create_basic_indexes=self.create_basic_indexes, convenience_view=self.convenience_view, layers=self.layers)
46+
BuildDb(adata=self.adata, conn=self.conn, create_all_indexes=self.create_all_indexes, create_basic_indexes=self.create_basic_indexes, convenience_view=self.convenience_view, layers=self.layers, chunk_size=self.chunk_size)
3947
self.conn.close()

0 commit comments

Comments
 (0)