Skip to content

Commit 01e51f4

Browse files
authored
Merge pull request #1 from ArpiarSaundersLab/dev
Extended functionality added
2 parents dfcdc1d + 65c9a04 commit 01e51f4

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+24385
-1998
lines changed

.gitignore

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,24 +2,16 @@ data/
22
db/
33
env/
44
__pycache__/
5-
6-
5+
rdata/
76
*.pyc
8-
9-
# Setuptools distribution folder.
107
/dist/
11-
12-
# Python egg metadata, regenerated from source files by setuptools.
138
/*.egg-info
149
/*.egg
15-
1610
env
1711
.eggs/
1812
build/
1913
build_doc.sh
2014
build_wheel.sh
21-
2215
.DS_Store
23-
2416
/**/__pycache__/
2517
/**/.pytest_cache/
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
library(Seurat)
2+
library(SeuratDisk)
3+
library(anndata)
4+
5+
# Define the file increments
6+
# Dataset sizes (number of cells) to convert. They are stored as character
# strings and pasted directly into the file names below; note that a numeric
# 100000 would be rendered as "1e+05" by paste0().
n_cells <- c("1000", "5000", "10000", "15000", "20000", "25000", "30000", "35000", "40000", "45000", "50000", "75000", "100000", "250000")
7+
8+
# Iterate over the splatter datasets and convert each one to a Seurat object
9+
# Convert every splatter .h5ad dataset listed in n_cells into an updated
# Seurat object stored as an .rds file.
examples_dir <- "/home/kenny/Documents/OHSU/Projects/AnnSql/examples"
for (i in n_cells) {
  print(i)
  h5ad_file <- paste0(examples_dir, "/data/splatter/data_", i, ".h5ad")
  seurat_file <- paste0(examples_dir, "/rdata/splatter/data_", i, ".rds")
  # Escape the dot so only a literal ".h5ad" suffix is rewritten; the
  # intermediate path is computed once and reused for loading and cleanup.
  h5seurat_file <- sub("\\.h5ad$", ".h5seurat", h5ad_file)

  # Convert() writes the intermediate .h5seurat file next to the .h5ad input.
  Convert(h5ad_file, dest = "h5seurat", overwrite = TRUE)
  seurat_obj <- LoadH5Seurat(h5seurat_file)

  # Update the in-memory object directly and write the .rds once; the
  # previous save / read-back / save sequence produced the same final file.
  s4_seurat <- UpdateSeuratObject(object = seurat_obj)
  saveRDS(s4_seurat, file = seurat_file)

  # The intermediate file is no longer needed once the .rds exists.
  file.remove(h5seurat_file)

  # Release the large objects before the next (bigger) dataset is loaded.
  rm(seurat_obj, s4_seurat)
}
23+
24+
25+
# Iterate over the random datasets and convert each one to a Seurat object
26+
# Convert every random .h5ad dataset listed in n_cells into an updated
# Seurat object stored as an .rds file. Cell metadata is not loaded from the
# .h5seurat file; it is attached from the AnnData `obs` table instead.
examples_dir <- "/home/kenny/Documents/OHSU/Projects/AnnSql/examples"
for (i in n_cells) {
  print(i)
  h5ad_file <- paste0(examples_dir, "/data/random/data_", i, ".h5ad")
  seurat_file <- paste0(examples_dir, "/rdata/random/data_", i, ".rds")
  # Escape the dot so only a literal ".h5ad" suffix is rewritten; the
  # intermediate path is computed once and reused for loading and cleanup.
  h5seurat_file <- sub("\\.h5ad$", ".h5seurat", h5ad_file)

  # Convert() writes the intermediate .h5seurat file next to the .h5ad input.
  Convert(h5ad_file, dest = "h5seurat", overwrite = TRUE)
  seurat_obj <- LoadH5Seurat(h5seurat_file, meta.data = FALSE, misc = FALSE)

  # Metadata and misc are skipped above, so take the per-cell annotations
  # straight from the original AnnData file.
  adata <- read_h5ad(h5ad_file, backed = TRUE)
  seurat_obj <- AddMetaData(seurat_obj, adata$obs)

  # Update the in-memory object directly and write the .rds once; the
  # previous save / read-back / save sequence produced the same final file.
  s4_seurat <- UpdateSeuratObject(object = seurat_obj)
  saveRDS(s4_seurat, file = seurat_file)

  # The intermediate file is no longer needed once the .rds exists.
  file.remove(h5seurat_file)

  # Release the large objects before the next (bigger) dataset is loaded.
  rm(seurat_obj, s4_seurat, adata)
}
Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,16 +29,16 @@
2929
adata.obs["cell_type"] = np.random.choice(["A","B","C"], adata.shape[0])
3030

3131
#save the object
32-
adata.write("../data/random_data_"+str(i)+".h5ad")
32+
adata.write("../data/random/data_"+str(i)+".h5ad")
3333

3434
#open the object
35-
adata = sc.read("../data/random_data_"+str(i)+".h5ad", backed="r")
35+
adata = sc.read("../data/random/data_"+str(i)+".h5ad", backed="r")
3636

3737
#make the database using backed mode
3838
start_time = time.time()
39-
MakeDb(adata, db_path="../db/", db_name="random_data_"+str(i), create_all_indexes=False, convenience_view=False)
39+
MakeDb(adata, db_path="../db/random/", db_name="random_data_"+str(i), create_all_indexes=False, convenience_view=False)
4040
print("--- %s seconds ---" % (time.time() - start_time))
4141

4242
#clear mem
4343
gc.collect()
44-
adata = None
44+
adata = None
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
#import libraries
2+
import scanpy as sc
3+
from AnnSQL import AnnSQL
4+
import time
5+
import pandas as pd
6+
from AnnSQL.MakeDb import MakeDb
7+
import os
8+
import numpy as np
9+
import gc
10+
import seaborn as sns
11+
import matplotlib.pyplot as plt
12+
from scsim import scsim
13+
14+
15+
#dataset sizes to generate
16+
dataset_sizes = [1000, 5000, 10000, 15000, 20000, 25000, 30000, 35000, 40000, 45000, 50000, 75000, 100000, 250000]
# NOTE(review): this reassignment overrides the full list above, so only a
# single 9999-cell dataset is generated. It looks like a debugging leftover;
# confirm and remove it to produce every size.
dataset_sizes = [9999]

#iterates and creates data_sizes defined above
for i in dataset_sizes:

    print(f"Running for {i}")

    #number of genes to simulate
    columns = 10000

    simulator = scsim(ngenes=columns, ncells=i, ngroups=5, libloc=7.64, libscale=0.78,
                      mean_rate=7.68, mean_shape=0.34, expoutprob=0.00286,
                      expoutloc=6.15, expoutscale=0.49,
                      diffexpprob=0.025, diffexpdownprob=0., diffexploc=1.0, diffexpscale=1.0,
                      bcv_dispersion=0.448, bcv_dof=22.087, ndoublets=0,
                      nproggenes=400, progdownprob=0., progdeloc=1.0,
                      progdescale=1.0, progcellfrac=0.35, proggoups=list(range(1, int(10/3)+1)),
                      minprogusage=.1, maxprogusage=.7, seed=21)

    #time the simulation itself
    start_time = time.time()
    simulator.simulate()
    end_time = time.time()
    print('minutes elapsed for seed:',((end_time-start_time)/60))

    #build the AnnData object from the simulated counts and cell parameters
    adata = sc.AnnData(X=simulator.counts,
                       obs=simulator.cellparams,
                       )
    #use a separate name for the gene index so the loop variable i is not shadowed
    adata.var.index = [f"gene_{gene_idx}" for gene_idx in range(adata.shape[1])]
    adata.obs["cell_type"] = adata.obs["group"]
    adata.obs = adata.obs.drop(columns=["group","has_program","program_usage","libsize"])

    #save the object
    h5ad_path = "../data/splatter/data_"+str(i)+".h5ad"
    adata.write(h5ad_path)

    #open the object
    adata = sc.read(h5ad_path, backed="r")

    #remove a stale database from a previous run before rebuilding it.
    #MakeDb is called with db_name "splatter_data_<i>", so that is the file to
    #delete (the earlier "data_<i>.asql" target did not match that name).
    #assumes MakeDb writes <db_path><db_name>.asql -- TODO confirm
    db_file = "../db/splatter/splatter_data_"+str(i)+".asql"
    if os.path.exists(db_file):
        os.remove(db_file)

    #make the database using backed mode
    start_time = time.time()
    MakeDb(adata, db_path="../db/splatter/", db_name="splatter_data_"+str(i), create_all_indexes=False, convenience_view=False)
    print("--- %s seconds ---" % (time.time() - start_time))

    #clear mem: drop the reference first so the collector can actually reclaim it
    adata = None
    gc.collect()

0 commit comments

Comments
 (0)