Skip to content

Commit 109bb12

Browse files
committed
change bigwig track to use oxbow
1 parent adadde4 commit 109bb12

File tree

13 files changed

57 additions and 76 deletions (+57 / -76 lines changed)

13 files changed

+57
-76
lines changed

coolbox/core/coverage/highlights.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ class _Highlights(object):
1616
def fetch_data(self, gr: GenomeRange, **kwargs):
1717
gr = to_gr(gr)
1818
if gr.chrom not in list(self.interval_tree):
19-
gr.change_chrom_names()
19+
gr = gr.change_chrom_names()
2020

2121
return [
2222
(region.begin, region.end, region.data)

coolbox/core/coverage/vlines.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ def fetch_data(self, gr: GenomeRange):
1010
vlines_list = []
1111

1212
if gr.chrom not in list(self.vlines_intval_tree):
13-
gr.change_chrom_names()
13+
gr = gr.change_chrom_names()
1414

1515
for region in sorted(self.vlines_intval_tree[gr.chrom][gr.start - 1:gr.end + 1]):
1616
vlines_list.append(region.begin)

coolbox/core/track/arcs/fetch.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@ def fetch_intervals(self, bgz_file, gr: GenomeRange, gr2: GenomeRange = None) ->
88
open_region = self.properties.get("open_region") == "yes"
99
rows = list(pairix_query(bgz_file, gr, second=gr2, open_region=open_region, split=True))
1010
if not rows:
11-
gr.change_chrom_names()
11+
gr = gr.change_chrom_names()
1212
if gr2:
13-
gr2.change_chrom_names()
13+
gr2 = gr2.change_chrom_names()
1414
rows = list(pairix_query(bgz_file, gr, second=gr2, open_region=open_region, split=True))
1515

1616
return pd.DataFrame(rows)

coolbox/core/track/bed/fetch.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ def fetch_intervals(self, bgz_file, gr: GenomeRange) -> pd.DataFrame:
2020
"""
2121
intervals, bed_type = self.load_range(bgz_file, gr)
2222
if len(intervals) == 0:
23-
gr.change_chrom_names()
23+
gr = gr.change_chrom_names()
2424
intervals, bed_type = self.load_range(bgz_file, gr)
2525
if len(intervals) == 0:
2626
log.debug(f"No valid intervals were found in file {bgz_file} within range {gr}")

coolbox/core/track/gtf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ def fetch_intervals(self, gr: GenomeRange):
9898
"""
9999
rows = [row for row in tabix_query(self.bgz_file, gr.chrom, gr.start, gr.end)]
100100
if not rows:
101-
gr.change_chrom_names()
101+
gr = gr.change_chrom_names()
102102
for row in tabix_query(self.bgz_file, gr.chrom, gr.start, gr.end):
103103
rows.append(row)
104104

coolbox/core/track/hist/bigwig.py

Lines changed: 10 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
import numpy as np
2-
import pandas as pd
32

43
from coolbox.utilities import (
54
split_genome_range, change_chrom_names,
65
GenomeRange, get_logger, to_gr
76
)
7+
import oxbow as ox
88
from .base import HistBase
99

1010
log = get_logger(__name__)
@@ -39,13 +39,13 @@ def __init__(self, file, **kwargs):
3939
**kwargs
4040
})
4141
super().__init__(**properties)
42-
import bbi
43-
self.bw = bbi.open(self.properties['file'])
42+
self.ds = ox.from_bigwig(self.properties['file'])
4443

4544
def fetch_plot_data(self, gr: GenomeRange, **kwargs):
46-
num_bins = self.get_num_bins()
4745
self.check_chrom_name(gr)
48-
return self.fetch_scores(gr, num_bins)
46+
intervals = self.fetch_data(gr)
47+
values = intervals['value'].values
48+
return values
4949

5050
def fetch_data(self, gr: GenomeRange, **kwargs):
5151
"""
@@ -59,47 +59,17 @@ def fetch_data(self, gr: GenomeRange, **kwargs):
5959
BigWig interval table.
6060
"""
6161
chrom, start, end = split_genome_range(gr)
62-
if chrom not in self.bw.chromsizes:
62+
if chrom not in self.ds.chrom_names:
6363
chrom = change_chrom_names(chrom)
6464

65-
intervals = self.bw.fetch_intervals(chrom, start, end)
66-
columns = list(intervals.columns)
67-
if 'value' in columns:
68-
columns[columns.index('value')] = 'score'
69-
intervals.columns = columns
70-
65+
intervals = self.ds.regions(f"{chrom}:{start}-{end}").pd()
7166
return intervals
7267

73-
def get_num_bins(self, default_num=700):
74-
num_bins = default_num
75-
if 'number_of_bins' in self.properties:
76-
try:
77-
num_bins = int(self.properties['number_of_bins'])
78-
except TypeError:
79-
num_bins = default_num
80-
log.warning("'number_of_bins' value: {} for bigwig file {} "
81-
"is not valid. Using default value (700)".format(self.properties['number_of_bins'],
82-
self.properties['file']))
83-
return num_bins
84-
85-
def fetch_scores(self, genome_range, num_bins, max_try_nums=5):
86-
"""Fetch bins scores within input chromosome range.
87-
"""
88-
scores_per_bin = np.zeros(num_bins)
89-
gr = to_gr(genome_range)
90-
if gr.chrom not in self.bw.chromsizes:
91-
gr.change_chrom_names()
92-
try:
93-
scores_per_bin = self.bw.fetch(gr.chrom, gr.start, gr.end, num_bins).astype(float)
94-
except Exception as e:
95-
log.warning(f"error found while reading bigwig scores: {e}")
96-
return scores_per_bin
97-
9868
def check_chrom_name(self, genome_range):
99-
if genome_range.chrom not in self.bw.chromsizes:
100-
genome_range.change_chrom_names()
69+
if genome_range.chrom not in self.ds.chrom_names:
70+
genome_range = genome_range.change_chrom_names()
10171

102-
if genome_range.chrom not in self.bw.chromsizes:
72+
if genome_range.chrom not in self.ds.chrom_names:
10373
log.warning("Can not read region {} from bigwig file:\n\n"
10474
"{}\n\nPlease check that the chromosome name is part of the bigwig file "
10575
"and that the region is valid".format(str(genome_range), self.properties['file']))

coolbox/core/track/hist/snp.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,3 @@
1-
import os.path as osp
2-
import subprocess as subp
3-
41
import numpy as np
52
import pandas as pd
63

@@ -81,7 +78,7 @@ def fetch_data(self, gr: GenomeRange, **kwargs):
8178
ix_pval = self.properties['col_pval']
8279
rows = self.load_range(gr)
8380
if len(rows) == 0:
84-
gr.change_chrom_names()
81+
gr = gr.change_chrom_names()
8582
rows = self.load_range(gr)
8683
df = pd.DataFrame(rows)
8784
if df.shape[0] > 0:

coolbox/core/track/ideogram.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ def lookup_band_color(self, band_type):
7676

7777
def fetch_data(self, gr: GenomeRange, **kwargs):
7878
if gr.chrom not in self.interval_tree:
79-
gr.change_chrom_names()
79+
gr = gr.change_chrom_names()
8080
bands_in_region = sorted(self.interval_tree[gr.chrom][gr.start:gr.end])
8181
rows = []
8282
for itv in bands_in_region:

coolbox/utilities/genome.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ def parse_region_string(region_string):
119119
raise ValueError(f"Failure to parse region string {region_string}, please check that region format "
120120
f"should be like \"chr:start-end\".")
121121

122-
def change_chrom_names(self):
122+
def change_chrom_names(self) -> "GenomeRange":
123123
"""
124124
>>> range1 = GenomeRange("chr1", 1000, 2000)
125125
>>> range1.chrom
@@ -131,7 +131,9 @@ def change_chrom_names(self):
131131
>>> range1.chrom
132132
'chr1'
133133
"""
134-
self.chrom = change_chrom_names(self.chrom)
134+
new_chrom = change_chrom_names(self.chrom)
135+
gr = GenomeRange(new_chrom, self.start, self.end)
136+
return gr
135137

136138
@property
137139
def length(self):

coolbox/utilities/hic/wrap.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -59,9 +59,9 @@ def fetch(self, gr1, gr2=None):
5959
gr2 = GenomeRange(gr2)
6060

6161
if gr1.chrom.startswith("chr"):
62-
gr1.change_chrom_names()
62+
gr1 = gr1.change_chrom_names()
6363
if gr2.chrom.startswith("chr"):
64-
gr2.change_chrom_names()
64+
gr2 = gr2.change_chrom_names()
6565

6666
binsize = self.infer_binsize(gr1)
6767
self.fetched_binsize = binsize # expose fetched binsize
@@ -128,9 +128,9 @@ def fetch_pixels(self, genome_range1, genome_range2=None):
128128
if genome_range2 is None:
129129
genome_range2 = genome_range1
130130
if genome_range1.chrom.startswith("chr"):
131-
genome_range1.change_chrom_names()
131+
genome_range1 = genome_range1.change_chrom_names()
132132
if genome_range2.chrom.startswith("chr"):
133-
genome_range2.change_chrom_names()
133+
genome_range2 = genome_range2.change_chrom_names()
134134
binsize = self.infer_binsize(genome_range1)
135135
siter = self.__fetch_straw_iter(genome_range1, genome_range2, binsize)
136136
rows = [[i[0], i[1], i[2]] for i in siter]
@@ -305,9 +305,9 @@ def fetch(self, genome_range1, genome_range2=None):
305305
cool = self.get_cool(genome_range1)
306306

307307
if genome_range1.chrom not in cool.chromnames:
308-
genome_range1.change_chrom_names()
308+
genome_range1 = genome_range1.change_chrom_names()
309309
if genome_range2.chrom not in cool.chromnames:
310-
genome_range2.change_chrom_names()
310+
genome_range2 = genome_range2.change_chrom_names()
311311

312312
try:
313313
mat = cool.matrix(balance=self.balance).fetch(str(genome_range1), str(genome_range2))
@@ -325,9 +325,9 @@ def fetch_pixels(self, genome_range1, genome_range2=None, join=True):
325325
genome_range2 = genome_range1
326326

327327
if genome_range1.chrom not in cool.chromnames:
328-
genome_range1.change_chrom_names()
328+
genome_range1 = genome_range1.change_chrom_names()
329329
if genome_range2.chrom not in cool.chromnames:
330-
genome_range2.change_chrom_names()
330+
genome_range2 = genome_range2.change_chrom_names()
331331

332332
mat = cool.matrix(as_pixels=True, balance=self.balance, join=join)
333333
return mat.fetch(str(genome_range1), str(genome_range2))

0 commit comments

Comments (0)