@@ -189,10 +189,14 @@ class TabFileReader(abc.ABC):
189189 """
190190 def __init__ (self , path : str , ** params ):
191191 self .path = path
192- suffix = osp .splitext (path .rstrip (".bgz" ))[1 ].lower ()
192+ if path .endswith (".bgz" ):
193+ _p = path .rstrip (".bgz" )
194+ else :
195+ _p = path
196+ suffix = osp .splitext (_p )[1 ].lower ()
193197 self .suffix = suffix
194198 self .bed_type = None
195- if suffix in [".bed" , ".bedgraph" ]:
199+ if suffix in [".bed" , ".bedgraph" , ".bg" ]:
196200 self .bed_type = guess_bed_type (path )
197201 self .params = params
198202 self .is_2d = False
@@ -211,7 +215,7 @@ def __init__(self, path: str, **params):
211215 suffix = self .suffix
212216 if suffix == ".gtf" :
213217 ds = ox .from_gtf (self .path )
214- elif suffix in [".bed" , ".bedgraph" ]:
218+ elif suffix in [".bed" , ".bedgraph" , ".bg" ]:
215219 ds = ox .from_bed (self .path )
216220 elif suffix in ['.bw' , '.bigwig' ]:
217221 ds = ox .from_bigwig (self .path )
@@ -224,7 +228,7 @@ def __init__(self, path: str, **params):
224228 def query (self , gr : GenomeRange , ** kwargs ) -> pd .DataFrame :
225229 sub = self .ds .regions (str (gr ))
226230 df = sub .pd ()
227- if self .suffix in [".bed" , ".bedgraph" ]:
231+ if self .suffix in [".bed" , ".bedgraph" , ".bg" ]:
228232 rest = df .pop ('rest' )
229233 df_rest = rest .str .split ('\t ' , expand = True )
230234 df = pd .concat ([df , df_rest ], axis = 1 )
@@ -260,7 +264,7 @@ def query(
260264 itr = tabix_query (self .path , gr , split = True )
261265 rows = list (itr )
262266 df = pd .DataFrame (rows )
263- if self .suffix in [".bed" , ".bedgraph" ]:
267+ if self .suffix in [".bed" , ".bedgraph" , ".bg" ]:
264268 columns = FMT2COLUMNS [self .bed_type ]
265269 else :
266270 fmt = self .suffix [1 :]
@@ -277,7 +281,7 @@ def query(
277281class TabFileReaderInMemory (TabFileReader ):
278282 def __init__ (self , path : str , ** params ):
279283 super ().__init__ (path , ** params )
280- if self .suffix in [".bed" , ".bedgraph" ]:
284+ if self .suffix in [".bed" , ".bedgraph" , ".bg" ]:
281285 columns = FMT2COLUMNS [self .bed_type ]
282286 else :
283287 fmt = self .suffix [1 :]
@@ -312,27 +316,28 @@ def query(
312316 'chr1' : 'chrom1' ,
313317 'start1' : 'start1' ,
314318 'end1' : 'end1' ,
315- 'chrom2 ' : 'chrom2' ,
319+ 'chr2 ' : 'chrom2' ,
316320 'start2' : 'start2' ,
317321 'end2' : 'end2' ,
318322 }
319323
320324 if second is not None :
321325 sdf = self .df .query (
322- f"{ field_names ['chrom1 ' ]} == '{ gr .chrom } ' and { field_names ['start1' ]} >= { gr .start } and { field_names ['end1' ]} <= { gr .end } "
323- f"and { field_names ['chrom2 ' ]} == '{ second .chrom } ' and { field_names ['start2' ]} >= { second .start } and { field_names ['end2' ]} <= { second .end } "
326+ f"{ field_names ['chr1 ' ]} == '{ gr .chrom } ' and { field_names ['start1' ]} >= { gr .start } and { field_names ['end1' ]} <= { gr .end } "
327+ f"and { field_names ['chr2 ' ]} == '{ second .chrom } ' and { field_names ['start2' ]} >= { second .start } and { field_names ['end2' ]} <= { second .end } "
324328 )
325329 return sdf
326330 else :
327331 if open_region :
328- sdf = self . df . query (
329- f"{ field_names ['chrom1 ' ]} == '{ gr .chrom } ' and { field_names ['start1' ]} >= { gr .start } and { field_names ['end1' ]} <= { gr .end } "
330- f"and { field_names ['chrom2 ' ]} == '{ gr .chrom } "
332+ q = (
333+ f"{ field_names ['chr1 ' ]} == '{ gr .chrom } ' and { field_names ['start1' ]} >= { gr .start } and { field_names ['end1' ]} <= { gr .end } "
334+ f"and { field_names ['chr2 ' ]} == '{ gr .chrom } ' "
331335 )
336+ sdf = self .df .query (q )
332337 else :
333338 sdf = self .df .query (
334- f"{ field_names ['chrom1 ' ]} == '{ gr .chrom } ' and { field_names ['start1' ]} >= { gr .start } and { field_names ['end1' ]} <= { gr .end } "
335- f"and { field_names ['chrom2 ' ]} == '{ gr .chrom } ' and { field_names ['start2' ]} >= { gr .start } and { field_names ['end2' ]} <= { gr .end } "
339+ f"{ field_names ['chr1 ' ]} == '{ gr .chrom } ' and { field_names ['start1' ]} >= { gr .start } and { field_names ['end1' ]} <= { gr .end } "
340+ f"and { field_names ['chr2 ' ]} == '{ gr .chrom } ' and { field_names ['start2' ]} >= { gr .start } and { field_names ['end2' ]} <= { gr .end } "
336341 )
337342 return sdf
338343 else :
@@ -367,7 +372,7 @@ def _build_bgz_file(
367372 cat_cmd = "zcat" if input_is_gz else "cat"
368373 if prefix .lower ().endswith (".gtf" ):
369374 cmd = f'{ cat_cmd } { path } | grep -v ^"#" | sort -k1,1 -k4,4n | bgzip > { output_path } '
370- elif prefix .lower ().endswith ('.bed' ) or prefix .lower ().endswith ('.bedgraph' ):
375+ elif prefix .lower ().endswith ('.bed' ) or prefix .lower ().endswith ('.bedgraph' ) or prefix . lower (). endswith ( '.bg' ) :
371376 cmd = f'{ cat_cmd } { path } | sort -k1,1 -k2,2n | bgzip > { output_path } '
372377 elif prefix .lower ().endswith ('.bedpe' ):
373378 cmd = f'{ cat_cmd } { path } | sort -k1,1 -k4,4 -k2,2n -k5,5n | bgzip > { output_path } '
0 commit comments