Skip to content

Commit 8f87eab

Browse files
committed
feat: implement roaring64_bitmap_add_offset
In order to allow shifting the full range, take `bool negative, uint64_t offset` rather than an `int64_t`.
1 parent 4c21998 commit 8f87eab

File tree

3 files changed

+396
-2
lines changed

3 files changed

+396
-2
lines changed

include/roaring/roaring64.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -506,6 +506,16 @@ void roaring64_bitmap_flip_inplace(roaring64_bitmap_t *r, uint64_t min,
506506
*/
507507
void roaring64_bitmap_flip_closed_inplace(roaring64_bitmap_t *r, uint64_t min,
508508
uint64_t max);
509+
510+
/**
511+
* Return a copy of the bitmap with all values shifted by offset. If negative
512+
* is true, the shift is subtracted; otherwise it is added. Values that overflow or
513+
* underflow uint64_t are dropped. The caller is responsible for freeing the
514+
* returned bitmap.
515+
*/
516+
roaring64_bitmap_t *roaring64_bitmap_add_offset(const roaring64_bitmap_t *r,
517+
bool negative, uint64_t offset);
518+
509519
/**
510520
* How many bytes are required to serialize this bitmap.
511521
*

src/roaring64.c

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1899,6 +1899,125 @@ void roaring64_bitmap_flip_closed_inplace(roaring64_bitmap_t *r, uint64_t min,
18991899
}
19001900
}
19011901

1902+
roaring64_bitmap_t *roaring64_bitmap_add_offset(const roaring64_bitmap_t *bm,
1903+
bool negative,
1904+
uint64_t offset) {
1905+
if (offset == 0) {
1906+
return roaring64_bitmap_copy(bm);
1907+
}
1908+
1909+
roaring64_bitmap_t *answer = roaring64_bitmap_create();
1910+
1911+
// Decompose the offset into a signed container-level shift and an
1912+
// intra-container shift. For negative offsets the low 16 bits wrap: e.g.
1913+
// -1 = container_offset(-1) + in_offset(0xffff), because shifting by -1
1914+
// container is a shift of -0x1_0000, so we need to shift up within
1915+
// containers to get back to -1
1916+
uint16_t low16 = (uint16_t)offset;
1917+
int64_t container_offset;
1918+
uint16_t in_offset;
1919+
if (!negative) {
1920+
container_offset = (int64_t)(offset >> 16);
1921+
in_offset = low16;
1922+
} else if (low16 == 0) {
1923+
container_offset = -(int64_t)(offset >> 16);
1924+
in_offset = 0;
1925+
} else {
1926+
container_offset = -(int64_t)(offset >> 16) - 1;
1927+
in_offset = (uint16_t)-low16;
1928+
}
1929+
1930+
art_iterator_t it = art_init_iterator((art_t *)&bm->art, /*first=*/true);
1931+
1932+
if (in_offset == 0) {
1933+
while (it.value != NULL) {
1934+
leaf_t leaf = (leaf_t)*it.value;
1935+
int64_t k =
1936+
(int64_t)(combine_key(it.key, 0) >> 16) + container_offset;
1937+
if ((uint64_t)k < (uint64_t)1 << 48) {
1938+
uint8_t new_high48[ART_KEY_BYTES];
1939+
split_key((uint64_t)k << 16, new_high48);
1940+
uint8_t typecode = get_typecode(leaf);
1941+
container_t *container =
1942+
get_copy_of_container(get_container(bm, leaf), &typecode,
1943+
/*copy_on_write=*/false);
1944+
leaf_t new_leaf = add_container(answer, container, typecode);
1945+
art_insert(&answer->art, new_high48, (art_val_t)new_leaf);
1946+
}
1947+
art_iterator_next(&it);
1948+
}
1949+
return answer;
1950+
}
1951+
1952+
while (it.value != NULL) {
1953+
leaf_t leaf = (leaf_t)*it.value;
1954+
int64_t k = (int64_t)(combine_key(it.key, 0) >> 16) + container_offset;
1955+
1956+
container_t *lo = NULL, *hi = NULL;
1957+
container_t **lo_ptr = NULL, **hi_ptr = NULL;
1958+
1959+
if ((uint64_t)k < (uint64_t)1 << 48) {
1960+
lo_ptr = &lo;
1961+
}
1962+
if ((uint64_t)(k + 1) < (uint64_t)1 << 48) {
1963+
hi_ptr = &hi;
1964+
}
1965+
if (lo_ptr == NULL && hi_ptr == NULL) {
1966+
art_iterator_next(&it);
1967+
continue;
1968+
}
1969+
1970+
uint8_t typecode = get_typecode(leaf);
1971+
const container_t *c =
1972+
container_unwrap_shared(get_container(bm, leaf), &typecode);
1973+
container_add_offset(c, typecode, lo_ptr, hi_ptr, in_offset);
1974+
1975+
if (lo != NULL) {
1976+
uint8_t lo_high48[ART_KEY_BYTES];
1977+
split_key((uint64_t)k << 16, lo_high48);
1978+
leaf_t *existing_leaf = (leaf_t *)art_find(&answer->art, lo_high48);
1979+
if (existing_leaf != NULL) {
1980+
uint8_t existing_type = get_typecode(*existing_leaf);
1981+
container_t *existing_c = get_container(answer, *existing_leaf);
1982+
uint8_t merged_type;
1983+
container_t *merged_c = container_ior(
1984+
existing_c, existing_type, lo, typecode, &merged_type);
1985+
if (merged_c != existing_c) {
1986+
container_free(existing_c, existing_type);
1987+
}
1988+
replace_container(answer, existing_leaf, merged_c, merged_type);
1989+
container_free(lo, typecode);
1990+
} else {
1991+
leaf_t new_leaf = add_container(answer, lo, typecode);
1992+
art_insert(&answer->art, lo_high48, (art_val_t)new_leaf);
1993+
}
1994+
}
1995+
1996+
if (hi != NULL) {
1997+
uint8_t hi_high48[ART_KEY_BYTES];
1998+
split_key((uint64_t)(k + 1) << 16, hi_high48);
1999+
leaf_t new_leaf = add_container(answer, hi, typecode);
2000+
art_insert(&answer->art, hi_high48, (art_val_t)new_leaf);
2001+
}
2002+
2003+
art_iterator_next(&it);
2004+
}
2005+
2006+
// Repair containers (e.g., convert low-cardinality bitset containers to
2007+
// array containers after lazy union operations).
2008+
art_iterator_t repair_it = art_init_iterator(&answer->art, /*first=*/true);
2009+
while (repair_it.value != NULL) {
2010+
leaf_t *leaf_ptr = (leaf_t *)repair_it.value;
2011+
uint8_t typecode = get_typecode(*leaf_ptr);
2012+
container_t *repaired = container_repair_after_lazy(
2013+
get_container(answer, *leaf_ptr), &typecode);
2014+
replace_container(answer, leaf_ptr, repaired, typecode);
2015+
art_iterator_next(&repair_it);
2016+
}
2017+
2018+
return answer;
2019+
}
2020+
19022021
// Returns the number of distinct high 32-bit entries in the bitmap.
19032022
static inline uint64_t count_high32(const roaring64_bitmap_t *r) {
19042023
art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true);

0 commit comments

Comments
 (0)