From b5c230499c425b76ddd036e0677516ab13afd103 Mon Sep 17 00:00:00 2001
From: Kristian Larsson <k@centor.se>
Date: Sat, 28 Mar 2026 23:01:26 +0100
Subject: [PATCH] Keep str internals on raw ints

Several str.c methods converted between boxed B_int values and plain C integers
while calling internal helpers. That added loads of allocation churn which is
really quite unnecessary.

This change uses C integers internally and passed between helper functions to
avoid those extra B_int related allocations and conversions.
---
 base/builtin/str.c | 589 +++++++++++++++++++++++++--------------------
 1 file changed, 328 insertions(+), 261 deletions(-)

diff --git a/base/builtin/str.c b/base/builtin/str.c
index d87194bec..bda87dc93 100644
--- a/base/builtin/str.c
+++ b/base/builtin/str.c
@@ -538,40 +538,153 @@ static int get_index(int i, int nchars) {
     return 0;
 }
 
+// The Boyer-Moore-Horspool algorithm for searching for pattern in text.
+// For very short patterns, this should be replaced by brute force.
+// Returns byte position in text where first occurrence of pattern starts,
+// or -1 if it does not occur.
+// Start search from the left end of text.
+int bmh( unsigned char *text, unsigned char *pattern, int tbytes, int pbytes) {
+    if (pbytes>tbytes) return -1;
+    int skip[256];
+    for (int i=0; i<256; i++) skip[i] = pbytes;
+    for (int i=0; i<pbytes-1; i++)
+        skip[(int)pattern[i]] = pbytes-i-1;
+    int k = pbytes-1;
+    int i, j;
+    while (k<tbytes) {
+        j = pbytes-1;
+        i = k;
+        while (j >=0 && text[i] == pattern[j]) {
+            j--; i--;
+        }
+        if (j==-1) return i+1;
+        k += skip[(int)text[k]];
+    }
+    return -1;
+}
 
-// Eliminates slice notation in find, index, count and other methods
-// with optional start and end and adds defaults for omitted parameters.
+// Start search from the right end of text.
+static int rbmh( unsigned char *text, unsigned char *pattern, int tbytes, int pbytes) {
+    if (pbytes>tbytes) return -1;
+    int skip[256];
+    for (int i=0; i<256; i++) skip[i] = pbytes;
+    for (int i=pbytes-1; i>0; i--)
+        skip[(int)pattern[i]] = i;
+    int k = tbytes - pbytes;
+    int i, j;
+    while (k >= 0) {
+        j = 0;
+        i = k;
+        while (j < pbytes && text[i] == pattern[j]) {
+            j++; i++;
+        }
+        if (j==pbytes) return i-pbytes;
+        k -= skip[(int)text[k]];
+    }
+    return -1;
+}
 
-static int fix_start_end(int nchars, B_int *start, B_int *end) {
-    if (*start==NULL) {
-        *start = acton_malloc(sizeof(struct B_int));
-        *start = toB_int(0);
+static int normalize_start_end(int len, B_int start, B_int end, int *start_out, int *end_out) {
+    int st;
+    int en;
+
+    if (start == NULL) {
+        st = 0;
     } else {
-        int st = fromB_int(*start);
-        if (st > nchars) {
+        st = fromB_int(start);
+        if (st > len)
             return -1;
-        }
-        if (st < 0) 
-            st += nchars+1;
+        if (st < 0)
+            st += len + 1;
         st = st < 0 ? 0 : st;
-        *start = toB_int(st);
     }
-    if (*end==NULL) {
-        *end = acton_malloc(sizeof(struct B_int));
-        *end = toB_int(nchars);
+
+    if (end == NULL) {
+        en = len;
     } else {
-        int en = fromB_int(*end);
-        if (en > nchars)   
-            en = nchars;      
-        else if (en < 0) 
-            en += nchars+1;     
-        en = en < 0 ? 0 : en;    
-        
-        *end = toB_int(en);
+        en = fromB_int(end);
+        if (en > len)
+            en = len;
+        else if (en < 0)
+            en += len + 1;
+        en = en < 0 ? 0 : en;
     }
+
+    *start_out = st;
+    *end_out = en;
     return 0;
 }
 
+static int count_matches(unsigned char *p, unsigned char *q, unsigned char *sub, int subbytes) {
+    int count = 0;
+    int n = bmh(p, sub, q - p, subbytes);
+
+    while (n >= 0) {
+        count++;
+        p += n + (subbytes > 0 ? subbytes : 1);
+        n = bmh(p, sub, q - p, subbytes);
+    }
+
+    return count;
+}
+
+static int normalize_maxsplit(B_int maxsplit) {
+    if (maxsplit == NULL || fromB_int(maxsplit) < 0)
+        return INT_MAX;
+    return fromB_int(maxsplit);
+}
+
+static int str_count_range(B_str s, B_str sub, int start, int end) {
+    int isascii = s->nchars == s->nbytes;
+    unsigned char *p = skip_chars(s->str, start, isascii);
+    unsigned char *q = skip_chars(p, end - start, isascii);
+    return count_matches(p, q, sub->str, sub->nbytes);
+}
+
+static int str_find_range(B_str s, B_str sub, int start, int end) {
+    int isascii = s->nchars == s->nbytes;
+    unsigned char *p = skip_chars(s->str, start, isascii);
+    unsigned char *q = skip_chars(p, end - start, isascii);
+    int n = bmh(p, sub->str, q - p, sub->nbytes);
+
+    if (n < 0)
+        return -1;
+    return char_no(s, n + p - s->str);
+}
+
+static int str_rfind_range(B_str s, B_str sub, int start, int end) {
+    int isascii = s->nchars == s->nbytes;
+    unsigned char *p = skip_chars(s->str, start, isascii);
+    unsigned char *q = skip_chars(p, end - start, isascii);
+    int n = rbmh(p, sub->str, q - p, sub->nbytes);
+
+    if (n < 0)
+        return -1;
+    return char_no(s, n + p - s->str);
+}
+
+static int buffer_count_range(unsigned char *buf, int start, int end, unsigned char *sub, int subbytes) {
+    unsigned char *p = buf + start;
+    unsigned char *q = buf + end;
+    return count_matches(p, q, sub, subbytes);
+}
+
+static int buffer_find_range(unsigned char *buf, int start, int end, unsigned char *sub, int subbytes) {
+    int n = bmh(buf + start, sub, end - start, subbytes);
+
+    if (n < 0)
+        return -1;
+    return start + n;
+}
+
+static int buffer_rfind_range(unsigned char *buf, int start, int end, unsigned char *sub, int subbytes) {
+    int n = rbmh(buf + start, sub, end - start, subbytes);
+
+    if (n < 0)
+        return -1;
+    return start + n;
+}
+
 // Builds a new one-char string starting at p.
 static B_str mk_char(unsigned char *p) {
     unsigned char c = *p;
@@ -609,52 +722,6 @@ static int islinebreak_codepoint(int codepoint) {
 
 }
 
-// The Boyer-Moore-Horspool algorithm for searching for pattern in text.
-// For very short patterns, this should be replaced by brute force.
-// Returns byte position in text where first occurrence of pattern starts,
-// or -1 if it does not occur.
-// Start search from the left end of text.
-int bmh( unsigned char *text, unsigned char *pattern, int tbytes, int pbytes) {
-    if (pbytes>tbytes) return -1;
-    int skip[256];
-    for (int i=0; i<256; i++) skip[i] = pbytes;
-    for (int i=0; i<pbytes-1; i++)
-        skip[(int)pattern[i]] = pbytes-i-1;
-    int k = pbytes-1;
-    int i, j;
-    while (k<tbytes) {
-        j = pbytes-1;
-        i = k;
-        while (j >=0 && text[i] == pattern[j]) {
-            j--; i--;
-        }
-        if (j==-1) return i+1;
-        k += skip[(int)text[k]];
-    }
-    return -1;
-}
-
-// Start search from the right end of text.
-static int rbmh( unsigned char *text, unsigned char *pattern, int tbytes, int pbytes) {
-    if (pbytes>tbytes) return -1;
-    int skip[256];
-    for (int i=0; i<256; i++) skip[i] = pbytes;
-    for (int i=pbytes-1; i>0; i--)
-        skip[(int)pattern[i]] = i;
-    int k = tbytes - pbytes;
-    int i, j;
-    while (k >= 0) {
-        j = 0;
-        i = k;
-        while (j < pbytes && text[i] == pattern[j]) {
-            j++; i++;
-        }
-        if (j==pbytes) return i-pbytes;
-        k -= skip[(int)text[k]];
-    }
-    return -1;
-}
-
 struct byte_counts {
     int printable, squotes, dquotes, escaped, non_printable, non_ascii, braces;
 };
@@ -929,20 +996,12 @@ B_str B_strD_center(B_str s, B_int width, B_str fill) {
 
 
 B_int B_strD_count(B_str s, B_str sub, B_int start, B_int end) {
-    int isascii = s->nchars == s->nbytes;
-    B_int st = start;
-    B_int en = end;
-    if (fix_start_end(s->nchars,&st,&en) < 0) return toB_int(0);
-    unsigned char *p = skip_chars(s->str,fromB_int(st),isascii);
-    unsigned char *q = skip_chars(p,fromB_int(en)-fromB_int(st),isascii);
-    int res = 0;
-    int n = bmh(p,sub->str,q-p,sub->nbytes);
-    while (n>=0) {
-        res++;
-        p += n + (sub->nbytes>0 ? sub->nbytes : 1);
-        n = bmh(p,sub->str,q-p,sub->nbytes);
-    }
-    return toB_int(res);
+    int st;
+    int en;
+
+    if (normalize_start_end(s->nchars, start, end, &st, &en) < 0)
+        return toB_int(0);
+    return toB_int(str_count_range(s, sub, st, en));
 }
 
 B_bytes B_strD_encode(B_str s) {
@@ -953,12 +1012,15 @@ B_bytes B_strD_encode(B_str s) {
 }
 
 B_bool B_strD_endswith(B_str s, B_str sub, B_int start, B_int end) {
-    B_int st = start;
-    B_int en = end;
-    if (fix_start_end(s->nchars,&st,&en) < 0) return B_False;
-    if (en->val-st->val < sub->nbytes) return B_False;
+    int st;
+    int en;
+
+    if (normalize_start_end(s->nchars, start, end, &st, &en) < 0)
+        return B_False;
+    if (en - st < sub->nbytes)
+        return B_False;
     int isascii = s->nchars==s->nbytes;
-    unsigned char *p = skip_chars(s->str + s->nbytes,fromB_int(en) - s->nchars,isascii) - sub->nbytes;
+    unsigned char *p = skip_chars(s->str + s->nbytes, en - s->nchars, isascii) - sub->nbytes;
     unsigned char *q = sub->str;
     for (int i=0; i<sub->nbytes; i++) {
         if (*p == 0 || *p++ != *q++) {
@@ -1005,23 +1067,25 @@ B_str B_strD_expandtabs(B_str s, B_int tabsize){
 }
 
 B_int B_strD_find(B_str s, B_str sub, B_int start, B_int end) {
-    int isascii = s->nchars == s->nbytes;
-    B_int st = start;
-    B_int en = end;
-    if (fix_start_end(s->nchars,&st,&en) < 0) return toB_int(-1);
-    unsigned char *p = skip_chars(s->str,fromB_int(st),isascii);
-    unsigned char *q = skip_chars(p,fromB_int(en)-fromB_int(st),isascii);
-    int n = bmh(p,sub->str,q-p,sub->nbytes);
-    if (n<0) return toB_int(-1);
-    return toB_int(char_no(s,n+p-s->str));
+    int st;
+    int en;
+
+    if (normalize_start_end(s->nchars, start, end, &st, &en) < 0)
+        return toB_int(-1);
+    return toB_int(str_find_range(s, sub, st, en));
 }
 
 B_int B_strD_index(B_str s, B_str sub, B_int start, B_int end) {
-    B_int n = B_strD_find(s,sub,start,end);
-    if (fromB_int(n)<0) {
+    int st;
+    int en;
+    int n = -1;
+
+    if (normalize_start_end(s->nchars, start, end, &st, &en) >= 0)
+        n = str_find_range(s, sub, st, en);
+    if (n < 0) {
         $RAISE((B_BaseException)$NEW(B_ValueError,to$str("index: substring not found")));
     }
-    return n;
+    return toB_int(n);
 }
 
 B_bool B_strD_isalnum(B_str s) {
@@ -1262,7 +1326,7 @@ B_str B_strD_lstrip(B_str s, B_str cs) {
 }
 
 B_tuple B_strD_partition(B_str s, B_str sep) {
-    int n = fromB_int(B_strD_find(s,sep,NULL,NULL));
+    int n = str_find_range(s, sep, 0, s->nchars);
     if (n<0) {
         return $NEWTUPLE(3,s,null_str,null_str);
     } else {
@@ -1280,10 +1344,9 @@ B_tuple B_strD_partition(B_str s, B_str sep) {
 }
 
 B_str B_strD_replace(B_str s, B_str old, B_str new, B_int count) {
-    if (count==NULL)
-        count = toB_int(INT_MAX);
-    int c = fromB_int(B_strD_count(s,old,NULL,NULL));
-    int c0 = fromB_int(count) < c ? fromB_int(count) : c;
+    int countval = count == NULL ? INT_MAX : fromB_int(count);
+    int c = str_count_range(s, old, 0, s->nchars);
+    int c0 = countval < c ? countval : c;
     if (c0==0){
         return s;
     }
@@ -1315,24 +1378,26 @@ B_str B_strD_replace(B_str s, B_str old, B_str new, B_int count) {
 
 
 B_int B_strD_rfind(B_str s, B_str sub, B_int start, B_int end) {
-    int isascii = s->nchars == s->nbytes;
-    B_int st = start;
-    B_int en = end;
-    if (fix_start_end(s->nchars,&st,&en) < 0) return toB_int(-1);
-    unsigned char *p = skip_chars(s->str,fromB_int(st),isascii);
-    unsigned char *q = skip_chars(p,fromB_int(en)-fromB_int(st),isascii);
-    int n = rbmh(p,sub->str,q-p,sub->nbytes);
-    if (n<0) return toB_int(-1);
-    return toB_int(char_no(s,n+p-s->str));
+    int st;
+    int en;
+
+    if (normalize_start_end(s->nchars, start, end, &st, &en) < 0)
+        return toB_int(-1);
+    return toB_int(str_rfind_range(s, sub, st, en));
 }
 
 
 B_int B_strD_rindex(B_str s, B_str sub, B_int start, B_int end) {
-    B_int n = B_strD_rfind(s,sub,start,end);
-    if (fromB_int(n)<0) {
+    int st;
+    int en;
+    int n = -1;
+
+    if (normalize_start_end(s->nchars, start, end, &st, &en) >= 0)
+        n = str_rfind_range(s, sub, st, en);
+    if (n < 0) {
         $RAISE((B_BaseException)$NEW(B_ValueError,to$str("rindex: substring not found")));
     };
-    return n;
+    return toB_int(n);
 }
 
 B_str B_strD_rjust(B_str s, B_int width, B_str fill) {
@@ -1358,7 +1423,7 @@ B_str B_strD_rjust(B_str s, B_int width, B_str fill) {
 }
 
 B_tuple B_strD_rpartition(B_str s, B_str sep) {
-    int n = fromB_int(B_strD_rfind(s,sep,NULL,NULL));
+    int n = str_rfind_range(s, sep, 0, s->nchars);
     if (n<0) {
         return $NEWTUPLE(3,null_str,null_str,s);
     } else {
@@ -1379,7 +1444,7 @@ B_tuple B_strD_rpartition(B_str s, B_str sep) {
 B_list B_strD_split(B_str s, B_str sep, B_int maxsplit) {
     B_list res = $NEW(B_list,NULL,NULL);
     B_SequenceD_list wit = B_SequenceD_listG_witness;
-    if (maxsplit == NULL || fromB_int(maxsplit) < 0) maxsplit = toB_int(INT_MAX); 
+    int maxspl = normalize_maxsplit(maxsplit);
     int remaining = s->nchars;
     if (sep == NULL) {
         unsigned char *p = s->str;
@@ -1397,7 +1462,7 @@ B_list B_strD_split(B_str s, B_str sep, B_int maxsplit) {
                     inword = 1;
                     q = p;
                     wordlength = 1;
-                    if (res->length == fromB_int(maxsplit))
+                    if (res->length == maxspl)
                         break; // we have now removed leading whitespace in remainder
                 } else
                     wordlength++;
@@ -1439,16 +1504,16 @@ B_list B_strD_split(B_str s, B_str sep, B_int maxsplit) {
             wit->$class->append(wit,res,null_str);
             return res;
         }
-        B_str ls, rs, ssep;
+        B_str ls, rs, ssep = NULL;
         rs = s;
         // Note: This builds many intermediate rs strings...
-        while (rs->nchars>0 && res->length < fromB_int(maxsplit)) {
+        while (rs->nchars>0 && res->length < maxspl) {
             B_tuple t = B_strD_partition(rs,sep);
             ssep = (B_str)t->components[1];
             rs =  (B_str)t->components[2];
             wit->$class->append(wit,res,(B_str)t->components[0]);
         }
-        if (ssep->nchars>0)
+        if (ssep && ssep->nchars>0)
             wit->$class->append(wit,res,rs);
         return res;
     }
@@ -1518,11 +1583,13 @@ B_str B_strD_rstrip(B_str s, B_str cs) {
 }
 
 B_bool B_strD_startswith(B_str s, B_str sub, B_int start, B_int end) {
-    B_int st = start;
-    B_int en = end;
-    if (fix_start_end(s->nchars,&st,&en) < 0) return B_False;
+    int st;
+    int en;
+
+    if (normalize_start_end(s->nchars, start, end, &st, &en) < 0)
+        return B_False;
     int isascii = s->nchars==s->nbytes;
-    unsigned char *p = skip_chars(s->str,fromB_int(st),isascii);
+    unsigned char *p = skip_chars(s->str, st, isascii);
     unsigned char *q = sub->str;
     for (int i=0; i<sub->nbytes; i++) {
         if (*p == 0 || *p++ != *q++) {
@@ -1979,21 +2046,12 @@ B_bytearray B_bytearrayD_center(B_bytearray s, B_int width, B_bytearray fill) {
 }
 
 B_int B_bytearrayD_count(B_bytearray s, B_bytearray sub, B_int start, B_int end) {
-    B_int st = start;
-    B_int en = end;
-    if (fix_start_end(s->nbytes,&st,&en) < 0) return toB_int(0);
-    int stval = fromB_int(st);
-    int enval = fromB_int(en);
-    unsigned char *p = &s->str[stval];
-    unsigned char *q = &p[enval-stval];
-    int res = 0;
-    int n = bmh(p,sub->str,q-p,sub->nbytes);
-    while (n>=0) {
-        res++;
-        p += n + (sub->nbytes>0 ? sub->nbytes : 1);
-        n = bmh(p,sub->str,q-p,sub->nbytes);
-    }
-    return toB_int(res);
+    int st;
+    int en;
+
+    if (normalize_start_end(s->nbytes, start, end, &st, &en) < 0)
+        return toB_int(0);
+    return toB_int(buffer_count_range(s->str, st, en, sub->str, sub->nbytes));
 }
 
 B_str B_bytearrayD_decode(B_bytearray s) {
@@ -2001,11 +2059,14 @@ B_str B_bytearrayD_decode(B_bytearray s) {
 }
 
 B_bool B_bytearrayD_endswith(B_bytearray s, B_bytearray sub, B_int start, B_int end) {
-    B_int st = start;
-    B_int en = end;
-    if (fix_start_end(s->nbytes,&st,&en) < 0) return B_False;
-    int enval = fromB_int(en);
-    unsigned char *p = &s->str[enval-sub->nbytes];
+    int st;
+    int en;
+
+    if (normalize_start_end(s->nbytes, start, end, &st, &en) < 0)
+        return B_False;
+    if (en - st < sub->nbytes)
+        return B_False;
+    unsigned char *p = &s->str[en-sub->nbytes];
     unsigned char *q = sub->str;
     for (int i=0; i<sub->nbytes; i++) {
         if (*p == 0 || *p++ != *q++) {
@@ -2052,14 +2113,12 @@ B_bytearray B_bytearrayD_expandtabs(B_bytearray s, B_int tabsz){
 }
 
 B_int B_bytearrayD_find(B_bytearray s, B_bytearray sub, B_int start, B_int end) {
-    B_int st = start;
-    B_int en = end;
-    if (fix_start_end(s->nbytes,&st,&en) < 0) return toB_int(-1);
-    unsigned char *p = &s->str[fromB_int(st)];
-    unsigned char *q = &s->str[fromB_int(en)];
-    int n = bmh(p,sub->str,q-p,sub->nbytes);
-    if (n<0) return toB_int(-1);
-    return toB_int(n+p-s->str);
+    int st;
+    int en;
+
+    if (normalize_start_end(s->nbytes, start, end, &st, &en) < 0)
+        return toB_int(-1);
+    return toB_int(buffer_find_range(s->str, st, en, sub->str, sub->nbytes));
 }
 
 B_bytearray B_bytearrayD_from_hex(B_str s) {
@@ -2129,11 +2188,16 @@ B_str B_bytearrayD_hex(B_bytearray s) {
 
 
 B_int B_bytearrayD_index(B_bytearray s, B_bytearray sub, B_int start, B_int end) {
-    B_int n = B_bytearrayD_find(s,sub,start,end);
-    if (fromB_int(n)<0) {
+    int st;
+    int en;
+    int n = -1;
+
+    if (normalize_start_end(s->nbytes, start, end, &st, &en) >= 0)
+        n = buffer_find_range(s->str, st, en, sub->str, sub->nbytes);
+    if (n < 0) {
         $RAISE((B_BaseException)$NEW(B_ValueError,to$str("index: substring not found")));
     }
-    return n;
+    return toB_int(n);
 }
 
 B_bool B_bytearrayD_isalnum(B_bytearray s) {
@@ -2316,28 +2380,26 @@ B_bytearray B_bytearrayD_lstrip(B_bytearray s, B_bytearray cs) {
 
 
 B_tuple B_bytearrayD_partition(B_bytearray s, B_bytearray sep) {
-    int n = fromB_int(B_bytearrayD_find(s,sep,NULL,NULL));
+    int n = buffer_find_range(s->str, 0, s->nbytes, sep->str, sep->nbytes);
     if (n<0) {
         return $NEWTUPLE(3,s,toB_bytearray(""),toB_bytearray(""));
     } else {
-        int nb = bmh(s->str,sep->str,s->nbytes,sep->nbytes);
         B_bytearray ls;
-        NEW_UNFILLED_BYTEARRAY(ls,nb);
-        memcpy(ls->str,s->str,nb);
+        NEW_UNFILLED_BYTEARRAY(ls,n);
+        memcpy(ls->str,s->str,n);
         B_bytearray rs;
-        int nbr = s->nbytes - sep->nbytes - nb;
+        int nbr = s->nbytes - sep->nbytes - n;
         NEW_UNFILLED_BYTEARRAY(rs,nbr);
-        memcpy(rs->str,s->str+nb+sep->nbytes,nbr);
+        memcpy(rs->str,s->str+n+sep->nbytes,nbr);
         return $NEWTUPLE(3,ls,sep,rs);
     }
 }
 
 
 B_bytearray B_bytearrayD_replace(B_bytearray s, B_bytearray old, B_bytearray new, B_int count) {
-    if (count==NULL)
-        count = toB_int(INT_MAX);
-    int c = fromB_int(B_bytearrayD_count(s,old,NULL,NULL));
-    int c0 = fromB_int(count) < c ? fromB_int(count) : c;
+    int countval = count == NULL ? INT_MAX : fromB_int(count);
+    int c = buffer_count_range(s->str, 0, s->nbytes, old->str, old->nbytes);
+    int c0 = countval < c ? countval : c;
     if (c0==0){
         return B_bytearrayD_copy(s);
     }
@@ -2368,23 +2430,26 @@ B_bytearray B_bytearrayD_replace(B_bytearray s, B_bytearray old, B_bytearray new
 
 
 B_int B_bytearrayD_rfind(B_bytearray s, B_bytearray sub, B_int start, B_int end) {
-    B_int st = start;
-    B_int en = end;
-    if (fix_start_end(s->nbytes,&st,&en) < 0) return toB_int(-1);
-    unsigned char *p = &s->str[fromB_int(st)];
-    unsigned char *q = &s->str[fromB_int(en)];
-    int n = rbmh(p,sub->str,q-p,sub->nbytes);
-    if (n<0) return toB_int(-1);
-    return toB_int(n+p-s->str);
+    int st;
+    int en;
+
+    if (normalize_start_end(s->nbytes, start, end, &st, &en) < 0)
+        return toB_int(-1);
+    return toB_int(buffer_rfind_range(s->str, st, en, sub->str, sub->nbytes));
 }
 
 
 B_int B_bytearrayD_rindex(B_bytearray s, B_bytearray sub, B_int start, B_int end) {
-    B_int n = B_bytearrayD_rfind(s,sub,start,end);
-    if (fromB_int(n)<0) {
+    int st;
+    int en;
+    int n = -1;
+
+    if (normalize_start_end(s->nbytes, start, end, &st, &en) >= 0)
+        n = buffer_rfind_range(s->str, st, en, sub->str, sub->nbytes);
+    if (n < 0) {
         $RAISE((B_BaseException)$NEW(B_ValueError,to$str("rindex for bytearray: substring not found")));
     };
-    return n;
+    return toB_int(n);
 }
 
 B_bytearray B_bytearrayD_rjust(B_bytearray s, B_int width, B_bytearray fill) {
@@ -2409,18 +2474,17 @@ B_bytearray B_bytearrayD_rjust(B_bytearray s, B_int width, B_bytearray fill) {
 }
 
 B_tuple B_bytearrayD_rpartition(B_bytearray s, B_bytearray sep) {
-    int n = fromB_int(B_bytearrayD_rfind(s,sep,NULL,NULL));
+    int n = buffer_rfind_range(s->str, 0, s->nbytes, sep->str, sep->nbytes);
     if (n<0) {
         return $NEWTUPLE(3,toB_bytearray(""),toB_bytearray(""),s);
     } else {
-        int nb = rbmh(s->str,sep->str,s->nbytes,sep->nbytes);
         B_bytearray ls;
-        NEW_UNFILLED_BYTEARRAY(ls,nb);
-        memcpy(ls->str,s->str,nb);
-        int nbr = s->nbytes - sep->nbytes - nb;
+        NEW_UNFILLED_BYTEARRAY(ls,n);
+        memcpy(ls->str,s->str,n);
+        int nbr = s->nbytes - sep->nbytes - n;
         B_bytearray rs;
         NEW_UNFILLED_BYTEARRAY(rs,nbr);
-        memcpy(rs->str,s->str+nb+sep->nbytes,nbr);
+        memcpy(rs->str,s->str+n+sep->nbytes,nbr);
         return  $NEWTUPLE(3,ls,sep,rs);
     }
 }
@@ -2450,7 +2514,7 @@ B_bytearray B_bytearrayD_rstrip(B_bytearray s, B_bytearray cs) {
 B_list B_bytearrayD_split(B_bytearray s, B_bytearray sep, B_int maxsplit) {
     B_list res = $NEW(B_list,NULL,NULL);
     B_SequenceD_list wit = B_SequenceD_listG_witness;
-    if (maxsplit == NULL || fromB_int(maxsplit) < 0) maxsplit = toB_int(INT_MAX); 
+    int maxspl = normalize_maxsplit(maxsplit);
     if (sep == NULL) {
         unsigned char *p = s->str;
         if (s->nbytes==0) {
@@ -2463,7 +2527,7 @@ B_list B_bytearrayD_split(B_bytearray s, B_bytearray sep, B_int maxsplit) {
                 if (!inword) {
                     inword = 1;
                     q = p;
-                    if (res->length == fromB_int(maxsplit))
+                    if (res->length == maxspl)
                         break; // we have now removed leading whitespace in remainder
                 }
             } else {
@@ -2501,16 +2565,16 @@ B_list B_bytearrayD_split(B_bytearray s, B_bytearray sep, B_int maxsplit) {
             wit->$class->append(wit,res,toB_bytearray(""));
             return res;
         }
-        B_bytearray ls, rs, ssep;
+        B_bytearray ls, rs, ssep = NULL;
         rs = s;
         // Note: This builds many intermediate rs strings...
-        while (rs->nbytes>0 && res->length < fromB_int(maxsplit)) {
+        while (rs->nbytes>0 && res->length < maxspl) {
             B_tuple t = B_bytearrayD_partition(rs,sep);
             ssep = (B_bytearray)t->components[1];
             rs =  (B_bytearray)t->components[2];
              wit->$class->append(wit,res,(B_bytearray)t->components[0]);
         }
-        if (ssep->nbytes>0)
+        if (ssep && ssep->nbytes>0)
             wit->$class->append(wit,res,rs);
         return res;
     }
@@ -2551,14 +2615,16 @@ B_list B_bytearrayD_splitlines(B_bytearray s, B_bool keepends) {
 }
 
 B_bool B_bytearrayD_startswith(B_bytearray s, B_bytearray sub, B_int start, B_int end) {
-    B_int st = start;
-    B_int en = end;
-    if (fix_start_end(s->nbytes,&st,&en) < 0) return B_False;
-    unsigned char *p = s->str + fromB_int(st);
+    int st;
+    int en;
+
+    if (normalize_start_end(s->nbytes, start, end, &st, &en) < 0)
+        return B_False;
+    unsigned char *p = s->str + st;
     if (sub->nbytes > 0 && p+sub->nbytes > s->str+s->nbytes) return B_False;
     unsigned char *q = sub->str;
     for (int i=0; i<sub->nbytes; i++) {
-        if (p >= s->str + fromB_int(en) || *p++ != *q++) {
+        if (p >= s->str + en || *p++ != *q++) {
             return B_False;
         }
     }
@@ -3074,20 +3140,12 @@ B_bytes B_bytesD_center(B_bytes s, B_int width, B_bytes fill) {
 }
 
 B_int B_bytesD_count(B_bytes s, B_bytes sub, B_int start, B_int end) {
-    B_int st = start;
-    B_int en = end;
-    if (fix_start_end(s->nbytes,&st,&en) < 0) return toB_int(0);
-    int stval = fromB_int(st);
-    unsigned char *p = &s->str[stval];
-    unsigned char *q = &p[fromB_int(en)-stval];
-    int res = 0;
-    int n = bmh(p,sub->str,q-p,sub->nbytes);
-    while (n>=0) {
-        res++;
-        p += n + (sub->nbytes>0 ? sub->nbytes : 1);
-        n = bmh(p,sub->str,q-p,sub->nbytes);
-    }
-    return toB_int(res);
+    int st;
+    int en;
+
+    if (normalize_start_end(s->nbytes, start, end, &st, &en) < 0)
+        return toB_int(0);
+    return toB_int(buffer_count_range(s->str, st, en, sub->str, sub->nbytes));
 }
 
 B_str B_bytesD_decode(B_bytes s) {
@@ -3095,10 +3153,14 @@ B_str B_bytesD_decode(B_bytes s) {
 }
 
 B_bool B_bytesD_endswith(B_bytes s, B_bytes sub, B_int start, B_int end) {
-    B_int st = start;
-    B_int en = end;
-    if (fix_start_end(s->nbytes,&st,&en) < 0) return B_False;
-    unsigned char *p = &s->str[fromB_int(en)-sub->nbytes];
+    int st;
+    int en;
+
+    if (normalize_start_end(s->nbytes, start, end, &st, &en) < 0)
+        return B_False;
+    if (en - st < sub->nbytes)
+        return B_False;
+    unsigned char *p = &s->str[en-sub->nbytes];
     unsigned char *q = sub->str;
     for (int i=0; i<sub->nbytes; i++) {
         if (*p == 0 || *p++ != *q++) {
@@ -3145,14 +3207,12 @@ B_bytes B_bytesD_expandtabs(B_bytes s, B_int tabsz){
 }
 
 B_int B_bytesD_find(B_bytes s, B_bytes sub, B_int start, B_int end) {
-    B_int st = start;
-    B_int en = end;
-    if (fix_start_end(s->nbytes,&st,&en) < 0) return toB_int(-1);
-    unsigned char *p = &s->str[fromB_int(st)];
-    unsigned char *q = &s->str[fromB_int(en)];
-    int n = bmh(p,sub->str,q-p,sub->nbytes);
-    if (n<0) return toB_int(-1);
-    return toB_int(n+p-s->str);
+    int st;
+    int en;
+
+    if (normalize_start_end(s->nbytes, start, end, &st, &en) < 0)
+        return toB_int(-1);
+    return toB_int(buffer_find_range(s->str, st, en, sub->str, sub->nbytes));
 }
 
 B_bytes B_bytesD_from_hex(B_str s) {
@@ -3225,11 +3285,16 @@ B_str B_bytesD_hex(B_bytes s) {
 }
 
 B_int B_bytesD_index(B_bytes s, B_bytes sub, B_int start, B_int end) {
-    B_int n = B_bytesD_find(s,sub,start,end);
-    if (fromB_int(n)<0) {
+    int st;
+    int en;
+    int n = -1;
+
+    if (normalize_start_end(s->nbytes, start, end, &st, &en) >= 0)
+        n = buffer_find_range(s->str, st, en, sub->str, sub->nbytes);
+    if (n < 0) {
         $RAISE((B_BaseException)$NEW(B_ValueError,to$str("index: substring not found")));
     }
-    return n;
+    return toB_int(n);
 }
 
 B_bool B_bytesD_isalnum(B_bytes s) {
@@ -3412,18 +3477,17 @@ B_bytes B_bytesD_lstrip(B_bytes s, B_bytes cs) {
 
 
 B_tuple B_bytesD_partition(B_bytes s, B_bytes sep) {
-    int n = fromB_int(B_bytesD_find(s,sep,NULL,NULL));
+    int n = buffer_find_range(s->str, 0, s->nbytes, sep->str, sep->nbytes);
     if (n<0) {
         return $NEWTUPLE(3,s,to$bytes(""),to$bytes(""));
     } else {
-        int nb = bmh(s->str,sep->str,s->nbytes,sep->nbytes);
         B_bytes ls;
-        NEW_UNFILLED_BYTES(ls,nb);
-        memcpy(ls->str,s->str,nb);
+        NEW_UNFILLED_BYTES(ls,n);
+        memcpy(ls->str,s->str,n);
         B_bytes rs;
-        int nbr = s->nbytes - sep->nbytes - nb;
+        int nbr = s->nbytes - sep->nbytes - n;
         NEW_UNFILLED_BYTES(rs,nbr);
-        memcpy(rs->str,s->str+nb+sep->nbytes,nbr);
+        memcpy(rs->str,s->str+n+sep->nbytes,nbr);
         return $NEWTUPLE(3,ls,sep,rs);
     }
 }
@@ -3455,10 +3519,9 @@ B_bytes B_bytesD_removesuffix(B_bytes s, B_bytes suffix) {
     return res;
 }
 B_bytes B_bytesD_replace(B_bytes s, B_bytes old, B_bytes new, B_int count) {
-    if (count==NULL)
-        count = toB_int(INT_MAX);
-    int c = fromB_int(B_bytesD_count(s,old,NULL,NULL));
-    int c0 = fromB_int(count) < c ? fromB_int(count) : c;
+    int countval = count == NULL ? INT_MAX : fromB_int(count);
+    int c = buffer_count_range(s->str, 0, s->nbytes, old->str, old->nbytes);
+    int c0 = countval < c ? countval : c;
     if (c0==0){
         return B_bytesD_copy(s);
     }
@@ -3489,23 +3552,26 @@ B_bytes B_bytesD_replace(B_bytes s, B_bytes old, B_bytes new, B_int count) {
 
 
 B_int B_bytesD_rfind(B_bytes s, B_bytes sub, B_int start, B_int end) {
-    B_int st = start;
-    B_int en = end;
-    if (fix_start_end(s->nbytes,&st,&en) < 0) return toB_int(-1);
-    unsigned char *p = &s->str[fromB_int(st)];
-    unsigned char *q = &s->str[fromB_int(en)];
-    int n = rbmh(p,sub->str,q-p,sub->nbytes);
-    if (n<0) return toB_int(-1);
-    return toB_int(n+p-s->str);
+    int st;
+    int en;
+
+    if (normalize_start_end(s->nbytes, start, end, &st, &en) < 0)
+        return toB_int(-1);
+    return toB_int(buffer_rfind_range(s->str, st, en, sub->str, sub->nbytes));
 }
 
 
 B_int B_bytesD_rindex(B_bytes s, B_bytes sub, B_int start, B_int end) {
-    B_int n = B_bytesD_rfind(s,sub,start,end);
-    if (fromB_int(n)<0) {
+    int st;
+    int en;
+    int n = -1;
+
+    if (normalize_start_end(s->nbytes, start, end, &st, &en) >= 0)
+        n = buffer_rfind_range(s->str, st, en, sub->str, sub->nbytes);
+    if (n < 0) {
         $RAISE((B_BaseException)$NEW(B_ValueError,to$str("rindex for bytes: substring not found")));
     };
-    return n;
+    return toB_int(n);
 }
 
 B_bytes B_bytesD_rjust(B_bytes s, B_int width, B_bytes fill) {
@@ -3530,18 +3596,17 @@ B_bytes B_bytesD_rjust(B_bytes s, B_int width, B_bytes fill) {
 }
 
 B_tuple B_bytesD_rpartition(B_bytes s, B_bytes sep) {
-    int n = fromB_int(B_bytesD_rfind(s,sep,NULL,NULL));
+    int n = buffer_rfind_range(s->str, 0, s->nbytes, sep->str, sep->nbytes);
     if (n<0) {
         return $NEWTUPLE(3,to$bytes(""),to$bytes(""),s);
     } else {
-        int nb = rbmh(s->str,sep->str,s->nbytes,sep->nbytes);
         B_bytes ls;
-        NEW_UNFILLED_BYTES(ls,nb);
-        memcpy(ls->str,s->str,nb);
-        int nbr = s->nbytes - sep->nbytes - nb;
+        NEW_UNFILLED_BYTES(ls,n);
+        memcpy(ls->str,s->str,n);
+        int nbr = s->nbytes - sep->nbytes - n;
         B_bytes rs;
         NEW_UNFILLED_BYTES(rs,nbr);
-        memcpy(rs->str,s->str+nb+sep->nbytes,nbr);
+        memcpy(rs->str,s->str+n+sep->nbytes,nbr);
         return  $NEWTUPLE(3,ls,sep,rs);
     }
 }
@@ -3571,7 +3636,7 @@ B_bytes B_bytesD_rstrip(B_bytes s, B_bytes cs) {
 B_list B_bytesD_split(B_bytes s, B_bytes sep, B_int maxsplit) {
     B_list res = $NEW(B_list,NULL,NULL);
     B_SequenceD_list wit = B_SequenceD_listG_witness;
-    if (maxsplit == NULL || fromB_int(maxsplit) < 0) maxsplit = toB_int(INT_MAX); 
+    int maxspl = normalize_maxsplit(maxsplit);
     if (sep == NULL) {
         unsigned char *p = s->str;
         if (s->nbytes==0) {
@@ -3584,7 +3649,7 @@ B_list B_bytesD_split(B_bytes s, B_bytes sep, B_int maxsplit) {
                 if (!inword) {
                     inword = 1;
                     q = p;
-                    if (res->length == fromB_int(maxsplit))
+                    if (res->length == maxspl)
                         break; // we have now removed leading whitespace in remainder
                 }
             } else {
@@ -3622,16 +3687,16 @@ B_list B_bytesD_split(B_bytes s, B_bytes sep, B_int maxsplit) {
             wit->$class->append(wit,res,null_bytes);
             return res;
         }
-        B_bytes ls, rs, ssep;
+        B_bytes ls, rs, ssep = NULL;
         rs = s;
         // Note: This builds many intermediate rs strings...
-        while (rs->nbytes>0 && res->length < fromB_int(maxsplit)) {
+        while (rs->nbytes>0 && res->length < maxspl) {
             B_tuple t = B_bytesD_partition(rs,sep);
             ssep = (B_bytes)t->components[1];
             rs =  (B_bytes)t->components[2];
             wit->$class->append(wit,res,(B_bytes)t->components[0]);
         }
-        if (ssep->nbytes>0)
+        if (ssep && ssep->nbytes>0)
             wit->$class->append(wit,res,rs);
         return res;
     }
@@ -3672,14 +3737,16 @@ B_list B_bytesD_splitlines(B_bytes s, B_bool keepends) {
 }
 
 B_bool B_bytesD_startswith(B_bytes s, B_bytes sub, B_int start, B_int end) {
-    B_int st = start;
-    B_int en = end;
-    if (fix_start_end(s->nbytes,&st,&en) < 0) return B_False;
-    unsigned char *p = s->str + fromB_int(st);
+    int st;
+    int en;
+
+    if (normalize_start_end(s->nbytes, start, end, &st, &en) < 0)
+        return B_False;
+    unsigned char *p = s->str + st;
     if (sub->nbytes > 0 && p+sub->nbytes > s->str+s->nbytes) return B_False;
     unsigned char *q = sub->str;
     for (int i=0; i<sub->nbytes; i++) {
-        if (p >= s->str + fromB_int(en) || *p++ != *q++) {
+        if (p >= s->str + en || *p++ != *q++) {
             return B_False;
         }
     }