[ Home | Programming Tips | Mail ]

Fast ANSI strlen() / memchr()


PowerPCに最適化した ANSI strlen() / memchr()

 PowerPCに最適化した ANSI strlen() / memchr()です。単純に 1 byteずつちまちま見て行くアルゴリズムに比べて3倍以上高速です。
 memchr()は String Search(まず先頭 1 byteをこれで探す)や Pixel Search(特に 8 bit depth)等にも使える(応用できる)と思います。

// -----------------------------------------------------------------------
//  Metrowerks C++ optimizerの glitch対策
//  なぜか k01010101 / k80808080に直接定数を代入すると registerに入らず
//  immediate値が生成されてしまい遅い codeになってしまう。staticから
//  持って来ると compile時に値が確定できないためうまく registerに入ります。
//  32 bit immediate値を多用する codeでは注意!
// -----------------------------------------------------------------------
#if     1
    static  unsigned long   k01 = 0x01010101UL;
#else
    #define k01 0x01010101UL
#endif
 
// -----------------------------------------------------------------------
//      strlen(const char *str)
// -----------------------------------------------------------------------
size_t  strlen(const char *str)
{
    unsigned long   k01010101 = k01;
    unsigned long   k80808080 = k01010101 << 7;
 
    unsigned long       w0;
    const unsigned long *lp;
    
    long    n = (long)str & 3;
    if (n == 0) {
        lp = (unsigned long *)str - 1;
    }else{
        unsigned long   w1, w2;
        unsigned long   kMask = 0xffffffffL << ((4 - n) << 3);
        lp = (unsigned long *)(str - n);
        w0 = *lp | kMask;
        w1 = w0 - k01010101;
        w2 = k80808080 & ~w0;
        if (w1 & w2)  goto rtn;
    }
    
    for (;;) {
        unsigned long   w1, w2;
        w0 = *++lp;
        w1 = w0 - k01010101;
        w2 = k80808080 & ~w0;
        if (w1 & w2)  break;
    }
 
rtn:
    {
        size_t  len = (const char *)lp - str;
        if ((w0 & 0xff000000UL) == 0)  return len;
        if ((w0 & 0x00ff0000UL) == 0)  return len + 1;
        if ((w0 & 0x0000ff00UL) == 0)  return len + 2;
        return len + 3;
    }
}
 
// -----------------------------------------------------------------------
//      memchr(const void *src,int val,size_t n)
// -----------------------------------------------------------------------
void    *memchr(const void *src,int val,size_t n)
{
unsigned long       kVal = (unsigned char)val;
unsigned long       w0;
const unsigned long *lp;
 
    if (4 <= n) {
        unsigned long   k01010101 = k01;
        unsigned long   k80808080 = k01010101 << 7;
        unsigned long   kXorPatn  = (kVal << 24) | (kVal << 16) | (kVal << 8) | kVal;
 
        {
        long    align = (long)src & 3;
 
            if (align == 0) {
                lp = (unsigned long *)src - 1;
            }else{
                unsigned long   w1, w2;
                const long      nBytes = (4 - align);
                unsigned long   kMask  = 0xffffffffUL << (nBytes << 3);
                lp = (unsigned long *)((char *)src - align);
                w0 = (*lp ^ kXorPatn) | kMask;
                w1 = w0 - k01010101;
                w2 = k80808080 & ~w0;
                if (w1 & w2)  goto found;
                n -= nBytes;
            }
 
        }
        
        {
        long    nWords = n >> 2;
 
            while (0 < nWords) {
                unsigned long   w1, w2;
                w0 = *++lp ^ kXorPatn;
                w1 = w0 - k01010101;
                w2 = k80808080 & ~w0;
                if (w1 & w2)  goto found;
                nWords--;
            }
        }
 
        {
        long    remainBytes = n & 3;
 
            if (remainBytes != 0) {
                unsigned long   w1, w2;
                unsigned long   kMask = 0xffffffffUL >> (remainBytes << 3);
                w0 = (*++lp ^ kXorPatn) | kMask;
                w1 = w0 - k01010101;
                w2 = k80808080 & ~w0;
                if (w1 & w2)  goto found;
            }
        }
    }else{
        const unsigned char *p     = src;
        long                nBytes = n;
        while (0 < nBytes) {
            if (*p == kVal)  return (void *)p;
            p++;
            nBytes--;
        }
    }
 
    return NULL;
 
found:
    if ((w0 & 0xff000000UL) == 0)  return (void *)lp;
    if ((w0 & 0x00ff0000UL) == 0)  return (void *)((char *)lp + 1);
    if ((w0 & 0x0000ff00UL) == 0)  return (void *)((char *)lp + 2);
    return (void *)((char *)lp + 3);
}


この Pageは MacOS X + Radio UserLand で作っています。