[ Home | Programming Tips | Mail ]
PowerPCに最適化した ANSI strlen() / memchr()です。単純に 1 byteずつちまちま見て行くアルゴリズムに比べて3倍以上高速です。
memchr()は String Search(まず先頭 1 byteをこれで探す)や Pixel Search(特に 8 bit depth)等にも使える(応用できる)と思います。
// -----------------------------------------------------------------------
// Metrowerks C++ optimizerの glitch対策
// なぜか k01010101 / k80808080に直接定数を代入すると registerに入らず
// immediate値が生成されてしまい遅い codeになってしまう。staticから
// 持って来ると compile時に値が確定できないためうまく registerに入ります。
// 32 bit immediate値を多用する codeでは注意!
// -----------------------------------------------------------------------
#if 1
static unsigned long k01 = 0x01010101UL;
#else
#define k01 0x01010101UL
#endif
// -----------------------------------------------------------------------
// strlen(const char *str)
// -----------------------------------------------------------------------
size_t strlen(const char *str)
{
unsigned long k01010101 = k01;
unsigned long k80808080 = k01010101 << 7;
unsigned long w0;
const unsigned long *lp;
long n = (long)str & 3;
if (n == 0) {
lp = (unsigned long *)str - 1;
}else{
unsigned long w1, w2;
unsigned long kMask = 0xffffffffL << ((4 - n) << 3);
lp = (unsigned long *)(str - n);
w0 = *lp | kMask;
w1 = w0 - k01010101;
w2 = k80808080 & ~w0;
if (w1 & w2) goto rtn;
}
for (;;) {
unsigned long w1, w2;
w0 = *++lp;
w1 = w0 - k01010101;
w2 = k80808080 & ~w0;
if (w1 & w2) break;
}
rtn:
{
size_t len = (const char *)lp - str;
if ((w0 & 0xff000000UL) == 0) return len;
if ((w0 & 0x00ff0000UL) == 0) return len + 1;
if ((w0 & 0x0000ff00UL) == 0) return len + 2;
return len + 3;
}
}
// -----------------------------------------------------------------------
// memchr(const void *src,int val,size_t n)
// -----------------------------------------------------------------------
void *memchr(const void *src,int val,size_t n)
{
unsigned long kVal = (unsigned char)val;
unsigned long w0;
const unsigned long *lp;
if (4 <= n) {
unsigned long k01010101 = k01;
unsigned long k80808080 = k01010101 << 7;
unsigned long kXorPatn = (kVal << 24) | (kVal << 16) | (kVal << 8) | kVal;
{
long align = (long)src & 3;
if (align == 0) {
lp = (unsigned long *)src - 1;
}else{
unsigned long w1, w2;
const long nBytes = (4 - align);
unsigned long kMask = 0xffffffffUL << (nBytes << 3);
lp = (unsigned long *)((char *)src - align);
w0 = (*lp ^ kXorPatn) | kMask;
w1 = w0 - k01010101;
w2 = k80808080 & ~w0;
if (w1 & w2) goto found;
n -= nBytes;
}
}
{
long nWords = n >> 2;
while (0 < nWords) {
unsigned long w1, w2;
w0 = *++lp ^ kXorPatn;
w1 = w0 - k01010101;
w2 = k80808080 & ~w0;
if (w1 & w2) goto found;
nWords--;
}
}
{
long remainBytes = n & 3;
if (remainBytes != 0) {
unsigned long w1, w2;
unsigned long kMask = 0xffffffffUL >> (remainBytes << 3);
w0 = (*++lp ^ kXorPatn) | kMask;
w1 = w0 - k01010101;
w2 = k80808080 & ~w0;
if (w1 & w2) goto found;
}
}
}else{
const unsigned char *p = src;
long nBytes = n;
while (0 < nBytes) {
if (*p == kVal) return (void *)p;
p++;
nBytes--;
}
}
return NULL;
found:
if ((w0 & 0xff000000UL) == 0) return (void *)lp;
if ((w0 & 0x00ff0000UL) == 0) return (void *)((char *)lp + 1);
if ((w0 & 0x0000ff00UL) == 0) return (void *)((char *)lp + 2);
return (void *)((char *)lp + 3);
}
この Pageは MacOS X + Radio UserLand で作っています。