From 34f3a25cf608320e8b95ada8cbe412ac7ec46be5 Mon Sep 17 00:00:00 2001 From: Eric Anderson Date: Mon, 27 Dec 2010 01:14:42 -0600 Subject: lcs_upper_bound's second sum is required all the time and try to handle endians --- lcs.c | 64 +++++++++++++++++++++++++++------------------------------------- 1 file changed, 27 insertions(+), 37 deletions(-) (limited to 'lcs.c') diff --git a/lcs.c b/lcs.c index f3a6203..2ea6f87 100644 --- a/lcs.c +++ b/lcs.c @@ -158,11 +158,7 @@ int lcs_32_rough(unsigned char *str1, unsigned char *str2) { #endif #ifdef USE_PRE_LCS -#define DO_SECOND_SUM - -#ifdef DO_SECOND_SUM #include "arpa/inet.h" -#endif int lcs_upper_bound(const uint32_t * const str1_int, const uint32_t * const str2_int) { static char contains[256] = {0}; @@ -170,7 +166,7 @@ int lcs_upper_bound(const uint32_t * const str1_int, const uint32_t * const str2 const unsigned char * const str2 = (const unsigned char * const)str2_int; int first_sum; int ret; - + contains[str1[0]] = contains[str1[1]] = contains[str1[2]] = contains[str1[3]] = contains[str1[4]] = contains[str1[5]] = contains[str1[6]] = contains[str1[7]] = contains[str1[8]] @@ -185,38 +181,32 @@ int lcs_upper_bound(const uint32_t * const str1_int, const uint32_t * const str2 + contains[str2[12]] + contains[str2[13]] + contains[str2[14]] + contains[str2[15]]; - // can be off up to two nibbles (e.g., "0110" and "1122") - ret = first_sum * 2 + 2; -#ifdef DO_SECOND_SUM - // We can only increase our accuracy by 1, so do second_sum only if we can - // turn a false-positive into a negative - if (ret == MSF) { - uint32_t bo_str2_int[4]; - uint32_t shifted_str2_int[4]; - const unsigned char * const str2_r = (const unsigned char * const)shifted_str2_int; - int second_sum; - - bo_str2_int[0] = ntohl(str2_int[0]); - bo_str2_int[1] = ntohl(str2_int[1]); - bo_str2_int[2] = ntohl(str2_int[2]); - bo_str2_int[3] = ntohl(str2_int[3]); - - shifted_str2_int[0] = (bo_str2_int[0] << 4) | (bo_str2_int[1] >> 28); - shifted_str2_int[1] = (bo_str2_int[1] << 4) | (bo_str2_int[2] >> 28); - shifted_str2_int[2] = (bo_str2_int[2] << 4) | (bo_str2_int[3] >> 28); - shifted_str2_int[3] = (bo_str2_int[3] << 4) | (bo_str2_int[0] >> 28); - - second_sum = contains[str2_r[0]] + contains[str2_r[1]] + contains[str2_r[2]] - + contains[str2_r[3]] + contains[str2_r[4]] + contains[str2_r[5]] - + contains[str2_r[6]] + contains[str2_r[7]] + contains[str2_r[8]] - + contains[str2_r[9]] + contains[str2_r[10]] + contains[str2_r[11]] - + contains[str2_r[12]] + contains[str2_r[13]] + contains[str2_r[14]] - + contains[str2_r[15]]; - - // can be off up to one nibble (e.g., "1110" and "2111") - ret = MAX(first_sum, second_sum) * 2 + 1; - } -#endif + uint32_t bo_str2_int[4]; + uint32_t shifted_str2_int[4]; + const unsigned char * const str2_r = (const unsigned char * const)shifted_str2_int; + int second_sum; + + bo_str2_int[0] = ntohl(str2_int[0]); + bo_str2_int[1] = ntohl(str2_int[1]); + bo_str2_int[2] = ntohl(str2_int[2]); + bo_str2_int[3] = ntohl(str2_int[3]); + + shifted_str2_int[0] = (bo_str2_int[0] << 4) | (bo_str2_int[1] >> 28); + shifted_str2_int[1] = (bo_str2_int[1] << 4) | (bo_str2_int[2] >> 28); + shifted_str2_int[2] = (bo_str2_int[2] << 4) | (bo_str2_int[3] >> 28); + shifted_str2_int[3] = (bo_str2_int[3] << 4); + + // don't add str2_r[LE_BYTE_INDEX(15)] because it is known-bad due to zero filling + second_sum = contains[str2_r[0]] + contains[str2_r[1]] + contains[str2_r[2]] + + contains[str2_r[3]] + contains[str2_r[4]] + contains[str2_r[5]] + + contains[str2_r[6]] + contains[str2_r[7]] + contains[str2_r[8]] + + contains[str2_r[9]] + contains[str2_r[10]] + contains[str2_r[11]] + + contains[str2_r[BE_BYTE_INDEX(12)]] + + contains[str2_r[BE_BYTE_INDEX(13)]] + + contains[str2_r[BE_BYTE_INDEX(14)]]; + + // can be off up to two nibbles + ret = MAX(first_sum, second_sum) * 2 + 2; contains[str1[0]] = contains[str1[1]] = contains[str1[2]] = contains[str1[3]] = contains[str1[4]] = contains[str1[5]] -- cgit v1.2.3-70-g09d2