/* From wikibooks, longest common substring with fixed size = 32. Don't think curr and prev need to be ints. */ #include #include #include "arpa/inet.h" unsigned char tbuf[32] __attribute__((aligned(32))); #if 0 unsigned char tbuf2[32] __attribute__((aligned(32))); #endif /* TYPE must be a single word; use typedef to make it so */ typedef unsigned int uint32; /* LENGTH1 must be >= LENGTH2 */ #define LCS_MEMDEF(LENGTH) \ unsigned char counts##LENGTH##_1[LENGTH] __attribute__((aligned(32))); \ unsigned char counts##LENGTH##_2[LENGTH] __attribute__((aligned(32))); #define LCS(TYPE, LENGTH1, LENGTH2) \ int lcs_##TYPE##_##LENGTH1##_##LENGTH2(TYPE *str1, TYPE *str2) { \ unsigned char *curr = counts##LENGTH1##_1; \ unsigned char *prev = counts##LENGTH1##_2; \ unsigned char *t = NULL; \ unsigned char maxSubstr = 0; \ \ unsigned char i, j; \ /* i index into str1, j index into str2 */ \ /* i=0 unrolled from the main loop */ \ TYPE c1 = str1[0]; \ for(j = 0; jy)?x:y) LCS_MEMDEF(32) /* Select one of the rough lcs implementations below */ #if 0 // Yields similar perf to the unsigned int version, but has a much larger error #define LCS_ERROR 14 typedef unsigned long long ull; LCS(ull, 32, 32) LCS(ull, 32, 24) int lcs_32_rough(unsigned char *str1, unsigned char *str2) { int max_length = 0; int max_length_next = 0; int offset; max_length = lcs_ull_32_32((unsigned long long*)str1, (unsigned long long*)str2); for (offset=1; offset < sizeof(unsigned long long); offset++) { memcpy(tbuf, str2+offset, 24); max_length_next = lcs_ull_32_24((unsigned long long*)str1, (unsigned long long*)tbuf); max_length = MAX(max_length, max_length_next); } return max_length; } #endif #if 1 // Yields 1120K/s #define LCS_ERROR 6 LCS(uint32, 32, 32) LCS(uint32, 32, 28) int lcs_32_rough_4(unsigned char *str1, unsigned char *str2) { int max_length = 0; int max_length_next = 0; int offset; max_length = lcs_uint32_32_32((unsigned int*)str1, (unsigned int*)str2); for (offset=1; offset < sizeof(unsigned int); offset++) { memcpy(tbuf, str2+offset, 28); max_length_next = lcs_uint32_32_28((unsigned int*)str1, (unsigned int*)tbuf); max_length = MAX(max_length, max_length_next); } return max_length; } #endif #if 0 // 645K/s w/error=1, 859K/s w/error=2 typedef unsigned short ushort; LCS_MEMDEF(30) LCS(ushort, 32, 32) LCS(ushort, 32, 30) LCS(ushort, 30, 30) /* May under-count by 2 */ int lcs_32_rough(unsigned char *str1, unsigned char *str2) { int max_length = 0; int max_length_next = 0; int offset; max_length = lcs_ushort_32_32((unsigned short*)str1, (unsigned short*)str2); memcpy(tbuf, str2+1, 30); max_length_next = lcs_ushort_32_30((unsigned short*)str1, (unsigned short*)tbuf); max_length = MAX(max_length, max_length_next); #if 0 /* Removes the off-by-two errors, yields Benchmark: 645/s */ #define LCS_ERROR 1 memcpy(tbuf2, str1+1, 30); max_length_next = lcs_ushort_32_30((unsigned short*)str2, (unsigned short*)tbuf2); max_length = MAX(max_length, max_length_next); max_length_next = lcs_ushort_32_30((unsigned short*)tbuf, (unsigned short*)tbuf2); max_length = MAX(max_length, max_length_next); #else /* yields Benchmark: 859K/s */ #define LCS_ERROR 2 #endif return max_length; } #endif int lcs_upper_bound(const uint32_t * const str1_int, const uint32_t * const str2_int) { static char contains[256] = {0}; const unsigned char * const str1 = (const unsigned char * const)str1_int; const unsigned char * const str2 = (const unsigned char * const)str2_int; int first_sum; int ret; contains[str1[0]] = contains[str1[1]] = contains[str1[2]] = contains[str1[3]] = contains[str1[4]] = contains[str1[5]] = contains[str1[6]] = contains[str1[7]] = contains[str1[8]] = contains[str1[9]] = contains[str1[10]] = contains[str1[11]] = contains[str1[12]] = contains[str1[13]] = contains[str1[14]] = contains[str1[15]] = 1; first_sum = contains[str2[0]] + contains[str2[1]] + contains[str2[2]] + contains[str2[3]] + contains[str2[4]] + contains[str2[5]] + contains[str2[6]] + contains[str2[7]] + contains[str2[8]] + contains[str2[9]] + contains[str2[10]] + contains[str2[11]] + contains[str2[12]] + contains[str2[13]] + contains[str2[14]] + contains[str2[15]]; uint32_t bo_str2_int[4]; uint32_t shifted_str2_int[4]; const unsigned char * const str2_r = (const unsigned char * const)shifted_str2_int; int second_sum; bo_str2_int[0] = ntohl(str2_int[0]); bo_str2_int[1] = ntohl(str2_int[1]); bo_str2_int[2] = ntohl(str2_int[2]); bo_str2_int[3] = ntohl(str2_int[3]); shifted_str2_int[0] = (bo_str2_int[0] << 4) | (bo_str2_int[1] >> 28); shifted_str2_int[1] = (bo_str2_int[1] << 4) | (bo_str2_int[2] >> 28); shifted_str2_int[2] = (bo_str2_int[2] << 4) | (bo_str2_int[3] >> 28); shifted_str2_int[3] = (bo_str2_int[3] << 4); // don't add str2_r[LE_BYTE_INDEX(15)] because it is known-bad due to zero filling second_sum = contains[str2_r[0]] + contains[str2_r[1]] + contains[str2_r[2]] + contains[str2_r[3]] + contains[str2_r[4]] + contains[str2_r[5]] + contains[str2_r[6]] + contains[str2_r[7]] + contains[str2_r[8]] + contains[str2_r[9]] + contains[str2_r[10]] + contains[str2_r[11]] + contains[str2_r[BE_BYTE_INDEX(12)]] + contains[str2_r[BE_BYTE_INDEX(13)]] + contains[str2_r[BE_BYTE_INDEX(14)]]; // can be off up to two nibbles ret = MAX(first_sum, second_sum) * 2 + 2; contains[str1[0]] = contains[str1[1]] = contains[str1[2]] = contains[str1[3]] = contains[str1[4]] = contains[str1[5]] = contains[str1[6]] = contains[str1[7]] = contains[str1[8]] = contains[str1[9]] = contains[str1[10]] = contains[str1[11]] = contains[str1[12]] = contains[str1[13]] = contains[str1[14]] = contains[str1[15]] = 0; return ret; } /* Exact lcs implementation */ typedef unsigned char uchar; LCS(uchar, 32, 32)