diff options
Diffstat (limited to 'lcs.c')
-rw-r--r-- | lcs.c | 217 |
1 files changed, 217 insertions, 0 deletions
@@ -0,0 +1,217 @@ +/* From wikibooks, longest common substring with fixed size = 32. + Don't think curr and prev need to be ints. + */ +#include <stdlib.h> +#include <string.h> + + +unsigned char tbuf[32] __attribute__((aligned(32))); +#if 0 +unsigned char tbuf2[32] __attribute__((aligned(32))); +#endif + +/* TYPE must be a single word; use typedef to make it so */ +typedef unsigned int uint32; +/* LENGTH1 must be >= LENGTH2 */ + +#define LCS_MEMDEF(LENGTH) \ +unsigned char counts##LENGTH##_1[LENGTH] __attribute__((aligned(32))); \ +unsigned char counts##LENGTH##_2[LENGTH] __attribute__((aligned(32))); + +#define LCS(TYPE, LENGTH1, LENGTH2) \ +int lcs_##TYPE##_##LENGTH1##_##LENGTH2(TYPE *str1, TYPE *str2) { \ + unsigned char *curr = counts##LENGTH1##_1; \ + unsigned char *prev = counts##LENGTH1##_2; \ + unsigned char *t = NULL; \ + unsigned char maxSubstr = 0; \ + \ + unsigned char i, j; \ + /* i index into str1, j index into str2 */ \ + /* i=0 unrolled from the main loop */ \ + TYPE c1 = str1[0]; \ + for(j = 0; j<LENGTH2/sizeof(TYPE); ++j) \ + { \ + if (c1 == str2[j]){ \ + prev[j] = 1; \ + maxSubstr = 1; \ + } else { \ + prev[j] = 0; \ + } \ + } \ + for(i = 1; i<LENGTH1/sizeof(TYPE); ++i) \ + { \ + TYPE c = str1[i]; \ + if(c == str2[0]){ /* j=0 unrolled from loop */ \ + curr[0] = 1; \ + if (!maxSubstr) { \ + maxSubstr = 1; \ + } \ + } else { \ + curr[0] = 0; \ + } \ + for(j = 1; j<LENGTH2/sizeof(TYPE); ++j) \ + { \ + if(c != str2[j]) \ + { \ + curr[j] = 0; \ + } \ + else \ + { \ + curr[j] = 1 + prev[j-1]; \ + /* The next if can be replaced with: */ \ + /* maxSubstr = max(maxSubstr, table[i][j]); */ \ + /* (You need algorithm.h library for using max()) */ \ + if(maxSubstr < curr[j]) \ + { \ + maxSubstr = curr[j]; \ + } \ + } \ + } \ + t = prev; \ + prev = curr; \ + curr = t; \ + } \ + return maxSubstr; \ +} + +/* simplistic MAX macro, easily misused */ +#define MAX(x,y) ((x>y)?x:y) + +LCS_MEMDEF(32) + +/* Select one of the rough lcs implementations below */ + +#if 0 // Yields similar perf to the unsigned int version, but has a much larger error +#define LCS_ERROR 14 +typedef unsigned long long ull; +LCS(ull, 32, 32) +LCS(ull, 32, 24) +int lcs_32_rough(unsigned char *str1, unsigned char *str2) { + int max_length = 0; + int max_length_next = 0; + int offset; + max_length = lcs_ull_32_32((unsigned long long*)str1, (unsigned long long*)str2); + for (offset=1; offset < sizeof(unsigned long long); offset++) { + memcpy(tbuf, str2+offset, 24); + max_length_next = lcs_ull_32_24((unsigned long long*)str1, (unsigned long long*)tbuf); + max_length = MAX(max_length, max_length_next); + } + return max_length; +} +#endif + +#if 1 // Yields 1120K/s +#define LCS_ERROR 6 +LCS(uint32, 32, 32) +LCS(uint32, 32, 28) + + +#ifdef USE_LCS4 +int lcs_32_rough(unsigned char *str1, unsigned char *str2) { + int max_length = 0; + int max_length_next = 0; + int offset; + max_length = lcs_uint32_32_32((unsigned int*)str1, (unsigned int*)str2); + for (offset=1; offset < sizeof(unsigned int); offset++) { + memcpy(tbuf, str2+offset, 28); + max_length_next = lcs_uint32_32_28((unsigned int*)str1, (unsigned int*)tbuf); + max_length = MAX(max_length, max_length_next); + } + return max_length; +} +#endif + +#endif + +#if 0 // 645K/s w/error=1, 859K/s w/error=2 +typedef unsigned short ushort; +LCS_MEMDEF(30) +LCS(ushort, 32, 32) +LCS(ushort, 32, 30) +LCS(ushort, 30, 30) + +/* May under-count by 2 */ +int lcs_32_rough(unsigned char *str1, unsigned char *str2) { + int max_length = 0; + int max_length_next = 0; + int offset; + max_length = lcs_ushort_32_32((unsigned short*)str1, (unsigned short*)str2); + + memcpy(tbuf, str2+1, 30); + max_length_next = lcs_ushort_32_30((unsigned short*)str1, (unsigned short*)tbuf); + max_length = MAX(max_length, max_length_next); + +#if 0 /* Removes the off-by-two errors, yields Benchmark: 645/s */ +#define LCS_ERROR 1 + memcpy(tbuf2, str1+1, 30); + max_length_next = lcs_ushort_32_30((unsigned short*)str2, (unsigned short*)tbuf2); + max_length = MAX(max_length, max_length_next); + + max_length_next = lcs_ushort_32_30((unsigned short*)tbuf, (unsigned short*)tbuf2); + max_length = MAX(max_length, max_length_next); +#else /* yields Benchmark: 859K/s */ +#define LCS_ERROR 2 +#endif + + return max_length; +} +#endif + +#ifdef USE_PRE_LCS +#include "arpa/inet.h" +int lcs_upper_bound(const uint32_t * const str1_int, const uint32_t * const str2_int) { + static char contains[256] = {0}; + uint32_t bo_str2_int[4]; + uint32_t shifted_str2_int[4]; + const unsigned char * const str1 = (const unsigned char * const)str1_int; + const unsigned char * const str2 = (const unsigned char * const)str2_int; + const unsigned char * const str2_r = (const unsigned char * const)shifted_str2_int; + int first_sum, second_sum; + + bo_str2_int[0] = ntohl(str2_int[0]); + bo_str2_int[1] = ntohl(str2_int[1]); + bo_str2_int[2] = ntohl(str2_int[2]); + bo_str2_int[3] = ntohl(str2_int[3]); + + shifted_str2_int[0] = (bo_str2_int[0] << 4) | (bo_str2_int[1] >> 28); + shifted_str2_int[1] = (bo_str2_int[1] << 4) | (bo_str2_int[2] >> 28); + shifted_str2_int[2] = (bo_str2_int[2] << 4) | (bo_str2_int[3] >> 28); + shifted_str2_int[3] = (bo_str2_int[3] << 4) | (bo_str2_int[0] >> 28); + + contains[str1[0]] = contains[str1[1]] = contains[str1[2]] + = contains[str1[3]] = contains[str1[4]] = contains[str1[5]] + = contains[str1[6]] = contains[str1[7]] = contains[str1[8]] + = contains[str1[9]] = contains[str1[10]] = contains[str1[11]] + = contains[str1[12]] = contains[str1[13]] = contains[str1[14]] + = contains[str1[15]] = 1; + + first_sum = contains[str2[0]] + contains[str2[1]] + contains[str2[2]] + + contains[str2[3]] + contains[str2[4]] + contains[str2[5]] + + contains[str2[6]] + contains[str2[7]] + contains[str2[8]] + + contains[str2[9]] + contains[str2[10]] + contains[str2[11]] + + contains[str2[12]] + contains[str2[13]] + contains[str2[14]] + + contains[str2[15]]; + + second_sum = contains[str2_r[0]] + contains[str2_r[1]] + contains[str2_r[2]] + + contains[str2_r[3]] + contains[str2_r[4]] + contains[str2_r[5]] + + contains[str2_r[6]] + contains[str2_r[7]] + contains[str2_r[8]] + + contains[str2_r[9]] + contains[str2_r[10]] + contains[str2_r[11]] + + contains[str2_r[12]] + contains[str2_r[13]] + contains[str2_r[14]] + + contains[str2_r[15]]; + + contains[str1[0]] = contains[str1[1]] = contains[str1[2]] + = contains[str1[3]] = contains[str1[4]] = contains[str1[5]] + = contains[str1[6]] = contains[str1[7]] = contains[str1[8]] + = contains[str1[9]] = contains[str1[10]] = contains[str1[11]] + = contains[str1[12]] = contains[str1[13]] = contains[str1[14]] + = contains[str1[15]] = 0; + + // can be off up to two nibbles + return MAX(first_sum, second_sum) * 2 + 2; +} +#endif + +/* Exact lcs implementation */ +typedef unsigned char uchar; +LCS(uchar, 32, 32) + |