summaryrefslogtreecommitdiff
path: root/lcs.c
blob: 17d80e9b5af28898fac61b264e56a290f8dea69b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
/* From wikibooks, longest common substring with fixed size = 32.
   Don't think curr and prev need to be ints.
   */
#include <stdlib.h>
#include <string.h>
#include "arpa/inet.h"

unsigned char tbuf[32] __attribute__((aligned(32)));
#if 0
unsigned char tbuf2[32] __attribute__((aligned(32)));
#endif

/* TYPE must be a single word; use typedef to make it so */
typedef unsigned int uint32;
/* LENGTH1 must be >= LENGTH2 */

#define LCS_MEMDEF(LENGTH) \
unsigned char counts##LENGTH##_1[LENGTH] __attribute__((aligned(32))); \
unsigned char counts##LENGTH##_2[LENGTH] __attribute__((aligned(32)));

#define LCS(TYPE, LENGTH1, LENGTH2) \
int lcs_##TYPE##_##LENGTH1##_##LENGTH2(TYPE *str1, TYPE *str2) { \
    unsigned char *curr = counts##LENGTH1##_1; \
    unsigned char *prev = counts##LENGTH1##_2; \
    unsigned char *t = NULL; \
    unsigned char maxSubstr = 0; \
  \
    unsigned char i, j; \
    /* i index into str1, j index into str2 */ \
    /* i=0 unrolled from the main loop */ \
    TYPE c1 = str1[0]; \
    for(j = 0; j<LENGTH2/sizeof(TYPE); ++j) \
    { \
        if (c1 == str2[j]){ \
            prev[j] = 1; \
            maxSubstr = 1; \
        } else { \
            prev[j] = 0; \
        } \
    } \
    for(i = 1; i<LENGTH1/sizeof(TYPE); ++i) \
    { \
        TYPE c = str1[i]; \
        if(c == str2[0]){ /* j=0 unrolled from loop */ \
            curr[0] = 1; \
            if (!maxSubstr) { \
                maxSubstr = 1; \
            } \
        } else { \
            curr[0] = 0; \
        } \
        for(j = 1; j<LENGTH2/sizeof(TYPE); ++j) \
        { \
            if(c != str2[j]) \
            { \
                curr[j] = 0; \
            } \
            else \
            { \
                curr[j] = 1 + prev[j-1]; \
                /* The next if can be replaced with: */ \
                /* maxSubstr = max(maxSubstr, table[i][j]); */ \
                /* (You need algorithm.h library for using max()) */ \
                if(maxSubstr < curr[j]) \
                { \
                     maxSubstr = curr[j]; \
                } \
            } \
        } \
        t = prev; \
        prev = curr; \
        curr = t; \
    } \
    return maxSubstr; \
}

/* simplistic MAX macro, easily misused */
#define MAX(x,y) ((x>y)?x:y)

LCS_MEMDEF(32)

/* Select one of the rough lcs implementations below */

#if 0 // Yields similar perf to the unsigned int version, but has a much larger error
#define LCS_ERROR 14
typedef unsigned long long ull;
LCS(ull, 32, 32)
LCS(ull, 32, 24)
int lcs_32_rough(unsigned char *str1, unsigned char *str2) {
    int max_length = 0;
    int max_length_next = 0;
    int offset;
    max_length = lcs_ull_32_32((unsigned long long*)str1, (unsigned long long*)str2);
    for (offset=1; offset < sizeof(unsigned long long); offset++) {
        memcpy(tbuf, str2+offset, 24);
        max_length_next = lcs_ull_32_24((unsigned long long*)str1, (unsigned long long*)tbuf);
        max_length = MAX(max_length, max_length_next);
    }
    return max_length;
}
#endif

#if 1 // Yields 1120K/s
#define LCS_ERROR 6
LCS(uint32, 32, 32)
LCS(uint32, 32, 28)


int lcs_32_rough_4(unsigned char *str1, unsigned char *str2) {
    int max_length = 0;
    int max_length_next = 0;
    int offset;
    max_length = lcs_uint32_32_32((unsigned int*)str1, (unsigned int*)str2);
    for (offset=1; offset < sizeof(unsigned int); offset++) {
        memcpy(tbuf, str2+offset, 28);
        max_length_next = lcs_uint32_32_28((unsigned int*)str1, (unsigned int*)tbuf);
        max_length = MAX(max_length, max_length_next);
    }
    return max_length;
}

#endif

#if 0 // 645K/s w/error=1, 859K/s w/error=2
typedef unsigned short ushort;
LCS_MEMDEF(30)
LCS(ushort, 32, 32)
LCS(ushort, 32, 30)
LCS(ushort, 30, 30)

/* May under-count by 2 */
int lcs_32_rough(unsigned char *str1, unsigned char *str2) {
    int max_length = 0;
    int max_length_next = 0;
    int offset;
    max_length = lcs_ushort_32_32((unsigned short*)str1, (unsigned short*)str2);

    memcpy(tbuf, str2+1, 30);
    max_length_next = lcs_ushort_32_30((unsigned short*)str1, (unsigned short*)tbuf);
    max_length = MAX(max_length, max_length_next);

#if 0 /* Removes the off-by-two errors, yields Benchmark: 645/s */
#define LCS_ERROR 1
    memcpy(tbuf2, str1+1, 30);
    max_length_next = lcs_ushort_32_30((unsigned short*)str2, (unsigned short*)tbuf2);
    max_length = MAX(max_length, max_length_next);

    max_length_next = lcs_ushort_32_30((unsigned short*)tbuf, (unsigned short*)tbuf2);
    max_length = MAX(max_length, max_length_next);
#else /* yields Benchmark: 859K/s */
#define LCS_ERROR 2
#endif

    return max_length;
}
#endif

int lcs_upper_bound(const uint32_t * const str1_int, const uint32_t * const str2_int) {
    static char contains[256] = {0};
    const unsigned char * const str1 = (const unsigned char * const)str1_int;
    const unsigned char * const str2 = (const unsigned char * const)str2_int;
    int first_sum;
    int ret;

    contains[str1[0]] = contains[str1[1]] = contains[str1[2]]
        = contains[str1[3]] = contains[str1[4]] = contains[str1[5]]
        = contains[str1[6]] = contains[str1[7]] = contains[str1[8]]
        = contains[str1[9]] = contains[str1[10]] = contains[str1[11]]
        = contains[str1[12]] = contains[str1[13]] = contains[str1[14]]
        = contains[str1[15]] = 1;

    first_sum = contains[str2[0]] + contains[str2[1]] + contains[str2[2]]
        + contains[str2[3]] + contains[str2[4]] + contains[str2[5]]
        + contains[str2[6]] + contains[str2[7]] + contains[str2[8]]
        + contains[str2[9]] + contains[str2[10]] + contains[str2[11]]
        + contains[str2[12]] + contains[str2[13]] + contains[str2[14]]
        + contains[str2[15]];

    uint32_t bo_str2_int[4];
    uint32_t shifted_str2_int[4];
    const unsigned char * const str2_r = (const unsigned char * const)shifted_str2_int;
    int second_sum;

    bo_str2_int[0] = ntohl(str2_int[0]);
    bo_str2_int[1] = ntohl(str2_int[1]);
    bo_str2_int[2] = ntohl(str2_int[2]);
    bo_str2_int[3] = ntohl(str2_int[3]);

    shifted_str2_int[0] = (bo_str2_int[0] << 4) | (bo_str2_int[1] >> 28);
    shifted_str2_int[1] = (bo_str2_int[1] << 4) | (bo_str2_int[2] >> 28);
    shifted_str2_int[2] = (bo_str2_int[2] << 4) | (bo_str2_int[3] >> 28);
    shifted_str2_int[3] = (bo_str2_int[3] << 4);

    // don't add str2_r[LE_BYTE_INDEX(15)] because it is known-bad due to zero filling
    second_sum = contains[str2_r[0]] + contains[str2_r[1]] + contains[str2_r[2]]
        + contains[str2_r[3]] + contains[str2_r[4]] + contains[str2_r[5]]
        + contains[str2_r[6]] + contains[str2_r[7]] + contains[str2_r[8]]
        + contains[str2_r[9]] + contains[str2_r[10]] + contains[str2_r[11]]
        + contains[str2_r[BE_BYTE_INDEX(12)]]
        + contains[str2_r[BE_BYTE_INDEX(13)]]
        + contains[str2_r[BE_BYTE_INDEX(14)]];

    // can be off up to two nibbles
    ret = MAX(first_sum, second_sum) * 2 + 2;

    contains[str1[0]] = contains[str1[1]] = contains[str1[2]]
        = contains[str1[3]] = contains[str1[4]] = contains[str1[5]]
        = contains[str1[6]] = contains[str1[7]] = contains[str1[8]]
        = contains[str1[9]] = contains[str1[10]] = contains[str1[11]]
        = contains[str1[12]] = contains[str1[13]] = contains[str1[14]]
        = contains[str1[15]] = 0;

    return ret;
}

/* Exact lcs implementation */
typedef unsigned char uchar;
LCS(uchar, 32, 32)