#include #include #include #include #include #include "tsc.c" #include "md5.c" #define MD5_DIGEST_LENGTH 16 #ifndef USE_PRE_LCS #define USE_PRE_LCS 1 #endif #ifndef USE_ROUGH #define USE_ROUGH 1 #endif #ifndef SINGLERUN # define SINGLERUN 0 #endif #ifndef TESTING # define TESTING 0 #endif #ifndef USE_ROUGH_LCS # ifdef USE_LCS4 # define USE_ROUGH_LCS 4 # endif # ifdef USE_LCS5 # define USE_ROUGH_LCS 5 # endif # ifndef USE_ROUGH_LCS # error You need to specify which rough LCS implementation to use with USE_ROUGH_LCS=5 or the like # endif #endif //#define PEDANTIC #ifndef BENCH_VAL #define BENCH_VAL 8000000 #endif /* With MSF 11, we may miss 10's, but we don't have the false positives. * But we only get ~1110K/s instead of the ~1115K/s we get with MSF 8. Whahuh? */ #define MSF 8 #if BYTE_ORDER == LITTLE_ENDIAN # define BE_BYTE_INDEX(i) (i^3) # define LE_BYTE_INDEX(i) (i) #elif BYTE_ORDER == BIG_ENDIAN # define BE_BYTE_INDEX(i) (i) # define LE_BYTE_INDEX(i) (i^3) #else # error Unknown endianness #endif #ifdef PEDANTIC // use similar to "assert(a >= b);" # define PEDANTIC_TEST_GTE(a, b) {if ((a) < (b)) printf("Pedantic test failed (%s:%d): %s\n failed: %s (%d) >= %s (%d)\n", __FILE__, __LINE__, input_hex, #a, a, #b, b);} #else # define PEDANTIC_TEST_GTE(a, b) #endif #ifdef TIMINGS unsigned long long t0, t1; # define TIMINGS_START() (t0 = rdtsc()) # define TIMINGS_END(str) {t1 = rdtsc(); if (i % 1000000 == 0) printf(str "=%llu\n", (t1-t0-63));} #else # define TIMINGS_START() # define TIMINGS_END(str) #endif unsigned char tbl[512]; md5_state_t global_md5; #if 0 __inline__ void MD5(unsigned char *input, unsigned int input_len, unsigned char *out) { md5_init(&global_md5); md5_append(&global_md5, (const md5_byte_t *)input, input_len); md5_finish(&global_md5, out); } #endif void raw_MD5_64(unsigned char* input, unsigned char* digest) { /* initialize what we need in the md5_state_t */ global_md5.abcd[0] = 0x67452301; global_md5.abcd[1] = 0xefcdab89; global_md5.abcd[2] = 0x98badcfe; global_md5.abcd[3] = 0x10325476; /* process the fixed-sized input, pre-padded with the length */ md5_process(&global_md5, input); /* and finally store the digest * TODO: convert straight to hex? */ unsigned int i; for (i = 0; i < 16; ++i) { memcpy(digest, tbl + ((unsigned char*)(global_md5.abcd))[LE_BYTE_INDEX(i)] * 2, 2); digest += 2; } } #ifndef TIGER #include "srandomdev.c" #endif #include "lcs.c" #include "lcs_sse.c" /* timing functions */ struct timeval stopwatch_start; struct timeval stopwatch_stop; void start_stopwatch() { gettimeofday(&stopwatch_start, NULL); } unsigned long stop_stopwatch() { int seconds, usecs; gettimeofday(&stopwatch_stop, NULL); seconds = stopwatch_stop.tv_sec - stopwatch_start.tv_sec; usecs = stopwatch_stop.tv_usec - stopwatch_start.tv_usec; if (usecs < 0) { seconds -= 1; usecs += 1000000; } return seconds * 1000000 + usecs; } const char *hexchar = "0123456789abcdef"; unsigned char desttohex[64]; unsigned char test_input[] = { // 0x00, 0x04, 0x89, 0x4e, 0xf8, 0x11, 0x06, 0xbf, 0x87, 0xe3, 0x35, 0x6f, 0x68, 0xa5, 0xfa, 0xb3, // 0xcf, 0xa2, 0x0e, 0x2a, 0xda, 0xfc, 0x8e, 0x06, 0x66, 0x7b, 0x99, 0xa4, 0x06, 0x21, 0x95, 0x63, /* Eli's standard */ 0x22, 0x2b, 0x5b, 0x3c, 0xf7, 0xb9, 0xc2, 0x65, 0xc0, 0x4d, 0xab, 0x6d, 0xc6, 0xb9, 0x56, 0xeb, // 0xc4, 0x48, 0x13, 0xf8, 0x8e, 0xff, 0xd3, 0x03, 0x4f, 0x22, 0x0b, 0xe7, 0xe5, 0xe3, 0x35, 0xa0, /* One that fails with FALSE NEGATIVE2 when using non-byteswapped sse lcs */ /* 0xee, 0xf5, 0x67, 0x7f, 0x61, 0x88, 0xcb, 0xea, 0x8f, 0x21, 0x13, 0x85, 0x1f, 0xac, 0x4d, 0xab */ }; unsigned char input_raw[MD5_DIGEST_LENGTH]; unsigned char input_hex[64] __attribute__((aligned(64))); /* Writes hexadecimal form of argument to desttohex, no terminating null * character. */ void tohex(unsigned char *md5val) { unsigned char *c = desttohex; unsigned int i; for(i=0; i> 4]; *ct++ = hexchar[(x & 0xf)]; } desttohex[32] = '\0'; for (x=0; x<64; x++) { input_hex[x] = '\0'; } input_hex[32] = 0x80; // start of md5 pad input_hex[57] = 0x01; // hardcoded length of 256 bits int n; unsigned int i = 0; unsigned int matches_found = 0; unsigned int possible_matches = 0; printf("%p %p %p\n", desttohex, input_hex, tbl); /* Generate a random start point */ if (TESTING) { memcpy(input_raw, test_input, MD5_DIGEST_LENGTH); } else { srandomdev(); for(i=0; i<16; i++) { input_raw[i] = random() & 0xff; } i = 0; } tohex(input_raw); memcpy((char *)input_hex, (char *)desttohex, 32); printf("Starting at "); print_input_hex(); printf("\n"); fflush(stdout); #ifdef TIMINGS unsigned long long t0, t1; #endif /* Loop */ start_stopwatch(); while (1) { TIMINGS_START(); raw_MD5_64(input_hex, desttohex); TIMINGS_END("md5"); #ifdef PEDANTIC const int pedantic_n = lcs_uchar_32_32(input_hex, desttohex); #endif do { if (USE_PRE_LCS) { n = lcs_upper_bound((uint32_t*)input_raw, (uint32_t*)global_md5.abcd); PEDANTIC_TEST_GTE(n, pedantic_n); if (n < MSF) break; } if (USE_ROUGH) { TIMINGS_START(); if (USE_ROUGH_LCS == 4) n = lcs_32_rough_4(input_hex, desttohex); else if (USE_ROUGH_LCS == 5) n = lcs_32_rough_5((uint32_t*)input_raw, (uint32_t*)global_md5.abcd); else assert(0); TIMINGS_END("rough"); /* Based on testing, the 'rough' match may undercount by up to 6 * characters (for an int32-based implementation). It now returns * 1/4th the length of the match it found, so it will return >0 if * there is a match of 7 or greater, and sometimes return >0 if there * is a match of 4 or greater. */ n = n * 4 + 6; PEDANTIC_TEST_GTE(n, pedantic_n); if (n < MSF) break; } possible_matches++; TIMINGS_START(); n = lcs_uchar_32_32(input_hex, desttohex); /* get the exact number */ TIMINGS_END("full"); if (n < MSF) break; matches_found++; if (!SINGLERUN) { print_input_hex(); printf(" %s %d\n", desttohex, n); fflush(stdout); } } while (0); /* keep us updated on speed */ i++; if (i == BENCH_VAL) { unsigned long elapsed = stop_stopwatch(); printf("Benchmark: %dK/s (found %d matches and %d false matches in %d processed)\n", (int) ((float)BENCH_VAL * 1000 / elapsed), matches_found, possible_matches - matches_found, i); if (SINGLERUN) printf("%d processed in %ld us\n", BENCH_VAL, elapsed); fflush(stdout); /* don't include the time it takes us to print out the benchmark */ start_stopwatch(); i = 0; matches_found = 0; possible_matches = 0; if (SINGLERUN) return 0; } inc_input(); } return 0; }