librsync  2.3.4
rabinkarp.c
1/*= -*- c-basic-offset: 4; indent-tabs-mode: nil; -*-
2 *
3 * rabinkarp -- The RabinKarp rolling checksum.
4 *
5 * Copyright (C) 2019 by Donovan Baarda <abo@minkirri.apana.org.au>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU Lesser General Public License as published by
9 * the Free Software Foundation; either version 2.1 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 */
21#include "config.h" /* IWYU pragma: keep */
22#include "rabinkarp.h"
23
24/* Constant for RABINKARP_MULT^2. */
25#define RABINKARP_MULT2 0xa5b71959U
26
27/* Macros for doing 16 bytes with 2 mults that can be done in parallel. Testing
28 showed this as a performance sweet spot vs 16x1, 8x2, 4x4 1x16 alternative
29 arrangements. */
30#define PAR2X1(hash,buf,i) (RABINKARP_MULT2*(hash) + \
31 RABINKARP_MULT*buf[i] + \
32 buf[i+1])
33#define PAR2X2(hash,buf,i) PAR2X1(PAR2X1(hash,buf,i),buf,i+2)
34#define PAR2X4(hash,buf,i) PAR2X2(PAR2X2(hash,buf,i),buf,i+4)
35#define PAR2X8(hash,buf) PAR2X4(PAR2X4(hash,buf,0),buf,8)
36
37/* Table of RABINKARP_MULT^(2^(i+1)) for power lookups. */
38const static uint32_t RABINKARP_MULT_POW2[32] = {
39 0x08104225U,
40 0xa5b71959U,
41 0xf9c080f1U,
42 0x7c71e2e1U,
43 0x0bb409c1U,
44 0x4dc72381U,
45 0xd17a8701U,
46 0x96260e01U,
47 0x55101c01U,
48 0x2d303801U,
49 0x66a07001U,
50 0xfe40e001U,
51 0xc081c001U,
52 0x91038001U,
53 0x62070001U,
54 0xc40e0001U,
55 0x881c0001U,
56 0x10380001U,
57 0x20700001U,
58 0x40e00001U,
59 0x81c00001U,
60 0x03800001U,
61 0x07000001U,
62 0x0e000001U,
63 0x1c000001U,
64 0x38000001U,
65 0x70000001U,
66 0xe0000001U,
67 0xc0000001U,
68 0x80000001U,
69 0x00000001U,
70 0x00000001U
71};
72
73/* Get the value of RABINKARP_MULT^n. */
74static inline uint32_t rabinkarp_pow(uint32_t n)
75{
76 const uint32_t *m = RABINKARP_MULT_POW2;
77 uint32_t ans = 1;
78 while (n) {
79 if (n & 1) {
80 ans *= *m;
81 }
82 m++;
83 n >>= 1;
84 }
85 return ans;
86}
87
88void rabinkarp_update(rabinkarp_t *sum, const unsigned char *buf, size_t len)
89{
90 size_t n = len;
91 uint32_t hash = sum->hash;
92
93 while (n >= 16) {
94 hash = PAR2X8(hash, buf);
95 buf += 16;
96 n -= 16;
97 }
98 while (n) {
99 hash = RABINKARP_MULT * hash + *buf++;
100 n--;
101 }
102 sum->hash = hash;
103 sum->count += len;
104 sum->mult *= rabinkarp_pow((uint32_t)len);
105}
The rabinkarp class implementation of the RabinKarp rollsum.
#define RABINKARP_MULT
The RabinKarp multiplier.
Definition: rabinkarp.h:41
The rabinkarp_t state type.
Definition: rabinkarp.h:56
uint32_t mult
The value of RABINKARP_MULT^count.
Definition: rabinkarp.h:59
size_t count
Count of bytes included in sum.
Definition: rabinkarp.h:57
uint32_t hash
The accumulated hash value.
Definition: rabinkarp.h:58