xframe/vendor/github.com/go-faster/city/ch_128.go

159 lines
3.9 KiB
Go

package city
import "encoding/binary"
// A subroutine for CH128(). Returns a decent 128-bit hash for strings
// of any length representable in signed long. Based on City and Mumur.
func chMurmur(s []byte, seed U128) U128 {
length := len(s)
a := seed.Low
b := seed.High
c := uint64(0)
d := uint64(0)
l := length - 16
if len(s) <= 16 { // length <= 16
a = shiftMix(a*k1) * k1
c = b*k1 + ch0to16(s, length)
if length >= 8 {
d = shiftMix(a + binary.LittleEndian.Uint64(s))
} else {
d = shiftMix(a + c)
}
} else { // length > 16
c = ch16(binary.LittleEndian.Uint64(s[length-8:])+k1, a)
d = ch16(b+uint64(length), c+binary.LittleEndian.Uint64(s[length-16:]))
a += d
{
a ^= shiftMix(binary.LittleEndian.Uint64(s[0:8:8])*k1) * k1
a *= k1
b ^= a
c ^= shiftMix(binary.LittleEndian.Uint64(s[8:8+8:8+8])*k1) * k1
c *= k1
d ^= c
s = s[16:]
l -= 16
}
if l > 0 {
for len(s) >= 16 {
a ^= shiftMix(binary.LittleEndian.Uint64(s[0:8:8])*k1) * k1
a *= k1
b ^= a
c ^= shiftMix(binary.LittleEndian.Uint64(s[8:8+8:8+8])*k1) * k1
c *= k1
d ^= c
s = s[16:]
l -= 16
if l <= 0 {
break
}
}
}
}
a = ch16(a, c)
b = ch16(d, b)
return U128{a ^ b, ch16(b, a)}
}
// CH128 returns 128-bit ClickHouse CityHash.
func CH128(s []byte) U128 {
if len(s) >= 16 {
return CH128Seed(s[16:], U128{
Low: binary.LittleEndian.Uint64(s[0:8:8]) ^ k3,
High: binary.LittleEndian.Uint64(s[8 : 8+8 : 8+8]),
})
}
if len(s) >= 8 {
l := uint64(len(s))
return CH128Seed(nil, U128{
Low: binary.LittleEndian.Uint64(s) ^ (l * k0),
High: binary.LittleEndian.Uint64(s[l-8:]) ^ k1,
})
}
return CH128Seed(s, U128{Low: k0, High: k1})
}
// CH128Seed returns 128-bit seeded ClickHouse CityHash.
func CH128Seed(s []byte, seed U128) U128 {
if len(s) < 128 {
return chMurmur(s, seed)
}
// Saving initial input for tail hashing.
t := s
// We expect len >= 128 to be the common case. Keep 56 bytes of state:
// v, w, x, y and z.
var v, w U128
x := seed.Low
y := seed.High
z := uint64(len(s)) * k1
{
subSlice := (*[96]byte)(s[0:])
v.Low = rot64(y^k1, 49)*k1 + binary.LittleEndian.Uint64(subSlice[0:])
v.High = rot64(v.Low, 42)*k1 + binary.LittleEndian.Uint64(subSlice[8:])
w.Low = rot64(y+z, 35)*k1 + x
w.High = rot64(x+binary.LittleEndian.Uint64(subSlice[88:]), 53) * k1
}
// This is the same inner loop as CH64(), manually unrolled.
for len(s) >= 128 {
// Roll 1.
{
x = rot64(x+y+v.Low+binary.LittleEndian.Uint64(s[16:16+8:16+8]), 37) * k1
y = rot64(y+v.High+binary.LittleEndian.Uint64(s[48:48+8:48+8]), 42) * k1
x ^= w.High
y ^= v.Low
z = rot64(z^w.Low, 33)
v = weakHash32SeedsByte(s, v.High*k1, x+w.Low)
w = weakHash32SeedsByte(s[32:], z+w.High, y)
z, x = x, z
}
// Roll 2.
{
const offset = 64
x = rot64(x+y+v.Low+binary.LittleEndian.Uint64(s[offset+16:offset+16+8:offset+16+8]), 37) * k1
y = rot64(y+v.High+binary.LittleEndian.Uint64(s[offset+48:offset+48+8:offset+48+8]), 42) * k1
x ^= w.High
y ^= v.Low
z = rot64(z^w.Low, 33)
v = weakHash32SeedsByte(s[offset:], v.High*k1, x+w.Low)
w = weakHash32SeedsByte(s[offset+32:], z+w.High, y)
z, x = x, z
}
s = s[128:]
}
y += rot64(w.Low, 37)*k0 + z
x += rot64(v.Low+z, 49) * k0
// If 0 < length < 128, hash up to 4 chunks of 32 bytes each from the end of s.
for i := 0; i < len(s); {
i += 32
y = rot64(y-x, 42)*k0 + v.High
w.Low += binary.LittleEndian.Uint64(t[len(t)-i+16:])
x = rot64(x, 49)*k0 + w.Low
w.Low += v.Low
v = weakHash32SeedsByte(t[len(t)-i:], v.Low, v.High)
}
// At this point our 48 bytes of state should contain more than
// enough information for a strong 128-bit hash. We use two
// different 48-byte-to-8-byte hashes to get a 16-byte final result.
x = ch16(x, v.Low)
y = ch16(y, w.Low)
return U128{
Low: ch16(x+v.High, w.High) + y,
High: ch16(x+w.High, y+v.High),
}
}