简介

CityHash 系列字符串散列算法由著名的搜索引擎公司 Google 发布(http://www.cityhash.org.uk/)。Google 发布了两种算法:CityHash64 与 CityHash128,它们分别根据字符串计算 64 位和 128 位的散列值。这些算法不适用于加密,但适合用在散列表等场合。目前 CityHash 算法只有 C++ 和 Python 的实现,Java 还没有开源的 CityHash 实现。下面通过 Java 代码实现 Java 版的 CityHash64 与 CityHash128;经过大量数据测试,其结果与 C++ 开源 CityHash 算法产生的结果一致。实现者是我的同事(中科院博士-薛健)。

import java.io.*;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.UUID;

/**
 * Pure-Java implementation of the CityHash family of non-cryptographic string hashes.
 *
 * <p>All {@code long} values are treated as unsigned 64-bit quantities: arithmetic relies on
 * two's-complement wrap-around and every right shift is the logical {@code >>>}. Input bytes are
 * read in little-endian order.
 *
 * <p>The methods hold no state; they only read from the supplied array. They perform no explicit
 * argument validation: a range that does not fit inside the array surfaces as the JVM's own
 * {@link ArrayIndexOutOfBoundsException} (or {@link NullPointerException} for a {@code null}
 * array that is actually dereferenced).
 *
 * <p>NOTE(review): the 64-bit routines use the length-dependent multiplier ({@code k2 + len * 2})
 * while the 128-bit routines do not mix in that multiplier and XOR the seed with {@code k3}; this
 * looks like two different upstream CityHash releases. Confirm against the reference version you
 * need to interoperate with before relying on cross-language equality of the 128-bit results.
 */
public class CityHash {

    private static final long k0 = 0xc3a5c85c97cb3127L;
    private static final long k1 = 0xb492b66fbe98f273L;
    private static final long k2 = 0x9ae16a3b2f90404fL;
    private static final long k3 = 0xc949d7c7509e6557L;
    private static final long kMul = 0x9ddfea08eb382d69L;

    /** Reads 8 bytes starting at {@code pos} as a little-endian 64-bit value. */
    private static long fetch64(byte[] s, int pos) {
        return ((s[pos + 7] & 0xFFL) << 56)
                | ((s[pos + 6] & 0xFFL) << 48)
                | ((s[pos + 5] & 0xFFL) << 40)
                | ((s[pos + 4] & 0xFFL) << 32)
                | ((s[pos + 3] & 0xFFL) << 24)
                | ((s[pos + 2] & 0xFFL) << 16)
                | ((s[pos + 1] & 0xFFL) << 8)
                | (s[pos] & 0xFFL);
    }

    /** Reads 4 bytes starting at {@code pos} as a little-endian unsigned 32-bit value. */
    private static long fetch32(byte[] s, int pos) {
        return ((s[pos + 3] & 0xFFL) << 24)
                | ((s[pos + 2] & 0xFFL) << 16)
                | ((s[pos + 1] & 0xFFL) << 8)
                | (s[pos] & 0xFFL);
    }

    /** Rotates {@code val} right by {@code shift} bits (a shift of 0 returns {@code val}). */
    private static long rotate(long val, int shift) {
        return Long.rotateRight(val, shift);
    }

    /** Folds the high bits of {@code val} into its low bits. */
    private static long shiftMix(long val) {
        return val ^ (val >>> 47);
    }

    /** Mixes two 64-bit values into one using the default multiplier {@code kMul}. */
    private static long hashLen16(long u, long v) {
        return hashLen16(u, v, kMul);
    }

    /** Mixes two 64-bit values into one using the caller-supplied multiplier. */
    private static long hashLen16(long u, long v, long mul) {
        long a = (u ^ v) * mul;
        a ^= (a >>> 47);
        long b = (v ^ a) * mul;
        b ^= (b >>> 47);
        b *= mul;
        return b;
    }

    /** Hashes 0 to 16 bytes; an empty range yields the constant {@code k2}. */
    private static long hashLen0to16(byte[] s, int pos, int len) {
        if (len >= 8) {
            long mul = k2 + len * 2;
            long a = fetch64(s, pos) + k2;
            long b = fetch64(s, pos + len - 8);
            long c = rotate(b, 37) * mul + a;
            long d = (rotate(a, 25) + b) * mul;
            return hashLen16(c, d, mul);
        }
        if (len >= 4) {
            long mul = k2 + len * 2;
            long a = fetch32(s, pos);
            return hashLen16((a << 3) + len, fetch32(s, pos + len - 4), mul);
        }
        if (len > 0) {
            // Sample the first, middle and last byte as unsigned values.
            int first = s[pos] & 0xFF;
            int middle = s[pos + (len >>> 1)] & 0xFF;
            int last = s[pos + len - 1] & 0xFF;
            int y = first + (middle << 8);
            int z = len + (last << 2);
            return shiftMix(y * k2 ^ z * k0) * k2;
        }
        return k2;
    }

    /** Hashes 17 to 32 bytes. */
    private static long hashLen17to32(byte[] s, int pos, int len) {
        long mul = k2 + len * 2;
        long a = fetch64(s, pos) * k1;
        long b = fetch64(s, pos + 8);
        long c = fetch64(s, pos + len - 8) * mul;
        long d = fetch64(s, pos + len - 16) * k2;
        return hashLen16(
                rotate(a + b, 43) + rotate(c, 30) + d,
                a + rotate(b + k2, 18) + c,
                mul);
    }

    /**
     * Reverses the byte order of a 64-bit value.
     *
     * @param l the value to byte-swap; must not be {@code null}
     * @return {@code l} with its eight bytes in reverse order
     * @throws NullPointerException if {@code l} is {@code null}
     */
    public static Long reversalByte(Long l) {
        return Long.reverseBytes(l);
    }

    /** Hashes 33 to 64 bytes. */
    private static long hashLen33to64(byte[] s, int pos, int len) {
        long mul = k2 + len * 2;
        long a = fetch64(s, pos) * k2;
        long b = fetch64(s, pos + 8);
        long c = fetch64(s, pos + len - 24);
        long d = fetch64(s, pos + len - 32);
        long e = fetch64(s, pos + 16) * k2;
        long f = fetch64(s, pos + 24) * 9;
        long g = fetch64(s, pos + len - 8);
        long h = fetch64(s, pos + len - 16) * mul;
        long u = rotate(a + g, 43) + (rotate(b, 30) + c) * 9;
        long v = ((a + g) ^ d) + f + 1;
        long w = Long.reverseBytes((u + v) * mul) + h;
        long x = rotate(e + f, 42) + c;
        long y = (Long.reverseBytes((v + w) * mul) + g) * mul;
        long z = e + f + c;
        a = Long.reverseBytes((x + z) * mul + y) + b;
        b = shiftMix((z + a) * mul + d + h) * mul;
        return b + x;
    }

    /**
     * Computes the 64-bit CityHash of {@code len} bytes of {@code s} starting at {@code pos}.
     *
     * @param s   the array holding the bytes to hash
     * @param pos index of the first byte to hash
     * @param len number of bytes to hash
     * @return the 64-bit hash value
     */
    public static long cityHash64(byte[] s, int pos, int len) {
        if (len <= 16) {
            return hashLen0to16(s, pos, len);
        }
        if (len <= 32) {
            return hashLen17to32(s, pos, len);
        }
        if (len <= 64) {
            return hashLen33to64(s, pos, len);
        }

        // For inputs over 64 bytes we hash the end first, and then as we
        // loop we keep 56 bytes of state: v, w, x, y, and z.
        long x = fetch64(s, pos + len - 40);
        long y = fetch64(s, pos + len - 16) + fetch64(s, pos + len - 56);
        long z = hashLen16(fetch64(s, pos + len - 48) + len, fetch64(s, pos + len - 24));
        long[] v = weakHashLen32WithSeeds(s, pos + len - 64, len, z);
        long[] w = weakHashLen32WithSeeds(s, pos + len - 32, y + k1, x);
        x = x * k1 + fetch64(s, pos);

        // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
        len = (len - 1) & ~63;
        do {
            x = rotate(x + y + v[0] + fetch64(s, pos + 8), 37) * k1;
            y = rotate(y + v[1] + fetch64(s, pos + 48), 42) * k1;
            x ^= w[1];
            y += v[0] + fetch64(s, pos + 40);
            z = rotate(z + w[0], 33) * k1;
            v = weakHashLen32WithSeeds(s, pos, v[1] * k1, x + w[0]);
            w = weakHashLen32WithSeeds(s, pos + 32, z + w[1], y + fetch64(s, pos + 16));
            long tmp = x;
            x = z;
            z = tmp;
            pos += 64;
            len -= 64;
        } while (len != 0);

        return hashLen16(
                hashLen16(v[0], w[0]) + shiftMix(y) * k1 + z,
                hashLen16(v[1], w[1]) + x);
    }

    /** Produces a two-element (16-byte) hash of four 64-bit words plus two seeds. */
    private static long[] weakHashLen32WithSeeds(
            long w, long x, long y, long z, long a, long b) {
        a += w;
        b = rotate(b + a + z, 21);
        long c = a;
        a += x;
        a += y;
        b += rotate(a, 44);
        return new long[] {a + z, b + c};
    }

    /** Same as above, reading the four words from {@code s[pos .. pos + 31]}. */
    private static long[] weakHashLen32WithSeeds(byte[] s, int pos, long a, long b) {
        return weakHashLen32WithSeeds(
                fetch64(s, pos),
                fetch64(s, pos + 8),
                fetch64(s, pos + 16),
                fetch64(s, pos + 24),
                a,
                b);
    }

    /** Seeded 128-bit hash used for inputs shorter than 128 bytes. */
    private static long[] cityMurmur(byte[] s, int pos, int len, long seed0, long seed1) {
        long a = seed0;
        long b = seed1;
        long c = 0;
        long d = 0;
        int l = len - 16;
        if (l <= 0) { // len <= 16
            a = shiftMix(a * k1) * k1;
            c = b * k1 + hashLen0to16(s, pos, len);
            d = shiftMix(a + (len >= 8 ? fetch64(s, pos) : c));
        } else { // len > 16
            c = hashLen16(fetch64(s, pos + len - 8) + k1, a);
            d = hashLen16(b + len, c + fetch64(s, pos + len - 16));
            a += d;
            do {
                a ^= shiftMix(fetch64(s, pos) * k1) * k1;
                a *= k1;
                b ^= a;
                c ^= shiftMix(fetch64(s, pos + 8) * k1) * k1;
                c *= k1;
                d ^= c;
                pos += 16;
                l -= 16;
            } while (l > 0);
        }
        a = hashLen16(a, c);
        b = hashLen16(d, b);
        return new long[] {a ^ b, hashLen16(b, a)};
    }

    /** Seeded 128-bit hash; returns the result as a two-element array. */
    private static long[] cityHash128WithSeed(
            byte[] s, int pos, int len, long seed0, long seed1) {
        if (len < 128) {
            return cityMurmur(s, pos, len, seed0, seed1);
        }

        // We expect len >= 128 to be the common case. Keep 56 bytes of state:
        // v, w, x, y, and z.
        long[] v = new long[2];
        long[] w = new long[2];
        long x = seed0;
        long y = seed1;
        long z = k1 * len;
        v[0] = rotate(y ^ k1, 49) * k1 + fetch64(s, pos);
        v[1] = rotate(v[0], 42) * k1 + fetch64(s, pos + 8);
        w[0] = rotate(y + z, 35) * k1 + x;
        w[1] = rotate(x + fetch64(s, pos + 88), 53) * k1;

        // Each pass consumes 128 bytes as two identical 64-byte rounds.
        do {
            for (int round = 0; round < 2; round++) {
                x = rotate(x + y + v[0] + fetch64(s, pos + 16), 37) * k1;
                y = rotate(y + v[1] + fetch64(s, pos + 48), 42) * k1;
                x ^= w[1];
                y ^= v[0];
                z = rotate(z ^ w[0], 33);
                v = weakHashLen32WithSeeds(s, pos, v[1] * k1, x + w[0]);
                w = weakHashLen32WithSeeds(s, pos + 32, z + w[1], y);
                long swap = z;
                z = x;
                x = swap;
                pos += 64;
            }
            len -= 128;
        } while (len >= 128);

        y += rotate(w[0], 37) * k0 + z;
        x += rotate(v[0] + z, 49) * k0;

        // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
        // These reads may reach back into bytes already consumed by the loop above.
        for (int tailDone = 0; tailDone < len; ) {
            tailDone += 32;
            y = rotate(y - x, 42) * k0 + v[1];
            w[0] += fetch64(s, pos + len - tailDone + 16);
            x = rotate(x, 49) * k0 + w[0];
            w[0] += v[0];
            v = weakHashLen32WithSeeds(s, pos + len - tailDone, v[0], v[1]);
        }

        // At this point our 48 bytes of state should contain more than
        // enough information for a strong 128-bit hash. We use two
        // different 48-byte-to-8-byte hashes to get a 16-byte final result.
        x = hashLen16(x, v[0]);
        y = hashLen16(y, w[0]);
        return new long[] {
            hashLen16(x + v[1], w[1]) + y,
            hashLen16(x + w[1], y + v[1])
        };
    }

    /**
     * Computes the 128-bit CityHash of {@code len} bytes of {@code s} starting at {@code pos}.
     *
     * @param s   the array holding the bytes to hash
     * @param pos index of the first byte to hash
     * @param len number of bytes to hash
     * @return a two-element array holding the two 64-bit halves of the hash
     */
    static long[] cityHash128(byte[] s, int pos, int len) {
        if (len >= 16) {
            // The first 16 bytes become the seed; the remainder is hashed with it.
            return cityHash128WithSeed(
                    s, pos + 16, len - 16, fetch64(s, pos) ^ k3, fetch64(s, pos + 8));
        } else if (len >= 8) {
            // The whole input is folded into the seed, so an empty range is hashed.
            return cityHash128WithSeed(
                    new byte[0], 0, 0,
                    fetch64(s, pos) ^ (len * k0),
                    fetch64(s, pos + len - 8) ^ k1);
        } else {
            return cityHash128WithSeed(s, pos, len, k0, k1);
        }
    }
}

Java版cityHash64 与cityHash128算法的实现相关推荐

  1. java 排序算法面试题_面试题: java中常见的排序算法的实现及比较

    1.冒泡排序 1.1 冒泡排序普通版 每次冒泡过程都是从数列的第一个元素开始,然后依次和剩余的元素进行比较,若小于相邻元素,则交换两者位置,同时将较大元素作为下一个比较的基准元素,继续将该元素与其相邻 ...

  2. java em算法_python em算法的实现

    ''' 数据集:伪造数据集(两个高斯分布混合) 数据集长度:1000 ------------------------------ 运行结果: ---------------------------- ...

  3. java开发抢红包算法,抢红包算法的实现-java

    二倍均值法 原理 原理 剩余红包金额M,剩余人数N,那么:每次抢到金额=随机(0,M/N*2) 保证了每次随机金额的平均值是公平的 假设10人,红包金额100元 第一人:100/10*2=20,随机范 ...

  4. 53.垃圾回收算法的实现原理、启动Java垃圾回收、Java垃圾回收过程、垃圾回收中实例的终结、对象什么时候符合垃圾回收的条件、GC Scope 示例程序、GC OutOfMemoryError的示例

    53.垃圾回收算法的实现原理 53.1.目录 53.2.启动Java垃圾回收 53.3.Java垃圾回收过程 53.4.垃圾回收中实例的终结 53.5.对象什么时候符合垃圾回收的条件? 53.5.1. ...

  5. 查找算法的实现c语言,查找算法的实现(C语言版)

    <查找算法的实现(C语言版)>由会员分享,可在线阅读,更多相关<查找算法的实现(C语言版)(6页珍藏版)>请在人人文库网上搜索. 1.芬梨受交峙东喊济雏狈违晕裂赵檀芬逛温乌摄阿 ...

  6. 数据挖掘Java——DBSCAN算法的实现

    一.DBSCAN算法的前置知识 DBSCAN算法:如果一个点q的区域内包含多于MinPts个对象,则创建一个q作为核心对象的簇.然后,反复地寻找从这些核心对象直接密度可达的对象,把一些密度可达簇进行合 ...

  7. JAVA实现中点画线_实验1-中点画线和Bresenham画线算法的实现

    <实验1-中点画线和Bresenham画线算法的实现>由会员分享,可在线阅读,更多相关<实验1-中点画线和Bresenham画线算法的实现(9页珍藏版)>请在人人文库网上搜索. ...

  8. java令牌桶_简单分析Guava中RateLimiter中的令牌桶算法的实现

    令牌桶算法是网络流量整形(Traffic Shaping)和速率限制(Rate Limiting)中最常使用的一种算法.典型情况下,令牌桶算法用来控制发送到网络上的数据的数目,并允许突发数据的发送. ...

  9. 约瑟夫问题算法的实现(代码实现) [Java][数据结构]

    约瑟夫问题算法的实现(代码实现) 代码如下(我们将这个方法也定义到单向环形链表类中): 其实我们的约瑟夫问题算法就相当于是一个特殊的删除单向环形链表中的结点的方法,所以我们就写到单向环形链表类中 /* ...

最新文章

  1. 超星未来发布新一代高级别自动驾驶车载计算平台
  2. 【Tools】git提示“warning: LF will be replaced by CRLF”的解决办法
  3. 入门SAP PP的学习流程
  4. 前端笔记-使用vue-cli(脚手架)开发TodoList
  5. 大学计算机专业全民,计算机专业大学排名实力顺序(上大学国内计算机专业大学哪个好值得报读)...
  6. [xPlugins] 开发中常用富文本编辑器介绍
  7. js 匿名函数_javascript:函数的使用
  8. python基础: 选择语句
  9. 七年之后的《深入理解计算机系统》
  10. 坦克大战-创建者模式实现-c#
  11. jQuery插件,UI
  12. python脚本——ping检测在线主机
  13. Ajax和Json使用入门
  14. windows批处理批量更改文件名称
  15. 【DB笔试面试755】在Oracle的DG中,RFS、LNSn、MRP、LSP进程的作用分别是什么?
  16. 用python123.io编程世界你好_python语言IO编程
  17. 专访马云:下一个星辰大海是百货商店
  18. 上拉电阻、下拉电阻的理解
  19. 攻防世界MISC刷题1-50
  20. java 集成 atlas

热门文章

  1. 电子商务领域的架构师弄潮儿 ——访麦包包首席架构师盛国军
  2. JavaSpring过时的经典语录
  3. Educoder 分布式文件系统HDFS
  4. 图像处理 | 灰度线性变换与非线性变化
  5. 雅虎创始人杨致远谈青年该如何创业
  6. Python爬虫实战:爬取解放日报新闻文章
  7. Attentive Pooling Networks论文讲解
  8. Java项目:JSP会员卡积分管理系统
  9. 华北五省比赛经验教训
  10. 企业如何选择合适的分销商城系统呢?