Murmurhash 哈希算法 介绍与实现
最近在项目代码中看到了一种hash算法,以前没有遇见过,在此记录下来。
一、介绍
MurmurHash 是一种非加密型哈希函数,适用于一般的哈希检索操作。 由Austin Appleby在2008年发明, 并出现了多个变种,都已经发布到了公有领域(public domain)。与其它流行的哈希函数相比,对于规律性较强的key,MurmurHash的随机分布特征表现更良好。—摘自wiki
Redis在实现字典时用到了两种不同的哈希算法,MurmurHash便是其中一种(另一种是djb),在Redis中应用十分广泛,包括数据库、集群、哈希键、阻塞操作等功能都用到了这个算法。发明算法的作者被邀到google工作,该算法最新版本是MurmurHash3,基于MurmurHash2改进了一些小瑕疵,使得速度更快,实现了32位(低延时)、128位HashKey,尤其对大块的数据,具有较高的平衡性与低碰撞率。
二、代码
代码来自GitHub: https://github.com/aappleby/smhasher
因为Murmurhash最新版本是MurmurHash3,这里只简单测试了一下MurmurHash3
MurmurHash3.h
//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.

#ifndef _MURMURHASH3_H_
#define _MURMURHASH3_H_

//-----------------------------------------------------------------------------
// Platform-specific functions and macros

// Microsoft Visual Studio (versions that ship without <stdint.h>)

#if defined(_MSC_VER) && (_MSC_VER < 1600)

typedef unsigned char uint8_t;
typedef unsigned int uint32_t;
typedef unsigned __int64 uint64_t;

// Other compilers

#else // defined(_MSC_VER)

#include <stdint.h>

#endif // !defined(_MSC_VER)

//-----------------------------------------------------------------------------
// Hash functions.
//
// Common parameters:
//   key  - pointer to the bytes to hash
//   len  - number of bytes at key
//   seed - initial value mixed into the hash state
//   out  - destination for the result (size depends on the variant, see below)

// 32-bit hash: writes one uint32_t (4 bytes) to out.
void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out );

// 128-bit hash built from 32-bit operations: writes four uint32_t (16 bytes)
// to out.
void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out );

// 128-bit hash built from 64-bit operations: writes two uint64_t (16 bytes)
// to out.
void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out );

//-----------------------------------------------------------------------------

#endif // _MURMURHASH3_H_
MurmurHash3.c
//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.// Note - The x86 and x64 versions do _not_ produce the same results, as the
// algorithms are optimized for their respective platforms. You can still
// compile and run any of them on any platform, but your performance with the
// non-native version will be less than optimal.
//
// github : https://github.com/aappleby/smhasher

#include "MurmurHash3.h"

#include <string.h>

//-----------------------------------------------------------------------------
// Platform-specific functions and macros

// Microsoft Visual Studio

#if defined(_MSC_VER)

// 'static' gives the helpers internal linkage so they never need an external
// definition.
#define FORCE_INLINE static __forceinline

#include <stdlib.h>

#define ROTL32(x,y) _rotl(x,y)
#define ROTL64(x,y) _rotl64(x,y)

#define BIG_CONSTANT(x) (x)

// Other compilers

#else // defined(_MSC_VER)

// In C a plain 'inline' function provides no external definition, which can
// fail at link time; 'static inline' avoids that.
#if defined(__GNUC__) || defined(__clang__)
#define FORCE_INLINE static inline __attribute__((always_inline))
#else
#define FORCE_INLINE static inline
#endif

// Rotate x left by r bits. The shift counts are masked so that no shift by the
// full type width can occur.
static inline uint32_t rotl32 ( uint32_t x, int8_t r )
{
  return (x << (r & 31)) | (x >> ((32 - r) & 31));
}

static inline uint64_t rotl64 ( uint64_t x, int8_t r )
{
  return (x << (r & 63)) | (x >> ((64 - r) & 63));
}

#define ROTL32(x,y) rotl32(x,y)
#define ROTL64(x,y) rotl64(x,y)

#define BIG_CONSTANT(x) (x##LLU)

#endif // !defined(_MSC_VER)

//-----------------------------------------------------------------------------
// Block read - if your platform needs to do endian-swapping, do the
// conversion here.
//
// p is a byte pointer and i is an index in units of whole blocks (it may be
// negative). The bytes are copied with memcpy so that the key does not have to
// be aligned and no object is accessed through an incompatible pointer type.
// The value is read in the host's native byte order.

FORCE_INLINE uint32_t getblock32 ( const uint8_t * p, int i )
{
  uint32_t block;
  memcpy(&block, p + i * 4, sizeof block);
  return block;
}

FORCE_INLINE uint64_t getblock64 ( const uint8_t * p, int i )
{
  uint64_t block;
  memcpy(&block, p + i * 8, sizeof block);
  return block;
}

//-----------------------------------------------------------------------------
// Finalization mix - force all bits of a hash block to avalanche

FORCE_INLINE uint32_t fmix32 ( uint32_t h )
{
  h ^= h >> 16;
  h *= 0x85ebca6b;
  h ^= h >> 13;
  h *= 0xc2b2ae35;
  h ^= h >> 16;

  return h;
}

//----------

FORCE_INLINE uint64_t fmix64 ( uint64_t k )
{
  k ^= k >> 33;
  k *= BIG_CONSTANT(0xff51afd7ed558ccd);
  k ^= k >> 33;
  k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
  k ^= k >> 33;

  return k;
}

//-----------------------------------------------------------------------------
// Hashes len bytes at key with the given seed and stores a 32-bit result
// (one uint32_t, 4 bytes) at out. out does not need to be aligned.
// Negative len values are not handled.

void MurmurHash3_x86_32 ( const void * key, int len,
                          uint32_t seed, void * out )
{
  const uint8_t * data = (const uint8_t*)key;
  const int nblocks = len / 4;

  uint32_t h1 = seed;

  const uint32_t c1 = 0xcc9e2d51;
  const uint32_t c2 = 0x1b873593;

  //----------
  // body

  // 'blocks' points just past the last whole block; the loop walks up to it
  // using negative block indices.
  const uint8_t * blocks = data + nblocks*4;

  for(int i = -nblocks; i; i++)
  {
    uint32_t k1 = getblock32(blocks,i);

    k1 *= c1;
    k1 = ROTL32(k1,15);
    k1 *= c2;

    h1 ^= k1;
    h1 = ROTL32(h1,13);
    h1 = h1*5+0xe6546b64;
  }

  //----------
  // tail

  const uint8_t * tail = data + nblocks*4;

  uint32_t k1 = 0;

  // Each case intentionally falls through to pick up the remaining bytes.
  switch(len & 3)
  {
  case 3: k1 ^= (uint32_t)tail[2] << 16; // fallthrough
  case 2: k1 ^= (uint32_t)tail[1] << 8;  // fallthrough
  case 1: k1 ^= tail[0];
          k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
          break;
  default:
          break;
  }

  //----------
  // finalization

  h1 ^= (uint32_t)len;

  h1 = fmix32(h1);

  memcpy(out, &h1, sizeof h1);
}

//-----------------------------------------------------------------------------
// Hashes len bytes at key with the given seed and stores a 128-bit result
// (four uint32_t, 16 bytes) at out. out does not need to be aligned.
// Negative len values are not handled.

void MurmurHash3_x86_128 ( const void * key, const int len,
                           uint32_t seed, void * out )
{
  const uint8_t * data = (const uint8_t*)key;
  const int nblocks = len / 16;

  uint32_t h1 = seed;
  uint32_t h2 = seed;
  uint32_t h3 = seed;
  uint32_t h4 = seed;

  const uint32_t c1 = 0x239b961b;
  const uint32_t c2 = 0xab0e9789;
  const uint32_t c3 = 0x38b34ae5;
  const uint32_t c4 = 0xa1e38b93;

  //----------
  // body

  // 'blocks' points just past the last whole 16-byte block; indices passed to
  // getblock32 are negative offsets in 4-byte units.
  const uint8_t * blocks = data + nblocks*16;

  for(int i = -nblocks; i; i++)
  {
    uint32_t k1 = getblock32(blocks,i*4+0);
    uint32_t k2 = getblock32(blocks,i*4+1);
    uint32_t k3 = getblock32(blocks,i*4+2);
    uint32_t k4 = getblock32(blocks,i*4+3);

    k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;

    h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b;

    k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2;

    h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747;

    k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3;

    h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35;

    k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4;

    h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17;
  }

  //----------
  // tail

  const uint8_t * tail = data + nblocks*16;

  uint32_t k1 = 0;
  uint32_t k2 = 0;
  uint32_t k3 = 0;
  uint32_t k4 = 0;

  // Each case intentionally falls through to pick up the remaining bytes.
  // Bytes are widened to uint32_t before shifting so that a shift by 24 never
  // moves a set bit into the sign bit of a promoted int.
  switch(len & 15)
  {
  case 15: k4 ^= (uint32_t)tail[14] << 16; // fallthrough
  case 14: k4 ^= (uint32_t)tail[13] << 8;  // fallthrough
  case 13: k4 ^= (uint32_t)tail[12] << 0;
           k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
           // fallthrough
  case 12: k3 ^= (uint32_t)tail[11] << 24; // fallthrough
  case 11: k3 ^= (uint32_t)tail[10] << 16; // fallthrough
  case 10: k3 ^= (uint32_t)tail[ 9] << 8;  // fallthrough
  case  9: k3 ^= (uint32_t)tail[ 8] << 0;
           k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
           // fallthrough
  case  8: k2 ^= (uint32_t)tail[ 7] << 24; // fallthrough
  case  7: k2 ^= (uint32_t)tail[ 6] << 16; // fallthrough
  case  6: k2 ^= (uint32_t)tail[ 5] << 8;  // fallthrough
  case  5: k2 ^= (uint32_t)tail[ 4] << 0;
           k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
           // fallthrough
  case  4: k1 ^= (uint32_t)tail[ 3] << 24; // fallthrough
  case  3: k1 ^= (uint32_t)tail[ 2] << 16; // fallthrough
  case  2: k1 ^= (uint32_t)tail[ 1] << 8;  // fallthrough
  case  1: k1 ^= (uint32_t)tail[ 0] << 0;
           k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
           break;
  default:
           break;
  }

  //----------
  // finalization

  h1 ^= (uint32_t)len; h2 ^= (uint32_t)len;
  h3 ^= (uint32_t)len; h4 ^= (uint32_t)len;

  h1 += h2; h1 += h3; h1 += h4;
  h2 += h1; h3 += h1; h4 += h1;

  h1 = fmix32(h1);
  h2 = fmix32(h2);
  h3 = fmix32(h3);
  h4 = fmix32(h4);

  h1 += h2; h1 += h3; h1 += h4;
  h2 += h1; h3 += h1; h4 += h1;

  const uint32_t result[4] = { h1, h2, h3, h4 };
  memcpy(out, result, sizeof result);
}

//-----------------------------------------------------------------------------
// Hashes len bytes at key with the given seed and stores a 128-bit result
// (two uint64_t, 16 bytes) at out. out does not need to be aligned.
// Negative len values are not handled.

void MurmurHash3_x64_128 ( const void * key, const int len,
                           const uint32_t seed, void * out )
{
  const uint8_t * data = (const uint8_t*)key;
  const int nblocks = len / 16;

  uint64_t h1 = seed;
  uint64_t h2 = seed;

  const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
  const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);

  //----------
  // body

  const uint8_t * blocks = data;

  for(int i = 0; i < nblocks; i++)
  {
    uint64_t k1 = getblock64(blocks,i*2+0);
    uint64_t k2 = getblock64(blocks,i*2+1);

    k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1;

    h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;

    k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2;

    h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
  }

  //----------
  // tail

  const uint8_t * tail = data + nblocks*16;

  uint64_t k1 = 0;
  uint64_t k2 = 0;

  // Each case intentionally falls through to pick up the remaining bytes.
  switch(len & 15)
  {
  case 15: k2 ^= ((uint64_t)tail[14]) << 48; // fallthrough
  case 14: k2 ^= ((uint64_t)tail[13]) << 40; // fallthrough
  case 13: k2 ^= ((uint64_t)tail[12]) << 32; // fallthrough
  case 12: k2 ^= ((uint64_t)tail[11]) << 24; // fallthrough
  case 11: k2 ^= ((uint64_t)tail[10]) << 16; // fallthrough
  case 10: k2 ^= ((uint64_t)tail[ 9]) << 8;  // fallthrough
  case  9: k2 ^= ((uint64_t)tail[ 8]) << 0;
           k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
           // fallthrough
  case  8: k1 ^= ((uint64_t)tail[ 7]) << 56; // fallthrough
  case  7: k1 ^= ((uint64_t)tail[ 6]) << 48; // fallthrough
  case  6: k1 ^= ((uint64_t)tail[ 5]) << 40; // fallthrough
  case  5: k1 ^= ((uint64_t)tail[ 4]) << 32; // fallthrough
  case  4: k1 ^= ((uint64_t)tail[ 3]) << 24; // fallthrough
  case  3: k1 ^= ((uint64_t)tail[ 2]) << 16; // fallthrough
  case  2: k1 ^= ((uint64_t)tail[ 1]) << 8;  // fallthrough
  case  1: k1 ^= ((uint64_t)tail[ 0]) << 0;
           k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
           break;
  default:
           break;
  }

  //----------
  // finalization

  h1 ^= (uint64_t)len; h2 ^= (uint64_t)len;

  h1 += h2;
  h2 += h1;

  h1 = fmix64(h1);
  h2 = fmix64(h2);

  h1 += h2;
  h2 += h1;

  const uint64_t result[2] = { h1, h2 };
  memcpy(out, result, sizeof result);
}

//-----------------------------------------------------------------------------
#if 1

#include <inttypes.h>
#include <string.h>
#include <stdio.h>

#define SEED 0x97c29b3a

// Demo driver: hashes one fixed string with each of the three MurmurHash3
// variants and prints the results as unsigned decimal numbers.
int main(void)
{
  const char *str = "abcdefghijklmn";
  // The hash functions take the length as int; convert explicitly.
  const int len = (int)strlen(str);

  uint32_t out1;
  MurmurHash3_x86_32(str, len, SEED, &out1);
  printf("%" PRIu32 "\n", out1);

  uint32_t out2[4];
  MurmurHash3_x86_128(str, len, SEED, out2);
  printf("%" PRIu32 ", %" PRIu32 ", %" PRIu32 ", %" PRIu32 "\n",
         out2[0], out2[1], out2[2], out2[3]);

  // PRIu64 matches uint64_t on every platform; "%lu" only does where
  // unsigned long is 64 bits wide.
  uint64_t out3[2];
  MurmurHash3_x64_128(str, len, SEED, out3);
  printf("%" PRIu64 ", %" PRIu64 "\n", out3[0], out3[1]);

  return 0;
}

#endif
MurmurHash3_x86_32 将key 哈希为一个32位无符号整数
MurmurHash3_x86_128 将key 哈希为128位的结果(4个32位无符号整数),x86表示针对32位平台优化
MurmurHash3_x64_128 将key 哈希为128位的结果(2个64位无符号整数),x64表示针对64位平台优化
编译: gcc -std=c99 MurmurHash3.c MurmurHash3.h -o MurmurHash3
注意: 直接从GitHub拉取的代码在Linux可能编译不过去。
问题是在定义rotl32() 和rotl64()使用关键字inline,要么把inline去掉,要么在inline后面加关键字static.
关注公众号"小败日记",搬砖过程遇到的问题,大家一起探讨,资源共享
Murmurhash 哈希算法 介绍与实现相关推荐
- MurmurHash 哈希算法
MurmurHash:(multiply and rotate) and (multiply and rotate) Hash,乘法和旋转的hash 算法. 一.哈希函数 定义 散列函数(英语:Has ...
- 一致性哈希算法介绍,及java实现
https://www.cnblogs.com/hupengcool/p/3659016.html 应用场景 在做服务器负载均衡时候可供选择的负载均衡的算法有很多,包括: 轮循算法(Round Rob ...
- 一文搞懂负载均衡中的一致性哈希算法
一致性哈希算法在很多领域有应用,例如分布式缓存领域的 MemCache,Redis,负载均衡领域的 Nginx,各类 RPC 框架.不同领域场景不同,需要顾及的因素也有所差异,本文主要讨论在负载均衡中 ...
- 一致性哈希算法学习及JAVA代码实现分析
1,对于待存储的海量数据,如何将它们分配到各个机器中去?---数据分片与路由 当数据量很大时,通过改善单机硬件资源的纵向扩充方式来存储数据变得越来越不适用,而通过增加机器数目来获得水平横向扩展的方式则 ...
- .NET平台开源项目速览(12)哈希算法集合类库HashLib
阅读目录 1.哈希算法介绍 2.HashLib功能介绍 .NET的System.Security.Cryptography命名空间本身是提供加密服务,散列函数,对称与非对称加密算法等功能.实际上,大部 ...
- java 哈希一致算法_一致哈希算法Java实现
一致哈希算法(Consistent Hashing Algorithms)是一个分布式系统中常用的算法.传统的Hash算法当槽位(Slot)增减时,面临所有数据重新部署的问题,而一致哈希算法确可以保证 ...
- java murmurhash实现_一致性哈希算法与Java实现
一致性哈希算法是分布式系统中常用的算法.比如,一个分布式的存储系统,要将数据存储到具体的节点上,如果采用普通的hash方法,将数据映射到具体的节点上,如key%N,key是数据的key,N是机器节点数 ...
- Algorithm:C++语言实现之Hash哈希算法相关(dbj2、sdbm、MurmurHash)
Algorithm:C++语言实现之Hash哈希算法相关(dbj2.sdbm.MurmurHash) 目录 一.Hash知识 1.dbj2 2.sdbm 3.MurmurHash Hash Has ...
- 哈希算法——murmurhash一致性哈希算法
Murmurhash: 是一种非加密型哈希函数,适用于一般的哈希检索操作.高运算性能,低碰撞率,由Austin Appleby创建于2008年,现已应用到Hadoop.libstdc++.nginx. ...
最新文章
- java outofmemory jsp_Java 内存溢出(java.lang.OutOfMemoryError)的常见情况和处理方式总结...
- 手机淘宝 521 性能优化项目揭秘
- 【自动驾驶】24.欧拉角、旋转向量、四元数、万向锁
- 公共互联网网络安全突发事件应急预案_安徽新规:发生重大突发事件,官方5小时内必须发声...
- 前端笔试题(附答案)
- 春天:注入列表,地图,可选对象和getBeansOfType()陷阱
- 夺命雷公狗---ECSHOP---08---商品页的拇改成星星
- python 算术运算
- 【day31】函数总结
- 浏览器内核选型列表,请大家继续补充
- Python进阶(十三) os、random、time等标准库
- 魅族自动化测试架构之路
- mac系统安装搭载Windows系统虚拟机方法教程
- HDU 6438 Buy and Resell (优先队列 or 贪心)
- 如何实现对网站页面访问量的统计?
- 智慧路灯点亮新型城市
- 对辊柱塞式成型机总体设计
- Google word/sheets 常见的使用:
- 渗透测试-越权漏洞之垂直越权和水平越权
- CSS传统布局所用的元素