900字范文 > 暴雪hash来作整数的hash

暴雪hash来作整数的hash

时间：2018-07-01 21:42:44

相关推荐

暴雪hash来作整数的hash

问题提出

我们常常在项目中会用到探测一个数组B中是不是有数组A中的数据。

方案1：将B中的数据放到一个位图中，然后用A中的数据探测。

缺点：如果B中的数据比较稀疏，比如[1,2,900,800]等，位图开销大，因为要为没有出现的数也需要有一个bit来标识。

方案2：将B中的数据存到一个hash表中，然后用A中的数据作探测。

暴雪hash的原理见我之前转载的文章暴雪hash

以unordered_set为例，将所有的数据存在set中会占用大量的空间。如果采用改版后的暴雪hash算法，那么可以采用一个bool字段来标识这个整数。

源码参考来源

整数hash函数选取的来源

源码如下：

//头文件#ifndef _Uint64Hash_H#define _Uint64Hash_H#include <string>using namespace std;#define MAXTABLELEN 1024 // 默认哈希索引表大小 ////////////////////////////////////////////////////////////////////////// // 哈希索引表定义 typedef struct _HASHTABLE{ uint64_t nHashA; uint64_t nHashB; bool bExists; }HASHTABLE, *PHASHTABLE ; class Uint64Hash{public:Uint64Hash(const long nTableLength = MAXTABLELEN);~Uint64Hash(void);private: uint64_t cryptTable[0x500]; uint64_t m_tablelength; // 哈希索引表长度 HASHTABLE *m_HashIndexTable; private:void InitCryptTable(); // 对哈希索引表预处理uint64_t HashUint64(uint64_t key); uint64_t HashUint64A(uint64_t key); uint64_t HashUint64B(uint64_t key); public:uint64_t Hash(uint64_t val);bool Hashed(uint64_t val); // 检测url是否被hash过};#endif

#include<iostream>#include "Uint64Hash.h"//#include <boost/timer.hpp>#include <string>#include <stdlib.h>using namespace std;Uint64Hash::Uint64Hash(const long nTableLength /*= MAXTABLELEN*/){ InitCryptTable();m_tablelength = nTableLength;//初始化hash表 m_HashIndexTable = new HASHTABLE[nTableLength];for ( int i = 0; i < nTableLength; i++ ){m_HashIndexTable[i].nHashA = -1;m_HashIndexTable[i].nHashB = -1;m_HashIndexTable[i].bExists = false;}}Uint64Hash::~Uint64Hash(void){ //清理内存 if ( NULL != m_HashIndexTable ){delete []m_HashIndexTable;m_HashIndexTable = NULL;m_tablelength = 0;} }/************************************************************************//*函数名：InitCryptTable*功能：对哈希索引表预处理 *返回值：无************************************************************************/void Uint64Hash::InitCryptTable() { uint64_t seed = 0x00100001, index1 = 0, index2 = 0, i;for( index1 = 0; index1 < 0x100; index1++ ){ for( index2 = index1, i = 0; i < 5; i++, index2 += 0x100 ){uint64_t temp1, temp2; seed = (seed * 125 + 3) % 0x2AAAAB; temp1 = (seed & 0xFFFF) << 0x10; seed = (seed * 125 + 3) % 0x2AAAAB; temp2 = (seed & 0xFFFF); cryptTable[index2] = ( temp1 | temp2 ); } } } /************************************************************************//*函数名：HashUint64*功能：求取哈希值 *返回值：返回hash值************************************************************************/uint64_t Uint64Hash::HashUint64( uint64_t key) { key = (~key) + (key << 21); // key = (key << 21) - key - 1; key = key ^ (key >> 24); key = (key + (key << 3)) + (key << 8); // key * 265 key = key ^ (key >> 14); key = (key + (key << 2)) + (key << 4); // key * 21 key = key ^ (key >> 28); key = key + (key << 31); return key; } uint64_t Uint64Hash::HashUint64A( uint64_t key) { key = (~key) + (key << 18); // key = (key << 18) - key - 1; key = key ^ (key >> 31); key = key * 21; // key = (key + (key << 2)) + (key << 4); key = key ^ (key >> 11); key = key + (key << 6); key = key ^ (key >> 22); return key; } uint64_t Uint64Hash::HashUint64B( uint64_t key) { key = ~key + (key << 15); // key = (key << 15) - key - 1; key = key ^ (key >> 12); key = key + (key << 2); key = key ^ (key >> 4); key = key * 2057; // key = (key + (key << 3)) + (key << 11); key = key ^ (key >> 16); return key; } /************************************************************************//*函数名：Hashed*功能：检测一个字符串是否被hash过*返回值：如果存在，1；否则，返回0************************************************************************/bool Uint64Hash::Hashed( uint64_t key ) { //不同的字符串三次hash还会碰撞的几率无限接近于不可能 uint64_t nHash = HashUint64(key);uint64_t nHashA = HashUint64A(key);uint64_t nHashB = HashUint64B(key);uint64_t nHashStart = nHash % m_tablelength,nHashPos = nHashStart;while ( m_HashIndexTable[nHashPos].bExists){ if ( m_HashIndexTable[nHashPos].nHashA == nHashA && m_HashIndexTable[nHashPos].nHashB == nHashB)return 1; elsenHashPos = (nHashPos + 1) % m_tablelength;if (nHashPos == nHashStart)break; }return 0; //没有找到 } /************************************************************************//*函数名：Hash*功能：hash一个字符串 *返回值：成功，返回位置；失败，返回-1************************************************************************/uint64_t Uint64Hash::Hash(uint64_t key){uint64_t nHash = HashUint64(key);uint64_t nHashA = HashUint64A(key);uint64_t nHashB = HashUint64B(key);uint64_t nHashStart = nHash % m_tablelength, nHashPos = nHashStart;while ( m_HashIndexTable[nHashPos].bExists){ nHashPos = (nHashPos + 1) % m_tablelength;if (nHashPos == nHashStart) //一个轮回{ //hash表中没有空余的位置了,无法完成hash return -1; }}m_HashIndexTable[nHashPos].bExists = true;m_HashIndexTable[nHashPos].nHashA = nHashA;m_HashIndexTable[nHashPos].nHashB = nHashB;return nHashPos; }int main(){// boost::timer t; const int SIZE = 5000000; Uint64Hash s( SIZE*2 ) ; for(int i = 0 ; i < SIZE ; i++){s.Hash(i ); } for(int i = 0 ; i < SIZE ; i++){if( !s.Hashed(i )){cout<<"ERROR"<<endl; break; } } cout<<t.elapsed()<<endl; for(int i = SIZE ; i < 2*SIZE ; i++){if( s.Hashed(i )){cout<<"ERROR " << i<<endl; } }// cout<<t.elapsed()<<endl;}

编译命令是：

g++ -O3 –std=c++11

如果需要用到boost 中的timer来统计时间的话，编译命令是：

g++ -O3 –std=c++11 *.cpp -lboost_system

和stl中unordered_set比较

unordered_set的代码为：

#include<iostream>#include <unordered_set> #include <boost/timer.hpp>using namespace std;int main(){boost::timer t;const int SIZE = 5000000;unordered_set<int> s(SIZE);for(int i = 0 ; i < SIZE ; i++){s.insert( i );}for(int i = 0 ; i < SIZE ; i++){if( s.end() == s.find( i )){cout<<"ERROR"<<endl;break;}}cout<<t.elapsed()<<endl;}

时间上面，暴雪的hash函数在时间效率上慢很多，在都加上了O3优化选项后，在500W数量上面，暴雪的插入和探测时间是1.11s，unordered_set耗时：0.46s。

本内容不代表本网观点和政治立场，如有侵犯你的权益请联系我们处理。

网友评论

网友评论仅供其表达个人看法，并不表明网站立场。