Go 语言运行时同时使用了多个数据结构组合表示哈希表,其中使用 hmap , 代码位置: src/runtime/map.go@hmap
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
// A header for a Go map. type hmap struct { // Note: the format of the hmap is also encoded in cmd/compile/internal/gc/reflect.go. // Make sure this stays in sync with the compiler's definition. count int// # live cells == size of map. Must be first (used by len() builtin) flags uint8 B uint8// log_2 of # of buckets (can hold up to loadFactor * 2^B items) noverflow uint16// approximate number of overflow buckets; see incrnoverflow for details hash0 uint32// hash seed
buckets unsafe.Pointer // array of 2^B Buckets. may be nil if count==0. oldbuckets unsafe.Pointer // previous bucket array of half the size, non-nil only when growing nevacuate uintptr// progress counter for evacuation (buckets less than this have been evacuated)
// A bucket for a Go map. type bmap struct { // tophash generally contains the top byte of the hash value // for each key in this bucket. If tophash[0] < minTopHash, // tophash[0] is a bucket evacuation state instead. tophash [bucketCnt]uint8 //Followed by bucketCnt keys and then bucketCnt elems. // Followed by bucketCnt keys and then bucketCnt elems. // NOTE: packing all the keys together and then all the elems together makes the // code a bit more complicated than alternating key/elem/key/elem/... but it allows // us to eliminate padding which would be needed for, e.g., map[int64]int8. // Followed by an overflow pointer.
funcmaplit(n *Node, m *Node, init *Nodes) { // make the map var a := nod(OMAKE, nil, nil) a.Esc = n.Esc a.List.Set2(typenod(n.Type), nodintconst(int64(n.List.Len()))) litas(m, a, init)
entries := n.List.Slice()
// The order pass already removed any dynamic (runtime-computed) entries. // All remaining entries are static. Double-check that. for _, r := range entries { if !isStaticCompositeLiteral(r.Left) || !isStaticCompositeLiteral(r.Right) { Fatalf("maplit: entry is not a literal: %v", r) } }
iflen(entries) > 25 { // For a large number of entries, put them in an array and loop.
// build types [count]Tindex and [count]Tvalue tk := types.NewArray(n.Type.Key(), int64(len(entries))) te := types.NewArray(n.Type.Elem(), int64(len(entries)))
tk.SetNoalg(true) te.SetNoalg(true)
dowidth(tk) dowidth(te)
// make and initialize static arrays vstatk := readonlystaticname(tk) vstate := readonlystaticname(te)
datak := nod(OARRAYLIT, nil, nil) datae := nod(OARRAYLIT, nil, nil) for _, r := range entries { datak.List.Append(r.Left) datae.List.Append(r.Right) } fixedlit(inInitFunction, initKindStatic, datak, vstatk, init) fixedlit(inInitFunction, initKindStatic, datae, vstate, init)
// loop adding structure elements to map // for i = 0; i < len(vstatk); i++ { // map[vstatk[i]] = vstate[i] // } i := temp(types.Types[TINT]) rhs := nod(OINDEX, vstate, i) rhs.SetBounded(true)
zero := nod(OAS, i, nodintconst(0)) cond := nod(OLT, i, nodintconst(tk.NumElem())) incr := nod(OAS, i, nod(OADD, i, nodintconst(1))) body := nod(OAS, lhs, rhs)
loop = typecheck(loop, ctxStmt) loop = walkstmt(loop) init.Append(loop) return } // For a small number of entries, just add them directly.
// Build list of var[c] = expr. // Use temporaries so that mapassign1 can have addressable key, elem. // TODO(josharian): avoid map key temporaries for mapfast_* assignments with literal keys. tmpkey := temp(m.Type.Key()) tmpelem := temp(m.Type.Elem())
for _, r := range entries { index, elem := r.Left, r.Right
setlineno(index) a := nod(OAS, tmpkey, index) a = typecheck(a, ctxStmt) a = walkstmt(a) init.Append(a)
setlineno(elem) a = nod(OAS, tmpelem, elem) a = typecheck(a, ctxStmt) a = walkstmt(a) init.Append(a)
setlineno(tmpelem) a = nod(OAS, nod(OINDEX, m, tmpkey), tmpelem) a = typecheck(a, ctxStmt) a = walkstmt(a) init.Append(a) }
a = nod(OVARKILL, tmpkey, nil) a = typecheck(a, ctxStmt) init.Append(a) a = nod(OVARKILL, tmpelem, nil) a = typecheck(a, ctxStmt) init.Append(a) }
// makemap implements Go map creation for make(map[k]v, hint). // If the compiler has determined that the map or the first bucket // can be created on the stack, h and/or bucket may be non-nil. // If h != nil, the map can be created directly in h. // If h.buckets != nil, bucket pointed to can be used as the first bucket. funcmakemap(t *maptype, hint int, h *hmap) *hmap { mem, overflow := math.MulUintptr(uintptr(hint), t.bucket.size) if overflow || mem > maxAlloc { hint = 0 }
// initialize Hmap if h == nil { h = new(hmap) } h.hash0 = fastrand()
// Find the size parameter B which will hold the requested # of elements. // For hint < 0 overLoadFactor returns false since hint < bucketCnt. B := uint8(0) for overLoadFactor(hint, B) { B++ } h.B = B
// allocate initial hash table // if B == 0, the buckets field is allocated lazily later (in mapassign) // If hint is large zeroing this memory could take a while. if h.B != 0 { var nextOverflow *bmap h.buckets, nextOverflow = makeBucketArray(t, h.B, nil) if nextOverflow != nil { h.extra = new(mapextra) h.extra.nextOverflow = nextOverflow } }
return h }
这个函数的执行过程会分成以下几个部分:
计算哈希占用的内存是否溢出或者超出能分配的最大值
调用 fastrand 获取一个随机的哈希种子
根据传入的 hint 计算出需要的最小需要的桶的数量
使用 runtime.makeBucketArray 创建用于保存桶的数组
runtime.makeBucketArray(src/runtime/map.go@makeBucketArray) 函数会根据传入的 B 计算出的需要创建的桶数量在内存中分配一片连续的空间用于存储数据.
// makeBucketArray initializes a backing array for map buckets. // 1<<b is the minimum number of buckets to allocate. // dirtyalloc should either be nil or a bucket array previously // allocated by makeBucketArray with the same t and b parameters. // If dirtyalloc is nil a new backing array will be alloced and // otherwise dirtyalloc will be cleared and reused as backing array. funcmakeBucketArray(t *maptype, b uint8, dirtyalloc unsafe.Pointer) (buckets unsafe.Pointer, nextOverflow *bmap) { base := bucketShift(b) nbuckets := base // For small b, overflow buckets are unlikely. // Avoid the overhead of the calculation. if b >= 4 { // Add on the estimated number of overflow buckets // required to insert the median number of elements // used with this value of b. nbuckets += bucketShift(b - 4) sz := t.bucket.size * nbuckets up := roundupsize(sz) if up != sz { nbuckets = up / t.bucket.size } }
if dirtyalloc == nil { buckets = newarray(t.bucket, int(nbuckets)) } else { // dirtyalloc was previously generated by // the above newarray(t.bucket, int(nbuckets)) // but may not be empty. buckets = dirtyalloc size := t.bucket.size * nbuckets if t.bucket.ptrdata != 0 { memclrHasPointers(buckets, size) } else { memclrNoHeapPointers(buckets, size) } }
if base != nbuckets { // We preallocated some overflow buckets. // To keep the overhead of tracking these overflow buckets to a minimum, // we use the convention that if a preallocated overflow bucket's overflow // pointer is nil, then there are more available by bumping the pointer. // We need a safe non-nil pointer for the last overflow bucket; just use buckets. nextOverflow = (*bmap)(add(buckets, base*uintptr(t.bucketsize))) last := (*bmap)(add(buckets, (nbuckets-1)*uintptr(t.bucketsize))) last.setoverflow(t, (*bmap)(buckets)) } return buckets, nextOverflow }
// mapaccess1 returns a pointer to h[key]. Never returns nil, instead // it will return a reference to the zero object for the elem type if // the key is not in the map. // NOTE: The returned pointer may keep the whole map live, so don't // hold onto it for very long. funcmapaccess1(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer { if raceenabled && h != nil { callerpc := getcallerpc() pc := funcPC(mapaccess1) racereadpc(unsafe.Pointer(h), callerpc, pc) raceReadObjectPC(t.key, key, callerpc, pc) } if msanenabled && h != nil { msanread(key, t.key.size) } if h == nil || h.count == 0 { if t.hashMightPanic() { t.hasher(key, 0) // see issue 23734 } return unsafe.Pointer(&zeroVal[0]) } if h.flags&hashWriting != 0 { throw("concurrent map read and map write") } hash := t.hasher(key, uintptr(h.hash0)) m := bucketMask(h.B) b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize))) if c := h.oldbuckets; c != nil { if !h.sameSizeGrow() { // There used to be half as many buckets; mask down one more power of two. m >>= 1 } oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize))) if !evacuated(oldb) { b = oldb } } top := tophash(hash) bucketloop: for ; b != nil; b = b.overflow(t) { for i := uintptr(0); i < bucketCnt; i++ { if b.tophash[i] != top { if b.tophash[i] == emptyRest { break bucketloop } continue } k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize)) if t.indirectkey() { k = *((*unsafe.Pointer)(k)) } if t.key.equal(key, k) { e := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.elemsize)) if t.indirectelem() { e = *((*unsafe.Pointer)(e)) } return e } } } return unsafe.Pointer(&zeroVal[0]) }
funcmapaccess2(t *maptype, h *hmap, key unsafe.Pointer) (unsafe.Pointer, bool) { if raceenabled && h != nil { callerpc := getcallerpc() pc := funcPC(mapaccess2) racereadpc(unsafe.Pointer(h), callerpc, pc) raceReadObjectPC(t.key, key, callerpc, pc) } if msanenabled && h != nil { msanread(key, t.key.size) } if h == nil || h.count == 0 { if t.hashMightPanic() { t.hasher(key, 0) // see issue 23734 } return unsafe.Pointer(&zeroVal[0]), false } if h.flags&hashWriting != 0 { throw("concurrent map read and map write") } hash := t.hasher(key, uintptr(h.hash0)) m := bucketMask(h.B) b := (*bmap)(unsafe.Pointer(uintptr(h.buckets) + (hash&m)*uintptr(t.bucketsize))) if c := h.oldbuckets; c != nil { if !h.sameSizeGrow() { // There used to be half as many buckets; mask down one more power of two. m >>= 1 } oldb := (*bmap)(unsafe.Pointer(uintptr(c) + (hash&m)*uintptr(t.bucketsize))) if !evacuated(oldb) { b = oldb } } top := tophash(hash) bucketloop: for ; b != nil; b = b.overflow(t) { for i := uintptr(0); i < bucketCnt; i++ { if b.tophash[i] != top { if b.tophash[i] == emptyRest { break bucketloop } continue } k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize)) if t.indirectkey() { k = *((*unsafe.Pointer)(k)) } if t.key.equal(key, k) { e := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.elemsize)) if t.indirectelem() { e = *((*unsafe.Pointer)(e)) } return e, true } } } return unsafe.Pointer(&zeroVal[0]), false }
使用 v, ok := hash[k] 的形式访问哈希表中元素时,我们能够通过这个布尔值更准确地知道当 v == nil 时,v 到底是哈希中存储的元素还是表示该键对应的元素不存在,所以在访问哈希时,更推荐使用这一种方式先判断元素是否存在。
var inserti *uint8 var insertk unsafe.Pointer var val unsafe.Pointer bucketloop: for { for i := uintptr(0); i < bucketCnt; i++ { if b.tophash[i] != top { if isEmpty(b.tophash[i]) && inserti == nil { inserti = &b.tophash[i] insertk = add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize)) val = add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize)) } if b.tophash[i] == emptyRest { break bucketloop } continue } k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize)) if !alg.equal(key, k) { continue } val = add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.valuesize)) goto done } ovf := b.overflow(t) if ovf == nil { break } b = ovf }
在上述的 for 循环中会依次遍历正常桶和溢出桶中存储的数据,整个过程会依次判断 tophash 是否相等、key 是否相等,遍历结束后会从循环中跳出。
funchashGrow(t *maptype, h *hmap) { // If we've hit the load factor, get bigger. // Otherwise, there are too many overflow buckets, // so keep the same number of buckets and "grow" laterally. bigger := uint8(1) if !overLoadFactor(h.count+1, h.B) { bigger = 0 h.flags |= sameSizeGrow } oldbuckets := h.buckets newbuckets, nextOverflow := makeBucketArray(t, h.B+bigger, nil)
funcevacuate(t *maptype, h *hmap, oldbucket uintptr) { b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))) newbit := h.noldbuckets() if !evacuated(b) { // TODO: reuse overflow buckets instead of using new ones, if there // is no iterator using the old buckets. (If !oldIterator.)
// xy contains the x and y (low and high) evacuation destinations. var xy [2]evacDst x := &xy[0] x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize))) x.k = add(unsafe.Pointer(x.b), dataOffset) x.e = add(x.k, bucketCnt*uintptr(t.keysize))
if !h.sameSizeGrow() { // Only calculate y pointers if we're growing bigger. // Otherwise GC can see bad pointers. y := &xy[1] y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize))) y.k = add(unsafe.Pointer(y.b), dataOffset) y.e = add(y.k, bucketCnt*uintptr(t.keysize)) }
for ; b != nil; b = b.overflow(t) { k := add(unsafe.Pointer(b), dataOffset) e := add(k, bucketCnt*uintptr(t.keysize)) for i := 0; i < bucketCnt; i, k, e = i+1, add(k, uintptr(t.keysize)), add(e, uintptr(t.elemsize)) { top := b.tophash[i] if isEmpty(top) { b.tophash[i] = evacuatedEmpty continue } if top < minTopHash { throw("bad map state") } k2 := k if t.indirectkey() { k2 = *((*unsafe.Pointer)(k2)) } var useY uint8 if !h.sameSizeGrow() { // Compute hash to make our evacuation decision (whether we need // to send this key/elem to bucket x or bucket y). hash := t.hasher(k2, uintptr(h.hash0)) if h.flags&iterator != 0 && !t.reflexivekey() && !t.key.equal(k2, k2) { // If key != key (NaNs), then the hash could be (and probably // will be) entirely different from the old hash. Moreover, // it isn't reproducible. Reproducibility is required in the // presence of iterators, as our evacuation decision must // match whatever decision the iterator made. // Fortunately, we have the freedom to send these keys either // way. Also, tophash is meaningless for these kinds of keys. // We let the low bit of tophash drive the evacuation decision. // We recompute a new random tophash for the next level so // these keys will get evenly distributed across all buckets // after multiple grows. useY = top & 1 top = tophash(hash) } else { if hash&newbit != 0 { useY = 1 } } }
if dst.i == bucketCnt { dst.b = h.newoverflow(t, dst.b) dst.i = 0 dst.k = add(unsafe.Pointer(dst.b), dataOffset) dst.e = add(dst.k, bucketCnt*uintptr(t.keysize)) } dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check if t.indirectkey() { *(*unsafe.Pointer)(dst.k) = k2 // copy pointer } else { typedmemmove(t.key, dst.k, k) // copy elem } if t.indirectelem() { *(*unsafe.Pointer)(dst.e) = *(*unsafe.Pointer)(e) } else { typedmemmove(t.elem, dst.e, e) } dst.i++ // These updates might push these pointers past the end of the // key or elem arrays. That's ok, as we have the overflow pointer // at the end of the bucket to protect against pointing past the // end of the bucket. dst.k = add(dst.k, uintptr(t.keysize)) dst.e = add(dst.e, uintptr(t.elemsize)) } } // Unlink the overflow buckets & clear key/elem to help GC. if h.flags&oldIterator == 0 && t.bucket.ptrdata != 0 { b := add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)) // Preserve b.tophash because the evacuation // state is maintained there. ptr := add(b, dataOffset) n := uintptr(t.bucketsize) - dataOffset memclrHasPointers(ptr, n) } }
if oldbucket == h.nevacuate { advanceEvacuationMark(h, t, newbit) } }
funcadvanceEvacuationMark(h *hmap, t *maptype, newbit uintptr) { h.nevacuate++ // Experiments suggest that 1024 is overkill by at least an order of magnitude. // Put it in there as a safeguard anyway, to ensure O(1) behavior. stop := h.nevacuate + 1024 if stop > newbit { stop = newbit } for h.nevacuate != stop && bucketEvacuated(t, h, h.nevacuate) { h.nevacuate++ } if h.nevacuate == newbit { // newbit == # of oldbuckets // Growing is all done. Free old main bucket array. h.oldbuckets = nil // Can discard old overflow buckets as well. // If they are still referenced by an iterator, // then the iterator holds a pointers to the slice. if h.extra != nil { h.extra.oldoverflow = nil } h.flags &^= sameSizeGrow } }
funcmapdelete(t *maptype, h *hmap, key unsafe.Pointer) { if raceenabled && h != nil { callerpc := getcallerpc() pc := funcPC(mapdelete) racewritepc(unsafe.Pointer(h), callerpc, pc) raceReadObjectPC(t.key, key, callerpc, pc) } if msanenabled && h != nil { msanread(key, t.key.size) } if h == nil || h.count == 0 { if t.hashMightPanic() { t.hasher(key, 0) // see issue 23734 } return } if h.flags&hashWriting != 0 { throw("concurrent map writes") }
hash := t.hasher(key, uintptr(h.hash0))
// Set hashWriting after calling t.hasher, since t.hasher may panic, // in which case we have not actually done a write (delete). h.flags ^= hashWriting
bucket := hash & bucketMask(h.B) if h.growing() { growWork(t, h, bucket) } b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize))) bOrig := b top := tophash(hash) search: for ; b != nil; b = b.overflow(t) { for i := uintptr(0); i < bucketCnt; i++ { if b.tophash[i] != top { if b.tophash[i] == emptyRest { break search } continue } k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize)) k2 := k if t.indirectkey() { k2 = *((*unsafe.Pointer)(k2)) } if !t.key.equal(key, k2) { continue } // Only clear key if there are pointers in it. if t.indirectkey() { *(*unsafe.Pointer)(k) = nil } elseif t.key.ptrdata != 0 { memclrHasPointers(k, t.key.size) } e := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.elemsize)) if t.indirectelem() { *(*unsafe.Pointer)(e) = nil } elseif t.elem.ptrdata != 0 { memclrHasPointers(e, t.elem.size) } else { memclrNoHeapPointers(e, t.elem.size) } b.tophash[i] = emptyOne // If the bucket now ends in a bunch of emptyOne states, // change those to emptyRest states. // It would be nice to make this a separate function, but // for loops are not currently inlineable. if i == bucketCnt-1 { if b.overflow(t) != nil && b.overflow(t).tophash[0] != emptyRest { goto notLast } } else { if b.tophash[i+1] != emptyRest { goto notLast } } for { b.tophash[i] = emptyRest if i == 0 { if b == bOrig { break// beginning of initial bucket, we're done. } // Find previous bucket, continue at its last entry. c := b for b = bOrig; b.overflow(t) != c; b = b.overflow(t) { } i = bucketCnt - 1 } else { i-- } if b.tophash[i] != emptyOne { break } } notLast: h.count-- // Reset the hash seed to make it more difficult for attackers to // repeatedly trigger hash collisions. See issue 25237. if h.count == 0 { h.hash0 = fastrand() } break search } }