您现在的位置是：网站首页> 编程资料编程资料

深入了解Golang的map增量扩容_Golang_

2023-05-26 597人已围观

简介深入了解Golang的map增量扩容_Golang_

核心思想

以空间换时间，访问速度与填充因子有关

扩容hash表的时候每次都增大2倍，hash表大小始终为2的整数倍，有(hash mod 2^B) == (hash & (2^B-1))，方便于简化运算，避免取余操作

扩容前后的 hash mod 容量大小是不等的，因此要重新计算每一项在hash表中的位置，扩容后需要将old pair重新hash到新的hash表上（就是一个evacuate的过程）。这个过程不是一次性完成的，在每次insert、remove的时候会搬移1-2个pair。就是使用的是增量扩容

每个旧桶的键值对都会分流到2个不同的新桶中

为什么要使用增量扩容?

主要是缩短map容器的响应时间。如果不用增量扩容，当一个map存储很多元素后进行扩容，会阻塞很长时间无法响应请求。增量扩容的本质其实就是将总的扩容时间分摊到了每一次hash操作上

在搬数据的时候，并不会把旧的bucket从oldbucket中删除，只是加上了一个已删除的标记

扩容期间一部分数据在oldbucket中，一部分在bucket中，会对hash表的insert，remove，lookup操作的处理逻辑产生影响，如耗时更长等

只有当oldbucket中所有bucket移动到新表后，才会将oldbucket释放掉

扩容方式

如果grow的太频繁，空间的利用率就会很低，如果很久才grow，会形成很多的overflow buckets，查找速率会下降

map的填充因子是6.5

即当count / 2^B > 6.5时会触发一次grow.翻倍扩容

如果负载因子没有超标，但是使用的溢出桶较多，也会触发扩容。但是是等量扩容

原因是原桶中有太多的键值对被删除，等量扩容可以使得剩余的键值对排列更加紧凑，节省空间

 // Maximum average load of a bucket that triggers growth is 6.5. // Represent as loadFactorNum/loadFactorDen, to allow integer math. loadFactorNum = 13 loadFactorDen = 2

这个6.5来源于作者的一个测试程序，取了一个相对适中的值

// Picking loadFactor: too large and we have lots of overflow // buckets, too small and we waste a lot of space. I wrote // a simple program to check some stats for different loads: // (64-bit, 8 byte keys and elems) // loadFactor %overflow bytes/entry hitprobe missprobe // 4.00 2.13 20.77 3.00 4.00 // 4.50 4.05 17.30 3.25 4.50 // 5.00 6.85 14.77 3.50 5.00 // 5.50 10.55 12.94 3.75 5.50 // 6.00 15.27 11.67 4.00 6.00 // 6.50 20.90 10.79 4.25 6.50 // 7.00 27.14 10.15 4.50 7.00 // 7.50 34.03 9.73 4.75 7.50 // 8.00 41.10 9.40 5.00 8.00 // // %overflow = percentage of buckets which have an overflow bucket // bytes/entry = overhead bytes used per key/elem pair // hitprobe = # of entries to check when looking up a present key // missprobe = # of entries to check when looking up an absent key // // Keep in mind this data is for maximally loaded tables, i.e. just // before the table grows. Typical tables will be somewhat less loaded.

源码分析

// 只是分配新的buckets，并将老的buckets挂到oldbuckets字段上 // 真正搬迁的动作在growWork()中 func hashGrow(t *maptype, h *hmap) { // If we've hit the load factor, get bigger. // Otherwise, there are too many overflow buckets, // so keep the same number of buckets and "grow" laterally. // B+1 相当于之前的2倍空间 bigger := uint8(1) // 对应条件2 if !overLoadFactor(h.count+1, h.B) { // 进行等量扩容，B不变 bigger = 0 h.flags |= sameSizeGrow } // 将oldbuckets挂到buckets上 oldbuckets := h.buckets // 申请新的buckets空间 newbuckets, nextOverflow := makeBucketArray(t, h.B+bigger, nil) // 对标志位的处理 // &^表示 按位置0 // x=01010011 // y=01010100 // z=x&^y=00000011 // 如果y的bit位为1，那么z相应的bit位就为0 // 否则z对应的bit位就和x对应的bit位相同 // // 其实就是将h.flags的iterator和oldItertor位置为0 // 如果发现iterator位为1，那就把它转接到 oldIterator 位 // 使得 oldIterator 标志位变成 1 // bucket挂到了oldbuckets下，那么标志位也一样转移过去 flags := h.flags &^ (iterator | oldIterator) if h.flags&iterator != 0 { flags |= oldIterator } // // 可能有迭代器使用 buckets // iterator = 1 // 可能有迭代器使用 oldbuckets // oldIterator = 2 // 有协程正在向 map 中写入 key // hashWriting = 4 // 等量扩容（对应条件 2） // sameSizeGrow = 8 // 提交grow的动作 // commit the grow (atomic wrt gc) h.B += bigger h.flags = flags h.oldbuckets = oldbuckets h.buckets = newbuckets // 搬迁进度为0 h.nevacuate = 0 // 溢出bucket数量为0 h.noverflow = 0 if h.extra != nil && h.extra.overflow != nil { // Promote current overflow buckets to the old generation. if h.extra.oldoverflow != nil { throw("oldoverflow is not nil") } h.extra.oldoverflow = h.extra.overflow h.extra.overflow = nil } if nextOverflow != nil { if h.extra == nil { h.extra = new(mapextra) } h.extra.nextOverflow = nextOverflow } // the actual copying of the hash table data is done incrementally // by growWork() and evacuate(). } // growWork 真正执行搬迁工作的函数 // 调用其的动作在mapssign和mapdelete函数中，也就是插入、修改或删除的时候都会尝试进行搬迁 func growWork(t *maptype, h *hmap, bucket uintptr) { // make sure we evacuate the oldbucket corresponding // to the bucket we're about to use // 确保搬迁的老bucket对应的正在使用的新bucket // bucketmask 作用就是将key算出来的hash值与bucketmask相&，得到key应该落入的bucket // 只有hash值低B位决策key落入那个bucket evacuate(t, h, bucket&h.oldbucketmask()) // evacuate one more oldbucket to make progress on growing // 再搬迁一个bucket，加快搬迁进度，这就是说为什么可能每次操作会搬迁1-2个bucket if h.growing() { evacuate(t, h, h.nevacuate) } } // 返回扩容前的bucketmask // // 所谓的bucketmask作用就是将 key 计算出来的哈希值与 bucketmask 相与 // 得到的结果就是 key 应该落入的桶 // 比如 B = 5，那么 bucketmask 的低 5 位是 11111，其余位是 0 // hash 值与其相与的意思是，只有 hash 值的低 5 位决策 key 到底落入哪个 bucket。 // oldbucketmask provides a mask that can be applied to calculate n % noldbuckets(). func (h *hmap) oldbucketmask() uintptr { return h.noldbuckets() - 1 } // 检查oldbuckets是否搬迁完 // growing reports whether h is growing. The growth may be to the same size or bigger. func (h *hmap) growing() bool { return h.oldbuckets != nil }

// evacDst is an evacuation destination. type evacDst struct { // 标识bucket移动的目标地址 b *bmap // current destination bucket // k-v的索引 i int // key/elem index into b // 指向k k unsafe.Pointer // pointer to current key storage // 指向v e unsafe.Pointer // pointer to current elem storage } // evacuate 核心搬迁函数 func evacuate(t *maptype, h *hmap, oldbucket uintptr) { // 定位老的bucket的地址 b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))) // 结果是2^B，进行计算 如 B = 5，结果为32 newbit := h.noldbuckets() // 如果b没被搬迁过 if !evacuated(b) { // TODO: reuse overflow buckets instead of using new ones, if there // is no iterator using the old buckets. (If !oldIterator.) // xy contains the x and y (low and high) evacuation destinations. // xy包含了两个可能搬迁到的目的bucket地址 // 默认是等量扩容的，用x来搬迁 var xy [2]evacDst x := &xy[0] x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize))) x.k = add(unsafe.Pointer(x.b), dataOffset) x.e = add(x.k, bucketCnt*uintptr(t.keysize)) // 如果不是等量扩容，前后的bucket序号有变 // 使用y来搬迁 if !h.sameSizeGrow() { // Only calculate y pointers if we're growing bigger. // Otherwise GC can see bad pointers. y := &xy[1] // y代表的bucket序号增加了2^B y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize))) y.k = add(unsafe.Pointer(y.b), dataOffset) y.e = add(y.k, bucketCnt*uintptr(t.keysize)) } // 遍历所有的bucket，包括溢出bucket // b是老bucket的地址 for ; b != nil; b = b.overflow(t) { k := add(unsafe.Pointer(b), dataOffset) e := add(k, bucketCnt*uintptr(t.keysize)) // 遍历bucket里所有的cell for i := 0; i < bucketCnt; i, k, e = i+1, add(k, uintptr(t.keysize)), add(e, uintptr(t.elemsize)) { // 当前cell的tophash值 top := b.tophash[i] // 如果cell为空，即没有key // 说明其被搬迁过，作标记然后继续下一个cell if isEmpty(top) { b.tophash[i] = evacuatedEmpty continue } // 一般不会出现这种情况 // 未搬迁的cell只可能是empty或者正常的tophash // 不会小于minTopHash if top < minTopHash { throw("bad map state") } // 进行一次拷贝避免相同内存地址问题 k2 := k // key如果是指针就进行解引用 if t.indirectkey() { k2 = *((*unsafe.Pointer)(k2)) } // 默认值为0标识默认是使用x，进行等量扩容 var useY uint8 // 增量扩容 if !h.sameSizeGrow() { // Compute hash to make our evacuation decision (whether we need // to send this key/elem to bucket x or bucket y). // 计算hash值，与第一次写入一样 hash := t.hasher(k2, uintptr(h.hash0)) // 有协程在遍历map 且 出现相同的key，计算出的hash值不同 // 这里只会有一种情况，也就是float64的时候 // 每次hash出来都会是不同的hash值，这就意味着无法通过get去获取其key确切位置 // 因此采用取最低位位置来分辨 // 为下一个level重新计算一个随机的tophash // 这些key将会在多次增长后均匀的分布在所有的存储桶中 if h.flags&iterator != 0 && !t.reflexivekey() && !t.key.equal(k2, k2) { // If key != key (NaNs), then the hash could be (and probably // will be) entirely different from the old hash. Moreover, // it isn't reproducible. Reproducibility is required in the // presence of iterators, as our evacuation decision must // match whatever decision the iterator made. // Fortunately, we have the freedom to send these keys either // way. Also, tophash is meaningless for these kinds of keys. // We let the low bit of tophash drive the evacuation decision. // We recompute a new random tophash for the next level so // these keys will get evenly distributed across all buckets // after multiple grows. // 第B位 置1 // 如果tophash最低位是0就分配到x part 否则分配到y part useY = top & 1 top = tophash(hash) } else { // 对于正常的key // 第B位 置0 if hash&newbit != 0 { // 使用y部分 useY = 1 } } } if evacuatedX+1 != evacuatedY || evacuatedX^1 != evacuatedY { throw("bad evacuatedN") } // 这里其实就是重新设置tophash值 // 标记老的cell的tophash值，表示搬到useT部分（可能是x也可能是y，看具体取值） b.tophash[i] = evacuatedX + useY // evacuatedX + 1 == evacuatedY // 选择目标bucket的内存起始部分 dst := &xy[useY] // evacuation destination // 如果i=8说明要溢出了 if dst.i == bucketCnt { // 新建一个溢出bucket dst.b = h.newoverflow(t, dst.b) // 从0开始计数 dst.i = 0 // 标识key要移动到的位置 dst.k = add(unsafe.Pointer(dst.b), dataOffset) // 标识value要移动到的位置 dst.e = add(dst.k, bucketCnt*uintptr(t.keysize)) } // 重新设置tophash dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds check if t.indirectkey() { // 将原key（指针类型）复制到新的位置 *(*unsafe.Pointer)(dst.k) = k2 // copy pointer } else { // 将原key（值类型）复制到新位置 typedmemmove(t.key, dst.k, k) // copy elem } // 如果v是指针，操作同key if t.indirectelem() { *(*unsafe.Pointer)(dst.e) = *(*unsafe.Pointer)(e) } else { typedmemmove(t.elem, dst.e, e) } // 定位到下一个cell dst.i++ // These updates might push these pointers past the end of the // key or elem arrays. That's ok, as we have the overflow pointer // at the end of the bucket to protect against pointing past the // end of the bucket. // 两个溢出指针在bucket末尾用于保证
                提示：
                    本文由神整理自网络，如有侵权请联系本站删除！
                    

                    本站声明： 

                    1、本站所有资源均来源于互联网，不保证100%完整、不提供任何技术支持； 

                    2、本站所发布的文章以及附件仅限用于学习和研究目的;不得将用于商业或者非法用途；否则由此产生的法律后果，本站概不负责！
                
                
                
                                            上一篇：Golang中panic与recover的区别_Golang_
                                                                下一篇：Go语言dolphinscheduler任务调度处理_Golang_

您现在的位置是：网站首页> 编程资料编程资料

深入了解Golang的map增量扩容_Golang_

目录

核心思想

扩容方式

源码分析

相关内容

点击排行

本栏推荐

猜你喜欢