死磕Java集合之BitSet源码分析（JDK18）

文章目录

死磕Java集合之BitSet源码分析（JDK18）
- 简介
- 继承体系
- 存储结构
- 源码解析
  - 属性
  - 构造方法
  - set(int bitIndex)
  - set(int bitIndex, boolean value)
  - clear(int bitIndex)
  - set(int fromIndex, int toIndex)
  - clear(int fromIndex, int toIndex)
  - set(int fromIndex, int toIndex, boolean value)
  - flip(int bitIndex)
  - flip(int fromIndex, int toIndex)
  - cardinality()
- 总结

简介

因为Java中没有new bit[]这种直接创建一个bit数组的方式，所以Java提供了BitSet来实现位图，BitSet是采用一个long型的数组来实现位图的。

BitSet内部long型数组的首个元素表示的是[0, 63]这64个元素。

继承体系

BitSet实现了Cloneable和Serializable

存储结构

BitSet通过long型的数组来实现位图，一个long型元素可以表示64个元素是否存在（数组中下标为n的long元素的第i位为1，表示值64 * n + i已存在，其中n从0开始，i的范围是[0, 63]）。

源码解析

属性

/**
 * BitSets are packed into arrays of "words."  Currently a word is
 * a long, which consists of 64 bits, requiring 6 address bits.
 * The choice of word size is determined purely by performance concerns.
 */
private static final int ADDRESS_BITS_PER_WORD = 6;
// 1 << 6 equals 64, the number of bits in one word
private static final int BITS_PER_WORD = 1 << ADDRESS_BITS_PER_WORD;
private static final int BIT_INDEX_MASK = BITS_PER_WORD - 1;

/* Used to shift left or right for a partial word mask */
private static final long WORD_MASK = 0xffffffffffffffffL;

/**
 * @serialField bits long[]
 *
 * The bits in this BitSet.  The ith bit is stored in bits[i/64] at
 * bit position i % 64 (where bit position 0 refers to the least
 * significant bit and 63 refers to the most significant bit).
 */
@java.io.Serial
private static final ObjectStreamField[] serialPersistentFields = {
    new ObjectStreamField("bits", long[].class),
};

/**
 * The internal field corresponding to the serialField "bits".
 * BitSet uses a long array as its backing store, so its capacity in
 * bits is always a multiple of the width of a long (64 bits).
 */
private long[] words;

/**
 * The number of words in the logical size of this BitSet.
 */
private transient int wordsInUse = 0;

/**
 * Whether the size of "words" is user-specified.  If so, we assume
 * the user knows what he's doing and try harder to preserve it.
 */
private transient boolean sizeIsSticky = false;

/* use serialVersionUID from JDK 1.0.2 for interoperability */
@java.io.Serial
private static final long serialVersionUID = 7997698588986878753L;

构造方法

/**
 * Creates a new bit set. All bits are initially {@code false}.
 */
public BitSet() {
    // BITS_PER_WORD bits fit in a single word, so the default long[] has length 1
    initWords(BITS_PER_WORD);
    sizeIsSticky = false;
}

/**
 * Creates a bit set whose initial size is large enough to explicitly
 * represent bits with indices in the range {@code 0} through
 * {@code nbits-1}. All bits are initially {@code false}.
 *
 * @param  nbits the initial size of the bit set
 * @throws NegativeArraySizeException if the specified initial size
 *         is negative
 */
public BitSet(int nbits) {
    // nbits can't be negative; size 0 is OK
    if (nbits < 0)
        throw new NegativeArraySizeException("nbits < 0: " + nbits);

    initWords(nbits);
    sizeIsSticky = true;
}

/**
 * Allocates the backing array with just enough words to hold bits
 * {@code 0} through {@code nbits-1}. For {@code nbits == 0} this is
 * {@code wordIndex(-1) + 1 == 0}, i.e. an empty array.
 */
private void initWords(int nbits) {
    words = new long[wordIndex(nbits-1) + 1];
}

/**
 * Creates a bit set using words as the internal representation.
 * The last word (if there is one) must be non-zero.
 */
private BitSet(long[] words) {
    this.words = words;
    this.wordsInUse = words.length;
    checkInvariants();
}

/**
 * Given a bit index, returns the index of the word containing it.
 * For non-negative indices this is the same as {@code bitIndex / 64}.
 */
private static int wordIndex(int bitIndex) {
    return bitIndex >> ADDRESS_BITS_PER_WORD;
}

set(int bitIndex)

将值bitIndex加入BitSet，即将索引为bitIndex的比特位置为1

public void set(int bitIndex) {if (bitIndex < 0)throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex);// 获取bitIndex所在long数组元素的索引int wordIndex = wordIndex(bitIndex);expandTo(wordIndex);// 将对应bitIndex的比特位置为1  words[wordIndex] |= (1L << bitIndex); // Restores invariantscheckInvariants();
}// 将long数组扩展到可以容纳索引wordIndex
private void expandTo(int wordIndex) {// 索引从0开始，所以需要的数组长度为wordIndex+1int wordsRequired = wordIndex+1;if (wordsInUse < wordsRequired) {ensureCapacity(wordsRequired);wordsInUse = wordsRequired;}
}// 确保BitSet的容量足够
private void ensureCapacity(int wordsRequired) {if (words.length < wordsRequired) {// Allocate larger of doubled size or required sizeint request = Math.max(2 * words.length, wordsRequired);words = Arrays.copyOf(words, request);sizeIsSticky = false;}
}// 新建一个newLength长度的数组，并将原数组的元素复制到新数组中，并返回新数组
public static long[] copyOf(long[] original, int newLength) {long[] copy = new long[newLength];System.arraycopy(original, 0, copy, 0,Math.min(original.length, newLength));return copy;
}

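注意：Java中的移位运算并不是循环移位，而是只使用移位距离的低位：左操作数为long时只取移位距离的低6位（相当于对64取模），为int时只取低5位（相当于对32取模）。因此1L << bitIndex等价于1L << (bitIndex % 64)，例如1L << 65相当于1L << 1，值为2；同理，int型的1 << 33相当于1 << 1，值也为2。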

set(int bitIndex, boolean value)

如果value为真，设置bitIndex位的值为1，否则置为0

/**
 * Sets the bit at the specified index to the specified value.
 *
 * @param bitIndex a bit index
 * @param value    {@code true} to set the bit, {@code false} to clear it
 */
public void set(int bitIndex, boolean value) {
    if (value)
        set(bitIndex);
    else
        clear(bitIndex);
}

clear(int bitIndex)

将bitIndex位的值置为0

/**
 * Sets the bit specified by the index to {@code false}.
 *
 * @param  bitIndex the index of the bit to be cleared
 * @throws IndexOutOfBoundsException if the specified index is negative
 */
public void clear(int bitIndex) {
    if (bitIndex < 0)
        throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex);

    // Index of the element of words[] that holds this bit
    int wordIndex = wordIndex(bitIndex);
    // Words at or beyond wordsInUse hold no set bits, so there is nothing to clear
    if (wordIndex >= wordsInUse)
        return;

    // 1L << bitIndex selects the bit; AND-ing with its complement clears it
    words[wordIndex] &= ~(1L << bitIndex);

    recalculateWordsInUse();
    checkInvariants();
}

/**
 * Recomputes wordsInUse as one past the index of the highest non-zero
 * word. The scan starts at the current wordsInUse - 1 and moves downward,
 * so it can only shrink the value, never grow it.
 */
private void recalculateWordsInUse() {
    // Traverse the bitset until a used word is found
    int i;
    for (i = wordsInUse-1; i >= 0; i--)
        if (words[i] != 0)
            break;

    wordsInUse = i+1; // The new logical size
}

/**
 * Asserts the internal invariants: the last word in use is non-zero,
 * wordsInUse lies within the array bounds, and the word just past the
 * logical size (if any) is zero.
 */
private void checkInvariants() {
    assert(wordsInUse == 0 || words[wordsInUse - 1] != 0);
    assert(wordsInUse >= 0 && wordsInUse <= words.length);
    assert(wordsInUse == words.length || words[wordsInUse] == 0);
}

set(int fromIndex, int toIndex)

将[fromIndex, toIndex)的比特位置为1

/**
 * Sets the bits from the specified {@code fromIndex} (inclusive) to the
 * specified {@code toIndex} (exclusive) to {@code true}.
 *
 * @param fromIndex index of the first bit to be set
 * @param toIndex   index after the last bit to be set
 */
public void set(int fromIndex, int toIndex) {
    checkRange(fromIndex, toIndex);

    if (fromIndex == toIndex)
        return;

    // Increase capacity if necessary
    int startWordIndex = wordIndex(fromIndex);
    int endWordIndex   = wordIndex(toIndex - 1);
    expandTo(endWordIndex);

    // Bits from (fromIndex % 64) upward within the first word
    long firstWordMask = WORD_MASK << fromIndex;
    // A long shift uses only the low 6 bits of the distance, so
    // WORD_MASK >>> -toIndex behaves like WORD_MASK >>> (64 - toIndex % 64):
    // it keeps the bits below (toIndex % 64), or all 64 bits when that is 0
    long lastWordMask  = WORD_MASK >>> -toIndex;
    if (startWordIndex == endWordIndex) {
        // Case 1: One word
        // fromIndex and toIndex - 1 fall into the same word
        words[startWordIndex] |= (firstWordMask & lastWordMask);
    } else {
        // Case 2: Multiple words
        // Handle first word
        words[startWordIndex] |= firstWordMask;

        // Handle intermediate words, if any
        for (int i = startWordIndex+1; i < endWordIndex; i++)
            words[i] = WORD_MASK;

        // Handle last word (restores invariants)
        words[endWordIndex] |= lastWordMask;
    }

    checkInvariants();
}

检查参数是否合法；
分别获取fromIndex和toIndex所在的word的索引；
如果fromIndex和toIndex在同一个word中，则将[fromIndex, toIndex)的比特位置为1；
如果不在，则分三步处理：
1. 处理第1个word；
2. 处理第1个word和最后1个word之间的所有word（不含首尾两个word），全置为1；
3. 处理最后1个word。

clear(int fromIndex, int toIndex)

将[fromIndex, toIndex)的比特位置为0

/**
 * Sets the bits from the specified {@code fromIndex} (inclusive) to the
 * specified {@code toIndex} (exclusive) to {@code false}.
 *
 * @param fromIndex index of the first bit to be cleared
 * @param toIndex   index after the last bit to be cleared
 */
public void clear(int fromIndex, int toIndex) {
    checkRange(fromIndex, toIndex);

    if (fromIndex == toIndex)
        return;

    int startWordIndex = wordIndex(fromIndex);
    // The whole range lies beyond the words in use: nothing to clear
    if (startWordIndex >= wordsInUse)
        return;

    int endWordIndex = wordIndex(toIndex - 1);
    if (endWordIndex >= wordsInUse) {
        // Clamp the range so that it ends in the last word in use
        toIndex = length();
        endWordIndex = wordsInUse - 1;
    }

    // Bits from (fromIndex % 64) upward within the first word
    long firstWordMask = WORD_MASK << fromIndex;
    // A long shift uses only the low 6 bits of the distance, so
    // WORD_MASK >>> -toIndex behaves like WORD_MASK >>> (64 - toIndex % 64)
    long lastWordMask  = WORD_MASK >>> -toIndex;
    if (startWordIndex == endWordIndex) {
        // Case 1: One word
        // Same masks as set(fromIndex, toIndex), but complemented to clear the bits
        words[startWordIndex] &= ~(firstWordMask & lastWordMask);
    } else {
        // Case 2: Multiple words
        // Handle first word
        words[startWordIndex] &= ~firstWordMask;

        // Handle intermediate words, if any
        for (int i = startWordIndex+1; i < endWordIndex; i++)
            words[i] = 0;

        // Handle last word
        words[endWordIndex] &= ~lastWordMask;
    }

    recalculateWordsInUse();
    checkInvariants();
}

set(int fromIndex, int toIndex, boolean value)

如果value为真，将[fromIndex, toIndex)的比特位置为1，否则置为0

/**
 * Sets the bits from the specified {@code fromIndex} (inclusive) to the
 * specified {@code toIndex} (exclusive) to the specified value.
 *
 * @param fromIndex index of the first bit to be set
 * @param toIndex   index after the last bit to be set
 * @param value     {@code true} to set the bits, {@code false} to clear them
 */
public void set(int fromIndex, int toIndex, boolean value) {
    if (value)
        set(fromIndex, toIndex);
    else
        clear(fromIndex, toIndex);
}

flip(int bitIndex)

翻转bitIndex索引上的比特值

/**
 * Sets the bit at the specified index to the complement of its
 * current value.
 *
 * @param  bitIndex the index of the bit to flip
 * @throws IndexOutOfBoundsException if the specified index is negative
 */
public void flip(int bitIndex) {
    if (bitIndex < 0)
        throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex);

    int wordIndex = wordIndex(bitIndex);
    expandTo(wordIndex);

    // XOR with a single-bit mask inverts exactly that bit
    words[wordIndex] ^= (1L << bitIndex);

    // The flip may have zeroed the highest word, so recompute the logical size
    recalculateWordsInUse();
    checkInvariants();
}

flip(int fromIndex, int toIndex)

翻转[fromIndex, toIndex)的比特位

public void flip(int fromIndex, int toIndex) {checkRange(fromIndex, toIndex);if (fromIndex == toIndex)return;int startWordIndex = wordIndex(fromIndex);int endWordIndex   = wordIndex(toIndex - 1);expandTo(endWordIndex);long firstWordMask = WORD_MASK << fromIndex;long lastWordMask  = WORD_MASK >>> -toIndex;if (startWordIndex == endWordIndex) {// Case 1: One wordwords[startWordIndex] ^= (firstWordMask & lastWordMask);} else {// Case 2: Multiple words// Handle first wordwords[startWordIndex] ^= firstWordMask;// Handle intermediate words, if anyfor (int i = startWordIndex+1; i < endWordIndex; i++)words[i] ^= WORD_MASK;// Handle last wordwords[endWordIndex] ^= lastWordMask;}recalculateWordsInUse();checkInvariants();
}

cardinality()

计算整个BitSet中比特位为1的个数

/**
 * Returns the number of bits set to {@code true} in this {@code BitSet}.
 *
 * @return the number of bits set to {@code true} in this {@code BitSet}
 */
public int cardinality() {
    int sum = 0;
    // Only the words in use can contain set bits
    for (int i = 0; i < wordsInUse; i++)
        sum += Long.bitCount(words[i]);
    return sum;
}

// Shown for reference: the bitCount(long) method called by cardinality()
// above. It counts the one-bits by summing adjacent bit groups in parallel,
// doubling the group width at each step.
@IntrinsicCandidate
public static int bitCount(long i) {
    // HD, Figure 5-2
    i = i - ((i >>> 1) & 0x5555555555555555L);
    i = (i & 0x3333333333333333L) + ((i >>> 2) & 0x3333333333333333L);
    i = (i + (i >>> 4)) & 0x0f0f0f0f0f0f0f0fL;
    i = i + (i >>> 8);
    i = i + (i >>> 16);
    i = i + (i >>> 32);
    // The count is at most 64, so the low 7 bits hold the result
    return (int)i & 0x7f;
}

总结

BitSet中的实现充分利用了位运算，速度很快，因为是位图，所以占用空间也比较小。

下面是boolean数组和BitSet的空间占用对比

// Prints the memory layout of a 1024-element boolean[] and of a BitSet
// created for 1024 bits, so their footprints can be compared.
// NOTE(review): ClassLayout and GraphLayout are presumably the OpenJDK JOL
// classes (org.openjdk.jol.info) - confirm the dependency.
public class ClassLayoutTest {
    public static void main(String[] args) {
        boolean[] bits = new boolean[1024];
        System.out.println(ClassLayout.parseInstance(bits).toPrintable());

        BitSet bitSet = new BitSet(1024);
        System.out.println(GraphLayout.parseInstance(bitSet).toPrintable());
    }
}

[Z object internals:
 OFFSET  SIZE      TYPE DESCRIPTION                               VALUE
      0     4           (object header)                           01 00 00 00 (00000001 00000000 00000000 00000000) (1)
      4     4           (object header)                           00 00 00 00 (00000000 00000000 00000000 00000000) (0)
      8     4           (object header)                           05 00 00 f8 (00000101 00000000 00000000 11111000) (-134217723)
     12     4           (object header)                           00 04 00 00 (00000000 00000100 00000000 00000000) (1024)
     16  1024   boolean [Z.<elements>                             N/A
Instance size: 1040 bytes
Space losses: 0 bytes internal + 0 bytes external = 0 bytes total

java.util.BitSet@15615099d object externals:
          ADDRESS       SIZE TYPE             PATH                           VALUE
        716ccf4f8         24 java.util.BitSet                                (object)
        716ccf510        144 [J               .words                         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

可以看出boolean数组，占用的空间是1040 bytes，BitSet占用的空间是24 + 144 = 168 bytes。