Poison

JVM Intrinsics

Richard Startin 在文章 A Quick Look at RoaringBitmap 中介绍 BitmapContainer 时提到:在处理器支持的情况下,Long.bitCount 会被 JIT 编译器直接替换为 popcnt 指令(即 JVM intrinsic),而不是执行其 Java 实现。本文做简单记录,我们编写如下的代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
package me.tianshuang;

public class IntrinsicTest { // Demo class: calls Long.bitCount in a long-running loop so the JIT-compiled code of main can be inspected.

public static void main(String[] args) { // Comments are kept on existing lines so the source line numbers (7 = loop, 8 = bitCount) stay unchanged.
int result = 0; // int accumulator; Java int addition wraps silently, and the total over 10^9 values exceeds Integer.MAX_VALUE
for (int i = 0; i < 1000000000; i++) { // one billion iterations, i = 0 .. 999,999,999
result += Long.bitCount(i); // i is widened from int to long, then the number of set bits is added to result
}

System.out.println(result); // print the (wrapped) total; presumably also keeps the loop's result observable — TODO confirm intent
}

}

使用 javac -d . IntrinsicTest.java 将 Java 类编译为 class 文件(-d . 会按包名生成 me/tianshuang 目录,之后才能以全限定类名运行),然后再使用 java -XX:+UnlockDiagnosticVMOptions -XX:+PrintAssembly me.tianshuang.IntrinsicTest 打印出该类对应的汇编代码(PrintAssembly 依赖 hsdis 反汇编插件,见文末参考),其中关键的 main 方法对应的汇编代码如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
Decoding compiled method 0x000000010e911b10:
Code:
[Entry Point]
[Verified Entry Point]
[Constants]
# {method} {0x00000001066a82e0} 'main' '([Ljava/lang/String;)V' in 'me/tianshuang/IntrinsicTest'
0x000000010e911c60: callq 0x0000000102989054 ; {runtime_call}
0x000000010e911c65: data16 data16 nopw 0x0(%rax,%rax,1)
0x000000010e911c70: mov %eax,-0x14000(%rsp)
0x000000010e911c77: push %rbp
0x000000010e911c78: sub $0x20,%rsp
0x000000010e911c7c: mov 0x8(%rsi),%ebp
0x000000010e911c7f: mov (%rsi),%ebx
0x000000010e911c81: mov %rsi,%rdi
0x000000010e911c84: movabs $0x1029ebf86,%r10
0x000000010e911c8e: callq *%r10 ;*iload_2
; - me.tianshuang.IntrinsicTest::main@4 (line 7)

0x000000010e911c91: cmp $0x3b9aca00,%ebx
0x000000010e911c97: jge 0x000000010e911e2c ;*if_icmpge
; - me.tianshuang.IntrinsicTest::main@7 (line 7)

0x000000010e911c9d: mov %ebx,%r11d
0x000000010e911ca0: inc %r11d ;*goto
; - me.tianshuang.IntrinsicTest::main@21 (line 7)

0x000000010e911ca3: movslq %ebx,%r10
0x000000010e911ca6: popcnt %r10,%r8
0x000000010e911cab: add %r8d,%ebp ;*iadd
; - me.tianshuang.IntrinsicTest::main@16 (line 8)

0x000000010e911cae: mov %ebx,%r10d
0x000000010e911cb1: inc %r10d ;*iinc
; - me.tianshuang.IntrinsicTest::main@18 (line 7)

0x000000010e911cb4: cmp %r11d,%r10d
0x000000010e911cb7: jge 0x000000010e911cbe ;*if_icmpge
; - me.tianshuang.IntrinsicTest::main@7 (line 7)

0x000000010e911cb9: mov %r10d,%ebx
0x000000010e911cbc: jmp 0x000000010e911ca3
0x000000010e911cbe: cmp $0x3b9ac9f1,%r10d
0x000000010e911cc5: jge 0x000000010e911e1d
0x000000010e911ccb: jmp 0x000000010e911cd5
0x000000010e911ccd: data16 xchg %ax,%ax
0x000000010e911cd0: mov %eax,%ebp
0x000000010e911cd2: mov %ebx,%r10d ;*goto
; - me.tianshuang.IntrinsicTest::main@21 (line 7)

0x000000010e911cd5: vmovd %r10d,%xmm0
0x000000010e911cda: movslq %r10d,%r10 ;*i2l ; - me.tianshuang.IntrinsicTest::main@12 (line 8)

0x000000010e911cdd: mov %r10,%r11
0x000000010e911ce0: add $0xf,%r11
0x000000010e911ce4: mov %r10,%r8
0x000000010e911ce7: add $0xb,%r8
0x000000010e911ceb: popcnt %r11,%rax
0x000000010e911cf0: popcnt %r8,%r11
0x000000010e911cf5: mov %r10,%r8
0x000000010e911cf8: add $0xa,%r8
0x000000010e911cfc: mov %r10,%rcx
0x000000010e911cff: add $0x7,%rcx
0x000000010e911d03: popcnt %r8,%r9
0x000000010e911d08: popcnt %rcx,%rbx
0x000000010e911d0d: mov %r10,%r8
0x000000010e911d10: add $0x6,%r8
0x000000010e911d14: mov %r10,%rdi
0x000000010e911d17: add $0x5,%rdi
0x000000010e911d1b: popcnt %r8,%rcx
0x000000010e911d20: popcnt %rdi,%rdx
0x000000010e911d25: mov %r10,%r8
0x000000010e911d28: add $0x4,%r8
0x000000010e911d2c: mov %r10,%rsi
0x000000010e911d2f: add $0x3,%rsi
0x000000010e911d33: popcnt %r8,%rdi
0x000000010e911d38: popcnt %rsi,%r13
0x000000010e911d3d: mov %r10,%r8
0x000000010e911d40: add $0x2,%r8
0x000000010e911d44: popcnt %r10,%rsi
0x000000010e911d49: add %ebp,%esi
0x000000010e911d4b: popcnt %r8,%r8
0x000000010e911d50: mov %r10,%r14
0x000000010e911d53: add $0x1,%r14
0x000000010e911d57: mov %r10,%rbp
0x000000010e911d5a: add $0x8,%rbp
0x000000010e911d5e: popcnt %r14,%r14
0x000000010e911d63: add %esi,%r14d
0x000000010e911d66: add %r8d,%r14d
0x000000010e911d69: add %r13d,%r14d
0x000000010e911d6c: add %edi,%r14d
0x000000010e911d6f: add %edx,%r14d
0x000000010e911d72: add %ecx,%r14d
0x000000010e911d75: add %ebx,%r14d
0x000000010e911d78: popcnt %rbp,%rcx
0x000000010e911d7d: add %r14d,%ecx
0x000000010e911d80: mov %r10,%r8
0x000000010e911d83: add $0x9,%r8
0x000000010e911d87: mov %r10,%rbx
0x000000010e911d8a: add $0xc,%rbx
0x000000010e911d8e: popcnt %r8,%r8
0x000000010e911d93: add %ecx,%r8d
0x000000010e911d96: add %r9d,%r8d
0x000000010e911d99: add %r11d,%r8d
0x000000010e911d9c: popcnt %rbx,%r9
0x000000010e911da1: add %r8d,%r9d
0x000000010e911da4: mov %r10,%r11
0x000000010e911da7: add $0xd,%r11
0x000000010e911dab: add $0xe,%r10
0x000000010e911daf: popcnt %r11,%r11
0x000000010e911db4: add %r9d,%r11d
0x000000010e911db7: popcnt %r10,%r8
0x000000010e911dbc: add %r11d,%r8d
0x000000010e911dbf: add %r8d,%eax ;*iadd
; - me.tianshuang.IntrinsicTest::main@16 (line 8)

0x000000010e911dc2: vmovd %xmm0,%ebx
0x000000010e911dc6: add $0x10,%ebx ;*iinc
; - me.tianshuang.IntrinsicTest::main@18 (line 7)

0x000000010e911dc9: cmp $0x3b9ac9f1,%ebx
0x000000010e911dcf: jl 0x000000010e911cd0 ;*if_icmpge
; - me.tianshuang.IntrinsicTest::main@7 (line 7)

0x000000010e911dd5: vmovd %xmm0,%r11d
0x000000010e911dda: add $0xf,%r11d ;*iinc
; - me.tianshuang.IntrinsicTest::main@18 (line 7)

0x000000010e911dde: cmp $0x3b9aca00,%ebx
0x000000010e911de4: jge 0x000000010e911e27
0x000000010e911de6: jmp 0x000000010e911deb
0x000000010e911de8: mov %r10d,%ebx ;*goto
; - me.tianshuang.IntrinsicTest::main@21 (line 7)

0x000000010e911deb: movslq %ebx,%r10
0x000000010e911dee: popcnt %r10,%r11
0x000000010e911df3: add %r11d,%eax ;*iadd
; - me.tianshuang.IntrinsicTest::main@16 (line 8)

0x000000010e911df6: mov %ebx,%r10d
0x000000010e911df9: inc %r10d ;*iinc
; - me.tianshuang.IntrinsicTest::main@18 (line 7)

0x000000010e911dfc: cmp $0x3b9aca00,%r10d
0x000000010e911e03: jl 0x000000010e911de8
0x000000010e911e05: inc %ebx ;*if_icmpge
; - me.tianshuang.IntrinsicTest::main@7 (line 7)

0x000000010e911e07: mov $0xffffff65,%esi
0x000000010e911e0c: mov %eax,%ebp
0x000000010e911e0e: mov %ebx,0x4(%rsp)
0x000000010e911e12: nop
0x000000010e911e13: callq 0x000000010e8456a0 ; OopMap{off=440}
;*if_icmpge
; - me.tianshuang.IntrinsicTest::main@7 (line 7)
; {runtime_call}
0x000000010e911e18: callq 0x0000000102989054 ;*if_icmpge
; - me.tianshuang.IntrinsicTest::main@7 (line 7)
; {runtime_call}
0x000000010e911e1d: mov %ebp,%eax
0x000000010e911e1f: mov %ebx,%r11d
0x000000010e911e22: mov %r10d,%ebx
0x000000010e911e25: jmp 0x000000010e911dde
0x000000010e911e27: mov %r11d,%ebx
0x000000010e911e2a: jmp 0x000000010e911e05
0x000000010e911e2c: mov %ebp,%eax
0x000000010e911e2e: jmp 0x000000010e911e07
0x000000010e911e30: hlt
0x000000010e911e31: hlt
0x000000010e911e32: hlt
0x000000010e911e33: hlt
0x000000010e911e34: hlt
0x000000010e911e35: hlt
0x000000010e911e36: hlt
0x000000010e911e37: hlt
0x000000010e911e38: hlt
0x000000010e911e39: hlt
0x000000010e911e3a: hlt
0x000000010e911e3b: hlt
0x000000010e911e3c: hlt
0x000000010e911e3d: hlt
0x000000010e911e3e: hlt
0x000000010e911e3f: hlt

可以看出 line 8 的 Long.bitCount(i) 被直接编译为了 popcnt 指令,而没有使用 Long.bitCount 对应的 Java 实现。接下来加上 -XX:-UsePopCountInstruction 参数禁用 popcnt 指令,再使用命令 java -XX:+UnlockDiagnosticVMOptions -XX:+PrintAssembly -XX:-UsePopCountInstruction me.tianshuang.IntrinsicTest 打印出汇编代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
Decoding compiled method 0x0000000110911e10:
Code:
[Entry Point]
[Verified Entry Point]
[Constants]
# {method} {0x000000012fc002e0} 'main' '([Ljava/lang/String;)V' in 'me/tianshuang/IntrinsicTest'
0x0000000110911f60: callq 0x00000001063d1054 ; {runtime_call}
0x0000000110911f65: data16 data16 nopw 0x0(%rax,%rax,1)
0x0000000110911f70: mov %eax,-0x14000(%rsp)
0x0000000110911f77: push %rbp
0x0000000110911f78: sub $0x20,%rsp
0x0000000110911f7c: mov 0x8(%rsi),%ebp
0x0000000110911f7f: mov (%rsi),%ebx
0x0000000110911f81: mov %rsi,%rdi
0x0000000110911f84: movabs $0x106433f86,%r10
0x0000000110911f8e: callq *%r10 ;*iload_2
; - me.tianshuang.IntrinsicTest::main@4 (line 7)

0x0000000110911f91: cmp $0x3b9aca00,%ebx
0x0000000110911f97: jge 0x0000000110912270 ;*if_icmpge
; - me.tianshuang.IntrinsicTest::main@7 (line 7)

0x0000000110911f9d: mov %ebx,%r10d
0x0000000110911fa0: inc %r10d
0x0000000110911fa3: movabs $0xf0f0f0f0f0f0f0f,%r8
0x0000000110911fad: movabs $0xf0f0f0f0f0f0f,%r9
0x0000000110911fb7: movabs $0x3333333333333333,%rcx
0x0000000110911fc1: movabs $0x5555555555555555,%r13 ;*goto
; - me.tianshuang.IntrinsicTest::main@21 (line 7)

0x0000000110911fcb: movslq %ebx,%r11 ;*i2l ; - me.tianshuang.IntrinsicTest::main@12 (line 8)

0x0000000110911fce: mov %r11,%rdi
0x0000000110911fd1: shr %rdi
0x0000000110911fd4: and %r13,%rdi
0x0000000110911fd7: sub %rdi,%r11 ;*lsub
; - java.lang.Long::bitCount@8 (line 1470)
; - me.tianshuang.IntrinsicTest::main@13 (line 8)

0x0000000110911fda: mov %r11,%rdi
0x0000000110911fdd: and %rcx,%rdi
0x0000000110911fe0: shr $0x2,%r11
0x0000000110911fe4: and %rcx,%r11
0x0000000110911fe7: add %rdi,%r11 ;*ladd
; - java.lang.Long::bitCount@22 (line 1471)
; - me.tianshuang.IntrinsicTest::main@13 (line 8)

0x0000000110911fea: mov %r11,%rdi
0x0000000110911fed: shr $0x4,%rdi
0x0000000110911ff1: add %r11,%rdi ;*ladd
; - java.lang.Long::bitCount@28 (line 1472)
; - me.tianshuang.IntrinsicTest::main@13 (line 8)

0x0000000110911ff4: mov %rdi,%r11
0x0000000110911ff7: and %r8,%r11
0x0000000110911ffa: shr $0x8,%rdi
0x0000000110911ffe: and %r9,%rdi
0x0000000110912001: add %r11,%rdi ;*ladd
; - java.lang.Long::bitCount@39 (line 1473)
; - me.tianshuang.IntrinsicTest::main@13 (line 8)

0x0000000110912004: mov %rdi,%rdx
0x0000000110912007: shr $0x10,%rdx
0x000000011091200b: add %rdi,%rdx ;*ladd
; - java.lang.Long::bitCount@46 (line 1474)
; - me.tianshuang.IntrinsicTest::main@13 (line 8)

0x000000011091200e: mov %edx,%r11d
0x0000000110912011: shr $0x20,%rdx
0x0000000110912015: mov %edx,%edx
0x0000000110912017: add %r11d,%edx
0x000000011091201a: and $0x7f,%edx
0x000000011091201d: add %edx,%ebp ;*iadd
; - me.tianshuang.IntrinsicTest::main@16 (line 8)

0x000000011091201f: mov %ebx,%edx
0x0000000110912021: inc %edx ;*iinc
; - me.tianshuang.IntrinsicTest::main@18 (line 7)

0x0000000110912023: cmp %r10d,%edx
0x0000000110912026: jge 0x000000011091202c ;*if_icmpge
; - me.tianshuang.IntrinsicTest::main@7 (line 7)

0x0000000110912028: mov %edx,%ebx
0x000000011091202a: jmp 0x0000000110911fcb
0x000000011091202c: cmp $0x3b9ac9fd,%edx
0x0000000110912032: jge 0x000000011091225d
0x0000000110912038: jmp 0x0000000110912044
0x000000011091203a: nopw 0x0(%rax,%rax,1)
0x0000000110912040: mov %eax,%ebp
0x0000000110912042: mov %ebx,%edx ;*goto
; - me.tianshuang.IntrinsicTest::main@21 (line 7)

0x0000000110912044: movslq %edx,%rbx ;*i2l ; - me.tianshuang.IntrinsicTest::main@12 (line 8)

0x0000000110912047: mov %rbx,%r10
0x000000011091204a: add $0x3,%r10
0x000000011091204e: mov %rbx,%r11
0x0000000110912051: shr %r11
0x0000000110912054: and %r13,%r11
0x0000000110912057: mov %rbx,%rdi
0x000000011091205a: sub %r11,%rdi ;*lsub
; - java.lang.Long::bitCount@8 (line 1470)
; - me.tianshuang.IntrinsicTest::main@13 (line 8)

0x000000011091205d: mov %rdi,%r11
0x0000000110912060: and %rcx,%r11
0x0000000110912063: shr %r10
0x0000000110912066: and %r13,%r10
0x0000000110912069: mov %rbx,%rsi
0x000000011091206c: sub %r10,%rsi
0x000000011091206f: shr $0x2,%rdi
0x0000000110912073: and %rcx,%rdi
0x0000000110912076: add %r11,%rdi ;*ladd
; - java.lang.Long::bitCount@22 (line 1471)
; - me.tianshuang.IntrinsicTest::main@13 (line 8)

0x0000000110912079: add $0x3,%rsi ;*lsub
; - java.lang.Long::bitCount@8 (line 1470)
; - me.tianshuang.IntrinsicTest::main@13 (line 8)

0x000000011091207d: mov %rsi,%r10
0x0000000110912080: and %rcx,%r10
0x0000000110912083: mov %rdi,%rax
0x0000000110912086: shr $0x4,%rax
0x000000011091208a: add %rdi,%rax ;*ladd
; - java.lang.Long::bitCount@28 (line 1472)
; - me.tianshuang.IntrinsicTest::main@13 (line 8)

0x000000011091208d: mov %rax,%r11
0x0000000110912090: and %r8,%r11
0x0000000110912093: shr $0x2,%rsi
0x0000000110912097: and %rcx,%rsi
0x000000011091209a: add %r10,%rsi ;*ladd
; - java.lang.Long::bitCount@22 (line 1471)
; - me.tianshuang.IntrinsicTest::main@13 (line 8)

0x000000011091209d: shr $0x8,%rax
0x00000001109120a1: and %r9,%rax
0x00000001109120a4: add %r11,%rax ;*ladd
; - java.lang.Long::bitCount@39 (line 1473)
; - me.tianshuang.IntrinsicTest::main@13 (line 8)

0x00000001109120a7: mov %rsi,%rdi
0x00000001109120aa: shr $0x4,%rdi
0x00000001109120ae: add %rsi,%rdi ;*ladd
; - java.lang.Long::bitCount@28 (line 1472)
; - me.tianshuang.IntrinsicTest::main@13 (line 8)

0x00000001109120b1: mov %rdi,%r10
0x00000001109120b4: and %r8,%r10
0x00000001109120b7: mov %rax,%r11
0x00000001109120ba: shr $0x10,%r11
0x00000001109120be: add %rax,%r11 ;*ladd
; - java.lang.Long::bitCount@46 (line 1474)
; - me.tianshuang.IntrinsicTest::main@13 (line 8)

0x00000001109120c1: shr $0x8,%rdi
0x00000001109120c5: and %r9,%rdi
0x00000001109120c8: add %r10,%rdi ;*ladd
; - java.lang.Long::bitCount@39 (line 1473)
; - me.tianshuang.IntrinsicTest::main@13 (line 8)

0x00000001109120cb: mov %r11,%r10
0x00000001109120ce: shr $0x20,%r10
0x00000001109120d2: mov %rdi,%rax
0x00000001109120d5: shr $0x10,%rax
0x00000001109120d9: add %rdi,%rax ;*ladd
; - java.lang.Long::bitCount@46 (line 1474)
; - me.tianshuang.IntrinsicTest::main@13 (line 8)

0x00000001109120dc: mov %r10d,%esi
0x00000001109120df: mov %rax,%rdi
0x00000001109120e2: shr $0x20,%rdi
0x00000001109120e6: mov %eax,%r10d
0x00000001109120e9: mov %edi,%eax
0x00000001109120eb: add %r10d,%eax
0x00000001109120ee: mov %r11d,%r11d
0x00000001109120f1: add %r11d,%esi
0x00000001109120f4: and $0x7f,%eax
0x00000001109120f7: and $0x7f,%esi
0x00000001109120fa: add %ebp,%esi
0x00000001109120fc: mov %rbx,%r10
0x00000001109120ff: add $0x1,%r10
0x0000000110912103: mov %rbx,%r11
0x0000000110912106: add $0x2,%r11
0x000000011091210a: shr %r10
0x000000011091210d: and %r13,%r10
0x0000000110912110: mov %rbx,%rdi
0x0000000110912113: sub %r10,%rdi
0x0000000110912116: shr %r11
0x0000000110912119: and %r13,%r11
0x000000011091211c: sub %r11,%rbx
0x000000011091211f: add $0x1,%rdi
0x0000000110912123: mov %rdi,%r10
0x0000000110912126: and %rcx,%r10
0x0000000110912129: add $0x2,%rbx ;*lsub
; - java.lang.Long::bitCount@8 (line 1470)
; - me.tianshuang.IntrinsicTest::main@13 (line 8)

0x000000011091212d: mov %rbx,%r11
0x0000000110912130: and %rcx,%r11
0x0000000110912133: shr $0x2,%rdi
0x0000000110912137: and %rcx,%rdi
0x000000011091213a: add %r10,%rdi
0x000000011091213d: shr $0x2,%rbx
0x0000000110912141: and %rcx,%rbx
0x0000000110912144: add %r11,%rbx ;*ladd
; - java.lang.Long::bitCount@22 (line 1471)
; - me.tianshuang.IntrinsicTest::main@13 (line 8)

0x0000000110912147: mov %rdi,%rbp
0x000000011091214a: shr $0x4,%rbp
0x000000011091214e: add %rdi,%rbp
0x0000000110912151: mov %rbp,%r10
0x0000000110912154: and %r8,%r10
0x0000000110912157: mov %rbx,%rdi
0x000000011091215a: shr $0x4,%rdi
0x000000011091215e: add %rbx,%rdi ;*ladd
; - java.lang.Long::bitCount@28 (line 1472)
; - me.tianshuang.IntrinsicTest::main@13 (line 8)

0x0000000110912161: mov %rdi,%r11
0x0000000110912164: and %r8,%r11
0x0000000110912167: shr $0x8,%rbp
0x000000011091216b: and %r9,%rbp
0x000000011091216e: add %r10,%rbp
0x0000000110912171: shr $0x8,%rdi
0x0000000110912175: and %r9,%rdi
0x0000000110912178: add %r11,%rdi ;*ladd
; - java.lang.Long::bitCount@39 (line 1473)
; - me.tianshuang.IntrinsicTest::main@13 (line 8)

0x000000011091217b: mov %rbp,%r14
0x000000011091217e: shr $0x10,%r14
0x0000000110912182: add %rbp,%r14
0x0000000110912185: mov %rdi,%r11
0x0000000110912188: shr $0x10,%r11
0x000000011091218c: add %rdi,%r11 ;*ladd
; - java.lang.Long::bitCount@46 (line 1474)
; - me.tianshuang.IntrinsicTest::main@13 (line 8)

0x000000011091218f: mov %r14,%r10
0x0000000110912192: shr $0x20,%r10
0x0000000110912196: mov %r11,%rbx
0x0000000110912199: shr $0x20,%rbx
0x000000011091219d: mov %r10d,%edi
0x00000001109121a0: mov %ebx,%ebx
0x00000001109121a2: mov %r11d,%r11d
0x00000001109121a5: add %r11d,%ebx
0x00000001109121a8: mov %r14d,%r10d
0x00000001109121ab: add %r10d,%edi
0x00000001109121ae: and $0x7f,%ebx
0x00000001109121b1: and $0x7f,%edi
0x00000001109121b4: add %esi,%edi
0x00000001109121b6: add %edi,%ebx
0x00000001109121b8: add %ebx,%eax ;*iadd
; - me.tianshuang.IntrinsicTest::main@16 (line 8)

0x00000001109121ba: mov %edx,%ebx
0x00000001109121bc: add $0x4,%ebx ;*iinc
; - me.tianshuang.IntrinsicTest::main@18 (line 7)

0x00000001109121bf: cmp $0x3b9ac9fd,%ebx
0x00000001109121c5: jl 0x0000000110912040 ;*if_icmpge
; - me.tianshuang.IntrinsicTest::main@7 (line 7)

0x00000001109121cb: add $0x3,%edx ;*iinc
; - me.tianshuang.IntrinsicTest::main@18 (line 7)

0x00000001109121ce: cmp $0x3b9aca00,%ebx
0x00000001109121d4: jge 0x000000011091226c
0x00000001109121da: jmp 0x00000001109121df
0x00000001109121dc: mov %r11d,%ebx ;*goto
; - me.tianshuang.IntrinsicTest::main@21 (line 7)

0x00000001109121df: movslq %ebx,%r10 ;*i2l ; - me.tianshuang.IntrinsicTest::main@12 (line 8)

0x00000001109121e2: mov %r10,%r11
0x00000001109121e5: shr %r11
0x00000001109121e8: and %r13,%r11
0x00000001109121eb: sub %r11,%r10 ;*lsub
; - java.lang.Long::bitCount@8 (line 1470)
; - me.tianshuang.IntrinsicTest::main@13 (line 8)

0x00000001109121ee: mov %r10,%r11
0x00000001109121f1: and %rcx,%r11
0x00000001109121f4: shr $0x2,%r10
0x00000001109121f8: and %rcx,%r10
0x00000001109121fb: add %r11,%r10 ;*ladd
; - java.lang.Long::bitCount@22 (line 1471)
; - me.tianshuang.IntrinsicTest::main@13 (line 8)

0x00000001109121fe: mov %r10,%r11
0x0000000110912201: shr $0x4,%r11
0x0000000110912205: add %r10,%r11 ;*ladd
; - java.lang.Long::bitCount@28 (line 1472)
; - me.tianshuang.IntrinsicTest::main@13 (line 8)

0x0000000110912208: mov %r11,%r10
0x000000011091220b: and %r8,%r10
0x000000011091220e: shr $0x8,%r11
0x0000000110912212: and %r9,%r11
0x0000000110912215: add %r10,%r11 ;*ladd
; - java.lang.Long::bitCount@39 (line 1473)
; - me.tianshuang.IntrinsicTest::main@13 (line 8)

0x0000000110912218: mov %r11,%r10
0x000000011091221b: shr $0x10,%r10
0x000000011091221f: add %r11,%r10 ;*ladd
; - java.lang.Long::bitCount@46 (line 1474)
; - me.tianshuang.IntrinsicTest::main@13 (line 8)

0x0000000110912222: mov %r10d,%r11d
0x0000000110912225: shr $0x20,%r10
0x0000000110912229: mov %r10d,%edi
0x000000011091222c: add %r11d,%edi
0x000000011091222f: and $0x7f,%edi
0x0000000110912232: add %edi,%eax ;*iadd
; - me.tianshuang.IntrinsicTest::main@16 (line 8)

0x0000000110912234: mov %ebx,%r11d
0x0000000110912237: inc %r11d ;*iinc
; - me.tianshuang.IntrinsicTest::main@18 (line 7)

0x000000011091223a: cmp $0x3b9aca00,%r11d
0x0000000110912241: jl 0x00000001109121dc
0x0000000110912243: inc %ebx ;*if_icmpge
; - me.tianshuang.IntrinsicTest::main@7 (line 7)

0x0000000110912245: mov $0xffffff65,%esi
0x000000011091224a: mov %eax,%ebp
0x000000011091224c: mov %ebx,0x4(%rsp)
0x0000000110912250: data16 xchg %ax,%ax
0x0000000110912253: callq 0x00000001108456a0 ; OopMap{off=760}
;*if_icmpge
; - me.tianshuang.IntrinsicTest::main@7 (line 7)
; {runtime_call}
0x0000000110912258: callq 0x00000001063d1054 ;*if_icmpge
; - me.tianshuang.IntrinsicTest::main@7 (line 7)
; {runtime_call}
0x000000011091225d: mov %ebp,%eax
0x000000011091225f: mov %ebx,%r10d
0x0000000110912262: mov %edx,%ebx
0x0000000110912264: mov %r10d,%edx
0x0000000110912267: jmpq 0x00000001109121ce
0x000000011091226c: mov %edx,%ebx
0x000000011091226e: jmp 0x0000000110912243
0x0000000110912270: mov %ebp,%eax
0x0000000110912272: jmp 0x0000000110912245
0x0000000110912274: hlt
0x0000000110912275: hlt
0x0000000110912276: hlt
0x0000000110912277: hlt
0x0000000110912278: hlt
0x0000000110912279: hlt
0x000000011091227a: hlt
0x000000011091227b: hlt
0x000000011091227c: hlt
0x000000011091227d: hlt
0x000000011091227e: hlt
0x000000011091227f: hlt

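可以看出此时 line 8 使用的是 JDK 中 Long.bitCount 的 Java 实现(基于移位与掩码的位运算,汇编注释中可以看到 java.lang.Long::bitCount 的各行),不再出现 popcnt 指令。如果进行基准测试,那么使用了 popcnt 指令的版本会比 JDK Long.bitCount 的 Java 实现速度要快,在此不再演示。HotSpot 支持 intrinsic 的方法列表可以参考:vmSymbols.hpp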

Reference

Introduction to JVM Intrinsics | Baeldung
Bad Concurrency: Arithmetic Overflow and Intrinsics
GitHub - a10y/hsdis-macos: macOS Build artifacts for hsdis HotSpot Plugin
From Java to Assembly Down the Rabbit Hole (Charles Oliver Nutter, Red Hat)