xref: /aosp_15_r20/art/compiler/utils/x86_64/assembler_x86_64.cc (revision 795d594fd825385562da6b089ea9b2033f3abf5a)
1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "assembler_x86_64.h"
18 
19 #include "base/casts.h"
20 #include "base/memory_region.h"
21 #include "entrypoints/quick/quick_entrypoints.h"
22 #include "thread.h"
23 
24 namespace art HIDDEN {
25 namespace x86_64 {
26 
operator <<(std::ostream & os,const CpuRegister & reg)27 std::ostream& operator<<(std::ostream& os, const CpuRegister& reg) {
28   return os << reg.AsRegister();
29 }
30 
operator <<(std::ostream & os,const XmmRegister & reg)31 std::ostream& operator<<(std::ostream& os, const XmmRegister& reg) {
32   return os << reg.AsFloatRegister();
33 }
34 
operator <<(std::ostream & os,const X87Register & reg)35 std::ostream& operator<<(std::ostream& os, const X87Register& reg) {
36   return os << "ST" << static_cast<int>(reg);
37 }
38 
// Pretty-prints an Address operand in AT&T-style syntax, decoding the raw
// ModRM/SIB bytes stored in the Address. The switch is on the ModRM "mod"
// field: 0 = no displacement (with the RBP-base special case meaning
// disp32-only), 1 = 8-bit displacement, 2 = 32-bit displacement.
std::ostream& operator<<(std::ostream& os, const Address& addr) {
  switch (addr.mod()) {
    case 0:
      // rm != RSP means no SIB byte; index == RSP in the SIB byte means
      // "no index register". Either way this is a plain "(%reg)" operand.
      if (addr.rm() != RSP || addr.cpu_index().AsRegister() == RSP) {
        return os << "(%" << addr.cpu_rm() << ")";
      } else if (addr.base() == RBP) {
        // mod == 0 with SIB base == RBP encodes disp32 with no base register:
        // "disp(,%index,scale)".
        return os << static_cast<int>(addr.disp32()) << "(,%" << addr.cpu_index()
                  << "," << (1 << addr.scale()) << ")";
      }
      // Full SIB form without displacement: "(%base,%index,scale)".
      return os << "(%" << addr.cpu_base() << ",%"
                << addr.cpu_index() << "," << (1 << addr.scale()) << ")";
    case 1:
      // 8-bit displacement, with or without a SIB index.
      if (addr.rm() != RSP || addr.cpu_index().AsRegister() == RSP) {
        return os << static_cast<int>(addr.disp8()) << "(%" << addr.cpu_rm() << ")";
      }
      return os << static_cast<int>(addr.disp8()) << "(%" << addr.cpu_base() << ",%"
                << addr.cpu_index() << "," << (1 << addr.scale()) << ")";
    case 2:
      // 32-bit displacement, with or without a SIB index.
      if (addr.rm() != RSP || addr.cpu_index().AsRegister() == RSP) {
        return os << static_cast<int>(addr.disp32()) << "(%" << addr.cpu_rm() << ")";
      }
      return os << static_cast<int>(addr.disp32()) << "(%" << addr.cpu_base() << ",%"
                << addr.cpu_index() << "," << (1 << addr.scale()) << ")";
    default:
      // mod == 3 is a register operand, which an Address should never encode.
      return os << "<address?>";
  }
}
66 
CpuHasAVXorAVX2FeatureFlag()67 bool X86_64Assembler::CpuHasAVXorAVX2FeatureFlag() {
68   if (has_AVX_ || has_AVX2_) {
69     return true;
70   }
71   return false;
72 }
73 
74 
call(CpuRegister reg)75 void X86_64Assembler::call(CpuRegister reg) {
76   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
77   EmitOptionalRex32(reg);
78   EmitUint8(0xFF);
79   EmitRegisterOperand(2, reg.LowBits());
80 }
81 
82 
call(const Address & address)83 void X86_64Assembler::call(const Address& address) {
84   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
85   EmitOptionalRex32(address);
86   EmitUint8(0xFF);
87   EmitOperand(2, address);
88 }
89 
90 
call(Label * label)91 void X86_64Assembler::call(Label* label) {
92   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
93   EmitUint8(0xE8);
94   static const int kSize = 5;
95   // Offset by one because we already have emitted the opcode.
96   EmitLabel(label, kSize - 1);
97 }
98 
pushq(CpuRegister reg)99 void X86_64Assembler::pushq(CpuRegister reg) {
100   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
101   EmitOptionalRex32(reg);
102   EmitUint8(0x50 + reg.LowBits());
103 }
104 
105 
pushq(const Address & address)106 void X86_64Assembler::pushq(const Address& address) {
107   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
108   EmitOptionalRex32(address);
109   EmitUint8(0xFF);
110   EmitOperand(6, address);
111 }
112 
113 
pushq(const Immediate & imm)114 void X86_64Assembler::pushq(const Immediate& imm) {
115   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
116   CHECK(imm.is_int32());  // pushq only supports 32b immediate.
117   if (imm.is_int8()) {
118     EmitUint8(0x6A);
119     EmitUint8(imm.value() & 0xFF);
120   } else {
121     EmitUint8(0x68);
122     EmitImmediate(imm);
123   }
124 }
125 
126 
popq(CpuRegister reg)127 void X86_64Assembler::popq(CpuRegister reg) {
128   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
129   EmitOptionalRex32(reg);
130   EmitUint8(0x58 + reg.LowBits());
131 }
132 
133 
popq(const Address & address)134 void X86_64Assembler::popq(const Address& address) {
135   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
136   EmitOptionalRex32(address);
137   EmitUint8(0x8F);
138   EmitOperand(0, address);
139 }
140 
141 
movq(CpuRegister dst,const Immediate & imm)142 void X86_64Assembler::movq(CpuRegister dst, const Immediate& imm) {
143   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
144   if (imm.is_int32()) {
145     // 32 bit. Note: sign-extends.
146     EmitRex64(dst);
147     EmitUint8(0xC7);
148     EmitRegisterOperand(0, dst.LowBits());
149     EmitInt32(static_cast<int32_t>(imm.value()));
150   } else {
151     EmitRex64(dst);
152     EmitUint8(0xB8 + dst.LowBits());
153     EmitInt64(imm.value());
154   }
155 }
156 
157 
movl(CpuRegister dst,const Immediate & imm)158 void X86_64Assembler::movl(CpuRegister dst, const Immediate& imm) {
159   CHECK(imm.is_int32());
160   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
161   EmitOptionalRex32(dst);
162   EmitUint8(0xB8 + dst.LowBits());
163   EmitImmediate(imm);
164 }
165 
166 
movq(const Address & dst,const Immediate & imm)167 void X86_64Assembler::movq(const Address& dst, const Immediate& imm) {
168   CHECK(imm.is_int32());
169   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
170   EmitRex64(dst);
171   EmitUint8(0xC7);
172   EmitOperand(0, dst);
173   EmitImmediate(imm);
174 }
175 
176 
movq(CpuRegister dst,CpuRegister src)177 void X86_64Assembler::movq(CpuRegister dst, CpuRegister src) {
178   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
179   // 0x89 is movq r/m64 <- r64, with op1 in r/m and op2 in reg: so reverse EmitRex64
180   EmitRex64(src, dst);
181   EmitUint8(0x89);
182   EmitRegisterOperand(src.LowBits(), dst.LowBits());
183 }
184 
185 
movl(CpuRegister dst,CpuRegister src)186 void X86_64Assembler::movl(CpuRegister dst, CpuRegister src) {
187   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
188   EmitOptionalRex32(dst, src);
189   EmitUint8(0x8B);
190   EmitRegisterOperand(dst.LowBits(), src.LowBits());
191 }
192 
193 
movq(CpuRegister dst,const Address & src)194 void X86_64Assembler::movq(CpuRegister dst, const Address& src) {
195   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
196   EmitRex64(dst, src);
197   EmitUint8(0x8B);
198   EmitOperand(dst.LowBits(), src);
199 }
200 
201 
movl(CpuRegister dst,const Address & src)202 void X86_64Assembler::movl(CpuRegister dst, const Address& src) {
203   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
204   EmitOptionalRex32(dst, src);
205   EmitUint8(0x8B);
206   EmitOperand(dst.LowBits(), src);
207 }
208 
209 
movq(const Address & dst,CpuRegister src)210 void X86_64Assembler::movq(const Address& dst, CpuRegister src) {
211   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
212   EmitRex64(src, dst);
213   EmitUint8(0x89);
214   EmitOperand(src.LowBits(), dst);
215 }
216 
217 
movl(const Address & dst,CpuRegister src)218 void X86_64Assembler::movl(const Address& dst, CpuRegister src) {
219   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
220   EmitOptionalRex32(src, dst);
221   EmitUint8(0x89);
222   EmitOperand(src.LowBits(), dst);
223 }
224 
movl(const Address & dst,const Immediate & imm)225 void X86_64Assembler::movl(const Address& dst, const Immediate& imm) {
226   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
227   EmitOptionalRex32(dst);
228   EmitUint8(0xC7);
229   EmitOperand(0, dst);
230   EmitImmediate(imm);
231 }
232 
movntl(const Address & dst,CpuRegister src)233 void X86_64Assembler::movntl(const Address& dst, CpuRegister src) {
234   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
235   EmitOptionalRex32(src, dst);
236   EmitUint8(0x0F);
237   EmitUint8(0xC3);
238   EmitOperand(src.LowBits(), dst);
239 }
240 
movntq(const Address & dst,CpuRegister src)241 void X86_64Assembler::movntq(const Address& dst, CpuRegister src) {
242   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
243   EmitRex64(src, dst);
244   EmitUint8(0x0F);
245   EmitUint8(0xC3);
246   EmitOperand(src.LowBits(), dst);
247 }
248 
cmov(Condition c,CpuRegister dst,CpuRegister src)249 void X86_64Assembler::cmov(Condition c, CpuRegister dst, CpuRegister src) {
250   cmov(c, dst, src, true);
251 }
252 
cmov(Condition c,CpuRegister dst,CpuRegister src,bool is64bit)253 void X86_64Assembler::cmov(Condition c, CpuRegister dst, CpuRegister src, bool is64bit) {
254   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
255   EmitOptionalRex(false, is64bit, dst.NeedsRex(), false, src.NeedsRex());
256   EmitUint8(0x0F);
257   EmitUint8(0x40 + c);
258   EmitRegisterOperand(dst.LowBits(), src.LowBits());
259 }
260 
261 
cmov(Condition c,CpuRegister dst,const Address & src,bool is64bit)262 void X86_64Assembler::cmov(Condition c, CpuRegister dst, const Address& src, bool is64bit) {
263   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
264   if (is64bit) {
265     EmitRex64(dst, src);
266   } else {
267     EmitOptionalRex32(dst, src);
268   }
269   EmitUint8(0x0F);
270   EmitUint8(0x40 + c);
271   EmitOperand(dst.LowBits(), src);
272 }
273 
274 
movzxb(CpuRegister dst,CpuRegister src)275 void X86_64Assembler::movzxb(CpuRegister dst, CpuRegister src) {
276   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
277   EmitOptionalByteRegNormalizingRex32(dst, src);
278   EmitUint8(0x0F);
279   EmitUint8(0xB6);
280   EmitRegisterOperand(dst.LowBits(), src.LowBits());
281 }
282 
283 
movzxb(CpuRegister dst,const Address & src)284 void X86_64Assembler::movzxb(CpuRegister dst, const Address& src) {
285   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
286   // Byte register is only in the source register form, so we don't use
287   // EmitOptionalByteRegNormalizingRex32(dst, src);
288   EmitOptionalRex32(dst, src);
289   EmitUint8(0x0F);
290   EmitUint8(0xB6);
291   EmitOperand(dst.LowBits(), src);
292 }
293 
294 
movsxb(CpuRegister dst,CpuRegister src)295 void X86_64Assembler::movsxb(CpuRegister dst, CpuRegister src) {
296   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
297   EmitOptionalByteRegNormalizingRex32(dst, src);
298   EmitUint8(0x0F);
299   EmitUint8(0xBE);
300   EmitRegisterOperand(dst.LowBits(), src.LowBits());
301 }
302 
303 
movsxb(CpuRegister dst,const Address & src)304 void X86_64Assembler::movsxb(CpuRegister dst, const Address& src) {
305   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
306   // Byte register is only in the source register form, so we don't use
307   // EmitOptionalByteRegNormalizingRex32(dst, src);
308   EmitOptionalRex32(dst, src);
309   EmitUint8(0x0F);
310   EmitUint8(0xBE);
311   EmitOperand(dst.LowBits(), src);
312 }
313 
314 
// Byte loads must state their extension explicitly on x86-64; a bare 8-bit
// load into a 32/64-bit register is ambiguous, so this overload is disabled.
void X86_64Assembler::movb(CpuRegister /*dst*/, const Address& /*src*/) {
  LOG(FATAL) << "Use movzxb or movsxb instead.";
}
318 
319 
movb(const Address & dst,CpuRegister src)320 void X86_64Assembler::movb(const Address& dst, CpuRegister src) {
321   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
322   EmitOptionalByteRegNormalizingRex32(src, dst);
323   EmitUint8(0x88);
324   EmitOperand(src.LowBits(), dst);
325 }
326 
327 
movb(const Address & dst,const Immediate & imm)328 void X86_64Assembler::movb(const Address& dst, const Immediate& imm) {
329   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
330   EmitOptionalRex32(dst);
331   EmitUint8(0xC6);
332   EmitOperand(Register::RAX, dst);
333   CHECK(imm.is_int8());
334   EmitUint8(imm.value() & 0xFF);
335 }
336 
337 
movzxw(CpuRegister dst,CpuRegister src)338 void X86_64Assembler::movzxw(CpuRegister dst, CpuRegister src) {
339   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
340   EmitOptionalRex32(dst, src);
341   EmitUint8(0x0F);
342   EmitUint8(0xB7);
343   EmitRegisterOperand(dst.LowBits(), src.LowBits());
344 }
345 
346 
movzxw(CpuRegister dst,const Address & src)347 void X86_64Assembler::movzxw(CpuRegister dst, const Address& src) {
348   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
349   EmitOptionalRex32(dst, src);
350   EmitUint8(0x0F);
351   EmitUint8(0xB7);
352   EmitOperand(dst.LowBits(), src);
353 }
354 
355 
movsxw(CpuRegister dst,CpuRegister src)356 void X86_64Assembler::movsxw(CpuRegister dst, CpuRegister src) {
357   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
358   EmitOptionalRex32(dst, src);
359   EmitUint8(0x0F);
360   EmitUint8(0xBF);
361   EmitRegisterOperand(dst.LowBits(), src.LowBits());
362 }
363 
364 
movsxw(CpuRegister dst,const Address & src)365 void X86_64Assembler::movsxw(CpuRegister dst, const Address& src) {
366   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
367   EmitOptionalRex32(dst, src);
368   EmitUint8(0x0F);
369   EmitUint8(0xBF);
370   EmitOperand(dst.LowBits(), src);
371 }
372 
373 
// Word loads must state their extension explicitly on x86-64; a bare 16-bit
// load into a 32/64-bit register is ambiguous, so this overload is disabled.
void X86_64Assembler::movw(CpuRegister /*dst*/, const Address& /*src*/) {
  LOG(FATAL) << "Use movzxw or movsxw instead.";
}
377 
378 
movw(const Address & dst,CpuRegister src)379 void X86_64Assembler::movw(const Address& dst, CpuRegister src) {
380   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
381   EmitOperandSizeOverride();
382   EmitOptionalRex32(src, dst);
383   EmitUint8(0x89);
384   EmitOperand(src.LowBits(), dst);
385 }
386 
387 
movw(const Address & dst,const Immediate & imm)388 void X86_64Assembler::movw(const Address& dst, const Immediate& imm) {
389   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
390   EmitOperandSizeOverride();
391   EmitOptionalRex32(dst);
392   EmitUint8(0xC7);
393   EmitOperand(Register::RAX, dst);
394   CHECK(imm.is_uint16() || imm.is_int16());
395   EmitUint8(imm.value() & 0xFF);
396   EmitUint8(imm.value() >> 8);
397 }
398 
399 
leaq(CpuRegister dst,const Address & src)400 void X86_64Assembler::leaq(CpuRegister dst, const Address& src) {
401   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
402   EmitRex64(dst, src);
403   EmitUint8(0x8D);
404   EmitOperand(dst.LowBits(), src);
405 }
406 
407 
leal(CpuRegister dst,const Address & src)408 void X86_64Assembler::leal(CpuRegister dst, const Address& src) {
409   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
410   EmitOptionalRex32(dst, src);
411   EmitUint8(0x8D);
412   EmitOperand(dst.LowBits(), src);
413 }
414 
415 
movaps(XmmRegister dst,XmmRegister src)416 void X86_64Assembler::movaps(XmmRegister dst, XmmRegister src) {
417   if (CpuHasAVXorAVX2FeatureFlag()) {
418     vmovaps(dst, src);
419     return;
420   }
421   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
422   EmitOptionalRex32(dst, src);
423   EmitUint8(0x0F);
424   EmitUint8(0x28);
425   EmitXmmRegisterOperand(dst.LowBits(), src);
426 }
427 
428 
/** VEX.128.0F.WIG 28 /r VMOVAPS xmm1, xmm2 (load form; 29 /r is the store form). */
void X86_64Assembler::vmovaps(XmmRegister dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  uint8_t byte_zero, byte_one, byte_two;  // byte_two is only used for the 3-byte VEX form.
  bool is_twobyte_form = true;
  // The 2-byte VEX prefix carries only one REX-extension bit (R). When only
  // one of the operands needs extension we can still use the 2-byte form by
  // choosing the opcode direction (28 load vs 29 store) that puts the
  // extended register into the ModRM reg field.
  bool load = dst.NeedsRex();
  bool store = !load;

  // Both operands extended: the 2-byte form cannot encode this; fall back to
  // the 3-byte VEX prefix, which has both R and B bits.
  if (src.NeedsRex()&& dst.NeedsRex()) {
    is_twobyte_form = false;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Instruction VEX Prefix
  byte_zero = EmitVexPrefixByteZero(is_twobyte_form);
  // vvvv is unused by VMOVAPS and must encode "no register".
  X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
  if (is_twobyte_form) {
    // The single R bit covers whichever operand will sit in the reg field.
    bool rex_bit = (load) ? dst.NeedsRex() : src.NeedsRex();
    byte_one = EmitVexPrefixByteOne(rex_bit,
                                    vvvv_reg,
                                    SET_VEX_L_128,
                                    SET_VEX_PP_NONE);
  } else {
    byte_one = EmitVexPrefixByteOne(dst.NeedsRex(),
                                    /*X=*/ false,
                                    src.NeedsRex(),
                                    SET_VEX_M_0F);
    byte_two = EmitVexPrefixByteTwo(/*W=*/ false,
                                    SET_VEX_L_128,
                                    SET_VEX_PP_NONE);
  }
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  if (!is_twobyte_form) {
    EmitUint8(byte_two);
  }
  // Instruction Opcode: use the store direction (29) when dst must be
  // reachable through ModRM r/m; otherwise the load direction (28).
  if (is_twobyte_form && store) {
    EmitUint8(0x29);
  } else {
    EmitUint8(0x28);
  }
  // Instruction Operands: the reg/rm roles swap with the opcode direction.
  if (is_twobyte_form && store) {
    EmitXmmRegisterOperand(src.LowBits(), dst);
  } else {
    EmitXmmRegisterOperand(dst.LowBits(), src);
  }
}
477 
movaps(XmmRegister dst,const Address & src)478 void X86_64Assembler::movaps(XmmRegister dst, const Address& src) {
479   if (CpuHasAVXorAVX2FeatureFlag()) {
480     vmovaps(dst, src);
481     return;
482   }
483   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
484   EmitOptionalRex32(dst, src);
485   EmitUint8(0x0F);
486   EmitUint8(0x28);
487   EmitOperand(dst.LowBits(), src);
488 }
489 
/** VEX.128.0F.WIG 28 /r VMOVAPS xmm1, m128 (aligned 128-bit load). */
void X86_64Assembler::vmovaps(XmmRegister dst, const Address& src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;  // ByteTwo is only used for the 3-byte VEX form.
  bool is_twobyte_form = false;
  // Instruction VEX Prefix: the 2-byte form has no X/B bits, so it can only
  // be used when the memory operand needs neither REX.X nor REX.B.
  uint8_t rex = src.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  if (!Rex_b && !Rex_x) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    // vvvv is unused by VMOVAPS and must encode "no register".
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x28);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}
529 
movups(XmmRegister dst,const Address & src)530 void X86_64Assembler::movups(XmmRegister dst, const Address& src) {
531   if (CpuHasAVXorAVX2FeatureFlag()) {
532     vmovups(dst, src);
533     return;
534   }
535   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
536   EmitOptionalRex32(dst, src);
537   EmitUint8(0x0F);
538   EmitUint8(0x10);
539   EmitOperand(dst.LowBits(), src);
540 }
541 
/** VEX.128.0F.WIG 10 /r VMOVUPS xmm1, m128 (unaligned 128-bit load). */
void X86_64Assembler::vmovups(XmmRegister dst, const Address& src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;  // ByteTwo is only used for the 3-byte VEX form.
  bool is_twobyte_form = false;
  // Instruction VEX Prefix: the 2-byte form has no X/B bits, so it can only
  // be used when the memory operand needs neither REX.X nor REX.B.
  uint8_t rex = src.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  if (!Rex_x && !Rex_b) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    // vvvv is unused by VMOVUPS and must encode "no register".
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x10);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}
581 
582 
movaps(const Address & dst,XmmRegister src)583 void X86_64Assembler::movaps(const Address& dst, XmmRegister src) {
584   if (CpuHasAVXorAVX2FeatureFlag()) {
585     vmovaps(dst, src);
586     return;
587   }
588   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
589   EmitOptionalRex32(src, dst);
590   EmitUint8(0x0F);
591   EmitUint8(0x29);
592   EmitOperand(src.LowBits(), dst);
593 }
594 
/** VEX.128.0F.WIG 29 /r VMOVAPS m128, xmm1 (aligned 128-bit store). */
void X86_64Assembler::vmovaps(const Address& dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;  // ByteTwo is only used for the 3-byte VEX form.
  bool is_twobyte_form = false;

  // Instruction VEX Prefix: the 2-byte form has no X/B bits, so it can only
  // be used when the memory operand needs neither REX.X nor REX.B.
  uint8_t rex = dst.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  if (!Rex_b && !Rex_x) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    // vvvv is unused by VMOVAPS and must encode "no register".
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x29);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}
635 
movups(const Address & dst,XmmRegister src)636 void X86_64Assembler::movups(const Address& dst, XmmRegister src) {
637   if (CpuHasAVXorAVX2FeatureFlag()) {
638     vmovups(dst, src);
639     return;
640   }
641   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
642   EmitOptionalRex32(src, dst);
643   EmitUint8(0x0F);
644   EmitUint8(0x11);
645   EmitOperand(src.LowBits(), dst);
646 }
647 
/** VEX.128.0F.WIG 11 /r VMOVUPS m128, xmm1 (unaligned 128-bit store). */
void X86_64Assembler::vmovups(const Address& dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;  // ByteTwo is only used for the 3-byte VEX form.
  bool is_twobyte_form = false;

  // Instruction VEX Prefix: the 2-byte form has no X/B bits, so it can only
  // be used when the memory operand needs neither REX.X nor REX.B.
  uint8_t rex = dst.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  if (!Rex_b && !Rex_x) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    // vvvv is unused by VMOVUPS and must encode "no register".
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x11);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}
688 
689 
movss(XmmRegister dst,const Address & src)690 void X86_64Assembler::movss(XmmRegister dst, const Address& src) {
691   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
692   EmitUint8(0xF3);
693   EmitOptionalRex32(dst, src);
694   EmitUint8(0x0F);
695   EmitUint8(0x10);
696   EmitOperand(dst.LowBits(), src);
697 }
698 
699 
movss(const Address & dst,XmmRegister src)700 void X86_64Assembler::movss(const Address& dst, XmmRegister src) {
701   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
702   EmitUint8(0xF3);
703   EmitOptionalRex32(src, dst);
704   EmitUint8(0x0F);
705   EmitUint8(0x11);
706   EmitOperand(src.LowBits(), dst);
707 }
708 
709 
movss(XmmRegister dst,XmmRegister src)710 void X86_64Assembler::movss(XmmRegister dst, XmmRegister src) {
711   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
712   EmitUint8(0xF3);
713   EmitOptionalRex32(src, dst);  // Movss is MR encoding instead of the usual RM.
714   EmitUint8(0x0F);
715   EmitUint8(0x11);
716   EmitXmmRegisterOperand(src.LowBits(), dst);
717 }
718 
719 
movsxd(CpuRegister dst,CpuRegister src)720 void X86_64Assembler::movsxd(CpuRegister dst, CpuRegister src) {
721   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
722   EmitRex64(dst, src);
723   EmitUint8(0x63);
724   EmitRegisterOperand(dst.LowBits(), src.LowBits());
725 }
726 
727 
movsxd(CpuRegister dst,const Address & src)728 void X86_64Assembler::movsxd(CpuRegister dst, const Address& src) {
729   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
730   EmitRex64(dst, src);
731   EmitUint8(0x63);
732   EmitOperand(dst.LowBits(), src);
733 }
734 
735 
movd(XmmRegister dst,CpuRegister src)736 void X86_64Assembler::movd(XmmRegister dst, CpuRegister src) {
737   movd(dst, src, true);
738 }
739 
movd(CpuRegister dst,XmmRegister src)740 void X86_64Assembler::movd(CpuRegister dst, XmmRegister src) {
741   movd(dst, src, true);
742 }
743 
movd(XmmRegister dst,CpuRegister src,bool is64bit)744 void X86_64Assembler::movd(XmmRegister dst, CpuRegister src, bool is64bit) {
745   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
746   EmitUint8(0x66);
747   EmitOptionalRex(false, is64bit, dst.NeedsRex(), false, src.NeedsRex());
748   EmitUint8(0x0F);
749   EmitUint8(0x6E);
750   EmitOperand(dst.LowBits(), Operand(src));
751 }
752 
movd(CpuRegister dst,XmmRegister src,bool is64bit)753 void X86_64Assembler::movd(CpuRegister dst, XmmRegister src, bool is64bit) {
754   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
755   EmitUint8(0x66);
756   EmitOptionalRex(false, is64bit, src.NeedsRex(), false, dst.NeedsRex());
757   EmitUint8(0x0F);
758   EmitUint8(0x7E);
759   EmitOperand(src.LowBits(), Operand(dst));
760 }
761 
addss(XmmRegister dst,XmmRegister src)762 void X86_64Assembler::addss(XmmRegister dst, XmmRegister src) {
763   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
764   EmitUint8(0xF3);
765   EmitOptionalRex32(dst, src);
766   EmitUint8(0x0F);
767   EmitUint8(0x58);
768   EmitXmmRegisterOperand(dst.LowBits(), src);
769 }
770 
addss(XmmRegister dst,const Address & src)771 void X86_64Assembler::addss(XmmRegister dst, const Address& src) {
772   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
773   EmitUint8(0xF3);
774   EmitOptionalRex32(dst, src);
775   EmitUint8(0x0F);
776   EmitUint8(0x58);
777   EmitOperand(dst.LowBits(), src);
778 }
779 
780 
subss(XmmRegister dst,XmmRegister src)781 void X86_64Assembler::subss(XmmRegister dst, XmmRegister src) {
782   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
783   EmitUint8(0xF3);
784   EmitOptionalRex32(dst, src);
785   EmitUint8(0x0F);
786   EmitUint8(0x5C);
787   EmitXmmRegisterOperand(dst.LowBits(), src);
788 }
789 
790 
subss(XmmRegister dst,const Address & src)791 void X86_64Assembler::subss(XmmRegister dst, const Address& src) {
792   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
793   EmitUint8(0xF3);
794   EmitOptionalRex32(dst, src);
795   EmitUint8(0x0F);
796   EmitUint8(0x5C);
797   EmitOperand(dst.LowBits(), src);
798 }
799 
800 
mulss(XmmRegister dst,XmmRegister src)801 void X86_64Assembler::mulss(XmmRegister dst, XmmRegister src) {
802   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
803   EmitUint8(0xF3);
804   EmitOptionalRex32(dst, src);
805   EmitUint8(0x0F);
806   EmitUint8(0x59);
807   EmitXmmRegisterOperand(dst.LowBits(), src);
808 }
809 
810 
mulss(XmmRegister dst,const Address & src)811 void X86_64Assembler::mulss(XmmRegister dst, const Address& src) {
812   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
813   EmitUint8(0xF3);
814   EmitOptionalRex32(dst, src);
815   EmitUint8(0x0F);
816   EmitUint8(0x59);
817   EmitOperand(dst.LowBits(), src);
818 }
819 
820 
divss(XmmRegister dst,XmmRegister src)821 void X86_64Assembler::divss(XmmRegister dst, XmmRegister src) {
822   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
823   EmitUint8(0xF3);
824   EmitOptionalRex32(dst, src);
825   EmitUint8(0x0F);
826   EmitUint8(0x5E);
827   EmitXmmRegisterOperand(dst.LowBits(), src);
828 }
829 
830 
divss(XmmRegister dst,const Address & src)831 void X86_64Assembler::divss(XmmRegister dst, const Address& src) {
832   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
833   EmitUint8(0xF3);
834   EmitOptionalRex32(dst, src);
835   EmitUint8(0x0F);
836   EmitUint8(0x5E);
837   EmitOperand(dst.LowBits(), src);
838 }
839 
840 
addps(XmmRegister dst,XmmRegister src)841 void X86_64Assembler::addps(XmmRegister dst, XmmRegister src) {
842   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
843   EmitOptionalRex32(dst, src);
844   EmitUint8(0x0F);
845   EmitUint8(0x58);
846   EmitXmmRegisterOperand(dst.LowBits(), src);
847 }
848 
849 
subps(XmmRegister dst,XmmRegister src)850 void X86_64Assembler::subps(XmmRegister dst, XmmRegister src) {
851   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
852   EmitOptionalRex32(dst, src);
853   EmitUint8(0x0F);
854   EmitUint8(0x5C);
855   EmitXmmRegisterOperand(dst.LowBits(), src);
856 }
857 
// VADDPS xmm1, xmm2, xmm3 — VEX.128.0F.WIG 58 /r (packed single add).
void X86_64Assembler::vaddps(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // The 2-byte VEX form has no B bit, so it requires add_right (the r/m
  // operand) to be a low register.
  if (!add_right.NeedsRex()) {
    is_twobyte_form = true;
  } else if (!add_left.NeedsRex()) {
    // Addition is commutative: swap the sources so the low register lands in
    // r/m and the shorter 2-byte VEX prefix can still be used.
    return vaddps(dst, add_right, add_left);
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // add_left is encoded (inverted) in the VEX vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   add_right.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), add_right);
}
888 
// VSUBPS (VEX.128.0F.WIG 5C /r): dst = src1 - src2, packed single-precision.
// Unlike vaddps, subtraction is not commutative, so no operand swap is attempted.
void X86_64Assembler::vsubps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t byte_zero = 0x00, byte_one = 0x00, byte_two = 0x00;
  // Two-byte VEX is only usable when the r/m source needs no REX.B.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  byte_zero = EmitVexPrefixByteZero(is_twobyte_form);
  X86_64ManagedRegister vvvv_reg = X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  if (is_twobyte_form) {
    byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), /*X=*/ false, src2.NeedsRex(), SET_VEX_M_0F);
    byte_two = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  if (!is_twobyte_form) {
    EmitUint8(byte_two);
  }
  EmitUint8(0x5C);  // SUBPS opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
913 
914 
// MULPS (0F 59 /r): packed single-precision multiply, register to register.
void X86_64Assembler::mulps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);  // REX only when xmm8-xmm15 are involved.
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
922 
// VMULPS (VEX.128.0F.WIG 59 /r): dst = src1 * src2, packed single-precision.
void X86_64Assembler::vmulps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Two-byte VEX cannot carry REX.B; multiplication is commutative, so swap
  // the sources when that enables the shorter prefix.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  } else if (!src1.NeedsRex()) {
    return vmulps(dst, src2, src1);
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());  // First source in VEX.vvvv.
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x59);  // MULPS opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
953 
// DIVPS (0F 5E /r): packed single-precision divide, register to register.
void X86_64Assembler::divps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);  // REX only when xmm8-xmm15 are involved.
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
961 
// VDIVPS (VEX.128.0F.WIG 5E /r): dst = src1 / src2, packed single-precision.
// Division is not commutative, so no operand-swap shortening is attempted.
void X86_64Assembler::vdivps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Two-byte VEX is only usable when the r/m source needs no REX.B.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());  // First source in VEX.vvvv.
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x5E);  // DIVPS opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
990 
// VFMADD213SS (VEX.66.0F38.W0 A9 /r): scalar single fused multiply-add,
// 213 form: acc = left * acc + right. FMA always requires the 3-byte VEX
// prefix because of the 0F38 opcode map.
void X86_64Assembler::vfmadd213ss(XmmRegister acc, XmmRegister left, XmmRegister right) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  ByteZero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(left.AsFloatRegister());  // Second operand in VEX.vvvv.
  ByteOne = EmitVexPrefixByteOne(acc.NeedsRex(),
                                 /*X=*/ false,
                                 right.NeedsRex(),
                                 SET_VEX_M_0F_38);
  ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  EmitUint8(ByteTwo);
  EmitUint8(0xA9);
  EmitXmmRegisterOperand(acc.LowBits(), right);
}
1009 
// VFMADD213SD (VEX.66.0F38.W1 A9 /r): scalar double fused multiply-add,
// 213 form: acc = left * acc + right. W=1 selects the double-precision form.
void X86_64Assembler::vfmadd213sd(XmmRegister acc, XmmRegister left, XmmRegister right) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  ByteZero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(left.AsFloatRegister());  // Second operand in VEX.vvvv.
  ByteOne = EmitVexPrefixByteOne(acc.NeedsRex(),
                                 /*X=*/ false,
                                 right.NeedsRex(),
                                 SET_VEX_M_0F_38);
  ByteTwo = EmitVexPrefixByteTwo(/*W=*/ true, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  EmitUint8(ByteTwo);
  EmitUint8(0xA9);
  EmitXmmRegisterOperand(acc.LowBits(), right);
}
// FLDS (D9 /0): push a single-precision value from memory onto the x87 stack.
void X86_64Assembler::flds(const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(0, src);  // /0 opcode extension in the ModRM reg field.
}
1033 
1034 
// FSTS (D9 /2): store ST(0) to memory as single-precision, without popping.
void X86_64Assembler::fsts(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(2, dst);  // /2 opcode extension in the ModRM reg field.
}
1040 
1041 
// FSTPS (D9 /3): store ST(0) to memory as single-precision and pop the x87 stack.
void X86_64Assembler::fstps(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(3, dst);  // /3 opcode extension in the ModRM reg field.
}
1047 
1048 
// MOVAPD xmm, xmm (66 0F 28 /r): move aligned packed doubles, register to register.
// Dispatches to the VEX-encoded form when AVX/AVX2 is available.
void X86_64Assembler::movapd(XmmRegister dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovapd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x28);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1061 
/** VEX.128.66.0F.WIG 28 /r VMOVAPD xmm1, xmm2 */
void X86_64Assembler::vmovapd(XmmRegister dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = true;

  // Two-byte VEX has only the R bit; when both registers need REX, fall back
  // to the three-byte form.
  if (src.NeedsRex() && dst.NeedsRex()) {
    is_twobyte_form = false;
  }
  // Instruction VEX Prefix
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  // "load" direction (opcode 0x28) is kept when dst needs REX so its bit can
  // be encoded in VEX.R; otherwise the store form (0x29) is used with the
  // operands swapped, which encodes src's bit in VEX.R instead.
  bool load = dst.NeedsRex();
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();  // vvvv unused: 1111b.
    bool rex_bit = load ? dst.NeedsRex() : src.NeedsRex();
    ByteOne = EmitVexPrefixByteOne(rex_bit,
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  if (is_twobyte_form && !load) {
    EmitUint8(0x29);  // Store form: MOVAPD xmm2/m128, xmm1.
  } else {
    EmitUint8(0x28);  // Load form: MOVAPD xmm1, xmm2/m128.
  }
  // Instruction Operands
  if (is_twobyte_form && !load) {
    EmitXmmRegisterOperand(src.LowBits(), dst);
  } else {
    EmitXmmRegisterOperand(dst.LowBits(), src);
  }
}
1109 
// MOVAPD xmm, m128 (66 0F 28 /r): load aligned packed doubles from memory.
// Dispatches to the VEX-encoded form when AVX/AVX2 is available.
void X86_64Assembler::movapd(XmmRegister dst, const Address& src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovapd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x28);
  EmitOperand(dst.LowBits(), src);
}
1122 
/** VEX.128.66.0F.WIG 28 /r VMOVAPD xmm1, m128 */
void X86_64Assembler::vmovapd(XmmRegister dst, const Address& src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = false;

  // Instruction VEX Prefix
  // Two-byte VEX has no X/B bits, so it is only usable when the address's
  // base and index registers need neither REX.B nor REX.X.
  uint8_t rex = src.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  if (!Rex_b && !Rex_x) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();  // vvvv unused: 1111b.
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x28);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}
1163 
// MOVUPD xmm, m128 (66 0F 10 /r): load unaligned packed doubles from memory.
// Dispatches to the VEX-encoded form when AVX/AVX2 is available.
void X86_64Assembler::movupd(XmmRegister dst, const Address& src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovupd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x10);
  EmitOperand(dst.LowBits(), src);
}
1176 
1177 /** VEX.128.66.0F.WIG 10 /r VMOVUPD xmm1, m128 */
vmovupd(XmmRegister dst,const Address & src)1178 void X86_64Assembler::vmovupd(XmmRegister dst, const Address& src) {
1179   DCHECK(CpuHasAVXorAVX2FeatureFlag());
1180   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1181   bool is_twobyte_form = false;
1182   uint8_t ByteZero, ByteOne, ByteTwo;
1183 
1184   // Instruction VEX Prefix
1185   uint8_t rex = src.rex();
1186   bool Rex_x = rex & GET_REX_X;
1187   bool Rex_b = rex & GET_REX_B;
1188   if (!Rex_b && !Rex_x) {
1189     is_twobyte_form = true;
1190   }
1191   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1192   if (is_twobyte_form) {
1193     X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
1194     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1195                                    vvvv_reg,
1196                                    SET_VEX_L_128,
1197                                    SET_VEX_PP_66);
1198   } else {
1199     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1200                                    Rex_x,
1201                                    Rex_b,
1202                                    SET_VEX_M_0F);
1203     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
1204                                    SET_VEX_L_128,
1205                                    SET_VEX_PP_66);
1206   }
1207   EmitUint8(ByteZero);
1208   EmitUint8(ByteOne);
1209   if (!is_twobyte_form)
1210   EmitUint8(ByteTwo);
1211   // Instruction Opcode
1212   EmitUint8(0x10);
1213   // Instruction Operands
1214   EmitOperand(dst.LowBits(), src);
1215 }
1216 
// MOVAPD m128, xmm (66 0F 29 /r): store aligned packed doubles to memory.
// Dispatches to the VEX-encoded form when AVX/AVX2 is available.
void X86_64Assembler::movapd(const Address& dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovapd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x29);
  EmitOperand(src.LowBits(), dst);
}
1229 
/** VEX.128.66.0F.WIG 29 /r VMOVAPD m128, xmm1 */
void X86_64Assembler::vmovapd(const Address& dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero, ByteOne, ByteTwo;
  // Instruction VEX Prefix
  // Two-byte VEX has no X/B bits, so it is only usable when the address's
  // base and index registers need neither REX.B nor REX.X.
  uint8_t rex = dst.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  if (!Rex_x && !Rex_b) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();  // vvvv unused: 1111b.
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x29);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}
1269 
// MOVUPD m128, xmm (66 0F 11 /r): store unaligned packed doubles to memory.
// Dispatches to the VEX-encoded form when AVX/AVX2 is available.
void X86_64Assembler::movupd(const Address& dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovupd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitOperand(src.LowBits(), dst);
}
1282 
/** VEX.128.66.0F.WIG 11 /r VMOVUPD m128, xmm1 */
void X86_64Assembler::vmovupd(const Address& dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero, ByteOne, ByteTwo;

  // Instruction VEX Prefix
  // Two-byte VEX has no X/B bits, so it is only usable when the address's
  // base and index registers need neither REX.B nor REX.X.
  uint8_t rex = dst.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  if (!Rex_x && !Rex_b) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();  // vvvv unused: 1111b.
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x11);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}
1323 
1324 
// MOVSD xmm, m64 (F2 0F 10 /r): load a scalar double from memory.
void X86_64Assembler::movsd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix selecting the scalar double form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x10);
  EmitOperand(dst.LowBits(), src);
}
1333 
1334 
// MOVSD m64, xmm (F2 0F 11 /r): store a scalar double to memory.
void X86_64Assembler::movsd(const Address& dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix selecting the scalar double form.
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitOperand(src.LowBits(), dst);
}
1343 
1344 
// MOVSD xmm, xmm (F2 0F 11 /r, store form): move scalar double between registers.
void X86_64Assembler::movsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(src, dst);  // Movsd is MR encoding instead of the usual RM.
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitXmmRegisterOperand(src.LowBits(), dst);  // src in reg field, dst in r/m.
}
1353 
1354 
// ADDSD (F2 0F 58 /r): scalar double add, register to register.
void X86_64Assembler::addsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix selecting the scalar double form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1363 
1364 
// ADDSD (F2 0F 58 /r): scalar double add with a memory operand.
void X86_64Assembler::addsd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix selecting the scalar double form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitOperand(dst.LowBits(), src);
}
1373 
1374 
// SUBSD (F2 0F 5C /r): scalar double subtract, register to register.
void X86_64Assembler::subsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix selecting the scalar double form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1383 
1384 
// SUBSD (F2 0F 5C /r): scalar double subtract with a memory operand.
void X86_64Assembler::subsd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix selecting the scalar double form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitOperand(dst.LowBits(), src);
}
1393 
1394 
// MULSD (F2 0F 59 /r): scalar double multiply, register to register.
void X86_64Assembler::mulsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix selecting the scalar double form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1403 
1404 
// MULSD (F2 0F 59 /r): scalar double multiply with a memory operand.
void X86_64Assembler::mulsd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix selecting the scalar double form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitOperand(dst.LowBits(), src);
}
1413 
1414 
// DIVSD (F2 0F 5E /r): scalar double divide, register to register.
void X86_64Assembler::divsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix selecting the scalar double form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1423 
1424 
// DIVSD (F2 0F 5E /r): scalar double divide with a memory operand.
void X86_64Assembler::divsd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);  // Mandatory prefix selecting the scalar double form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitOperand(dst.LowBits(), src);
}
1433 
1434 
// ADDPD (66 0F 58 /r): packed double add, register to register.
void X86_64Assembler::addpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the packed double form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1443 
1444 
vaddpd(XmmRegister dst,XmmRegister add_left,XmmRegister add_right)1445 void X86_64Assembler::vaddpd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
1446   bool is_twobyte_form = false;
1447   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
1448   if (!add_right.NeedsRex()) {
1449     is_twobyte_form = true;
1450   } else if (!add_left.NeedsRex()) {
1451     return vaddpd(dst, add_right, add_left);
1452   }
1453   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1454   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1455   X86_64ManagedRegister vvvv_reg =
1456       X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
1457   if (is_twobyte_form) {
1458     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1459   } else {
1460     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1461                                    /*X=*/ false,
1462                                    add_right.NeedsRex(),
1463                                    SET_VEX_M_0F);
1464     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1465   }
1466   EmitUint8(ByteZero);
1467   EmitUint8(ByteOne);
1468   if (!is_twobyte_form) {
1469     EmitUint8(ByteTwo);
1470   }
1471   EmitUint8(0x58);
1472   EmitXmmRegisterOperand(dst.LowBits(), add_right);
1473 }
1474 
1475 
// SUBPD (66 0F 5C /r): packed double subtract, register to register.
void X86_64Assembler::subpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the packed double form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1484 
1485 
vsubpd(XmmRegister dst,XmmRegister src1,XmmRegister src2)1486 void X86_64Assembler::vsubpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
1487   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1488   bool is_twobyte_form = false;
1489   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
1490   if (!src2.NeedsRex()) {
1491     is_twobyte_form = true;
1492   }
1493   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1494   X86_64ManagedRegister vvvv_reg =
1495       X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
1496   if (is_twobyte_form) {
1497     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1498   } else {
1499     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1500                                    /*X=*/ false,
1501                                    src2.NeedsRex(),
1502                                    SET_VEX_M_0F);
1503     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1504   }
1505   EmitUint8(ByteZero);
1506   EmitUint8(ByteOne);
1507   if (!is_twobyte_form) {
1508     EmitUint8(ByteTwo);
1509   }
1510   EmitUint8(0x5C);
1511   EmitXmmRegisterOperand(dst.LowBits(), src2);
1512 }
1513 
1514 
// MULPD (66 0F 59 /r): packed double multiply, register to register.
void X86_64Assembler::mulpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the packed double form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1523 
// VMULPD (VEX.128.66.0F.WIG 59 /r): dst = src1 * src2, packed double.
void X86_64Assembler::vmulpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Two-byte VEX cannot carry REX.B; multiplication is commutative, so swap
  // the sources when that enables the shorter prefix.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  } else if (!src1.NeedsRex()) {
    return vmulpd(dst, src2, src1);
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());  // First source in VEX.vvvv.
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x59);  // MULPD opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
1554 
// DIVPD (66 0F 5E /r): packed double divide, register to register.
void X86_64Assembler::divpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory prefix selecting the packed double form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1563 
1564 
// VDIVPD (VEX.128.66.0F.WIG 5E /r): dst = src1 / src2, packed double.
// Division is not commutative, so no operand-swap shortening is attempted.
void X86_64Assembler::vdivpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Two-byte VEX is only usable when the r/m source needs no REX.B.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());  // First source in VEX.vvvv.
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x5E);  // DIVPD opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
1593 
1594 
// MOVDQA xmm, xmm (66 0F 6F /r): move aligned double quadword, register to register.
// Dispatches to the VEX-encoded form when AVX/AVX2 is available.
void X86_64Assembler::movdqa(XmmRegister dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqa(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6F);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1607 
/** VEX.128.66.0F.WIG 6F /r VMOVDQA xmm1, xmm2 */
void X86_64Assembler::vmovdqa(XmmRegister dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = true;

  // Instruction VEX Prefix
  // Two-byte VEX has only the R bit; when both registers need REX, fall back
  // to the three-byte form.
  if (src.NeedsRex() && dst.NeedsRex()) {
    is_twobyte_form = false;
  }
  // "load" direction (opcode 0x6F) is kept when dst needs REX so its bit can
  // be encoded in VEX.R; otherwise the store form (0x7F) is used with the
  // operands swapped, which encodes src's bit in VEX.R instead.
  bool load = dst.NeedsRex();
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();  // vvvv unused: 1111b.
    bool rex_bit = load ? dst.NeedsRex() : src.NeedsRex();
    ByteOne = EmitVexPrefixByteOne(rex_bit,
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  if (is_twobyte_form && !load) {
    EmitUint8(0x7F);  // Store form: MOVDQA xmm2/m128, xmm1.
  } else {
    EmitUint8(0x6F);  // Load form: MOVDQA xmm1, xmm2/m128.
  }
  // Instruction Operands
  if (is_twobyte_form && !load) {
    EmitXmmRegisterOperand(src.LowBits(), dst);
  } else {
    EmitXmmRegisterOperand(dst.LowBits(), src);
  }
}
1655 
// MOVDQA xmm, m128 (66 0F 6F /r): load aligned double quadword from memory.
// Dispatches to the VEX-encoded form when AVX/AVX2 is available.
void X86_64Assembler::movdqa(XmmRegister dst, const Address& src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqa(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6F);
  EmitOperand(dst.LowBits(), src);
}
1668 
/** VEX.128.66.0F.WIG 6F /r VMOVDQA xmm1, m128 */
void X86_64Assembler::vmovdqa(XmmRegister dst, const Address& src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t  ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = false;

  // Instruction VEX Prefix
  // Two-byte VEX has no X/B bits, so it is only usable when the address's
  // base and index registers need neither REX.B nor REX.X.
  uint8_t rex = src.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  if (!Rex_x && !Rex_b) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();  // vvvv unused: 1111b.
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x6F);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}
1709 
// movdqu xmm, m128 (F3 0F 6F /r): load an unaligned 128-bit value.
// Prefers the AVX (VEX-encoded) form when the CPU supports it.
void X86_64Assembler::movdqu(XmmRegister dst, const Address& src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqu(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix selecting the unaligned form.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6F);  // Opcode: load form.
  EmitOperand(dst.LowBits(), src);
}
1722 
/** VEX.128.F3.0F.WIG 6F /r VMOVDQU xmm1, m128
Load Unaligned */
void X86_64Assembler::vmovdqu(XmmRegister dst, const Address& src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = false;

  // Instruction VEX Prefix.
  // The two-byte VEX prefix (C5) cannot encode REX.X/REX.B for the memory
  // operand, so it is only usable when both bits are clear.
  uint8_t rex = src.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  if (!Rex_x && !Rex_b) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    // No third source operand: vvvv is "no register" (encoded as 1111b).
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_F3);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_F3);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x6F);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}
1764 
// movdqa m128, xmm (66 0F 7F /r): store an aligned 128-bit value.
// Prefers the AVX (VEX-encoded) form when the CPU supports it.
void X86_64Assembler::movdqa(const Address& dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqa(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Mandatory operand-size prefix for this SSE2 form.
  EmitOptionalRex32(src, dst);  // The reg field holds the source register.
  EmitUint8(0x0F);
  EmitUint8(0x7F);  // Opcode: store form.
  EmitOperand(src.LowBits(), dst);
}
1777 
/** VEX.128.66.0F.WIG 7F /r VMOVDQA m128, xmm1 */
void X86_64Assembler::vmovdqa(const Address& dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero, ByteOne, ByteTwo;
  // Instruction VEX Prefix.
  // The two-byte VEX prefix (C5) cannot encode REX.X/REX.B for the memory
  // operand, so it is only usable when both bits are clear.
  uint8_t rex = dst.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  if (!Rex_x && !Rex_b) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    // No third source operand: vvvv is "no register" (encoded as 1111b).
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x7F);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}
1817 
// movdqu m128, xmm (F3 0F 7F /r): store an unaligned 128-bit value.
// Prefers the AVX (VEX-encoded) form when the CPU supports it.
void X86_64Assembler::movdqu(const Address& dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovdqu(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // Mandatory prefix selecting the unaligned form.
  EmitOptionalRex32(src, dst);  // The reg field holds the source register.
  EmitUint8(0x0F);
  EmitUint8(0x7F);  // Opcode: store form.
  EmitOperand(src.LowBits(), dst);
}
1830 
1831 /** VEX.128.F3.0F.WIG 7F /r VMOVDQU m128, xmm1 */
vmovdqu(const Address & dst,XmmRegister src)1832 void X86_64Assembler::vmovdqu(const Address& dst, XmmRegister src) {
1833   DCHECK(CpuHasAVXorAVX2FeatureFlag());
1834   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1835   uint8_t ByteZero, ByteOne, ByteTwo;
1836   bool is_twobyte_form = false;
1837 
1838   // Instruction VEX Prefix
1839   uint8_t rex = dst.rex();
1840   bool Rex_x = rex & GET_REX_X;
1841   bool Rex_b = rex & GET_REX_B;
1842   if (!Rex_b && !Rex_x) {
1843     is_twobyte_form = true;
1844   }
1845   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1846   if (is_twobyte_form) {
1847     X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
1848     ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
1849                                    vvvv_reg,
1850                                    SET_VEX_L_128,
1851                                    SET_VEX_PP_F3);
1852   } else {
1853     ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
1854                                    Rex_x,
1855                                    Rex_b,
1856                                    SET_VEX_M_0F);
1857     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
1858                                    SET_VEX_L_128,
1859                                    SET_VEX_PP_F3);
1860   }
1861   EmitUint8(ByteZero);
1862   EmitUint8(ByteOne);
1863   if (!is_twobyte_form) {
1864     EmitUint8(ByteTwo);
1865   }
1866   // Instruction Opcode
1867   EmitUint8(0x7F);
1868   // Instruction Operands
1869   EmitOperand(src.LowBits(), dst);
1870 }
1871 
// paddb xmm, xmm (66 0F FC /r): packed add of bytes.
void X86_64Assembler::paddb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFC);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1880 
1881 
// vpaddb xmm1, xmm2, xmm3 (VEX.128.66.0F FC /r): AVX packed byte add.
void X86_64Assembler::vpaddb(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  uint8_t ByteOne = 0x00, ByteZero = 0x00, ByteTwo = 0x00;
  bool is_twobyte_form = false;
  // The two-byte VEX prefix (C5) cannot encode REX.B for the r/m operand,
  // so it requires a low r/m register.
  if (!add_right.NeedsRex()) {
    is_twobyte_form = true;
  } else if (!add_left.NeedsRex()) {
    // Addition is commutative: swap the sources so the low register lands
    // in the r/m slot and the shorter two-byte prefix can be used.
    return vpaddb(dst, add_right, add_left);
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  // vvvv encodes the first source operand.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   add_right.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0xFC);  // Opcode (same as the SSE paddb).
  EmitXmmRegisterOperand(dst.LowBits(), add_right);
}
1912 
1913 
// psubb xmm, xmm (66 0F F8 /r): packed subtract of bytes.
void X86_64Assembler::psubb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xF8);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1922 
1923 
// vpsubb xmm1, xmm2, xmm3 (VEX.128.66.0F F8 /r): AVX packed byte subtract.
// Unlike vpaddb, no operand swap is possible: subtraction is not commutative.
void X86_64Assembler::vpsubb(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Two-byte VEX prefix only when the r/m operand needs no REX.B bit.
  if (!add_right.NeedsRex()) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  // vvvv encodes the first source operand.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   add_right.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0xF8);  // Opcode (same as the SSE psubb).
  EmitXmmRegisterOperand(dst.LowBits(), add_right);
}
1952 
1953 
// paddw xmm, xmm (66 0F FD /r): packed add of 16-bit words.
void X86_64Assembler::paddw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFD);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
1962 
// vpaddw xmm1, xmm2, xmm3 (VEX.128.66.0F FD /r): AVX packed word add.
void X86_64Assembler::vpaddw(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Two-byte VEX prefix only when the r/m operand needs no REX.B bit.
  if (!add_right.NeedsRex()) {
    is_twobyte_form = true;
  } else if (!add_left.NeedsRex()) {
    // Addition is commutative: swap sources to enable the shorter prefix.
    return vpaddw(dst, add_right, add_left);
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  // vvvv encodes the first source operand.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   add_right.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0xFD);  // Opcode (same as the SSE paddw).
  EmitXmmRegisterOperand(dst.LowBits(), add_right);
}
1993 
1994 
// psubw xmm, xmm (66 0F F9 /r): packed subtract of 16-bit words.
void X86_64Assembler::psubw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xF9);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2003 
// vpsubw xmm1, xmm2, xmm3 (VEX.128.66.0F F9 /r): AVX packed word subtract.
// No operand swap is possible: subtraction is not commutative.
void X86_64Assembler::vpsubw(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Two-byte VEX prefix only when the r/m operand needs no REX.B bit.
  if (!add_right.NeedsRex()) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  // vvvv encodes the first source operand.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   add_right.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0xF9);  // Opcode (same as the SSE psubw).
  EmitXmmRegisterOperand(dst.LowBits(), add_right);
}
2032 
2033 
// pmullw xmm, xmm (66 0F D5 /r): packed multiply of words, low halves kept.
void X86_64Assembler::pmullw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xD5);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2042 
// vpmullw xmm1, xmm2, xmm3 (VEX.128.66.0F D5 /r): AVX packed word multiply.
void X86_64Assembler::vpmullw(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Two-byte VEX prefix only when the r/m operand needs no REX.B bit.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  } else if (!src1.NeedsRex()) {
    // Multiplication is commutative: swap sources to enable the shorter prefix.
    return vpmullw(dst, src2, src1);
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  // vvvv encodes the first source operand.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0xD5);  // Opcode (same as the SSE pmullw).
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
2073 
// paddd xmm, xmm (66 0F FE /r): packed add of 32-bit doublewords.
void X86_64Assembler::paddd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFE);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2082 
// vpaddd xmm1, xmm2, xmm3 (VEX.128.66.0F FE /r): AVX packed doubleword add.
void X86_64Assembler::vpaddd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Two-byte VEX prefix only when the r/m operand needs no REX.B bit.
  if (!add_right.NeedsRex()) {
    is_twobyte_form = true;
  } else if (!add_left.NeedsRex()) {
    // Addition is commutative: swap sources to enable the shorter prefix.
    return vpaddd(dst, add_right, add_left);
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  // vvvv encodes the first source operand.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   add_right.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0xFE);  // Opcode (same as the SSE paddd).
  EmitXmmRegisterOperand(dst.LowBits(), add_right);
}
2113 
// psubd xmm, xmm (66 0F FA /r): packed subtract of 32-bit doublewords.
void X86_64Assembler::psubd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFA);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2122 
2123 
// pmulld xmm, xmm (66 0F 38 40 /r): SSE4.1 packed doubleword multiply,
// low 32 bits of each product kept.
void X86_64Assembler::pmulld(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);  // Three-byte opcode escape.
  EmitUint8(0x40);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2133 
// vpmulld xmm1, xmm2, xmm3 (VEX.128.66.0F38 40 /r): AVX doubleword multiply.
// The 0F 38 opcode map can only be selected by the three-byte VEX prefix,
// so the two-byte form is never used here.
void X86_64Assembler::vpmulld(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  ByteZero = EmitVexPrefixByteZero(/*is_twobyte_form*/ false);
  // vvvv encodes the first source operand.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F_38);
  ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  EmitUint8(ByteTwo);
  EmitUint8(0x40);  // Opcode in the 0F 38 map.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
2152 
// paddq xmm, xmm (66 0F D4 /r): packed add of 64-bit quadwords.
void X86_64Assembler::paddq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xD4);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2161 
2162 
// vpaddq xmm1, xmm2, xmm3 (VEX.128.66.0F D4 /r): AVX packed quadword add.
void X86_64Assembler::vpaddq(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Two-byte VEX prefix only when the r/m operand needs no REX.B bit.
  if (!add_right.NeedsRex()) {
    is_twobyte_form = true;
  } else if (!add_left.NeedsRex()) {
    // Addition is commutative: swap sources to enable the shorter prefix.
    return vpaddq(dst, add_right, add_left);
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  // vvvv encodes the first source operand.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   add_right.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0xD4);  // Opcode (same as the SSE paddq).
  EmitXmmRegisterOperand(dst.LowBits(), add_right);
}
2193 
2194 
// psubq xmm, xmm (66 0F FB /r): packed subtract of 64-bit quadwords.
void X86_64Assembler::psubq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xFB);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2203 
// vpsubq xmm1, xmm2, xmm3 (VEX.128.66.0F FB /r): AVX packed quadword subtract.
// No operand swap is possible: subtraction is not commutative.
void X86_64Assembler::vpsubq(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Two-byte VEX prefix only when the r/m operand needs no REX.B bit.
  if (!add_right.NeedsRex()) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  // vvvv encodes the first source operand.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   add_right.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0xFB);  // Opcode (same as the SSE psubq).
  EmitXmmRegisterOperand(dst.LowBits(), add_right);
}
2232 
2233 
// paddusb xmm, xmm (66 0F DC /r): packed add of unsigned bytes with saturation.
void X86_64Assembler::paddusb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDC);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2242 
2243 
// paddsb xmm, xmm (66 0F EC /r): packed add of signed bytes with saturation.
void X86_64Assembler::paddsb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEC);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2252 
2253 
// paddusw xmm, xmm (66 0F DD /r): packed add of unsigned words with saturation.
void X86_64Assembler::paddusw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDD);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2262 
2263 
// paddsw xmm, xmm (66 0F ED /r): packed add of signed words with saturation.
void X86_64Assembler::paddsw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xED);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2272 
2273 
// psubusb xmm, xmm (66 0F D8 /r): packed subtract of unsigned bytes with saturation.
void X86_64Assembler::psubusb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xD8);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2282 
2283 
// psubsb xmm, xmm (66 0F E8 /r): packed subtract of signed bytes with saturation.
void X86_64Assembler::psubsb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE8);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2292 
2293 
// vpsubd xmm1, xmm2, xmm3 (VEX.128.66.0F FA /r): AVX packed doubleword subtract.
// No operand swap is possible: subtraction is not commutative.
void X86_64Assembler::vpsubd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // Two-byte VEX prefix only when the r/m operand needs no REX.B bit.
  if (!add_right.NeedsRex()) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  // vvvv encodes the first source operand.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   add_right.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0xFA);  // Opcode (same as the SSE psubd).
  EmitXmmRegisterOperand(dst.LowBits(), add_right);
}
2322 
2323 
// psubusw xmm, xmm (66 0F D9 /r): packed subtract of unsigned words with saturation.
void X86_64Assembler::psubusw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xD9);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2332 
2333 
// psubsw xmm, xmm (66 0F E9 /r): packed subtract of signed words with saturation.
void X86_64Assembler::psubsw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE9);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2342 
2343 
cvtsi2ss(XmmRegister dst,CpuRegister src)2344 void X86_64Assembler::cvtsi2ss(XmmRegister dst, CpuRegister src) {
2345   cvtsi2ss(dst, src, false);
2346 }
2347 
2348 
// cvtsi2ss xmm, r32/r64 (F3 [REX.W] 0F 2A /r): convert a signed integer
// register to a scalar single-precision float.
void X86_64Assembler::cvtsi2ss(XmmRegister dst, CpuRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2A);
  EmitOperand(dst.LowBits(), Operand(src));
}
2362 
2363 
// cvtsi2ss xmm, m32/m64 (F3 [REX.W] 0F 2A /r): convert a signed integer in
// memory to a scalar single-precision float.
void X86_64Assembler::cvtsi2ss(XmmRegister dst, const Address& src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2A);
  EmitOperand(dst.LowBits(), src);
}
2377 
2378 
cvtsi2sd(XmmRegister dst,CpuRegister src)2379 void X86_64Assembler::cvtsi2sd(XmmRegister dst, CpuRegister src) {
2380   cvtsi2sd(dst, src, false);
2381 }
2382 
2383 
// cvtsi2sd xmm, r32/r64 (F2 [REX.W] 0F 2A /r): convert a signed integer
// register to a scalar double-precision float.
void X86_64Assembler::cvtsi2sd(XmmRegister dst, CpuRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2A);
  EmitOperand(dst.LowBits(), Operand(src));
}
2397 
2398 
// cvtsi2sd xmm, m32/m64 (F2 [REX.W] 0F 2A /r): convert a signed integer in
// memory to a scalar double-precision float.
void X86_64Assembler::cvtsi2sd(XmmRegister dst, const Address& src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2A);
  EmitOperand(dst.LowBits(), src);
}
2412 
2413 
// cvtss2si r32, xmm (F3 0F 2D /r): convert scalar single to a 32-bit signed
// integer (rounded per MXCSR).
void X86_64Assembler::cvtss2si(CpuRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x2D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2422 
2423 
// cvtss2sd xmm, xmm (F3 0F 5A /r): convert scalar single to scalar double.
void X86_64Assembler::cvtss2sd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5A);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2432 
2433 
// cvtss2sd xmm, m32 (F3 0F 5A /r): convert scalar single in memory to
// scalar double.
void X86_64Assembler::cvtss2sd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5A);
  EmitOperand(dst.LowBits(), src);
}
2442 
2443 
// cvtsd2si r32, xmm (F2 0F 2D /r): convert scalar double to a 32-bit signed
// integer (rounded per MXCSR).
void X86_64Assembler::cvtsd2si(CpuRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x2D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2452 
2453 
cvttss2si(CpuRegister dst,XmmRegister src)2454 void X86_64Assembler::cvttss2si(CpuRegister dst, XmmRegister src) {
2455   cvttss2si(dst, src, false);
2456 }
2457 
2458 
// cvttss2si r32/r64, xmm (F3 [REX.W] 0F 2C /r): convert scalar single to a
// signed integer with truncation.
void X86_64Assembler::cvttss2si(CpuRegister dst, XmmRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2472 
2473 
cvttsd2si(CpuRegister dst,XmmRegister src)2474 void X86_64Assembler::cvttsd2si(CpuRegister dst, XmmRegister src) {
2475   cvttsd2si(dst, src, false);
2476 }
2477 
2478 
// cvttsd2si r32/r64, xmm (F2 [REX.W] 0F 2C /r): convert scalar double to a
// signed integer with truncation.
void X86_64Assembler::cvttsd2si(CpuRegister dst, XmmRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  if (is64bit) {
    // Emit a REX.W prefix if the operand size is 64 bits.
    EmitRex64(dst, src);
  } else {
    EmitOptionalRex32(dst, src);
  }
  EmitUint8(0x0F);
  EmitUint8(0x2C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2492 
2493 
// cvtsd2ss xmm, xmm (F2 0F 5A /r): convert scalar double to scalar single.
void X86_64Assembler::cvtsd2ss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5A);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2502 
2503 
// cvtsd2ss xmm, m64 (F2 0F 5A /r): convert scalar double in memory to
// scalar single.
void X86_64Assembler::cvtsd2ss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5A);
  EmitOperand(dst.LowBits(), src);
}
2512 
2513 
// CVTDQ2PS xmm, xmm. Encoding: 0F 5B /r (no mandatory prefix).
void X86_64Assembler::cvtdq2ps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5B);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2521 
2522 
// CVTDQ2PD xmm, xmm. Encoding: F3 0F E6 /r.
void X86_64Assembler::cvtdq2pd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE6);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2531 
2532 
// COMISS xmm, xmm. Encoding: 0F 2F /r (no mandatory prefix).
void X86_64Assembler::comiss(XmmRegister a, XmmRegister b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2F);
  EmitXmmRegisterOperand(a.LowBits(), b);
}
2540 
2541 
// COMISS xmm, m32 (memory-operand form). Encoding: 0F 2F /r.
void X86_64Assembler::comiss(XmmRegister a, const Address& b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2F);
  EmitOperand(a.LowBits(), b);
}
2549 
2550 
// COMISD xmm, xmm. Encoding: 66 0F 2F /r.
void X86_64Assembler::comisd(XmmRegister a, XmmRegister b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2F);
  EmitXmmRegisterOperand(a.LowBits(), b);
}
2559 
2560 
// COMISD xmm, m64 (memory-operand form). Encoding: 66 0F 2F /r.
void X86_64Assembler::comisd(XmmRegister a, const Address& b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2F);
  EmitOperand(a.LowBits(), b);
}
2569 
2570 
// UCOMISS xmm, xmm. Encoding: 0F 2E /r (no mandatory prefix).
void X86_64Assembler::ucomiss(XmmRegister a, XmmRegister b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2E);
  EmitXmmRegisterOperand(a.LowBits(), b);
}
2578 
2579 
// UCOMISS xmm, m32 (memory-operand form). Encoding: 0F 2E /r.
void X86_64Assembler::ucomiss(XmmRegister a, const Address& b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2E);
  EmitOperand(a.LowBits(), b);
}
2587 
2588 
// UCOMISD xmm, xmm. Encoding: 66 0F 2E /r.
void X86_64Assembler::ucomisd(XmmRegister a, XmmRegister b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2E);
  EmitXmmRegisterOperand(a.LowBits(), b);
}
2597 
2598 
// UCOMISD xmm, m64 (memory-operand form). Encoding: 66 0F 2E /r.
void X86_64Assembler::ucomisd(XmmRegister a, const Address& b) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(a, b);
  EmitUint8(0x0F);
  EmitUint8(0x2E);
  EmitOperand(a.LowBits(), b);
}
2607 
2608 
// ROUNDSD xmm, xmm, imm8 (SSE4.1). Encoding: 66 0F 3A 0B /r ib.
// imm selects the rounding mode.
void X86_64Assembler::roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x3A);
  EmitUint8(0x0B);
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());  // 8-bit immediate follows the ModRM byte.
}
2619 
2620 
// ROUNDSS xmm, xmm, imm8 (SSE4.1). Encoding: 66 0F 3A 0A /r ib.
// imm selects the rounding mode.
void X86_64Assembler::roundss(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x3A);
  EmitUint8(0x0A);
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());  // 8-bit immediate follows the ModRM byte.
}
2631 
2632 
// SQRTSD xmm, xmm. Encoding: F2 0F 51 /r.
void X86_64Assembler::sqrtsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x51);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2641 
2642 
// SQRTSS xmm, xmm. Encoding: F3 0F 51 /r.
void X86_64Assembler::sqrtss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x51);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2651 
2652 
// XORPD xmm, m128 (memory-operand form). Encoding: 66 0F 57 /r.
void X86_64Assembler::xorpd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x57);
  EmitOperand(dst.LowBits(), src);
}
2661 
2662 
// XORPD xmm, xmm. Encoding: 66 0F 57 /r.
void X86_64Assembler::xorpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x57);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2671 
2672 
// XORPS xmm, m128 (memory-operand form). Encoding: 0F 57 /r.
void X86_64Assembler::xorps(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x57);
  EmitOperand(dst.LowBits(), src);
}
2680 
2681 
// XORPS xmm, xmm. Encoding: 0F 57 /r (no mandatory prefix).
void X86_64Assembler::xorps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x57);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2689 
// PXOR xmm, xmm. Encoding: 66 0F EF /r.
void X86_64Assembler::pxor(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEF);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2698 
2699 /* VEX.128.66.0F.WIG EF /r VPXOR xmm1, xmm2, xmm3/m128 */
void X86_64Assembler::vpxor(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // The short two-byte VEX prefix (C5) cannot carry a REX.B-equivalent bit for
  // the r/m operand, so it is only usable when src2 needs no REX.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  } else if (!src1.NeedsRex()) {
    // XOR is commutative: swap the sources so the REX-free register lands in
    // r/m, enabling the shorter two-byte prefix.
    return vpxor(dst, src2, src1);
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // src1 is encoded in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0xEF);  // VPXOR opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
2730 
2731 /* VEX.128.0F.WIG 57 /r VXORPS xmm1,xmm2, xmm3/m128 */
void X86_64Assembler::vxorps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // The short two-byte VEX prefix (C5) is only usable when src2 (r/m) needs
  // no REX bit.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  } else if (!src1.NeedsRex()) {
    // XOR is commutative: swap sources so the shorter prefix can be used.
    return vxorps(dst, src2, src1);
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // src1 is encoded in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x57);  // VXORPS opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
2762 
2763 /* VEX.128.66.0F.WIG 57 /r VXORPD xmm1,xmm2, xmm3/m128 */
void X86_64Assembler::vxorpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // The short two-byte VEX prefix (C5) is only usable when src2 (r/m) needs
  // no REX bit.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  } else if (!src1.NeedsRex()) {
    // XOR is commutative: swap sources so the shorter prefix can be used.
    return vxorpd(dst, src2, src1);
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // src1 is encoded in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x57);  // VXORPD opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
2794 
// ANDPD xmm, m128 (memory-operand form). Encoding: 66 0F 54 /r.
void X86_64Assembler::andpd(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x54);
  EmitOperand(dst.LowBits(), src);
}
2803 
// ANDPD xmm, xmm. Encoding: 66 0F 54 /r.
void X86_64Assembler::andpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x54);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2812 
// ANDPS xmm, xmm. Encoding: 0F 54 /r (no mandatory prefix).
void X86_64Assembler::andps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x54);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2820 
// PAND xmm, xmm. Encoding: 66 0F DB /r.
void X86_64Assembler::pand(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDB);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2829 
2830 /* VEX.128.66.0F.WIG DB /r VPAND xmm1, xmm2, xmm3/m128 */
void X86_64Assembler::vpand(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // The short two-byte VEX prefix (C5) is only usable when src2 (r/m) needs
  // no REX bit.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  } else if (!src1.NeedsRex()) {
    // AND is commutative: swap sources so the shorter prefix can be used.
    return vpand(dst, src2, src1);
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // src1 is encoded in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0xDB);  // VPAND opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
2861 
2862 /* VEX.128.0F 54 /r VANDPS xmm1,xmm2, xmm3/m128 */
void X86_64Assembler::vandps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // The short two-byte VEX prefix (C5) is only usable when src2 (r/m) needs
  // no REX bit.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  } else if (!src1.NeedsRex()) {
    // AND is commutative: swap sources so the shorter prefix can be used.
    return vandps(dst, src2, src1);
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // src1 is encoded in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x54);  // VANDPS opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
2893 
2894 /* VEX.128.66.0F 54 /r VANDPD xmm1, xmm2, xmm3/m128 */
void X86_64Assembler::vandpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // The short two-byte VEX prefix (C5) is only usable when src2 (r/m) needs
  // no REX bit.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  } else if (!src1.NeedsRex()) {
    // AND is commutative: swap sources so the shorter prefix can be used.
    return vandpd(dst, src2, src1);
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // src1 is encoded in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x54);  // VANDPD opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
2925 
// ANDN (BMI1): dst = ~src1 & src2. Encoding: VEX.0F38.W1 F2 /r.
// Always uses the three-byte VEX prefix; src1 goes in the VEX.vvvv field.
void X86_64Assembler::andn(CpuRegister dst, CpuRegister src1, CpuRegister src2) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
  uint8_t byte_one = EmitVexPrefixByteOne(dst.NeedsRex(),
                                          /*X=*/ false,
                                          src2.NeedsRex(),
                                          SET_VEX_M_0F_38);
  // W=1 selects the 64-bit operand size.
  uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/ true,
                                          X86_64ManagedRegister::FromCpuRegister(src1.AsRegister()),
                                          SET_VEX_L_128,
                                          SET_VEX_PP_NONE);
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  EmitUint8(byte_two);
  // Opcode field
  EmitUint8(0xF2);
  EmitRegisterOperand(dst.LowBits(), src2.LowBits());  // ModRM: reg=dst, rm=src2.
}
2944 
// ANDNPD xmm, xmm. Encoding: 66 0F 55 /r.
void X86_64Assembler::andnpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x55);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2953 
// ANDNPS xmm, xmm. Encoding: 0F 55 /r (no mandatory prefix).
void X86_64Assembler::andnps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x55);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2961 
// PANDN xmm, xmm. Encoding: 66 0F DF /r.
void X86_64Assembler::pandn(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDF);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
2970 
2971 /* VEX.128.66.0F.WIG DF /r VPANDN xmm1, xmm2, xmm3/m128 */
void X86_64Assembler::vpandn(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // ANDN is not commutative, so unlike vpand there is no source-swap
  // optimization; the two-byte prefix is used only when src2 needs no REX.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  // src1 is encoded in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0xDF);  // VPANDN opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
3000 
3001 /* VEX.128.0F 55 /r VANDNPS xmm1, xmm2, xmm3/m128 */
void X86_64Assembler::vandnps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // ANDN is not commutative, so no source-swap optimization; the two-byte
  // prefix is used only when src2 needs no REX.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  // src1 is encoded in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x55);  // VANDNPS opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
3030 
3031 /* VEX.128.66.0F 55 /r VANDNPD xmm1, xmm2, xmm3/m128 */
void X86_64Assembler::vandnpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // ANDN is not commutative, so no source-swap optimization; the two-byte
  // prefix is used only when src2 needs no REX.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  // src1 is encoded in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x55);  // VANDNPD opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
3060 
// ORPD xmm, xmm. Encoding: 66 0F 56 /r.
void X86_64Assembler::orpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x56);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3069 
// ORPS xmm, xmm. Encoding: 0F 56 /r (no mandatory prefix).
void X86_64Assembler::orps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x56);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3077 
// POR xmm, xmm. Encoding: 66 0F EB /r.
void X86_64Assembler::por(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEB);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3086 
3087 /* VEX.128.66.0F.WIG EB /r VPOR xmm1, xmm2, xmm3/m128 */
void X86_64Assembler::vpor(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // The short two-byte VEX prefix (C5) is only usable when src2 (r/m) needs
  // no REX bit.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  } else if (!src1.NeedsRex()) {
    // OR is commutative: swap sources so the shorter prefix can be used.
    return vpor(dst, src2, src1);
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // src1 is encoded in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0xEB);  // VPOR opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
3118 
3119 /* VEX.128.0F 56 /r VORPS xmm1,xmm2, xmm3/m128 */
void X86_64Assembler::vorps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // The short two-byte VEX prefix (C5) is only usable when src2 (r/m) needs
  // no REX bit.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  } else if (!src1.NeedsRex()) {
    // OR is commutative: swap sources so the shorter prefix can be used.
    return vorps(dst, src2, src1);
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // src1 is encoded in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x56);  // VORPS opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
3150 
3151 /* VEX.128.66.0F 56 /r VORPD xmm1,xmm2, xmm3/m128 */
void X86_64Assembler::vorpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // The short two-byte VEX prefix (C5) is only usable when src2 (r/m) needs
  // no REX bit.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  } else if (!src1.NeedsRex()) {
    // OR is commutative: swap sources so the shorter prefix can be used.
    return vorpd(dst, src2, src1);
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // src1 is encoded in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x56);  // VORPD opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
3182 
// PAVGB xmm, xmm. Encoding: 66 0F E0 /r.
void X86_64Assembler::pavgb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE0);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3191 
// PAVGW xmm, xmm. Encoding: 66 0F E3 /r.
void X86_64Assembler::pavgw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE3);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3200 
// PSADBW xmm, xmm. Encoding: 66 0F F6 /r.
void X86_64Assembler::psadbw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xF6);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3209 
// PMADDWD xmm, xmm. Encoding: 66 0F F5 /r.
void X86_64Assembler::pmaddwd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xF5);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3218 
// VPMADDWD xmm1, xmm2, xmm3. Encoding: VEX.128.66.0F.WIG F5 /r.
void X86_64Assembler::vpmaddwd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // The short two-byte VEX prefix (C5) is only usable when src2 (r/m) needs
  // no REX bit.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  } else if (!src1.NeedsRex()) {
    // The multiply-add is commutative in its sources: swap so the shorter
    // prefix can be used.
    return vpmaddwd(dst, src2, src1);
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  // src1 is encoded in the VEX.vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0xF5);  // VPMADDWD opcode.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}
3249 
// PHADDW xmm, xmm (SSSE3). Encoding: 66 0F 38 01 /r.
void X86_64Assembler::phaddw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x01);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3259 
// PHADDD xmm, xmm (SSSE3). Encoding: 66 0F 38 02 /r.
void X86_64Assembler::phaddd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x02);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3269 
// HADDPS xmm, xmm (SSE3). Encoding: F2 0F 7C /r.
void X86_64Assembler::haddps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x7C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3278 
// HADDPD xmm, xmm (SSE3). Encoding: 66 0F 7C /r.
void X86_64Assembler::haddpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x7C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3287 
// PHSUBW xmm, xmm (SSSE3). Encoding: 66 0F 38 05 /r.
void X86_64Assembler::phsubw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x05);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3297 
phsubd(XmmRegister dst,XmmRegister src)3298 void X86_64Assembler::phsubd(XmmRegister dst, XmmRegister src) {
3299   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3300   EmitUint8(0x66);
3301   EmitOptionalRex32(dst, src);
3302   EmitUint8(0x0F);
3303   EmitUint8(0x38);
3304   EmitUint8(0x06);
3305   EmitXmmRegisterOperand(dst.LowBits(), src);
3306 }
3307 
hsubps(XmmRegister dst,XmmRegister src)3308 void X86_64Assembler::hsubps(XmmRegister dst, XmmRegister src) {
3309   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3310   EmitUint8(0xF2);
3311   EmitOptionalRex32(dst, src);
3312   EmitUint8(0x0F);
3313   EmitUint8(0x7D);
3314   EmitXmmRegisterOperand(dst.LowBits(), src);
3315 }
3316 
hsubpd(XmmRegister dst,XmmRegister src)3317 void X86_64Assembler::hsubpd(XmmRegister dst, XmmRegister src) {
3318   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3319   EmitUint8(0x66);
3320   EmitOptionalRex32(dst, src);
3321   EmitUint8(0x0F);
3322   EmitUint8(0x7D);
3323   EmitXmmRegisterOperand(dst.LowBits(), src);
3324 }
3325 
// Packed integer min/max (xmm, xmm register forms).
// The byte/dword and unsigned-word/dword variants are SSE4.1 and use the
// three-byte 0F 38 opcode map; the signed-word and unsigned-byte variants
// are SSE2 and use the two-byte 0F map.

// pminsb: packed signed byte minimum (SSE4.1: 66 0F 38 38 /r).
void X86_64Assembler::pminsb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x38);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// pmaxsb: packed signed byte maximum (SSE4.1: 66 0F 38 3C /r).
void X86_64Assembler::pmaxsb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// pminsw: packed signed word minimum (SSE2: 66 0F EA /r).
void X86_64Assembler::pminsw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEA);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// pmaxsw: packed signed word maximum (SSE2: 66 0F EE /r).
void X86_64Assembler::pmaxsw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEE);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// pminsd: packed signed dword minimum (SSE4.1: 66 0F 38 39 /r).
void X86_64Assembler::pminsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x39);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// pmaxsd: packed signed dword maximum (SSE4.1: 66 0F 38 3D /r).
void X86_64Assembler::pmaxsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// pminub: packed unsigned byte minimum (SSE2: 66 0F DA /r).
void X86_64Assembler::pminub(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDA);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// pmaxub: packed unsigned byte maximum (SSE2: 66 0F DE /r).
void X86_64Assembler::pmaxub(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDE);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// pminuw: packed unsigned word minimum (SSE4.1: 66 0F 38 3A /r).
void X86_64Assembler::pminuw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3A);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// pmaxuw: packed unsigned word maximum (SSE4.1: 66 0F 38 3E /r).
void X86_64Assembler::pmaxuw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// pminud: packed unsigned dword minimum (SSE4.1: 66 0F 38 3B /r).
void X86_64Assembler::pminud(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3B);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// pmaxud: packed unsigned dword maximum (SSE4.1: 66 0F 38 3F /r).
void X86_64Assembler::pmaxud(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3F);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3441 
// Packed floating-point min/max. The single-precision forms have no
// mandatory prefix; the double-precision forms take the 0x66 prefix.

// minps: packed single-precision minimum (SSE: 0F 5D /r).
void X86_64Assembler::minps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// maxps: packed single-precision maximum (SSE: 0F 5F /r).
void X86_64Assembler::maxps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5F);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// minpd: packed double-precision minimum (SSE2: 66 0F 5D /r).
void X86_64Assembler::minpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// maxpd: packed double-precision maximum (SSE2: 66 0F 5F /r).
void X86_64Assembler::maxpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5F);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3475 
// Packed integer compares. The equality/greater-than compares on bytes,
// words and dwords are SSE2 two-byte opcodes; the quadword forms are
// SSE4.1 (pcmpeqq) and SSE4.2 (pcmpgtq) in the 0F 38 map.

// pcmpeqb: packed byte compare for equality (SSE2: 66 0F 74 /r).
void X86_64Assembler::pcmpeqb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x74);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// pcmpeqw: packed word compare for equality (SSE2: 66 0F 75 /r).
void X86_64Assembler::pcmpeqw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x75);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// pcmpeqd: packed dword compare for equality (SSE2: 66 0F 76 /r).
void X86_64Assembler::pcmpeqd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x76);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// pcmpeqq: packed qword compare for equality (SSE4.1: 66 0F 38 29 /r).
void X86_64Assembler::pcmpeqq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x29);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// pcmpgtb: packed signed byte compare for greater-than (SSE2: 66 0F 64 /r).
void X86_64Assembler::pcmpgtb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x64);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// pcmpgtw: packed signed word compare for greater-than (SSE2: 66 0F 65 /r).
void X86_64Assembler::pcmpgtw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x65);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// pcmpgtd: packed signed dword compare for greater-than (SSE2: 66 0F 66 /r).
void X86_64Assembler::pcmpgtd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x66);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// pcmpgtq: packed signed qword compare for greater-than (SSE4.2: 66 0F 38 37 /r).
void X86_64Assembler::pcmpgtq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x37);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3549 
// Shuffle instructions: opcode + ModRM followed by an 8-bit immediate
// selecting the element permutation.

// shufpd: shuffle packed doubles by imm8 control (SSE2: 66 0F C6 /r ib).
void X86_64Assembler::shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xC6);
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());  // Shuffle control byte.
}


// shufps: shuffle packed singles by imm8 control (SSE: 0F C6 /r ib).
void X86_64Assembler::shufps(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xC6);
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());
}


// pshufd: shuffle packed dwords by imm8 control (SSE2: 66 0F 70 /r ib).
void X86_64Assembler::pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x70);
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());
}
3580 
3581 
// Pack/unpack interleave instructions (SSE2, all take the 0x66 prefix).
// The "l" forms interleave the low halves of dst and src, the "h" forms
// the high halves, at byte/word/dword/qword granularity.

// punpcklbw: interleave low bytes (66 0F 60 /r).
void X86_64Assembler::punpcklbw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x60);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// punpcklwd: interleave low words (66 0F 61 /r).
void X86_64Assembler::punpcklwd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x61);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// punpckldq: interleave low dwords (66 0F 62 /r).
void X86_64Assembler::punpckldq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x62);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// punpcklqdq: interleave low qwords (66 0F 6C /r).
void X86_64Assembler::punpcklqdq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// punpckhbw: interleave high bytes (66 0F 68 /r).
void X86_64Assembler::punpckhbw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x68);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// punpckhwd: interleave high words (66 0F 69 /r).
void X86_64Assembler::punpckhwd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x69);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// punpckhdq: interleave high dwords (66 0F 6A /r).
void X86_64Assembler::punpckhdq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6A);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// punpckhqdq: interleave high qwords (66 0F 6D /r).
void X86_64Assembler::punpckhqdq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}
3660 
3661 
// Packed shift-by-immediate instructions (SSE2). These use the "Group"
// encodings 0F 71/72/73 where the shifted xmm register goes in the ModRM
// r/m field and the ModRM reg field carries the opcode extension:
//   /6 = shift left logical, /4 = shift right arithmetic,
//   /2 = shift right logical, /3 = byte shift right (psrldq).
// Only REX.B can ever be needed (the register is in r/m), hence the
// EmitOptionalRex(false, false, false, false, reg.NeedsRex()) pattern.

// psllw: shift packed words left logical by imm8 (66 0F 71 /6 ib).
void X86_64Assembler::psllw(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x71);
  EmitXmmRegisterOperand(6, reg);  // /6 opcode extension.
  EmitUint8(shift_count.value());
}


// pslld: shift packed dwords left logical by imm8 (66 0F 72 /6 ib).
void X86_64Assembler::pslld(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x72);
  EmitXmmRegisterOperand(6, reg);
  EmitUint8(shift_count.value());
}


// psllq: shift packed qwords left logical by imm8 (66 0F 73 /6 ib).
void X86_64Assembler::psllq(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x73);
  EmitXmmRegisterOperand(6, reg);
  EmitUint8(shift_count.value());
}


// psraw: shift packed words right arithmetic by imm8 (66 0F 71 /4 ib).
void X86_64Assembler::psraw(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x71);
  EmitXmmRegisterOperand(4, reg);  // /4 opcode extension.
  EmitUint8(shift_count.value());
}


// psrad: shift packed dwords right arithmetic by imm8 (66 0F 72 /4 ib).
void X86_64Assembler::psrad(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x72);
  EmitXmmRegisterOperand(4, reg);
  EmitUint8(shift_count.value());
}


// psrlw: shift packed words right logical by imm8 (66 0F 71 /2 ib).
void X86_64Assembler::psrlw(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x71);
  EmitXmmRegisterOperand(2, reg);  // /2 opcode extension.
  EmitUint8(shift_count.value());
}


// psrld: shift packed dwords right logical by imm8 (66 0F 72 /2 ib).
void X86_64Assembler::psrld(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x72);
  EmitXmmRegisterOperand(2, reg);
  EmitUint8(shift_count.value());
}


// psrlq: shift packed qwords right logical by imm8 (66 0F 73 /2 ib).
void X86_64Assembler::psrlq(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x73);
  EmitXmmRegisterOperand(2, reg);
  EmitUint8(shift_count.value());
}


// psrldq: shift the whole register right by imm8 *bytes* (66 0F 73 /3 ib).
void X86_64Assembler::psrldq(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x73);
  EmitXmmRegisterOperand(3, reg);  // /3 opcode extension (byte shift).
  EmitUint8(shift_count.value());
}
3768 
3769 
// x87 FPU instructions. Memory forms encode the opcode extension in the
// ModRM reg field via EmitOperand(ext, addr); no REX handling is done here,
// so addresses using r8-r15 rely on the Address encoding itself.

// fldl: push a 64-bit double from memory onto the FPU stack (DD /0).
void X86_64Assembler::fldl(const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDD);
  EmitOperand(0, src);
}


// fstl: store ST(0) to a 64-bit double in memory, no pop (DD /2).
void X86_64Assembler::fstl(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDD);
  EmitOperand(2, dst);
}


// fstpl: store ST(0) to a 64-bit double in memory and pop (DD /3).
void X86_64Assembler::fstpl(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDD);
  EmitOperand(3, dst);
}


// fstsw: store the FPU status word into AX, with FWAIT (9B DF E0).
void X86_64Assembler::fstsw() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x9B);  // FWAIT prefix: check pending FP exceptions first.
  EmitUint8(0xDF);
  EmitUint8(0xE0);
}


// fnstcw: store the FPU control word to memory, no wait (D9 /7).
void X86_64Assembler::fnstcw(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(7, dst);
}


// fldcw: load the FPU control word from memory (D9 /5).
void X86_64Assembler::fldcw(const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(5, src);
}


// fistpl: store ST(0) as a 64-bit integer and pop (DF /7, m64int).
void X86_64Assembler::fistpl(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDF);
  EmitOperand(7, dst);
}


// fistps: store ST(0) as a 32-bit integer and pop (DB /3, m32int).
void X86_64Assembler::fistps(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDB);
  EmitOperand(3, dst);
}


// fildl: push a 64-bit integer from memory onto the FPU stack (DF /5, m64int).
void X86_64Assembler::fildl(const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDF);
  EmitOperand(5, src);
}


// filds: push a 32-bit integer from memory onto the FPU stack (DB /0, m32int).
void X86_64Assembler::filds(const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDB);
  EmitOperand(0, src);
}


// fincstp: increment the FPU stack-top pointer (D9 F7).
void X86_64Assembler::fincstp() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xF7);
}


// ffree: tag FPU register ST(index) as empty (DD C0+i).
// NOTE(review): the check excludes index 7 although ST7 is a valid x87
// register -- presumably the callers never free ST7; confirm if widening.
void X86_64Assembler::ffree(const Immediate& index) {
  CHECK_LT(index.value(), 7);
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDD);
  EmitUint8(0xC0 + index.value());
}


// fsin: replace ST(0) with sin(ST(0)) (D9 FE).
void X86_64Assembler::fsin() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xFE);
}


// fcos: replace ST(0) with cos(ST(0)) (D9 FF).
void X86_64Assembler::fcos() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xFF);
}


// fptan: replace ST(0) with tan(ST(0)) and push 1.0 (D9 F2).
void X86_64Assembler::fptan() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xF2);
}

// fucompp: unordered compare ST(0) with ST(1), then pop both (DA E9).
void X86_64Assembler::fucompp() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDA);
  EmitUint8(0xE9);
}


// fprem: partial remainder of ST(0) / ST(1) into ST(0) (D9 F8).
void X86_64Assembler::fprem() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xF8);
}
3888 
3889 
// Tries to emit the one-byte short form of XCHG (0x90+reg), which is only
// available when one operand is RAX/EAX/AX. Returns false (emitting
// nothing) if neither register is RAX so the caller can fall back to the
// general 0x87 encoding. `prefix_fn` emits the appropriate prefix for the
// operand width (e.g. EmitOptionalRex32 for 32-bit, EmitRex64 for 64-bit).
bool X86_64Assembler::try_xchg_rax(CpuRegister dst,
                                   CpuRegister src,
                                   void (X86_64Assembler::*prefix_fn)(CpuRegister)) {
  Register src_reg = src.AsRegister();
  Register dst_reg = dst.AsRegister();
  if (src_reg != RAX && dst_reg != RAX) {
    return false;
  }
  // Normalize so that dst_reg holds the non-RAX operand (or RAX if both are RAX).
  if (dst_reg == RAX) {
    std::swap(src_reg, dst_reg);
  }
  if (dst_reg != RAX) {
    // Prefix is needed only if one of the registers is not RAX, otherwise it's a pure NOP.
    (this->*prefix_fn)(CpuRegister(dst_reg));
  }
  EmitUint8(0x90 + CpuRegister(dst_reg).LowBits());
  return true;
}
3908 
3909 
// XCHG family. Register-register forms for 16/32/64 bits first try the
// one-byte 0x90+reg short form via try_xchg_rax; otherwise they emit the
// general 0x86/0x87 encoding. Note XCHG with a memory operand is implicitly
// locked by the CPU.

// xchgb reg, reg (byte form: 86 /r).
void X86_64Assembler::xchgb(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // There is no short version for AL.
  // Force a REX when needed so SPL/BPL/SIL/DIL encode correctly for both regs.
  EmitOptionalByteRegNormalizingRex32(dst, src, /*normalize_both=*/ true);
  EmitUint8(0x86);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


// xchgb reg, mem (86 /r; implicitly atomic on memory).
void X86_64Assembler::xchgb(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalByteRegNormalizingRex32(reg, address);
  EmitUint8(0x86);
  EmitOperand(reg.LowBits(), address);
}


// xchgw reg, reg (66 + 87 /r, or 66 + 90+r short form for AX).
void X86_64Assembler::xchgw(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // The 0x66 operand-size prefix must precede any REX prefix.
  EmitOperandSizeOverride();
  if (try_xchg_rax(dst, src, &X86_64Assembler::EmitOptionalRex32)) {
    // A short version for AX.
    return;
  }
  // General case.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x87);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


// xchgw reg, mem (66 + 87 /r).
void X86_64Assembler::xchgw(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();
  EmitOptionalRex32(reg, address);
  EmitUint8(0x87);
  EmitOperand(reg.LowBits(), address);
}


// xchgl reg, reg (87 /r, or 90+r short form for EAX).
void X86_64Assembler::xchgl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (try_xchg_rax(dst, src, &X86_64Assembler::EmitOptionalRex32)) {
    // A short version for EAX.
    return;
  }
  // General case.
  EmitOptionalRex32(dst, src);
  EmitUint8(0x87);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


// xchgl reg, mem (87 /r).
void X86_64Assembler::xchgl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x87);
  EmitOperand(reg.LowBits(), address);
}


// xchgq reg, reg (REX.W + 87 /r, or REX.W + 90+r short form for RAX).
void X86_64Assembler::xchgq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (try_xchg_rax(dst, src, &X86_64Assembler::EmitRex64)) {
    // A short version for RAX.
    return;
  }
  // General case.
  EmitRex64(dst, src);
  EmitUint8(0x87);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


// xchgq reg, mem (REX.W + 87 /r).
void X86_64Assembler::xchgq(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x87);
  EmitOperand(reg.LowBits(), address);
}
3990 
3991 
// XADD family (0F C0 for byte, 0F C1 for word/dword/qword). Encoding is
// "xadd r/m, r": the *source* register goes in the ModRM reg field, which
// is why the register-register forms pass (src, dst) to the REX helpers
// and emit src.LowBits() as the reg operand.

// xaddb reg, reg (0F C0 /r).
void X86_64Assembler::xaddb(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalByteRegNormalizingRex32(src, dst, /*normalize_both=*/ true);
  EmitUint8(0x0F);
  EmitUint8(0xC0);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}


// xaddb mem, reg (0F C0 /r).
void X86_64Assembler::xaddb(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalByteRegNormalizingRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xC0);
  EmitOperand(reg.LowBits(), address);
}


// xaddw reg, reg (66 0F C1 /r).
void X86_64Assembler::xaddw(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();  // 0x66 prefix must precede REX.
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0xC1);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}


// xaddw mem, reg (66 0F C1 /r).
void X86_64Assembler::xaddw(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xC1);
  EmitOperand(reg.LowBits(), address);
}


// xaddl reg, reg (0F C1 /r).
void X86_64Assembler::xaddl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0xC1);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}


// xaddl mem, reg (0F C1 /r).
void X86_64Assembler::xaddl(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xC1);
  EmitOperand(reg.LowBits(), address);
}


// xaddq reg, reg (REX.W 0F C1 /r).
void X86_64Assembler::xaddq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0xC1);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}


// xaddq mem, reg (REX.W 0F C1 /r).
void X86_64Assembler::xaddq(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xC1);
  EmitOperand(reg.LowBits(), address);
}
4064 
4065 
// CMP family. Immediate forms use ModRM opcode extension /7; most go
// through EmitComplex, which picks the short sign-extended-imm8 encoding
// (83 /7 ib) when the immediate fits, otherwise the imm32 form (81 /7 id).

// cmpb mem, imm8 (80 /7 ib). Only the low byte of the immediate is emitted.
void X86_64Assembler::cmpb(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());
  EmitOptionalRex32(address);
  EmitUint8(0x80);
  EmitOperand(7, address);  // /7 opcode extension for CMP.
  EmitUint8(imm.value() & 0xFF);  // Byte compare: truncate to low 8 bits.
}


// cmpw mem, imm (66 prefix + CMP /7; imm emitted as 16-bit via is_16_op).
void X86_64Assembler::cmpw(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());
  EmitOperandSizeOverride();  // 0x66 prefix must precede REX.
  EmitOptionalRex32(address);
  EmitComplex(7, address, imm, /* is_16_op= */ true);
}


// cmpl reg, imm (CMP /7, imm8 or imm32 chosen by EmitComplex).
void X86_64Assembler::cmpl(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());
  EmitOptionalRex32(reg);
  EmitComplex(7, Operand(reg), imm);
}


// cmpl reg, reg (3B /r: compare reg0 against reg1).
void X86_64Assembler::cmpl(CpuRegister reg0, CpuRegister reg1) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg0, reg1);
  EmitUint8(0x3B);
  EmitOperand(reg0.LowBits(), Operand(reg1));
}


// cmpl reg, mem (3B /r).
void X86_64Assembler::cmpl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x3B);
  EmitOperand(reg.LowBits(), address);
}


// cmpl mem, reg (39 /r: operands reversed relative to 3B).
void X86_64Assembler::cmpl(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x39);
  EmitOperand(reg.LowBits(), address);
}


// cmpl mem, imm (CMP /7 via EmitComplex).
void X86_64Assembler::cmpl(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());
  EmitOptionalRex32(address);
  EmitComplex(7, address, imm);
}


// cmpq reg, reg (REX.W 3B /r).
void X86_64Assembler::cmpq(CpuRegister reg0, CpuRegister reg1) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg0, reg1);
  EmitUint8(0x3B);
  EmitOperand(reg0.LowBits(), Operand(reg1));
}


// cmpq reg, imm (REX.W + CMP /7; immediate is sign-extended to 64 bits).
void X86_64Assembler::cmpq(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // cmpq only supports 32b immediate.
  EmitRex64(reg);
  EmitComplex(7, Operand(reg), imm);
}


// cmpq reg, mem (REX.W 3B /r).
void X86_64Assembler::cmpq(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x3B);
  EmitOperand(reg.LowBits(), address);
}


// cmpq mem, imm (REX.W + CMP /7).
void X86_64Assembler::cmpq(const Address& address, const Immediate& imm) {
  CHECK(imm.is_int32());  // cmpq only supports 32b immediate.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(address);
  EmitComplex(7, address, imm);
}
4155 
4156 
// addl reg, reg (03 /r: dst += src, 32-bit).
void X86_64Assembler::addl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x03);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


// addl reg, mem (03 /r: reg += [address], 32-bit).
void X86_64Assembler::addl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x03);
  EmitOperand(reg.LowBits(), address);
}
4171 
4172 
// TEST family: AND the operands, set flags, discard the result.

// testl reg, reg (85 /r).
void X86_64Assembler::testl(CpuRegister reg1, CpuRegister reg2) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg1, reg2);
  EmitUint8(0x85);
  EmitRegisterOperand(reg1.LowBits(), reg2.LowBits());
}


// testl reg, mem (85 /r).
void X86_64Assembler::testl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x85);
  EmitOperand(reg.LowBits(), address);
}


// testl reg, imm. Picks the shortest encoding:
//   - byte-register form (A8 ib / F6 C0+r ib) when the immediate fits in
//     8 bits and the register is one of the four whose low byte is
//     encodable without a REX prefix (encodings 0-3: AL, CL, DL, BL);
//   - A9 id short form when the register is RAX;
//   - general F7 /0 id otherwise.
void X86_64Assembler::testl(CpuRegister reg, const Immediate& immediate) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // For registers that have a byte variant (RAX, RBX, RCX, and RDX)
  // we only test the byte CpuRegister to keep the encoding short.
  if (immediate.is_uint8() && reg.AsRegister() < 4) {
    // Use zero-extended 8-bit immediate.
    if (reg.AsRegister() == RAX) {
      EmitUint8(0xA8);  // test al, imm8.
    } else {
      EmitUint8(0xF6);
      EmitUint8(0xC0 + reg.AsRegister());  // Safe: reg < 4, no REX needed.
    }
    EmitUint8(immediate.value() & 0xFF);
  } else if (reg.AsRegister() == RAX) {
    // Use short form if the destination is RAX.
    EmitUint8(0xA9);
    EmitImmediate(immediate);
  } else {
    EmitOptionalRex32(reg);
    EmitUint8(0xF7);
    EmitOperand(0, Operand(reg));  // /0 opcode extension for TEST.
    EmitImmediate(immediate);
  }
}


// testq reg, reg (REX.W 85 /r).
void X86_64Assembler::testq(CpuRegister reg1, CpuRegister reg2) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg1, reg2);
  EmitUint8(0x85);
  EmitRegisterOperand(reg1.LowBits(), reg2.LowBits());
}
4221 
4222 
testq(CpuRegister reg,const Address & address)4223 void X86_64Assembler::testq(CpuRegister reg, const Address& address) {
4224   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4225   EmitRex64(reg, address);
4226   EmitUint8(0x85);
4227   EmitOperand(reg.LowBits(), address);
4228 }
4229 
4230 
testb(const Address & dst,const Immediate & imm)4231 void X86_64Assembler::testb(const Address& dst, const Immediate& imm) {
4232   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4233   EmitOptionalRex32(dst);
4234   EmitUint8(0xF6);
4235   EmitOperand(Register::RAX, dst);
4236   CHECK(imm.is_int8());
4237   EmitUint8(imm.value() & 0xFF);
4238 }
4239 
4240 
// testl: 32-bit TEST of a memory operand against a 32-bit immediate.
void X86_64Assembler::testl(const Address& dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitUint8(0xF7);     // TEST r/m32, imm32.
  EmitOperand(0, dst); // /0 opcode extension.
  EmitImmediate(imm);
}
4248 
4249 
// andl: 32-bit AND, dst &= src.
void X86_64Assembler::andl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x23);  // AND r32, r/m32.
  EmitOperand(dst.LowBits(), Operand(src));
}


// andl: 32-bit AND of a memory operand into a register.
void X86_64Assembler::andl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x23);  // AND r32, r/m32.
  EmitOperand(reg.LowBits(), address);
}


// andl: 32-bit AND of a register with an immediate.
void X86_64Assembler::andl(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // andl only supports 32b immediate.
  EmitOptionalRex32(dst);
  EmitComplex(4, Operand(dst), imm);  // /4 is the AND opcode extension.
}


// andq: 64-bit AND of a register with a sign-extended 32-bit immediate.
void X86_64Assembler::andq(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // andq only supports 32b immediate.
  EmitRex64(reg);
  EmitComplex(4, Operand(reg), imm);  // /4 is the AND opcode extension.
}


// andq: 64-bit AND, dst &= src.
void X86_64Assembler::andq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x23);  // AND r64, r/m64.
  EmitOperand(dst.LowBits(), Operand(src));
}


// andq: 64-bit AND of a memory operand into a register.
void X86_64Assembler::andq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x23);  // AND r64, r/m64.
  EmitOperand(dst.LowBits(), src);
}


// andw: 16-bit AND of a memory operand with a 16-bit immediate.
void X86_64Assembler::andw(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_uint16() || imm.is_int16()) << imm.value();
  EmitUint8(0x66);  // Operand-size override prefix selects 16-bit operands.
  EmitOptionalRex32(address);
  EmitComplex(4, address, imm, /* is_16_op= */ true);
}
4305 
4306 
// orl: 32-bit OR, dst |= src.
void X86_64Assembler::orl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0B);  // OR r32, r/m32.
  EmitOperand(dst.LowBits(), Operand(src));
}


// orl: 32-bit OR of a memory operand into a register.
void X86_64Assembler::orl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0B);  // OR r32, r/m32.
  EmitOperand(reg.LowBits(), address);
}


// orl: 32-bit OR of a register with an immediate.
void X86_64Assembler::orl(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitComplex(1, Operand(dst), imm);  // /1 is the OR opcode extension.
}


// orq: 64-bit OR of a register with a sign-extended 32-bit immediate.
void X86_64Assembler::orq(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // orq only supports 32b immediate.
  EmitRex64(dst);
  EmitComplex(1, Operand(dst), imm);  // /1 is the OR opcode extension.
}


// orq: 64-bit OR, dst |= src.
void X86_64Assembler::orq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0B);  // OR r64, r/m64.
  EmitOperand(dst.LowBits(), Operand(src));
}


// orq: 64-bit OR of a memory operand into a register.
void X86_64Assembler::orq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0B);  // OR r64, r/m64.
  EmitOperand(dst.LowBits(), src);
}
4352 
4353 
// xorl: 32-bit XOR, dst ^= src.
void X86_64Assembler::xorl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x33);  // XOR r32, r/m32.
  EmitOperand(dst.LowBits(), Operand(src));
}


// xorl: 32-bit XOR of a memory operand into a register.
void X86_64Assembler::xorl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x33);  // XOR r32, r/m32.
  EmitOperand(reg.LowBits(), address);
}


// xorl: 32-bit XOR of a register with an immediate.
void X86_64Assembler::xorl(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitComplex(6, Operand(dst), imm);  // /6 is the XOR opcode extension.
}


// xorq: 64-bit XOR, dst ^= src.
void X86_64Assembler::xorq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x33);  // XOR r64, r/m64.
  EmitOperand(dst.LowBits(), Operand(src));
}


// xorq: 64-bit XOR of a register with a sign-extended 32-bit immediate.
void X86_64Assembler::xorq(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // xorq only supports 32b immediate.
  EmitRex64(dst);
  EmitComplex(6, Operand(dst), imm);  // /6 is the XOR opcode extension.
}

// xorq: 64-bit XOR of a memory operand into a register.
void X86_64Assembler::xorq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x33);  // XOR r64, r/m64.
  EmitOperand(dst.LowBits(), src);
}
4398 
4399 
#if 0
// NOTE(review): Dead code — disabled with #if 0 and kept for reference only.
// If ever re-enabled, rex_reg_mem() below will not compile as-is: it takes
// `mem` by const reference but dereferences it with `mem->rex()`.
void X86_64Assembler::rex(bool force, bool w, Register* r, Register* x, Register* b) {
  // REX.WRXB
  // W - 64-bit operand
  // R - MODRM.reg
  // X - SIB.index
  // B - MODRM.rm/SIB.base
  uint8_t rex = force ? 0x40 : 0;
  if (w) {
    rex |= 0x48;  // REX.W000
  }
  if (r != nullptr && *r >= Register::R8 && *r < Register::kNumberOfCpuRegisters) {
    rex |= 0x44;  // REX.0R00
    *r = static_cast<Register>(*r - 8);
  }
  if (x != nullptr && *x >= Register::R8 && *x < Register::kNumberOfCpuRegisters) {
    rex |= 0x42;  // REX.00X0
    *x = static_cast<Register>(*x - 8);
  }
  if (b != nullptr && *b >= Register::R8 && *b < Register::kNumberOfCpuRegisters) {
    rex |= 0x41;  // REX.000B
    *b = static_cast<Register>(*b - 8);
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}

void X86_64Assembler::rex_reg_mem(bool force, bool w, Register* dst, const Address& mem) {
  // REX.WRXB
  // W - 64-bit operand
  // R - MODRM.reg
  // X - SIB.index
  // B - MODRM.rm/SIB.base
  uint8_t rex = mem->rex();
  if (force) {
    rex |= 0x40;  // REX.0000
  }
  if (w) {
    rex |= 0x48;  // REX.W000
  }
  if (dst != nullptr && *dst >= Register::R8 && *dst < Register::kNumberOfCpuRegisters) {
    rex |= 0x44;  // REX.0R00
    *dst = static_cast<Register>(*dst - 8);
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}

void rex_mem_reg(bool force, bool w, Address* mem, Register* src);
#endif
4452 
// addl: 32-bit add of a register with an immediate.
void X86_64Assembler::addl(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitComplex(0, Operand(reg), imm);  // /0 is the ADD opcode extension.
}


// addw: 16-bit add of a register with a 16-bit immediate.
void X86_64Assembler::addw(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_uint16() || imm.is_int16()) << imm.value();
  EmitUint8(0x66);  // Operand-size override prefix selects 16-bit operands.
  EmitOptionalRex32(reg);
  EmitComplex(0, Operand(reg), imm, /* is_16_op= */ true);
}


// addq: 64-bit add of a register with a sign-extended 32-bit immediate.
void X86_64Assembler::addq(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // addq only supports 32b immediate.
  EmitRex64(reg);
  EmitComplex(0, Operand(reg), imm);  // /0 is the ADD opcode extension.
}


// addq: 64-bit add of a memory operand into a register.
void X86_64Assembler::addq(CpuRegister dst, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, address);
  EmitUint8(0x03);  // ADD r64, r/m64.
  EmitOperand(dst.LowBits(), address);
}


// addq: 64-bit add, dst += src.
void X86_64Assembler::addq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // 0x01 is addq r/m64 <- r/m64 + r64, with op1 in r/m and op2 in reg: so reverse EmitRex64
  EmitRex64(src, dst);
  EmitUint8(0x01);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}


// addl: 32-bit add of a register into a memory operand.
void X86_64Assembler::addl(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x01);  // ADD r/m32, r32 — memory is the destination.
  EmitOperand(reg.LowBits(), address);
}


// addl: 32-bit add of an immediate into a memory operand.
void X86_64Assembler::addl(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitComplex(0, address, imm);  // /0 is the ADD opcode extension.
}


// addw: 16-bit add of a 16-bit immediate into a memory operand.
void X86_64Assembler::addw(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_uint16() || imm.is_int16()) << imm.value();
  EmitUint8(0x66);  // Operand-size override prefix selects 16-bit operands.
  EmitOptionalRex32(address);
  EmitComplex(0, address, imm, /* is_16_op= */ true);
}


// addw: 16-bit add of a register into a memory operand.
void X86_64Assembler::addw(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();  // 0x66 prefix for the 16-bit operand size.
  EmitOptionalRex32(reg, address);
  EmitUint8(0x01);  // ADD r/m16, r16 — memory is the destination.
  EmitOperand(reg.LowBits(), address);
}
4525 
4526 
// subl: 32-bit subtract, dst -= src.
void X86_64Assembler::subl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x2B);  // SUB r32, r/m32.
  EmitOperand(dst.LowBits(), Operand(src));
}


// subl: 32-bit subtract of an immediate from a register.
void X86_64Assembler::subl(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitComplex(5, Operand(reg), imm);  // /5 is the SUB opcode extension.
}


// subq: 64-bit subtract of a sign-extended 32-bit immediate from a register.
void X86_64Assembler::subq(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // subq only supports 32b immediate.
  EmitRex64(reg);
  EmitComplex(5, Operand(reg), imm);  // /5 is the SUB opcode extension.
}


// subq: 64-bit subtract, dst -= src.
void X86_64Assembler::subq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x2B);  // SUB r64, r/m64.
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
4556 
4557 
subq(CpuRegister reg,const Address & address)4558 void X86_64Assembler::subq(CpuRegister reg, const Address& address) {
4559   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4560   EmitRex64(reg, address);
4561   EmitUint8(0x2B);
4562   EmitOperand(reg.LowBits() & 7, address);
4563 }
4564 
4565 
// subl: 32-bit subtract of a memory operand from a register.
void X86_64Assembler::subl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x2B);  // SUB r32, r/m32.
  EmitOperand(reg.LowBits(), address);
}
4572 
4573 
// cdq: sign-extend EAX into EDX:EAX (prepares the dividend for idivl).
void X86_64Assembler::cdq() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x99);
}


// cqo: sign-extend RAX into RDX:RAX (REX.W form of 0x99; prepares for idivq).
void X86_64Assembler::cqo() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64();
  EmitUint8(0x99);
}


// idivl: signed 32-bit divide of EDX:EAX by reg; quotient in EAX, remainder in EDX.
void X86_64Assembler::idivl(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitUint8(0xF8 | reg.LowBits());  // ModRM 11-111-reg, i.e. /7 (IDIV).
}


// idivq: signed 64-bit divide of RDX:RAX by reg.
void X86_64Assembler::idivq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);
  EmitUint8(0xF8 | reg.LowBits());  // ModRM 11-111-reg, i.e. /7 (IDIV).
}


// divl: unsigned 32-bit divide of EDX:EAX by reg.
void X86_64Assembler::divl(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitUint8(0xF0 | reg.LowBits());  // ModRM 11-110-reg, i.e. /6 (DIV).
}


// divq: unsigned 64-bit divide of RDX:RAX by reg.
void X86_64Assembler::divq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);
  EmitUint8(0xF0 | reg.LowBits());  // ModRM 11-110-reg, i.e. /6 (DIV).
}
4617 
4618 
// imull: 32-bit two-operand signed multiply, dst *= src.
void X86_64Assembler::imull(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xAF);  // IMUL r32, r/m32.
  EmitOperand(dst.LowBits(), Operand(src));
}

// imull: 32-bit three-operand signed multiply, dst = src * imm,
// using the short imm8 form when the immediate fits.
void X86_64Assembler::imull(CpuRegister dst, CpuRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // imull only supports 32b immediate.

  EmitOptionalRex32(dst, src);

  // See whether imm can be represented as a sign-extended 8bit value.
  int32_t v32 = static_cast<int32_t>(imm.value());
  if (IsInt<8>(v32)) {
    // Sign-extension works.
    EmitUint8(0x6B);  // IMUL r32, r/m32, imm8.
    EmitOperand(dst.LowBits(), Operand(src));
    EmitUint8(static_cast<uint8_t>(v32 & 0xFF));
  } else {
    // Not representable, use full immediate.
    EmitUint8(0x69);  // IMUL r32, r/m32, imm32.
    EmitOperand(dst.LowBits(), Operand(src));
    EmitImmediate(imm);
  }
}


// imull: reg = reg * imm (delegates to the three-operand form).
void X86_64Assembler::imull(CpuRegister reg, const Immediate& imm) {
  imull(reg, reg, imm);
}


// imull: 32-bit signed multiply of a memory operand into a register.
void X86_64Assembler::imull(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xAF);  // IMUL r32, r/m32.
  EmitOperand(reg.LowBits(), address);
}


// imulq: 64-bit two-operand signed multiply, dst *= src.
void X86_64Assembler::imulq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xAF);  // IMUL r64, r/m64.
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


// imulq: reg = reg * imm (delegates to the three-operand form).
void X86_64Assembler::imulq(CpuRegister reg, const Immediate& imm) {
  imulq(reg, reg, imm);
}

// imulq: 64-bit three-operand signed multiply, dst = reg * imm,
// using the short imm8 form when the immediate fits.
void X86_64Assembler::imulq(CpuRegister dst, CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // imulq only supports 32b immediate.

  EmitRex64(dst, reg);

  // See whether imm can be represented as a sign-extended 8bit value.
  int64_t v64 = imm.value();
  if (IsInt<8>(v64)) {
    // Sign-extension works.
    EmitUint8(0x6B);  // IMUL r64, r/m64, imm8.
    EmitOperand(dst.LowBits(), Operand(reg));
    EmitUint8(static_cast<uint8_t>(v64 & 0xFF));
  } else {
    // Not representable, use full immediate.
    EmitUint8(0x69);  // IMUL r64, r/m64, imm32.
    EmitOperand(dst.LowBits(), Operand(reg));
    EmitImmediate(imm);
  }
}

// imulq: 64-bit signed multiply of a memory operand into a register.
void X86_64Assembler::imulq(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xAF);  // IMUL r64, r/m64.
  EmitOperand(reg.LowBits(), address);
}


// imull: one-operand signed multiply, EDX:EAX = EAX * reg.
void X86_64Assembler::imull(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitOperand(5, Operand(reg));  // /5 is the IMUL opcode extension.
}


// imulq: one-operand signed multiply, RDX:RAX = RAX * reg.
void X86_64Assembler::imulq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);
  EmitOperand(5, Operand(reg));  // /5 is the IMUL opcode extension.
}


// imull: one-operand signed multiply by a memory operand.
void X86_64Assembler::imull(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0xF7);
  EmitOperand(5, address);  // /5 is the IMUL opcode extension.
}


// mull: one-operand unsigned multiply, EDX:EAX = EAX * reg.
void X86_64Assembler::mull(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitOperand(4, Operand(reg));  // /4 is the MUL opcode extension.
}


// mull: one-operand unsigned multiply by a memory operand.
void X86_64Assembler::mull(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0xF7);
  EmitOperand(4, address);  // /4 is the MUL opcode extension.
}
4744 
4745 
// Shift and rotate instructions. All delegate to EmitGenericShift; the first
// argument selects 64-bit (true) vs 32-bit (false) operand size, and the
// second is the ModRM reg-field opcode extension for the shift-group opcodes:
// /0 = ROL, /1 = ROR, /4 = SHL, /5 = SHR, /7 = SAR.
// Register-shifter forms shift by CL.

void X86_64Assembler::shll(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 4, reg, imm);
}


void X86_64Assembler::shlq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 4, reg, imm);
}


void X86_64Assembler::shll(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 4, operand, shifter);
}


void X86_64Assembler::shlq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 4, operand, shifter);
}


void X86_64Assembler::shrl(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 5, reg, imm);
}


void X86_64Assembler::shrq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 5, reg, imm);
}


void X86_64Assembler::shrl(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 5, operand, shifter);
}


void X86_64Assembler::shrq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 5, operand, shifter);
}


void X86_64Assembler::sarl(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 7, reg, imm);
}


void X86_64Assembler::sarl(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 7, operand, shifter);
}


void X86_64Assembler::sarq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 7, reg, imm);
}


void X86_64Assembler::sarq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 7, operand, shifter);
}


void X86_64Assembler::roll(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 0, reg, imm);
}


void X86_64Assembler::roll(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 0, operand, shifter);
}


void X86_64Assembler::rorl(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(false, 1, reg, imm);
}


void X86_64Assembler::rorl(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(false, 1, operand, shifter);
}


void X86_64Assembler::rolq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 0, reg, imm);
}


void X86_64Assembler::rolq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 0, operand, shifter);
}


void X86_64Assembler::rorq(CpuRegister reg, const Immediate& imm) {
  EmitGenericShift(true, 1, reg, imm);
}


void X86_64Assembler::rorq(CpuRegister operand, CpuRegister shifter) {
  EmitGenericShift(true, 1, operand, shifter);
}
4844 
4845 
// negl: 32-bit two's-complement negation, reg = -reg.
void X86_64Assembler::negl(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitOperand(3, Operand(reg));  // /3 is the NEG opcode extension.
}


// negq: 64-bit two's-complement negation.
void X86_64Assembler::negq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);
  EmitOperand(3, Operand(reg));  // /3 is the NEG opcode extension.
}


// notl: 32-bit one's-complement, reg = ~reg.
void X86_64Assembler::notl(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xF7);
  EmitUint8(0xD0 | reg.LowBits());  // ModRM 11-010-reg, i.e. /2 (NOT).
}


// notq: 64-bit one's-complement.
void X86_64Assembler::notq(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg);
  EmitUint8(0xF7);
  EmitOperand(2, Operand(reg));  // /2 is the NOT opcode extension.
}
4876 
4877 
// enter: create a stack frame of `imm` bytes, nesting level 0.
void X86_64Assembler::enter(const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xC8);  // ENTER imm16, imm8.
  CHECK(imm.is_uint16()) << imm.value();
  EmitUint8(imm.value() & 0xFF);          // Frame size, low byte.
  EmitUint8((imm.value() >> 8) & 0xFF);   // Frame size, high byte.
  EmitUint8(0x00);                        // Nesting level is always 0.
}


// leave: tear down the current stack frame (mov rsp, rbp; pop rbp).
void X86_64Assembler::leave() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xC9);
}


// ret: near return.
void X86_64Assembler::ret() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xC3);
}


// ret: near return, additionally popping `imm` bytes of arguments.
void X86_64Assembler::ret(const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xC2);  // RET imm16.
  CHECK(imm.is_uint16());
  EmitUint8(imm.value() & 0xFF);
  EmitUint8((imm.value() >> 8) & 0xFF);
}



// nop: one-byte no-operation.
void X86_64Assembler::nop() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x90);
}


// int3: breakpoint trap.
void X86_64Assembler::int3() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xCC);
}


// hlt: halt the processor (privileged; faults in user mode).
void X86_64Assembler::hlt() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF4);
}
4926 
4927 
// j: conditional jump to a label. For a bound (backward) target, picks the
// short rel8 form when it fits and the long 0F 8x rel32 form otherwise.
// Unbound (forward) targets always use the long form and are patched later.
void X86_64Assembler::j(Condition condition, Label* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;  // Opcode + rel8.
    static const int kLongSize = 6;   // 0F + opcode + rel32.
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);  // Bound labels are always behind us.
    if (IsInt<8>(offset - kShortSize)) {
      EmitUint8(0x70 + condition);  // Jcc rel8.
      EmitUint8((offset - kShortSize) & 0xFF);
    } else {
      EmitUint8(0x0F);
      EmitUint8(0x80 + condition);  // Jcc rel32.
      EmitInt32(offset - kLongSize);
    }
  } else {
    EmitUint8(0x0F);
    EmitUint8(0x80 + condition);
    EmitLabelLink(label);  // Records a fixup for when the label is bound.
  }
}


// j: conditional jump to a NearLabel — always the 2-byte rel8 form.
// A bound target that does not fit in rel8 is a programming error (CHECK).
void X86_64Assembler::j(Condition condition, NearLabel* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);
    CHECK(IsInt<8>(offset - kShortSize));
    EmitUint8(0x70 + condition);  // Jcc rel8.
    EmitUint8((offset - kShortSize) & 0xFF);
  } else {
    EmitUint8(0x70 + condition);
    EmitLabelLink(label);
  }
}


// jrcxz: jump if RCX is zero; only a rel8 encoding exists, hence NearLabel.
void X86_64Assembler::jrcxz(NearLabel* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);
    CHECK(IsInt<8>(offset - kShortSize));
    EmitUint8(0xE3);  // JRCXZ rel8.
    EmitUint8((offset - kShortSize) & 0xFF);
  } else {
    EmitUint8(0xE3);
    EmitLabelLink(label);
  }
}
4981 
4982 
// jmp: indirect jump through a register.
void X86_64Assembler::jmp(CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitUint8(0xFF);
  EmitRegisterOperand(4, reg.LowBits());  // /4 is the JMP opcode extension.
}

// jmp: indirect jump through a memory operand.
void X86_64Assembler::jmp(const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitUint8(0xFF);
  EmitOperand(4, address);  // /4 is the JMP opcode extension.
}

// jmp: direct jump to a label. Bound (backward) targets use the short rel8
// form when possible; unbound targets always use rel32 and are patched later.
void X86_64Assembler::jmp(Label* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;  // Opcode + rel8.
    static const int kLongSize = 5;   // Opcode + rel32.
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);  // Bound labels are always behind us.
    if (IsInt<8>(offset - kShortSize)) {
      EmitUint8(0xEB);  // JMP rel8.
      EmitUint8((offset - kShortSize) & 0xFF);
    } else {
      EmitUint8(0xE9);  // JMP rel32.
      EmitInt32(offset - kLongSize);
    }
  } else {
    EmitUint8(0xE9);
    EmitLabelLink(label);  // Records a fixup for when the label is bound.
  }
}


// jmp: direct jump to a NearLabel — always the 2-byte rel8 form.
void X86_64Assembler::jmp(NearLabel* label) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  if (label->IsBound()) {
    static const int kShortSize = 2;
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);
    CHECK(IsInt<8>(offset - kShortSize));
    EmitUint8(0xEB);  // JMP rel8.
    EmitUint8((offset - kShortSize) & 0xFF);
  } else {
    EmitUint8(0xEB);
    EmitLabelLink(label);
  }
}
5032 
5033 
// rep_movsw: REP MOVSW — copy RCX 16-bit words from [RSI] to [RDI].
void X86_64Assembler::rep_movsw() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Operand-size override turns MOVSD (0xA5) into MOVSW.
  EmitUint8(0xF3);  // REP prefix.
  EmitUint8(0xA5);
}

// rep_movsb: REP MOVSB — copy RCX bytes from [RSI] to [RDI].
void X86_64Assembler::rep_movsb() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // REP prefix.
  EmitUint8(0xA4);
}

// rep_movsl: REP MOVSD — copy RCX 32-bit words from [RSI] to [RDI].
void X86_64Assembler::rep_movsl() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // REP prefix.
  EmitUint8(0xA5);
}

// lock: emit the LOCK prefix; returns `this` so the locked instruction can be
// chained, e.g. lock()->cmpxchgl(...).
X86_64Assembler* X86_64Assembler::lock() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF0);
  return this;
}
5058 
5059 
cmpxchgb(const Address & address,CpuRegister reg)5060 void X86_64Assembler::cmpxchgb(const Address& address, CpuRegister reg) {
5061   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
5062   EmitOptionalByteRegNormalizingRex32(reg, address);
5063   EmitUint8(0x0F);
5064   EmitUint8(0xB0);
5065   EmitOperand(reg.LowBits(), address);
5066 }
5067 
5068 
cmpxchgw(const Address & address,CpuRegister reg)5069 void X86_64Assembler::cmpxchgw(const Address& address, CpuRegister reg) {
5070   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
5071   EmitOperandSizeOverride();
5072   EmitOptionalRex32(reg, address);
5073   EmitUint8(0x0F);
5074   EmitUint8(0xB1);
5075   EmitOperand(reg.LowBits(), address);
5076 }
5077 
5078 
cmpxchgl(const Address & address,CpuRegister reg)5079 void X86_64Assembler::cmpxchgl(const Address& address, CpuRegister reg) {
5080   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
5081   EmitOptionalRex32(reg, address);
5082   EmitUint8(0x0F);
5083   EmitUint8(0xB1);
5084   EmitOperand(reg.LowBits(), address);
5085 }
5086 
5087 
// Emits 64-bit `cmpxchg [address], reg64` (REX.W 0F B1 /r).
void X86_64Assembler::cmpxchgq(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x0F);
  EmitUint8(0xB1);
  EmitOperand(reg.LowBits(), address);
}
5095 
5096 
// Emits `mfence` (0F AE F0): full memory fence.
void X86_64Assembler::mfence() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x0F);
  EmitUint8(0xAE);
  EmitUint8(0xF0);
}
5103 
5104 
// Emits the GS segment-override prefix (0x65) and returns `this` so the
// prefixed instruction can be chained.
X86_64Assembler* X86_64Assembler::gs() {
  // TODO: gs is a prefix and not an instruction
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x65);
  return this;
}
5111 
5112 
AddImmediate(CpuRegister reg,const Immediate & imm)5113 void X86_64Assembler::AddImmediate(CpuRegister reg, const Immediate& imm) {
5114   int value = imm.value();
5115   if (value != 0) {
5116     if (value > 0) {
5117       addl(reg, imm);
5118     } else {
5119       subl(reg, Immediate(value));
5120     }
5121   }
5122 }
5123 
5124 
// Emits `setcc dst8` (0F 90+cc /0): sets the low byte of `dst` to 0 or 1
// depending on `condition`.
void X86_64Assembler::setcc(Condition condition, CpuRegister dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // RSP, RBP, RDI, RSI need rex prefix (else the pattern encodes ah/bh/ch/dh).
  if (dst.NeedsRex() || dst.AsRegister() > 3) {
    EmitOptionalRex(true, false, false, false, dst.NeedsRex());
  }
  EmitUint8(0x0F);
  EmitUint8(0x90 + condition);
  EmitUint8(0xC0 + dst.LowBits());  // ModRM: register-direct form of dst.
}
5135 
// Emits BMI1 `blsi dst, src` (VEX.NDD, opcode F3 with /3): extract lowest
// set bit. `dst` is encoded in VEX.vvvv, `src` in ModRM.rm; W=1 for 64-bit.
void X86_64Assembler::blsi(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
  uint8_t byte_one = EmitVexPrefixByteOne(/*R=*/ false,
                                          /*X=*/ false,
                                          src.NeedsRex(),
                                          SET_VEX_M_0F_38);
  uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/true,
                                          X86_64ManagedRegister::FromCpuRegister(dst.AsRegister()),
                                          SET_VEX_L_128,
                                          SET_VEX_PP_NONE);
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  EmitUint8(byte_two);
  EmitUint8(0xF3);
  EmitRegisterOperand(3, src.LowBits());  // /3 opcode extension selects BLSI.
}
5153 
// Emits BMI1 `blsmsk dst, src` (VEX.NDD, opcode F3 with /2): mask up to the
// lowest set bit. `dst` is encoded in VEX.vvvv, `src` in ModRM.rm; W=1.
void X86_64Assembler::blsmsk(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
  uint8_t byte_one = EmitVexPrefixByteOne(/*R=*/ false,
                                          /*X=*/ false,
                                          src.NeedsRex(),
                                          SET_VEX_M_0F_38);
  uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/ true,
                                          X86_64ManagedRegister::FromCpuRegister(dst.AsRegister()),
                                          SET_VEX_L_128,
                                          SET_VEX_PP_NONE);
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  EmitUint8(byte_two);
  EmitUint8(0xF3);
  EmitRegisterOperand(2, src.LowBits());  // /2 opcode extension selects BLSMSK.
}
5171 
// Emits BMI1 `blsr dst, src` (VEX.NDD, opcode F3 with /1): reset lowest set
// bit. `dst` is encoded in VEX.vvvv, `src` in ModRM.rm; W=1 for 64-bit.
void X86_64Assembler::blsr(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/false);
  uint8_t byte_one = EmitVexPrefixByteOne(/*R=*/ false,
                                          /*X=*/ false,
                                          src.NeedsRex(),
                                          SET_VEX_M_0F_38);
  uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/ true,
                                          X86_64ManagedRegister::FromCpuRegister(dst.AsRegister()),
                                          SET_VEX_L_128,
                                          SET_VEX_PP_NONE);
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  EmitUint8(byte_two);
  EmitUint8(0xF3);
  EmitRegisterOperand(1, src.LowBits());  // /1 opcode extension selects BLSR.
}
5189 
// Emits 32-bit `bswap dst` (0F C8+rd): byte-swap the register.
void X86_64Assembler::bswapl(CpuRegister dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex(false, false, false, false, dst.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0xC8 + dst.LowBits());  // Register encoded in the opcode byte.
}
5196 
// Emits 64-bit `bswap dst` (REX.W 0F C8+rd): byte-swap the register.
void X86_64Assembler::bswapq(CpuRegister dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex(false, true, false, false, dst.NeedsRex());  // REX.W set.
  EmitUint8(0x0F);
  EmitUint8(0xC8 + dst.LowBits());  // Register encoded in the opcode byte.
}
5203 
// Emits 32-bit `bsf dst, src` (0F BC /r): bit scan forward, register form.
void X86_64Assembler::bsfl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBC);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
5211 
// Emits 32-bit `bsf dst, [src]` (0F BC /r): bit scan forward, memory form.
void X86_64Assembler::bsfl(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBC);
  EmitOperand(dst.LowBits(), src);
}
5219 
// Emits 64-bit `bsf dst, src` (REX.W 0F BC /r), register form.
void X86_64Assembler::bsfq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBC);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
5227 
// Emits 64-bit `bsf dst, [src]` (REX.W 0F BC /r), memory form.
void X86_64Assembler::bsfq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBC);
  EmitOperand(dst.LowBits(), src);
}
5235 
// Emits 32-bit `bsr dst, src` (0F BD /r): bit scan reverse, register form.
void X86_64Assembler::bsrl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBD);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
5243 
// Emits 32-bit `bsr dst, [src]` (0F BD /r): bit scan reverse, memory form.
void X86_64Assembler::bsrl(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBD);
  EmitOperand(dst.LowBits(), src);
}
5251 
// Emits 64-bit `bsr dst, src` (REX.W 0F BD /r), register form.
void X86_64Assembler::bsrq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBD);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
5259 
// Emits 64-bit `bsr dst, [src]` (REX.W 0F BD /r), memory form.
void X86_64Assembler::bsrq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xBD);
  EmitOperand(dst.LowBits(), src);
}
5267 
// Emits 32-bit `popcnt dst, src` (F3 0F B8 /r), register form. The F3
// prefix must precede the REX prefix.
void X86_64Assembler::popcntl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB8);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
5276 
// Emits 32-bit `popcnt dst, [src]` (F3 0F B8 /r), memory form.
void X86_64Assembler::popcntl(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB8);
  EmitOperand(dst.LowBits(), src);
}
5285 
// Emits 64-bit `popcnt dst, src` (F3 REX.W 0F B8 /r), register form.
void X86_64Assembler::popcntq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB8);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}
5294 
// Emits 64-bit `popcnt dst, [src]` (F3 REX.W 0F B8 /r), memory form.
void X86_64Assembler::popcntq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitRex64(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xB8);
  EmitOperand(dst.LowBits(), src);
}
5303 
// Emits `rdtsc` (0F 31): read time-stamp counter into EDX:EAX.
void X86_64Assembler::rdtsc() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x0F);
  EmitUint8(0x31);
}
5309 
// Emits `repne scasb` (F2 AE): repeat-while-not-equal byte string scan.
void X86_64Assembler::repne_scasb() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitUint8(0xAE);
}
5315 
// Emits `repne scasw` (66 F2 AF): 16-bit repeat-while-not-equal string scan.
void X86_64Assembler::repne_scasw() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Operand-size override selects 16-bit elements.
  EmitUint8(0xF2);  // REPNE prefix.
  EmitUint8(0xAF);  // SCAS.
}
5322 
// Emits `repe cmpsw` (66 F3 A7): 16-bit repeat-while-equal string compare.
void X86_64Assembler::repe_cmpsw() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // Operand-size override selects 16-bit elements.
  EmitUint8(0xF3);  // REPE prefix.
  EmitUint8(0xA7);  // CMPS.
}
5329 
5330 
// Emits `repe cmpsl` (F3 A7): 32-bit repeat-while-equal string compare.
void X86_64Assembler::repe_cmpsl() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitUint8(0xA7);
}
5336 
5337 
// Emits `repe cmpsq` (F3 REX.W A7): 64-bit repeat-while-equal string
// compare. The REP prefix precedes REX, per the encoding rules.
void X86_64Assembler::repe_cmpsq() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitRex64();
  EmitUint8(0xA7);
}
5344 
// Emits `ud2` (0F 0B): guaranteed undefined-instruction trap.
void X86_64Assembler::ud2() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x0F);
  EmitUint8(0x0B);
}
5350 
// Materializes a double constant in `dst` by pushing its bit pattern onto
// the stack, loading it with movsd, and popping the slot again.
void X86_64Assembler::LoadDoubleConstant(XmmRegister dst, double value) {
  // TODO: Need to have a code constants table.
  int64_t constant = bit_cast<int64_t, double>(value);
  // Push high half first so the low half ends up at the lower address
  // ([RSP]), giving a little-endian 64-bit value in memory.
  pushq(Immediate(High32Bits(constant)));
  pushq(Immediate(Low32Bits(constant)));
  movsd(dst, Address(CpuRegister(RSP), 0));
  addq(CpuRegister(RSP), Immediate(2 * sizeof(intptr_t)));  // Drop the slot.
}
5359 
5360 
Align(int alignment,int offset)5361 void X86_64Assembler::Align(int alignment, int offset) {
5362   CHECK(IsPowerOfTwo(alignment));
5363   // Emit nop instruction until the real position is aligned.
5364   while (((offset + buffer_.GetPosition()) & (alignment-1)) != 0) {
5365     nop();
5366   }
5367 }
5368 
5369 
// Binds `label` to the current buffer position and patches every previously
// emitted 32-bit link slot in its chain to a PC-relative displacement.
void X86_64Assembler::Bind(Label* label) {
  int bound = buffer_.Size();
  CHECK(!label->IsBound());  // Labels can only be bound once.
  while (label->IsLinked()) {
    int position = label->LinkPosition();
    // Each link slot holds the position of the next link in the chain.
    int next = buffer_.Load<int32_t>(position);
    // Displacement is relative to the end of the 4-byte slot.
    buffer_.Store<int32_t>(position, bound - (position + 4));
    label->position_ = next;
  }
  label->BindTo(bound);
}
5381 
5382 
// Binds a near (8-bit displacement) label, patching each 1-byte link slot in
// its chain. The chain is encoded as deltas between link positions; a stored
// delta of 0 marks the end of the chain.
void X86_64Assembler::Bind(NearLabel* label) {
  int bound = buffer_.Size();
  CHECK(!label->IsBound());  // Labels can only be bound once.
  while (label->IsLinked()) {
    int position = label->LinkPosition();
    uint8_t delta = buffer_.Load<uint8_t>(position);
    int offset = bound - (position + 1);
    CHECK(IsInt<8>(offset));  // Near jumps only reach +/-127 bytes.
    buffer_.Store<int8_t>(position, offset);
    // Walk to the previous link, or terminate when the delta was 0.
    label->position_ = delta != 0u ? label->position_ - delta : 0;
  }
  label->BindTo(bound);
}
5396 
5397 
// Emits an encoded operand (ModRM byte plus optional SIB/displacement),
// merging `reg_or_opcode` into the reg field of the ModRM byte, then emits
// any pending fixup attached to the operand.
void X86_64Assembler::EmitOperand(uint8_t reg_or_opcode, const Operand& operand) {
  CHECK_GE(reg_or_opcode, 0);
  CHECK_LT(reg_or_opcode, 8);  // The reg field is only 3 bits wide.
  const int length = operand.length_;
  CHECK_GT(length, 0);
  // Emit the ModRM byte updated with the given reg value.
  CHECK_EQ(operand.encoding_[0] & 0x38, 0);  // reg field must be vacant.
  EmitUint8(operand.encoding_[0] + (reg_or_opcode << 3));
  // Emit the rest of the encoded operand.
  for (int i = 1; i < length; i++) {
    EmitUint8(operand.encoding_[i]);
  }
  AssemblerFixup* fixup = operand.GetFixup();
  if (fixup != nullptr) {
    EmitFixup(fixup);
  }
}
5415 
5416 
EmitImmediate(const Immediate & imm,bool is_16_op)5417 void X86_64Assembler::EmitImmediate(const Immediate& imm, bool is_16_op) {
5418   if (is_16_op) {
5419     EmitUint8(imm.value() & 0xFF);
5420     EmitUint8(imm.value() >> 8);
5421   } else if (imm.is_int32()) {
5422     EmitInt32(static_cast<int32_t>(imm.value()));
5423   } else {
5424     EmitInt64(imm.value());
5425   }
5426 }
5427 
5428 
// Emits an ALU instruction with an immediate operand, picking the shortest
// encoding: sign-extended imm8 (0x83), the RAX short form (opcode 0x05+op),
// or the general imm32 form (0x81). `reg_or_opcode` selects the operation.
void X86_64Assembler::EmitComplex(uint8_t reg_or_opcode,
                                  const Operand& operand,
                                  const Immediate& immediate,
                                  bool is_16_op) {
  CHECK_GE(reg_or_opcode, 0);
  CHECK_LT(reg_or_opcode, 8);  // 3-bit opcode extension.
  if (immediate.is_int8()) {
    // Use sign-extended 8-bit immediate.
    EmitUint8(0x83);
    EmitOperand(reg_or_opcode, operand);
    EmitUint8(immediate.value() & 0xFF);
  } else if (operand.IsRegister(CpuRegister(RAX))) {
    // Use short form if the destination is eax.
    EmitUint8(0x05 + (reg_or_opcode << 3));
    EmitImmediate(immediate, is_16_op);
  } else {
    EmitUint8(0x81);
    EmitOperand(reg_or_opcode, operand);
    EmitImmediate(immediate, is_16_op);
  }
}
5450 
5451 
// Emits the 32-bit displacement for a jump/call to `label`: a resolved
// backward offset when bound, otherwise a link into the label's fixup chain.
// `instruction_size` accounts for bytes of the instruction already emitted
// plus the displacement itself.
void X86_64Assembler::EmitLabel(Label* label, int instruction_size) {
  if (label->IsBound()) {
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);  // Bound labels are always behind us.
    EmitInt32(offset - instruction_size);
  } else {
    EmitLabelLink(label);
  }
}
5461 
5462 
// Appends the current position to an unbound label's fixup chain by storing
// the previous chain head in the 4-byte slot just emitted.
void X86_64Assembler::EmitLabelLink(Label* label) {
  CHECK(!label->IsBound());
  int position = buffer_.Size();
  EmitInt32(label->position_);
  label->LinkTo(position);
}
5469 
5470 
// Appends the current position to an unbound near label's fixup chain.
// Only one byte is available, so the chain stores the delta to the previous
// link; 0 marks the end of the chain.
void X86_64Assembler::EmitLabelLink(NearLabel* label) {
  CHECK(!label->IsBound());
  int position = buffer_.Size();
  if (label->IsLinked()) {
    // Save the delta in the byte that we have to play with.
    uint32_t delta = position - label->LinkPosition();
    CHECK(IsUint<8>(delta));  // Chain links must be within 255 bytes.
    EmitUint8(delta & 0xFF);
  } else {
    EmitUint8(0);  // First link: 0 terminates the chain.
  }
  label->LinkTo(position);
}
5484 
5485 
// Emits a shift/rotate of `reg` by an 8-bit immediate. `reg_or_opcode`
// selects the operation; `wide` selects 64-bit (REX.W) operand size.
// Shift-by-1 uses the shorter 0xD1 encoding without an immediate byte.
void X86_64Assembler::EmitGenericShift(bool wide,
                                       int reg_or_opcode,
                                       CpuRegister reg,
                                       const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int8());
  if (wide) {
    EmitRex64(reg);
  } else {
    EmitOptionalRex32(reg);
  }
  if (imm.value() == 1) {
    EmitUint8(0xD1);  // Shift-by-one form, no immediate byte.
    EmitOperand(reg_or_opcode, Operand(reg));
  } else {
    EmitUint8(0xC1);  // Shift-by-imm8 form.
    EmitOperand(reg_or_opcode, Operand(reg));
    EmitUint8(imm.value() & 0xFF);
  }
}
5506 
5507 
// Emits a shift/rotate of `operand` by the CL register (0xD3 form). The
// shift count must live in RCX, as required by the instruction set.
void X86_64Assembler::EmitGenericShift(bool wide,
                                       int reg_or_opcode,
                                       CpuRegister operand,
                                       CpuRegister shifter) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK_EQ(shifter.AsRegister(), RCX);
  if (wide) {
    EmitRex64(operand);
  } else {
    EmitOptionalRex32(operand);
  }
  EmitUint8(0xD3);
  EmitOperand(reg_or_opcode, Operand(operand));
}
5522 
EmitOptionalRex(bool force,bool w,bool r,bool x,bool b)5523 void X86_64Assembler::EmitOptionalRex(bool force, bool w, bool r, bool x, bool b) {
5524   // REX.WRXB
5525   // W - 64-bit operand
5526   // R - MODRM.reg
5527   // X - SIB.index
5528   // B - MODRM.rm/SIB.base
5529   uint8_t rex = force ? 0x40 : 0;
5530   if (w) {
5531     rex |= 0x48;  // REX.W000
5532   }
5533   if (r) {
5534     rex |= 0x44;  // REX.0R00
5535   }
5536   if (x) {
5537     rex |= 0x42;  // REX.00X0
5538   }
5539   if (b) {
5540     rex |= 0x41;  // REX.000B
5541   }
5542   if (rex != 0) {
5543     EmitUint8(rex);
5544   }
5545 }
5546 
// Emits REX only if `reg` is an extended register (r8-r15), as REX.B.
void X86_64Assembler::EmitOptionalRex32(CpuRegister reg) {
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
}
5550 
// Emits REX with R for `dst` (ModRM.reg) and B for `src` (ModRM.rm), if any.
void X86_64Assembler::EmitOptionalRex32(CpuRegister dst, CpuRegister src) {
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}
5554 
// XMM/XMM variant: REX.R for `dst`, REX.B for `src`, if needed.
void X86_64Assembler::EmitOptionalRex32(XmmRegister dst, XmmRegister src) {
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}
5558 
// GP-dst/XMM-src variant: REX.R for `dst`, REX.B for `src`, if needed.
void X86_64Assembler::EmitOptionalRex32(CpuRegister dst, XmmRegister src) {
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}
5562 
// XMM-dst/GP-src variant: REX.R for `dst`, REX.B for `src`, if needed.
void X86_64Assembler::EmitOptionalRex32(XmmRegister dst, CpuRegister src) {
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}
5566 
// Emits the REX bits precomputed by the operand (X/B for index/base), if any.
void X86_64Assembler::EmitOptionalRex32(const Operand& operand) {
  uint8_t rex = operand.rex();
  if (rex != 0) {
    EmitUint8(rex);
  }
}
5573 
EmitOptionalRex32(CpuRegister dst,const Operand & operand)5574 void X86_64Assembler::EmitOptionalRex32(CpuRegister dst, const Operand& operand) {
5575   uint8_t rex = operand.rex();
5576   if (dst.NeedsRex()) {
5577     rex |= 0x44;  // REX.0R00
5578   }
5579   if (rex != 0) {
5580     EmitUint8(rex);
5581   }
5582 }
5583 
EmitOptionalRex32(XmmRegister dst,const Operand & operand)5584 void X86_64Assembler::EmitOptionalRex32(XmmRegister dst, const Operand& operand) {
5585   uint8_t rex = operand.rex();
5586   if (dst.NeedsRex()) {
5587     rex |= 0x44;  // REX.0R00
5588   }
5589   if (rex != 0) {
5590     EmitUint8(rex);
5591   }
5592 }
5593 
// Emits a bare REX.W prefix (64-bit operand size, no register extensions).
void X86_64Assembler::EmitRex64() {
  EmitOptionalRex(false, true, false, false, false);
}
5597 
// Emits REX.W, plus REX.B if `reg` is an extended register.
void X86_64Assembler::EmitRex64(CpuRegister reg) {
  EmitOptionalRex(false, true, false, false, reg.NeedsRex());
}
5601 
EmitRex64(const Operand & operand)5602 void X86_64Assembler::EmitRex64(const Operand& operand) {
5603   uint8_t rex = operand.rex();
5604   rex |= 0x48;  // REX.W000
5605   EmitUint8(rex);
5606 }
5607 
// Emits REX.W with R for `dst` (ModRM.reg) and B for `src` (ModRM.rm).
void X86_64Assembler::EmitRex64(CpuRegister dst, CpuRegister src) {
  EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
}
5611 
// XMM-dst/GP-src variant of the 64-bit REX prefix.
void X86_64Assembler::EmitRex64(XmmRegister dst, CpuRegister src) {
  EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
}
5615 
// GP-dst/XMM-src variant of the 64-bit REX prefix.
void X86_64Assembler::EmitRex64(CpuRegister dst, XmmRegister src) {
  EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
}
5619 
EmitRex64(CpuRegister dst,const Operand & operand)5620 void X86_64Assembler::EmitRex64(CpuRegister dst, const Operand& operand) {
5621   uint8_t rex = 0x48 | operand.rex();  // REX.W000
5622   if (dst.NeedsRex()) {
5623     rex |= 0x44;  // REX.0R00
5624   }
5625   EmitUint8(rex);
5626 }
5627 
EmitRex64(XmmRegister dst,const Operand & operand)5628 void X86_64Assembler::EmitRex64(XmmRegister dst, const Operand& operand) {
5629   uint8_t rex = 0x48 | operand.rex();  // REX.W000
5630   if (dst.NeedsRex()) {
5631     rex |= 0x44;  // REX.0R00
5632   }
5633   EmitUint8(rex);
5634 }
5635 
// Emits the REX prefix for instructions with byte-register operands, forcing
// an (otherwise optional) empty REX when a low-numbered encoding would
// otherwise be read as AH/BH/CH/DH.
void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst,
                                                          CpuRegister src,
                                                          bool normalize_both) {
  // SPL, BPL, SIL, DIL need the REX prefix.
  bool force = src.AsRegister() > 3;
  if (normalize_both) {
    // Some instructions take two byte registers, such as `xchg bpl, al`, so they need the REX
    // prefix if either `src` or `dst` needs it.
    force |= dst.AsRegister() > 3;
  } else {
    // Other instructions take one byte register and one full register, such as `movzxb rax, bpl`.
    // They need REX prefix only if `src` needs it, but not `dst`.
  }
  EmitOptionalRex(force, false, dst.NeedsRex(), false, src.NeedsRex());
}
5651 
// Memory-operand variant: combines the operand's REX bits with REX.R for
// `dst`, forcing an empty REX when `dst` is SPL/BPL/SIL/DIL so the encoding
// is not read as AH/BH/CH/DH.
void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const Operand& operand) {
  uint8_t rex = operand.rex();
  // For dst, SPL, BPL, SIL, DIL need the rex prefix.
  bool force = dst.AsRegister() > 3;
  if (force) {
    rex |= 0x40;  // REX.0000
  }
  if (dst.NeedsRex()) {
    rex |= 0x44;  // REX.0R00
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}
5666 
AddConstantArea()5667 void X86_64Assembler::AddConstantArea() {
5668   ArrayRef<const int32_t> area = constant_area_.GetBuffer();
5669   for (size_t i = 0, e = area.size(); i < e; i++) {
5670     AssemblerBuffer::EnsureCapacity ensured(&buffer_);
5671     EmitInt32(area[i]);
5672   }
5673 }
5674 
AppendInt32(int32_t v)5675 size_t ConstantArea::AppendInt32(int32_t v) {
5676   size_t result = buffer_.size() * elem_size_;
5677   buffer_.push_back(v);
5678   return result;
5679 }
5680 
AddInt32(int32_t v)5681 size_t ConstantArea::AddInt32(int32_t v) {
5682   // Look for an existing match.
5683   for (size_t i = 0, e = buffer_.size(); i < e; i++) {
5684     if (v == buffer_[i]) {
5685       return i * elem_size_;
5686     }
5687   }
5688 
5689   // Didn't match anything.
5690   return AppendInt32(v);
5691 }
5692 
// Returns the byte offset of the 64-bit value `v` in the constant area,
// stored as two consecutive 32-bit slots (low word first). Reuses an
// existing adjacent pair when possible.
size_t ConstantArea::AddInt64(int64_t v) {
  int32_t v_low = v;
  int32_t v_high = v >> 32;
  if (buffer_.size() > 1) {
    // Ensure we don't pass the end of the buffer.
    for (size_t i = 0, e = buffer_.size() - 1; i < e; i++) {
      if (v_low == buffer_[i] && v_high == buffer_[i + 1]) {
        return i * elem_size_;
      }
    }
  }

  // Didn't match anything.
  size_t result = buffer_.size() * elem_size_;
  buffer_.push_back(v_low);
  buffer_.push_back(v_high);
  return result;
}
5711 
AddDouble(double v)5712 size_t ConstantArea::AddDouble(double v) {
5713   // Treat the value as a 64-bit integer value.
5714   return AddInt64(bit_cast<int64_t, double>(v));
5715 }
5716 
AddFloat(float v)5717 size_t ConstantArea::AddFloat(float v) {
5718   // Treat the value as a 32-bit integer value.
5719   return AddInt32(bit_cast<int32_t, float>(v));
5720 }
5721 
EmitVexPrefixByteZero(bool is_twobyte_form)5722 uint8_t X86_64Assembler::EmitVexPrefixByteZero(bool is_twobyte_form) {
5723   // Vex Byte 0,
5724   // Bits [7:0] must contain the value 11000101b (0xC5) for 2-byte Vex
5725   // Bits [7:0] must contain the value 11000100b (0xC4) for 3-byte Vex
5726   uint8_t vex_prefix = 0xC0;
5727   if (is_twobyte_form) {
5728     vex_prefix |= TWO_BYTE_VEX;  // 2-Byte Vex
5729   } else {
5730     vex_prefix |= THREE_BYTE_VEX;  // 3-Byte Vex
5731   }
5732   return vex_prefix;
5733 }
5734 
EmitVexPrefixByteOne(bool R,bool X,bool B,int SET_VEX_M)5735 uint8_t X86_64Assembler::EmitVexPrefixByteOne(bool R, bool X, bool B, int SET_VEX_M) {
5736   // Vex Byte 1,
5737   uint8_t vex_prefix = VEX_INIT;
5738   /** Bit[7] This bit needs to be set to '1'
5739   otherwise the instruction is LES or LDS */
5740   if (!R) {
5741     // R .
5742     vex_prefix |= SET_VEX_R;
5743   }
5744   /** Bit[6] This bit needs to be set to '1'
5745   otherwise the instruction is LES or LDS */
5746   if (!X) {
5747     // X .
5748     vex_prefix |= SET_VEX_X;
5749   }
5750   /** Bit[5] This bit needs to be set to '1' */
5751   if (!B) {
5752     // B .
5753     vex_prefix |= SET_VEX_B;
5754   }
5755   /** Bits[4:0], Based on the instruction documentaion */
5756   vex_prefix |= SET_VEX_M;
5757   return vex_prefix;
5758 }
5759 
// Returns byte 1 of a two-byte VEX prefix: inverted R bit, the inverted
// 'vvvv' register specifier (bits 6:3), the vector-length bit L, and the
// implied-prefix field pp.
uint8_t X86_64Assembler::EmitVexPrefixByteOne(bool R,
                                              X86_64ManagedRegister operand,
                                              int SET_VEX_L,
                                              int SET_VEX_PP) {
  // Vex Byte 1,
  uint8_t vex_prefix = VEX_INIT;
  /** Bit[7] This bit needs to be set to '1'
  otherwise the instruction is LES or LDS */
  if (!R) {
    // R .
    vex_prefix |= SET_VEX_R;
  }
  /**Bits[6:3] - 'vvvv' the source or dest register specifier */
  if (operand.IsNoRegister()) {
    vex_prefix |= 0x78;  // No vvvv operand: field is all ones (inverted 0).
  } else if (operand.IsXmmRegister()) {
    XmmRegister vvvv = operand.AsXmmRegister();
    // vvvv is stored inverted (one's complement of the register number).
    int inverted_reg = 15 - static_cast<int>(vvvv.AsFloatRegister());
    uint8_t reg = static_cast<uint8_t>(inverted_reg);
    vex_prefix |= ((reg & 0x0F) << 3);
  } else if (operand.IsCpuRegister()) {
    CpuRegister vvvv = operand.AsCpuRegister();
    // vvvv is stored inverted (one's complement of the register number).
    int inverted_reg = 15 - static_cast<int>(vvvv.AsRegister());
    uint8_t reg = static_cast<uint8_t>(inverted_reg);
    vex_prefix |= ((reg & 0x0F) << 3);
  }
  /** Bit[2] - "L" If VEX.L = 1 indicates 256-bit vector operation,
  VEX.L = 0 indicates 128 bit vector operation */
  vex_prefix |= SET_VEX_L;
  // Bits[1:0] -  "pp"
  vex_prefix |= SET_VEX_PP;
  return vex_prefix;
}
5793 
// Returns byte 2 of a three-byte VEX prefix: the W bit, the inverted 'vvvv'
// register specifier (bits 6:3), the vector-length bit L, and the
// implied-prefix field pp.
uint8_t X86_64Assembler::EmitVexPrefixByteTwo(bool W,
                                              X86_64ManagedRegister operand,
                                              int SET_VEX_L,
                                              int SET_VEX_PP) {
  // Vex Byte 2,
  uint8_t vex_prefix = VEX_INIT;

  /** Bit[7] This bits needs to be set to '1' with default value.
  When using C4H form of VEX prefix, REX.W value is ignored */
  if (W) {
    vex_prefix |= SET_VEX_W;
  }
  // Bits[6:3] - 'vvvv' the source or dest register specifier
  if (operand.IsXmmRegister()) {
    XmmRegister vvvv = operand.AsXmmRegister();
    // vvvv is stored inverted (one's complement of the register number).
    int inverted_reg = 15 - static_cast<int>(vvvv.AsFloatRegister());
    uint8_t reg = static_cast<uint8_t>(inverted_reg);
    vex_prefix |= ((reg & 0x0F) << 3);
  } else if (operand.IsCpuRegister()) {
    CpuRegister vvvv = operand.AsCpuRegister();
    // vvvv is stored inverted (one's complement of the register number).
    int inverted_reg = 15 - static_cast<int>(vvvv.AsRegister());
    uint8_t reg = static_cast<uint8_t>(inverted_reg);
    vex_prefix |= ((reg & 0x0F) << 3);
  }
  /** Bit[2] - "L" If VEX.L = 1 indicates 256-bit vector operation,
  VEX.L = 0 indicates 128 bit vector operation */
  vex_prefix |= SET_VEX_L;
  // Bits[1:0] -  "pp"
  vex_prefix |= SET_VEX_PP;
  return vex_prefix;
}
5825 
// Returns byte 2 of a three-byte VEX prefix when there is no 'vvvv' operand:
// the field is set to all ones (inverted "no register").
uint8_t X86_64Assembler::EmitVexPrefixByteTwo(bool W,
                                              int SET_VEX_L,
                                              int SET_VEX_PP) {
  // Vex Byte 2,
  uint8_t vex_prefix = VEX_INIT;

  /** Bit[7] This bits needs to be set to '1' with default value.
  When using C4H form of VEX prefix, REX.W value is ignored */
  if (W) {
    vex_prefix |= SET_VEX_W;
  }
  /** Bits[6:3] - 'vvvv' the source or dest register specifier */
  vex_prefix |= (0x0F << 3);  // Unused vvvv: all ones.
  /** Bit[2] - "L" If VEX.L = 1 indicates 256-bit vector operation,
  VEX.L = 0 indicates 128 bit vector operation */
  vex_prefix |= SET_VEX_L;

  // Bits[1:0] -  "pp"
  if (SET_VEX_PP != SET_VEX_PP_NONE) {
    vex_prefix |= SET_VEX_PP;
  }
  return vex_prefix;
}
5849 
5850 }  // namespace x86_64
5851 }  // namespace art
5852