1 /**
2  * xbyak for the D programming language
3  * Version: 0.0940
4  * Date: 2020/04/10
5  * See_Also:
6  * Copyright: Copyright (c) 2007 MITSUNARI Shigeo, Copyright deepprog 2019
 * License: <a href="http://opensource.org/licenses/BSD-3-Clause">BSD-3-Clause</a>.
8  * Authors: herumi, deepprog
9  */
10 
11 module xbyak;
12 
// Map the compiler's predefined architecture identifiers onto the
// XBYAK32 / XBYAK64 version identifiers tested throughout this module.
version (X86)    version = XBYAK32;
version (X86_64) version = XBYAK64;
22 
23 //version = XBYAK_ENABLE_OMITTED_OPERAND;
24 //version = XBYAK_DISABLE_AVX512;
25 
26 import std.stdio;
27 import std.array;
28 import std.string;
29 import std.algorithm;
30 import std.conv;
31 
32 version (Windows)
33 {
34 	import core.sys.windows.windows;  // VirtualProtect
35 }
36 
37 version (linux)
38 {
39     import core.sys.posix.sys.mman;
40 }
41 
/// Default code-buffer capacity in bytes (8 * 4096).
size_t DEFAULT_MAX_CODE_SIZE = 8 * 4096;
/// Version number; 0xABCD = A.BC(D)
size_t VERSION = 0x0099;

// Fixed-width integer names used throughout the module.
alias uint8  = ubyte;
alias uint16 = ushort;
alias uint32 = uint;
alias uint64 = ulong;
alias int32  = int;
alias sint64 = long;
51 
52 // MIE_ALIGN
/**
 * Combines four values into one as `x * 64 + y * 16 + z * 4 + w`.
 *
 * Params:
 *   x = value weighted by 64
 *   y = value weighted by 16
 *   z = value weighted by 4
 *   w = value added unweighted
 * Returns: the weighted sum, in the same type as the arguments.
 */
T MIE_PACK(T)(T x, T y, T z, T w)
{
	// The last parameter used to be spelled `W` while the body read `w`,
	// so every instantiation failed to compile.
	return x * 64 + y * 16 + z * 4 + w;
}
57 
/**
 * Error codes carried by XError.
 *
 * Values are consecutive starting at 0 and are used as indices into the
 * message table in XError.what(), so the order of members must not change.
 */
enum ERR
{
	NONE = 0,                              /// "none"
	BAD_ADDRESSING,                        /// "bad addressing"
	CODE_IS_TOO_BIG,                       /// "code is too big"
	BAD_SCALE,                             /// "bad scale"
	ESP_CANT_BE_INDEX,                     /// "esp can't be index"
	BAD_COMBINATION,                       /// "bad combination"
	BAD_SIZE_OF_REGISTER,                  /// "bad size of register"
	IMM_IS_TOO_BIG,                        /// "imm is too big"
	BAD_ALIGN,                             /// "bad align"
	LABEL_IS_REDEFINED,                    /// "label is redefined"
	LABEL_IS_TOO_FAR,                      /// "label is too far"
	LABEL_IS_NOT_FOUND,                    /// "label is not found"
	CODE_ISNOT_COPYABLE,                   /// "code is not copyable"
	BAD_PARAMETER,                         /// "bad parameter"
	CANT_PROTECT,                          /// "can't protect"
	CANT_USE_64BIT_DISP,                   /// "can't use 64bit disp(use (void*))"
	OFFSET_IS_TOO_BIG,                     /// "offset is too big"
	MEM_SIZE_IS_NOT_SPECIFIED,             /// "MEM size is not specified"
	BAD_MEM_SIZE,                          /// "bad mem size"
	BAD_ST_COMBINATION,                    /// "bad st combination"
	OVER_LOCAL_LABEL,                      /// "over local label" (not used)
	UNDER_LOCAL_LABEL,                     /// "under local label"
	CANT_ALLOC,                            /// "can't alloc"
	ONLY_T_NEAR_IS_SUPPORTED_IN_AUTO_GROW, /// "T_SHORT is not supported in AutoGrow"
	BAD_PROTECT_MODE,                      /// "bad protect mode"
	BAD_PNUM,                              /// "bad pNum"
	BAD_TNUM,                              /// "bad tNum"
	BAD_VSIB_ADDRESSING,                   /// "bad vsib addressing"
	CANT_CONVERT,                          /// "can't convert"
	LABEL_ISNOT_SET_BY_L,                  /// "label is not set by L()"
	LABEL_IS_ALREADY_SET_BY_L,             /// "label is already set by L()"
	BAD_LABEL_STR,                         /// "bad label string"
	MUNMAP,                                /// "err munmap"
	OPMASK_IS_ALREADY_SET,                 /// "opmask is already set"
	ROUNDING_IS_ALREADY_SET,               /// "rounding is already set"
	K0_IS_INVALID,                         /// "k0 is invalid"
	EVEX_IS_INVALID,                       /// "evex is invalid"
	SAE_IS_INVALID,                        /// "sae(suppress all exceptions) is invalid"
	ER_IS_INVALID,                         /// "er(embedded rounding) is invalid"
	INVALID_BROADCAST,                     /// "invalid broadcast"
	INVALID_OPMASK_WITH_MEMORY,            /// "invalid opmask with memory"
	INVALID_ZERO,                          /// "invalid zero"
	INVALID_RIP_IN_AUTO_GROW,              /// "invalid rip in AutoGrow"
	INVALID_MIB_ADDRESS,                   /// "invalid mib address"
	X2APIC_IS_NOT_SUPPORTED,               /// "x2APIC is not supported"
	INTERNAL                               /// "internal error"; keep this member last
}
107 
/**
 * Exception type thrown by this module. It stores an ERR code and uses
 * the matching text from what() as the exception message.
 */
class XError : Exception
{
	int err_;
public:
	/**
	 * Params:
	 *   err  = error code; a value outside NONE..INTERNAL is stored as INTERNAL
	 *   file = source file recorded in the exception
	 *   line = source line recorded in the exception
	 *   next = chained throwable, if any
	 */
	this(ERR err = ERR.NONE, string file = __FILE__, size_t line = __LINE__, Throwable next = null)
	{
		const int code = cast(int) err;
		const bool outOfRange = code < 0 || code > ERR.INTERNAL;
		err_ = outOfRange ? cast(int) ERR.INTERNAL : code;
		super(this.what(), file, line, next);
	}

	/// `cast(int) xerror` yields the stored error code.
	int opCast(T : int)() const
	{
		return err_;
	}

	/// Returns the message text for the stored error code.
	string what() const
	{
		// One entry per ERR member, in declaration order.
		static immutable string[] messages =
		[
			"none",
			"bad addressing",
			"code is too big",
			"bad scale",
			"esp can't be index",
			"bad combination",
			"bad size of register",
			"imm is too big",
			"bad align",
			"label is redefined",
			"label is too far",
			"label is not found",
			"code is not copyable",
			"bad parameter",
			"can't protect",
			"can't use 64bit disp(use (void*))",
			"offset is too big",
			"MEM size is not specified",
			"bad mem size",
			"bad st combination",
			"over local label",
			"under local label",
			"can't alloc",
			"T_SHORT is not supported in AutoGrow",
			"bad protect mode",
			"bad pNum",
			"bad tNum",
			"bad vsib addressing",
			"can't convert",
			"label is not set by L()",
			"label is already set by L()",
			"bad label string",
			"err munmap",
			"opmask is already set",
			"rounding is already set",
			"k0 is invalid",
			"evex is invalid",
			"sae(suppress all exceptions) is invalid",
			"er(embedded rounding) is invalid",
			"invalid broadcast",
			"invalid opmask with memory",
			"invalid zero",
			"invalid rip in AutoGrow",
			"invalid mib address",
			"x2APIC is not supported",
			"internal error"
		];

		assert(err_ <= ERR.INTERNAL);
		assert(messages.length == ERR.INTERNAL + 1);
		return messages[err_];
	}
}
183 
/// Returns the message text of `err`, i.e. the result of `err.what()`.
string ConvertErrorToString(XError err)
{
	string message = err.what();
	return message;
}
187 
/**
 * Function-style spelling of `cast(To) p`.
 *
 * Params:
 *   p = value to convert
 * Returns: `p` converted to `To` with an explicit cast.
 */
To CastTo(To, From)(From p)
{
	To converted = cast(To) p;
	return converted;
}
192 
/// Namespace-like holder for small helpers; every member is static.
struct inner
{
static:
	const size_t ALIGN_PAGE_SIZE = 4096;

	/// True when the 32-bit pattern `x` is a sign-extended value in -128..127.
	bool IsInDisp8(uint32 x)
	{
		return x <= 0x7F || x >= 0xFFFFFF80;
	}

	/// True when the 32-bit pattern `x` is a sign-extended value in -32768..32767.
	bool IsInDisp16(uint32 x)
	{
		return x <= 0x7FFF || x >= 0xFFFF8000;
	}

	/// True when the 64-bit pattern `x` is a sign-extended 32-bit value.
	bool IsInInt32(uint64 x)
	{
		// int32.min converts to the unsigned pattern 0xFFFF_FFFF_8000_0000.
		const uint64 lowestNegative = cast(uint64) int32.min;
		return lowestNegative <= x || x <= int32.max;
	}

	/**
	 * Truncates `x` to 32 bits.
	 * Throws: XError(OFFSET_IS_TOO_BIG) in XBYAK64 builds when `x` fails
	 *         IsInInt32; other builds perform no check.
	 */
	uint32 VerifyInInt32(uint64 x)
	{
		version (XBYAK64)
		{
			if (!IsInInt32(x))
			{
				throw new XError(ERR.OFFSET_IS_TOO_BIG);
			}
		}
		return cast(uint32) x;
	}

	enum LabelMode
	{
		LasIs,  // as is
		Labs,   // absolute
		LaddTop // (addr + top) for mov(reg, label) with AutoGrow
	}
}// inner
217 
218 
/**
 * Rounds `addr` up to the next multiple of `alignedSize`.
 *
 * The computation masks with `alignedSize - 1`, so it is only meaningful
 * when `alignedSize` is a power of two.
 *
 * Params:
 *   addr        = address to align
 *   alignedSize = alignment in bytes (default 16)
 * Returns: the aligned address; `addr` itself when it is already aligned.
 */
void* getAlignedAddress(void* addr, size_t alignedSize = 16)
{
	const size_t lowBits = alignedSize - 1;
	const size_t rounded = (cast(size_t) addr + lowBits) & ~lowBits;
	return cast(void*) rounded;
}
224 
225 // custom allocator
226 
/**
 * Default memory provider for code buffers.
 *
 * Bookkeeping lives in the static tables MemTbl / SizeTbl, which are
 * shared by every Allocator instance.
 */
class Allocator
{

version(Windows)
{
	/**
	 * Allocates `size` bytes plus one page of slack from the GC heap and
	 * returns the first page-aligned address inside that block.
	 */
	uint8* alloc(size_t size)
	{
		size_t alignment = inner.ALIGN_PAGE_SIZE;
		static import core.memory;
		void* mp = core.memory.GC.malloc(size + alignment);
		assert(mp);
		SizeTbl[mp] = size + alignment;
		MemTbl[mp]  = getAlignedAddress(mp, alignment);
		return cast(uint8*)MemTbl[mp];
	}

	/// Does nothing: the explicit release is disabled in this build.
	void free(uint8* p)
	{
		//core.memory.GC.free(MemTbl[p]);
	}
}

version(linux)
{
	/**
	 * Maps a private anonymous read/write/exec region of at least `size`
	 * bytes (rounded up to a multiple of ALIGN_PAGE_SIZE).
	 *
	 * Throws: XError(CANT_ALLOC) when mmap fails.
	 */
	uint8* alloc(size_t size)
	{
		const size_t alignedSizeM1 = inner.ALIGN_PAGE_SIZE - 1;
		size = (size + alignedSizeM1) & ~alignedSizeM1;

		const int mode = MAP_PRIVATE | MAP_ANON;
		const int prot = PROT_EXEC | PROT_READ | PROT_WRITE;
		void* mp = mmap(null, size, prot, mode, -1, 0);

		if (mp == MAP_FAILED) throw new XError(ERR.CANT_ALLOC);
		assert(mp);
		// Record the exact mapping length. Storing `size + alignment` made
		// free() hand munmap a range one page longer than the mapping,
		// which can unmap an unrelated neighbouring page.
		SizeTbl[mp] = size;
		// mmap results are page aligned, so the mapping start is also the
		// address handed to the caller and the key free() looks up.
		MemTbl[mp]  = mp;
		return cast(uint8*) mp;
	}

	/**
	 * Unmaps a region previously returned by alloc(); `null` is ignored.
	 *
	 * Throws: XError(MUNMAP) when munmap reports failure.
	 */
	void free(uint8 *p)
	{
		if(p == null) return;
		void* ret = MemTbl[p];
		size_t size  = SizeTbl[p];

		if (munmap(ret, size) < 0)
		{
			throw new XError(ERR.MUNMAP);
		}
		MemTbl.remove(p);
		SizeTbl.remove(p);
	}
}
	/* override to return false if you call protect() manually */
	bool useProtect() { return true; }

static:
	void*[void*] MemTbl;   // block start -> address handed to the caller
	size_t[void*] SizeTbl; // block start -> recorded length in bytes
}
289 
290 
/**
 * Operand kind flags. Each kind is a distinct bit so that several kinds
 * can be OR-ed together into a mask (see Operand.isKind).
 */
enum Kind
{
	NONE   = 0x000,
	MEM    = 0x001,
	REG    = 0x002,
	MMX    = 0x004,
	FPU    = 0x008,
	XMM    = 0x010,
	YMM    = 0x020,
	ZMM    = 0x040,
	OPMASK = 0x080,
	BNDREG = 0x100
}
304 
/**
 * Base class of every instruction operand.
 *
 * An operand stores a register index, a Kind flag set, a bit width and
 * the opmask / rounding / zeroing modifiers.
 */
public class Operand {
private:
	static const uint8 EXT8BIT = 0x20;
	// The field initialisers below are placeholders; both constructors
	// overwrite every field.
	uint idx_ = 6; // 0..31 + EXT8BIT = 1 if spl/bpl/sil/dil
	uint kind_=9;
	uint bit_=10;

protected:
	bool zero_= true;
	uint mask_=3;
	uint rounding_ = 3;
	void setIdx(int idx) { idx_ = idx; }

public:

version(XBYAK64){
	enum : int //Code
	{
		RAX = 0, RCX, RDX, RBX, RSP, RBP, RSI, RDI, R8, R9, R10, R11, R12, R13, R14, R15,
		R8D = 8, R9D, R10D, R11D, R12D, R13D, R14D, R15D,
		R8W = 8, R9W, R10W, R11W, R12W, R13W, R14W, R15W,
		R8B = 8, R9B, R10B, R11B, R12B, R13B, R14B, R15B,
		SPL = 4, BPL, SIL, DIL,
		EAX = 0, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
		AX  = 0, CX, DX, BX, SP, BP, SI, DI,
		AL  = 0, CL, DL, BL, AH, CH, DH, BH
	}
}

version(XBYAK32){
	enum : int //Code
	{
		EAX = 0, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
		AX  = 0, CX, DX, BX, SP, BP, SI, DI,
		AL  = 0, CL, DL, BL, AH, CH, DH, BH
	}
}


	/// Creates an operand with every field zeroed (kind NONE).
	this()
	{
		idx_ = 0;
		kind_ = 0;
		bit_ = 0;
		zero_ = false;
		mask_ = 0;
		rounding_ = 0;
	}

	/**
	 * Params:
	 *   idx     = register index
	 *   kind    = Kind flag(s)
	 *   bit     = width in bits; must be 0 or a power of two
	 *   ext8bit = true for spl/bpl/sil/dil
	 */
	this(int idx, int kind, int bit =0, bool ext8bit = false)
	{
		idx_ = cast(uint8)(idx | (ext8bit ? EXT8BIT : 0));
		kind_ = kind;
		bit_  = bit;
		zero_ = false;
		mask_ = 0;
		rounding_ = 0;
		assert((bit_ & (bit_ - 1)) == 0); // bit must be power of two
	}

	int getKind() const { return kind_; }

	/// Register index with the EXT8BIT flag stripped.
	int getIdx() const
	{
		// idx_ holds indices 0..31 below the EXT8BIT flag. Masking with 15
		// (as before) truncated xmm/ymm/zmm16..31 and made isExtIdx2()
		// always false.
		return cast(int)(idx_ & (EXT8BIT - 1));
	}

	bool isNone() const { return kind_ == Kind.NONE; }
	bool isMMX() const { return isKind(Kind.MMX); }
	bool isXMM() const { return isKind(Kind.XMM); }
	bool isYMM() const { return isKind(Kind.YMM); }
	bool isZMM() const { return isKind(Kind.ZMM); }
	bool isXMEM() const { return isKind(Kind.XMM | Kind.MEM); }
	bool isYMEM() const { return isKind(Kind.YMM | Kind.MEM); }
	bool isZMEM() const { return isKind(Kind.ZMM | Kind.MEM); }
	bool isOPMASK() const { return isKind(Kind.OPMASK); }
	bool isBNDREG() const { return isKind(Kind.BNDREG); }
	bool isREG(int bit = 0) const { return isKind(Kind.REG, bit); }
	bool isMEM(int bit = 0) const { return isKind(Kind.MEM, bit); }
	bool isFPU() const { return isKind(Kind.FPU); }
	bool isExt8bit() const { return (idx_ & EXT8BIT) != 0; }
	bool isExtIdx() const { return (getIdx() & 8) != 0; }
	bool isExtIdx2() const { return (getIdx() & 16) != 0; }
	bool hasEvex() const { return isZMM() || isExtIdx2() || getOpmaskIdx() != 0 || getRounding() != 0; }
	bool hasRex() const { return isExt8bit() || isREG(64) || isExtIdx(); }
	bool hasZero() const { return zero_; }
	int getOpmaskIdx() const { return mask_; }
	int getRounding() const { return rounding_; }

	/// Switches between XMM/YMM/ZMM and sets the matching width; any other
	/// `kind` is ignored.
	void setKind(int kind)
	{
		if ((kind & (Kind.XMM | Kind.YMM | Kind.ZMM)) == 0) return;
		kind_ = kind;
		bit_ = kind == Kind.XMM ? 128 : kind == Kind.YMM ? 256 : 512;
	}

	/**
	 * Changes the operand width in place.
	 *
	 * MEM and OPMASK operands only get a new width. REG/XMM/YMM/ZMM
	 * operands also get the kind matching the new width, and widths below
	 * 128 clear the opmask and rounding modifiers.
	 *
	 * Throws: XError(CANT_CONVERT) for an unsupported width, for
	 *         MMX/FPU/BNDREG operands, for ah/ch/dh/bh, or when the index
	 *         does not exist at the requested width.
	 */
	void setBit(int bit)
	{
		if (bit != 8 && bit != 16 && bit != 32 && bit != 64 && bit != 128 && bit != 256 && bit != 512)
			throw new XError(ERR.CANT_CONVERT);
		if (isBit(bit)) return;
		if (isKind(Kind.MEM | Kind.OPMASK))
		{
			bit_ = bit;
			return;
		}
		// err if MMX/FPU/BNDREG
		if (!isKind(Kind.REG | Kind.XMM | Kind.YMM | Kind.ZMM))
			throw new XError(ERR.CANT_CONVERT);

		int idx = getIdx();
		// err if converting ah, bh, ch, dh
		if (isREG(8) && (4 <= idx && idx < 8) && !isExt8bit())
			throw new XError(ERR.CANT_CONVERT);

		int kind = Kind.REG;
		switch (bit)
		{
			case 8:
				if (idx >= 16) throw new XError(ERR.CANT_CONVERT);
				version (XBYAK32)
				{
					if (idx >= 4) throw new XError(ERR.CANT_CONVERT);
				}
				else
				{
					// indices 4..7 at 8 bits mean spl/bpl/sil/dil
					if (4 <= idx && idx < 8) idx |= EXT8BIT;
				}
				break;
			case 16, 32, 64:
				if (idx >= 16) throw new XError(ERR.CANT_CONVERT);
				break;
			case 128: kind = Kind.XMM; break;
			case 256: kind = Kind.YMM; break;
			case 512: kind = Kind.ZMM; break;
			default: break;
		}
		idx_ = idx;
		kind_ = kind;
		bit_ = bit;
		if (bit >= 128) return; // keep mask_ and rounding_
		mask_ = 0;
		rounding_ = 0;
	}

	/**
	 * Throws: XError(K0_IS_INVALID) for index 0 unless `ignore_idx0`;
	 *         XError(OPMASK_IS_ALREADY_SET) when a mask is already stored.
	 */
	void setOpmaskIdx(int idx, bool ignore_idx0 = false)
	{
		if (!ignore_idx0 && idx == 0) throw new XError(ERR.K0_IS_INVALID);
		if (mask_) throw new XError(ERR.OPMASK_IS_ALREADY_SET);
		mask_ = idx;
	}

	/// Throws: XError(ROUNDING_IS_ALREADY_SET) when a rounding is already stored.
	void setRounding(int idx)
	{
		if (rounding_) throw new XError(ERR.ROUNDING_IS_ALREADY_SET);
		rounding_ = idx;
	}

	void setZero() { zero_ = true; }

	// ah, ch, dh, bh?
	bool isHigh8bit() const
	{
		if (!isBit(8) || isExt8bit()) return false;
		const int idx = getIdx();
		return Operand.AH <= idx && idx <= Operand.BH;
	}

	// any bit is acceptable if bit == 0
	bool isKind(int kind, uint32 bit = 0) const
	{
		return (kind == 0 || (kind_ & kind)) && (bit == 0 || (bit_ & bit)); // cf. you can set (8|16)
	}
	bool isBit(uint32 bit) const { return (bit_ & bit) != 0; }
	uint32 getBit() const { return bit_; }

	/**
	 * Returns the register name, e.g. "eax", "xmm3", "k1".
	 * Throws: XError(INTERNAL) when the kind has no textual form.
	 */
	override string toString() const
	{
		const int idx = getIdx();
		if (kind_ == Kind.REG)
		{
			if (isExt8bit())
			{
				static immutable string[] ext8 = [ "spl", "bpl", "sil", "dil" ];
				return ext8[idx - 4];
			}

			static immutable string[][] names = [
				[ "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh", "r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b" ],
				[ "ax", "cx", "dx", "bx", "sp", "bp", "si", "di", "r8w", "r9w", "r10w", "r11w", "r12w", "r13w", "r14w", "r15w" ],
				[ "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi", "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" ],
				[ "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" ],
			];
			const size_t row = bit_ == 8 ? 0 : bit_ == 16 ? 1 : bit_ == 32 ? 2 : 3;
			return names[row][idx];
		}
		if (isOPMASK())
		{
			static immutable string[] kNames = [ "k0", "k1", "k2", "k3", "k4", "k5", "k6", "k7" ];
			return kNames[idx];
		}
		// idx is 0..31 here, so the decimal text replaces the former
		// 32-entry lookup tables.
		if (isZMM()) return "zmm" ~ to!string(idx);
		if (isYMM()) return "ymm" ~ to!string(idx);
		if (isXMM()) return "xmm" ~ to!string(idx);
		if (isMMX())
		{
			static immutable string[] mmNames = [ "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" ];
			return mmNames[idx];
		}
		if (isFPU())
		{
			static immutable string[] stNames = [ "st0", "st1", "st2", "st3", "st4", "st5", "st6", "st7" ];
			return stNames[idx];
		}
		if (isBNDREG())
		{
			static immutable string[] bndNames = [ "bnd0", "bnd1", "bnd2", "bnd3" ];
			return bndNames[idx];
		}
		throw new XError(ERR.INTERNAL);
	}

	/// Field-by-field comparison of the state stored in Operand itself.
	bool isEqualIfNotInherited(Operand rhs) const
	{
		return idx_ == rhs.idx_ && kind_ == rhs.kind_ && bit_ == rhs.bit_ && zero_ == rhs.zero_ && mask_ == rhs.mask_ && rounding_ == rhs.rounding_;
	}

	/// Two memory operands are compared as Address objects; everything
	/// else is compared field by field.
	override bool opEquals(Object o) const
	{
		auto rhs = cast(Operand) o;
		if(rhs is null) return false;

		if (isMEM() && rhs.isMEM()) return this.getAddress() == rhs.getAddress();
		return isEqualIfNotInherited(rhs);
	}

	/// Views this operand as an Address; asserts that it is a memory operand.
	Address getAddress() const
	{
		assert(isMEM());
		Address ret = cast(Address)this;
		return ret;
	}

	/// Builds a new Reg with this operand's index, kind, width and ext8bit
	/// flag; asserts that the operand is not a memory operand.
	Reg getReg()
	{
		assert(!isMEM());
		Reg ret = new Reg(this.getIdx(), this.kind_, this.bit_, this.isExt8bit() ) ;		////fix
		return ret;
	}
}
573 
574 
/// General register operand; base class of the concrete register types.
public class Reg : Operand {
public:
	this(){}
	this(int idx, int kind, int bit = 0, bool ext8bit = false)
	{
		super(idx, kind, bit, ext8bit);
	}

	/// Resizes THIS register in place to `bit` bits and returns it.
	/// Throws: XError(CANT_CONVERT) when setBit rejects the conversion.
	// convert to Reg8/Reg16/Reg32/Reg64/XMM/YMM/ZMM
	Reg changeBit(int bit)
	{
		this.setBit(bit);
		return this;
	}

	uint8 getRexW() const { return isREG(64) ? 8 : 0; }
	uint8 getRexR() const { return isExtIdx() ? 4 : 0; }
	uint8 getRexX() const { return isExtIdx() ? 2 : 0; }
	uint8 getRexB() const { return isExtIdx() ? 1 : 0; }

	/// Combines the W/R bits of this register with the W/B bits of `base`;
	/// returns 0 when no bit is set and neither register is spl/bpl/sil/dil.
	uint8 getRex(Reg base = new Reg())
	{
		uint8 rex = this.getRexW() | this.getRexR() | base.getRexW() | base.getRexB();
		if (rex || this.isExt8bit() || base.isExt8bit()) rex |= 0x40;
		return rex;
	}

	// The cvtN() methods return a NEW register of the requested width.
	// They used to call changeBit() on `this` first, which permanently
	// resized the source register. The conversion, with its validation,
	// now runs on a scratch copy.

	Reg8 cvt8()
	{
		Reg r = scratchCopy().changeBit(8);
		return new Reg8(r.getIdx(), r.isExt8bit());
	}

	Reg16 cvt16()
	{
		return new Reg16(scratchCopy().changeBit(16).getIdx());
	}

	Reg32 cvt32()
	{
		return new Reg32(scratchCopy().changeBit(32).getIdx());
	}

	version (XBYAK64)
	{
		Reg64 cvt64()
		{
			return new Reg64(scratchCopy().changeBit(64).getIdx());
		}
	}

	/// Field-for-field copy of this register as a plain Reg.
	private Reg scratchCopy()
	{
		Reg c = new Reg();
		c.idx_ = idx_;
		c.kind_ = kind_;
		c.bit_ = bit_;
		c.zero_ = zero_;
		c.mask_ = mask_;
		c.rounding_ = rounding_;
		return c;
	}
}
623 
624 
/// 8-bit general register; `ext8bit` selects spl/bpl/sil/dil.
public class Reg8 : Reg
{
public:
	this(int idx = 0, bool ext8bit = false)
	{
		enum int width = 8;
		super(idx, Kind.REG, width, ext8bit);
	}
}
633 
/// 16-bit general register.
public class Reg16 : Reg
{
public:
	this(int idx = 0)
	{
		enum int width = 16;
		super(idx, Kind.REG, width);
	}
}
641 
/// MMX register; kind and width default to Kind.MMX / 64 so that derived
/// classes can pass their own values.
public class Mmx : Reg
{
public:
	this(int idx = 0, int kind = Kind.MMX, int bit = 64)
	{
		super(idx, kind, bit);
	}
}
649 
650 
/// Rounding / SAE modifier attached to an operand with `operand | modifier`.
class EvexModifierRounding
{
	enum
	{
		T_RN_SAE = 1,
		T_RD_SAE = 2,
		T_RU_SAE = 3,
		T_RZ_SAE = 4,
		T_SAE = 5
	}

	int rounding;

	this(int r)
	{
		this.rounding = r;
	}

	/// Handles `x | this`: returns a default-constructed `T` carrying this
	/// modifier's rounding value.
	// NOTE(review): `x` itself is never read, so the result does not carry
	// over anything from the left-hand operand — confirm this is intended.
	T opBinaryRight(string op:"|", T)(T x)
	{
		T modified = new T();
		modified.setRounding(rounding);
		return modified;
	}
}
675 
/// Zeroing modifier attached to an operand with `operand | modifier`.
class EvexModifierZero
{
	/// Handles `x | this`: returns a default-constructed `T` with the zero
	/// flag set.
	// NOTE(review): `x` itself is never read, so the result does not carry
	// over anything from the left-hand operand — confirm this is intended.
	T opBinaryRight(string op:"|", T)(T x)
	{
		T zeroed = new T();
		zeroed.setZero();
		return zeroed;
	}
}
686 
687 
/// 128-bit SIMD register; also the base class of Ymm and Zmm.
public class Xmm : Mmx {
public:
	this(int idx, int kind = Kind.XMM, int bit = 128)
	{
		super(idx, kind, bit);
	}

	// Address-expression operators (the register acts as a vector index).

	RegExp opBinary(string op:"+") (Reg32e b)
	{
		return new RegExp(this) + new RegExp(b);
	}

	RegExp opBinaryRight(string op:"+") (Reg32e a)
	{
		return new RegExp(a) + new RegExp(this);
	}

	RegExp opBinary(string op:"*") (int scale)
	{
		return new RegExp(this, scale);
	}

	RegExp opBinary(string op:"+") (int disp)
	{
		return new RegExp(this) + disp;
	}

	// The three methods below used `Xmm r = this;`, which only aliases the
	// object, so they modified the receiver itself (a second
	// `x | rounding` then threw ROUNDING_IS_ALREADY_SET). They now work on
	// a real copy and leave the receiver untouched.

	/// Returns a copy of this register carrying the rounding of `emr`.
	/// Throws: XError(ROUNDING_IS_ALREADY_SET) when a rounding is already set.
	Xmm opBinary(string op:"|") (EvexModifierRounding emr)
	{
		Xmm r = dupXmm();
		r.setRounding(emr.rounding);
		return r;
	}

	/// Returns a copy of this register whose index is replaced by `idx`.
	Xmm copyAndSetIdx(int idx)
	{
		Xmm ret = dupXmm();
		ret.setIdx(idx);
		return ret;
	}

	/// Returns a copy of this register converted with setKind(`kind`).
	Xmm copyAndSetKind(int kind)
	{
		Xmm ret = dupXmm();
		ret.setKind(kind);
		return ret;
	}

	/// Field-for-field copy of this register (static type Xmm).
	private Xmm dupXmm()
	{
		Xmm c = new Xmm(0);
		c.idx_ = idx_;
		c.kind_ = kind_;
		c.bit_ = bit_;
		c.zero_ = zero_;
		c.mask_ = mask_;
		c.rounding_ = rounding_;
		return c;
	}
}
737 
738 
/// 256-bit SIMD register.
public class Ymm : Xmm {
public:
	this(int idx = 0)
	{
		super(idx, Kind.YMM, 256);
	}

	/// Returns a copy of this register carrying the rounding of `emr`.
	/// Throws: XError(ROUNDING_IS_ALREADY_SET) when a rounding is already set.
	Ymm opBinary(string op:"|")(EvexModifierRounding emr)
	{
		// `Ymm r = this;` only aliased the receiver, so the modifier was
		// written into the shared register object; copy the fields instead.
		Ymm r = new Ymm(0);
		r.idx_ = idx_;
		r.kind_ = kind_;
		r.bit_ = bit_;
		r.zero_ = zero_;
		r.mask_ = mask_;
		r.rounding_ = rounding_;
		r.setRounding(emr.rounding);
		return r;
	}

	// Address-expression operators (the register acts as a vector index).

	RegExp opBinary(string op:"+") (Reg32e b)
	{
		return new RegExp(this) + new RegExp(b);
	}

	RegExp opBinaryRight(string op:"+") (Reg32e a)
	{
		return new RegExp(a) + new RegExp(this);
	}

	RegExp opBinary(string op:"*") (int scale)
	{
		return new RegExp(this, scale);
	}

	RegExp opBinary(string op:"+") (int disp)
	{
		return new RegExp(this) + disp;
	}
}
774 
/// 512-bit SIMD register.
public class Zmm : Xmm {
public:
	this(int idx = 0)
	{
		super(idx, Kind.ZMM, 512);
	}

	/// Returns a copy of this register carrying the rounding of `emr`.
	/// Throws: XError(ROUNDING_IS_ALREADY_SET) when a rounding is already set.
	Zmm opBinary(string op:"|")(EvexModifierRounding emr)
	{
		// `Zmm r = this;` only aliased the receiver, so the modifier was
		// written into the shared register object; copy the fields instead.
		Zmm r = new Zmm(0);
		r.idx_ = idx_;
		r.kind_ = kind_;
		r.bit_ = bit_;
		r.zero_ = zero_;
		r.mask_ = mask_;
		r.rounding_ = rounding_;
		r.setRounding(emr.rounding);
		return r;
	}

	// Declaring opBinary here hides the overloads inherited from Xmm, so
	// the address-expression operators are re-declared the same way Ymm
	// does; without them `zmm + reg`, `zmm * scale` and `zmm + disp` do
	// not resolve on a Zmm.

	RegExp opBinary(string op:"+") (Reg32e b)
	{
		return new RegExp(this) + new RegExp(b);
	}

	RegExp opBinary(string op:"*") (int scale)
	{
		return new RegExp(this, scale);
	}

	RegExp opBinary(string op:"+") (int disp)
	{
		return new RegExp(this) + disp;
	}
}
790 
/// Opmask register (k0..k7 per Operand.toString).
class Opmask : Reg {
	this(int idx = 0)
	{
		super(idx, Kind.OPMASK, 64);
	}

	/**
	 * Handles `x | this`: returns a default-constructed `T` whose opmask
	 * index is this register's index.
	 *
	 * Throws: XError(K0_IS_INVALID) when this register has index 0.
	 */
	// NOTE(review): `x` itself is never read, so the result does not carry
	// over anything from the left-hand operand — confirm this is intended.
	T opBinaryRight(string op:"|", T)(T x)
	{
		T r = new T();
		// The body referred to an undefined `k`; the opmask register is
		// the receiver of this method.
		r.setOpmaskIdx(this.getIdx());
		return r;
	}
}
804 
/// Bounds register (kind BNDREG, 128 bits).
class BoundsReg : Reg
{
	this(int idx = 0)
	{
		enum int width = 128;
		super(idx, Kind.BNDREG, width);
	}
}
811 
812 
/// FPU stack register (kind FPU, recorded width 32).
public class Fpu : Reg
{
public:
	this(int idx)
	{
		enum int width = 32;
		super(idx, Kind.FPU, width);
	}
}
820 
821 
/// General register usable in address expressions; base of Reg32 and Reg64.
public class Reg32e : Reg
{
	this(int idx, int bit)
	{
		super(idx, Kind.REG, bit);
	}

	/// `this + b` as an address expression.
	RegExp opBinary(string op:"+") (Reg32e b)
	{
		RegExp lhs = new RegExp(this);
		RegExp rhs = new RegExp(b);
		return lhs + rhs;
	}

	/// `this * scale` as an address expression.
	RegExp opBinary(string op:"*") (int scale)
	{
		RegExp scaled = new RegExp(this, scale);
		return scaled;
	}

	/// `this + disp` as an address expression.
	RegExp opBinary(string op:"+") (int disp)
	{
		RegExp expr = new RegExp(this);
		return expr + disp;
	}

	/// `disp + this` as an address expression.
	RegExp opBinaryRight(string op:"+") (int disp)
	{
		RegExp expr = new RegExp(this);
		return expr + disp;
	}
}
848 
849 
/// 32-bit general register.
public class Reg32 : Reg32e
{
	this(int idx)
	{
		enum int width = 32;
		super(idx, width);
	}
}
856 
857 version (XBYAK64)
858 {
859 	public class Reg64 : Reg32e {
860 		this(int idx = 0)
861 		{
862 			super(idx, 64);
863 		}
864 	}
865 
866 	struct RegRip
867 	{
868 		sint64 disp_ = 0;
869 		Label label_;
870         bool isAddr_;
871 		
872 		this(sint64 disp, Label label = cast(Label)null, bool isAddr = false)
873 		{
874 			disp_  = disp;
875 			label_ = label;
876 			isAddr_ = isAddr;
877 		}
878         
879 		RegRip opBinary(string op:"+") (int disp)
880 		{
881 			return RegRip(disp_ + disp, label_, r.isAddr_);
882 		}
883 		RegRip opBinary(string op:"-") (int disp)
884 		{
885 			return RegRip(disp_ - disp, label_, r.isAddr_);
886 		}
887 		
888 		RegRip opBinary(string op:"+") (sint64 disp)
889 		{
890 			return RegRip(disp_ + disp, label_, r.isAddr_);
891 		}
892 		RegRip opBinary(string op:"-") (sint64 disp)
893 		{
894 			return RegRip(disp_ - disp, label_, r.isAddr_);
895 		}
896 		RegRip opBinary(string op:"+") (Label label)
897 		{
898 			if (label_) throw new XError(ERR.BAD_ADDRESSING);
899 			return RegRip(disp_ + disp, label);
900 		}
901 		RegRip opBinary(string op:"+")(void* addr)
902 		{
903 			if (r.label_ || r.isAddr_) throw new XError(ERR.BAD_ADDRESSING);
904 			return RegRip(r.disp_ + cast(sint64)addr, 0, true);
905 		}	
906 		
907 		
908 	}
909 }
910 
911 version (XBYAK_DISABLE_SEGMENT) {}
912 else{
913 // not derived from Reg
914     class Segment {
915     int idx_;
916 public:
917     enum
918     {
919         es, cs, ss, ds, fs, gs
920     }
921     this(int idx){ assert(0 <= idx_ && idx_ < 6); idx_ = idx; }
922     int getIdx() const
923     {
924         return idx_;
925     }
926     override string toString()
927     {
928         string[] tbl = [
929             "es", "cs", "ss", "ds", "fs", "gs"
930         ];
931         return tbl[idx_];
932     }
933     }
934 }
935 
/**
 * Address expression `[base_ + index_ * scale_ + disp_]`.
 *
 * optimize(), `+` and `-` return a NEW expression. They used to start
 * with `RegExp ret = this;`, which only aliases the object, so a stored
 * expression was changed by every later use of it.
 */
class RegExp {

public:
version ( XBYAK64)
{
	enum { i32e = 32 | 64 }
}
else
{
	enum { i32e = 32 }
}

	/// Expression consisting of a displacement only.
	this(size_t disp = 0)
	{
		scale_ = 0;
		disp_ = disp;
	}

	/**
	 * Expression consisting of one register.
	 *
	 * A register of 128 bits or more, or any register with a scale other
	 * than 1, becomes the index; otherwise it becomes the base. A scale of
	 * 0 stores no register at all.
	 *
	 * Throws: XError(BAD_SIZE_OF_REGISTER) when `r` is neither a general
	 *         register of a width in i32e nor XMM/YMM/ZMM;
	 *         XError(BAD_SCALE) when `scale` is not 0, 1, 2, 4 or 8.
	 */
	this(Reg r, int scale = 1)
	{
		scale_ = scale;
		disp_ = 0;
		if (!r.isREG(i32e) && !r.isKind(Kind.XMM | Kind.YMM | Kind.ZMM)) throw new XError(ERR.BAD_SIZE_OF_REGISTER);
		if (scale == 0) return;
		if (scale != 1 && scale != 2 && scale != 4 && scale != 8) throw new XError(ERR.BAD_SCALE);
		if (r.getBit() >= 128 || scale != 1) { // xmm/ymm is always index
			index_ = r;
		} else {
			base_ = r;
		}
	}

	bool isVsib(int bit = 128 | 256 | 512) const
	{
		return index_.isBit(bit);
	}

	/// Returns a copy in which `[reg * 2]` is rewritten as `[reg + reg]`.
	RegExp optimize()
	{
		RegExp exp = dup();
		// [reg * 2] => [reg + reg]
		if (index_.isBit(i32e) && !base_.getBit() && scale_ == 2) {
			exp.base_ = index_;
			exp.scale_ = 1;
		}
		return exp;
	}

	bool opEquals(RegExp rhs) const
	{
		return base_ == rhs.base_ && index_ == rhs.index_ && disp_ == rhs.disp_ && scale_ == rhs.scale_;
	}

	Reg getBase()	{	return base_; }
	Reg getIndex() 	{	return index_;	}
	int getScale() 	{	return scale_;	}
	size_t getDisp()	{	return cast(size_t)disp_; }

	/**
	 * Throws: XError(BAD_SIZE_OF_REGISTER) when the base is 128 bits or
	 *         wider, or when base and a general-register index differ in
	 *         width; XError(ESP_CANT_BE_INDEX) when the index has the
	 *         register code of ESP.
	 */
	void verify() const
	{
		if (base_.getBit() >= 128)	throw new XError(ERR.BAD_SIZE_OF_REGISTER);
		if (index_.getBit() && index_.getBit() <= 64)
		{
			if (index_.getIdx()== Operand.ESP) throw new XError(ERR.ESP_CANT_BE_INDEX);
			if (base_.getBit() && base_.getBit() != index_.getBit())	throw new XError(ERR.BAD_SIZE_OF_REGISTER);
		}
	}

	/// X bit of the index combined with the B bit of the base; 0 when
	/// neither is set, otherwise OR-ed with 0x40.
	uint8 getRex() const
	{
		uint8 rex = index_.getRexX() | base_.getRexB();
		return rex ? uint8(rex | 0x40) : 0;
	}

	/**
	 * Sum of two expressions.
	 *
	 * Throws: XError(BAD_ADDRESSING) when both operands have an index, or
	 *         when both have a base and an index is already present.
	 */
	RegExp opBinary(string op:"+") (RegExp b)
	{
		if (this.index_.getBit() && b.index_.getBit()) throw new XError(ERR.BAD_ADDRESSING);
		RegExp ret = dup();
		if (!ret.index_.getBit()) { ret.index_ = b.index_; ret.scale_ = b.scale_; }

		if (b.base_.getBit()) {
			if (ret.base_.getBit()) {
				if (ret.index_.getBit()) throw new XError(ERR.BAD_ADDRESSING);

				// base + base => base + index * 1
				ret.index_ = b.base_;

				// [reg + esp] => [esp + reg]
				if (ret.index_.getIdx() == Operand.ESP) swap(ret.base_, ret.index_);
				ret.scale_ = 1;

			} else {
				ret.base_ = b.base_;
			}
		}
		ret.disp_ += b.disp_;
		return ret;
	}

	RegExp opBinary(string op:"+") (Reg32e b)
	{
		return this + new RegExp(b);
	}

	RegExp opBinaryRight(string op:"+") (Reg32e a)
	{
		return new RegExp(a) + this;
	}

	RegExp opBinary(string op:"+") (int disp)
	{
		RegExp ret = dup();
		ret.disp_ += disp;
		return ret;
	}

	RegExp opBinary(string op:"-") (int disp)
	{
		RegExp ret = dup();
		ret.disp_ -= disp;
		return ret;
	}

private:
	/// New expression with the same base, index, scale and displacement.
	/// The register objects themselves are shared, not cloned.
	RegExp dup()
	{
		RegExp c = new RegExp(disp_);
		c.base_ = base_;
		c.index_ = index_;
		c.scale_ = scale_;
		return c;
	}

	/*
		[base_ + index_ * scale_ + disp_]
		base : Reg32e, index : Reg32e(w/o esp), Xmm, Ymm
	*/
	Reg base_ = new Reg();
	Reg index_ = new Reg();
	int scale_;
	size_t disp_;
}
1072 
1073 // 2nd parameter for constructor of CodeArray(maxSize, userPtr, alloc)
/// Sentinel `userPtr` value: the CodeArray constructor selects Type.AUTO_GROW.
enum AutoGrow = cast(void*) 1;
/// Sentinel `userPtr` value: the CodeArray constructor allocates its own
/// buffer but skips the initial switch to read/write/exec protection.
enum DontSetProtectRWE = cast(void*) 2;
1076 
1077 class CodeArray
1078 {
1079 	enum Type
1080 	{
1081 		USER_BUF = 1,	// use userPtr(non alignment, non protect)
1082 		ALLOC_BUF,		// use new(alignment, protect)
1083 		AUTO_GROW		// automatically move and grow memory if necessary
1084 	}
1085 	
1086 	bool isAllocType() const
1087 	{
1088 		return type_ == Type.ALLOC_BUF || type_ == Type.AUTO_GROW;
1089 	}
1090 
1091 	struct AddrInfo
1092 	{
1093 		size_t codeOffset;  // position to write
1094 		size_t jmpAddr;     // value to write
1095 		int jmpSize;        // size of jmpAddr
1096 		inner.LabelMode mode;
1097 		this(size_t _codeOffset, size_t _jmpAddr, int _jmpSize, inner.LabelMode _mode)
1098 		{
1099 			codeOffset = _codeOffset;
1100 			jmpAddr    = _jmpAddr;
1101 			jmpSize    = _jmpSize;
1102 			mode       = _mode;
1103 		}
1104 
1105 		uint64 getVal(uint8* top) const
1106 		{
1107 			uint64 disp = (mode == inner.LabelMode.LaddTop) ? jmpAddr + cast(size_t) top : (mode == inner.LabelMode.LasIs) ? jmpAddr : jmpAddr - cast(size_t) top;
1108 			if (jmpSize == 4)
1109 			{
1110 				disp = inner.VerifyInInt32(disp);
1111 			}
1112 			return disp;
1113 		}
1114 	}
1115 
1116 	alias AddrInfoList = AddrInfo[] ;
1117 	AddrInfoList addrInfoList_;
1118 	Type type_;
1119 	Allocator defaultAllocator_;
1120 	Allocator alloc_;
1121 
1122 protected:
1123 	size_t maxSize_;
1124 	uint8* top_;
1125 	size_t size_;
1126 	bool isCalledCalcJmpAddress_;
1127 
1128 	bool useProtect() { return alloc_.useProtect(); }
1129 
1130 	/*
1131 		allocate new memory and copy old data to the new area
1132 	*/
1133 	void growMemory()
1134 	{
1135 		size_t newSize  = max(DEFAULT_MAX_CODE_SIZE, maxSize_ * 2);
1136 		uint8  * newTop = alloc_.alloc(newSize);
1137 		if (null == newTop)
1138 		{
1139 			throw new XError(ERR.CANT_ALLOC);
1140 		}
1141 
1142 		newTop[0..size_] = top_[0..size_];
1143 
1144 		alloc_.free(top_);
1145 		top_     = newTop;
1146 		maxSize_ = newSize;
1147 	}
1148 
1149 //	calc jmp address for AutoGrow mode
1150 	void calcJmpAddress()
1151 	{
1152 		if (isCalledCalcJmpAddress_) return;
1153 		foreach (i; addrInfoList_)
1154 		{
1155 			rewrite(i.codeOffset, i.getVal(top_), i.jmpSize);
1156 		}
1157 		isCalledCalcJmpAddress_ = true;
1158 	}
1159 
1160 public:
1161 	enum ProtectMode
1162 	{
1163 		PROTECT_RW = 0, // read/write
1164 		PROTECT_RWE = 1, // read/write/exec
1165 		PROTECT_RE = 2 // read/exec
1166 	}
1167 	
1168 	this(size_t maxSize, void* userPtr = null, Allocator allocator = new Allocator())
1169 	{
1170 		type_ = (userPtr == AutoGrow ? Type.AUTO_GROW : (userPtr == null || userPtr == DontSetProtectRWE) ? Type.ALLOC_BUF : Type.USER_BUF);
1171 		alloc_   = allocator;
1172 		maxSize_ = maxSize;
1173 		top_     = type_ == Type.USER_BUF ? cast(uint8*)userPtr: alloc_.alloc(max(maxSize, 1));
1174 		size_    = 0;
1175 		isCalledCalcJmpAddress_ = false;
1176 
1177 		if (maxSize_ > 0 && null == top_)	throw new XError(ERR.CANT_ALLOC);
1178 		if ((type_ == Type.ALLOC_BUF && userPtr != DontSetProtectRWE && alloc_.useProtect()) && !setProtectMode(ProtectMode.PROTECT_RWE, false))
1179 		{
1180 			alloc_.free(top_);
1181 			throw new XError(ERR.CANT_PROTECT);
1182 		}
1183 	}
1184 
1185 	~this()
1186 	{
1187 		if (isAllocType)
1188 		{
1189 			if (alloc_.useProtect()) setProtectModeRW(false);
1190 			alloc_.free(top_);
1191 		}
1192 	}
1193 
1194 	bool setProtectMode(ProtectMode mode, bool throwException = true)
1195 	{
1196 		bool isOK = protect(top_, maxSize_, mode);
1197 		if (isOK) return true;
1198 		if (throwException) throw new XError(ERR.CANT_PROTECT);
1199 		return false;
1200 	}
1201 	bool setProtectModeRE(bool throwException = true) { return setProtectMode(ProtectMode.PROTECT_RE, throwException); }
1202 	bool setProtectModeRW(bool throwException = true) { return setProtectMode(ProtectMode.PROTECT_RW, throwException); }
1203 
1204 
1205 	void resetSize()
1206 	{
1207 		size_ = 0;
1208 		addrInfoList_.destroy;
1209 	}
1210 
1211 	void db(int code)
1212 	{
1213 		
1214 		if (size_ >= maxSize_)
1215 		{
1216 			if (type_ == Type.AUTO_GROW)
1217 			{
1218 				growMemory;
1219 			}
1220 			else
1221 			{
1222 				throw new XError(ERR.CODE_IS_TOO_BIG);
1223 			}
1224 		}
1225 		top_[size_++] = cast(uint8) code;
1226 	}
1227 
1228 	void db(uint8* code, size_t codeSize)
1229 	{
1230 		foreach (i; 0..codeSize)
1231 		{
1232 			db(code[i]);
1233 		}
1234 	}
1235 
1236 	void db(uint64 code, size_t codeSize)
1237 	{
1238 		if (codeSize > 8) throw new XError(ERR.BAD_PARAMETER);
1239 		foreach (i; 0..codeSize)
1240 		{
1241 			db(cast(uint8) (code >> (i * 8)));
1242 		}
1243 	}
1244 
1245 	void dw(uint32 code) {	db(code, 2); }
1246 	void dd(uint32 code) {	db(code, 4); }
1247 	void dq(uint64 code) {	db(code, 8); }
1248 	uint8* getCode() { return top_; }
1249 	F getCode(F)() { return CastTo !(F)(top_); }
1250 	uint8* getCurr() {	return &top_[size_];}
1251 	F getCurr(F)() const {	return CastTo !(F)(&top_[size_]);	}
1252 	size_t getSize() const { return size_; }
1253 	void setSize(size_t size)
1254 	{
1255 		if (size > maxSize_) throw new XError(ERR.OFFSET_IS_TOO_BIG);
1256 		size_ = size;
1257 	}
1258 
1259 	void dump() 
1260 	{
1261 		uint8  * p     = CodeArray.getCode();
1262 		size_t bufSize = getSize();
1263 		size_t remain  = bufSize;
1264 		for (int i = 0; i < 4; i++)
1265 		{
1266 			size_t disp = 16;
1267 			if (remain < 16)
1268 			{
1269 				disp = remain;
1270 			}
1271 			for (size_t j = 0; j < 16; j++)
1272 			{
1273 				if (j < disp)
1274 				{
1275 					write(format("%02X", p[i * 16 + j]));
1276 				}
1277 			}
1278 			writeln();
1279 			remain -= disp;
1280 			if (remain <= 0)
1281 			{
1282 				break;
1283 			}
1284 		}
1285 		
1286 		size_ = 0; ////TEST
1287 	}
1288 
//	Overwrite `size` bytes of the buffer with the low bytes of `disp`,
//	least significant byte first.
//	@param offset [in] position of the jmp data, counted from the top of the buffer
//	@param disp [in] offset from the next of jmp
//	@param size [in] write size(1, 2, 4, 8)
	void rewrite(size_t offset, uint64 disp, size_t size)
	{
		assert(offset < maxSize_);

		switch (size)
		{
			case 1, 2, 4, 8:
				break;
			default:
				throw new XError(ERR.BAD_PARAMETER);
		}

		uint8* dst = top_ + offset;
		for (size_t n = 0; n < size; n++)
		{
			dst[n] = cast(uint8) (disp >> (n * 8));
		}
	}
1307 	void save(size_t offset, size_t val, int size, inner.LabelMode mode)
1308 	{
1309 		addrInfoList_ ~= AddrInfo(offset, val, size, mode);
1310 	}
1311 	bool isAutoGrow() const
1312 	{
1313 		return type_ == Type.AUTO_GROW;
1314 	}
1315 	
1316 	bool isCalledCalcJmpAddress() const { return isCalledCalcJmpAddress_; }
1317 	/**
1318 		change exec permission of memory
1319 		@param addr [in] buffer address
1320 		@param size [in] buffer size
1321 		@param protectMode [in] mode(RW/RWE/RE)
1322 		@return true(success), false(failure)
1323 	*/
1324 	
1325 	static bool protect(void* addr, size_t size, ProtectMode protectMode_)
1326 	{
1327 version (Windows)
1328 {
1329 		const DWORD c_rw = PAGE_READWRITE;
1330 		const DWORD c_rwe = PAGE_EXECUTE_READWRITE;
1331 		const DWORD c_re = PAGE_EXECUTE_READ;
1332 		DWORD mode;
1333 }
1334 else
1335 {
1336 		const int c_rw = PROT_READ | PROT_WRITE;
1337 		const int c_rwe = PROT_READ | PROT_WRITE | PROT_EXEC;
1338 		const int c_re = PROT_READ | PROT_EXEC;
1339 		int mode;
1340 }
1341 		
1342 		switch (protectMode_)
1343 		{
1344 			case ProtectMode.PROTECT_RW: mode = c_rw; break;
1345 			case ProtectMode.PROTECT_RWE: mode = c_rwe; break;
1346 			case ProtectMode.PROTECT_RE: mode = c_re; break;
1347 			default:
1348 				return false;
1349 		}
1350 		
1351 		version (Windows)
1352 		{
1353 			DWORD oldProtect;
1354 			return VirtualProtect(addr, size, mode, &oldProtect) != 0;
1355 		}
1356         
1357 		version (linux)
1358 		{
1359 
1360 // size_t pageSize = sysconf(_SC_PAGESIZE);
1361 // size_t iaddr = reinterpret_cast<size_t>(addr);
1362 // size_t roundAddr = iaddr & ~(pageSize - static_cast<size_t>(1));
1363 
1364 // #ifndef NDEBUG
1365 		// if (pageSize != 4096) fprintf(stderr, "large page(%zd) is used. not tested enough.\n", pageSize);
1366 // #endif
1367 		// return mprotect(reinterpret_cast<void*>(roundAddr), size + (iaddr - roundAddr), mode) == 0;
1368 // #else
1369 		    return true;
1370         }
1371 	}
1372 
//	get aligned memory pointer
//	@param addr [in] address
//	@param alignedSize [in] power of two
//	@return addr rounded up to a multiple of alignedSize
	uint8* getAlignedAddress(uint8* addr, size_t alignedSize = 16)
	{
		immutable size_t low = alignedSize - 1;
		immutable size_t raw = cast(size_t) addr;
		immutable size_t rounded = (raw + low) & ~low;
		return cast(uint8*) rounded;
	}
1382 }
1383 
// Memory operand: a register expression (or a plain displacement / rip
// based reference) together with an operand size and a broadcast flag.
public class Address : Operand {
public:
	// How the address is going to be encoded.
	enum Mode
	{
		M_ModRM,
		M_64bitDisp,
		M_rip,
		M_ripAddr
	}

	// ModRM address built from a register expression; the expression is
	// verified at the end of construction.
	this(uint32 sizeBit, bool broadcast, RegExp e)
	{
		super(0, Kind.MEM, sizeBit);
		mode_ = Mode.M_ModRM;
		broadcast_ = broadcast;
		label_ = new Label();
		e_ = e;
		e_.verify();
	}

version(XBYAK64)
{
	// Address given as a bare 64-bit displacement (Mode.M_64bitDisp).
	this(sint64 disp)
	{
		super(0, Kind.MEM, 64);
		mode_ = Mode.M_64bitDisp;
		broadcast_ = false;
		label_ = new Label();
		e_ = new RegExp(disp);
	}

	// rip based address; label, displacement and mode are taken from `addr`.
	this(uint32 sizeBit, bool broadcast, RegRip addr)
	{
		super(0, Kind.MEM, sizeBit);
		mode_ = addr.isAddr_ ? Mode.M_ripAddr : Mode.M_rip;
		broadcast_ = broadcast;
		label_ = addr.label_;
		e_ = new RegExp(addr.disp_);
	}
}

	// The register expression, optimized unless `optimize` is false.
	RegExp getRegExp(bool optimize = true)
	{
		if (optimize)
		{
			return e_.optimize();
		}
		return e_;
	}

	Mode getMode() const
	{
		return mode_;
	}

	// True when the base or the index register is 32 bits wide.
	bool is32bit()
	{
		if (e_.getBase().getBit() == 32)
		{
			return true;
		}
		return e_.getIndex().getBit() == 32;
	}

	// for mov eax: true when neither a base nor an index register is present
	bool isOnlyDisp()
	{
		return !(e_.getBase().getBit() || e_.getIndex().getBit());
	}

	size_t getDisp()
	{
		return e_.getDisp();
	}

	// REX bits of the register expression; 0 for every mode except M_ModRM.
	uint8 getRex() 
	{
		if (mode_ == Mode.M_ModRM)
		{
			return getRegExp().getRex();
		}
		return 0;
	}

	// for moffset
	bool is64bitDisp() const
	{
		return mode_ == Mode.M_64bitDisp;
	}

	bool isBroadcast() const
	{
		return broadcast_;
	}

	Label getLabel()
	{
		return label_;
	}

	// Two addresses are equal when size, register expression, label,
	// mode and broadcast flag all match (compared in that order).
	override bool opEquals(Object o) const
	{
		Address other = cast(Address) o;
		if (other is null) return false;
		if (this.getBit() != other.getBit()) return false;
		if (this.e_ != other.e_) return false;
		if (this.label_ != other.label_) return false;
		if (this.mode_ != other.mode_) return false;
		return this.broadcast_ == other.broadcast_;
	}

	bool isVsib() const
	{
		return e_.isVsib();
	}

private:
	RegExp e_;
	Label label_;
	Mode mode_;
	bool broadcast_;
}
1458 
1459 
// Factory for Address operands of one fixed bit width: indexing a frame
// with a register expression, a register, a pointer or a displacement
// produces the matching Address.
class AddressFrame {
public:
	uint32 bit_;
	bool broadcast_;

	this(uint32 bit, bool broadcast = false)
	{
		broadcast_ = broadcast;
		bit_ = bit;
	}

	// frame[register expression]
	Address opIndex(RegExp e)
	{
		auto addr = new Address(bit_, broadcast_, e);
		return addr;
	}

	// frame[pointer]: the pointer value becomes the displacement
	Address opIndex(void* disp)
	{
		auto e = new RegExp(cast(size_t) disp);
		return new Address(bit_, broadcast_, e);
	}


	version (XBYAK64)
	{
		// frame[64-bit displacement]
		Address opIndex(uint64 disp)
		{
			auto addr = new Address(disp);
			return addr;
		}

		// frame[rip based reference]
		Address opIndex(RegRip addr)
		{
			auto result = new Address(bit_, broadcast_, addr);
			return result;
		}
	}

	// frame[general purpose register]
	Address opIndex(Reg32e reg)
	{
		RegExp e = new RegExp(reg);
		return opIndex(e);
	}

	// frame[Mmx register]
	Address opIndex(Mmx mmx)
	{
		RegExp e = new RegExp(mmx);
		return opIndex(e);
	}
}
1507 
// Describes one jump whose target label was not known when it was emitted.
struct JmpLabel
{
	size_t endOfJmp;        // offset from top to the end address of jmp
	int jmpSize;            // number of bytes reserved for the displacement
	inner.LabelMode mode;   // how the displacement is computed when resolved
	size_t disp;            // disp for [rip + disp]

	this(size_t endOfJmp, int jmpSize, inner.LabelMode mode = inner.LabelMode.LasIs, size_t disp = 0)
	{
		this.disp     = disp;
		this.mode     = mode;
		this.jmpSize  = jmpSize;
		this.endOfJmp = endOfJmp;
	}
}
1523 
// Handle for a jump target. `id` is 0 until a LabelManager hands out an
// id; `mgr` is the manager the label belongs to (a private placeholder
// manager until the label is defined or assigned).
class Label
{
	LabelManager mgr;
	int id;
public:
	this()
	{
		mgr = new LabelManager();
		id  = 0;
	}

	// Copy `rhs`. When the copied label is defined in its manager the
	// manager's reference count for that id is incremented.
	this(Label rhs)
	{
		id  = rhs.id;
		mgr = rhs.mgr;
		addRef();
	}

	// Pure comparison: two labels are equal when they carry the same id.
	// The previous implementation copied `rhs` into `this` (and threw when
	// `this` already had an id), which made every `==` / `!=` on labels a
	// mutating operation; that behaviour now lives in assign().
	override bool opEquals(Object o)
	{
		Label rhs = cast(Label) o;
		if (rhs is null) return false;
		return this.id == rhs.id;
	}

	// Make this label refer to the same target as `rhs`.
	// Throws ERR.LABEL_IS_ALREADY_SET_BY_L when this label already has an id.
	void assign(Label rhs)
	{
		if (id) throw new XError(ERR.LABEL_IS_ALREADY_SET_BY_L);
		id  = rhs.id;
		mgr = rhs.mgr;
		addRef();
	}

	// NOTE(review): this runs from the GC finalizer, where `mgr` may
	// already have been finalized — confirm this is safe for the callers.
	~this()
	{
		if (id && mgr) {
			mgr.decRefCount(id, this);
		}
	}

	// Detach from the current manager and forget the id.
	void clear() {
		mgr = new LabelManager();
		id = 0;
	}

	int getId() const { return id; }

	string toStr(int num) const
	{
		return format(".%08x", num);
	}

	// Register this object with `mgr`, but only when `mgr` really holds a
	// definition for `id`; the original tested `mgr is null` and then
	// dereferenced the null reference.
	private void addRef()
	{
		if (mgr !is null && (id in mgr.clabelDefList_) !is null)
		{
			mgr.incRefCount(id, this);
		}
	}
}
1575 
1576 
// Keeps the definitions of string labels and Label objects together with
// the jumps that reference labels which are not defined yet, and patches
// (or queues, in auto-grow mode) those jumps once their label is defined.
class LabelManager
{
// for string label
	struct SlabelVal
	{
		size_t offset = 0;
		this(size_t offset)
		{
			this.offset = offset;
		}
	}

	alias SlabelDefList = SlabelVal[string] ;
	alias SlabelUndefList = JmpLabel[][string] ;

	struct SlabelState
	{
		SlabelDefList defList;
		SlabelUndefList undefList;
	}

	alias StateList = SlabelState[] ;

// for Label class
	struct ClabelVal
	{
		size_t offset;
		int refCount;
		this(size_t offset)
		{
			this.offset   = offset;
			this.refCount = 1;
		}
	}

	alias ClabelDefList = ClabelVal[int] ;
	alias ClabelUndefList = JmpLabel[][int] ;
	alias LabelPtrList = Label[] ;

	CodeArray base_;

// global : stateList_[0], local : stateList_[$-1]
	StateList stateList_;
	int labelId_;
	ClabelDefList clabelDefList_;
	ClabelUndefList clabelUndefList_;
	LabelPtrList labelPtrList_;

	// A string label is local when it starts with '.'; an empty label is
	// treated as global instead of indexing past the end of the string.
	private static bool isLocalLabel(string label)
	{
		return label.length != 0 && label[0] == '.';
	}

	// Id of `label`; a fresh id is handed out on first use.
	int getId(Label label)
	{
		if (label.id == 0)
		{
			label.id = labelId_++;
		}

		return label.id;
	}

	// Resolve every pending jump to `labelId` against `addrOffset`.
	// The definition itself is recorded by the caller; `deflist` is only
	// kept in the signature and is not modified here.
	void define_inner(DefList, UndefList, T)(ref DefList deflist, ref UndefList undeflist, T labelId, size_t addrOffset)
	{
		// search undefined label
		if (null == (labelId in undeflist)) return;
		foreach (JmpLabel jmp; undeflist[labelId]) 
		{
			size_t offset = jmp.endOfJmp - jmp.jmpSize;
			size_t disp;
			if (jmp.mode == inner.LabelMode.LaddTop)
			{
				disp = addrOffset;
			}
			else if (jmp.mode == inner.LabelMode.Labs)
			{
				disp = cast(size_t) base_.getCurr;
			}
			else
			{
				disp = addrOffset - jmp.endOfJmp + to!size_t(jmp.disp);
version (XBYAK64)
{
				if (jmp.jmpSize <= 4 && !inner.IsInInt32(disp))
					throw new XError(ERR.OFFSET_IS_TOO_BIG);
}
				if (jmp.jmpSize == 1 && !inner.IsInDisp8(cast(uint32) disp))
					throw new XError(ERR.LABEL_IS_TOO_FAR);
			}

			if (base_.isAutoGrow)
				base_.save(offset, disp, jmp.jmpSize, jmp.mode);
			else
				base_.rewrite(offset, disp, jmp.jmpSize);
		}
		// every pending jump has been handled: forget them in one go
		undeflist.remove(labelId);
	}

	// Look `label` up in `defList`; on success store its offset in *offset.
	bool getOffset_inner(DefList, T)(DefList defList, size_t* offset, T label)
	{
		if (null == (label in defList))
		{
			return false;
		}

		*offset = defList[label].offset;
		return true;
	}

	// Count one more Label object referring to `id` and remember the object.
	// An id without a definition is tolerated (nothing to count) instead of
	// failing on the missing associative-array key.
	void incRefCount(int id, Label label)
	{
		if (auto val = id in clabelDefList_)
		{
			val.refCount++;
		}
		labelPtrList_ ~= label;
	}

	// Forget `label` and drop one reference to `id`; the definition is
	// removed together with its last reference.
	void decRefCount(int id, Label label)
	{
		// Remove by identity. The list is compacted in place and re-sliced
		// so that nothing is allocated (this can run from a destructor) and
		// Label.opEquals is never involved.
		size_t kept = 0;
		foreach (p; labelPtrList_)
		{
			if (p !is label)
			{
				labelPtrList_[kept++] = p;
			}
		}
		labelPtrList_[kept .. $] = null;
		labelPtrList_ = labelPtrList_[0 .. kept];

		auto val = id in clabelDefList_;
		if (val is null) {
			return;
		}

		if (val.refCount == 1)
		{
			clabelDefList_.remove(id);
		}
		else
		{
			val.refCount -= 1;
		}
	}

	bool hasUndefinedLabel_inner(T)(T list) const
	{
		return list.length != 0;
	}

	// detach all labels linked to LabelManager
	void resetLabelPtrList()
	{
		foreach (i; labelPtrList_) {
			i.clear();
		}
		labelPtrList_.destroy();
	}

public:
	this()
	{
		reset();

	}
	// NOTE(review): runs from the GC finalizer and touches other GC-managed
	// objects (the linked labels) — confirm this is safe.
	~this()
	{
		resetLabelPtrList();
	}

	// Back to the initial state: no code array, ids restart at 1, two empty
	// string-label states and no Label definitions.
	void reset()
	{
		base_    = null;
		labelId_ = 1;
		stateList_.destroy;
		stateList_ = [SlabelState(), SlabelState()];

		clabelDefList_.destroy;
		clabelUndefList_.destroy;
		resetLabelPtrList();
	}
	// Open a new scope for local ('.'-prefixed) string labels.
	void enterLocal()
	{
		stateList_ ~= SlabelState();
	}
	// Close the innermost local scope.
	// Throws ERR.UNDER_LOCAL_LABEL when no local scope is open and
	// ERR.LABEL_IS_NOT_FOUND when the scope still has unresolved jumps.
	void leaveLocal()
	{
		if (stateList_.length <= 2)
		{
			throw new XError(ERR.UNDER_LOCAL_LABEL);
		}

		if (hasUndefinedLabel_inner(stateList_[$-1].undefList))
		{
				throw new XError(ERR.LABEL_IS_NOT_FOUND);
		}
		stateList_.popBack();
	}

	void set(CodeArray base)
	{
		base_ = base;
	}

	// Define the string label `label` at the current code size.
	// "@@" is stored alternately under "@f" and "@b"; "@b" and "@f"
	// themselves cannot be defined (ERR.BAD_LABEL_STR).
	// Throws ERR.LABEL_IS_REDEFINED for a label that already exists.
	void defineSlabel(string label)
	{
		if ("@b" == label || "@f" == label) throw new XError(ERR.BAD_LABEL_STR);
		if ("@@" == label)
		{
			if ("@f" in stateList_[0].defList)
			{				
				stateList_[0].defList.remove("@f");
				label = "@b";
			}
			else
			{
				if ("@b" in stateList_[0].defList)
				{
					stateList_[0].defList.remove("@b");
				}
				label = "@f";
			}
		}

		auto st = isLocalLabel(label) ? &stateList_[$-1] : &stateList_[0];

		if (label in st.defList) throw new XError(ERR.LABEL_IS_REDEFINED);
		st.defList[label] = SlabelVal(base_.getSize());

		define_inner(stateList_[0].defList, st.undefList, label, base_.getSize());
	}


	// Define `label` at the current code size and link it to this manager.
	// Throws ERR.LABEL_IS_REDEFINED when the label is already defined.
	void defineClabel(Label label)
	{

		if (getId(label) in clabelDefList_)	throw new XError(ERR.LABEL_IS_REDEFINED);

		clabelDefList_[getId(label)] = ClabelVal(base_.getSize());


		define_inner(clabelDefList_, clabelUndefList_, getId(label), base_.getSize);
		label.mgr = this;
		labelPtrList_ ~= label;
	}

	// Make `dst` a second name for the position `src` is defined at.
	// Throws ERR.LABEL_ISNOT_SET_BY_L when `src` is not defined and
	// ERR.LABEL_IS_REDEFINED when `dst` already is.
	void assign(Label dst, Label src)
	{
		auto srcVal = src.id in clabelDefList_;
		if (srcVal is null) {
			throw new XError(ERR.LABEL_ISNOT_SET_BY_L);
		}
		// copy before the table is modified: inserting may move its entries
		immutable size_t offset = srcVal.offset;

		immutable int dstId = getId(dst);
		if (dstId in clabelDefList_) throw new XError(ERR.LABEL_IS_REDEFINED);
		// define_inner no longer records definitions, so it is done here;
		// without it `dst` would stay undefined for every later lookup.
		clabelDefList_[dstId] = ClabelVal(offset);

		define_inner(clabelDefList_, clabelUndefList_, dstId, offset);
		dst.mgr = this;
		labelPtrList_ ~= dst;
	}

	// Offset of a string label. "@b"/"@f" are translated to the name the
	// matching "@@" is stored under, which is why `label` is taken by ref.
	// Throws ERR.LABEL_IS_NOT_FOUND for "@b" when no "@@" was defined.
	bool getOffset(size_t* offset, ref string label)  ////fix :( Add ref )
	{
		SlabelDefList df = stateList_[0].defList;
		if (label == "@b")
		{
			if ("@f" in df)
			{
				label = "@f";
			}
			else if (!("@b" in df))
			{
				throw new XError(ERR.LABEL_IS_NOT_FOUND);
			}
		}
		else if ("@f" == label)
		{
			if ("@f" in df)
				label = "@b";
		}

		SlabelState* st = isLocalLabel(label) ? &stateList_[$-1] : &stateList_[0];
		return getOffset_inner(st.defList, offset, label);
	}

	bool getOffset(size_t* offset, Label label)
	{
		return getOffset_inner(clabelDefList_, offset, getId(label));
	}

	// Remember a jump to a string label that is not defined yet.
	void addUndefinedLabel(string label, JmpLabel jmp)
	{
		SlabelState* st = isLocalLabel(label) ? &stateList_[$-1] : &stateList_[0];
		st.undefList[label] ~= jmp;
	}

	// Remember a jump to a Label that is not defined yet.
	void addUndefinedLabel(Label label, JmpLabel jmp)
	{
		clabelUndefList_[label.id] ~= jmp;
	}

	bool hasUndefSlabel() const
	{
		foreach (st; stateList_)
		{
			if (hasUndefinedLabel_inner(st.undefList)) {
				return true;
			}
		}
		return false;
	}

	bool hasUndefClabel() const
	{
		return hasUndefinedLabel_inner(clabelUndefList_);
	}
	uint8* getCode() { return base_.getCode(); }
	bool isReady() const { return !base_.isAutoGrow() || base_.isCalledCalcJmpAddress(); }

/+
inline Label::Label(const Label& rhs)
{
	id = rhs.id;
	mgr = rhs.mgr;
	if (mgr) mgr->incRefCount(id, this);
}
inline Label& Label::operator=(const Label& rhs)
{
	if (id) throw Error(ERR_LABEL_IS_ALREADY_SET_BY_L);
	id = rhs.id;
	mgr = rhs.mgr;
	if (mgr) mgr->incRefCount(id, this);
	return *this;
}
inline Label::~Label()
{
	if (id && mgr) mgr->decRefCount(id, this);
}
inline const uint8* Label::getAddress() const
{
	if (mgr == 0 || !mgr->isReady()) return 0;
	size_t offset;
	if (!mgr->getOffset(&offset, *this)) return 0;
	return mgr->getCode() + offset;
}
+/

}	
1917 	
1918 
// Requested encoding size of a jump to a label.
enum LabelType
{
	T_SHORT,
	T_NEAR,
	T_AUTO // T_SHORT if possible
}
1925 
1926 public class CodeGenerator : CodeArray
1927 {
	// Target dependent constants:
	//   i32e      - bit-width mask passed to isREG()/isBit() for registers of native size
	//   BIT       - 64 or 32; compared against 64 before the 0x67 prefix is emitted
	//   dummyAddr - placeholder address value (its users are outside this section)
	//   NativeReg - register class of native width
	version (XBYAK64)
	{
		enum { i32e = 64 | 32, BIT = 64 }
		size_t dummyAddr = cast(size_t) (0x11223344UL << 32) | 55667788;
		alias NativeReg = Reg64;
	}
	else
	{
		enum { i32e = 32, BIT = 32 }
		size_t dummyAddr = 0x12345678;
		alias NativeReg = Reg32;
	}
1940 	// (XMM, XMM|MEM)
1941 	bool isXMM_XMMorMEM  (Operand op1, Operand op2)
1942 	{
1943 		return op1.isXMM && (op2.isXMM || op2.isMEM);
1944 	}
1945 	// (MMX, MMX|MEM) or (XMM, XMM|MEM)
1946 	bool isXMMorMMX_MEM  (Operand op1, Operand op2)
1947 	{
1948 		return (op1.isMMX && (op2.isMMX || op2.isMEM)) || isXMM_XMMorMEM(op1, op2);
1949 	} 
1950 	// (XMM, MMX|MEM)
1951 	bool isXMM_MMXorMEM  (Operand op1, Operand op2)
1952 	{
1953 		return op1.isXMM && (op2.isMMX || op2.isMEM);
1954 	}
1955 	// (MMX, XMM|MEM)
1956 	bool isMMX_XMMorMEM  (Operand op1, Operand op2)
1957 	{
1958 		return op1.isMMX && (op2.isXMM || op2.isMEM);
1959 	}
1960 	// (XMM, REG32|MEM)
1961 	bool isXMM_REG32orMEM(Operand op1, Operand op2)
1962 	{
1963 		return op1.isXMM && (op2.isREG(i32e) || op2.isMEM);
1964 	}
1965 	// (REG32, XMM|MEM)
1966 	bool isREG32_XMMorMEM(Operand op1, Operand op2)
1967 	{
1968 		return op1.isREG(i32e) && (op2.isXMM || op2.isMEM);
1969 	}
1970 	// (REG32, REG32|MEM)
1971 	bool isREG32_REG32orMEM(Operand op1 = new Operand(), Operand op2 = new Operand())
1972 	{
1973 		return op1.isREG(i32e) && ((op2.isREG(i32e) && op1.getBit == op2.getBit) || op2.isMEM);
1974 	}
1975 
1976 	void rex(Operand op1, Operand op2 = new Reg())
1977 	{
1978 		uint8 rex = 0;
1979 		Operand p1  = op1;
1980 		Operand p2  = op2;
1981 		if (p1.isMEM)	swap(p1, p2);
1982 		if (p1.isMEM)	throw new XError(ERR.BAD_COMBINATION);
1983 		if (p2.isMEM)
1984 		{
1985 			Address addr = p2.getAddress();
1986 			if (BIT == 64 && addr.is32bit() )	db(0x67);
1987 			Reg r1 = p1.getReg();
1988 			rex = addr.getRex() | r1.getRex();
1989 		}
1990 		else
1991 		{
1992 			// ModRM(reg, base);
1993 			Reg r1 = op1.getReg();
1994 			Reg r2 = op2.getReg();
1995 			rex = r2.getRex(r1);
1996 		}
1997 		// except movsx(16bit, 32/64bit)
1998 		if ((op1.isBit(16) && !op2.isBit(i32e)) || (op2.isBit(16) && !op1.isBit(i32e)))	db(0x66);
1999 		if (rex) db(rex);
2000 	}
2001 
	// Flags describing how a VEX/EVEX instruction is encoded. The low three
	// bits (T_NX_MASK) hold one of the T_N* values; every other member is an
	// independent bit flag. T_XXX has no explicit value and therefore takes
	// the value following T_MEM_EVEX.
	enum // AVXtype  
	{
		// low 3 bit
		T_N1 = 1,
		T_N2 = 2,
		T_N4 = 3,
		T_N8 = 4,
		T_N16 = 5,
		T_N32 = 6,
		T_NX_MASK = 7,
		//
		T_N_VL = 1 << 3, // N * (1, 2, 4) for VL
		T_DUP = 1 << 4, // N = (8, 32, 64)
		T_66 = 1 << 5,
		T_F3 = 1 << 6,
		T_F2 = 1 << 7,
		T_0F = 1 << 8,
		T_0F38 = 1 << 9,
		T_0F3A = 1 << 10,
		T_L0 = 1 << 11,
		T_L1 = 1 << 12,
		T_W0 = 1 << 13,
		T_W1 = 1 << 14,
		T_EW0 = 1 << 15,
		T_EW1 = 1 << 16,
		T_YMM = 1 << 17, // support YMM, ZMM
		T_EVEX = 1 << 18,
		T_ER_X = 1 << 19, // xmm{er}
		T_ER_Y = 1 << 20, // ymm{er}
		T_ER_Z = 1 << 21, // zmm{er}
		T_SAE_X = 1 << 22, // xmm{sae}
		T_SAE_Y = 1 << 23, // ymm{sae}
		T_SAE_Z = 1 << 24, // zmm{sae}
		T_MUST_EVEX = 1 << 25, // contains T_EVEX
		T_B32 = 1 << 26, // m32bcst
		T_B64 = 1 << 27, // m64bcst
		T_M_K = 1 << 28, // mem{k}
		T_VSIB = 1 << 29,
		T_MEM_EVEX = 1 << 30, // use evex if mem
		T_XXX
	}
2043 
2044 	void vex(Reg reg, Reg base, Operand v, int type, int code, bool x = false)
2045 	{
2046 		int w = (type & T_W1) ? 1 : 0;
2047 		bool is256 = (type & T_L1) ? true : (type & T_L0) ? false : reg.isYMM();
2048 		bool r = reg.isExtIdx();
2049 		bool b = base.isExtIdx();
2050 		int idx = v ? v.getIdx() : 0;
2051 		if ((idx | reg.getIdx() | base.getIdx()) >= 16) throw new XError(ERR.BAD_COMBINATION);
2052 		uint32 pp = (type & T_66) ? 1 : (type & T_F3) ? 2 : (type & T_F2) ? 3 : 0;
2053 		uint32 vvvv = (((~idx) & 15) << 3) | (is256 ? 4 : 0) | pp;
2054 		if (!b && !x && !w && (type & T_0F)) {
2055 			db(0xC5); db((r ? 0 : 0x80) | vvvv);
2056 		} else {
2057 			uint32 mmmm = (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0;
2058 			db(0xC4); db((r ? 0 : 0x80) | (x ? 0 : 0x40) | (b ? 0 : 0x20) | mmmm); db((w << 7) | vvvv);
2059 		}
2060 		db(code);
2061 	}
2062 
2063 
2064 
2065 	void verifySAE(Reg r, int type)
2066 	{
2067 		if (
2068 			((type & T_SAE_X) && r.isXMM()) ||
2069 			((type & T_SAE_Y) && r.isYMM()) || 
2070 			((type & T_SAE_Z) && r.isZMM())
2071 		) return;
2072 		throw new XError(ERR.SAE_IS_INVALID);
2073 	}
2074 	void verifyER(Reg r, int type)
2075 	{
2076 		if (
2077 			((type & T_ER_X) && r.isXMM()) ||
2078 			((type & T_ER_Y) && r.isYMM()) ||
2079 			((type & T_ER_Z) && r.isZMM())
2080 		) return;
2081 		throw new XError(ERR.ER_IS_INVALID);
2082 	}
2083 	// (a, b, c) contains non zero two or three values then err
2084 	int verifyDuplicate(int a, int b, int c, ERR err)
2085 	{
2086 		int v = a | b | c;
2087 		if ((a > 0 && a != v) + (b > 0 && b != v) + (c > 0 && c != v) > 0) return cast(int)(new XError(err));
2088 		return v;
2089 	}
	// Emit the four-byte EVEX prefix (0x62 plus three payload bytes)
	// followed by the opcode byte `code`.
	// Returns disp8N, the scale the caller applies to a compressed 8-bit
	// displacement.
	// Throws ERR.EVEX_IS_INVALID when `type` has neither T_EVEX nor
	// T_MUST_EVEX; rounding / opmask conflicts are reported through
	// verifyDuplicate, verifySAE and verifyER.
	int evex(Reg reg, Reg base, Operand v, int type, int code, bool x = false, bool b = false, int aaa = 0, uint32 VL = 0, bool Hi16Vidx = false)
	{
		if (!(type & (T_EVEX | T_MUST_EVEX))) throw new XError(ERR.EVEX_IS_INVALID);
		int w = (type & T_EW1) ? 1 : 0;
		uint32 mm = (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0;
		uint32 pp = (type & T_66) ? 1 : (type & T_F3) ? 2 : (type & T_F2) ? 3 : 0;

		int idx = v ? v.getIdx() : 0;
		uint32 vvvv = ~idx;

		// R, X, B, Rp (and Vp below) are stored inverted: true means "not extended"
		bool R = !reg.isExtIdx();
		bool X = x ? false : !base.isExtIdx2();
		bool B = !base.isExtIdx();
		bool Rp = !reg.isExtIdx2();
		int LL;
		int rounding = verifyDuplicate(reg.getRounding(), base.getRounding(), v ? v.getRounding() : 0, ERR.ROUNDING_IS_ALREADY_SET);
		int disp8N = 1;
		if (rounding) {
			// a rounding / sae modifier sets the b bit and decides LL by itself
			if (rounding == EvexModifierRounding.T_SAE) {
				verifySAE(base, type); LL = 0;
			} else {
				verifyER(base, type); LL = rounding - 1;
			}
			b = true;
		} else {
			// vector length = widest of the operands and the VL hint
			if (v) VL = max(VL, v.getBit());
			VL = max(max(reg.getBit(), base.getBit()), VL);
			LL = (VL == 512) ? 2 : (VL == 256) ? 1 : 0;
			if (b) {
				// broadcast: element size picked by T_B32
				disp8N = (type & T_B32) ? 4 : 8;
			} else if (type & T_DUP) {
				disp8N = VL == 128 ? 8 : VL == 256 ? 32 : 64;
			} else {
				if ((type & (T_NX_MASK | T_N_VL)) == 0) {
					type |= T_N16 | T_N_VL; // default
				}
				int low = type & T_NX_MASK;
				if (low > 0) {
					// T_N* stores log2(N) + 1
					disp8N = 1 << (low - 1);
					if (type & T_N_VL) disp8N *= (VL == 512 ? 4 : VL == 256 ? 2 : 1);
				}
			}
		}
		bool Vp = !((v ? v.isExtIdx2() : 0) | Hi16Vidx);
		bool z = reg.hasZero() || base.hasZero() || (v ? v.hasZero() : false);
		// the opmask index is taken from the operands unless the caller supplied one
		if (aaa == 0) aaa = verifyDuplicate(base.getOpmaskIdx(), reg.getOpmaskIdx(), (v ? v.getOpmaskIdx() : 0), ERR.OPMASK_IS_ALREADY_SET);
		db(0x62);
		db((R ? 0x80 : 0) | (X ? 0x40 : 0) | (B ? 0x20 : 0) | (Rp ? 0x10 : 0) | (mm & 3));
		db((w == 1 ? 0x80 : 0) | ((vvvv & 15) << 3) | 4 | (pp & 3));
		db((z ? 0x80 : 0) | ((LL & 3) << 5) | (b ? 0x10 : 0) | (Vp ? 8 : 0) | (aaa & 7));
		db(code);
		return disp8N;
	}
2143 	void setModRM(int mod, int r1, int r2)
2144 	{
2145 		db( cast(uint8)((mod << 6) | ((r1 & 7) << 3) | (r2 & 7)) );
2146 	}
	// Emit the ModRM byte, the SIB byte when one is needed, and the
	// displacement for the register expression `e`.
	// reg is the value placed in the reg field of ModRM.
	// disp8N == 0 selects the plain disp8 test; otherwise the displacement
	// is stored as disp8 only when it is a multiple of disp8N and the
	// quotient fits in 8 bits.
	// Throws ERR.OFFSET_IS_TOO_BIG (64-bit build) when the displacement
	// does not fit in 32 bits.
	void setSIB(RegExp e, int reg, int disp8N = 0)
	{
		size_t disp64 = e.getDisp();
version (XBYAK64)
{
		// the upper half must be all zeros or all ones (sign extension)
		size_t high = disp64 >> 32;
		if (high != 0 && high != 0xFFFFFFFF) throw new XError(ERR.OFFSET_IS_TOO_BIG);
}
		uint32 disp = cast(uint32)(disp64);
		Reg base = e.getBase();
		Reg index = e.getIndex();
		int baseIdx = base.getIdx();
		int baseBit = base.getBit();
		int indexBit = index.getBit();
		enum {
			mod00 = 0, mod01 = 1, mod10 = 2
		}
		int mod = mod10; // disp32
		// no displacement byte: no base register, or zero disp with a base other than EBP
		if (!baseBit || ((baseIdx & 7) != Operand.EBP && disp == 0)) {
			mod = mod00;
		} else {
			if (disp8N == 0) {
				if (inner.IsInDisp8(disp)) {
					mod = mod01;
				}
			} else {
				// disp must be casted to signed
				uint32 t = cast(uint32)(cast(int)disp / disp8N);
				if ((disp % disp8N) == 0 && inner.IsInDisp8(t)) {
					disp = t;
					mod = mod01;
				}
			}
		}
		// without a base register the EBP encoding is used in the base / rm field
		const int newBaseIdx = baseBit ? (baseIdx & 7) : Operand.EBP;
		/* ModR/M = [2:3:3] = [Mod:reg/code:R/M] */
		bool hasSIB = indexBit || (baseIdx & 7) == Operand.ESP;
version (XBYAK64)
{
		if (!baseBit && !indexBit) hasSIB = true;
}
		if (hasSIB) {
			setModRM(mod, reg, Operand.ESP);
			/* SIB = [2:3:3] = [SS:index:base(=rm)] */
			int idx = indexBit ? (index.getIdx() & 7) : Operand.ESP;
			int scale = e.getScale();
			int SS = (scale == 8) ? 3 : (scale == 4) ? 2 : (scale == 2) ? 1 : 0;
			setModRM(SS, idx, newBaseIdx);
		} else {
			setModRM(mod, reg, newBaseIdx);
		}
		// displacement: 1 byte for mod01, 4 bytes for mod10 or for "no base" addressing
		if (mod == mod01) {
			db(disp);
		} else if (mod == mod10 || (mod == mod00 && !baseBit)) {
			dd(disp);
		}
	}
2204 
2205 
	// Label bookkeeping for this generator.
	// NOTE(review): in D a `new` expression used as a field initializer is
	// evaluated once at compile time, so all CodeGenerator objects may start
	// out sharing this single LabelManager — confirm whether a constructor
	// outside this section replaces it.
	LabelManager labelMgr_ = new LabelManager();
2207 
2208 	uint8 getModRM(int mod, int r1, int r2) const
2209 	{
2210 		return cast(uint8) ((mod << 6) | ((r1 & 7) << 3) | (r2 & 7));
2211 	}
2212 
2213 	void opModR(Reg reg1, Reg reg2, int code0, int code1 = NONE, int code2 = NONE)
2214 	{
2215 		rex(reg2, reg1);
2216 		db(code0 | (reg1.isBit(8) ? 0 : 1));
2217 		if (code1 != NONE) db(code1);
2218 		if (code2 != NONE) db(code2);
2219 		setModRM(3, reg1.getIdx(), reg2.getIdx());
2220 	}
2221 
2222 	void opModM(Address addr, Reg reg, int code0, int code1 = NONE, int code2 = NONE, int immSize = 0)
2223 	{
2224 		if (addr.is64bitDisp)	throw new XError(ERR.CANT_USE_64BIT_DISP);
2225 		rex(addr, reg);
2226 		db(code0 | (reg.isBit(8) ? 0 : 1));
2227 		if (code1 != NONE) db(code1);
2228 		if (code2 != NONE) db(code2);
2229 		opAddr(addr, reg.getIdx(), immSize);
2230 	}
2231 	
2232 	void opLoadSeg(Address addr, Reg reg, int code0, int code1 = NONE)
2233 	{
2234 		if (addr.is64bitDisp()) throw new XError(ERR.CANT_USE_64BIT_DISP);
2235 		if (reg.isBit(8)) throw new XError(ERR.BAD_SIZE_OF_REGISTER);
2236 		rex(addr, reg);
2237 		db(code0); if (code1 != NONE) db(code1);
2238 		opAddr(addr, reg.getIdx());
2239 	}
2240 
2241 	void opMIB(Address addr, Reg reg, int code0, int code1)
2242 	{
2243 		if (addr.is64bitDisp()) throw new XError(ERR.CANT_USE_64BIT_DISP);
2244 		if (addr.getMode() != Address.Mode.M_ModRM) throw new XError(ERR.INVALID_MIB_ADDRESS);
2245 		if (BIT == 64 && addr.is32bit()) db(0x67);
2246 		RegExp regExp = addr.getRegExp(false);
2247 		uint8 rex = regExp.getRex();
2248 		if (rex) db(rex);
2249 		db(code0); db(code1);
2250 		setSIB(regExp, reg.getIdx());
2251 	}
2252 	
2253 	void makeJmp(uint32 disp, LabelType type, uint8 shortCode, uint8 longCode, uint8 longPref)
2254 	{
2255 		int shortJmpSize   = 2;
2256 		int longHeaderSize = longPref ? 2 : 1;
2257 		int longJmpSize    = longHeaderSize + 4;
2258 		if (type != T_NEAR && inner.IsInDisp8(disp - shortJmpSize))
2259 		{
2260 			db(shortCode);
2261 			db(disp - shortJmpSize);
2262 		}
2263 		else
2264 		{
2265 			if (type == T_SHORT) throw new XError(ERR.LABEL_IS_TOO_FAR);
2266 			if (longPref) db(longPref);
2267 
2268 			db(longCode);
2269 			dd(disp - longJmpSize);
2270 		}
2271 	}
2272 
2273 	void opJmp(T)(T label, LabelType type, uint8 shortCode, uint8 longCode, uint8 longPref)
2274 	if( is(T == string) || is(T == Label) )
2275 	{
2276 		if (isAutoGrow && size_ + 16 >= maxSize_)	growMemory(); // avoid splitting code of jmp
2277 		size_t offset = 0;                      
2278 		if (labelMgr_.getOffset(&offset, label))	// label exists
2279 		{
2280 			makeJmp(inner.VerifyInInt32(offset - size_), type, shortCode, longCode, longPref);
2281 		}
2282 		else
2283 		{
2284 			int jmpSize = 0;
2285 			if (type == T_NEAR)
2286 			{
2287 				jmpSize = 4;
2288 				if (longPref) db(longPref);
2289 				db(longCode); dd(0);
2290 			}
2291 			else
2292 			{
2293 				jmpSize = 1;
2294 				db(shortCode); db(0);
2295 			}
2296 			JmpLabel jmp = JmpLabel(size_, jmpSize, inner.LabelMode.LasIs);
2297 			labelMgr_.addUndefinedLabel(label, jmp);
2298 		}
2299 	}
2300 
2301 	void opJmpAbs(const void* addr, LabelType type, uint8 shortCode, uint8 longCode, uint8 longPref = 0)
2302 	{
2303 		if (isAutoGrow)
2304 		{
2305 			if (type != T_NEAR)	throw new XError(ERR.ONLY_T_NEAR_IS_SUPPORTED_IN_AUTO_GROW);
2306 			if (size_ + 16 >= maxSize_)	growMemory;
2307 			if (longPref) db(longPref);  //// fix
2308 			db(longCode);
2309 			dd(0);
2310 			save(size_ - 4, cast(size_t) addr - size_, 4, inner.LabelMode.Labs);
2311 		}
2312 		else
2313 		{
2314 			makeJmp(inner.VerifyInInt32(cast(uint8*) addr - getCurr), type, shortCode, longCode, longPref);
2315 		}
2316 	}
2317 	
2318 	// reg is reg field of ModRM
2319 	// immSize is the size for immediate value
2320 	// disp8N = 0(normal), disp8N = 1(force disp32), disp8N = {2, 4, 8} ; compressed displacement
2321 	void opAddr(Address addr, int reg, int immSize = 0, int disp8N = 0, bool permitVisb = false)
2322 	{
2323 		if (!permitVisb && addr.isVsib()) throw new XError(ERR.BAD_VSIB_ADDRESSING);
2324 		if (addr.getMode() == Address.Mode.M_ModRM) {
2325 			setSIB(addr.getRegExp(), reg, disp8N);
2326 		} else if (addr.getMode() == Address.Mode.M_rip || addr.getMode() == Address.Mode.M_ripAddr) {
2327 			setModRM(0, reg, 5);
2328 			if (addr.getLabel()) { // [rip + Label]
2329 				putL_inner(addr.getLabel(), true, addr.getDisp() - immSize);
2330 			} else {
2331 				size_t disp = addr.getDisp();
2332 				if (addr.getMode() == Address.Mode.M_ripAddr) {
2333 					if (isAutoGrow()) throw new XError(ERR.INVALID_RIP_IN_AUTO_GROW);
2334 					disp -= cast(size_t)getCurr() + 4 + immSize;
2335 				}
2336 				dd(inner.VerifyInInt32(disp));
2337 			}
2338 		}
2339 	}
2340 	
2341 	
//	Generic 0x0F-prefixed instruction taking (reg, reg|mem).
//	preCode is for SSSE3/SSE4
//	pref and imm8 are emitted only when they are not NONE.
//	Throws ERR.BAD_COMBINATION when isValid is set and rejects the pair.
	void opGen(Operand reg, Operand op, int code, int pref, bool delegate(Operand, Operand)isValid, int imm8 = NONE, int preCode = NONE)
	{
		if (isValid && !isValid(reg, op))
		{
			throw new XError(ERR.BAD_COMBINATION);
		}
		if (pref != NONE)
		{
			db(pref);
		}

		immutable bool hasImm = (imm8 != NONE);
		if (op.isMEM)
		{
			opModM(op.getAddress(), reg.getReg(), 0x0F, preCode, code, hasImm ? 1 : 0);
		}
		else
		{
			opModR(reg.getReg(), op.getReg(), 0x0F, preCode, code);
		}

		if (hasImm)
		{
			db(imm8);
		}
	}
2358 
2359 	void opMMX_IMM(Mmx mmx, int imm8, int code, int ext)
2360 	{
2361 		if (mmx.isXMM) { db(0x66); }
2362 		opModR(new Reg32(ext), mmx, 0x0F, code);
2363 		db(imm8);
2364 	}
2365 
2366 	void opMMX(Mmx mmx, Operand op, int code, int pref = 0x66, int imm8 = NONE, int preCode = NONE)
2367 	{
2368 		opGen(mmx, op, code, (mmx.isXMM ? pref : NONE), &isXMMorMMX_MEM, imm8, preCode);
2369 	}
2370 
2371 	void opMovXMM(Operand op1, Operand op2, int code, int pref)
2372 	{
2373 		if (pref != NONE) db(pref);
2374 		if (op1.isXMM && op2.isMEM)
2375 		{
2376 			opModM(op2.getAddress(), op1.getReg(), 0x0F, code);
2377 		}
2378 		else if (op1.isMEM && op2.isXMM)
2379 		{
2380 			opModM(op1.getAddress(), op2.getReg(), 0x0F, code | 1);
2381 		}
2382 		else
2383 		{
2384 			throw new XError(ERR.BAD_COMBINATION);
2385 		}
2386 	}
2387 
2388 	void opExt(Operand op, Mmx mmx, int code, int imm, bool hasMMX2 = false)
2389 	{
2390 		// pextrw is special
2391 		if (hasMMX2 && op.isREG(i32e))
2392 		{
2393 			if (mmx.isXMM) db(0x66);
2394 			opModR(op.getReg(), mmx, 0x0F, 0xC5);
2395 			db(imm);
2396 		}
2397 		else
2398 		{
2399 			opGen(mmx, op, code, 0x66, &isXMM_REG32orMEM, imm, 0x3A);
2400 		}
2401 	}
2402 
2403 	void opR_ModM(Operand op, int bit, int ext, int code0, int code1 = NONE, int code2 = NONE, bool disableRex = false, int immSize = 0)
2404 	{
2405 		int opBit = op.getBit;
2406 		if (disableRex && opBit == 64) opBit = 32;
2407 		if (op.isREG(bit))
2408 		{
2409 			opModR(new Reg(ext, Kind.REG, opBit), op.getReg().changeBit(opBit), code0, code1, code2);
2410 		}
2411 		else if (op.isMEM)
2412 		{
2413 			opModM(op.getAddress(), new Reg(ext, Kind.REG, opBit), code0, code1, code2, immSize);
2414 		}
2415 		else
2416 		{
2417 			throw new XError(ERR.BAD_COMBINATION);
2418 		}
2419 	}
2420 
2421 	void opShift(Operand op, int imm, int ext)
2422 	{
2423 		verifyMemHasSize(op);
2424 		opR_ModM(op, 0, ext, (0xC0 | ((imm == 1 ? 1 : 0) << 4)), NONE, NONE, false, (imm != 1) ? 1 : 0);
2425 		if (imm != 1) db(imm);
2426 	}
2427 
2428 	void opShift(Operand op, Reg8 cl, int ext)
2429 	{
2430 		if (cl.getIdx != Operand.CL) throw new XError(ERR.BAD_COMBINATION);
2431 		opR_ModM(op, 0, ext, 0xD2);
2432 	}
2433 
2434 	void opModRM(Operand op1, Operand op2, bool condR, bool condM, int code0, int code1 = NONE, int code2 = NONE, int immSize = 0)
2435 	{
2436 		if (condR)
2437 		{
2438 			opModR(op1.getReg(), op2.getReg(), code0, code1, code2);
2439 		}
2440 		else if (condM)
2441 		{
2442 			opModM(op2.getAddress(), op1.getReg(), code0, code1, code2, immSize);
2443 		}
2444 		else
2445 		{
2446 			throw new XError(ERR.BAD_COMBINATION);
2447 		}
2448 	}
2449 
2450 	void opShxd(Operand op, Reg reg, uint8 imm, int code, Reg8 _cl = new Reg8)
2451 	{
2452 		if (_cl && _cl.getIdx != Operand.CL) throw new XError(ERR.BAD_COMBINATION);
2453 		opModRM(reg, op, (op.isREG(16 | i32e) && op.getBit == reg.getBit), op.isMEM && (reg.isREG(16 | i32e)), 0x0F, code | (_cl ? 1 : 0), NONE, _cl ? 0 : 1);
2454 		if (!cl) db(imm);
2455 	}
2456 
2457 // (REG, REG|MEM), (MEM, REG)
2458 	void opRM_RM(Operand op1, Operand op2, int code)
2459 	{
2460 		if (op1.isREG() && op2.isMEM())
2461 		{
2462 			opModM(op2.getAddress(), op1.getReg(), code | 2);
2463 		}
2464 		else
2465 		{
2466 			opModRM(op2, op1, op1.isREG() && op1.getKind() == op2.getKind(), op1.isMEM() && op2.isREG(), code);
2467 		}
2468 	}
2469 
2470 // (REG|MEM, IMM)
2471 	void opRM_I(Operand op, uint32 imm, int code, int ext)
2472 	{
2473 		verifyMemHasSize(op);
2474 		uint32 immBit = inner.IsInDisp8(imm) ? 8 : inner.IsInDisp16(imm) ? 16 : 32;
2475 		if (op.getBit < immBit) throw new XError(ERR.IMM_IS_TOO_BIG);
2476 
2477 		// don't use MEM16 if 32/64bit mode
2478 		if (op.isREG(32 | 64) && immBit == 16) immBit = 32;
2479 
2480 		// rax, eax, ax, al
2481 		if (op.isREG && op.getIdx == 0 && (op.getBit == immBit || (op.isBit(64) && immBit == 32)))
2482 		{
2483 			rex(op);
2484 			db(code | 4 | (immBit == 8 ? 0 : 1));
2485 		}
2486 		else
2487 		{
2488 			int tmp = immBit < min(op.getBit, 32U) ? 2 : 0;
2489 			opR_ModM(op, 0, ext, 0x80 | tmp, NONE, NONE, false, immBit / 8);
2490 		}
2491 		db(imm, immBit / 8);
2492 	}
2493 
2494 	void opIncDec(Operand op, int code, int ext)
2495 	{
2496 		verifyMemHasSize(op);
2497 		version (XBYAK64)
2498         {
2499             code = 0xFE;
2500 		    if (op.isREG)
2501 		    {
2502 			    opModR(new Reg(ext, Kind.REG, op.getBit()), op.getReg(), code);
2503 		    }
2504 		    else
2505 		    {
2506 			    opModM(op.getAddress(), new Reg(ext, Kind.REG, op.getBit()), code);
2507 		    }
2508         }
2509         else
2510         {
2511 			if (op.isREG && !op.isBit(8))
2512 			{
2513 				rex(op);
2514 				db(code | op.getIdx);
2515 			}
2516 		}
2517 	}
2518 
2519 	void opPushPop(Operand op, int code, int ext, int alt)
2520 	{
2521 		if (op.isREG)
2522 		{
2523 			if (op.isBit(16))
2524 				db(0x66);
2525 			if (op.getReg().getIdx >= 8)
2526 				db(0x41);
2527 			db(alt | (op.getIdx & 7));
2528 		}
2529 		else if (op.isMEM)
2530 		{
2531 			opModM(op.getAddress(), new Reg(ext, Kind.REG, op.getBit), code);
2532 		}
2533 		else
2534 		{
2535 			throw new XError(ERR.BAD_COMBINATION);
2536 		}
2537 	}
2538 
2539 	void verifyMemHasSize(Operand op) const
2540 	{
2541 		if (op.isMEM && op.getBit == 0)
2542 			throw new XError(ERR.MEM_SIZE_IS_NOT_SPECIFIED);
2543 	}
2544 	
2545 	//	mov(r, imm) = db(imm, mov_imm(r, imm))
2546 	int mov_imm(Reg reg, size_t imm)
2547 	{
2548 		int bit = reg.getBit();
2549 		const int idx  = reg.getIdx();
2550 		int code = 0xB0 | ((bit == 8 ? 0 : 1) << 3);
2551 		if (bit == 64 && (imm & ~cast(size_t) (0xffffffffu)) == 0)
2552 		{
2553 			rex(new Reg32(idx));
2554 			bit = 32;
2555 		}
2556 		else
2557 		{
2558 			rex(reg);
2559 			if (bit == 64 && inner.IsInInt32(imm))
2560 			{
2561 				db(0xC7);
2562 				code = 0xC0;
2563 				bit  = 32;
2564 			}
2565 		}
2566 		db(code | (idx & 7));
2567 		return bit / 8;
2568 	}
2569 
2570 
	/**
	 * Write the address (or relative offset) of `label` into the code buffer.
	 *
	 * Params:
	 *   label    = label name (string) or Label object
	 *   relative = true: write a 4-byte offset relative to the end of the
	 *              written field; false: write a size_t-sized value
	 *   disp     = extra displacement added to the label offset
	 *
	 * If the label is not defined yet, a zero placeholder is written and the
	 * location is registered with the label manager as an undefined label.
	 */
	void putL_inner(T)(T label, bool relative = false, size_t disp = 0)
	if(is(T == string) || is(T == Label) )
	{
		const int jmpSize = relative ? 4 : cast(int) size_t.sizeof;
		// in auto-grow mode, grow the buffer when fewer than 16 bytes remain
		if (isAutoGrow() && size_ + 16 >= maxSize_)
			growMemory();
		size_t offset = 0;
		if (labelMgr_.getOffset(&offset, label))
		{
			// label already defined
			if (relative)
			{
				// offset is measured from the end of the field being written
				db(inner.VerifyInInt32(offset + disp - size_ - jmpSize), jmpSize);
			}
			else if (isAutoGrow())
			{
				// write a placeholder and record it with mode LaddTop
				db(uint64(0), jmpSize);
				save(size_ - jmpSize, offset, jmpSize, inner.LabelMode.LaddTop);
			}
			else
			{
				db(cast(size_t) top_ + offset, jmpSize);
			}
			return;
		}
		// label not defined yet: placeholder now, registered as undefined
		db(uint64(0), jmpSize);
		JmpLabel jmp = JmpLabel(size_, jmpSize, (relative ? inner.LabelMode.LasIs : isAutoGrow() ? inner.LabelMode.LaddTop : inner.LabelMode.Labs), disp);
		labelMgr_.addUndefinedLabel(label, jmp);
	}
2599 
2600 
2601 	void opMovxx(Reg reg, Operand op, uint8 code)
2602 	{
2603 		if (op.isBit(32))
2604 			throw new XError(ERR.BAD_COMBINATION);
2605 
2606 		int w = op.isBit(16);
2607 
2608 		version (XBYAK64)
2609 		{
2610 			if (op.isHigh8bit())
2611 				throw new XError(ERR.BAD_COMBINATION);
2612 		}
2613 
2614 		bool cond = reg.isREG && (reg.getBit > op.getBit);
2615 		opModRM(reg, op, cond && op.isREG, cond && op.isMEM, 0x0F, code | w);
2616 	}
2617 
2618 	void opFpuMem(Address addr, uint8 m16, uint8 m32, uint8 m64, uint8 ext, uint8 m64ext)
2619 	{
2620 		if (addr.is64bitDisp)	throw new XError(ERR.CANT_USE_64BIT_DISP);
2621 		uint8 code = addr.isBit(16) ? m16 : addr.isBit(32) ? m32 : addr.isBit(64) ? m64 : 0;
2622 		if (!code)	throw new XError(ERR.BAD_MEM_SIZE);
2623 		if (m64ext && addr.isBit(64))	ext = m64ext;
2624 		
2625 		rex(addr, st0);
2626 		db(code);
2627 		opAddr(addr, ext);
2628 	}
2629 
2630 // use code1 if reg1 == st0
2631 // use code2 if reg1 != st0 && reg2 == st0
2632 	void opFpuFpu(Fpu reg1, Fpu reg2, uint32 code1, uint32 code2)
2633 	{
2634 		uint32 code = reg1.getIdx == 0 ? code1 : reg2.getIdx == 0 ? code2 : 0;
2635 		if (!code)
2636 			throw new XError(ERR.BAD_ST_COMBINATION);
2637 
2638 		db(cast(uint8) (code >> 8));
2639 		db(cast(uint8) (code | (reg1.getIdx | reg2.getIdx)));
2640 	}
2641 
	/**
	 * Emit a two-byte x87 instruction with a single register operand:
	 * `code1`, then `code2` OR-ed with the register index.
	 */
	void opFpu(Fpu reg, uint8 code1, uint8 code2)
	{
		db(code1);
		db(code2 | reg.getIdx);
	}
2647 
	/**
	 * Emit a VEX- or EVEX-encoded instruction.
	 *
	 * Params:
	 *   r    = register placed in the ModRM reg field
	 *   op1  = second operand handed to vex()/evex(); may be null
	 *   op2  = register or memory operand placed in the ModRM r/m field
	 *   type = T_* flag set describing the instruction
	 *   code = opcode byte
	 *   imm8 = trailing immediate byte, skipped when NONE
	 * Throws: XError(ERR.INVALID_OPMASK_WITH_MEMORY) when the address carries
	 *         an opmask but `type` lacks T_M_K,
	 *         XError(ERR.INVALID_BROADCAST) when the address is a broadcast but
	 *         `type` has neither T_B32 nor T_B64.
	 */
	void opVex(Reg r, Operand op1, Operand op2, int type, int code, int imm8 = NONE)
	{
		if (op2.isMEM()) {
			Address addr = op2.getAddress();
			RegExp regExp = addr.getRegExp();
			Reg base = regExp.getBase();
			Reg index = regExp.getIndex();
			// 0x67 prefix for a 32-bit address when BIT == 64
			if (BIT == 64 && addr.is32bit()) db(0x67);
			int disp8N = 0;
			bool x = index.isExtIdx();
			
			// EVEX is chosen when the instruction or any operand asks for it
			if ((type & (T_MUST_EVEX | T_MEM_EVEX)) || r.hasEvex() || (op1 && op1.hasEvex()) || addr.isBroadcast() || addr.getOpmaskIdx()) {
				int aaa = addr.getOpmaskIdx();
				if (aaa && !(type & T_M_K)) throw new XError(ERR.INVALID_OPMASK_WITH_MEMORY);
				bool b = false;
				if (addr.isBroadcast()) {
					if (!(type & (T_B32 | T_B64))) throw new XError(ERR.INVALID_BROADCAST);
					b = true;
				}
				// for VSIB addressing the index register width is passed as VL
				int VL = regExp.isVsib() ? index.getBit() : 0;
				disp8N = evex(r, base, op1, type, code, x, b, aaa, VL, index.isExtIdx2());
			} else {
				vex(r, base, op1, type, code, x);
			}
			// disp8N is whatever evex() returned (0 on the VEX path)
			opAddr(addr, r.getIdx(), (imm8 != NONE) ? 1 : 0, disp8N, (type & T_VSIB) != 0);
		} else {		
			// register-register form: ModRM with mod = 3
			Reg base = op2.getReg();
			if ((type & T_MUST_EVEX) || r.hasEvex() || (op1 && op1.hasEvex()) || base.hasEvex()) {
				evex(r, base, op1, type, code);
			} else {
				vex(r, base, op1, type, code);
			}
			setModRM(3, r.getIdx(), base.getIdx());
		}
		if (imm8 != NONE) db(imm8);
	}		
2684 
2685 // (r, r, r/m) if isR_R_RM
2686 // (r, r/m, r)
2687 	void opGpr(Reg32e r, Operand op1, Operand op2, int type, uint8 code, bool isR_R_RM, int imm8 = NONE)
2688 	{
2689 		Operand p1 = op1;
2690 		Operand p2 = op2;
2691 		if (!isR_R_RM)	swap(p1, p2);
2692 		uint bit = r.getBit;
2693 		if (p1.getBit != bit || (p2.isREG && p2.getBit != bit))	throw new XError(ERR.BAD_COMBINATION);
2694 		type |= (bit == 64) ? T_W1 : T_W0;
2695 		opVex(r, p1, p2, type, code, imm8);
2696 	}
2697 	void opAVX_X_X_XM(Xmm x1, Operand op1, Operand op2, int type, int code0, int imm8 = NONE)
2698 	{
2699 		Xmm x2 = cast(Xmm)op1;
2700 		Operand op = op2;
2701 		if (op2.isNone) { // (x1, op1) -> (x1, x1, op1)
2702 			x2 = x1;
2703 			op = op1;
2704 		}
2705 		// (x1, x2, op)
2706 		if (!((x1.isXMM && x2.isXMM) || ((type & T_YMM) && ((x1.isYMM && x2.isYMM) || (x1.isZMM && x2.isZMM))))) throw new XError(ERR.BAD_COMBINATION);
2707 		opVex(x1, x2, op, type, code0, imm8);
2708 	}
2709 
2710 	void opAVX_K_X_XM(Opmask k, Xmm x2, Operand op3, int type, int code0, int imm8 = NONE)
2711 	{
2712 		if (!op3.isMEM() && (x2.getKind() != op3.getKind())) throw new XError(ERR.BAD_COMBINATION);
2713 		opVex(k, x2, op3, type, code0, imm8);
2714 	}
2715 
2716 	// (x, x/m), (y, x/m256), (z, y/m)
2717 	void checkCvt1(Operand x, Operand op)
2718 	{
2719 		if (!op.isMEM() && !(x.isKind(Kind.XMM | Kind.YMM) && op.isXMM()) && !(x.isZMM() && op.isYMM())) throw new XError(ERR.BAD_COMBINATION);
2720 	}
2721 	// (x, x/m), (x, y/m256), (y, z/m)
2722 	void checkCvt2(Xmm x, Operand op)
2723 	{
2724 		if (!(x.isXMM() && op.isKind(Kind.XMM | Kind.YMM | Kind.MEM)) && !(x.isYMM() && op.isKind(Kind.ZMM | Kind.MEM))) throw new XError(ERR.BAD_COMBINATION);
2725 	}
2726 	void opCvt2(Xmm x, Operand op, int type, int code)
2727 	{
2728 		checkCvt2(x, op);
2729 		int kind = x.isXMM() ? (op.isBit(256) ? Kind.YMM : Kind.XMM) : Kind.ZMM;
2730 		opVex(x.copyAndSetKind(kind), xm0, op, type, code);
2731 	}
2732 	void opCvt3(Xmm x1, Xmm x2, Operand op, int type, int type64, int type32, uint8 code)
2733 	{
2734 		if (!(x1.isXMM() && x2.isXMM() && (op.isREG(i32e) || op.isMEM()))) throw new XError(ERR.BAD_SIZE_OF_REGISTER);
2735 		Xmm x = new Xmm(op.getIdx());
2736 		Operand p = op.isREG() ? x : op;
2737 		opVex(x1, x2, p, (type | (op.isBit(64) ? type64 : type32)), code);
2738 	}
2739 	const Xmm cvtIdx0(Operand x)
2740 	{
2741 		return x.isZMM() ? zm0 : x.isYMM() ? ym0 : xm0;
2742 	}
2743 
2744 // support (x, x/m, imm), (y, y/m, imm)
	/**
	 * Emit an AVX instruction of the form (x, x/m[, imm]) by forwarding to
	 * opAVX_X_X_XM with the index-0 register of x's width (cvtIdx0) as the
	 * middle operand.
	 */
	void opAVX_X_XM_IMM(Xmm x, Operand op, int type, int code, int imm8 = NONE)
	{
		opAVX_X_X_XM(x, cvtIdx0(x), op, type, code, imm8);
	}
2749 // QQQ:need to refactor
2750 	void opSp1(Reg reg, Operand op, uint8 pref, uint8 code0, uint8 code1)
2751 	{
2752 		if (reg.isBit(8))
2753 			throw new XError(ERR.BAD_SIZE_OF_REGISTER);
2754 		bool is16bit = reg.isREG(16) && (op.isREG(16) || op.isMEM);
2755 		if (!is16bit && !(reg.isREG(i32e) && (op.isREG(reg.getBit) || op.isMEM)))
2756 			throw new XError(ERR.BAD_COMBINATION);
2757 		if (is16bit)
2758 			db(0x66);
2759 		db(pref); opModRM(reg.changeBit(i32e == 32 ? 32 : reg.getBit), op, op.isREG, true, code0, code1);
2760 	}
2761 
2762 	void opGather(Xmm x1, Address addr, Xmm x2, int type, uint8 code, int mode)
2763 	{
2764 		RegExp regExp = addr.getRegExp();
2765 		if (!regExp.isVsib(128 | 256)) throw new XError(ERR.BAD_VSIB_ADDRESSING);
2766 		int y_vx_y = 0;
2767 		int y_vy_y = 1;
2768 //		int x_vy_x = 2;
2769 		bool isAddrYMM = regExp.getIndex().getBit() == 256;
2770 
2771 		if (!x1.isXMM || isAddrYMM || !x2.isXMM)
2772 		{
2773 			bool isOK = false;
2774 			if (mode == y_vx_y)
2775 			{
2776 				isOK = x1.isYMM && !isAddrYMM && x2.isYMM;
2777 			}
2778 			else if (mode == y_vy_y)
2779 			{
2780 				isOK = x1.isYMM && isAddrYMM && x2.isYMM;
2781 			}
2782 			else     // x_vy_x
2783 			{
2784 				isOK = !x1.isYMM && isAddrYMM && !x2.isYMM;
2785 			}
2786 			if (!isOK)
2787 				throw new XError(ERR.BAD_VSIB_ADDRESSING);
2788 		}
2789 		opAVX_X_X_XM(isAddrYMM ? new Ymm(x1.getIdx()) : x1, isAddrYMM ? new Ymm(x2.getIdx()) : x2, addr, type, code);
2790 	}
2791 	
	// Pairing modes for checkGather2: each name lists the allowed
	// (data register, index register) widths as x/y/z = XMM/YMM/ZMM.
	enum {
		xx_yy_zz = 0, // (x,x) (y,y) (z,z)
		xx_yx_zy = 1, // (x,x) (y,x) (z,y)
		xx_xy_yz = 2  // (x,x) (x,y) (y,z)
	}
2797 	
2798 	void checkGather2(Xmm x1, Reg x2, int mode) const
2799 	{
2800 		if (x1.isXMM() && x2.isXMM()) return;
2801 		switch (mode) {
2802 		case xx_yy_zz: if ((x1.isYMM() && x2.isYMM()) || (x1.isZMM() && x2.isZMM())) return;
2803 			break;
2804 		case xx_yx_zy: if ((x1.isYMM() && x2.isXMM()) || (x1.isZMM() && x2.isYMM())) return;
2805 			break;
2806 		case xx_xy_yz: if ((x1.isXMM() && x2.isYMM()) || (x1.isYMM() && x2.isZMM())) return;
2807 			break;
2808 		default:
2809 			break;
2810 		}
2811 		throw new XError(ERR.BAD_VSIB_ADDRESSING);
2812 	}
2813 	
2814 	void opGather2(Xmm x, Address addr, int type, uint8 code, int mode)
2815 	{
2816 		if (x.hasZero()) throw new XError(ERR.INVALID_ZERO);
2817 		checkGather2(x, addr.getRegExp().getIndex(), mode);
2818 		opVex(x, null, addr, type, code);
2819 	}
2820 	/*
2821 		xx_xy_yz ; mode = true
2822 		xx_xy_xz ; mode = false
2823 	*/
2824 	void opVmov(Operand op, Xmm x, int type, uint8 code, bool mode)
2825 	{
2826 		if (mode) {
2827 			if (!op.isMEM() && !((op.isXMM() && x.isXMM()) || (op.isXMM() && x.isYMM()) || (op.isYMM() && x.isZMM())))  throw new XError(ERR.BAD_COMBINATION);
2828 		} else {
2829 			if (!op.isMEM() && !op.isXMM()) throw new XError(ERR.BAD_COMBINATION);
2830 		}
2831 		opVex(x, cast(Operand)null, op, type, code);
2832 	}
2833 	void opGatherFetch(Address addr, Xmm x, int type, uint8 code, int kind)
2834 	{
2835 		if (addr.hasZero()) throw new XError(ERR.INVALID_ZERO);
2836 		if (addr.getRegExp().getIndex().getKind() != kind) throw new XError(ERR.BAD_VSIB_ADDRESSING);
2837 		opVex(x, cast(Operand)null, addr, type, code);
2838 	}
2839 	
2840 	void opInOut(Reg a, Reg d, uint8 code)
2841 	{
2842 		if (a.getIdx() == Operand.AL && d.getIdx() == Operand.DX && d.getBit() == 16) {
2843 			switch (a.getBit())
2844 			{
2845 				case 8: db(code); return;
2846 				case 16: db(0x66); db(code + 1); return;
2847 				case 32: db(code + 1); return;
2848 				default: break;
2849 			}
2850 		}
2851 		throw new XError(ERR.BAD_COMBINATION);
2852 	}
2853 	void opInOut(Reg a, uint8 code, uint8 v)
2854 	{
2855 		if (a.getIdx() == Operand.AL) {
2856 			switch (a.getBit())
2857 			{
2858 				case 8: db(code); db(v); return;
2859 				case 16: db(0x66); db(code + 1); db(v); return;
2860 				case 32: db(code + 1); db(v); return;
2861 				default: break;
2862 			}
2863 		}
2864 		throw new XError(ERR.BAD_COMBINATION);
2865 	}
2866 	
2867 public:
	/// Return the module-level xbyak.VERSION value.
	size_t getVersion() const
	{
		return xbyak.VERSION;
	}
	// Register, address-frame and EVEX-modifier constants available on every
	// target. The 64-bit-only registers are declared in the version (XBYAK64)
	// block that follows.
	enum
	{
		// MMX / XMM / YMM / ZMM registers 0-7
		mm0   = new Mmx(0), mm1 = new Mmx(1), mm2 = new Mmx(2), mm3 = new Mmx(3),
		mm4   = new Mmx(4), mm5 = new Mmx(5), mm6 = new Mmx(6), mm7 = new Mmx(7),
		xmm0  = new Xmm(0), xmm1 = new Xmm(1), xmm2 = new Xmm(2), xmm3 = new Xmm(3),
		xmm4  = new Xmm(4), xmm5 = new Xmm(5), xmm6 = new Xmm(6), xmm7 = new Xmm(7),
		ymm0  = new Ymm(0), ymm1 = new Ymm(1), ymm2 = new Ymm(2), ymm3 = new Ymm(3),
		ymm4  = new Ymm(4), ymm5 = new Ymm(5), ymm6 = new Ymm(6), ymm7 = new Ymm(7),
		zmm0  = new Zmm(0), zmm1 = new Zmm(1), zmm2 = new Zmm(2), zmm3 = new Zmm(3),
		zmm4  = new Zmm(4), zmm5 = new Zmm(5), zmm6 = new Zmm(6), zmm7 = new Zmm(7),
		// for my convenience		
		// (short aliases of the vector registers above)
		xm0   = xmm0, xm1 = xmm1, xm2 = xmm2, xm3 = xmm3,
		xm4   = xmm4, xm5 = xmm5, xm6 = xmm6, xm7 = xmm7,
		ym0   = ymm0, ym1 = ymm1, ym2 = ymm2, ym3 = ymm3,
		ym4   = ymm4, ym5 = ymm5, ym6 = ymm6, ym7 = ymm7,
		zm0   = zmm0, zm1 = zmm1, zm2 = zmm2, zm3 = zmm3,
		zm4   = zmm4, zm5 = zmm5, zm6 = zmm6, zm7 = zmm7,		
			
		// 32-bit general-purpose registers
		eax = new Reg32(Operand.EAX),
		ecx = new Reg32(Operand.ECX),
		edx = new Reg32(Operand.EDX),
		ebx = new Reg32(Operand.EBX),
		esp = new Reg32(Operand.ESP),
		ebp = new Reg32(Operand.EBP),
		esi = new Reg32(Operand.ESI),
		edi = new Reg32(Operand.EDI),
		// 16-bit registers (built from the same indices as the 32-bit ones)
		ax    = new Reg16(Operand.EAX), cx = new Reg16(Operand.ECX), dx = new Reg16(Operand.EDX), bx = new Reg16(Operand.EBX),
		sp    = new Reg16(Operand.ESP), bp = new Reg16(Operand.EBP), si = new Reg16(Operand.ESI), di = new Reg16(Operand.EDI),
		// 8-bit registers, low and high halves
		al    = new Reg8(Operand.AL), cl = new Reg8(Operand.CL), dl = new Reg8(Operand.DL), bl = new Reg8(Operand.BL),
		ah    = new Reg8(Operand.AH), ch = new Reg8(Operand.CH), dh = new Reg8(Operand.DH), bh = new Reg8(Operand.BH),
		// address frames; the constructor argument is the operand size in bits
		// (0 = unspecified). `byte_` has a trailing underscore because `byte`
		// is a D keyword.
		ptr   = new AddressFrame(0),
		byte_ = new AddressFrame(8),
		word  = new AddressFrame(16),
		dword = new AddressFrame(32),
		qword = new AddressFrame(64),
		xword = new AddressFrame(128), 
		yword = new AddressFrame(256), 
		zword = new AddressFrame(512),
		// frames constructed with the second argument set to true
		// NOTE(review): presumably the broadcast variants — confirm in AddressFrame
		ptr_b = new AddressFrame(0, true),
		xword_b = new AddressFrame(128, true), 
		yword_b = new AddressFrame(256, true),
		zword_b = new AddressFrame(512, true),
		// x87 stack registers
		st0   = new Fpu(0), st1 = new Fpu(1), st2 = new Fpu(2), st3 = new Fpu(3),
		st4   = new Fpu(4), st5 = new Fpu(5), st6 = new Fpu(6), st7 = new Fpu(7),
		// opmask registers
		k0 = new Opmask(0), k1 = new Opmask(1), k2 = new Opmask(2), k3 = new Opmask(3), 
		k4 = new Opmask(4), k5 = new Opmask(5), k6 = new Opmask(6), k7 = new Opmask(7),
		// bounds registers
		bnd0 = new BoundsReg(0),
		bnd1 = new BoundsReg(1),
		bnd2 = new BoundsReg(2),
		bnd3 = new BoundsReg(3),
		// EVEX rounding / suppress-all-exceptions and zeroing modifiers
		T_sae = new EvexModifierRounding(EvexModifierRounding.T_SAE),
		T_rn_sae = new EvexModifierRounding(EvexModifierRounding.T_RN_SAE),
		T_rd_sae = new EvexModifierRounding(EvexModifierRounding.T_RD_SAE),
		T_ru_sae = new EvexModifierRounding(EvexModifierRounding.T_RU_SAE),
		T_rz_sae = new EvexModifierRounding(EvexModifierRounding.T_RZ_SAE),
		T_z = new EvexModifierZero()
	}
2929 	version (XBYAK64)
2930 	{
2931 		enum
2932 		{
2933 			rax = new Reg64(Operand.RAX),
2934 			rcx = new Reg64(Operand.RCX),
2935 			rdx = new Reg64(Operand.RDX),
2936 			rbx = new Reg64(Operand.RBX),
2937 			rsp = new Reg64(Operand.RSP),
2938 			rbp = new Reg64(Operand.RBP),
2939 			rsi = new Reg64(Operand.RSI),
2940 			rdi = new Reg64(Operand.RDI),
2941 			r8 = new Reg64(Operand.R8),
2942 			r9 = new Reg64(Operand.R9),
2943 			r10 = new Reg64(Operand.R10),
2944 			r11 = new Reg64(Operand.R11),
2945 			r12 = new Reg64(Operand.R12),
2946 			r13 = new Reg64(Operand.R13),
2947 			r14 = new Reg64(Operand.R14),
2948 			r15 = new Reg64(Operand.R15),
2949 
2950 			r8d = new Reg32(Operand.R8D),
2951 			r9d = new Reg32(Operand.R9D),
2952 			r10d = new Reg32(Operand.R10D),
2953 			r11d = new Reg32(Operand.R11D),
2954 			r12d = new Reg32(Operand.R12D),
2955 			r13d = new Reg32(Operand.R13D),
2956 			r14d = new Reg32(Operand.R14D),
2957 			r15d = new Reg32(Operand.R15D),
2958 
2959 			r8w = new Reg16(Operand.R8W),
2960 			r9w = new Reg16(Operand.R9W),
2961 			r10w = new Reg16(Operand.R10W),
2962 			r11w = new Reg16(Operand.R11W),
2963 			r12w = new Reg16(Operand.R12W),
2964 			r13w = new Reg16(Operand.R13W),
2965 			r14w = new Reg16(Operand.R14W),
2966 			r15w = new Reg16(Operand.R15W),
2967 
2968 			r8b = new Reg8(Operand.R8B),
2969 			r9b = new Reg8(Operand.R9B),
2970 			r10b = new Reg8(Operand.R10B),
2971 			r11b = new Reg8(Operand.R11B),
2972 			r12b = new Reg8(Operand.R12B),
2973 			r13b = new Reg8(Operand.R13B),
2974 			r14b = new Reg8(Operand.R14B),
2975 			r15b = new Reg8(Operand.R15B),
2976 
2977 			spl = new Reg8(Operand.SPL, true),
2978 			bpl = new Reg8(Operand.BPL, true),
2979 			sil = new Reg8(Operand.SIL, true),
2980 			dil = new Reg8(Operand.DIL, true),
2981 
2982 			xmm8 = new Xmm(8),
2983 			xmm9 = new Xmm(9),
2984 			xmm10 = new Xmm(10),
2985 			xmm11 = new Xmm(11),
2986 			xmm12 = new Xmm(12),
2987 			xmm13 = new Xmm(13),
2988 			xmm14 = new Xmm(14),
2989 			xmm15 = new Xmm(15),
2990 			xmm16 = new Xmm(16),
2991 			xmm17 = new Xmm(17),
2992 			xmm18 = new Xmm(18),
2993 			xmm19 = new Xmm(19),
2994 			xmm20 = new Xmm(20),
2995 			xmm21 = new Xmm(21),
2996 			xmm22 = new Xmm(22),
2997 			xmm23 = new Xmm(23),
2998 			xmm24 = new Xmm(24),
2999 			xmm25 = new Xmm(25),
3000 			xmm26 = new Xmm(26),
3001 			xmm27 = new Xmm(27),
3002 			xmm28 = new Xmm(28),
3003 			xmm29 = new Xmm(29),
3004 			xmm30 = new Xmm(30),
3005 			xmm31 = new Xmm(31),
3006 		
3007 			ymm8 = new Ymm(8),
3008 			ymm9 = new Ymm(9),
3009 			ymm10 = new Ymm(10),
3010 			ymm11 = new Ymm(11),
3011 			ymm12 = new Ymm(12),
3012 			ymm13 = new Ymm(13),
3013 			ymm14 = new Ymm(14),
3014 			ymm15 = new Ymm(15),
3015 			ymm16 = new Ymm(16),
3016 			ymm17 = new Ymm(17),
3017 			ymm18 = new Ymm(18),
3018 			ymm19 = new Ymm(19),
3019 			ymm20 = new Ymm(20),
3020 			ymm21 = new Ymm(21),
3021 			ymm22 = new Ymm(22),
3022 			ymm23 = new Ymm(23),
3023 			ymm24 = new Ymm(24),
3024 			ymm25 = new Ymm(25),
3025 			ymm26 = new Ymm(26),
3026 			ymm27 = new Ymm(27),
3027 			ymm28 = new Ymm(28),
3028 			ymm29 = new Ymm(29),
3029 			ymm30 = new Ymm(30),
3030 			ymm31 = new Ymm(31),
3031 
3032 			zmm8 = new Zmm(8),
3033 			zmm9 = new Zmm(9),
3034 			zmm10 = new Zmm(10),
3035 			zmm11 = new Zmm(11),
3036 			zmm12 = new Zmm(12),
3037 			zmm13 = new Zmm(13),
3038 			zmm14 = new Zmm(14),
3039 			zmm15 = new Zmm(15),
3040 			zmm16 = new Zmm(16),
3041 			zmm17 = new Zmm(17),
3042 			zmm18 = new Zmm(18),
3043 			zmm19 = new Zmm(19),
3044 			zmm20 = new Zmm(20),
3045 			zmm21 = new Zmm(21),
3046 			zmm22 = new Zmm(22),
3047 			zmm23 = new Zmm(23),
3048 			zmm24 = new Zmm(24),
3049 			zmm25 = new Zmm(25),
3050 			zmm26 = new Zmm(26),
3051 			zmm27 = new Zmm(27),
3052 			zmm28 = new Zmm(28),
3053 			zmm29 = new Zmm(29),
3054 			zmm30 = new Zmm(30),
3055 			zmm31 = new Zmm(31),
3056 
3057 			// for my convenience
3058 			xm8 = xmm8, xm9 = xmm9, xm10 = xmm10, xm11 = xmm11, xm12 = xmm12, xm13 = xmm13, xm14 = xmm14, xm15 = xmm15, 
3059 			xm16 = xmm16, xm17 = xmm17, xm18 = xmm18, xm19 = xmm19, xm20 = xmm20, xm21 = xmm21, xm22 = xmm22, xm23 = xmm23, 
3060 			xm24 = xmm24, xm25 = xmm25, xm26 = xmm26, xm27 = xmm28, xm29 = xmm29, xm30 = xmm30, xm31 = xmm31,
3061 			
3062 			ym8 = ymm8, ym9 = ymm9, ym10 = ymm10, ym11 = ymm11, ym12 = ymm12, ym13 = ymm13, ym14 = ymm14, ym15 = ymm15, 
3063 			ym16 = ymm16, ym17 = ymm17, ym18 = ymm18, ym19 = ymm19, ym20 = ymm20, ym21 = ymm21, ym22 = ymm22, ym23 = ymm23, 
3064 			ym24 = ymm24, ym25 = ymm25, ym26 = ymm26, ym27 = ymm28, ym29 = ymm29, ym30 = ymm30, ym31 = ymm31,
3065 			
3066 			zm8 = zmm8, zm9 = zmm9, zm10 = zmm10, zm11 = zmm11, zm12 = zmm12, zm13 = zmm13, zm14 = zmm14, zm15 = zmm15, 
3067 			zm16 = zmm16, zm17 = zmm17, zm18 = zmm18, zm19 = zmm19, zm20 = zmm20, zm21 = zmm21, zm22 = zmm22, zm23 = zmm23, 
3068 			zm24 = zmm24, zm25 = zmm25, zm26 = zmm26, zm27 = zmm28, zm29 = zmm29, zm30 = zmm30, zm31 = zmm31,
3069 
3070 			rip = RegRip()
3071 		}
3072 		version (XBYAK_DISABLE_SEGMENT) {}
3073 		else{
3074 		    enum{
3075 			    es = new Segment(Segment.es),
3076 			    cs = new Segment(Segment.cs),
3077 			    ss = new Segment(Segment.ss),
3078 			    ds = new Segment(Segment.ds),
3079 			    fs = new Segment(Segment.fs),
3080 			    gs = new Segment(Segment.gs)
3081 		    }
3082         }
3083 	}
3084 	
	/// Create and return a new, not yet defined Label object.
	Label L()
	{
		return new Label();
	}
	/// Define the string label `label` via the label manager (defineSlabel).
	void L(string label)
	{
		labelMgr_.defineSlabel(label);
	}
	/// Define the Label object `label` via the label manager (defineClabel).
	void L(Label label)
	{
		labelMgr_.defineClabel(label);
	}
	/// Enter a local-label scope (forwards to labelMgr_.enterLocal).
	void inLocalLabel()
	{
		labelMgr_.enterLocal;
	}
	/// Leave the current local-label scope (forwards to labelMgr_.leaveLocal).
	void outLocalLabel()
	{
		labelMgr_.leaveLocal;
	}
3105 
3106 //		assign src to dst
3107 //		require
3108 //		dst : does not used by L()
3109 //		src : used by L()
	/// Assign `src` to `dst` via the label manager.
	/// `dst` must not have been set by L(); `src` must have been set by L().
	void assignL(Label dst, Label src)
	{
		labelMgr_.assign(dst, src);
	}
3114 
3115 	/*
3116 		put address of label to buffer
3117 		@note the put size is 4(32-bit), 8(64-bit)
3118 	*/
	// Both overloads forward to putL_inner with its defaults
	// (relative = false, disp = 0).
	void putL(string label) { putL_inner(label); }
	void putL(Label label) { putL_inner(label); }
3121 
	// jmp through a register or memory operand: opcode 0xFF with extension 4,
	// disableRex = true
	void jmp(Operand op) { opR_ModM(op, BIT, 4, 0xFF, NONE, NONE, true); }
	// jmp to a label; 0xEB and 0xE9 are the opcodes handed to opJmp
	void jmp(string label, LabelType type = T_AUTO) { opJmp(label, type, 0xEB, 0xE9, 0); }
	// C-string convenience overload; converts with to!string
	void jmp(const char* label, LabelType type = T_AUTO) { jmp(to!string(label), type); }
	void jmp(Label label, LabelType type = T_AUTO) { opJmp(label, type, 0xEB, 0xE9, 0); }
	// jmp to an absolute address
	void jmp(const void* addr, LabelType type = T_AUTO) { opJmpAbs(addr, type, 0xEB, 0xE9); }
3127 	
	// call through a register or memory operand: opcode 0xFF with extension 2,
	// disableRex = true
	void call(Operand op) { opR_ModM(op, 16 | i32e, 2, 0xFF, NONE, NONE, true); }
	// call(string label), not string
	// call to a label: always T_NEAR, opcode 0xE8
	void call(string label) { opJmp(label, T_NEAR, 0, 0xE8, 0); }
	// C-string convenience overload; converts with to!string
	void call(const char* label) { call(to!string(label)); }
	void call(Label label) { opJmp(label, T_NEAR, 0, 0xE8, 0); }
3133 
3134 	// call(function pointer)
version(XBYAK_VARIADIC_TEMPLATE)
{
	/**
	 * call(function pointer): emit a near call to the address held in `func`.
	 *
	 * Fixes: the previous body took the address of the parameter itself
	 * (`&func`) rather than the function it points to, called opJmpAbs with a
	 * single argument, and declared `Params` as a single type instead of a
	 * variadic list. The function pointer is now converted to void* and
	 * handed to the call(void*) overload.
	 */
	void call(Ret, Params...)(Ret function(Params) func)
	{
		call(cast(void*) func);
	}
}
3142 	
	// call to an absolute address: T_NEAR, opcode 0xE8
	void call(void* addr) { opJmpAbs(addr, T_NEAR, 0, 0xE8); }
3144 
	/**
	 * test op, reg (opcode 0x84).
	 * The register form requires `op` to be a register of the same kind as
	 * `reg`; otherwise `op` must be a memory operand.
	 */
	void test(Operand op, Reg reg)
	{
		opModRM(reg, op, op.isREG && (op.getKind == reg.getKind), op.isMEM, 0x84);
	}
3149 
3150 	void test(Operand op, uint32 imm)
3151 	{
3152 		verifyMemHasSize(op);
3153 		int immSize = min(op.getBit / 8, 4U);
3154 		if (op.isREG && op.getIdx == 0)   // al, ax, eax
3155 		{
3156 			rex(op);
3157 			db(0xA8 | (op.isBit(8) ? 0 : 1));
3158 		}
3159 		else
3160 		{
3161 			opR_ModM(op, 0, 0, 0xF6, NONE, NONE, false, immSize);
3162 		}
3163 		db(imm, immSize);
3164 	}
3165 
	/**
	 * imul reg, op (opcode 0x0F 0xAF).
	 * The register form requires `op` to be a register of the same kind as
	 * `reg`; otherwise `op` must be a memory operand.
	 */
	void imul(Reg reg, Operand op)
	{
		opModRM(reg, op, op.isREG && (reg.getKind == op.getKind), op.isMEM, 0x0F, 0xAF);
	}
3170 
3171 	void imul(Reg reg, Operand op, int imm)
3172 	{
3173 		int s = inner.IsInDisp8(imm) ? 1 : 0;
3174 		int immSize = s ? 1 : reg.isREG(16) ? 2 : 4;
3175 		opModRM(reg, op, op.isREG && (reg.getKind == op.getKind), op.isMEM, 0x69 | (s << 1), NONE, NONE, immSize);
3176 		db(imm, immSize);
3177 	}
3178 
	// pop: memory form 0x8F with extension 0, register form 0x58 + index
	void pop(Operand op) { opPushPop(op, 0x8F, 0, 0x58); }
	// push: memory form 0xFF with extension 6, register form 0x50 + index
	void push(Operand op) { opPushPop(op, 0xFF, 6, 0x50); }
3181 	void push(AddressFrame af, uint32 imm)
3182 	{
3183 		if (af.bit_ == 8 && inner.IsInDisp8(imm))
3184 		{
3185 			db(0x6A); db(imm);
3186 		}
3187 		else if (af.bit_ == 16 && inner.IsInDisp16(imm))
3188 		{
3189 			db(0x66); db(0x68); dw(imm);
3190 		}
3191 		else
3192 		{
3193 			db(0x68); dd(imm);
3194 		}
3195 	}
3196 
3197 	// use "push(word, 4)" if you want "push word 4"
3198 	void push(uint32 imm)
3199 	{
3200 		if (inner.IsInDisp8(imm))
3201 		{
3202 			push(byte_, imm);
3203 		}
3204 		else
3205 		{
3206 			push(dword, imm);
3207 		}
3208 	}
3209 
3210 
	/**
	 * mov between registers and memory.
	 *
	 * When one side is register index 0 (al/ax/eax/rax) and the other is
	 * memory, the short 0xA0-0xA3 encodings are candidates:
	 *  - XBYAK64: used only for addresses with a 64-bit displacement.
	 *  - otherwise: used only for displacement-only addresses.
	 * Every other case goes through opRM_RM with base opcode 0x88.
	 */
	void mov(Operand reg1, Operand reg2)
	{
		// reg/addr/code stay null/0 unless one of the accumulator forms matches
		Reg reg;
		Address addr;
		uint8 code;
		if (reg1.isREG() && reg1.getIdx() == 0 && reg2.isMEM())   // mov eax|ax|al, [disp]
		{
			reg  = reg1.getReg();
			addr = reg2.getAddress();
			code = 0xA0;
		}
		else if (reg1.isMEM() && reg2.isREG() && reg2.getIdx() == 0)     // mov [disp], eax|ax|al
		{
			reg  = reg2.getReg();
			addr = reg1.getAddress();
			code = 0xA2;
		}

version (XBYAK64)
{
			if (addr && addr.is64bitDisp)
			{
				if (code)
				{
					rex(reg);
					// 0xA0/0xA1 = load (8-bit / wider), 0xA2/0xA3 = store (8-bit / wider)
					db(reg1.isREG(8) ? 0xA0 : reg1.isREG() ? 0xA1 : reg2.isREG(8) ? 0xA2 : 0xA3);
					db(addr.getDisp(), 8);
				}
				else
					throw new XError(ERR.BAD_COMBINATION);
			}
			else
				opRM_RM(reg1, reg2, 0x88);
}
else
{
			// `code` is tested first, so `addr` is only dereferenced when it was set above
			if (code && addr.isOnlyDisp())
			{
				rex(reg, addr);
				db(code | (reg.isBit(8) ? 0 : 1));
				dd(cast(uint32) (addr.getDisp()));
			}
			else
				opRM_RM(reg1, reg2, 0x88);
}
	}
3257 
3258 	void mov(Operand op, size_t imm)
3259 	{
3260 		if (op.isREG()) {
3261 			const int size = mov_imm(op.getReg(), imm);
3262 			db(imm, size);
3263 		} else if (op.isMEM()) {
3264 			verifyMemHasSize(op);
3265 			int immSize = op.getBit() / 8;
3266 			if (immSize <= 4) {
3267 				sint64 s = sint64(imm) >> (immSize * 8);
3268 				if (s != 0 && s != -1) throw new XError(ERR.IMM_IS_TOO_BIG);
3269 			} else {
3270 				if (!inner.IsInInt32(imm)) throw new XError(ERR.IMM_IS_TOO_BIG);
3271 				immSize = 4;
3272 			}
3273 			opModM(op.getAddress(), new Reg(0, Kind.REG, op.getBit()), 0xC6, NONE, NONE, immSize);
3274 			db(cast(uint32)(imm), immSize);
3275 		} else {
3276 			throw new XError(ERR.BAD_COMBINATION);
3277 		}
3278 	}
3279 	
3280 	void mov(NativeReg reg, const char* label) // can't use string
3281 	{
3282 		if (label == null) {
3283 			mov(cast(Operand)(reg), 0); // call imm
3284 			return;
3285 		}
3286 		mov_imm(reg, dummyAddr);
3287 		putL(to!string(label));
3288 	}
3289 
	/**
	 * mov reg, address-of-label: the opcode is emitted through mov_imm with
	 * dummyAddr, then the label address is written by putL.
	 */
	void mov(NativeReg reg, Label label)
	{
		mov_imm(reg, dummyAddr);
		putL(label);
	}
3295 
3296 	void xchg(Operand op1, Operand op2)
3297 	{
3298 		Operand p1 = op1;
3299 		Operand p2 = op2;
3300 		if (p1.isMEM || (p2.isREG(16 | i32e) && p2.getIdx == 0))
3301 		{
3302 			p1 = op2; p2 = op1;
3303 		}
3304 		if (p1.isMEM)
3305 			throw new XError(ERR.BAD_COMBINATION);
3306 
3307 		bool BL = true;
3308 		version (XBYAK64)
3309 		{
3310 			BL = (p2.getIdx != 0 || !p1.isREG(32));
3311 		}
3312 		if (p2.isREG && (p1.isREG(16 | i32e) && p1.getIdx == 0) && BL)
3313 		{
3314 			rex(p2, p1);
3315 			db(0x90 | (p2.getIdx & 7));
3316 			return;
3317 		}
3318 		opModRM(p1, p2, (p1.isREG && p2.isREG && (p1.getBit == p2.getBit)), p2.isMEM, 0x86 | (p1.isBit(8) ? 40 : 1));
3319 	}
3320 
3321 version(XBYAK_DISABLE_SEGMENT){}
3322 else
3323 {
3324 	void push(Segment seg)
3325 	{
3326 		switch (seg.getIdx()) {
3327 		case Segment.es: db(0x06); break;
3328 		case Segment.cs: db(0x0E); break;
3329 		case Segment.ss: db(0x16); break;
3330 		case Segment.ds: db(0x1E); break;
3331 		case Segment.fs: db(0x0F); db(0xA0); break;
3332 		case Segment.gs: db(0x0F); db(0xA8); break;
3333 		default:
3334 			assert(0);
3335 		}
3336 	}
3337 	void pop(Segment seg)
3338 	{
3339 		switch (seg.getIdx()) {
3340 		case Segment.es: db(0x07); break;
3341 		case Segment.cs: throw new XError(ERR.BAD_COMBINATION);
3342 		case Segment.ss: db(0x17); break;
3343 		case Segment.ds: db(0x1F); break;
3344 		case Segment.fs: db(0x0F); db(0xA1); break;
3345 		case Segment.gs: db(0x0F); db(0xA9); break;
3346 		default:
3347 			assert(0);
3348 		}
3349 	}
3350 	void putSeg(Segment seg)
3351 	{
3352 		switch (seg.getIdx()) {
3353 		case Segment.es: db(0x2E); break;
3354 		case Segment.cs: db(0x36); break;
3355 		case Segment.ss: db(0x3E); break;
3356 		case Segment.ds: db(0x26); break;
3357 		case Segment.fs: db(0x64); break;
3358 		case Segment.gs: db(0x65); break;
3359 		default:
3360 			assert(0);
3361 		}
3362 	}
	/**
	 * mov op, seg (opcode 0x8C).
	 * `op` must be a 16/32/64-bit register or a memory operand; the segment
	 * index is wrapped in a Reg8 for the ModRM reg field.
	 */
	void mov(Operand op, Segment seg)
	{
		opModRM(new Reg8(seg.getIdx()), op, op.isREG(16|i32e), op.isMEM(), 0x8C);
	}
3367 	void mov(Segment seg, Operand op)
3368 	{
3369 		Reg r1 = op.getReg().cvt32();
3370 		Operand op_r1 = cast(Operand)r1;
3371 		opModRM(new Reg8(seg.getIdx()), op.isREG(16|i32e) ? op_r1 : op, op.isREG(16|i32e), op.isMEM(), 0x8E);
3372 	}
3373 }		
3374 		
	// Sentinel for "no byte": 256 lies outside 0..255, so it cannot collide
	// with a real prefix, opcode or immediate byte.
	enum { NONE = 256 }
3376 public:
    /**
     * Construct the generator.
     * All three arguments are forwarded to the base-class constructor; the
     * object is then reset and the label manager is bound to this instance.
     */
    this(size_t maxSize = DEFAULT_MAX_CODE_SIZE, void* userPtr = null, Allocator allocator = new Allocator())
	{
		super(maxSize, userPtr, allocator);
		this.reset();	////fix
		
		// reset() already binds the label manager; this call repeats it
		labelMgr_.set(this);
	}
3384 
3385 	void reset()
3386 	{
3387 		resetSize;
3388 		labelMgr_.reset;
3389 		labelMgr_.set(this);
3390 	}
3391 
3392 	bool hasUndefinedLabel() const
3393 	{
3394 		return labelMgr_.hasUndefSlabel() || labelMgr_.hasUndefClabel();
3395 	}
3396 
3397 
3398 	/*
3399 		MUST call ready() to complete generating code if you use AutoGrow mode.
3400 		It is not necessary for the other mode if hasUndefinedLabel() is true.
3401 	*/
3402 	void ready(ProtectMode mode = ProtectMode.PROTECT_RWE)
3403 	{
3404 //		if (hasUndefinedLabel()) throw new XError(ERR.LABEL_IS_NOT_FOUND);
3405 		if (isAutoGrow()) {
3406 			calcJmpAddress();
3407 			if (useProtect()) setProtectMode(mode);
3408 		}
3409 	}
3410 	
3411 	// set read/exec
3412 	void readyRE() { return ready(ProtectMode.PROTECT_RE); }
3413 
3414 	/*
3415 		use single byte nop if useMultiByteNop = false
3416 	*/
3417 	void nop(size_t size = 1, bool useMultiByteNop = true)
3418 	{
3419 		if (!useMultiByteNop) {
3420 			for (size_t i = 0; i < size; i++) {
3421 				db(0x90);
3422 			}
3423 			return;
3424 		}
3425 		/*
3426 			Intel Architectures Software Developer's Manual Volume 2
3427 			recommended multi-byte sequence of NOP instruction
3428 			AMD and Intel seem to agree on the same sequences for up to 9 bytes:
3429 			https://support.amd.com/TechDocs/55723_SOG_Fam_17h_Processors_3.00.pdf
3430 		*/
3431 		uint8[][] nopTbl = [
3432 			[0x90],
3433 			[0x66, 0x90],
3434 			[0x0F, 0x1F, 0x00],
3435 			[0x0F, 0x1F, 0x40, 0x00],
3436 			[0x0F, 0x1F, 0x44, 0x00, 0x00],
3437 			[0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00],
3438 			[0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00],
3439 			[0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00],
3440 			[0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00],
3441 		];
3442 		size_t n = nopTbl.sizeof / nopTbl[0].sizeof;
3443 		while (size > 0) {
3444 			size_t len = min(n, size);
3445 			uint8* seq = nopTbl[len - 1].ptr;
3446 			db(seq, len);
3447 			size -= len;
3448 		}
3449 	}
3450 
3451 	void Align(int x = 16)
3452 	{
3453 		if (x == 1)
3454 			return;
3455 		if (x < 1 || (x & (x - 1)))
3456 			throw new XError(ERR.BAD_ALIGN);
3457 		if (isAutoGrow() && x > cast(int) inner.ALIGN_PAGE_SIZE)
3458 		{
3459 			throw new Exception(format("warning:autoGrow mode does not support %d align", x));
3460 		}
3461 		while (cast(size_t) getCurr % x)
3462 		{
3463 			nop();
3464 		}
3465 	}
3466 
3467 
// Returns the library version as a fixed string literal.
string getVersionString() const { return "0.099"; }
// ---- Mnemonics starting with 'a' ----
// Each method encodes one instruction form by forwarding its operands plus
// fixed opcode/prefix constants to an op* helper (opRM_I, opRM_RM, opGen, opGpr).
// NOTE(review): the 0x100 argument used by the *ps forms (where the *pd/*sd/*ss
// forms pass 0x66/0xF2/0xF3) presumably means "no mandatory prefix" - confirm in opGen.
void adc(Operand op, uint32 imm) { opRM_I(op, imm, 0x10, 2); }
void adc(Operand op1, Operand op2) { opRM_RM(op1, op2, 0x10); }
void adcx(Reg32e reg, Operand op) { opGen(reg, op, 0xF6, 0x66, &isREG32_REG32orMEM, NONE, 0x38); }
void add(Operand op, uint32 imm) { opRM_I(op, imm, 0x00, 0); }
void add(Operand op1, Operand op2) { opRM_RM(op1, op2, 0x00); }
void addpd(Xmm xmm, Operand op) { opGen(xmm, op, 0x58, 0x66, &isXMM_XMMorMEM); }
void addps(Xmm xmm, Operand op) { opGen(xmm, op, 0x58, 0x100, &isXMM_XMMorMEM); }
void addsd(Xmm xmm, Operand op) { opGen(xmm, op, 0x58, 0xF2, &isXMM_XMMorMEM); }
void addss(Xmm xmm, Operand op) { opGen(xmm, op, 0x58, 0xF3, &isXMM_XMMorMEM); }
void addsubpd(Xmm xmm, Operand op) { opGen(xmm, op, 0xD0, 0x66, &isXMM_XMMorMEM); }
void addsubps(Xmm xmm, Operand op) { opGen(xmm, op, 0xD0, 0xF2, &isXMM_XMMorMEM); }
void adox(Reg32e reg, Operand op) { opGen(reg, op, 0xF6, 0xF3, &isREG32_REG32orMEM, NONE, 0x38); }
void aesdec(Xmm xmm, Operand op) { opGen(xmm, op, 0xDE, 0x66, &isXMM_XMMorMEM, NONE, 0x38); }
void aesdeclast(Xmm xmm, Operand op) { opGen(xmm, op, 0xDF, 0x66, &isXMM_XMMorMEM, NONE, 0x38); }
void aesenc(Xmm xmm, Operand op) { opGen(xmm, op, 0xDC, 0x66, &isXMM_XMMorMEM, NONE, 0x38); }
void aesenclast(Xmm xmm, Operand op) { opGen(xmm, op, 0xDD, 0x66, &isXMM_XMMorMEM, NONE, 0x38); }
void aesimc(Xmm xmm, Operand op) { opGen(xmm, op, 0xDB, 0x66, &isXMM_XMMorMEM, NONE, 0x38); }
void aeskeygenassist(Xmm xmm, Operand op, uint8 imm) { opGen(xmm, op, 0xDF, 0x66, &isXMM_XMMorMEM, imm, 0x3A); }
void and(Operand op, uint32 imm) { opRM_I(op, imm, 0x20, 4); }
void and(Operand op1, Operand op2) { opRM_RM(op1, op2, 0x20); }
void andn(Reg32e r1, Reg32e r2, Operand op) { opGpr(r1, r2, op, T_0F38, 0xf2, true); }
void andnpd(Xmm xmm, Operand op) { opGen(xmm, op, 0x55, 0x66, &isXMM_XMMorMEM); }
void andnps(Xmm xmm, Operand op) { opGen(xmm, op, 0x55, 0x100, &isXMM_XMMorMEM); }
void andpd(Xmm xmm, Operand op) { opGen(xmm, op, 0x54, 0x66, &isXMM_XMMorMEM); }
void andps(Xmm xmm, Operand op) { opGen(xmm, op, 0x54, 0x100, &isXMM_XMMorMEM); }
3494 
3495 
// ---- Mnemonics starting with 'b' ----
// blsi/blsmsk/blsr share opcode 0xf3 and differ only in the index (3/2/1) of
// the temporary Reg32e passed as the first operand.
// The bnd* forms that need a prefix emit it with db() before the helper call.
// The immediate forms of bt/btc/btr/bts share opcode 0x0f 0xba and differ in
// the third opR_ModM argument (4/7/6/5); the immediate is appended via db(imm).
void bextr(Reg32e r1, Operand op, Reg32e r2) { opGpr(r1, op, r2, T_0F38, 0xf7, false); }
void blendpd(Xmm xmm, Operand op, int imm) { opGen(xmm, op, 0x0D, 0x66, &isXMM_XMMorMEM, cast(uint8)(imm), 0x3A); }
void blendps(Xmm xmm, Operand op, int imm) { opGen(xmm, op, 0x0C, 0x66, &isXMM_XMMorMEM, cast(uint8)(imm), 0x3A); }
void blendvpd(Xmm xmm, Operand op) { opGen(xmm, op, 0x15, 0x66, &isXMM_XMMorMEM, NONE, 0x38); }
void blendvps(Xmm xmm, Operand op) { opGen(xmm, op, 0x14, 0x66, &isXMM_XMMorMEM, NONE, 0x38); }
void blsi(Reg32e r, Operand op) { opGpr(new Reg32e(3, r.getBit()), op, r, T_0F38, 0xf3, false); }
void blsmsk(Reg32e r, Operand op) { opGpr(new Reg32e(2, r.getBit()), op, r, T_0F38, 0xf3, false); }
void blsr(Reg32e r, Operand op) { opGpr(new Reg32e(1, r.getBit()), op, r, T_0F38, 0xf3, false); }
void bnd() { db(0xF2); }
void bndcl(BoundsReg bnd, Operand op) { db(0xF3); opR_ModM(op, i32e, bnd.getIdx(), 0x0F, 0x1A, NONE, !op.isMEM()); }
void bndcn(BoundsReg bnd, Operand op) { db(0xF2); opR_ModM(op, i32e, bnd.getIdx(), 0x0F, 0x1B, NONE, !op.isMEM()); }
void bndcu(BoundsReg bnd, Operand op) { db(0xF2); opR_ModM(op, i32e, bnd.getIdx(), 0x0F, 0x1A, NONE, !op.isMEM()); }
void bndldx(BoundsReg bnd, Address addr) { opMIB(addr, bnd, 0x0F, 0x1A); }
void bndmk(BoundsReg bnd, Address addr) { db(0xF3); opModM(addr, bnd, 0x0F, 0x1B); }
void bndmov(Address addr, BoundsReg bnd) { db(0x66); opModM(addr, bnd, 0x0F, 0x1B); }
void bndmov(BoundsReg bnd, Operand op) { db(0x66); opModRM(bnd, op, op.isBNDREG(), op.isMEM(), 0x0F, 0x1A); }
void bndstx(Address addr, BoundsReg bnd) { opMIB(addr, bnd, 0x0F, 0x1B); }
void bsf(Reg reg, Operand op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0xBC); }
void bsr(Reg reg, Operand op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0xBD); }
void bswap(Reg32e reg) { opModR(new Reg32(1), reg, 0x0F); }
void bt(Operand op, Reg reg) { opModRM(reg, op, op.isREG(16|32|64) && op.getBit() == reg.getBit(), op.isMEM(), 0x0f, 0xA3); }
void bt(Operand op, uint8 imm) { opR_ModM(op, 16|32|64, 4, 0x0f, 0xba, NONE, false, 1); db(imm); }
void btc(Operand op, Reg reg) { opModRM(reg, op, op.isREG(16|32|64) && op.getBit() == reg.getBit(), op.isMEM(), 0x0f, 0xBB); }
void btc(Operand op, uint8 imm) { opR_ModM(op, 16|32|64, 7, 0x0f, 0xba, NONE, false, 1); db(imm); }
void btr(Operand op, Reg reg) { opModRM(reg, op, op.isREG(16|32|64) && op.getBit() == reg.getBit(), op.isMEM(), 0x0f, 0xB3); }
void btr(Operand op, uint8 imm) { opR_ModM(op, 16|32|64, 6, 0x0f, 0xba, NONE, false, 1); db(imm); }
void bts(Operand op, Reg reg) { opModRM(reg, op, op.isREG(16|32|64) && op.getBit() == reg.getBit(), op.isMEM(), 0x0f, 0xAB); }
void bts(Operand op, uint8 imm) { opR_ModM(op, 16|32|64, 5, 0x0f, 0xba, NONE, false, 1); db(imm); }
void bzhi(Reg32e r1, Operand op, Reg32e r2) { opGpr(r1, op, r2, T_0F38, 0xf5, false); }
3525 
// ---- Mnemonics starting with 'c': fixed-encoding and cache-line forms ----
// The operand-less forms write their literal opcode bytes with db().
// cbw is the 0x66-prefixed form of opcode 0x98; clflushopt is the
// 0x66-prefixed form of the clflush encoding.
void cbw() { db(0x66); db(0x98); }
void cdq() { db(0x99); }
void clc() { db(0xF8); }
void cld() { db(0xFC); }
void clflush(Address addr) { opModM(addr, new Reg32(7), 0x0F, 0xAE); }
void clflushopt(Address addr) { db(0x66); opModM(addr, new Reg32(7), 0x0F, 0xAE); }
void cli() { db(0xFA); }
void clzero() { db(0x0F); db(0x01); db(0xFC); }
void cmc() { db(0xF5); }
// ---- Conditional moves ----
// Every cmovcc is encoded as 0x0F, (0x40 | cc) with cc in the range 0..15.
// Alias mnemonics share the same cc value (e.g. cmova/cmovnbe = 7,
// cmove/cmovz = 4, cmovb/cmovc/cmovnae = 2).
// The source is accepted as a 16/32e-bit register or a memory operand.
void cmova(Reg reg, Operand op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 7); }
void cmovae(Reg reg, Operand op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 3); }
void cmovb(Reg reg, Operand op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 2); }
void cmovbe(Reg reg, Operand op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 6); }
void cmovc(Reg reg, Operand op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 2); }
void cmove(Reg reg, Operand op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 4); }
void cmovg(Reg reg, Operand op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 15); }
void cmovge(Reg reg, Operand op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 13); }
void cmovl(Reg reg, Operand op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 12); }
void cmovle(Reg reg, Operand op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 14); }
void cmovna(Reg reg, Operand op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 6); }
void cmovnae(Reg reg, Operand op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 2); }
void cmovnb(Reg reg, Operand op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 3); }
void cmovnbe(Reg reg, Operand op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 7); }
void cmovnc(Reg reg, Operand op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 3); }
void cmovne(Reg reg, Operand op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 5); }
void cmovng(Reg reg, Operand op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 14); }
void cmovnge(Reg reg, Operand op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 12); }
void cmovnl(Reg reg, Operand op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 13); }
void cmovnle(Reg reg, Operand op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 15); }
void cmovno(Reg reg, Operand op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 1); }
void cmovnp(Reg reg, Operand op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 11); }
void cmovns(Reg reg, Operand op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 9); }
void cmovnz(Reg reg, Operand op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 5); }
void cmovo(Reg reg, Operand op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 0); }
void cmovp(Reg reg, Operand op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 10); }
void cmovpe(Reg reg, Operand op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 10); }
void cmovpo(Reg reg, Operand op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 11); }
void cmovs(Reg reg, Operand op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 8); }
void cmovz(Reg reg, Operand op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 4); }
// ---- Compare family ----
// The cmp<pred>{pd,ps,sd,ss} pseudo-ops forward to cmppd/cmpps/cmpsd/cmpss
// with a fixed predicate immediate:
//   eq=0, lt=1, le=2, unord=3, neq=4, nlt=5, nle=6, ord=7.
// cmpsd is overloaded: the operand-less form emits the single byte 0xA7,
// the (Xmm, Operand, imm8) form goes through opGen with opcode 0xC2.
// cmpxchg selects opcode 0xB0 for 8-bit registers and 0xB1 otherwise.
// crc32 emits 0x66 first when reg is 32-bit and op is 16-bit, then 0xF2.
void cmp(Operand op, uint32 imm) { opRM_I(op, imm, 0x38, 7); }
void cmp(Operand op1, Operand op2) { opRM_RM(op1, op2, 0x38); }
void cmpeqpd(Xmm x, Operand op) { cmppd(x, op, 0); }
void cmpeqps(Xmm x, Operand op) { cmpps(x, op, 0); }
void cmpeqsd(Xmm x, Operand op) { cmpsd(x, op, 0); }
void cmpeqss(Xmm x, Operand op) { cmpss(x, op, 0); }
void cmplepd(Xmm x, Operand op) { cmppd(x, op, 2); }
void cmpleps(Xmm x, Operand op) { cmpps(x, op, 2); }
void cmplesd(Xmm x, Operand op) { cmpsd(x, op, 2); }
void cmpless(Xmm x, Operand op) { cmpss(x, op, 2); }
void cmpltpd(Xmm x, Operand op) { cmppd(x, op, 1); }
void cmpltps(Xmm x, Operand op) { cmpps(x, op, 1); }
void cmpltsd(Xmm x, Operand op) { cmpsd(x, op, 1); }
void cmpltss(Xmm x, Operand op) { cmpss(x, op, 1); }
void cmpneqpd(Xmm x, Operand op) { cmppd(x, op, 4); }
void cmpneqps(Xmm x, Operand op) { cmpps(x, op, 4); }
void cmpneqsd(Xmm x, Operand op) { cmpsd(x, op, 4); }
void cmpneqss(Xmm x, Operand op) { cmpss(x, op, 4); }
void cmpnlepd(Xmm x, Operand op) { cmppd(x, op, 6); }
void cmpnleps(Xmm x, Operand op) { cmpps(x, op, 6); }
void cmpnlesd(Xmm x, Operand op) { cmpsd(x, op, 6); }
void cmpnless(Xmm x, Operand op) { cmpss(x, op, 6); }
void cmpnltpd(Xmm x, Operand op) { cmppd(x, op, 5); }
void cmpnltps(Xmm x, Operand op) { cmpps(x, op, 5); }
void cmpnltsd(Xmm x, Operand op) { cmpsd(x, op, 5); }
void cmpnltss(Xmm x, Operand op) { cmpss(x, op, 5); }
void cmpordpd(Xmm x, Operand op) { cmppd(x, op, 7); }
void cmpordps(Xmm x, Operand op) { cmpps(x, op, 7); }
void cmpordsd(Xmm x, Operand op) { cmpsd(x, op, 7); }
void cmpordss(Xmm x, Operand op) { cmpss(x, op, 7); }
void cmppd(Xmm xmm, Operand op, uint8 imm8) { opGen(xmm, op, 0xC2, 0x66, &isXMM_XMMorMEM, imm8); }
void cmpps(Xmm xmm, Operand op, uint8 imm8) { opGen(xmm, op, 0xC2, 0x100, &isXMM_XMMorMEM, imm8); }
void cmpsb() { db(0xA6); }
void cmpsd() { db(0xA7); }
void cmpsd(Xmm xmm, Operand op, uint8 imm8) { opGen(xmm, op, 0xC2, 0xF2, &isXMM_XMMorMEM, imm8); }
void cmpss(Xmm xmm, Operand op, uint8 imm8) { opGen(xmm, op, 0xC2, 0xF3, &isXMM_XMMorMEM, imm8); }
void cmpsw() { db(0x66); db(0xA7); }
void cmpunordpd(Xmm x, Operand op) { cmppd(x, op, 3); }
void cmpunordps(Xmm x, Operand op) { cmpps(x, op, 3); }
void cmpunordsd(Xmm x, Operand op) { cmpsd(x, op, 3); }
void cmpunordss(Xmm x, Operand op) { cmpss(x, op, 3); }
void cmpxchg(Operand op, Reg reg) { opModRM(reg, op, (op.isREG() && reg.isREG() && op.getBit() == reg.getBit()), op.isMEM(), 0x0F, 0xB0 | (reg.isBit(8) ? 0 : 1)); }
void cmpxchg8b(Address addr) { opModM(addr, new Reg32(1), 0x0F, 0xC7); }
void comisd(Xmm xmm, Operand op) { opGen(xmm, op, 0x2F, 0x66, &isXMM_XMMorMEM); }
void comiss(Xmm xmm, Operand op) { opGen(xmm, op, 0x2F, 0x100, &isXMM_XMMorMEM); }
void cpuid() { db(0x0F); db(0xA2); }
void crc32(Reg32e reg, Operand op) { if (reg.isBit(32) && op.isBit(16)) db(0x66); db(0xF2); opModRM(reg, op, op.isREG(), op.isMEM(), 0x0F, 0x38, 0xF0 | (op.isBit(8) ? 0 : 1)); }
// ---- Conversions (cvt*) and cwd/cwde ----
// All cvt* forms go through opGen; the last argument names the predicate that
// validates the operand pair (isXMM_XMMorMEM, isMMX_XMMorMEM, isXMM_MMXorMEM,
// isREG32_XMMorMEM, isXMM_REG32orMEM). Forms whose operands are not both Xmm
// take plain Operand parameters and rely on that predicate for checking.
// cwd is the 0x66-prefixed form of opcode 0x99; cwde emits 0x98 with no prefix.
void cvtdq2pd(Xmm xmm, Operand op) { opGen(xmm, op, 0xE6, 0xF3, &isXMM_XMMorMEM); }
void cvtdq2ps(Xmm xmm, Operand op) { opGen(xmm, op, 0x5B, 0x100, &isXMM_XMMorMEM); }
void cvtpd2dq(Xmm xmm, Operand op) { opGen(xmm, op, 0xE6, 0xF2, &isXMM_XMMorMEM); }
void cvtpd2pi(Operand reg, Operand op) { opGen(reg, op, 0x2D, 0x66, &isMMX_XMMorMEM); }
void cvtpd2ps(Xmm xmm, Operand op) { opGen(xmm, op, 0x5A, 0x66, &isXMM_XMMorMEM); }
void cvtpi2pd(Operand reg, Operand op) { opGen(reg, op, 0x2A, 0x66, &isXMM_MMXorMEM); }
void cvtpi2ps(Operand reg, Operand op) { opGen(reg, op, 0x2A, 0x100, &isXMM_MMXorMEM); }
void cvtps2dq(Xmm xmm, Operand op) { opGen(xmm, op, 0x5B, 0x66, &isXMM_XMMorMEM); }
void cvtps2pd(Xmm xmm, Operand op) { opGen(xmm, op, 0x5A, 0x100, &isXMM_XMMorMEM); }
void cvtps2pi(Operand reg, Operand op) { opGen(reg, op, 0x2D, 0x100, &isMMX_XMMorMEM); }
void cvtsd2si(Operand reg, Operand op) { opGen(reg, op, 0x2D, 0xF2, &isREG32_XMMorMEM); }
void cvtsd2ss(Xmm xmm, Operand op) { opGen(xmm, op, 0x5A, 0xF2, &isXMM_XMMorMEM); }
void cvtsi2sd(Operand reg, Operand op) { opGen(reg, op, 0x2A, 0xF2, &isXMM_REG32orMEM); }
void cvtsi2ss(Operand reg, Operand op) { opGen(reg, op, 0x2A, 0xF3, &isXMM_REG32orMEM); }
void cvtss2sd(Xmm xmm, Operand op) { opGen(xmm, op, 0x5A, 0xF3, &isXMM_XMMorMEM); }
void cvtss2si(Operand reg, Operand op) { opGen(reg, op, 0x2D, 0xF3, &isREG32_XMMorMEM); }
void cvttpd2dq(Xmm xmm, Operand op) { opGen(xmm, op, 0xE6, 0x66, &isXMM_XMMorMEM); }
void cvttpd2pi(Operand reg, Operand op) { opGen(reg, op, 0x2C, 0x66, &isMMX_XMMorMEM); }
void cvttps2dq(Xmm xmm, Operand op) { opGen(xmm, op, 0x5B, 0xF3, &isXMM_XMMorMEM); }
void cvttps2pi(Operand reg, Operand op) { opGen(reg, op, 0x2C, 0x100, &isMMX_XMMorMEM); }
void cvttsd2si(Operand reg, Operand op) { opGen(reg, op, 0x2C, 0xF2, &isREG32_XMMorMEM); }
void cvttss2si(Operand reg, Operand op) { opGen(reg, op, 0x2C, 0xF3, &isREG32_XMMorMEM); }
void cwd() { db(0x66); db(0x99); }
void cwde() { db(0x98); }
3636 
// ---- Mnemonics starting with 'd' ----
// dec delegates to opIncDec; div to opR_ModM with 6 as the third argument and
// base opcode 0xF6. div*/dp* SSE forms go through opGen; dppd/dpps truncate
// their int immediate to uint8.
void dec(Operand op) { opIncDec(op, 0x48, 1); }
void div(Operand op) { opR_ModM(op, 0, 6, 0xF6); }
void divpd(Xmm xmm, Operand op) { opGen(xmm, op, 0x5E, 0x66, &isXMM_XMMorMEM); }
void divps(Xmm xmm, Operand op) { opGen(xmm, op, 0x5E, 0x100, &isXMM_XMMorMEM); }
void divsd(Xmm xmm, Operand op) { opGen(xmm, op, 0x5E, 0xF2, &isXMM_XMMorMEM); }
void divss(Xmm xmm, Operand op) { opGen(xmm, op, 0x5E, 0xF3, &isXMM_XMMorMEM); }
void dppd(Xmm xmm, Operand op, int imm) { opGen(xmm, op, 0x41, 0x66, &isXMM_XMMorMEM, cast(uint8)(imm), 0x3A); }
void dpps(Xmm xmm, Operand op, int imm) { opGen(xmm, op, 0x40, 0x66, &isXMM_XMMorMEM, cast(uint8)(imm), 0x3A); }
3645 
// ---- Mnemonics starting with 'e' ----
// enter emits opcode 0xC8 followed by the 16-bit value x (dw) and the byte y (db).
void emms() { db(0x0F); db(0x77); }
void enter(uint16 x, uint8 y) { db(0xC8); dw(x); db(y); }
void extractps(Operand op, Xmm xmm, uint8 imm) { opExt(op, xmm, 0x17, imm); }
3649 
// ---- x87 FPU mnemonics, part 1 (f2xm1 .. fdivrp) ----
// Operand-less forms write their two (or three) literal opcode bytes with db().
// Memory forms go through opFpuMem (or opModM with a fixed Reg32 index),
// register forms through opFpu / opFpuFpu.
// One-register overloads of the non-popping arithmetic forms pass st0 as the
// first operand; the popping (*p) forms pass st0 as the second operand.
// NOTE(review): the two 16-bit constants given to opFpuFpu look like the
// encodings for the "st0, st(i)" and "st(i), st0" directions, with 0x0000 or a
// 0x00xx value marking a direction that is not encodable - confirm in opFpuFpu.
void f2xm1() { db(0xD9); db(0xF0); }
void fabs() { db(0xD9); db(0xE1); }
void fadd(Address addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 0, 0); }
void fadd(Fpu reg1) { opFpuFpu(st0, reg1, 0xD8C0, 0xDCC0); }
void fadd(Fpu reg1, Fpu reg2) { opFpuFpu(reg1, reg2, 0xD8C0, 0xDCC0); }
void faddp() { db(0xDE); db(0xC1); }
void faddp(Fpu reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEC0); }
void faddp(Fpu reg1, Fpu reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEC0); }
void fbld(Address addr) { opModM(addr, new Reg32(4), 0xDF, 0x100); }
void fbstp(Address addr) { opModM(addr, new Reg32(6), 0xDF, 0x100); }
void fchs() { db(0xD9); db(0xE0); }
void fclex() { db(0x9B); db(0xDB); db(0xE2); }
void fcmovb(Fpu reg1) { opFpuFpu(st0, reg1, 0xDAC0, 0x00C0); }
void fcmovb(Fpu reg1, Fpu reg2) { opFpuFpu(reg1, reg2, 0xDAC0, 0x00C0); }
void fcmovbe(Fpu reg1) { opFpuFpu(st0, reg1, 0xDAD0, 0x00D0); }
void fcmovbe(Fpu reg1, Fpu reg2) { opFpuFpu(reg1, reg2, 0xDAD0, 0x00D0); }
void fcmove(Fpu reg1) { opFpuFpu(st0, reg1, 0xDAC8, 0x00C8); }
void fcmove(Fpu reg1, Fpu reg2) { opFpuFpu(reg1, reg2, 0xDAC8, 0x00C8); }
void fcmovnb(Fpu reg1) { opFpuFpu(st0, reg1, 0xDBC0, 0x00C0); }
void fcmovnb(Fpu reg1, Fpu reg2) { opFpuFpu(reg1, reg2, 0xDBC0, 0x00C0); }
void fcmovnbe(Fpu reg1) { opFpuFpu(st0, reg1, 0xDBD0, 0x00D0); }
void fcmovnbe(Fpu reg1, Fpu reg2) { opFpuFpu(reg1, reg2, 0xDBD0, 0x00D0); }
void fcmovne(Fpu reg1) { opFpuFpu(st0, reg1, 0xDBC8, 0x00C8); }
void fcmovne(Fpu reg1, Fpu reg2) { opFpuFpu(reg1, reg2, 0xDBC8, 0x00C8); }
void fcmovnu(Fpu reg1) { opFpuFpu(st0, reg1, 0xDBD8, 0x00D8); }
void fcmovnu(Fpu reg1, Fpu reg2) { opFpuFpu(reg1, reg2, 0xDBD8, 0x00D8); }
void fcmovu(Fpu reg1) { opFpuFpu(st0, reg1, 0xDAD8, 0x00D8); }
void fcmovu(Fpu reg1, Fpu reg2) { opFpuFpu(reg1, reg2, 0xDAD8, 0x00D8); }
void fcom() { db(0xD8); db(0xD1); }
void fcom(Address addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 2, 0); }
void fcom(Fpu reg) { opFpu(reg, 0xD8, 0xD0); }
void fcomi(Fpu reg1) { opFpuFpu(st0, reg1, 0xDBF0, 0x00F0); }
void fcomi(Fpu reg1, Fpu reg2) { opFpuFpu(reg1, reg2, 0xDBF0, 0x00F0); }
void fcomip(Fpu reg1) { opFpuFpu(st0, reg1, 0xDFF0, 0x00F0); }
void fcomip(Fpu reg1, Fpu reg2) { opFpuFpu(reg1, reg2, 0xDFF0, 0x00F0); }
void fcomp() { db(0xD8); db(0xD9); }
void fcomp(Address addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 3, 0); }
void fcomp(Fpu reg) { opFpu(reg, 0xD8, 0xD8); }
void fcompp() { db(0xDE); db(0xD9); }
void fcos() { db(0xD9); db(0xFF); }
void fdecstp() { db(0xD9); db(0xF6); }
void fdiv(Address addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 6, 0); }
void fdiv(Fpu reg1) { opFpuFpu(st0, reg1, 0xD8F0, 0xDCF8); }
void fdiv(Fpu reg1, Fpu reg2) { opFpuFpu(reg1, reg2, 0xD8F0, 0xDCF8); }
void fdivp() { db(0xDE); db(0xF9); }
void fdivp(Fpu reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEF8); }
void fdivp(Fpu reg1, Fpu reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEF8); }
void fdivr(Address addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 7, 0); }
void fdivr(Fpu reg1) { opFpuFpu(st0, reg1, 0xD8F8, 0xDCF0); }
void fdivr(Fpu reg1, Fpu reg2) { opFpuFpu(reg1, reg2, 0xD8F8, 0xDCF0); }
void fdivrp() { db(0xDE); db(0xF1); }
void fdivrp(Fpu reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEF0); }
void fdivrp(Fpu reg1, Fpu reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEF0); }
// ---- x87 FPU mnemonics, part 2 (ffree .. fnstsw) ----
// The fi* integer-memory forms all go through opFpuMem and differ in the
// opcode triple and the trailing index arguments.
// The fn* forms emit the bare opcode; the matching non-"n" forms in this file
// (fclex, finit) are the same bytes preceded by 0x9B.
// fnstsw(Reg16) accepts only AX and throws XError(ERR.BAD_PARAMETER) otherwise.
void ffree(Fpu reg) { opFpu(reg, 0xDD, 0xC0); }
void fiadd(Address addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 0, 0); }
void ficom(Address addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 2, 0); }
void ficomp(Address addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 3, 0); }
void fidiv(Address addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 6, 0); }
void fidivr(Address addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 7, 0); }
void fild(Address addr) { opFpuMem(addr, 0xDF, 0xDB, 0xDF, 0, 5); }
void fimul(Address addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 1, 0); }
void fincstp() { db(0xD9); db(0xF7); }
void finit() { db(0x9B); db(0xDB); db(0xE3); }
void fist(Address addr) { opFpuMem(addr, 0xDF, 0xDB, 0x00, 2, 0); }
void fistp(Address addr) { opFpuMem(addr, 0xDF, 0xDB, 0xDF, 3, 7); }
void fisttp(Address addr) { opFpuMem(addr, 0xDF, 0xDB, 0xDD, 1, 0); }
void fisub(Address addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 4, 0); }
void fisubr(Address addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 5, 0); }
void fld(Address addr) { opFpuMem(addr, 0x00, 0xD9, 0xDD, 0, 0); }
void fld(Fpu reg) { opFpu(reg, 0xD9, 0xC0); }
void fld1() { db(0xD9); db(0xE8); }
void fldcw(Address addr) { opModM(addr, new Reg32(5), 0xD9, 0x100); }
void fldenv(Address addr) { opModM(addr, new Reg32(4), 0xD9, 0x100); }
void fldl2e() { db(0xD9); db(0xEA); }
void fldl2t() { db(0xD9); db(0xE9); }
void fldlg2() { db(0xD9); db(0xEC); }
void fldln2() { db(0xD9); db(0xED); }
void fldpi() { db(0xD9); db(0xEB); }
void fldz() { db(0xD9); db(0xEE); }
void fmul(Address addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 1, 0); }
void fmul(Fpu reg1) { opFpuFpu(st0, reg1, 0xD8C8, 0xDCC8); }
void fmul(Fpu reg1, Fpu reg2) { opFpuFpu(reg1, reg2, 0xD8C8, 0xDCC8); }
void fmulp() { db(0xDE); db(0xC9); }
void fmulp(Fpu reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEC8); }
void fmulp(Fpu reg1, Fpu reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEC8); }
void fnclex() { db(0xDB); db(0xE2); }
void fninit() { db(0xDB); db(0xE3); }
void fnop() { db(0xD9); db(0xD0); }
void fnsave(Address addr) { opModM(addr, new Reg32(6), 0xDD, 0x100); }
void fnstcw(Address addr) { opModM(addr, new Reg32(7), 0xD9, 0x100); }
void fnstenv(Address addr) { opModM(addr, new Reg32(6), 0xD9, 0x100); }
void fnstsw(Address addr) { opModM(addr, new Reg32(7), 0xDD, 0x100); }
void fnstsw(Reg16 r) { if (r.getIdx() != Operand.AX) throw new XError(ERR.BAD_PARAMETER); db(0xDF); db(0xE0); }
// ---- x87 FPU mnemonics, part 3 (fpatan .. fyl2xp1) ----
// fsave/fstcw/fstenv/fstsw emit 0x9B and then the same opModM/db encoding as
// their fn* counterparts (fnsave/fnstcw/fnstenv/fnstsw).
// fstsw(Reg16) accepts only AX and throws XError(ERR.BAD_PARAMETER) otherwise.
// One-register overloads of fsub/fsubr/fucomi/fucomip pass st0 as the first
// operand; fsubp/fsubrp pass st0 as the second operand.
void fpatan() { db(0xD9); db(0xF3); }
void fprem() { db(0xD9); db(0xF8); }
void fprem1() { db(0xD9); db(0xF5); }
void fptan() { db(0xD9); db(0xF2); }
void frndint() { db(0xD9); db(0xFC); }
void frstor(Address addr) { opModM(addr, new Reg32(4), 0xDD, 0x100); }
void fsave(Address addr) { db(0x9B); opModM(addr, new Reg32(6), 0xDD, 0x100); }
void fscale() { db(0xD9); db(0xFD); }
void fsin() { db(0xD9); db(0xFE); }
void fsincos() { db(0xD9); db(0xFB); }
void fsqrt() { db(0xD9); db(0xFA); }
void fst(Address addr) { opFpuMem(addr, 0x00, 0xD9, 0xDD, 2, 0); }
void fst(Fpu reg) { opFpu(reg, 0xDD, 0xD0); }
void fstcw(Address addr) { db(0x9B); opModM(addr, new Reg32(7), 0xD9, 0x100); }
void fstenv(Address addr) { db(0x9B); opModM(addr, new Reg32(6), 0xD9, 0x100); }
void fstp(Address addr) { opFpuMem(addr, 0x00, 0xD9, 0xDD, 3, 0); }
void fstp(Fpu reg) { opFpu(reg, 0xDD, 0xD8); }
void fstsw(Address addr) { db(0x9B); opModM(addr, new Reg32(7), 0xDD, 0x100); }
void fstsw(Reg16 r) { if (r.getIdx() != Operand.AX) throw new XError(ERR.BAD_PARAMETER); db(0x9B); db(0xDF); db(0xE0); }
void fsub(Address addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 4, 0); }
void fsub(Fpu reg1) { opFpuFpu(st0, reg1, 0xD8E0, 0xDCE8); }
void fsub(Fpu reg1, Fpu reg2) { opFpuFpu(reg1, reg2, 0xD8E0, 0xDCE8); }
void fsubp() { db(0xDE); db(0xE9); }
void fsubp(Fpu reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEE8); }
void fsubp(Fpu reg1, Fpu reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEE8); }
void fsubr(Address addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 5, 0); }
void fsubr(Fpu reg1) { opFpuFpu(st0, reg1, 0xD8E8, 0xDCE0); }
void fsubr(Fpu reg1, Fpu reg2) { opFpuFpu(reg1, reg2, 0xD8E8, 0xDCE0); }
void fsubrp() { db(0xDE); db(0xE1); }
void fsubrp(Fpu reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEE0); }
void fsubrp(Fpu reg1, Fpu reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEE0); }
void ftst() { db(0xD9); db(0xE4); }
void fucom() { db(0xDD); db(0xE1); }
void fucom(Fpu reg) { opFpu(reg, 0xDD, 0xE0); }
void fucomi(Fpu reg1) { opFpuFpu(st0, reg1, 0xDBE8, 0x00E8); }
void fucomi(Fpu reg1, Fpu reg2) { opFpuFpu(reg1, reg2, 0xDBE8, 0x00E8); }
void fucomip(Fpu reg1) { opFpuFpu(st0, reg1, 0xDFE8, 0x00E8); }
void fucomip(Fpu reg1, Fpu reg2) { opFpuFpu(reg1, reg2, 0xDFE8, 0x00E8); }
void fucomp() { db(0xDD); db(0xE9); }
void fucomp(Fpu reg) { opFpu(reg, 0xDD, 0xE8); }
void fucompp() { db(0xDA); db(0xE9); }
void fwait() { db(0x9B); }
void fxam() { db(0xD9); db(0xE5); }
void fxch() { db(0xD9); db(0xC9); }
void fxch(Fpu reg) { opFpu(reg, 0xD9, 0xC8); }
void fxrstor(Address addr) { opModM(addr, new Reg32(1), 0x0F, 0xAE); }
void fxtract() { db(0xD9); db(0xF4); }
void fyl2x() { db(0xD9); db(0xF1); }
void fyl2xp1() { db(0xD9); db(0xF9); }
3792 
// ---- Mnemonics starting with 'g' ----
// The two affine forms truncate their int immediate to uint8 and pass 0x3A as
// the last opGen argument; gf2p8mulb has no immediate (NONE) and passes 0x38.
void gf2p8affineinvqb(Xmm xmm, Operand op, int imm) { opGen(xmm, op, 0xCF, 0x66, &isXMM_XMMorMEM, cast(uint8)(imm), 0x3A); }
void gf2p8affineqb( Xmm xmm, Operand op, int imm) { opGen(xmm, op, 0xCE, 0x66, &isXMM_XMMorMEM, cast(uint8)(imm), 0x3A); }
void gf2p8mulb(Xmm xmm, Operand op) { opGen(xmm, op, 0xCF, 0x66, &isXMM_XMMorMEM, NONE, 0x38); }
3796 
// ---- Mnemonics starting with 'h' ----
// hadd* use opcode 0x7C and hsub* 0x7D; the pd forms pass 0x66 and the ps
// forms 0xF2 as the fourth opGen argument.
void haddpd(Xmm xmm, Operand op) { opGen(xmm, op, 0x7C, 0x66, &isXMM_XMMorMEM); }
void haddps(Xmm xmm, Operand op) { opGen(xmm, op, 0x7C, 0xF2, &isXMM_XMMorMEM); }
void hsubpd(Xmm xmm, Operand op) { opGen(xmm, op, 0x7D, 0x66, &isXMM_XMMorMEM); }
void hsubps(Xmm xmm, Operand op) { opGen(xmm, op, 0x7D, 0xF2, &isXMM_XMMorMEM); }
3801 
// ---- Mnemonics starting with 'i' ----
// `in` and `int` are D keywords, so those instructions are exposed as in_ and int_.
// idiv/imul share base opcode 0xF6 and differ in the third opR_ModM argument (7/5).
void idiv(Operand op) { opR_ModM(op, 0, 7, 0xF6); }
void imul(Operand op) { opR_ModM(op, 0, 5, 0xF6); }
void in_(Reg a, Reg d) { opInOut(a, d, 0xEC); }
void in_(Reg a, uint8 v) { opInOut(a, 0xE4, v); }
void inc(Operand op) { opIncDec(op, 0x40, 0); }
void insertps(Xmm xmm, Operand op, uint8 imm) { opGen(xmm, op, 0x21, 0x66, &isXMM_XMMorMEM, imm, 0x3A); }
void int3() { db(0xCC); }
void int_(uint8 x) { db(0xCD); db(x); }
3810 
3811 void ja(T)(T label, LabelType type = T_AUTO) { opJmp(label, type, 0x77, 0x87, 0x0F); }
3812 void ja(const char* label, LabelType type = T_AUTO) { ja(to!string(label), type); }
3813 void ja(const void* addr) { opJmpAbs(addr, T_NEAR, 0x77, 0x87, 0x0F); }
3814 
3815 void jae(T)(T label, LabelType type = T_AUTO) { opJmp(label, type, 0x73, 0x83, 0x0F); }
3816 void jae(const char* label, LabelType type = T_AUTO) { jae(to!string(label), type); }
3817 void jae(const void* addr) { opJmpAbs(addr, T_NEAR, 0x73, 0x83, 0x0F); }
3818 
3819 void jb(T)(T label, LabelType type = T_AUTO) { opJmp(label, type, 0x72, 0x82, 0x0F); }
3820 void jb(const char* label, LabelType type = T_AUTO) { jb(to!string(label), type); }
3821 void jb(const void* addr) { opJmpAbs(addr, T_NEAR, 0x72, 0x82, 0x0F); }
3822 
3823 void jbe(T)(T label, LabelType type = T_AUTO) { opJmp(label, type, 0x76, 0x86, 0x0F); }
3824 void jbe(const char* label, LabelType type = T_AUTO) { jbe(to!string(label), type); }
3825 void jbe(const void* addr) { opJmpAbs(addr, T_NEAR, 0x76, 0x86, 0x0F); }
3826 
3827 void jc(T)(T label, LabelType type = T_AUTO) { opJmp(label, type, 0x72, 0x82, 0x0F); }
3828 void jc(const char* label, LabelType type = T_AUTO) { jc(to!string(label), type); }
3829 void jc(const void* addr) { opJmpAbs(addr, T_NEAR, 0x72, 0x82, 0x0F); }
3830 
3831 void je(T)(T label, LabelType type = T_AUTO) { opJmp(label, type, 0x74, 0x84, 0x0F); }
3832 void je(const char* label, LabelType type = T_AUTO) { je(to!string(label), type); }
3833 void je(const void* addr) { opJmpAbs(addr, T_NEAR, 0x74, 0x84, 0x0F); }
3834 
3835 void jg(T)(T label, LabelType type = T_AUTO) { opJmp(label, type, 0x7F, 0x8F, 0x0F); }
3836 void jg(const char* label, LabelType type = T_AUTO) { jg(to!string(label), type); }
3837 void jg(const void* addr) { opJmpAbs(addr, T_NEAR, 0x7F, 0x8F, 0x0F); }
3838 
3839 void jge(T)(T label, LabelType type = T_AUTO) { opJmp(label, type, 0x7D, 0x8D, 0x0F); }
3840 void jge(const char* label, LabelType type = T_AUTO) { jge(to!string(label), type); }
3841 void jge(const void* addr) { opJmpAbs(addr, T_NEAR, 0x7D, 0x8D, 0x0F); }
3842 
3843 void jl(T)(T label, LabelType type = T_AUTO) { opJmp(label, type, 0x7C, 0x8C, 0x0F); }
3844 void jl(const char* label, LabelType type = T_AUTO) { jl(to!string(label), type); }
3845 void jl(const void* addr) { opJmpAbs(addr, T_NEAR, 0x7C, 0x8C, 0x0F); }
3846 
3847 void jle(T)(T label, LabelType type = T_AUTO) { opJmp(label, type, 0x7E, 0x8E, 0x0F); }
3848 void jle(const char* label, LabelType type = T_AUTO) { jle(to!string(label), type); }
3849 void jle(const void* addr) { opJmpAbs(addr, T_NEAR, 0x7E, 0x8E, 0x0F); }
3850 
3851 void jna(T)(T label, LabelType type = T_AUTO) { opJmp(label, type, 0x76, 0x86, 0x0F); }
3852 void jna(const char* label, LabelType type = T_AUTO) { jna(to!string(label), type); }
3853 void jna(const void* addr) { opJmpAbs(addr, T_NEAR, 0x76, 0x86, 0x0F); }
3854 
3855 void jnae(T)(T label, LabelType type = T_AUTO) { opJmp(label, type, 0x72, 0x82, 0x0F); }
3856 void jnae(const char* label, LabelType type = T_AUTO) { jnae(to!string(label), type); }
3857 void jnae(const void* addr) { opJmpAbs(addr, T_NEAR, 0x72, 0x82, 0x0F); }
3858 
3859 void jnb(T)(T label, LabelType type = T_AUTO) { opJmp(label, type, 0x73, 0x83, 0x0F); }
3860 void jnb(const char* label, LabelType type = T_AUTO) { jnb(to!string(label), type); }
3861 void jnb(const void* addr) { opJmpAbs(addr, T_NEAR, 0x73, 0x83, 0x0F); }
3862 
3863 void jnbe(T)(T label, LabelType type = T_AUTO) { opJmp(label, type, 0x77, 0x87, 0x0F); }
3864 void jnbe(const char* label, LabelType type = T_AUTO) { jnbe(to!string(label), type); }
3865 void jnbe(const void* addr) { opJmpAbs(addr, T_NEAR, 0x77, 0x87, 0x0F); }
3866 
3867 void jnc(T)(T label, LabelType type = T_AUTO) { opJmp(label, type, 0x73, 0x83, 0x0F); }
3868 void jnc(const char* label, LabelType type = T_AUTO) { jnc(to!string(label), type); }
3869 void jnc(const void* addr) { opJmpAbs(addr, T_NEAR, 0x73, 0x83, 0x0F); }
3870 
3871 void jne(T)(T label, LabelType type = T_AUTO) { opJmp(label, type, 0x75, 0x85, 0x0F); }
3872 void jne(const char* label, LabelType type = T_AUTO) { jne(to!string(label), type); }
3873 void jne(const void* addr) { opJmpAbs(addr, T_NEAR, 0x75, 0x85, 0x0F); }
3874 
// jng (jump if not greater). Label overloads forward to opJmp; the raw-address overload
// forwards to opJmpAbs with T_NEAR. Constants 0x7E / 0x8E / 0x0F are presumably the short
// opcode, near opcode and near-form prefix (confirm in opJmp). Same constants as jle.
void jng(T)(T label, LabelType type = T_AUTO) { opJmp(label, type, 0x7E, 0x8E, 0x0F); }
void jng(const char* label, LabelType type = T_AUTO) { jng(to!string(label), type); }
void jng(const void* addr) { opJmpAbs(addr, T_NEAR, 0x7E, 0x8E, 0x0F); }
3878 
// jnge (jump if not greater or equal). Label overloads forward to opJmp; the raw-address
// overload forwards to opJmpAbs with T_NEAR. Constants 0x7C / 0x8C / 0x0F are presumably
// the short opcode, near opcode and near-form prefix (confirm in opJmp).
void jnge(T)(T label, LabelType type = T_AUTO) { opJmp(label, type, 0x7C, 0x8C, 0x0F); }
void jnge(const char* label, LabelType type = T_AUTO) { jnge(to!string(label), type); }
void jnge(const void* addr) { opJmpAbs(addr, T_NEAR, 0x7C, 0x8C, 0x0F); }
3882 
// jnl (jump if not less). Label overloads forward to opJmp; the raw-address overload
// forwards to opJmpAbs with T_NEAR. Constants 0x7D / 0x8D / 0x0F are presumably the short
// opcode, near opcode and near-form prefix (confirm in opJmp).
void jnl(T)(T label, LabelType type = T_AUTO) { opJmp(label, type, 0x7D, 0x8D, 0x0F); }
void jnl(const char* label, LabelType type = T_AUTO) { jnl(to!string(label), type); }
void jnl(const void* addr) { opJmpAbs(addr, T_NEAR, 0x7D, 0x8D, 0x0F); }
3886 
// jnle (jump if not less or equal). Label overloads forward to opJmp; the raw-address
// overload forwards to opJmpAbs with T_NEAR. Constants 0x7F / 0x8F / 0x0F are presumably
// the short opcode, near opcode and near-form prefix (confirm in opJmp).
void jnle(T)(T label, LabelType type = T_AUTO) { opJmp(label, type, 0x7F, 0x8F, 0x0F); }
void jnle(const char* label, LabelType type = T_AUTO) { jnle(to!string(label), type); }
void jnle(const void* addr) { opJmpAbs(addr, T_NEAR, 0x7F, 0x8F, 0x0F); }
3890 
// jno (jump if not overflow). Label overloads forward to opJmp; the raw-address overload
// forwards to opJmpAbs with T_NEAR. Constants 0x71 / 0x81 / 0x0F are presumably the short
// opcode, near opcode and near-form prefix (confirm in opJmp).
void jno(T)(T label, LabelType type = T_AUTO) { opJmp(label, type, 0x71, 0x81, 0x0F); }
void jno(const char* label, LabelType type = T_AUTO) { jno(to!string(label), type); }
void jno(const void* addr) { opJmpAbs(addr, T_NEAR, 0x71, 0x81, 0x0F); }
3894 
// jnp (jump if not parity). Label overloads forward to opJmp; the raw-address overload
// forwards to opJmpAbs with T_NEAR. Constants 0x7B / 0x8B / 0x0F are presumably the short
// opcode, near opcode and near-form prefix (confirm in opJmp). Same constants as jpo.
//
// The address overload takes `const void*` like every other conditional jump in this
// section. With a mutable `void*` parameter, a `const(void)*` argument could not bind to it
// and would be captured by the jnp(T) template instead. Mutable pointers still convert
// implicitly, so existing callers are unaffected.
void jnp(T)(T label, LabelType type = T_AUTO) { opJmp(label, type, 0x7B, 0x8B, 0x0F); }
void jnp(const char* label, LabelType type = T_AUTO) { jnp(to!string(label), type); }
void jnp(const void* addr) { opJmpAbs(addr, T_NEAR, 0x7B, 0x8B, 0x0F); }
3898 
// jns (jump if not sign). Label overloads forward to opJmp; the raw-address overload
// forwards to opJmpAbs with T_NEAR. Constants 0x79 / 0x89 / 0x0F are presumably the short
// opcode, near opcode and near-form prefix (confirm in opJmp).
void jns(T)(T label, LabelType type = T_AUTO) { opJmp(label, type, 0x79, 0x89, 0x0F); }
void jns(const char* label, LabelType type = T_AUTO) { jns(to!string(label), type); }
void jns(const void* addr) { opJmpAbs(addr, T_NEAR, 0x79, 0x89, 0x0F); }
3902 
// jnz (jump if not zero). Label overloads forward to opJmp; the raw-address overload
// forwards to opJmpAbs with T_NEAR. Constants 0x75 / 0x85 / 0x0F are presumably the short
// opcode, near opcode and near-form prefix (confirm in opJmp). Same constants as jne.
void jnz(T)(T label, LabelType type = T_AUTO) { opJmp(label, type, 0x75, 0x85, 0x0F); }
void jnz(const char* label, LabelType type = T_AUTO) { jnz(to!string(label), type); }
void jnz(const void* addr) { opJmpAbs(addr, T_NEAR, 0x75, 0x85, 0x0F); }
3906 
// jo (jump if overflow). Label overloads forward to opJmp; the raw-address overload
// forwards to opJmpAbs with T_NEAR. Constants 0x70 / 0x80 / 0x0F are presumably the short
// opcode, near opcode and near-form prefix (confirm in opJmp).
void jo(T)(T label, LabelType type = T_AUTO) { opJmp(label, type, 0x70, 0x80, 0x0F); }
void jo(const char* label, LabelType type = T_AUTO) { jo(to!string(label), type); }
void jo(const void* addr) { opJmpAbs(addr, T_NEAR, 0x70, 0x80, 0x0F); }
3910 
// jp (jump if parity). Label overloads forward to opJmp; the raw-address overload
// forwards to opJmpAbs with T_NEAR. Constants 0x7A / 0x8A / 0x0F are presumably the short
// opcode, near opcode and near-form prefix (confirm in opJmp). Same constants as jpe.
void jp(T)(T label, LabelType type = T_AUTO) { opJmp(label, type, 0x7A, 0x8A, 0x0F); }
void jp(const char* label, LabelType type = T_AUTO) { jp(to!string(label), type); }
void jp(const void* addr) { opJmpAbs(addr, T_NEAR, 0x7A, 0x8A, 0x0F); }
3914 
// jpe (jump if parity even). Label overloads forward to opJmp; the raw-address overload
// forwards to opJmpAbs with T_NEAR. Constants 0x7A / 0x8A / 0x0F are presumably the short
// opcode, near opcode and near-form prefix (confirm in opJmp). Same constants as jp.
void jpe(T)(T label, LabelType type = T_AUTO) { opJmp(label, type, 0x7A, 0x8A, 0x0F); }
void jpe(const char* label, LabelType type = T_AUTO) { jpe(to!string(label), type); }
void jpe(const void* addr) { opJmpAbs(addr, T_NEAR, 0x7A, 0x8A, 0x0F); }
3918 
// jpo (jump if parity odd). Label overloads forward to opJmp; the raw-address overload
// forwards to opJmpAbs with T_NEAR. Constants 0x7B / 0x8B / 0x0F are presumably the short
// opcode, near opcode and near-form prefix (confirm in opJmp). Same constants as jnp.
void jpo(T)(T label, LabelType type = T_AUTO) { opJmp(label, type, 0x7B, 0x8B, 0x0F); }
void jpo(const char* label, LabelType type = T_AUTO) { jpo(to!string(label), type); }
void jpo(const void* addr) { opJmpAbs(addr, T_NEAR, 0x7B, 0x8B, 0x0F); }
3922 
// js (jump if sign). Label overloads forward to opJmp; the raw-address overload
// forwards to opJmpAbs with T_NEAR. Constants 0x78 / 0x88 / 0x0F are presumably the short
// opcode, near opcode and near-form prefix (confirm in opJmp).
void js(T)(T label, LabelType type = T_AUTO) { opJmp(label, type, 0x78, 0x88, 0x0F); }
void js(const char* label, LabelType type = T_AUTO) { js(to!string(label), type); }
void js(const void* addr) { opJmpAbs(addr, T_NEAR, 0x78, 0x88, 0x0F); }
3926 
// jz (jump if zero). Label overloads forward to opJmp; the raw-address overload
// forwards to opJmpAbs with T_NEAR. Constants 0x74 / 0x84 / 0x0F are presumably the short
// opcode, near opcode and near-form prefix (confirm in opJmp).
void jz(T)(T label, LabelType type = T_AUTO) { opJmp(label, type, 0x74, 0x84, 0x0F); }
void jz(const char* label, LabelType type = T_AUTO) { jz(to!string(label), type); }
void jz(const void* addr) { opJmpAbs(addr, T_NEAR, 0x74, 0x84, 0x0F); }
3930 
// lahf: writes the single opcode byte 0x9F.
void lahf() { db(0x9F); }
// lddqu: writes the 0xF2 prefix byte, then encodes 0x0F 0xF0 with the memory operand.
void lddqu(Xmm xmm, Address addr) { db(0xF2); opModM(addr, xmm, 0x0F, 0xF0); }
// ldmxcsr: encoded as 0x0F 0xAE; the temporary Reg32(2) presumably only supplies the
// ModRM reg field (/2) -- confirm in opModM.
void ldmxcsr(Address addr) { opModM(addr, new Reg32(2), 0x0F, 0xAE); }
// lea: encodes opcode 0x8D with the given destination register and memory operand.
// Throws: XError(ERR.BAD_SIZE_OF_REGISTER) when reg does not pass isBit(16 | i32e).
void lea(Reg reg, Address addr)
{
	if (reg.isBit(16 | i32e))
	{
		opModM(addr, reg, 0x8D);
		return;
	}
	throw new XError(ERR.BAD_SIZE_OF_REGISTER);
}
// Fixed-byte instructions, segment loads and string loads.
// leave: single byte 0xC9.
void leave() { db(0xC9); }
// lfence: three bytes 0x0F 0xAE 0xE8.
void lfence() { db(0x0F); db(0xAE); db(0xE8); }
// lfs / lgs: far-pointer loads, delegated to opLoadSeg with 0x0F 0xB4 and 0x0F 0xB5.
void lfs(Reg reg, Address addr) { opLoadSeg(addr, reg, 0x0F, 0xB4); }
void lgs(Reg reg, Address addr) { opLoadSeg(addr, reg, 0x0F, 0xB5); }
// lock: writes the prefix byte 0xF0 on its own; the caller emits the instruction that follows.
void lock() { db(0xF0); }
// lodsb / lodsd / lodsw: 0xAC, 0xAD, and 0x66 followed by 0xAD for the word form.
void lodsb() { db(0xAC); }
void lodsd() { db(0xAD); }
void lodsw() { db(0x66); db(0xAD); }
// lss: far-pointer load, delegated to opLoadSeg with 0x0F 0xB2.
void lss(Reg reg, Address addr) { opLoadSeg(addr, reg, 0x0F, 0xB2); }
// loop / loope / loopne: always requested as T_SHORT from opJmp, with 0 passed for the
// two remaining opcode arguments (presumably because no near form exists -- confirm in opJmp).
// The const char* overloads convert to a D string and forward to the string overload.
// loop: opcode 0xE2.
void loop(Label label) { opJmp(label, T_SHORT, 0xE2, 0, 0); }
void loop(const char* label) { loop(to!string(label)); }
void loop(string label) { opJmp(label, T_SHORT, 0xE2, 0, 0); }
// loope: opcode 0xE1.
void loope(Label label) { opJmp(label, T_SHORT, 0xE1, 0, 0); }
void loope(const char* label) { loope(to!string(label)); }
void loope(string label) { opJmp(label, T_SHORT, 0xE1, 0, 0); }
// loopne: opcode 0xE0.
void loopne(Label label) { opJmp(label, T_SHORT, 0xE0, 0, 0); }
void loopne(const char* label) { loopne(to!string(label)); }
void loopne(string label) { opJmp(label, T_SHORT, 0xE0, 0, 0); }
// lzcnt: delegated to opSp1 with the bytes 0xF3, 0x0F, 0xBD (prefix + two-byte opcode).
void lzcnt(Reg reg, Operand op) { opSp1(reg, op, 0xF3, 0x0F, 0xBD); }
3954 
// maskmovdqu: writes the 0x66 prefix byte, then encodes 0x0F 0xF7 register-to-register.
void maskmovdqu(Xmm reg1, Xmm reg2)
{
	db(0x66);
	opModR(reg1, reg2, 0x0F, 0xF7);
}

// maskmovq: same 0x0F 0xF7 opcode without a prefix; both operands must report isMMX().
// Throws: XError(ERR.BAD_COMBINATION) when either operand is not an MMX register.
void maskmovq(Mmx reg1, Mmx reg2)
{
	if (reg1.isMMX() && reg2.isMMX())
	{
		opModR(reg1, reg2, 0x0F, 0xF7);
		return;
	}
	throw new XError(ERR.BAD_COMBINATION);
}
// max* / min* family: all go through opGen with opcode 0x5F (max) or 0x5D (min) and the
// isXMM_XMMorMEM operand check. The fourth argument selects the variant: 0x66 (pd),
// 0xF2 (sd), 0xF3 (ss); 0x100 for the ps forms appears to be the "no prefix" marker
// (TODO confirm against opGen / NONE).
void maxpd(Xmm xmm, Operand op) { opGen(xmm, op, 0x5F, 0x66, &isXMM_XMMorMEM); }
void maxps(Xmm xmm, Operand op) { opGen(xmm, op, 0x5F, 0x100, &isXMM_XMMorMEM); }
void maxsd(Xmm xmm, Operand op) { opGen(xmm, op, 0x5F, 0xF2, &isXMM_XMMorMEM); }
void maxss(Xmm xmm, Operand op) { opGen(xmm, op, 0x5F, 0xF3, &isXMM_XMMorMEM); }
// mfence: three bytes 0x0F 0xAE 0xF0.
void mfence() { db(0x0F); db(0xAE); db(0xF0); }
void minpd(Xmm xmm, Operand op) { opGen(xmm, op, 0x5D, 0x66, &isXMM_XMMorMEM); }
void minps(Xmm xmm, Operand op) { opGen(xmm, op, 0x5D, 0x100, &isXMM_XMMorMEM); }
void minsd(Xmm xmm, Operand op) { opGen(xmm, op, 0x5D, 0xF2, &isXMM_XMMorMEM); }
void minss(Xmm xmm, Operand op) { opGen(xmm, op, 0x5D, 0xF3, &isXMM_XMMorMEM); }
// monitor: three bytes 0x0F 0x01 0xC8. monitorx: three bytes 0x0F 0x01 0xFA.
void monitor() { db(0x0F); db(0x01); db(0xC8); }
void monitorx() { db(0x0F); db(0x01); db(0xFA); }
// movapd / movaps. Store forms (Address first) use opcode 0x0F 0x29 via opModM; load and
// register forms use opcode 0x28 via opMMX. The pd variants add 0x66 (written directly for
// the store, passed to opMMX for the load); 0x100 for movaps appears to mean "no prefix"
// (TODO confirm against opMMX).
void movapd(Address addr, Xmm xmm) { db(0x66); opModM(addr, xmm, 0x0F, 0x29); }
void movapd(Xmm xmm, Operand op) { opMMX(xmm, op, 0x28, 0x66); }
void movaps(Address addr, Xmm xmm) { opModM(addr, xmm, 0x0F, 0x29); }
void movaps(Xmm xmm, Operand op) { opMMX(xmm, op, 0x28, 0x100); }
// movbe: three-byte opcode 0x0F 0x38 0xF1 for the store form (memory destination) and
// 0x0F 0x38 0xF0 for the load form (register destination).
void movbe(Address addr, Reg reg) { opModM(addr, reg, 0x0F, 0x38, 0xF1); }
void movbe(Reg reg, Address addr) { opModM(addr, reg, 0x0F, 0x38, 0xF0); }
// movd, four operand orders. In every form the 0x66 prefix byte is written first when the
// vector operand reports isXMM(). Opcode 0x0F 0x7E is used when the vector register is the
// source and 0x0F 0x6E when it is the destination.

// Store: vector register -> memory.
void movd(Address addr, Mmx mmx)
{
	if (mmx.isXMM())
	{
		db(0x66);
	}
	opModM(addr, mmx, 0x0F, 0x7E);
}

// Load: memory -> vector register.
void movd(Mmx mmx, Address addr)
{
	if (mmx.isXMM())
	{
		db(0x66);
	}
	opModM(addr, mmx, 0x0F, 0x6E);
}

// 32-bit general register -> vector register.
void movd(Mmx mmx, Reg32 reg)
{
	if (mmx.isXMM())
	{
		db(0x66);
	}
	opModR(mmx, reg, 0x0F, 0x6E);
}

// Vector register -> 32-bit general register.
void movd(Reg32 reg, Mmx mmx)
{
	if (mmx.isXMM())
	{
		db(0x66);
	}
	opModR(mmx, reg, 0x0F, 0x7E);
}
// movddup: opGen with opcode 0x12, prefix 0xF2, no immediate and no extra opcode byte
// (both passed as NONE).
void movddup(Xmm xmm, Operand op) { opGen(xmm, op, 0x12, 0xF2, &isXMM_XMMorMEM, NONE, NONE); }
// movdq2q: 0xF2 prefix byte, then 0x0F 0xD6 register-to-register.
void movdq2q(Mmx mmx, Xmm xmm) { db(0xF2); opModR(mmx, xmm, 0x0F, 0xD6); }
// movdqa / movdqu: store forms use 0x0F 0x7F, load/register forms use 0x6F through opMMX.
// The prefix is 0x66 for movdqa and 0xF3 for movdqu.
void movdqa(Address addr, Xmm xmm) { db(0x66); opModM(addr, xmm, 0x0F, 0x7F); }
void movdqa(Xmm xmm, Operand op) { opMMX(xmm, op, 0x6F, 0x66); }
void movdqu(Address addr, Xmm xmm) { db(0xF3); opModM(addr, xmm, 0x0F, 0x7F); }
void movdqu(Xmm xmm, Operand op) { opMMX(xmm, op, 0x6F, 0xF3); }
// High/low half moves. The register-only forms (movhlps, movlhps) encode directly with
// opModR; the forms that accept a memory operand on either side are validated and encoded
// by opMovXMM. Opcode 0x16 is used for the "h" moves and movlhps, 0x12 for the "l" moves
// and movhlps; 0x66 selects the pd variants, 0x100 appears to mean "no prefix" (TODO confirm).
void movhlps(Xmm reg1, Xmm reg2) {  opModR(reg1, reg2, 0x0F, 0x12); }
void movhpd(Operand op1, Operand op2) { opMovXMM(op1, op2, 0x16, 0x66); }
void movhps(Operand op1, Operand op2) { opMovXMM(op1, op2, 0x16, 0x100); }
void movlhps(Xmm reg1, Xmm reg2) {  opModR(reg1, reg2, 0x0F, 0x16); }
void movlpd(Operand op1, Operand op2) { opMovXMM(op1, op2, 0x12, 0x66); }
void movlps(Operand op1, Operand op2) { opMovXMM(op1, op2, 0x12, 0x100); }
// movmskpd: writes the 0x66 prefix byte and then reuses movmskps for the rest of the encoding.
void movmskpd(Reg32e reg, Xmm xmm) { db(0x66); movmskps(reg, xmm); }
// movmskps: 0x0F 0x50 register-to-register.
void movmskps(Reg32e reg, Xmm xmm) { opModR(reg, xmm, 0x0F, 0x50); }
// Non-temporal stores.
// movntdq / movntpd: the XMM index is re-wrapped in a Reg16 before calling opModM --
// presumably so the generic encoder emits the 0x66 operand-size prefix these opcodes
// need (NOTE(review): confirm in opModM / rex handling).
void movntdq(Address addr, Xmm reg) { opModM(addr, new Reg16(reg.getIdx()), 0x0F, 0xE7); }
// movntdqa: explicit 0x66 prefix byte, then three-byte opcode 0x0F 0x38 0x2A (load form).
void movntdqa(Xmm xmm, Address addr) { db(0x66); opModM(addr, xmm, 0x0F, 0x38, 0x2A); }
// movnti: 0x0F 0xC3 with a general-purpose source register.
void movnti(Address addr, Reg32e reg) { opModM(addr, reg, 0x0F, 0xC3); }
void movntpd(Address addr, Xmm reg) { opModM(addr, new Reg16(reg.getIdx()), 0x0F, 0x2B); }
// movntps: the XMM index is re-wrapped in an Mmx so that no prefix is produced
// (presumably -- same opcode 0x0F 0x2B as movntpd; confirm in opModM).
void movntps(Address addr, Xmm xmm) { opModM(addr, new Mmx(xmm.getIdx()), 0x0F, 0x2B); }
// movntq: 0x0F 0xE7 store of an MMX register.
// Throws: XError(ERR.BAD_COMBINATION) when mmx does not report isMMX().
void movntq(Address addr, Mmx mmx)
{
	if (mmx.isMMX())
	{
		opModM(addr, mmx, 0x0F, 0xE7);
		return;
	}
	throw new XError(ERR.BAD_COMBINATION);
}
// movq, store form: for an XMM source the 0x66 prefix byte is written and opcode
// 0x0F 0xD6 is used; otherwise opcode 0x0F 0x7F is used with no prefix.
void movq(Address addr, Mmx mmx)
{
	if (mmx.isXMM())
	{
		db(0x66);
		opModM(addr, mmx, 0x0F, 0xD6);
	}
	else
	{
		opModM(addr, mmx, 0x0F, 0x7F);
	}
}

// movq, load/register form: for an XMM destination the 0xF3 prefix byte is written and
// opcode 0x0F 0x7E is used; otherwise opcode 0x0F 0x6F. opModRM receives two conditions:
// whether both operands have the same kind, and whether the source is memory.
void movq(Mmx mmx, Operand op)
{
	if (mmx.isXMM())
	{
		db(0xF3);
	}
	bool sameKind = (mmx.getKind() == op.getKind());
	opModRM(mmx, op, sameKind, op.isMEM(), 0x0F, mmx.isXMM() ? 0x7E : 0x6F);
}
// movq2dq: 0xF3 prefix byte, then 0x0F 0xD6 register-to-register.
void movq2dq(Xmm xmm, Mmx mmx) { db(0xF3); opModR(xmm, mmx, 0x0F, 0xD6); }
// String moves: movsb is 0xA4, the operand-less movsd is 0xA5.
void movsb() { db(0xA4); }
void movsd() { db(0xA5); }
// SSE2 scalar-double movsd (overloads of the same name): prefix 0xF2, store opcode
// 0x0F 0x11, load/register opcode 0x10 through opMMX.
void movsd(Address addr, Xmm xmm) { db(0xF2); opModM(addr, xmm, 0x0F, 0x11); }
void movsd(Xmm xmm, Operand op) { opMMX(xmm, op, 0x10, 0xF2); }
// movshdup / movsldup: prefix 0xF3 with opcodes 0x16 and 0x12; no immediate and no extra
// opcode byte (both passed as NONE).
void movshdup(Xmm xmm, Operand op) { opGen(xmm, op, 0x16, 0xF3, &isXMM_XMMorMEM, NONE, NONE); }
void movsldup(Xmm xmm, Operand op) { opGen(xmm, op, 0x12, 0xF3, &isXMM_XMMorMEM, NONE, NONE); }
// movss: prefix 0xF3, store opcode 0x0F 0x11, load/register opcode 0x10 through opMMX.
void movss(Address addr, Xmm xmm) { db(0xF3); opModM(addr, xmm, 0x0F, 0x11); }
void movss(Xmm xmm, Operand op) { opMMX(xmm, op, 0x10, 0xF3); }
// movsw: string move, word form -- 0x66 followed by 0xA5.
void movsw() { db(0x66); db(0xA5); }
// movsx: delegated to opMovxx with base opcode 0xBE.
void movsx(Reg reg, Operand op) { opMovxx(reg, op, 0xBE); }
// movupd / movups: store opcode 0x0F 0x11, load/register opcode 0x10 through opMMX.
// movupd adds the 0x66 prefix; 0x100 for movups appears to mean "no prefix" (TODO confirm).
void movupd(Address addr, Xmm xmm) { db(0x66); opModM(addr, xmm, 0x0F, 0x11); }
void movupd(Xmm xmm, Operand op) { opMMX(xmm, op, 0x10, 0x66); }
void movups(Address addr, Xmm xmm) { opModM(addr, xmm, 0x0F, 0x11); }
void movups(Xmm xmm, Operand op) { opMMX(xmm, op, 0x10, 0x100); }
// movzx: delegated to opMovxx with base opcode 0xB6.
void movzx(Reg reg, Operand op) { opMovxx(reg, op, 0xB6); }
// mpsadbw: opcode 0x42 with prefix 0x66 and extra opcode byte 0x3A; imm is truncated to
// 8 bits before being passed on.
void mpsadbw(Xmm xmm, Operand op, int imm) { opGen(xmm, op, 0x42, 0x66, &isXMM_XMMorMEM, cast(uint8)(imm), 0x3A); }
// mul: opR_ModM with opcode 0xF6 and extension 4 (the leading 0 is presumably
// "any operand size" -- confirm in opR_ModM).
void mul(Operand op) { opR_ModM(op, 0, 4, 0xF6); }
// mulpd / mulps / mulsd / mulss: opcode 0x59 with prefix 0x66, 0x100, 0xF2, 0xF3;
// 0x100 appears to mean "no prefix" (TODO confirm against opGen).
void mulpd(Xmm xmm, Operand op) { opGen(xmm, op, 0x59, 0x66, &isXMM_XMMorMEM); }
void mulps(Xmm xmm, Operand op) { opGen(xmm, op, 0x59, 0x100, &isXMM_XMMorMEM); }
void mulsd(Xmm xmm, Operand op) { opGen(xmm, op, 0x59, 0xF2, &isXMM_XMMorMEM); }
void mulss(Xmm xmm, Operand op) { opGen(xmm, op, 0x59, 0xF3, &isXMM_XMMorMEM); }
// mulx: opGpr with type flags T_F2 | T_0F38, opcode 0xf6 and a final flag of true.
void mulx(Reg32e r1, Reg32e r2, Operand op) { opGpr(r1, r2, op, T_F2 | T_0F38, 0xf6, true); }
// mwait: 0x0F 0x01 0xC9. mwaitx: 0x0F 0x01 0xFB.
void mwait() { db(0x0F); db(0x01); db(0xC9); }
void mwaitx() { db(0x0F); db(0x01); db(0xFB); }
4025 
// neg / not: both use opcode 0xF6 through opR_ModM and differ only in the extension
// value (3 for neg, 2 for not).
void neg(Operand op) { opR_ModM(op, 0, 3, 0xF6); }
void not(Operand op) { opR_ModM(op, 0, 2, 0xF6); }
4028 
// or: immediate form goes through opRM_I with base code 0x08 and extension 1; the
// operand/operand form goes through opRM_RM with base code 0x08.
void or(Operand op, uint32 imm) { opRM_I(op, imm, 0x08, 1); }
void or(Operand op1, Operand op2) { opRM_RM(op1, op2, 0x08); }
// orpd / orps: opcode 0x56 with prefix 0x66 or 0x100 (0x100 appears to mean "no prefix").
void orpd(Xmm xmm, Operand op) { opGen(xmm, op, 0x56, 0x66, &isXMM_XMMorMEM); }
void orps(Xmm xmm, Operand op) { opGen(xmm, op, 0x56, 0x100, &isXMM_XMMorMEM); }
// out_: trailing underscore because `out` is a D keyword. Register-port form uses 0xEE,
// immediate-port form uses 0xE6 with the port number v; both are validated by opInOut.
void out_(Reg d, Reg a) { opInOut(a, d, 0xEE); }
void out_(uint8 v, Reg a) { opInOut(a, 0xE6, v); }
// outsb / outsd / outsw: 0x6E, 0x6F, and 0x66 followed by 0x6F for the word form.
void outsb() { db(0x6E); }
void outsd() { db(0x6F); }
void outsw() { db(0x66); db(0x6F); }
4038 
// pabsb / pabsd / pabsw: opMMX with opcodes 0x1C, 0x1E, 0x1D, prefix argument 0x66,
// no immediate (NONE) and extra opcode byte 0x38.
void pabsb(Mmx mmx, Operand op) { opMMX(mmx, op, 0x1C, 0x66, NONE, 0x38); }
void pabsd(Mmx mmx, Operand op) { opMMX(mmx, op, 0x1E, 0x66, NONE, 0x38); }
void pabsw(Mmx mmx, Operand op) { opMMX(mmx, op, 0x1D, 0x66, NONE, 0x38); }
// Pack instructions. The MMX/XMM-capable ones use opMMX with a bare opcode (0x6B, 0x63,
// 0x67); packusdw is XMM-only and goes through opGen with prefix 0x66 and extra byte 0x38.
void packssdw(Mmx mmx, Operand op) { opMMX(mmx, op, 0x6B); }
void packsswb(Mmx mmx, Operand op) { opMMX(mmx, op, 0x63); }
void packusdw(Xmm xmm, Operand op) { opGen(xmm, op, 0x2B, 0x66, &isXMM_XMMorMEM, NONE, 0x38); }
void packuswb(Mmx mmx, Operand op) { opMMX(mmx, op, 0x67); }
// Packed integer adds: each forwards to opMMX with only its opcode byte
// (plain: 0xFC/0xFD/0xFE/0xD4, signed-saturating: 0xEC/0xED, unsigned-saturating: 0xDC/0xDD).
void paddb(Mmx mmx, Operand op) { opMMX(mmx, op, 0xFC); }
void paddd(Mmx mmx, Operand op) { opMMX(mmx, op, 0xFE); }
void paddq(Mmx mmx, Operand op) { opMMX(mmx, op, 0xD4); }
void paddsb(Mmx mmx, Operand op) { opMMX(mmx, op, 0xEC); }
void paddsw(Mmx mmx, Operand op) { opMMX(mmx, op, 0xED); }
void paddusb(Mmx mmx, Operand op) { opMMX(mmx, op, 0xDC); }
void paddusw(Mmx mmx, Operand op) { opMMX(mmx, op, 0xDD); }
void paddw(Mmx mmx, Operand op) { opMMX(mmx, op, 0xFD); }
// palignr: opcode 0x0f with prefix argument 0x66 and extra opcode byte 0x3a; imm is
// truncated to 8 bits.
void palignr(Mmx mmx, Operand op, int imm) { opMMX(mmx, op, 0x0f, 0x66, cast(uint8)(imm), 0x3a); }
// pand / pandn: opcodes 0xDB and 0xDF.
void pand(Mmx mmx, Operand op) { opMMX(mmx, op, 0xDB); }
void pandn(Mmx mmx, Operand op) { opMMX(mmx, op, 0xDF); }
// pause: two bytes 0xF3 0x90.
void pause() { db(0xF3); db(0x90); }
// pavgb / pavgw: opcodes 0xE0 and 0xE3.
void pavgb(Mmx mmx, Operand op) { opMMX(mmx, op, 0xE0); }
void pavgw(Mmx mmx, Operand op) { opMMX(mmx, op, 0xE3); }
// pblendvb: opcode 0x10, prefix 0x66, extra byte 0x38, no immediate.
void pblendvb(Xmm xmm, Operand op) { opGen(xmm, op, 0x10, 0x66, &isXMM_XMMorMEM, NONE, 0x38); }
// pblendw: opcode 0x0E, prefix 0x66, extra byte 0x3A; imm is truncated to 8 bits.
void pblendw(Xmm xmm, Operand op, int imm) { opGen(xmm, op, 0x0E, 0x66, &isXMM_XMMorMEM, cast(uint8)(imm), 0x3A); }
// Convenience aliases for pclmulqdq with a fixed immediate:
// hqhdq = 0x11, hqlqdq = 0x01, lqhdq = 0x10, lqlqdq = 0x00.
void pclmulhqhdq(Xmm xmm, Operand op) { pclmulqdq(xmm, op, 0x11); }
void pclmulhqlqdq(Xmm xmm, Operand op) { pclmulqdq(xmm, op, 0x01); }
void pclmullqhdq(Xmm xmm, Operand op) { pclmulqdq(xmm, op, 0x10); }
void pclmullqlqdq(Xmm xmm, Operand op) { pclmulqdq(xmm, op, 0x00); }
// pclmulqdq: opcode 0x44, prefix 0x66, extra byte 0x3A; imm is truncated to 8 bits.
void pclmulqdq(Xmm xmm, Operand op, int imm) { opGen(xmm, op, 0x44, 0x66, &isXMM_XMMorMEM, cast(uint8)(imm), 0x3A); }
// Packed compares.
// - b/w/d forms accept MMX or XMM and use opMMX with a bare opcode (eq: 0x74/0x75/0x76,
//   gt: 0x64/0x65/0x66).
// - q forms are XMM-only: opGen with prefix 0x66 and extra byte 0x38 (eq: 0x29, gt: 0x37).
// - String compares (pcmpestri/estrm/istri/istrm) are XMM-only: opGen with prefix 0x66,
//   extra byte 0x3A and an 8-bit immediate (opcodes 0x61, 0x60, 0x63, 0x62).
void pcmpeqb(Mmx mmx, Operand op) { opMMX(mmx, op, 0x74); }
void pcmpeqd(Mmx mmx, Operand op) { opMMX(mmx, op, 0x76); }
void pcmpeqq(Xmm xmm, Operand op) { opGen(xmm, op, 0x29, 0x66, &isXMM_XMMorMEM, NONE, 0x38); }
void pcmpeqw(Mmx mmx, Operand op) { opMMX(mmx, op, 0x75); }
void pcmpestri(Xmm xmm, Operand op, uint8 imm) { opGen(xmm, op, 0x61, 0x66, &isXMM_XMMorMEM, imm, 0x3A); }
void pcmpestrm(Xmm xmm, Operand op, uint8 imm) { opGen(xmm, op, 0x60, 0x66, &isXMM_XMMorMEM, imm, 0x3A); }
void pcmpgtb(Mmx mmx, Operand op) { opMMX(mmx, op, 0x64); }
void pcmpgtd(Mmx mmx, Operand op) { opMMX(mmx, op, 0x66); }
void pcmpgtq(Xmm xmm, Operand op) { opGen(xmm, op, 0x37, 0x66, &isXMM_XMMorMEM, NONE, 0x38); }
void pcmpgtw(Mmx mmx, Operand op) { opMMX(mmx, op, 0x65); }
void pcmpistri(Xmm xmm, Operand op, uint8 imm) { opGen(xmm, op, 0x63, 0x66, &isXMM_XMMorMEM, imm, 0x3A); }
void pcmpistrm(Xmm xmm, Operand op, uint8 imm) { opGen(xmm, op, 0x62, 0x66, &isXMM_XMMorMEM, imm, 0x3A); }
// pdep / pext: opGpr with opcode 0xf5 in the T_0F38 map; T_F2 selects pdep, T_F3 pext.
void pdep(Reg32e r1, Reg32e r2, Operand op) { opGpr(r1, r2, op, T_F2 | T_0F38, 0xf5, true); }
void pext(Reg32e r1, Reg32e r2, Operand op) { opGpr(r1, r2, op, T_F3 | T_0F38, 0xf5, true); }
// pextrb / pextrd / pextrw: delegated to opExt with opcodes 0x14, 0x16, 0x15 and an 8-bit
// immediate. pextrw accepts an Mmx-typed source and passes an additional `true` flag
// (NOTE(review): meaning of the flag is defined in opExt, not visible here).
void pextrb(Operand op, Xmm xmm, uint8 imm) { opExt(op, xmm, 0x14, imm); }
void pextrd(Operand op, Xmm xmm, uint8 imm) { opExt(op, xmm, 0x16, imm); }
void pextrw(Operand op, Mmx xmm, uint8 imm) { opExt(op, xmm, 0x15, imm, true); }
// Horizontal add/subtract: opMMX with prefix argument 0x66, no immediate and extra opcode
// byte 0x38 (add: 0x01 w, 0x02 d, 0x03 sw; sub: 0x05 w, 0x06 d, 0x07 sw).
// phminposuw is XMM-only and goes through opGen with opcode 0x41.
void phaddd(Mmx mmx, Operand op) { opMMX(mmx, op, 0x02, 0x66, NONE, 0x38); }
void phaddsw(Mmx mmx, Operand op) { opMMX(mmx, op, 0x03, 0x66, NONE, 0x38); }
void phaddw(Mmx mmx, Operand op) { opMMX(mmx, op, 0x01, 0x66, NONE, 0x38); }
void phminposuw(Xmm xmm, Operand op) { opGen(xmm, op, 0x41, 0x66, &isXMM_XMMorMEM, NONE, 0x38); }
void phsubd(Mmx mmx, Operand op) { opMMX(mmx, op, 0x06, 0x66, NONE, 0x38); }
void phsubsw(Mmx mmx, Operand op) { opMMX(mmx, op, 0x07, 0x66, NONE, 0x38); }
void phsubw(Mmx mmx, Operand op) { opMMX(mmx, op, 0x05, 0x66, NONE, 0x38); }
// pinsrb / pinsrd: opcodes 0x20 and 0x22 with prefix 0x66, extra byte 0x3A and an 8-bit
// immediate; the source operand is checked with isXMM_REG32orMEM.
void pinsrb(Xmm xmm, Operand op, uint8 imm) { opGen(xmm, op, 0x20, 0x66, &isXMM_REG32orMEM, imm, 0x3A); }
void pinsrd(Xmm xmm, Operand op, uint8 imm) { opGen(xmm, op, 0x22, 0x66, &isXMM_REG32orMEM, imm, 0x3A); }
// pinsrw: opcode 0xC4 with immediate imm. The source must be a 32-bit register or a
// memory operand; that check is done here, so no validator is handed to opGen (null).
// The prefix argument is 0x66 for an XMM destination and NONE otherwise.
// Throws: XError(ERR.BAD_COMBINATION) when op is neither a 32-bit register nor memory.
void pinsrw(Mmx mmx, Operand op, int imm)
{
	if (op.isREG(32) || op.isMEM())
	{
		opGen(mmx, op, 0xC4, mmx.isXMM() ? 0x66 : NONE, null, imm);
		return;
	}
	throw new XError(ERR.BAD_COMBINATION);
}
// Packed multiply-add, max and min.
// - Forms taking Mmx forward to opMMX (bare opcode, or 0x66 + extra byte 0x38 for pmaddubsw).
// - Forms taking Xmm are XMM-only and go through opGen with prefix 0x66, no immediate and
//   extra byte 0x38.
void pmaddubsw(Mmx mmx, Operand op) { opMMX(mmx, op, 0x04, 0x66, NONE, 0x38); }
void pmaddwd(Mmx mmx, Operand op) { opMMX(mmx, op, 0xF5); }
void pmaxsb(Xmm xmm, Operand op) { opGen(xmm, op, 0x3C, 0x66, &isXMM_XMMorMEM, NONE, 0x38); }
void pmaxsd(Xmm xmm, Operand op) { opGen(xmm, op, 0x3D, 0x66, &isXMM_XMMorMEM, NONE, 0x38); }
void pmaxsw(Mmx mmx, Operand op) { opMMX(mmx, op, 0xEE); }
void pmaxub(Mmx mmx, Operand op) { opMMX(mmx, op, 0xDE); }
void pmaxud(Xmm xmm, Operand op) { opGen(xmm, op, 0x3F, 0x66, &isXMM_XMMorMEM, NONE, 0x38); }
void pmaxuw(Xmm xmm, Operand op) { opGen(xmm, op, 0x3E, 0x66, &isXMM_XMMorMEM, NONE, 0x38); }
void pminsb(Xmm xmm, Operand op) { opGen(xmm, op, 0x38, 0x66, &isXMM_XMMorMEM, NONE, 0x38); }
void pminsd(Xmm xmm, Operand op) { opGen(xmm, op, 0x39, 0x66, &isXMM_XMMorMEM, NONE, 0x38); }
void pminsw(Mmx mmx, Operand op) { opMMX(mmx, op, 0xEA); }
void pminub(Mmx mmx, Operand op) { opMMX(mmx, op, 0xDA); }
void pminud(Xmm xmm, Operand op) { opGen(xmm, op, 0x3B, 0x66, &isXMM_XMMorMEM, NONE, 0x38); }
void pminuw(Xmm xmm, Operand op) { opGen(xmm, op, 0x3A, 0x66, &isXMM_XMMorMEM, NONE, 0x38); }
// pmovmskb: 0x0F 0xD7 register-to-register; the 0x66 prefix byte is written first when
// the source reports isXMM().
void pmovmskb(Reg32e reg, Mmx mmx)
{
	if (mmx.isXMM())
	{
		db(0x66);
	}
	opModR(reg, mmx, 0x0F, 0xD7);
}
// Packed sign-extending (pmovsx*, opcodes 0x20-0x25) and zero-extending (pmovzx*,
// opcodes 0x30-0x35) moves. All are XMM-only and use opGen with prefix 0x66, no immediate
// and extra opcode byte 0x38.
void pmovsxbd(Xmm xmm, Operand op) { opGen(xmm, op, 0x21, 0x66, &isXMM_XMMorMEM, NONE, 0x38); }
void pmovsxbq(Xmm xmm, Operand op) { opGen(xmm, op, 0x22, 0x66, &isXMM_XMMorMEM, NONE, 0x38); }
void pmovsxbw(Xmm xmm, Operand op) { opGen(xmm, op, 0x20, 0x66, &isXMM_XMMorMEM, NONE, 0x38); }
void pmovsxdq(Xmm xmm, Operand op) { opGen(xmm, op, 0x25, 0x66, &isXMM_XMMorMEM, NONE, 0x38); }
void pmovsxwd(Xmm xmm, Operand op) { opGen(xmm, op, 0x23, 0x66, &isXMM_XMMorMEM, NONE, 0x38); }
void pmovsxwq(Xmm xmm, Operand op) { opGen(xmm, op, 0x24, 0x66, &isXMM_XMMorMEM, NONE, 0x38); }
void pmovzxbd(Xmm xmm, Operand op) { opGen(xmm, op, 0x31, 0x66, &isXMM_XMMorMEM, NONE, 0x38); }
void pmovzxbq(Xmm xmm, Operand op) { opGen(xmm, op, 0x32, 0x66, &isXMM_XMMorMEM, NONE, 0x38); }
void pmovzxbw(Xmm xmm, Operand op) { opGen(xmm, op, 0x30, 0x66, &isXMM_XMMorMEM, NONE, 0x38); }
void pmovzxdq(Xmm xmm, Operand op) { opGen(xmm, op, 0x35, 0x66, &isXMM_XMMorMEM, NONE, 0x38); }
void pmovzxwd(Xmm xmm, Operand op) { opGen(xmm, op, 0x33, 0x66, &isXMM_XMMorMEM, NONE, 0x38); }
void pmovzxwq(Xmm xmm, Operand op) { opGen(xmm, op, 0x34, 0x66, &isXMM_XMMorMEM, NONE, 0x38); }
// Packed multiplies. pmuldq and pmulld are XMM-only (opGen, prefix 0x66, extra byte 0x38);
// pmulhrsw uses opMMX with the same 0x66 / 0x38 arguments; the remaining forms use opMMX
// with a bare opcode.
void pmuldq(Xmm xmm, Operand op) { opGen(xmm, op, 0x28, 0x66, &isXMM_XMMorMEM, NONE, 0x38); }
void pmulhrsw(Mmx mmx, Operand op) { opMMX(mmx, op, 0x0B, 0x66, NONE, 0x38); }
void pmulhuw(Mmx mmx, Operand op) { opMMX(mmx, op, 0xE4); }
void pmulhw(Mmx mmx, Operand op) { opMMX(mmx, op, 0xE5); }
void pmulld(Xmm xmm, Operand op) { opGen(xmm, op, 0x40, 0x66, &isXMM_XMMorMEM, NONE, 0x38); }
void pmullw(Mmx mmx, Operand op) { opMMX(mmx, op, 0xD5); }
void pmuludq(Mmx mmx, Operand op) { opMMX(mmx, op, 0xF4); }
// popcnt: delegated to opSp1 with the bytes 0xF3, 0x0F, 0xB8.
void popcnt(Reg reg, Operand op) { opSp1(reg, op, 0xF3, 0x0F, 0xB8); }
// popf: single byte 0x9D.
void popf() { db(0x9D); }
// por: opMMX with opcode 0xEB.
void por(Mmx mmx, Operand op) { opMMX(mmx, op, 0xEB); }
// Prefetch hints. The temporary Reg32(n) presumably only supplies the ModRM reg field
// (/n) that selects the hint -- confirm in opModM.
// Opcode 0x0F 0x18: nta = /0, t0 = /1, t1 = /2, t2 = /3.
void prefetchnta(Address addr) { opModM(addr, new Reg32(0), 0x0F, 0x18); }
void prefetcht0(Address addr) { opModM(addr, new Reg32(1), 0x0F, 0x18); }
void prefetcht1(Address addr) { opModM(addr, new Reg32(2), 0x0F, 0x18); }
void prefetcht2(Address addr) { opModM(addr, new Reg32(3), 0x0F, 0x18); }
// Opcode 0x0F 0x0D: prefetchw = /1, prefetchwt1 = /2.
void prefetchw(Address addr) { opModM(addr, new Reg32(1), 0x0F, 0x0D); }
void prefetchwt1(Address addr) { opModM(addr, new Reg32(2), 0x0F, 0x0D); }
// psadbw: opMMX with opcode 0xF6.
void psadbw(Mmx mmx, Operand op) { opMMX(mmx, op, 0xF6); }
// pshufb: opcode 0x00 with prefix argument 0x66 and extra byte 0x38.
void pshufb(Mmx mmx, Operand op) { opMMX(mmx, op, 0x00, 0x66, NONE, 0x38); }
// pshufd / pshufhw / pshuflw / pshufw: all opcode 0x70 with an 8-bit immediate; the
// fourth argument (0x66, 0xF3, 0xF2, 0x00) distinguishes the variants.
void pshufd(Mmx mmx, Operand op, uint8 imm8) { opMMX(mmx, op, 0x70, 0x66, imm8); }
void pshufhw(Mmx mmx, Operand op, uint8 imm8) { opMMX(mmx, op, 0x70, 0xF3, imm8); }
void pshuflw(Mmx mmx, Operand op, uint8 imm8) { opMMX(mmx, op, 0x70, 0xF2, imm8); }
void pshufw(Mmx mmx, Operand op, uint8 imm8) { opMMX(mmx, op, 0x70, 0x00, imm8); }
// psignb / psignd / psignw: opcodes 0x08, 0x0A, 0x09 with prefix argument 0x66 and extra byte 0x38.
void psignb(Mmx mmx, Operand op) { opMMX(mmx, op, 0x08, 0x66, NONE, 0x38); }
void psignd(Mmx mmx, Operand op) { opMMX(mmx, op, 0x0A, 0x66, NONE, 0x38); }
void psignw(Mmx mmx, Operand op) { opMMX(mmx, op, 0x09, 0x66, NONE, 0x38); }
// Packed shifts. Each mnemonic has two overloads:
// - shift count in a register/memory operand: opMMX with a dedicated opcode;
// - shift count as an immediate: opMMX_IMM with a shared opcode (0x71 word, 0x72 dword,
//   0x73 qword) plus an extension value (6 = left, 4 = arithmetic right, 2 = logical right).
// pslldq / psrldq are XMM-only, immediate-only byte shifts (opcode 0x73, extensions 7 and 3).
void pslld(Mmx mmx, Operand op) { opMMX(mmx, op, 0xF2); }
void pslld(Mmx mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x72, 6); }
void pslldq(Xmm xmm, int imm8) { opMMX_IMM(xmm, imm8, 0x73, 7); }
void psllq(Mmx mmx, Operand op) { opMMX(mmx, op, 0xF3); }
void psllq(Mmx mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x73, 6); }
void psllw(Mmx mmx, Operand op) { opMMX(mmx, op, 0xF1); }
void psllw(Mmx mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x71, 6); }
void psrad(Mmx mmx, Operand op) { opMMX(mmx, op, 0xE2); }
void psrad(Mmx mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x72, 4); }
void psraw(Mmx mmx, Operand op) { opMMX(mmx, op, 0xE1); }
void psraw(Mmx mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x71, 4); }
void psrld(Mmx mmx, Operand op) { opMMX(mmx, op, 0xD2); }
void psrld(Mmx mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x72, 2); }
void psrldq(Xmm xmm, int imm8) { opMMX_IMM(xmm, imm8, 0x73, 3); }
void psrlq(Mmx mmx, Operand op) { opMMX(mmx, op, 0xD3); }
void psrlq(Mmx mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x73, 2); }
void psrlw(Mmx mmx, Operand op) { opMMX(mmx, op, 0xD1); }
void psrlw(Mmx mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x71, 2); }
// Packed integer subtracts: each forwards to opMMX with only its opcode byte
// (plain: 0xF8/0xF9/0xFA/0xFB, signed-saturating: 0xE8/0xE9, unsigned-saturating: 0xD8/0xD9).
void psubb(Mmx mmx, Operand op) { opMMX(mmx, op, 0xF8); }
void psubd(Mmx mmx, Operand op) { opMMX(mmx, op, 0xFA); }
void psubq(Mmx mmx, Operand op) { opMMX(mmx, op, 0xFB); }
void psubsb(Mmx mmx, Operand op) { opMMX(mmx, op, 0xE8); }
void psubsw(Mmx mmx, Operand op) { opMMX(mmx, op, 0xE9); }
void psubusb(Mmx mmx, Operand op) { opMMX(mmx, op, 0xD8); }
void psubusw(Mmx mmx, Operand op) { opMMX(mmx, op, 0xD9); }
void psubw(Mmx mmx, Operand op) { opMMX(mmx, op, 0xF9); }
// ptest: XMM-only, opcode 0x17 with prefix 0x66 and extra byte 0x38.
void ptest(Xmm xmm, Operand op) { opGen(xmm, op, 0x17, 0x66, &isXMM_XMMorMEM, NONE, 0x38); }
// Unpack/interleave. The bw/wd/dq forms accept MMX or XMM via opMMX (high: 0x68-0x6A,
// low: 0x60-0x62); the qdq forms are XMM-only via opGen with prefix 0x66 (0x6D high, 0x6C low).
void punpckhbw(Mmx mmx, Operand op) { opMMX(mmx, op, 0x68); }
void punpckhdq(Mmx mmx, Operand op) { opMMX(mmx, op, 0x6A); }
void punpckhqdq(Xmm xmm, Operand op) { opGen(xmm, op, 0x6D, 0x66, &isXMM_XMMorMEM); }
void punpckhwd(Mmx mmx, Operand op) { opMMX(mmx, op, 0x69); }
void punpcklbw(Mmx mmx, Operand op) { opMMX(mmx, op, 0x60); }
void punpckldq(Mmx mmx, Operand op) { opMMX(mmx, op, 0x62); }
void punpcklqdq(Xmm xmm, Operand op) { opGen(xmm, op, 0x6C, 0x66, &isXMM_XMMorMEM); }
void punpcklwd(Mmx mmx, Operand op) { opMMX(mmx, op, 0x61); }
// pushf: single byte 0x9C.
void pushf() { db(0x9C); }
// pxor: opMMX with opcode 0xEF.
void pxor(Mmx mmx, Operand op) { opMMX(mmx, op, 0xEF); }
4183 
// rcl / rcr: rotate-through-carry, delegated to opShift with extension 2 (rcl) or 3 (rcr).
// Each has a count-in-register overload (_cl) and an immediate-count overload.
void rcl(Operand op, Reg8 _cl) { opShift(op, _cl, 2); }
void rcl(Operand op, int imm) { opShift(op, imm, 2); }
// rcpps / rcpss: opcode 0x53; 0xF3 selects the scalar form, 0x100 appears to mean
// "no prefix" (TODO confirm against opGen).
void rcpps(Xmm xmm, Operand op) { opGen(xmm, op, 0x53, 0x100, &isXMM_XMMorMEM); }
void rcpss(Xmm xmm, Operand op) { opGen(xmm, op, 0x53, 0xF3, &isXMM_XMMorMEM); }
void rcr(Operand op, Reg8 _cl) { opShift(op, _cl, 3); }
void rcr(Operand op, int imm) { opShift(op, imm, 3); }
// rdmsr: two bytes 0x0F 0x32. rdpmc: two bytes 0x0F 0x33.
void rdmsr() { db(0x0F); db(0x32); }
void rdpmc() { db(0x0F); db(0x33); }
// rdrand: 0x0F 0xC7 with a helper register of index 6 (same bit width as r) passed as the
// first opModR operand -- presumably to supply the /6 ModRM extension (confirm in opModR).
// Throws: XError(ERR.BAD_SIZE_OF_REGISTER) for an 8-bit destination.
void rdrand(Reg r)
{
	if (r.isBit(8))
	{
		throw new XError(ERR.BAD_SIZE_OF_REGISTER);
	}
	opModR(new Reg(6, Kind.REG, r.getBit()), r, 0x0F, 0xC7);
}

// rdseed: identical to rdrand except that the helper register index is 7.
// Throws: XError(ERR.BAD_SIZE_OF_REGISTER) for an 8-bit destination.
void rdseed(Reg r)
{
	if (r.isBit(8))
	{
		throw new XError(ERR.BAD_SIZE_OF_REGISTER);
	}
	opModR(new Reg(7, Kind.REG, r.getBit()), r, 0x0F, 0xC7);
}
// rdtsc: 0x0F 0x31. rdtscp: 0x0F 0x01 0xF9.
void rdtsc() { db(0x0F); db(0x31); }
void rdtscp() { db(0x0F); db(0x01); db(0xF9); }
// Repeat prefixes, written as a lone byte before the instruction the caller emits next:
// rep / repe / repz = 0xF3, repne / repnz = 0xF2.
void rep() { db(0xF3); }
void repe() { db(0xF3); }
void repne() { db(0xF2); }
void repnz() { db(0xF2); }
void repz() { db(0xF3); }
// ret: with imm == 0 (the default) writes the single byte 0xC3; otherwise writes 0xC2
// followed by imm through dw().
void ret(int imm = 0)
{
	if (imm == 0)
	{
		db(0xC3);
		return;
	}
	db(0xC2);
	dw(imm);
}
// rol / ror: delegated to opShift with extension 0 (rol) or 1 (ror); count either in a
// register (_cl) or as an immediate.
void rol(Operand op, Reg8 _cl) { opShift(op, _cl, 0); }
void rol(Operand op, int imm) { opShift(op, imm, 0); }
void ror(Operand op, Reg8 _cl) { opShift(op, _cl, 1); }
void ror(Operand op, int imm) { opShift(op, imm, 1); }
// rorx: opGpr with T_0F3A | T_F2, opcode 0xF0 and an 8-bit immediate. A Reg32e with
// index 0 and the same bit width as r is passed in the third operand slot.
void rorx(Reg32e r, Operand op, uint8 imm) { opGpr(r, op, new Reg32e(0, r.getBit()), T_0F3A | T_F2, 0xF0, false, imm); }
// round*: prefix 0x66, extra opcode byte 0x3A and an 8-bit immediate
// (opcodes: pd 0x09, ps 0x08, sd 0x0B, ss 0x0A). The sd/ss overloads take an int and
// truncate it to 8 bits.
void roundpd(Xmm xmm, Operand op, uint8 imm) { opGen(xmm, op, 0x09, 0x66, &isXMM_XMMorMEM, imm, 0x3A); }
void roundps(Xmm xmm, Operand op, uint8 imm) { opGen(xmm, op, 0x08, 0x66, &isXMM_XMMorMEM, imm, 0x3A); }
void roundsd(Xmm xmm, Operand op, int imm) { opGen(xmm, op, 0x0B, 0x66, &isXMM_XMMorMEM, cast(uint8)(imm), 0x3A); }
void roundss(Xmm xmm, Operand op, int imm) { opGen(xmm, op, 0x0A, 0x66, &isXMM_XMMorMEM, cast(uint8)(imm), 0x3A); }
// rsqrtps / rsqrtss: opcode 0x52; 0xF3 selects the scalar form, 0x100 appears to mean
// "no prefix" (TODO confirm against opGen).
void rsqrtps(Xmm xmm, Operand op) { opGen(xmm, op, 0x52, 0x100, &isXMM_XMMorMEM); }
void rsqrtss(Xmm xmm, Operand op) { opGen(xmm, op, 0x52, 0xF3, &isXMM_XMMorMEM); }
4213 
// sahf: single byte 0x9E.
void sahf() { db(0x9E); }
// sal / sar: delegated to opShift with extension 4 (sal, same value shl uses) or 7 (sar).
void sal(Operand op, Reg8 _cl) { opShift(op, _cl, 4); }
void sal(Operand op, int imm) { opShift(op, imm, 4); }
void sar(Operand op, Reg8 _cl) { opShift(op, _cl, 7); }
void sar(Operand op, int imm) { opShift(op, imm, 7); }
// sarx: opGpr with T_F3 | T_0F38 and opcode 0xf7.
void sarx(Reg32e r1, Operand op, Reg32e r2) { opGpr(r1, op, r2, T_F3 | T_0F38, 0xf7, false); }
// sbb: immediate form via opRM_I (base code 0x18, extension 3); operand/operand form via
// opRM_RM (base code 0x18).
void sbb(Operand op, uint32 imm) { opRM_I(op, imm, 0x18, 3); }
void sbb(Operand op1, Operand op2) { opRM_RM(op1, op2, 0x18); }
// scasb / scasd / scasw: 0xAE, 0xAF, and 0x66 followed by 0xAF for the word form.
void scasb() { db(0xAE); }
void scasd() { db(0xAF); }
void scasw() { db(0x66); db(0xAF); }
// setcc family: every entry calls opR_ModM(op, 8, 0, 0x0F, 0x90 | cc), i.e. an 8-bit
// operand with the two-byte opcode 0x0F, 0x90 + condition code. The condition code is
// the low nibble OR-ed into 0x90 (0 = o, 1 = no, 2 = b/c/nae, 3 = ae/nb/nc, 4 = e/z,
// 5 = ne/nz, 6 = be/na, 7 = a/nbe, 8 = s, 9 = ns, 10 = p/pe, 11 = np/po, 12 = l/nge,
// 13 = ge/nl, 14 = le/ng, 15 = g/nle). Synonymous mnemonics share the same value.
void seta(Operand op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 7); }
void setae(Operand op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 3); }
void setb(Operand op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 2); }
void setbe(Operand op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 6); }
void setc(Operand op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 2); }
void sete(Operand op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 4); }
void setg(Operand op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 15); }
void setge(Operand op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 13); }
void setl(Operand op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 12); }
void setle(Operand op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 14); }
void setna(Operand op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 6); }
void setnae(Operand op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 2); }
void setnb(Operand op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 3); }
void setnbe(Operand op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 7); }
void setnc(Operand op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 3); }
void setne(Operand op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 5); }
void setng(Operand op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 14); }
void setnge(Operand op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 12); }
void setnl(Operand op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 13); }
void setnle(Operand op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 15); }
void setno(Operand op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 1); }
void setnp(Operand op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 11); }
void setns(Operand op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 9); }
void setnz(Operand op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 5); }
void seto(Operand op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 0); }
void setp(Operand op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 10); }
void setpe(Operand op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 10); }
void setpo(Operand op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 11); }
void sets(Operand op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 8); }
void setz(Operand op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 4); }
// sfence: three bytes 0x0F 0xAE 0xF8.
void sfence() { db(0x0F); db(0xAE); db(0xF8); }
// SHA extensions: all XMM-only via opGen with no mandatory prefix (NONE).
// The 0x38-map forms take no immediate; sha1rnds4 is in the 0x3A map and takes an 8-bit
// immediate. Note that sha1rnds4 and sha256msg1 share opcode byte 0xCC but differ in the
// extra opcode byte (0x3A vs 0x38).
void sha1msg1(Xmm xmm, Operand op) { opGen(xmm, op, 0xC9, NONE, &isXMM_XMMorMEM, NONE, 0x38); }
void sha1msg2(Xmm xmm, Operand op) { opGen(xmm, op, 0xCA, NONE, &isXMM_XMMorMEM, NONE, 0x38); }
void sha1nexte(Xmm xmm, Operand op) { opGen(xmm, op, 0xC8, NONE, &isXMM_XMMorMEM, NONE, 0x38); }
void sha1rnds4(Xmm xmm, Operand op, uint8 imm) { opGen(xmm, op, 0xCC, NONE, &isXMM_XMMorMEM, imm, 0x3A); }
void sha256msg1(Xmm xmm, Operand op) { opGen(xmm, op, 0xCC, NONE, &isXMM_XMMorMEM, NONE, 0x38); }
void sha256msg2(Xmm xmm, Operand op) { opGen(xmm, op, 0xCD, NONE, &isXMM_XMMorMEM, NONE, 0x38); }
void sha256rnds2(Xmm xmm, Operand op) { opGen(xmm, op, 0xCB, NONE, &isXMM_XMMorMEM, NONE, 0x38); }
// shl / shr: delegated to opShift with extension 4 (shl, same value sal uses) or 5 (shr).
void shl(Operand op, Reg8 _cl) { opShift(op, _cl, 4); }
void shl(Operand op, int imm) { opShift(op, imm, 4); }
// shld / shrd: double-precision shifts via opShxd with base opcode 0xA4 (shld) or 0xAC
// (shrd). The count-in-register overload passes 0 as the immediate plus the _cl register.
void shld(Operand op, Reg reg, Reg8 _cl) { opShxd(op, reg, 0, 0xA4, _cl); }
void shld(Operand op, Reg reg, uint8 imm) { opShxd(op, reg, imm, 0xA4); }
// shlx / shrx: opGpr with opcode 0xf7 in the T_0F38 map; T_66 selects shlx, T_F2 shrx.
void shlx(Reg32e r1, Operand op, Reg32e r2) { opGpr(r1, op, r2, T_66 | T_0F38, 0xf7, false); }
void shr(Operand op, Reg8 _cl) { opShift(op, _cl, 5); }
void shr(Operand op, int imm) { opShift(op, imm, 5); }
void shrd(Operand op, Reg reg, Reg8 _cl) { opShxd(op, reg, 0, 0xAC, _cl); }
void shrd(Operand op, Reg reg, uint8 imm) { opShxd(op, reg, imm, 0xAC); }
void shrx(Reg32e r1, Operand op, Reg32e r2) { opGpr(r1, op, r2, T_F2 | T_0F38, 0xf7, false); }
// shufpd / shufps: opcode 0xC6 with an 8-bit immediate; prefix 0x66 for pd, 0x100 for ps
// (0x100 appears to mean "no prefix" -- TODO confirm against opGen).
void shufpd(Xmm xmm, Operand op, uint8 imm8) { opGen(xmm, op, 0xC6, 0x66, &isXMM_XMMorMEM, imm8); }
void shufps(Xmm xmm, Operand op, uint8 imm8) { opGen(xmm, op, 0xC6, 0x100, &isXMM_XMMorMEM, imm8); }
// sqrt*: opcode 0x51 with prefix 0x66 (pd), 0x100 (ps), 0xF2 (sd), 0xF3 (ss).
void sqrtpd(Xmm xmm, Operand op) { opGen(xmm, op, 0x51, 0x66, &isXMM_XMMorMEM); }
void sqrtps(Xmm xmm, Operand op) { opGen(xmm, op, 0x51, 0x100, &isXMM_XMMorMEM); }
void sqrtsd(Xmm xmm, Operand op) { opGen(xmm, op, 0x51, 0xF2, &isXMM_XMMorMEM); }
void sqrtss(Xmm xmm, Operand op) { opGen(xmm, op, 0x51, 0xF3, &isXMM_XMMorMEM); }
// stac: three bytes 0x0F 0x01 0xCB.
void stac() { db(0x0F); db(0x01); db(0xCB); }
// Flag setters: stc = 0xF9, std = 0xFD, sti = 0xFB.
void stc() { db(0xF9); }
void std() { db(0xFD); }
void sti() { db(0xFB); }
// stmxcsr: encoded as 0x0F 0xAE; the temporary Reg32(3) presumably only supplies the
// ModRM reg field (/3) -- confirm in opModM.
void stmxcsr(Address addr) { opModM(addr, new Reg32(3), 0x0F, 0xAE); }
// stosb / stosd / stosw: 0xAA, 0xAB, and 0x66 followed by 0xAB for the word form.
void stosb() { db(0xAA); }
void stosd() { db(0xAB); }
void stosw() { db(0x66); db(0xAB); }
// sub: immediate form via opRM_I (base code 0x28, extension 5); operand/operand form via
// opRM_RM (base code 0x28).
void sub(Operand op, uint32 imm) { opRM_I(op, imm, 0x28, 5); }
void sub(Operand op1, Operand op2) { opRM_RM(op1, op2, 0x28); }
// sub{pd,ps,sd,ss}: opcode 0x5C with prefix 0x66, 0x100, 0xF2, 0xF3 (0x100 appears to
// mean "no prefix" -- TODO confirm against opGen).
void subpd(Xmm xmm, Operand op) { opGen(xmm, op, 0x5C, 0x66, &isXMM_XMMorMEM); }
void subps(Xmm xmm, Operand op) { opGen(xmm, op, 0x5C, 0x100, &isXMM_XMMorMEM); }
void subsd(Xmm xmm, Operand op) { opGen(xmm, op, 0x5C, 0xF2, &isXMM_XMMorMEM); }
void subss(Xmm xmm, Operand op) { opGen(xmm, op, 0x5C, 0xF3, &isXMM_XMMorMEM); }
// sysenter: 0x0F 0x34. sysexit: 0x0F 0x35.
void sysenter() { db(0x0F); db(0x34); }
void sysexit() { db(0x0F); db(0x35); }
4295 
// tzcnt: delegated to opSp1 with the bytes 0xF3, 0x0F, 0xBC (prefix + two-byte opcode).
void tzcnt(Reg reg, Operand op) { opSp1(reg, op, 0xF3, 0x0F, 0xBC); }
4297 
// ucomisd / ucomiss: opcode 0x2E with prefix 0x66 (sd) or 0x100 (ss); 0x100 appears to
// mean "no prefix" (TODO confirm against opGen).
void ucomisd(Xmm xmm, Operand op) { opGen(xmm, op, 0x2E, 0x66, &isXMM_XMMorMEM); }
void ucomiss(Xmm xmm, Operand op) { opGen(xmm, op, 0x2E, 0x100, &isXMM_XMMorMEM); }
// ud2: two bytes 0x0F 0x0B.
void ud2() { db(0x0F); db(0x0B); }
// unpck{h,l}{pd,ps}: opcode 0x15 (high) or 0x14 (low) with prefix 0x66 (pd) or 0x100 (ps).
void unpckhpd(Xmm xmm, Operand op) { opGen(xmm, op, 0x15, 0x66, &isXMM_XMMorMEM); }
void unpckhps(Xmm xmm, Operand op) { opGen(xmm, op, 0x15, 0x100, &isXMM_XMMorMEM); }
void unpcklpd(Xmm xmm, Operand op) { opGen(xmm, op, 0x14, 0x66, &isXMM_XMMorMEM); }
void unpcklps(Xmm xmm, Operand op) { opGen(xmm, op, 0x14, 0x100, &isXMM_XMMorMEM); }
4305 
// vadd*: all four use code 0x58 through opAVX_X_X_XM; op2 defaults to an
// empty Operand so the third operand may be left out by the caller.

/// VADDPD (code 0x58).
void vaddpd(Xmm xmm, Operand op1, Operand op2 = new Operand())
{
	opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x58);
}

/// VADDPS (code 0x58).
void vaddps(Xmm xmm, Operand op1, Operand op2 = new Operand())
{
	opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x58);
}

/// VADDSD (code 0x58).
void vaddsd(Xmm xmm, Operand op1, Operand op2 = new Operand())
{
	opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z | T_N8, 0x58);
}

/// VADDSS (code 0x58).
void vaddss(Xmm xmm, Operand op1, Operand op2 = new Operand())
{
	opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z | T_N4, 0x58);
}
/// VADDSUBPD (code 0xD0); op2 may be omitted.
void vaddsubpd(Xmm xmm, Operand op1, Operand op2 = new Operand())
{
	opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F | T_YMM, 0xD0);
}

/// VADDSUBPS (code 0xD0); op2 may be omitted.
void vaddsubps(Xmm xmm, Operand op1, Operand op2 = new Operand())
{
	opAVX_X_X_XM(xmm, op1, op2, T_F2 | T_0F | T_YMM, 0xD0);
}
// AES round mnemonics: identical type flags, codes 0xDC..0xDF; op2 may be omitted.

/// VAESDEC (code 0xDE).
void vaesdec(Xmm xmm, Operand op1, Operand op2 = new Operand())
{
	opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F38 | T_YMM | T_EVEX, 0xDE);
}

/// VAESDECLAST (code 0xDF).
void vaesdeclast(Xmm xmm, Operand op1, Operand op2 = new Operand())
{
	opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F38 | T_YMM | T_EVEX, 0xDF);
}

/// VAESENC (code 0xDC).
void vaesenc(Xmm xmm, Operand op1, Operand op2 = new Operand())
{
	opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F38 | T_YMM | T_EVEX, 0xDC);
}

/// VAESENCLAST (code 0xDD).
void vaesenclast(Xmm xmm, Operand op1, Operand op2 = new Operand())
{
	opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F38 | T_YMM | T_EVEX, 0xDD);
}
/// VAESIMC (code 0xDB): two-operand form through opAVX_X_XM_IMM, no immediate passed.
void vaesimc(Xmm xm, Operand op)
{
	opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_W0, 0xDB);
}

/// VAESKEYGENASSIST (code 0xDF): like vaesimc but forwards the 8-bit immediate `imm`.
void vaeskeygenassist(Xmm xm, Operand op, uint8 imm)
{
	opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0xDF, imm);
}
// vandn* use code 0x55, vand* use code 0x54; op2 may be omitted.

/// VANDNPD (code 0x55).
void vandnpd(Xmm xmm, Operand op1, Operand op2 = new Operand())
{
	opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x55);
}

/// VANDNPS (code 0x55).
void vandnps(Xmm xmm, Operand op1, Operand op2 = new Operand())
{
	opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x55);
}

/// VANDPD (code 0x54).
void vandpd(Xmm xmm, Operand op1, Operand op2 = new Operand())
{
	opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x54);
}

/// VANDPS (code 0x54).
void vandps(Xmm xmm, Operand op1, Operand op2 = new Operand())
{
	opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x54);
}
/// VBLENDPD (code 0x0D): `imm` is forwarded unchanged as the trailing immediate.
void vblendpd(Xmm x1, Xmm x2, Operand op, uint8 imm)
{
	opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x0D, imm);
}

/// VBLENDPS (code 0x0C): `imm` is forwarded unchanged as the trailing immediate.
void vblendps(Xmm x1, Xmm x2, Operand op, uint8 imm)
{
	opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x0C, imm);
}

/// VBLENDVPD (code 0x4B): the index of `x4`, shifted left by 4, is passed as the immediate.
void vblendvpd(Xmm x1, Xmm x2, Operand op, Xmm x4)
{
	opAVX_X_X_XM(x1, x2, op, T_0F3A | T_66 | T_YMM, 0x4B, x4.getIdx() << 4);
}

/// VBLENDVPS (code 0x4A): the index of `x4`, shifted left by 4, is passed as the immediate.
void vblendvps(Xmm x1, Xmm x2, Operand op, Xmm x4)
{
	opAVX_X_X_XM(x1, x2, op, T_0F3A | T_66 | T_YMM, 0x4A, x4.getIdx() << 4);
}
/// VBROADCASTF128 (code 0x1A): Ymm destination, memory-only source.
void vbroadcastf128(Ymm y, Address addr)
{
	opAVX_X_XM_IMM(y, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x1A);
}

/// VBROADCASTI128 (code 0x5A): Ymm destination, memory-only source.
void vbroadcasti128(Ymm y, Address addr)
{
	opAVX_X_XM_IMM(y, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x5A);
}
/// VBROADCASTSD (code 0x19).
///
/// Accepted sources are a memory operand, or an XMM register when `y` reports
/// isYMM() or isZMM(). Anything else throws XError(ERR.BAD_COMBINATION).
void vbroadcastsd(Ymm y, Operand op)
{
	if (!(op.isMEM() || (op.isXMM() && (y.isYMM() || y.isZMM()))))
		throw new XError(ERR.BAD_COMBINATION);
	opAVX_X_XM_IMM(y, op, T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW1 | T_N8, 0x19);
}
/// VBROADCASTSS (code 0x18).
///
/// The source must be an XMM register or a memory operand; otherwise
/// XError(ERR.BAD_COMBINATION) is thrown.
void vbroadcastss(Xmm x, Operand op)
{
	if (!op.isXMM() && !op.isMEM())
		throw new XError(ERR.BAD_COMBINATION);
	opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX, 0x18);
}
// vcmpeq* aliases: each forwards to vcmppd/vcmpps/vcmpsd/vcmpss with a fixed
// comparison immediate (eq_os = 0x10, eq_uq = 0x08, eq_us = 0x18, eq = 0x00).

/// vcmppd with immediate 0x10.
void vcmpeq_ospd(Xmm x1, Xmm x2, Operand op) { vcmppd(x1, x2, op, 0x10); }
/// vcmpps with immediate 0x10.
void vcmpeq_osps(Xmm x1, Xmm x2, Operand op) { vcmpps(x1, x2, op, 0x10); }
/// vcmpsd with immediate 0x10.
void vcmpeq_ossd(Xmm x1, Xmm x2, Operand op) { vcmpsd(x1, x2, op, 0x10); }
/// vcmpss with immediate 0x10.
void vcmpeq_osss(Xmm x1, Xmm x2, Operand op) { vcmpss(x1, x2, op, 0x10); }
/// vcmppd with immediate 0x08.
void vcmpeq_uqpd(Xmm x1, Xmm x2, Operand op) { vcmppd(x1, x2, op, 0x08); }
/// vcmpps with immediate 0x08.
void vcmpeq_uqps(Xmm x1, Xmm x2, Operand op) { vcmpps(x1, x2, op, 0x08); }
/// vcmpsd with immediate 0x08.
void vcmpeq_uqsd(Xmm x1, Xmm x2, Operand op) { vcmpsd(x1, x2, op, 0x08); }
/// vcmpss with immediate 0x08.
void vcmpeq_uqss(Xmm x1, Xmm x2, Operand op) { vcmpss(x1, x2, op, 0x08); }
/// vcmppd with immediate 0x18.
void vcmpeq_uspd(Xmm x1, Xmm x2, Operand op) { vcmppd(x1, x2, op, 0x18); }
/// vcmpps with immediate 0x18.
void vcmpeq_usps(Xmm x1, Xmm x2, Operand op) { vcmpps(x1, x2, op, 0x18); }
/// vcmpsd with immediate 0x18.
void vcmpeq_ussd(Xmm x1, Xmm x2, Operand op) { vcmpsd(x1, x2, op, 0x18); }
/// vcmpss with immediate 0x18.
void vcmpeq_usss(Xmm x1, Xmm x2, Operand op) { vcmpss(x1, x2, op, 0x18); }
/// vcmppd with immediate 0x00.
void vcmpeqpd(Xmm x1, Xmm x2, Operand op) { vcmppd(x1, x2, op, 0x00); }
/// vcmpps with immediate 0x00.
void vcmpeqps(Xmm x1, Xmm x2, Operand op) { vcmpps(x1, x2, op, 0x00); }
/// vcmpsd with immediate 0x00.
void vcmpeqsd(Xmm x1, Xmm x2, Operand op) { vcmpsd(x1, x2, op, 0x00); }
/// vcmpss with immediate 0x00.
void vcmpeqss(Xmm x1, Xmm x2, Operand op) { vcmpss(x1, x2, op, 0x00); }
// vcmpfalse* aliases: fixed comparison immediate (false_os = 0x1B, false = 0x0B).

/// vcmppd with immediate 0x1B.
void vcmpfalse_ospd(Xmm x1, Xmm x2, Operand op) { vcmppd(x1, x2, op, 0x1B); }
/// vcmpps with immediate 0x1B.
void vcmpfalse_osps(Xmm x1, Xmm x2, Operand op) { vcmpps(x1, x2, op, 0x1B); }
/// vcmpsd with immediate 0x1B.
void vcmpfalse_ossd(Xmm x1, Xmm x2, Operand op) { vcmpsd(x1, x2, op, 0x1B); }
/// vcmpss with immediate 0x1B.
void vcmpfalse_osss(Xmm x1, Xmm x2, Operand op) { vcmpss(x1, x2, op, 0x1B); }
/// vcmppd with immediate 0x0B.
void vcmpfalsepd(Xmm x1, Xmm x2, Operand op) { vcmppd(x1, x2, op, 0x0B); }
/// vcmpps with immediate 0x0B.
void vcmpfalseps(Xmm x1, Xmm x2, Operand op) { vcmpps(x1, x2, op, 0x0B); }
/// vcmpsd with immediate 0x0B.
void vcmpfalsesd(Xmm x1, Xmm x2, Operand op) { vcmpsd(x1, x2, op, 0x0B); }
/// vcmpss with immediate 0x0B.
void vcmpfalsess(Xmm x1, Xmm x2, Operand op) { vcmpss(x1, x2, op, 0x0B); }
// vcmpge* aliases: fixed comparison immediate (ge_oq = 0x1D, ge = 0x0D).

/// vcmppd with immediate 0x1D.
void vcmpge_oqpd(Xmm x1, Xmm x2, Operand op) { vcmppd(x1, x2, op, 0x1D); }
/// vcmpps with immediate 0x1D.
void vcmpge_oqps(Xmm x1, Xmm x2, Operand op) { vcmpps(x1, x2, op, 0x1D); }
/// vcmpsd with immediate 0x1D.
void vcmpge_oqsd(Xmm x1, Xmm x2, Operand op) { vcmpsd(x1, x2, op, 0x1D); }
/// vcmpss with immediate 0x1D.
void vcmpge_oqss(Xmm x1, Xmm x2, Operand op) { vcmpss(x1, x2, op, 0x1D); }
/// vcmppd with immediate 0x0D.
void vcmpgepd(Xmm x1, Xmm x2, Operand op) { vcmppd(x1, x2, op, 0x0D); }
/// vcmpps with immediate 0x0D.
void vcmpgeps(Xmm x1, Xmm x2, Operand op) { vcmpps(x1, x2, op, 0x0D); }
/// vcmpsd with immediate 0x0D.
void vcmpgesd(Xmm x1, Xmm x2, Operand op) { vcmpsd(x1, x2, op, 0x0D); }
/// vcmpss with immediate 0x0D.
void vcmpgess(Xmm x1, Xmm x2, Operand op) { vcmpss(x1, x2, op, 0x0D); }
// vcmpgt* aliases: fixed comparison immediate (gt_oq = 0x1E, gt = 0x0E).

/// vcmppd with immediate 0x1E.
void vcmpgt_oqpd(Xmm x1, Xmm x2, Operand op) { vcmppd(x1, x2, op, 0x1E); }
/// vcmpps with immediate 0x1E.
void vcmpgt_oqps(Xmm x1, Xmm x2, Operand op) { vcmpps(x1, x2, op, 0x1E); }
/// vcmpsd with immediate 0x1E.
void vcmpgt_oqsd(Xmm x1, Xmm x2, Operand op) { vcmpsd(x1, x2, op, 0x1E); }
/// vcmpss with immediate 0x1E.
void vcmpgt_oqss(Xmm x1, Xmm x2, Operand op) { vcmpss(x1, x2, op, 0x1E); }
/// vcmppd with immediate 0x0E.
void vcmpgtpd(Xmm x1, Xmm x2, Operand op) { vcmppd(x1, x2, op, 0x0E); }
/// vcmpps with immediate 0x0E.
void vcmpgtps(Xmm x1, Xmm x2, Operand op) { vcmpps(x1, x2, op, 0x0E); }
/// vcmpsd with immediate 0x0E.
void vcmpgtsd(Xmm x1, Xmm x2, Operand op) { vcmpsd(x1, x2, op, 0x0E); }
/// vcmpss with immediate 0x0E.
void vcmpgtss(Xmm x1, Xmm x2, Operand op) { vcmpss(x1, x2, op, 0x0E); }
// vcmple* aliases: fixed comparison immediate (le_oq = 0x12, le = 0x02).

/// vcmppd with immediate 0x12.
void vcmple_oqpd(Xmm x1, Xmm x2, Operand op) { vcmppd(x1, x2, op, 0x12); }
/// vcmpps with immediate 0x12.
void vcmple_oqps(Xmm x1, Xmm x2, Operand op) { vcmpps(x1, x2, op, 0x12); }
/// vcmpsd with immediate 0x12.
void vcmple_oqsd(Xmm x1, Xmm x2, Operand op) { vcmpsd(x1, x2, op, 0x12); }
/// vcmpss with immediate 0x12.
void vcmple_oqss(Xmm x1, Xmm x2, Operand op) { vcmpss(x1, x2, op, 0x12); }
/// vcmppd with immediate 0x02.
void vcmplepd(Xmm x1, Xmm x2, Operand op) { vcmppd(x1, x2, op, 0x02); }
/// vcmpps with immediate 0x02.
void vcmpleps(Xmm x1, Xmm x2, Operand op) { vcmpps(x1, x2, op, 0x02); }
/// vcmpsd with immediate 0x02.
void vcmplesd(Xmm x1, Xmm x2, Operand op) { vcmpsd(x1, x2, op, 0x02); }
/// vcmpss with immediate 0x02.
void vcmpless(Xmm x1, Xmm x2, Operand op) { vcmpss(x1, x2, op, 0x02); }
// vcmplt* aliases: fixed comparison immediate (lt_oq = 0x11, lt = 0x01).

/// vcmppd with immediate 0x11.
void vcmplt_oqpd(Xmm x1, Xmm x2, Operand op) { vcmppd(x1, x2, op, 0x11); }
/// vcmpps with immediate 0x11.
void vcmplt_oqps(Xmm x1, Xmm x2, Operand op) { vcmpps(x1, x2, op, 0x11); }
/// vcmpsd with immediate 0x11.
void vcmplt_oqsd(Xmm x1, Xmm x2, Operand op) { vcmpsd(x1, x2, op, 0x11); }
/// vcmpss with immediate 0x11.
void vcmplt_oqss(Xmm x1, Xmm x2, Operand op) { vcmpss(x1, x2, op, 0x11); }
/// vcmppd with immediate 0x01.
void vcmpltpd(Xmm x1, Xmm x2, Operand op) { vcmppd(x1, x2, op, 0x01); }
/// vcmpps with immediate 0x01.
void vcmpltps(Xmm x1, Xmm x2, Operand op) { vcmpps(x1, x2, op, 0x01); }
/// vcmpsd with immediate 0x01.
void vcmpltsd(Xmm x1, Xmm x2, Operand op) { vcmpsd(x1, x2, op, 0x01); }
/// vcmpss with immediate 0x01.
void vcmpltss(Xmm x1, Xmm x2, Operand op) { vcmpss(x1, x2, op, 0x01); }
// vcmpneq* aliases: fixed comparison immediate
// (neq_oq = 0x0C, neq_os = 0x1C, neq_us = 0x14, neq = 0x04).

/// vcmppd with immediate 0x0C.
void vcmpneq_oqpd(Xmm x1, Xmm x2, Operand op) { vcmppd(x1, x2, op, 0x0C); }
/// vcmpps with immediate 0x0C.
void vcmpneq_oqps(Xmm x1, Xmm x2, Operand op) { vcmpps(x1, x2, op, 0x0C); }
/// vcmpsd with immediate 0x0C.
void vcmpneq_oqsd(Xmm x1, Xmm x2, Operand op) { vcmpsd(x1, x2, op, 0x0C); }
/// vcmpss with immediate 0x0C.
void vcmpneq_oqss(Xmm x1, Xmm x2, Operand op) { vcmpss(x1, x2, op, 0x0C); }
/// vcmppd with immediate 0x1C.
void vcmpneq_ospd(Xmm x1, Xmm x2, Operand op) { vcmppd(x1, x2, op, 0x1C); }
/// vcmpps with immediate 0x1C.
void vcmpneq_osps(Xmm x1, Xmm x2, Operand op) { vcmpps(x1, x2, op, 0x1C); }
/// vcmpsd with immediate 0x1C.
void vcmpneq_ossd(Xmm x1, Xmm x2, Operand op) { vcmpsd(x1, x2, op, 0x1C); }
/// vcmpss with immediate 0x1C.
void vcmpneq_osss(Xmm x1, Xmm x2, Operand op) { vcmpss(x1, x2, op, 0x1C); }
/// vcmppd with immediate 0x14.
void vcmpneq_uspd(Xmm x1, Xmm x2, Operand op) { vcmppd(x1, x2, op, 0x14); }
/// vcmpps with immediate 0x14.
void vcmpneq_usps(Xmm x1, Xmm x2, Operand op) { vcmpps(x1, x2, op, 0x14); }
/// vcmpsd with immediate 0x14.
void vcmpneq_ussd(Xmm x1, Xmm x2, Operand op) { vcmpsd(x1, x2, op, 0x14); }
/// vcmpss with immediate 0x14.
void vcmpneq_usss(Xmm x1, Xmm x2, Operand op) { vcmpss(x1, x2, op, 0x14); }
/// vcmppd with immediate 0x04.
void vcmpneqpd(Xmm x1, Xmm x2, Operand op) { vcmppd(x1, x2, op, 0x04); }
/// vcmpps with immediate 0x04.
void vcmpneqps(Xmm x1, Xmm x2, Operand op) { vcmpps(x1, x2, op, 0x04); }
/// vcmpsd with immediate 0x04.
void vcmpneqsd(Xmm x1, Xmm x2, Operand op) { vcmpsd(x1, x2, op, 0x04); }
/// vcmpss with immediate 0x04.
void vcmpneqss(Xmm x1, Xmm x2, Operand op) { vcmpss(x1, x2, op, 0x04); }
// vcmpnge* aliases: fixed comparison immediate (nge_uq = 0x19, nge = 0x09).

/// vcmppd with immediate 0x19.
void vcmpnge_uqpd(Xmm x1, Xmm x2, Operand op) { vcmppd(x1, x2, op, 0x19); }
/// vcmpps with immediate 0x19.
void vcmpnge_uqps(Xmm x1, Xmm x2, Operand op) { vcmpps(x1, x2, op, 0x19); }
/// vcmpsd with immediate 0x19.
void vcmpnge_uqsd(Xmm x1, Xmm x2, Operand op) { vcmpsd(x1, x2, op, 0x19); }
/// vcmpss with immediate 0x19.
void vcmpnge_uqss(Xmm x1, Xmm x2, Operand op) { vcmpss(x1, x2, op, 0x19); }
/// vcmppd with immediate 0x09.
void vcmpngepd(Xmm x1, Xmm x2, Operand op) { vcmppd(x1, x2, op, 0x09); }
/// vcmpps with immediate 0x09.
void vcmpngeps(Xmm x1, Xmm x2, Operand op) { vcmpps(x1, x2, op, 0x09); }
/// vcmpsd with immediate 0x09.
void vcmpngesd(Xmm x1, Xmm x2, Operand op) { vcmpsd(x1, x2, op, 0x09); }
/// vcmpss with immediate 0x09.
void vcmpngess(Xmm x1, Xmm x2, Operand op) { vcmpss(x1, x2, op, 0x09); }
// vcmpngt* aliases: fixed comparison immediate (ngt_uq = 0x1A, ngt = 0x0A).

/// vcmppd with immediate 0x1A.
void vcmpngt_uqpd(Xmm x1, Xmm x2, Operand op) { vcmppd(x1, x2, op, 0x1A); }
/// vcmpps with immediate 0x1A.
void vcmpngt_uqps(Xmm x1, Xmm x2, Operand op) { vcmpps(x1, x2, op, 0x1A); }
/// vcmpsd with immediate 0x1A.
void vcmpngt_uqsd(Xmm x1, Xmm x2, Operand op) { vcmpsd(x1, x2, op, 0x1A); }
/// vcmpss with immediate 0x1A.
void vcmpngt_uqss(Xmm x1, Xmm x2, Operand op) { vcmpss(x1, x2, op, 0x1A); }
/// vcmppd with immediate 0x0A.
void vcmpngtpd(Xmm x1, Xmm x2, Operand op) { vcmppd(x1, x2, op, 0x0A); }
/// vcmpps with immediate 0x0A.
void vcmpngtps(Xmm x1, Xmm x2, Operand op) { vcmpps(x1, x2, op, 0x0A); }
/// vcmpsd with immediate 0x0A.
void vcmpngtsd(Xmm x1, Xmm x2, Operand op) { vcmpsd(x1, x2, op, 0x0A); }
/// vcmpss with immediate 0x0A.
void vcmpngtss(Xmm x1, Xmm x2, Operand op) { vcmpss(x1, x2, op, 0x0A); }
// vcmpnle* aliases: fixed comparison immediate (nle_uq = 0x16, nle = 0x06).

/// vcmppd with immediate 0x16.
void vcmpnle_uqpd(Xmm x1, Xmm x2, Operand op) { vcmppd(x1, x2, op, 0x16); }
/// vcmpps with immediate 0x16.
void vcmpnle_uqps(Xmm x1, Xmm x2, Operand op) { vcmpps(x1, x2, op, 0x16); }
/// vcmpsd with immediate 0x16.
void vcmpnle_uqsd(Xmm x1, Xmm x2, Operand op) { vcmpsd(x1, x2, op, 0x16); }
/// vcmpss with immediate 0x16.
void vcmpnle_uqss(Xmm x1, Xmm x2, Operand op) { vcmpss(x1, x2, op, 0x16); }
/// vcmppd with immediate 0x06.
void vcmpnlepd(Xmm x1, Xmm x2, Operand op) { vcmppd(x1, x2, op, 0x06); }
/// vcmpps with immediate 0x06.
void vcmpnleps(Xmm x1, Xmm x2, Operand op) { vcmpps(x1, x2, op, 0x06); }
/// vcmpsd with immediate 0x06.
void vcmpnlesd(Xmm x1, Xmm x2, Operand op) { vcmpsd(x1, x2, op, 0x06); }
/// vcmpss with immediate 0x06.
void vcmpnless(Xmm x1, Xmm x2, Operand op) { vcmpss(x1, x2, op, 0x06); }
// vcmpnlt* aliases: fixed comparison immediate (nlt_uq = 0x15, nlt = 0x05).

/// vcmppd with immediate 0x15.
void vcmpnlt_uqpd(Xmm x1, Xmm x2, Operand op) { vcmppd(x1, x2, op, 0x15); }
/// vcmpps with immediate 0x15.
void vcmpnlt_uqps(Xmm x1, Xmm x2, Operand op) { vcmpps(x1, x2, op, 0x15); }
/// vcmpsd with immediate 0x15.
void vcmpnlt_uqsd(Xmm x1, Xmm x2, Operand op) { vcmpsd(x1, x2, op, 0x15); }
/// vcmpss with immediate 0x15.
void vcmpnlt_uqss(Xmm x1, Xmm x2, Operand op) { vcmpss(x1, x2, op, 0x15); }
/// vcmppd with immediate 0x05.
void vcmpnltpd(Xmm x1, Xmm x2, Operand op) { vcmppd(x1, x2, op, 0x05); }
/// vcmpps with immediate 0x05.
void vcmpnltps(Xmm x1, Xmm x2, Operand op) { vcmpps(x1, x2, op, 0x05); }
/// vcmpsd with immediate 0x05.
void vcmpnltsd(Xmm x1, Xmm x2, Operand op) { vcmpsd(x1, x2, op, 0x05); }
/// vcmpss with immediate 0x05.
void vcmpnltss(Xmm x1, Xmm x2, Operand op) { vcmpss(x1, x2, op, 0x05); }
// vcmpord* aliases: fixed comparison immediate (ord_s = 0x17, ord = 0x07).

/// vcmppd with immediate 0x17.
void vcmpord_spd(Xmm x1, Xmm x2, Operand op) { vcmppd(x1, x2, op, 0x17); }
/// vcmpps with immediate 0x17.
void vcmpord_sps(Xmm x1, Xmm x2, Operand op) { vcmpps(x1, x2, op, 0x17); }
/// vcmpsd with immediate 0x17.
void vcmpord_ssd(Xmm x1, Xmm x2, Operand op) { vcmpsd(x1, x2, op, 0x17); }
/// vcmpss with immediate 0x17.
void vcmpord_sss(Xmm x1, Xmm x2, Operand op) { vcmpss(x1, x2, op, 0x17); }
/// vcmppd with immediate 0x07.
void vcmpordpd(Xmm x1, Xmm x2, Operand op) { vcmppd(x1, x2, op, 0x07); }
/// vcmpps with immediate 0x07.
void vcmpordps(Xmm x1, Xmm x2, Operand op) { vcmpps(x1, x2, op, 0x07); }
/// vcmpsd with immediate 0x07.
void vcmpordsd(Xmm x1, Xmm x2, Operand op) { vcmpsd(x1, x2, op, 0x07); }
/// vcmpss with immediate 0x07.
void vcmpordss(Xmm x1, Xmm x2, Operand op) { vcmpss(x1, x2, op, 0x07); }
// Core compare emitters. All use code 0xC2 and forward `imm` as the trailing
// immediate; the vcmp<predicate>* aliases call these with a fixed `imm`.

/// VCMPPD (code 0xC2).
void vcmppd(Xmm x1, Xmm x2, Operand op, uint8 imm)
{
	opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0xC2, imm);
}

/// VCMPPS (code 0xC2).
void vcmpps(Xmm x1, Xmm x2, Operand op, uint8 imm)
{
	opAVX_X_X_XM(x1, x2, op, T_0F | T_YMM, 0xC2, imm);
}

/// VCMPSD (code 0xC2).
void vcmpsd(Xmm x1, Xmm x2, Operand op, uint8 imm)
{
	opAVX_X_X_XM(x1, x2, op, T_F2 | T_0F, 0xC2, imm);
}

/// VCMPSS (code 0xC2).
void vcmpss(Xmm x1, Xmm x2, Operand op, uint8 imm)
{
	opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F, 0xC2, imm);
}
// vcmptrue* aliases: fixed comparison immediate (true_us = 0x1F, true = 0x0F).

/// vcmppd with immediate 0x1F.
void vcmptrue_uspd(Xmm x1, Xmm x2, Operand op) { vcmppd(x1, x2, op, 0x1F); }
/// vcmpps with immediate 0x1F.
void vcmptrue_usps(Xmm x1, Xmm x2, Operand op) { vcmpps(x1, x2, op, 0x1F); }
/// vcmpsd with immediate 0x1F.
void vcmptrue_ussd(Xmm x1, Xmm x2, Operand op) { vcmpsd(x1, x2, op, 0x1F); }
/// vcmpss with immediate 0x1F.
void vcmptrue_usss(Xmm x1, Xmm x2, Operand op) { vcmpss(x1, x2, op, 0x1F); }
/// vcmppd with immediate 0x0F.
void vcmptruepd(Xmm x1, Xmm x2, Operand op) { vcmppd(x1, x2, op, 0x0F); }
/// vcmpps with immediate 0x0F.
void vcmptrueps(Xmm x1, Xmm x2, Operand op) { vcmpps(x1, x2, op, 0x0F); }
/// vcmpsd with immediate 0x0F.
void vcmptruesd(Xmm x1, Xmm x2, Operand op) { vcmpsd(x1, x2, op, 0x0F); }
/// vcmpss with immediate 0x0F.
void vcmptruess(Xmm x1, Xmm x2, Operand op) { vcmpss(x1, x2, op, 0x0F); }
// vcmpunord* aliases: fixed comparison immediate (unord_s = 0x13, unord = 0x03).

/// vcmppd with immediate 0x13.
void vcmpunord_spd(Xmm x1, Xmm x2, Operand op) { vcmppd(x1, x2, op, 0x13); }
/// vcmpps with immediate 0x13.
void vcmpunord_sps(Xmm x1, Xmm x2, Operand op) { vcmpps(x1, x2, op, 0x13); }
/// vcmpsd with immediate 0x13.
void vcmpunord_ssd(Xmm x1, Xmm x2, Operand op) { vcmpsd(x1, x2, op, 0x13); }
/// vcmpss with immediate 0x13.
void vcmpunord_sss(Xmm x1, Xmm x2, Operand op) { vcmpss(x1, x2, op, 0x13); }
/// vcmppd with immediate 0x03.
void vcmpunordpd(Xmm x1, Xmm x2, Operand op) { vcmppd(x1, x2, op, 0x03); }
/// vcmpps with immediate 0x03.
void vcmpunordps(Xmm x1, Xmm x2, Operand op) { vcmpps(x1, x2, op, 0x03); }
/// vcmpsd with immediate 0x03.
void vcmpunordsd(Xmm x1, Xmm x2, Operand op) { vcmpsd(x1, x2, op, 0x03); }
/// vcmpss with immediate 0x03.
void vcmpunordss(Xmm x1, Xmm x2, Operand op) { vcmpss(x1, x2, op, 0x03); }
/// VCOMISD (code 0x2F): two-operand form through opAVX_X_XM_IMM.
void vcomisd(Xmm xm, Operand op)
{
	opAVX_X_XM_IMM(xm, op, T_N8 | T_66 | T_0F | T_EW1 | T_EVEX | T_SAE_X, 0x2F);
}

/// VCOMISS (code 0x2F): two-operand form through opAVX_X_XM_IMM.
void vcomiss(Xmm xm, Operand op)
{
	opAVX_X_XM_IMM(xm, op, T_N4 | T_0F | T_EW0 | T_EVEX | T_SAE_X, 0x2F);
}
/// VCVTDQ2PD (code 0xE6): runs checkCvt1 on the operands, then encodes through
/// opVex with a null second register.
void vcvtdq2pd(Xmm x, Operand op)
{
	checkCvt1(x, op);
	opVex(x, null, op, T_0F | T_F3 | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL, 0xE6);
}

/// VCVTDQ2PS (code 0x5B): two-operand form through opAVX_X_XM_IMM.
void vcvtdq2ps(Xmm xm, Operand op)
{
	opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5B);
}

/// VCVTPD2DQ (code 0xE6): delegates to opCvt2.
void vcvtpd2dq(Xmm x, Operand op)
{
	opCvt2(x, op, T_0F | T_F2 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_ER_Z, 0xE6);
}

/// VCVTPD2PS (code 0x5A): delegates to opCvt2.
void vcvtpd2ps(Xmm x, Operand op)
{
	opCvt2(x, op, T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x5A);
}
/// VCVTPH2PS (code 0x13): runs checkCvt1, then encodes through opVex with a
/// null second register.
void vcvtph2ps(Xmm x, Operand op)
{
	checkCvt1(x, op);
	opVex(x, null, op, T_0F38 | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x13);
}

/// VCVTPS2DQ (code 0x5B): two-operand form through opAVX_X_XM_IMM.
void vcvtps2dq(Xmm xm, Operand op)
{
	opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5B);
}

/// VCVTPS2PD (code 0x5A): runs checkCvt1, then encodes through opVex with a
/// null second register.
void vcvtps2pd(Xmm x, Operand op)
{
	checkCvt1(x, op);
	opVex(x, null, op, T_0F | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_SAE_Y, 0x5A);
}

/// VCVTPS2PH (code 0x1D): the destination `op` comes first in the signature;
/// checkCvt1 and opVex still receive the register `x` as their first argument.
void vcvtps2ph(Operand op, Xmm x, uint8 imm)
{
	checkCvt1(x, op);
	opVex(x, null, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x1D, imm);
}
/// VCVTSD2SI (code 0x2D): the 32-bit destination is re-wrapped as an Xmm with
/// the same index so it can go through opAVX_X_X_XM; xm0 fills the middle slot.
// NOTE(review): T_N4 here versus T_N8 in vcvtss2si looks swapped relative to
// the sd/ss operand widths — confirm against the reference encoder before changing.
void vcvtsd2si(Reg32 r, Operand op)
{
	auto dst = new Xmm(r.getIdx());
	opAVX_X_X_XM(dst, xm0, op, T_0F | T_F2 | T_W0 | T_EVEX | T_EW0 | T_N4 | T_ER_X, 0x2D);
}

/// VCVTSD2SS (code 0x5A).
void vcvtsd2ss(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_N8 | T_F2 | T_0F | T_EW1 | T_EVEX | T_ER_X, 0x5A);
}

/// VCVTSI2SD (code 0x2A): delegates to opCvt3 with one common flag set and two
/// alternative flag sets (a W1/EW1 one and a W0/EW0 one).
void vcvtsi2sd(Xmm x1, Xmm x2, Operand op)
{
	opCvt3(x1, x2, op, T_0F | T_F2 | T_EVEX, T_W1 | T_EW1 | T_ER_X | T_N8, T_W0 | T_EW0 | T_N4, 0x2A);
}

/// VCVTSI2SS (code 0x2A): delegates to opCvt3 with one common flag set and two
/// alternative flag sets (a W1/EW1 one and a W0/EW0 one).
void vcvtsi2ss(Xmm x1, Xmm x2, Operand op)
{
	opCvt3(x1, x2, op, T_0F | T_F3 | T_EVEX | T_ER_X, T_W1 | T_EW1 | T_N8, T_W0 | T_EW0 | T_N4, 0x2A);
}

/// VCVTSS2SD (code 0x5A).
void vcvtss2sd(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_N4 | T_F3 | T_0F | T_EW0 | T_EVEX | T_SAE_X, 0x5A);
}

/// VCVTSS2SI (code 0x2D): the 32-bit destination is re-wrapped as an Xmm with
/// the same index; xm0 fills the middle slot.
// NOTE(review): T_N8 here versus T_N4 in vcvtsd2si looks swapped relative to
// the sd/ss operand widths — confirm against the reference encoder before changing.
void vcvtss2si(Reg32 r, Operand op)
{
	auto dst = new Xmm(r.getIdx());
	opAVX_X_X_XM(dst, xm0, op, T_0F | T_F3 | T_W0 | T_EVEX | T_EW0 | T_ER_X | T_N8, 0x2D);
}
/// VCVTTPD2DQ (code 0xE6): delegates to opCvt2.
void vcvttpd2dq(Xmm x, Operand op)
{
	opCvt2(x, op, T_66 | T_0F | T_YMM | T_EVEX | T_EW1 | T_B64 | T_ER_Z, 0xE6);
}

/// VCVTTPS2DQ (code 0x5B): two-operand form through opAVX_X_XM_IMM.
void vcvttps2dq(Xmm xm, Operand op)
{
	opAVX_X_XM_IMM(xm, op, T_F3 | T_0F | T_EW0 | T_YMM | T_EVEX | T_SAE_Z | T_B32, 0x5B);
}

/// VCVTTSD2SI (code 0x2C): the 32-bit destination is re-wrapped as an Xmm with
/// the same index; xm0 fills the middle slot.
// NOTE(review): T_N4 here versus T_N8 in vcvttss2si looks swapped relative to
// the sd/ss operand widths — confirm against the reference encoder before changing.
void vcvttsd2si(Reg32 r, Operand op)
{
	auto dst = new Xmm(r.getIdx());
	opAVX_X_X_XM(dst, xm0, op, T_0F | T_F2 | T_W0 | T_EVEX | T_EW0 | T_N4 | T_SAE_X, 0x2C);
}

/// VCVTTSS2SI (code 0x2C): the 32-bit destination is re-wrapped as an Xmm with
/// the same index; xm0 fills the middle slot.
// NOTE(review): T_N8 here versus T_N4 in vcvttsd2si looks swapped relative to
// the sd/ss operand widths — confirm against the reference encoder before changing.
void vcvttss2si(Reg32 r, Operand op)
{
	auto dst = new Xmm(r.getIdx());
	opAVX_X_X_XM(dst, xm0, op, T_0F | T_F3 | T_W0 | T_EVEX | T_EW0 | T_SAE_X | T_N8, 0x2C);
}
// vdiv*: all four use code 0x5E through opAVX_X_X_XM; op2 may be omitted.

/// VDIVPD (code 0x5E).
void vdivpd(Xmm xmm, Operand op1, Operand op2 = new Operand())
{
	opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x5E);
}

/// VDIVPS (code 0x5E).
void vdivps(Xmm xmm, Operand op1, Operand op2 = new Operand())
{
	opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5E);
}

/// VDIVSD (code 0x5E).
void vdivsd(Xmm xmm, Operand op1, Operand op2 = new Operand())
{
	opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z | T_N8, 0x5E);
}

/// VDIVSS (code 0x5E).
void vdivss(Xmm xmm, Operand op1, Operand op2 = new Operand())
{
	opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z | T_N4, 0x5E);
}
/// VDPPD (code 0x41): note the flag set carries no T_YMM, unlike vdpps.
void vdppd(Xmm x1, Xmm x2, Operand op, uint8 imm)
{
	opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0, 0x41, imm);
}

/// VDPPS (code 0x40).
void vdpps(Xmm x1, Xmm x2, Operand op, uint8 imm)
{
	opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x40, imm);
}
/// VEXTRACTF128 (code 0x19).
///
/// Params:
///   op  = destination; must satisfy isXMEM()
///   y   = source register; must satisfy isYMM()
///   imm = 8-bit immediate forwarded to the encoder
/// Throws: XError(ERR.BAD_COMBINATION) when either operand check fails.
void vextractf128(Operand op, Ymm y, uint8 imm)
{
	// Logical && (not bitwise &) so the check reads as a boolean condition and
	// matches the operand validation used by vinsertf128/vinserti128.
	if (!(op.isXMEM() && y.isYMM())) throw new XError(ERR.BAD_COMBINATION);
	opVex(y, null, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x19, imm);
}
/// VEXTRACTI128 (code 0x39).
///
/// Params:
///   op  = destination; must satisfy isXMEM()
///   y   = source register; must satisfy isYMM()
///   imm = 8-bit immediate forwarded to the encoder
/// Throws: XError(ERR.BAD_COMBINATION) when either operand check fails.
void vextracti128(Operand op, Ymm y, uint8 imm)
{
	// Logical && (not bitwise &) so the check reads as a boolean condition and
	// matches the operand validation used by vinsertf128/vinserti128.
	if (!(op.isXMEM() && y.isYMM())) throw new XError(ERR.BAD_COMBINATION);
	opVex(y, null, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x39, imm);
}
/// VEXTRACTPS (code 0x17).
///
/// The destination must be a 32-bit register or a memory operand and the
/// source must report isXMM(); otherwise XError(ERR.BAD_COMBINATION) is thrown.
void vextractps(Operand op, Xmm x, uint8 imm)
{
	if (!(op.isREG(32) || op.isMEM()) || !x.isXMM())
		throw new XError(ERR.BAD_COMBINATION);
	opVex(x, null, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_N4, 0x17, imm);
}
// vfmadd{132,213,231}{pd,ps,sd,ss}: packed forms use codes 0x98/0xA8/0xB8,
// scalar forms 0x99/0xA9/0xB9. Flag sets repeat per element type.

/// VFMADD132PD (code 0x98).
void vfmadd132pd(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x98);
}

/// VFMADD132PS (code 0x98).
void vfmadd132ps(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x98);
}

/// VFMADD132SD (code 0x99).
void vfmadd132sd(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_W1 | T_EW1 | T_EVEX | T_ER_X, 0x99);
}

/// VFMADD132SS (code 0x99).
void vfmadd132ss(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_W0 | T_EW0 | T_EVEX | T_ER_X, 0x99);
}

/// VFMADD213PD (code 0xA8).
void vfmadd213pd(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0xA8);
}

/// VFMADD213PS (code 0xA8).
void vfmadd213ps(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0xA8);
}

/// VFMADD213SD (code 0xA9).
void vfmadd213sd(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_W1 | T_EW1 | T_EVEX | T_ER_X, 0xA9);
}

/// VFMADD213SS (code 0xA9).
void vfmadd213ss(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_W0 | T_EW0 | T_EVEX | T_ER_X, 0xA9);
}

/// VFMADD231PD (code 0xB8).
void vfmadd231pd(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0xB8);
}

/// VFMADD231PS (code 0xB8).
void vfmadd231ps(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0xB8);
}

/// VFMADD231SD (code 0xB9).
void vfmadd231sd(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_W1 | T_EW1 | T_EVEX | T_ER_X, 0xB9);
}

/// VFMADD231SS (code 0xB9).
void vfmadd231ss(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_W0 | T_EW0 | T_EVEX | T_ER_X, 0xB9);
}
// vfmaddsub{132,213,231}{pd,ps}: packed only, codes 0x96/0xA6/0xB6.

/// VFMADDSUB132PD (code 0x96).
void vfmaddsub132pd(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x96);
}

/// VFMADDSUB132PS (code 0x96).
void vfmaddsub132ps(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x96);
}

/// VFMADDSUB213PD (code 0xA6).
void vfmaddsub213pd(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0xA6);
}

/// VFMADDSUB213PS (code 0xA6).
void vfmaddsub213ps(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0xA6);
}

/// VFMADDSUB231PD (code 0xB6).
void vfmaddsub231pd(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0xB6);
}

/// VFMADDSUB231PS (code 0xB6).
void vfmaddsub231ps(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0xB6);
}
// vfmsub{132,213,231}{pd,ps,sd,ss}: packed forms use codes 0x9A/0xAA/0xBA,
// scalar forms 0x9B/0xAB/0xBB.

/// VFMSUB132PD (code 0x9A).
void vfmsub132pd(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x9A);
}

/// VFMSUB132PS (code 0x9A).
void vfmsub132ps(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x9A);
}

/// VFMSUB132SD (code 0x9B).
void vfmsub132sd(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_W1 | T_EW1 | T_EVEX | T_ER_X, 0x9B);
}

/// VFMSUB132SS (code 0x9B).
void vfmsub132ss(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_W0 | T_EW0 | T_EVEX | T_ER_X, 0x9B);
}

/// VFMSUB213PD (code 0xAA).
void vfmsub213pd(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0xAA);
}

/// VFMSUB213PS (code 0xAA).
void vfmsub213ps(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0xAA);
}

/// VFMSUB213SD (code 0xAB).
void vfmsub213sd(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_W1 | T_EW1 | T_EVEX | T_ER_X, 0xAB);
}

/// VFMSUB213SS (code 0xAB).
void vfmsub213ss(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_W0 | T_EW0 | T_EVEX | T_ER_X, 0xAB);
}

/// VFMSUB231PD (code 0xBA).
void vfmsub231pd(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0xBA);
}

/// VFMSUB231PS (code 0xBA).
void vfmsub231ps(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0xBA);
}

/// VFMSUB231SD (code 0xBB).
void vfmsub231sd(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_W1 | T_EW1 | T_EVEX | T_ER_X, 0xBB);
}

/// VFMSUB231SS (code 0xBB).
void vfmsub231ss(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_W0 | T_EW0 | T_EVEX | T_ER_X, 0xBB);
}
// vfmsubadd{132,213,231}{pd,ps}: packed only, codes 0x97/0xA7/0xB7.

/// VFMSUBADD132PD (code 0x97).
void vfmsubadd132pd(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x97);
}

/// VFMSUBADD132PS (code 0x97).
void vfmsubadd132ps(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x97);
}

/// VFMSUBADD213PD (code 0xA7).
void vfmsubadd213pd(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0xA7);
}

/// VFMSUBADD213PS (code 0xA7).
void vfmsubadd213ps(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0xA7);
}

/// VFMSUBADD231PD (code 0xB7).
void vfmsubadd231pd(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0xB7);
}

/// VFMSUBADD231PS (code 0xB7).
void vfmsubadd231ps(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0xB7);
}
// vfnmadd{132,213,231}{pd,ps,sd,ss}: packed forms use codes 0x9C/0xAC/0xBC,
// scalar forms 0x9D/0xAD/0xBD.

/// VFNMADD132PD (code 0x9C).
void vfnmadd132pd(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x9C);
}

/// VFNMADD132PS (code 0x9C).
void vfnmadd132ps(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x9C);
}

/// VFNMADD132SD (code 0x9D).
void vfnmadd132sd(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_W1 | T_EW1 | T_EVEX | T_ER_X, 0x9D);
}

/// VFNMADD132SS (code 0x9D).
void vfnmadd132ss(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_W0 | T_EW0 | T_EVEX | T_ER_X, 0x9D);
}

/// VFNMADD213PD (code 0xAC).
void vfnmadd213pd(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0xAC);
}

/// VFNMADD213PS (code 0xAC).
void vfnmadd213ps(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0xAC);
}

/// VFNMADD213SD (code 0xAD).
void vfnmadd213sd(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_W1 | T_EW1 | T_EVEX | T_ER_X, 0xAD);
}

/// VFNMADD213SS (code 0xAD).
void vfnmadd213ss(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_W0 | T_EW0 | T_EVEX | T_ER_X, 0xAD);
}

/// VFNMADD231PD (code 0xBC).
void vfnmadd231pd(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0xBC);
}

/// VFNMADD231PS (code 0xBC).
void vfnmadd231ps(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0xBC);
}

/// VFNMADD231SD (code 0xBD).
void vfnmadd231sd(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_W1 | T_EW1 | T_EVEX | T_ER_X, 0xBD);
}

/// VFNMADD231SS (code 0xBD).
void vfnmadd231ss(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_W0 | T_EW0 | T_EVEX | T_ER_X, 0xBD);
}
// vfnmsub{132,213,231}{pd,ps,sd,ss}: packed forms use codes 0x9E/0xAE/0xBE,
// scalar forms 0x9F/0xAF/0xBF.

/// VFNMSUB132PD (code 0x9E).
void vfnmsub132pd(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x9E);
}

/// VFNMSUB132PS (code 0x9E).
void vfnmsub132ps(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x9E);
}

/// VFNMSUB132SD (code 0x9F).
void vfnmsub132sd(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_W1 | T_EW1 | T_EVEX | T_ER_X, 0x9F);
}

/// VFNMSUB132SS (code 0x9F).
void vfnmsub132ss(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_W0 | T_EW0 | T_EVEX | T_ER_X, 0x9F);
}

/// VFNMSUB213PD (code 0xAE).
void vfnmsub213pd(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0xAE);
}

/// VFNMSUB213PS (code 0xAE).
void vfnmsub213ps(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0xAE);
}

/// VFNMSUB213SD (code 0xAF).
void vfnmsub213sd(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_W1 | T_EW1 | T_EVEX | T_ER_X, 0xAF);
}

/// VFNMSUB213SS (code 0xAF).
void vfnmsub213ss(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_W0 | T_EW0 | T_EVEX | T_ER_X, 0xAF);
}

/// VFNMSUB231PD (code 0xBE).
void vfnmsub231pd(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0xBE);
}

/// VFNMSUB231PS (code 0xBE).
void vfnmsub231ps(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0xBE);
}

/// VFNMSUB231SD (code 0xBF).
void vfnmsub231sd(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_W1 | T_EW1 | T_EVEX | T_ER_X, 0xBF);
}

/// VFNMSUB231SS (code 0xBF).
void vfnmsub231ss(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_W0 | T_EW0 | T_EVEX | T_ER_X, 0xBF);
}
// Gather mnemonics: all delegate to opGather. The "d" forms use code 0x92 and
// the "q" forms 0x93; the final integer argument (0, 1, 1, 2) selects a mode
// whose meaning is defined by opGather.

/// VGATHERDPD (code 0x92, mode 0).
void vgatherdpd(Xmm x1, Address addr, Xmm x2)
{
	opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W1, 0x92, 0);
}

/// VGATHERDPS (code 0x92, mode 1).
void vgatherdps(Xmm x1, Address addr, Xmm x2)
{
	opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x92, 1);
}

/// VGATHERQPD (code 0x93, mode 1).
void vgatherqpd(Xmm x1, Address addr, Xmm x2)
{
	opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W1, 0x93, 1);
}

/// VGATHERQPS (code 0x93, mode 2).
void vgatherqps(Xmm x1, Address addr, Xmm x2)
{
	opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x93, 2);
}
/// VGF2P8AFFINEINVQB (code 0xCF): `imm` is forwarded as the trailing immediate.
void vgf2p8affineinvqb(Xmm x1, Xmm x2, Operand op, uint8 imm)
{
	opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_SAE_Z | T_B64, 0xCF, imm);
}

/// VGF2P8AFFINEQB (code 0xCE): `imm` is forwarded as the trailing immediate.
void vgf2p8affineqb(Xmm x1, Xmm x2, Operand op, uint8 imm)
{
	opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_SAE_Z | T_B64, 0xCE, imm);
}

/// VGF2P8MULB (code 0xCF): no immediate.
void vgf2p8mulb(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_SAE_Z, 0xCF);
}
// Horizontal add/sub: vhadd* use code 0x7C, vhsub* use 0x7D; op2 may be omitted.

/// VHADDPD (code 0x7C).
void vhaddpd(Xmm xmm, Operand op1, Operand op2 = new Operand())
{
	opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F | T_YMM, 0x7C);
}

/// VHADDPS (code 0x7C).
void vhaddps(Xmm xmm, Operand op1, Operand op2 = new Operand())
{
	opAVX_X_X_XM(xmm, op1, op2, T_F2 | T_0F | T_YMM, 0x7C);
}

/// VHSUBPD (code 0x7D).
void vhsubpd(Xmm xmm, Operand op1, Operand op2 = new Operand())
{
	opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F | T_YMM, 0x7D);
}

/// VHSUBPS (code 0x7D).
void vhsubps(Xmm xmm, Operand op1, Operand op2 = new Operand())
{
	opAVX_X_X_XM(xmm, op1, op2, T_F2 | T_0F | T_YMM, 0x7D);
}
/// VINSERTF128 (code 0x18).
///
/// Both `y1` and `y2` must report isYMM() and `op` must report isXMEM();
/// otherwise XError(ERR.BAD_COMBINATION) is thrown.
void vinsertf128(Ymm y1, Ymm y2, Operand op, uint8 imm)
{
	if (!y1.isYMM() || !y2.isYMM() || !op.isXMEM())
		throw new XError(ERR.BAD_COMBINATION);
	opVex(y1, y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x18, imm);
}
/// VINSERTI128 (code 0x38).
///
/// Both `y1` and `y2` must report isYMM() and `op` must report isXMEM();
/// otherwise XError(ERR.BAD_COMBINATION) is thrown.
void vinserti128(Ymm y1, Ymm y2, Operand op, uint8 imm)
{
	if (!y1.isYMM() || !y2.isYMM() || !op.isXMEM())
		throw new XError(ERR.BAD_COMBINATION);
	opVex(y1, y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x38, imm);
}
/// VINSERTPS (code 0x21): `imm` is forwarded as the trailing immediate.
void vinsertps(Xmm x1, Xmm x2, Operand op, uint8 imm)
{
	opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_W0 | T_EW0 | T_EVEX, 0x21, imm);
}
/// VLDDQU (code 0xF0): the middle operand slot is filled with cvtIdx0(x).
void vlddqu(Xmm x, Address addr)
{
	opAVX_X_X_XM(x, cvtIdx0(x), addr, T_0F | T_F2 | T_W0 | T_YMM, 0xF0);
}

/// VLDMXCSR (code 0xAE): fixed registers xm2 and xm0 occupy the two register slots.
void vldmxcsr(Address addr)
{
	opAVX_X_X_XM(xm2, xm0, addr, T_0F, 0xAE);
}
/// VMASKMOVDQU (code 0xF7): xm0 fills the middle operand slot.
void vmaskmovdqu(Xmm x1, Xmm x2)
{
	opAVX_X_X_XM(x1, xm0, x2, T_0F | T_66, 0xF7);
}

/// VMASKMOVPD, address-first overload (code 0x2F): note the registers are
/// handed to the encoder in swapped order (x2, then x1).
void vmaskmovpd(Address addr, Xmm x1, Xmm x2)
{
	opAVX_X_X_XM(x2, x1, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x2F);
}

/// VMASKMOVPD, register-first overload (code 0x2D).
void vmaskmovpd(Xmm x1, Xmm x2, Address addr)
{
	opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x2D);
}

/// VMASKMOVPS, address-first overload (code 0x2E): registers are handed to
/// the encoder in swapped order (x2, then x1).
void vmaskmovps(Address addr, Xmm x1, Xmm x2)
{
	opAVX_X_X_XM(x2, x1, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x2E);
}

/// VMASKMOVPS, register-first overload (code 0x2C).
void vmaskmovps(Xmm x1, Xmm x2, Address addr)
{
	opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x2C);
}
// vmax*: all four use code 0x5F through opAVX_X_X_XM; op2 may be omitted.

/// VMAXPD (code 0x5F).
void vmaxpd(Xmm xmm, Operand op1, Operand op2 = new Operand())
{
	opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x5F);
}

/// VMAXPS (code 0x5F).
void vmaxps(Xmm xmm, Operand op1, Operand op2 = new Operand())
{
	opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5F);
}

/// VMAXSD (code 0x5F).
void vmaxsd(Xmm xmm, Operand op1, Operand op2 = new Operand())
{
	opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z | T_N8, 0x5F);
}

/// VMAXSS (code 0x5F).
void vmaxss(Xmm xmm, Operand op1, Operand op2 = new Operand())
{
	opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z | T_N4, 0x5F);
}
// vmin*: all four use code 0x5D through opAVX_X_X_XM; op2 may be omitted.

/// VMINPD (code 0x5D).
void vminpd(Xmm xmm, Operand op1, Operand op2 = new Operand())
{
	opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x5D);
}

/// VMINPS (code 0x5D).
void vminps(Xmm xmm, Operand op1, Operand op2 = new Operand())
{
	opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5D);
}

/// VMINSD (code 0x5D).
void vminsd(Xmm xmm, Operand op1, Operand op2 = new Operand())
{
	opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z | T_N8, 0x5D);
}

/// VMINSS (code 0x5D).
void vminss(Xmm xmm, Operand op1, Operand op2 = new Operand())
{
	opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z | T_N4, 0x5D);
}
// vmovapd/vmovaps: the address-first overloads use code 0x29 and add T_M_K;
// the register-first overloads use code 0x28.

/// VMOVAPD, address-first overload (code 0x29).
void vmovapd(Address addr, Xmm xmm)
{
	opAVX_X_XM_IMM(xmm, addr, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_M_K, 0x29);
}

/// VMOVAPD, register-first overload (code 0x28).
void vmovapd(Xmm xm, Operand op)
{
	opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0x28);
}

/// VMOVAPS, address-first overload (code 0x29).
void vmovaps(Address addr, Xmm xmm)
{
	opAVX_X_XM_IMM(xmm, addr, T_0F | T_EW0 | T_YMM | T_EVEX | T_M_K, 0x29);
}

/// VMOVAPS, register-first overload (code 0x28).
void vmovaps(Xmm xm, Operand op)
{
	opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX, 0x28);
}
/// VMOVD, operand-first overload (code 0x7E).
///
/// `op` must be a 32-bit register or a memory operand; otherwise
/// XError(ERR.BAD_COMBINATION) is thrown. xm0 fills the middle operand slot.
void vmovd(Operand op, Xmm x)
{
	if (!(op.isREG(32) || op.isMEM()))
		throw new XError(ERR.BAD_COMBINATION);
	opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x7E);
}

/// VMOVD, register-first overload (code 0x6E).
///
/// `op` must be a 32-bit register or a memory operand; otherwise
/// XError(ERR.BAD_COMBINATION) is thrown. xm0 fills the middle operand slot.
void vmovd(Xmm x, Operand op)
{
	if (!(op.isREG(32) || op.isMEM()))
		throw new XError(ERR.BAD_COMBINATION);
	opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x6E);
}
/// VMOVDDUP (code 0x12): two-operand form through opAVX_X_XM_IMM; the flag set
/// includes T_DUP together with T_ER_X, T_ER_Y and T_ER_Z.
void vmovddup(Xmm xm, Operand op)
{
	opAVX_X_XM_IMM(xm, op, T_DUP | T_F2 | T_0F | T_EW1 | T_YMM | T_EVEX | T_ER_X | T_ER_Y | T_ER_Z, 0x12);
}
// vmovdqa/vmovdqu: address-first overloads use code 0x7F, register-first
// overloads use code 0x6F.

/// VMOVDQA, address-first overload (code 0x7F).
void vmovdqa(Address addr, Xmm xmm)
{
	opAVX_X_XM_IMM(xmm, addr, T_66 | T_0F | T_YMM, 0x7F);
}

/// VMOVDQA, register-first overload (code 0x6F).
void vmovdqa(Xmm xm, Operand op)
{
	opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_YMM, 0x6F);
}

/// VMOVDQU, address-first overload (code 0x7F).
void vmovdqu(Address addr, Xmm xmm)
{
	opAVX_X_XM_IMM(xmm, addr, T_F3 | T_0F | T_YMM, 0x7F);
}

/// VMOVDQU, register-first overload (code 0x6F).
void vmovdqu(Xmm xm, Operand op)
{
	opAVX_X_XM_IMM(xm, op, T_F3 | T_0F | T_YMM, 0x6F);
}
/// VMOVHLPS (code 0x12).
///
/// `op` may be omitted; when given it must report isXMM(), otherwise
/// XError(ERR.BAD_COMBINATION) is thrown.
void vmovhlps(Xmm x1, Xmm x2, Operand op = new Operand())
{
	if (!(op.isNone() || op.isXMM()))
		throw new XError(ERR.BAD_COMBINATION);
	opAVX_X_X_XM(x1, x2, op, T_0F | T_EVEX | T_EW0, 0x12);
}
/// VMOVHPD, address-first overload (code 0x17): xm0 fills the middle operand slot.
void vmovhpd(Address addr, Xmm x)
{
	opAVX_X_X_XM(x, xm0, addr, T_0F | T_66 | T_EVEX | T_EW1 | T_N8, 0x17);
}

/// VMOVHPD, register-first overload (code 0x16).
///
/// `op2` may be omitted; when given it must be a memory operand, otherwise
/// XError(ERR.BAD_COMBINATION) is thrown.
void vmovhpd(Xmm x, Operand op1, Operand op2 = new Operand())
{
	if (!(op2.isNone() || op2.isMEM()))
		throw new XError(ERR.BAD_COMBINATION);
	opAVX_X_X_XM(x, op1, op2, T_0F | T_66 | T_EVEX | T_EW1 | T_N8, 0x16);
}

/// VMOVHPS, address-first overload (code 0x17): xm0 fills the middle operand slot.
void vmovhps(Address addr, Xmm x)
{
	opAVX_X_X_XM(x, xm0, addr, T_0F | T_EVEX | T_EW0 | T_N8, 0x17);
}

/// VMOVHPS, register-first overload (code 0x16).
///
/// `op2` may be omitted; when given it must be a memory operand, otherwise
/// XError(ERR.BAD_COMBINATION) is thrown.
void vmovhps(Xmm x, Operand op1, Operand op2 = new Operand())
{
	if (!(op2.isNone() || op2.isMEM()))
		throw new XError(ERR.BAD_COMBINATION);
	opAVX_X_X_XM(x, op1, op2, T_0F | T_EVEX | T_EW0 | T_N8, 0x16);
}
4596 void vmovlhps(Xmm x1, Xmm x2, Operand op = new Operand()) { if (!op.isNone() && !op.isXMM()) throw new XError(ERR.BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, T_0F | T_EVEX | T_EW0, 0x16); }
4597 void vmovlpd(Address addr, Xmm x) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_66 | T_EVEX | T_EW1 | T_N8, 0x13); }
4598 void vmovlpd(Xmm x, Operand op1, Operand op2 = new Operand()) { if (!op2.isNone() && !op2.isMEM()) throw new XError(ERR.BAD_COMBINATION); opAVX_X_X_XM(x, op1, op2, T_0F | T_66 | T_EVEX | T_EW1 | T_N8, 0x12); }
4599 void vmovlps(Address addr, Xmm x) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_EVEX | T_EW0 | T_N8, 0x13); }
4600 void vmovlps(Xmm x, Operand op1, Operand op2 = new Operand()) { if (!op2.isNone() && !op2.isMEM()) throw new XError(ERR.BAD_COMBINATION); opAVX_X_X_XM(x, op1, op2, T_0F | T_EVEX | T_EW0 | T_N8, 0x12); }
// vmovmskpd / vmovmskps: throw XError(ERR.BAD_COMBINATION) unless
// r.isBit(i32e). r's index is wrapped in a new Xmm (when x is XMM) or a new
// Ymm (otherwise) so it can be handed to opAVX_X_X_XM, with cvtIdx0(x) as the
// middle argument and opcode 0x50. The pd form adds T_66.
4601 void vmovmskpd(Reg r, Xmm x) { if (!r.isBit(i32e)) throw new XError(ERR.BAD_COMBINATION); opAVX_X_X_XM(x.isXMM() ? new Xmm(r.getIdx()) : new Ymm(r.getIdx()), cvtIdx0(x), x, T_0F | T_66 | T_W0 | T_YMM, 0x50); }
4602 void vmovmskps(Reg r, Xmm x) { if (!r.isBit(i32e)) throw new XError(ERR.BAD_COMBINATION); opAVX_X_X_XM(x.isXMM() ? new Xmm(r.getIdx()) : new Ymm(r.getIdx()), cvtIdx0(x), x, T_0F | T_W0 | T_YMM, 0x50); }
// vmovntdq / vmovntdqa / vmovntpd / vmovntps: call opVex directly, passing
// null as the second register argument. vmovntdqa is the only one declared as
// (Xmm, Address) and the only one using T_0F38 (opcode 0x2A); the others are
// (Address, Xmm) with T_0F and opcodes 0xE7 / 0x2B / 0x2B.
4603 void vmovntdq(Address addr, Xmm x) { opVex(x, null, addr, T_0F | T_66 | T_YMM | T_EVEX | T_EW0, 0xE7); }
4604 void vmovntdqa(Xmm x, Address addr) { opVex(x, null, addr, T_0F38 | T_66 | T_YMM | T_EVEX | T_EW0, 0x2A); }
4605 void vmovntpd(Address addr, Xmm x) { opVex(x, null, addr, T_0F | T_66 | T_YMM | T_EVEX | T_EW1, 0x2B); }
4606 void vmovntps(Address addr, Xmm x) { opVex(x, null, addr, T_0F | T_YMM | T_EVEX | T_EW0, 0x2B); }
// vmovq: three overloads, all calling opAVX_X_X_XM with xm0 as the middle
// argument. For the memory forms the encoding depends on x.getIdx():
//  - (Address, Xmm): fixed flag set; opcode 0xD6 when the index is below 16,
//    otherwise 0x7E.
//  - (Xmm, Address): T_0F | T_F3 with opcode 0x7E when the index is below 16,
//    otherwise the T_66 / T_EVEX / T_EW1 / T_N8 flag set with opcode 0x6E.
//  - (Xmm, Xmm): T_F3 flag set with opcode 0x7E.
4607 void vmovq(Address addr, Xmm x) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_66 | T_EVEX | T_EW1 | T_N8, x.getIdx() < 16 ? 0xD6 : 0x7E); }
4608 void vmovq(Xmm x, Address addr) { int type, code; if (x.getIdx() < 16) { type = T_0F | T_F3; code = 0x7E; } else { type = T_0F | T_66 | T_EVEX | T_EW1 | T_N8; code = 0x6E; } opAVX_X_X_XM(x, xm0, addr, type, code); }
4609 void vmovq(Xmm x1, Xmm x2) { opAVX_X_X_XM(x1, xm0, x2, T_0F | T_F3 | T_EVEX | T_EW1 | T_N8, 0x7E); }
// vmovsd / vmovss: three overloads each, all via opAVX_X_X_XM.
//  - (Address, Xmm): xm0 as middle argument, adds T_M_K, opcode 0x11.
//  - (Xmm, Address): xm0 as middle argument, opcode 0x10.
//  - (Xmm, Xmm, Operand): the optional op, when supplied, must be an XMM
//    register or XError(ERR.BAD_COMBINATION) is thrown; opcode 0x10.
// sd uses T_N8 | T_F2 | T_EW1; ss uses T_N4 | T_F3 | T_EW0.
// vmovshdup / vmovsldup: opAVX_X_XM_IMM with opcodes 0x16 / 0x12.
4610 void vmovsd(Address addr, Xmm x) { opAVX_X_X_XM(x, xm0, addr, T_N8 | T_F2 | T_0F | T_EW1 | T_EVEX | T_M_K, 0x11); }
4611 void vmovsd(Xmm x, Address addr) { opAVX_X_X_XM(x, xm0, addr, T_N8 | T_F2 | T_0F | T_EW1 | T_EVEX, 0x10); }
4612 void vmovsd(Xmm x1, Xmm x2, Operand op = new Operand()) { if (!op.isNone() && !op.isXMM()) throw new XError(ERR.BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, T_N8 | T_F2 | T_0F | T_EW1 | T_EVEX, 0x10); }
4613 void vmovshdup(Xmm xm, Operand op) { opAVX_X_XM_IMM(xm, op, T_F3 | T_0F | T_EW0 | T_YMM | T_EVEX, 0x16); }
4614 void vmovsldup(Xmm xm, Operand op) { opAVX_X_XM_IMM(xm, op, T_F3 | T_0F | T_EW0 | T_YMM | T_EVEX, 0x12); }
4615 void vmovss(Address addr, Xmm x) { opAVX_X_X_XM(x, xm0, addr, T_N4 | T_F3 | T_0F | T_EW0 | T_EVEX | T_M_K, 0x11); }
4616 void vmovss(Xmm x, Address addr) { opAVX_X_X_XM(x, xm0, addr, T_N4 | T_F3 | T_0F | T_EW0 | T_EVEX, 0x10); }
4617 void vmovss(Xmm x1, Xmm x2, Operand op = new Operand()) { if (!op.isNone() && !op.isXMM()) throw new XError(ERR.BAD_COMBINATION); opAVX_X_X_XM(x1, x2, op, T_N4 | T_F3 | T_0F | T_EW0 | T_EVEX, 0x10); }
// vmovupd / vmovups: same overload pattern as vmovapd / vmovaps, routed
// through opAVX_X_XM_IMM. (Address, Xmm) adds T_M_K and uses opcode 0x11;
// (Xmm, Operand) uses opcode 0x10. The pd forms add T_66 and use T_EW1.
4618 void vmovupd(Address addr, Xmm xmm) { opAVX_X_XM_IMM(xmm, addr, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_M_K, 0x11); }
4619 void vmovupd(Xmm xm, Operand op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0x10); }
4620 void vmovups(Address addr, Xmm xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F | T_EW0 | T_YMM | T_EVEX | T_M_K, 0x11); }
4621 void vmovups(Xmm xm, Operand op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX, 0x10); }
// vmpsadbw: passes imm through to opAVX_X_X_XM with opcode 0x42 (T_0F3A).
// vmulpd / vmulps / vmulsd / vmulss: opcode 0x59; op2 defaults to an empty
// Operand so a two-operand call is accepted.
// vorpd / vorps: opcode 0x56, same optional-op2 signature.
// Packed forms carry T_YMM plus T_B64 (pd) or T_B32 (ps); scalar forms carry
// T_N8 (sd) or T_N4 (ss) instead.
4622 void vmpsadbw(Xmm x1, Xmm x2, Operand op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x42, imm); }
4623 void vmulpd(Xmm xmm, Operand op1, Operand op2 = new Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x59); }
4624 void vmulps(Xmm xmm, Operand op1, Operand op2 = new Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x59); }
4625 void vmulsd(Xmm xmm, Operand op1, Operand op2 = new Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z | T_N8, 0x59); }
4626 void vmulss(Xmm xmm, Operand op1, Operand op2 = new Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z | T_N4, 0x59); }
4627 void vorpd(Xmm xmm, Operand op1, Operand op2 = new Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x56); }
4628 void vorps(Xmm xmm, Operand op1, Operand op2 = new Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x56); }
// vpabsb / vpabsd / vpabsw: two-operand forms via opAVX_X_XM_IMM with T_0F38
// and opcodes 0x1C / 0x1E / 0x1D. Only the dword form adds T_EW0 | T_B32.
4629 void vpabsb(Xmm xm, Operand op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x1C); }
4630 void vpabsd(Xmm xm, Operand op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x1E); }
4631 void vpabsw(Xmm xm, Operand op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x1D); }
// vpack* / vpadd*: three-operand forms, all via opAVX_X_X_XM with T_66.
// Every entry uses T_0F except vpackusdw, which uses T_0F38.
// Only the dword/qword variants add a broadcast flag: T_EW0 | T_B32 for
// vpackssdw, vpackusdw and vpaddd; T_EW1 | T_B64 for vpaddq.
4632 void vpackssdw(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x6B); }
4633 void vpacksswb(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0x63); }
4634 void vpackusdw(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x2B); }
4635 void vpackuswb(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0x67); }
4636 void vpaddb(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xFC); }
4637 void vpaddd(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0xFE); }
4638 void vpaddq(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0xD4); }
4639 void vpaddsb(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xEC); }
4640 void vpaddsw(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xED); }
4641 void vpaddusb(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xDC); }
4642 void vpaddusw(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xDD); }
4643 void vpaddw(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xFD); }
// vpalignr / vpand / vpandn / vpavgb / vpavgw / vpblend*: all via
// opAVX_X_X_XM. vpalignr, vpblendd and vpblendw pass their uint8 imm through.
// vpblendvb takes a fourth register instead: x4's index is shifted left by 4
// and passed in the immediate position.
// NOTE(review): vpand, vpandn and the vpblend* entries carry no T_EVEX flag,
// whereas vpalignr and vpavg* do.
4644 void vpalignr(Xmm x1, Xmm x2, Operand op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_YMM | T_EVEX, 0x0F, imm); }
4645 void vpand(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0xDB); }
4646 void vpandn(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0xDF); }
4647 void vpavgb(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xE0); }
4648 void vpavgw(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xE3); }
4649 void vpblendd(Xmm x1, Xmm x2, Operand op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x02, imm); }
4650 void vpblendvb(Xmm x1, Xmm x2, Operand op, Xmm x4) { opAVX_X_X_XM(x1, x2, op, T_0F3A | T_66 | T_YMM, 0x4C, x4.getIdx() << 4); }
4651 void vpblendw(Xmm x1, Xmm x2, Operand op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x0E, imm); }
// vpbroadcastb / d / q / w: op must be an XMM register or a memory operand,
// otherwise XError(ERR.BAD_COMBINATION) is thrown; then opAVX_X_XM_IMM is
// called. The element width shows up in the T_N1 / T_N4 / T_N8 / T_N2 flag
// and the opcode (0x78 / 0x58 / 0x59 / 0x79). The q form additionally sets T_EW1.
4652 void vpbroadcastb(Xmm x, Operand op) { if (!(op.isXMM() || op.isMEM())) throw new XError(ERR.BAD_COMBINATION); opAVX_X_XM_IMM(x, op, T_N1 | T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX, 0x78); }
4653 void vpbroadcastd(Xmm x, Operand op) { if (!(op.isXMM() || op.isMEM())) throw new XError(ERR.BAD_COMBINATION); opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX, 0x58); }
4654 void vpbroadcastq(Xmm x, Operand op) { if (!(op.isXMM() || op.isMEM())) throw new XError(ERR.BAD_COMBINATION); opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_W0 | T_EW1 | T_YMM | T_EVEX, 0x59); }
4655 void vpbroadcastw(Xmm x, Operand op) { if (!(op.isXMM() || op.isMEM())) throw new XError(ERR.BAD_COMBINATION); opAVX_X_XM_IMM(x, op, T_N2 | T_66 | T_0F38 | T_W0 | T_YMM | T_EVEX, 0x79); }
// vpclmulqdq: three operands plus imm via opAVX_X_X_XM, opcode 0x44.
// vpcmpeq* / vpcmpgt*: three-operand forms via opAVX_X_X_XM; the q variants
// use T_0F38, the b/w/d variants use T_0F.
// vpcmpestri / vpcmpestrm / vpcmpistri / vpcmpistrm: two operands plus imm via
// opAVX_X_XM_IMM with T_0F3A and opcodes 0x61 / 0x60 / 0x63 / 0x62; these four
// carry no T_YMM flag.
// NOTE(review): none of the vpcmp* entries carries T_EVEX; vpclmulqdq does.
4656 void vpclmulqdq(Xmm x1, Xmm x2, Operand op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM | T_EVEX, 0x44, imm); }
4657 void vpcmpeqb(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x74); }
4658 void vpcmpeqd(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x76); }
4659 void vpcmpeqq(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x29); }
4660 void vpcmpeqw(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x75); }
4661 void vpcmpestri(Xmm xm, Operand op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x61, imm); }
4662 void vpcmpestrm(Xmm xm, Operand op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x60, imm); }
4663 void vpcmpgtb(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x64); }
4664 void vpcmpgtd(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x66); }
4665 void vpcmpgtq(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x37); }
4666 void vpcmpgtw(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x65); }
4667 void vpcmpistri(Xmm xm, Operand op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x63, imm); }
4668 void vpcmpistrm(Xmm xm, Operand op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x62, imm); }
// vperm2f128 / vperm2i128: throw XError(ERR.BAD_COMBINATION) unless y1 and y2
// are YMM and op satisfies isYMEM(); then call opVex directly with imm
// (opcodes 0x06 / 0x46).
// vpermd / vpermps / vpermq(y1, y2, op) / vpermpd(y1, y2, op): three-operand
// forms via opAVX_X_X_XM with T_0F38.
// vpermilpd / vpermilps: a three-operand overload (T_0F38, 0x0D / 0x0C) and an
// immediate overload (T_0F3A, 0x05 / 0x04) via opAVX_X_XM_IMM.
// vpermpd(y, op, imm) / vpermq(y, op, imm): immediate forms with T_0F3A and
// opcodes 0x01 / 0x00.
// NOTE(review): vpermpd(y1, y2, op) is the only entry here using T_MUST_EVEX
// rather than T_EVEX; it shares opcode 0x16 with vpermps.
4669 void vperm2f128(Ymm y1, Ymm y2, Operand op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) throw new XError(ERR.BAD_COMBINATION); opVex(y1, y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x06, imm); }
4670 void vperm2i128(Ymm y1, Ymm y2, Operand op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) throw new XError(ERR.BAD_COMBINATION); opVex(y1, y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x46, imm); }
4671 void vpermd(Ymm y1, Ymm y2, Operand op) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x36); }
4672 void vpermilpd(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x0D); }
4673 void vpermilpd(Xmm xm, Operand op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_EVEX | T_B64, 0x05, imm); }
4674 void vpermilps(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x0C); }
4675 void vpermilps(Xmm xm, Operand op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_EVEX | T_B32, 0x04, imm); }
4676 void vpermpd(Ymm y, Operand op, uint8 imm) { opAVX_X_XM_IMM(y, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x01, imm); }
4677 void vpermpd(Ymm y1, Ymm y2, Operand op) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x16); }
4678 void vpermps(Ymm y1, Ymm y2, Operand op) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x16); }
4679 void vpermq(Ymm y, Operand op, uint8 imm) { opAVX_X_XM_IMM(y, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x00, imm); }
4680 void vpermq(Ymm y1, Ymm y2, Operand op) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F38 | T_W0 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x36); }
// vpextrb / d / q / w: x must be XMM and op must be a memory operand or a
// register of an accepted width, otherwise XError(ERR.BAD_COMBINATION):
//   vpextrb: isREG(8|16|i32e)   vpextrd: isREG(32)
//   vpextrq: isREG(64)          vpextrw: isREG(16|i32e)
// b / d / q call opVex with null as the second register argument.
// vpextrw has two encodings: when op is a register and x.getIdx() < 16 it
// wraps op's index in a new Xmm and uses opAVX_X_X_XM with T_0F and opcode
// 0xC5; otherwise it uses opVex with T_0F3A and opcode 0x15.
4681 void vpextrb(Operand op, Xmm x, uint8 imm) { if (!((op.isREG(8|16|i32e) || op.isMEM()) && x.isXMM())) throw new XError(ERR.BAD_COMBINATION); opVex(x, null, op, T_0F3A | T_66 | T_EVEX | T_N1, 0x14, imm); }
4682 void vpextrd(Operand op, Xmm x, uint8 imm) { if (!((op.isREG(32) || op.isMEM()) && x.isXMM())) throw new XError(ERR.BAD_COMBINATION); opVex(x, null, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x16, imm); }
4683 void vpextrq(Operand op, Xmm x, uint8 imm) { if (!((op.isREG(64) || op.isMEM()) && x.isXMM())) throw new XError(ERR.BAD_COMBINATION); opVex(x, null, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x16, imm); }
4684 void vpextrw(Operand op, Xmm x, uint8 imm) { if (!((op.isREG(16|i32e) || op.isMEM()) && x.isXMM())) throw new XError(ERR.BAD_COMBINATION); if (op.isREG() && x.getIdx() < 16) { opAVX_X_X_XM(new Xmm(op.getIdx()), xm0, x, T_0F | T_66, 0xC5, imm); } else { opVex(x, null, op, T_0F3A | T_66 | T_EVEX | T_N2, 0x15, imm); } }
// vpgatherdd / dq / qd / qq: forwarded to opGather with T_VSIB set.
// The d-index forms use opcode 0x90, the q-index forms 0x91; dword-element
// forms use T_W0, qword-element forms T_W1.
// NOTE(review): the trailing argument (1, 0, 2, 1) is a mode selector whose
// meaning is defined inside opGather, which is not visible here.
4685 void vpgatherdd(Xmm x1, Address addr, Xmm x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x90, 1); }
4686 void vpgatherdq(Xmm x1, Address addr, Xmm x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W1, 0x90, 0); }
4687 void vpgatherqd(Xmm x1, Address addr, Xmm x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x91, 2); }
4688 void vpgatherqq(Xmm x1, Address addr, Xmm x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W1, 0x91, 1); }
// vphadd* / vphsub*: three-operand forms via opAVX_X_X_XM with
// T_66 | T_0F38 | T_YMM and opcodes 0x01-0x03 (add) and 0x05-0x07 (sub).
// vphminposuw: two-operand form via opAVX_X_XM_IMM, opcode 0x41, no T_YMM.
// None of these entries carries T_EVEX.
4689 void vphaddd(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x02); }
4690 void vphaddsw(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x03); }
4691 void vphaddw(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x01); }
4692 void vphminposuw(Xmm xm, Operand op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38, 0x41); }
4693 void vphsubd(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x06); }
4694 void vphsubsw(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x07); }
4695 void vphsubw(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x05); }
// vpinsrb / d / q / w: x1 and x2 must both be XMM and op must be a memory
// operand or a register of the accepted width — isREG(32) for b, d and w,
// isREG(64) for q — otherwise XError(ERR.BAD_COMBINATION) is thrown.
// All four call opVex with imm. b / d / q use T_0F3A (opcodes 0x20 / 0x22 /
// 0x22); w uses T_0F with opcode 0xC4.
4696 void vpinsrb(Xmm x1, Xmm x2, Operand op, uint8 imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) throw new XError(ERR.BAD_COMBINATION); opVex(x1, x2, op, T_0F3A | T_66 | T_EVEX | T_N1, 0x20, imm); }
4697 void vpinsrd(Xmm x1, Xmm x2, Operand op, uint8 imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) throw new XError(ERR.BAD_COMBINATION); opVex(x1, x2, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x22, imm); }
4698 void vpinsrq(Xmm x1, Xmm x2, Operand op, uint8 imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(64) || op.isMEM()))) throw new XError(ERR.BAD_COMBINATION); opVex(x1, x2, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x22, imm); }
4699 void vpinsrw(Xmm x1, Xmm x2, Operand op, uint8 imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) throw new XError(ERR.BAD_COMBINATION); opVex(x1, x2, op, T_0F | T_66 | T_EVEX | T_N2, 0xC4, imm); }
// vpmaddubsw / vpmaddwd: three-operand forms via opAVX_X_X_XM
// (T_0F38 / 0x04 and T_0F / 0xF5).
// vpmaskmovd / vpmaskmovq: two overloads each.
//  - (Address, x1, x2): arguments are reordered to (x2, x1, addr); opcode 0x8E.
//  - (x1, x2, Address): arguments passed in order; opcode 0x8C.
// The d forms use T_W0, the q forms T_W1; neither carries T_EVEX.
4700 void vpmaddubsw(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x04); }
4701 void vpmaddwd(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xF5); }
4702 void vpmaskmovd(Address addr, Xmm x1, Xmm x2) { opAVX_X_X_XM(x2, x1, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x8E); }
4703 void vpmaskmovd(Xmm x1, Xmm x2, Address addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x8C); }
4704 void vpmaskmovq(Address addr, Xmm x1, Xmm x2) { opAVX_X_X_XM(x2, x1, addr, T_0F38 | T_66 | T_W1 | T_YMM, 0x8E); }
4705 void vpmaskmovq(Xmm x1, Xmm x2, Address addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_66 | T_W1 | T_YMM, 0x8C); }
// vpmax* / vpmin*: three-operand forms via opAVX_X_X_XM with T_66.
// The sw and ub variants use T_0F; sb, sd, ud and uw use T_0F38.
// Only the dword variants (sd, ud) add T_EW0 | T_B32.
4706 void vpmaxsb(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x3C); }
4707 void vpmaxsd(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x3D); }
4708 void vpmaxsw(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xEE); }
4709 void vpmaxub(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xDE); }
4710 void vpmaxud(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x3F); }
4711 void vpmaxuw(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x3E); }
4712 void vpminsb(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x38); }
4713 void vpminsd(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x39); }
4714 void vpminsw(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xEA); }
4715 void vpminub(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xDA); }
4716 void vpminud(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x3B); }
4717 void vpminuw(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x3A); }
// vpmovmskb: x must be of Kind.XMM or Kind.YMM, otherwise
// XError(ERR.BAD_COMBINATION) is thrown. r's index is wrapped in a new Ymm
// (when x is YMM) or Xmm (otherwise) and passed to opVex with null as the
// second register argument; opcode 0xD7.
4718 void vpmovmskb(Reg32e r, Xmm x) { if (!x.isKind(Kind.XMM | Kind.YMM)) throw new XError(ERR.BAD_COMBINATION); opVex(x.isYMM() ? new Ymm(r.getIdx()) : new Xmm(r.getIdx()), null, x, T_0F | T_66 | T_YMM, 0xD7); }
// vpmovsx* / vpmovzx*: two-operand forms via opAVX_X_XM_IMM, all with
// T_N_VL | T_66 | T_0F38 | T_YMM | T_EVEX. The sx opcodes run 0x20-0x25 and
// the zx opcodes mirror them at 0x30-0x35. The T_N2 / T_N4 / T_N8 flag varies
// with the source/destination width pair; only the dq variants add T_EW0.
4719 void vpmovsxbd(Xmm xm, Operand op) { opAVX_X_XM_IMM(xm, op, T_N4 | T_N_VL | T_66 | T_0F38 | T_YMM | T_EVEX, 0x21); }
4720 void vpmovsxbq(Xmm xm, Operand op) { opAVX_X_XM_IMM(xm, op, T_N2 | T_N_VL | T_66 | T_0F38 | T_YMM | T_EVEX, 0x22); }
4721 void vpmovsxbw(Xmm xm, Operand op) { opAVX_X_XM_IMM(xm, op, T_N8 | T_N_VL | T_66 | T_0F38 | T_YMM | T_EVEX, 0x20); }
4722 void vpmovsxdq(Xmm xm, Operand op) { opAVX_X_XM_IMM(xm, op, T_N8 | T_N_VL | T_66 | T_0F38 | T_EW0 | T_YMM | T_EVEX, 0x25); }
4723 void vpmovsxwd(Xmm xm, Operand op) { opAVX_X_XM_IMM(xm, op, T_N8 | T_N_VL | T_66 | T_0F38 | T_YMM | T_EVEX, 0x23); }
4724 void vpmovsxwq(Xmm xm, Operand op) { opAVX_X_XM_IMM(xm, op, T_N4 | T_N_VL | T_66 | T_0F38 | T_YMM | T_EVEX, 0x24); }
4725 void vpmovzxbd(Xmm xm, Operand op) { opAVX_X_XM_IMM(xm, op, T_N4 | T_N_VL | T_66 | T_0F38 | T_YMM | T_EVEX, 0x31); }
4726 void vpmovzxbq(Xmm xm, Operand op) { opAVX_X_XM_IMM(xm, op, T_N2 | T_N_VL | T_66 | T_0F38 | T_YMM | T_EVEX, 0x32); }
4727 void vpmovzxbw(Xmm xm, Operand op) { opAVX_X_XM_IMM(xm, op, T_N8 | T_N_VL | T_66 | T_0F38 | T_YMM | T_EVEX, 0x30); }
4728 void vpmovzxdq(Xmm xm, Operand op) { opAVX_X_XM_IMM(xm, op, T_N8 | T_N_VL | T_66 | T_0F38 | T_EW0 | T_YMM | T_EVEX, 0x35); }
4729 void vpmovzxwd(Xmm xm, Operand op) { opAVX_X_XM_IMM(xm, op, T_N8 | T_N_VL | T_66 | T_0F38 | T_YMM | T_EVEX, 0x33); }
4730 void vpmovzxwq(Xmm xm, Operand op) { opAVX_X_XM_IMM(xm, op, T_N4 | T_N_VL | T_66 | T_0F38 | T_YMM | T_EVEX, 0x34); }
// vpmul*: three-operand forms via opAVX_X_X_XM with T_66.
// vpmuldq, vpmulhrsw and vpmulld use T_0F38; the rest use T_0F.
// vpmuldq and vpmuludq add T_EW1 | T_B64; vpmulld adds T_EW0 | T_B32.
4731 void vpmuldq(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x28); }
4732 void vpmulhrsw(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x0B); }
4733 void vpmulhuw(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xE4); }
4734 void vpmulhw(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xE5); }
4735 void vpmulld(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x40); }
4736 void vpmullw(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xD5); }
4737 void vpmuludq(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0xF4); }
// vpor / vpsadbw / vpshufb / vpsign*: three-operand forms via opAVX_X_X_XM.
// vpshufd / vpshufhw / vpshuflw: immediate forms via opAVX_X_XM_IMM that share
// opcode 0x70 and are told apart by T_66 / T_F3 / T_F2 respectively.
// NOTE(review): vpor and the vpsign* entries carry no T_EVEX flag.
4738 void vpor(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0xEB); }
4739 void vpsadbw(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xF6); }
4740 void vpshufb(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x00); }
4741 void vpshufd(Xmm xm, Operand op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x70, imm); }
4742 void vpshufhw(Xmm xm, Operand op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_F3 | T_0F | T_YMM | T_EVEX, 0x70, imm); }
4743 void vpshuflw(Xmm xm, Operand op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_F2 | T_0F | T_YMM | T_EVEX, 0x70, imm); }
4744 void vpsignb(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x08); }
4745 void vpsignd(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x0A); }
4746 void vpsignw(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x09); }
// Packed shifts (vpsll* / vpsra* / vpsrl*). Three call shapes appear:
//  - (Xmm, Operand, uint8 imm): a new Xmm of x's kind with a fixed index is
//    passed as the first argument, x as the second; flags include T_MEM_EVEX
//    and the opcode is 0x71 (w), 0x72 (d) or 0x73 (q / dq). The fixed index is
//    6 for sll, 4 for sra, 2 for srl, 7 for vpslldq and 3 for vpsrldq.
//    NOTE(review): that index presumably selects the opcode extension in the
//    ModRM reg field — confirm in opAVX_X_X_XM / opVex.
//  - (Xmm, Xmm, Operand) with T_N16: shift count taken from op; T_0F opcodes
//    0xF1-0xF3 (sll), 0xE1-0xE2 (sra), 0xD1-0xD3 (srl).
//  - vpsllv* / vpsrav* / vpsrlv*: T_0F38 forms with T_B32 / T_B64 and opcodes
//    0x47 / 0x46 / 0x45.
4747 void vpslld(Xmm x, Operand op, uint8 imm) { opAVX_X_X_XM(new Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); }
4748 void vpslld(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW0 | T_YMM | T_EVEX, 0xF2); }
4749 void vpslldq(Xmm x, Operand op, uint8 imm) { opAVX_X_X_XM(new Xmm(x.getKind(), 7), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x73, imm); }
4750 void vpsllq(Xmm x, Operand op, uint8 imm) { opAVX_X_X_XM(new Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64 | T_MEM_EVEX, 0x73, imm); }
4751 void vpsllq(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0xF3); }
4752 void vpsllvd(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x47); }
4753 void vpsllvq(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x47); }
4754 void vpsllw(Xmm x, Operand op, uint8 imm) { opAVX_X_X_XM(new Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); }
4755 void vpsllw(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_YMM | T_EVEX, 0xF1); }
4756 void vpsrad(Xmm x, Operand op, uint8 imm) { opAVX_X_X_XM(new Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); }
4757 void vpsrad(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW0 | T_YMM | T_EVEX, 0xE2); }
4758 void vpsravd(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x46); }
4759 void vpsraw(Xmm x, Operand op, uint8 imm) { opAVX_X_X_XM(new Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); }
4760 void vpsraw(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_YMM | T_EVEX, 0xE1); }
4761 void vpsrld(Xmm x, Operand op, uint8 imm) { opAVX_X_X_XM(new Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); }
4762 void vpsrld(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW0 | T_YMM | T_EVEX, 0xD2); }
4763 void vpsrldq(Xmm x, Operand op, uint8 imm) { opAVX_X_X_XM(new Xmm(x.getKind(), 3), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x73, imm); }
4764 void vpsrlq(Xmm x, Operand op, uint8 imm) { opAVX_X_X_XM(new Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64 | T_MEM_EVEX, 0x73, imm); }
4765 void vpsrlq(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0xD3); }
4766 void vpsrlvd(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x45); }
4767 void vpsrlvq(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x45); }
4768 void vpsrlw(Xmm x, Operand op, uint8 imm) { opAVX_X_X_XM(new Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); }
4769 void vpsrlw(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_YMM | T_EVEX, 0xD1); }
// vpsub*: three-operand forms via opAVX_X_X_XM with T_66 | T_0F.
// vpsubd adds T_EW0 | T_B32 and vpsubq adds T_EW1 | T_B64; the byte/word and
// saturating variants carry neither.
4770 void vpsubb(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xF8); }
4771 void vpsubd(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0xFA); }
4772 void vpsubq(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0xFB); }
4773 void vpsubsb(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xE8); }
4774 void vpsubsw(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xE9); }
4775 void vpsubusb(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xD8); }
4776 void vpsubusw(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xD9); }
4777 void vpsubw(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xF9); }
// vptest: two-operand form via opAVX_X_XM_IMM, T_0F38, opcode 0x17.
// vpunpckh* / vpunpckl*: three-operand forms via opAVX_X_X_XM with
// T_66 | T_0F; high variants use opcodes 0x68-0x6A and 0x6D, low variants
// 0x60-0x62 and 0x6C. The dq variants add T_EW0 | T_B32, the qdq variants
// T_EW1 | T_B64.
// vpxor: three-operand form, opcode 0xEF.
// NOTE(review): vptest and vpxor carry no T_EVEX flag.
4778 void vptest(Xmm xm, Operand op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM, 0x17); }
4779 void vpunpckhbw(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0x68); }
4780 void vpunpckhdq(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x6A); }
4781 void vpunpckhqdq(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0x6D); }
4782 void vpunpckhwd(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0x69); }
4783 void vpunpcklbw(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0x60); }
4784 void vpunpckldq(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x62); }
4785 void vpunpcklqdq(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0x6C); }
4786 void vpunpcklwd(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0x61); }
4787 void vpxor(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0xEF); }
// vrcpps / vrsqrtps: two-operand packed forms via opAVX_X_XM_IMM
// (T_0F | T_YMM, opcodes 0x53 / 0x52).
// vrcpss / vrsqrtss: three-operand scalar forms via opAVX_X_X_XM
// (T_F3 | T_0F, same opcodes).
// vroundpd / vroundps: two operands plus imm (T_0F3A | T_YMM, 0x09 / 0x08).
// vroundsd / vroundss: three operands plus imm (T_0F3A | T_W0, 0x0B / 0x0A).
// None of these entries carries T_EVEX.
4788 void vrcpps(Xmm xm, Operand op) { opAVX_X_XM_IMM(xm, op, T_0F | T_YMM, 0x53); }
4789 void vrcpss(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F, 0x53); }
4790 void vroundpd(Xmm xm, Operand op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A | T_YMM, 0x09, imm); }
4791 void vroundps(Xmm xm, Operand op, uint8 imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A | T_YMM, 0x08, imm); }
4792 void vroundsd(Xmm x1, Xmm x2, Operand op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0, 0x0B, imm); }
4793 void vroundss(Xmm x1, Xmm x2, Operand op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0, 0x0A, imm); }
4794 void vrsqrtps(Xmm xm, Operand op) { opAVX_X_XM_IMM(xm, op, T_0F | T_YMM, 0x52); }
4795 void vrsqrtss(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F, 0x52); }
// vshufpd / vshufps: three operands plus imm via opAVX_X_X_XM, opcode 0xC6.
// vsqrtpd / vsqrtps: two-operand packed forms via opAVX_X_XM_IMM with T_ER_Z.
// vsqrtsd / vsqrtss: three-operand scalar forms via opAVX_X_X_XM with T_ER_X
// and T_N8 / T_N4. All four vsqrt* entries use opcode 0x51.
4796 void vshufpd(Xmm x1, Xmm x2, Operand op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0xC6, imm); }
4797 void vshufps(Xmm x1, Xmm x2, Operand op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0xC6, imm); }
4798 void vsqrtpd(Xmm xm, Operand op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x51); }
4799 void vsqrtps(Xmm xm, Operand op) { opAVX_X_XM_IMM(xm, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x51); }
4800 void vsqrtsd(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_F2 | T_0F | T_EW1 | T_EVEX | T_ER_X, 0x51); }
4801 void vsqrtss(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_F3 | T_0F | T_EW0 | T_EVEX | T_ER_X, 0x51); }
// vstmxcsr: calls opAVX_X_X_XM with the fixed registers xm3 and xm0, the
// caller's address, T_0F and opcode 0xAE.
// NOTE(review): xm3 presumably supplies the opcode-extension digit (3) for the
// ModRM reg field rather than naming a real operand — confirm in the helper.
4802 void vstmxcsr(Address addr) { opAVX_X_X_XM(xm3, xm0, addr, T_0F, 0xAE); }
// vsubpd / vsubps / vsubsd / vsubss: opcode 0x5C via opAVX_X_X_XM. op2
// defaults to an empty Operand so a two-operand call is accepted. Packed forms
// carry T_YMM plus T_B64 / T_B32; scalar forms carry T_N8 / T_N4 with
// T_F2 / T_F3.
4803 void vsubpd(Xmm xmm, Operand op1, Operand op2 = new Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x5C); }
4804 void vsubps(Xmm xmm, Operand op1, Operand op2 = new Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5C); }
4805 void vsubsd(Xmm xmm, Operand op1, Operand op2 = new Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z | T_N8, 0x5C); }
4806 void vsubss(Xmm xmm, Operand op1, Operand op2 = new Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z | T_N4, 0x5C); }
// vtestpd / vtestps: two-operand forms via opAVX_X_XM_IMM
// (T_66 | T_0F38 | T_YMM, opcodes 0x0F / 0x0E); no T_EVEX flag.
// vucomisd / vucomiss: two-operand forms with T_SAE_X, opcode 0x2E; the sd
// form adds T_66 and uses T_N8 | T_EW1, the ss form T_N4 | T_EW0.
// vunpckhpd / hps / lpd / lps: three-operand forms, opcodes 0x15 (high) and
// 0x14 (low).
// vxorpd / vxorps: opcode 0x57; op2 defaults to an empty Operand so a
// two-operand call is accepted.
4807 void vtestpd(Xmm xm, Operand op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM, 0x0F); }
4808 void vtestps(Xmm xm, Operand op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM, 0x0E); }
4809 void vucomisd(Xmm xm, Operand op) { opAVX_X_XM_IMM(xm, op, T_N8 | T_66 | T_0F | T_EW1 | T_EVEX | T_SAE_X, 0x2E); }
4810 void vucomiss(Xmm xm, Operand op) { opAVX_X_XM_IMM(xm, op, T_N4 | T_0F | T_EW0 | T_EVEX | T_SAE_X, 0x2E); }
4811 void vunpckhpd(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0x15); }
4812 void vunpckhps(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x15); }
4813 void vunpcklpd(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0x14); }
4814 void vunpcklps(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x14); }
4815 void vxorpd(Xmm xmm, Operand op1, Operand op2 = new Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x57); }
4816 void vxorps(Xmm xmm, Operand op1, Operand op2 = new Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x57); }
// vzeroall / vzeroupper: take no operands and emit a fixed three-byte
// sequence through db(): C5 FC 77 for vzeroall, C5 F8 77 for vzeroupper.
// The two differ only in the middle byte.
4817 void vzeroall() { db(0xC5); db(0xFC); db(0x77); }
4818 void vzeroupper() { db(0xC5); db(0xF8); db(0x77); }
4819 
// Operand-less instructions emitted as raw bytes through db():
//   wait   -> 9B
//   wbinvd -> 0F 09
//   wrmsr  -> 0F 30
4820 void wait() { db(0x9B); }
4821 void wbinvd() { db(0x0F); db(0x09); }
4822 void wrmsr() { db(0x0F); db(0x30); }
4823 
// xadd: delegates to opModRM. The third argument is true only when op and reg
// are both registers of equal bit width; the fourth is op.isMEM(). The opcode
// bytes are 0x0F followed by 0xC0 for an 8-bit reg or 0xC1 otherwise.
// xgetbv / xlatb: raw byte sequences 0F 01 D0 and D7 via db().
// xor: the immediate overload goes to opRM_I (0x30, 6); the two-operand
// overload goes to opRM_RM (0x30).
// xorpd / xorps: opGen with opcode 0x57 and the isXMM_XMMorMEM checker; xorpd
// passes 0x66 as the fourth argument, xorps passes 0x100.
// NOTE(review): 0x100 is presumably opGen's "no prefix byte" marker — confirm.
4824 void xadd(Operand op, Reg reg) { opModRM(reg, op, (op.isREG() && reg.isREG() && op.getBit() == reg.getBit()), op.isMEM(), 0x0F, 0xC0 | (reg.isBit(8) ? 0 : 1)); }
4825 void xgetbv() { db(0x0F); db(0x01); db(0xD0); }
4826 void xlatb() { db(0xD7); }
4827 void xor(Operand op, uint32 imm) { opRM_I(op, imm, 0x30, 6); }
4828 void xor(Operand op1, Operand op2) { opRM_RM(op1, op2, 0x30); }
4829 void xorpd(Xmm xmm, Operand op) { opGen(xmm, op, 0x57, 0x66, &isXMM_XMMorMEM); }
4830 void xorps(Xmm xmm, Operand op) { opGen(xmm, op, 0x57, 0x100, &isXMM_XMMorMEM); }
4831 
4832 version(XBYAK_ENABLE_OMITTED_OPERAND)
4833 {
// Omitted-operand convenience overloads (compiled only under
// version(XBYAK_ENABLE_OMITTED_OPERAND)): each forwards to the same-named
// overload with one more register parameter, passing the first register twice.
4834 void vblendpd(Xmm x, Operand op, uint8 imm) { vblendpd(x, x, op, imm); }
4835 void vblendps(Xmm x, Operand op, uint8 imm) { vblendps(x, x, op, imm); }
4836 void vblendvpd(Xmm x1, Operand op, Xmm x4) { vblendvpd(x1, x1, op, x4); }
4837 void vblendvps(Xmm x1, Operand op, Xmm x4) { vblendvps(x1, x1, op, x4); }
// Omitted-operand overloads for the vcmp<predicate>{pd,ps,sd,ss} family:
// every two-argument method below calls the three-argument method of the same
// name with x supplied as both the first and second argument.
4838 void vcmpeq_ospd(Xmm x, Operand op) { vcmpeq_ospd(x, x, op); }
4839 void vcmpeq_osps(Xmm x, Operand op) { vcmpeq_osps(x, x, op); }
4840 void vcmpeq_ossd(Xmm x, Operand op) { vcmpeq_ossd(x, x, op); }
4841 void vcmpeq_osss(Xmm x, Operand op) { vcmpeq_osss(x, x, op); }
4842 void vcmpeq_uqpd(Xmm x, Operand op) { vcmpeq_uqpd(x, x, op); }
4843 void vcmpeq_uqps(Xmm x, Operand op) { vcmpeq_uqps(x, x, op); }
4844 void vcmpeq_uqsd(Xmm x, Operand op) { vcmpeq_uqsd(x, x, op); }
4845 void vcmpeq_uqss(Xmm x, Operand op) { vcmpeq_uqss(x, x, op); }
4846 void vcmpeq_uspd(Xmm x, Operand op) { vcmpeq_uspd(x, x, op); }
4847 void vcmpeq_usps(Xmm x, Operand op) { vcmpeq_usps(x, x, op); }
4848 void vcmpeq_ussd(Xmm x, Operand op) { vcmpeq_ussd(x, x, op); }
4849 void vcmpeq_usss(Xmm x, Operand op) { vcmpeq_usss(x, x, op); }
4850 void vcmpeqpd(Xmm x, Operand op) { vcmpeqpd(x, x, op); }
4851 void vcmpeqps(Xmm x, Operand op) { vcmpeqps(x, x, op); }
4852 void vcmpeqsd(Xmm x, Operand op) { vcmpeqsd(x, x, op); }
4853 void vcmpeqss(Xmm x, Operand op) { vcmpeqss(x, x, op); }
4854 void vcmpfalse_ospd(Xmm x, Operand op) { vcmpfalse_ospd(x, x, op); }
4855 void vcmpfalse_osps(Xmm x, Operand op) { vcmpfalse_osps(x, x, op); }
4856 void vcmpfalse_ossd(Xmm x, Operand op) { vcmpfalse_ossd(x, x, op); }
4857 void vcmpfalse_osss(Xmm x, Operand op) { vcmpfalse_osss(x, x, op); }
4858 void vcmpfalsepd(Xmm x, Operand op) { vcmpfalsepd(x, x, op); }
4859 void vcmpfalseps(Xmm x, Operand op) { vcmpfalseps(x, x, op); }
4860 void vcmpfalsesd(Xmm x, Operand op) { vcmpfalsesd(x, x, op); }
4861 void vcmpfalsess(Xmm x, Operand op) { vcmpfalsess(x, x, op); }
4862 void vcmpge_oqpd(Xmm x, Operand op) { vcmpge_oqpd(x, x, op); }
4863 void vcmpge_oqps(Xmm x, Operand op) { vcmpge_oqps(x, x, op); }
4864 void vcmpge_oqsd(Xmm x, Operand op) { vcmpge_oqsd(x, x, op); }
4865 void vcmpge_oqss(Xmm x, Operand op) { vcmpge_oqss(x, x, op); }
4866 void vcmpgepd(Xmm x, Operand op) { vcmpgepd(x, x, op); }
4867 void vcmpgeps(Xmm x, Operand op) { vcmpgeps(x, x, op); }
4868 void vcmpgesd(Xmm x, Operand op) { vcmpgesd(x, x, op); }
4869 void vcmpgess(Xmm x, Operand op) { vcmpgess(x, x, op); }
4870 void vcmpgt_oqpd(Xmm x, Operand op) { vcmpgt_oqpd(x, x, op); }
4871 void vcmpgt_oqps(Xmm x, Operand op) { vcmpgt_oqps(x, x, op); }
4872 void vcmpgt_oqsd(Xmm x, Operand op) { vcmpgt_oqsd(x, x, op); }
4873 void vcmpgt_oqss(Xmm x, Operand op) { vcmpgt_oqss(x, x, op); }
4874 void vcmpgtpd(Xmm x, Operand op) { vcmpgtpd(x, x, op); }
4875 void vcmpgtps(Xmm x, Operand op) { vcmpgtps(x, x, op); }
4876 void vcmpgtsd(Xmm x, Operand op) { vcmpgtsd(x, x, op); }
4877 void vcmpgtss(Xmm x, Operand op) { vcmpgtss(x, x, op); }
4878 void vcmple_oqpd(Xmm x, Operand op) { vcmple_oqpd(x, x, op); }
4879 void vcmple_oqps(Xmm x, Operand op) { vcmple_oqps(x, x, op); }
4880 void vcmple_oqsd(Xmm x, Operand op) { vcmple_oqsd(x, x, op); }
4881 void vcmple_oqss(Xmm x, Operand op) { vcmple_oqss(x, x, op); }
4882 void vcmplepd(Xmm x, Operand op) { vcmplepd(x, x, op); }
4883 void vcmpleps(Xmm x, Operand op) { vcmpleps(x, x, op); }
4884 void vcmplesd(Xmm x, Operand op) { vcmplesd(x, x, op); }
4885 void vcmpless(Xmm x, Operand op) { vcmpless(x, x, op); }
4886 void vcmplt_oqpd(Xmm x, Operand op) { vcmplt_oqpd(x, x, op); }
4887 void vcmplt_oqps(Xmm x, Operand op) { vcmplt_oqps(x, x, op); }
4888 void vcmplt_oqsd(Xmm x, Operand op) { vcmplt_oqsd(x, x, op); }
4889 void vcmplt_oqss(Xmm x, Operand op) { vcmplt_oqss(x, x, op); }
4890 void vcmpltpd(Xmm x, Operand op) { vcmpltpd(x, x, op); }
4891 void vcmpltps(Xmm x, Operand op) { vcmpltps(x, x, op); }
4892 void vcmpltsd(Xmm x, Operand op) { vcmpltsd(x, x, op); }
4893 void vcmpltss(Xmm x, Operand op) { vcmpltss(x, x, op); }
4894 void vcmpneq_oqpd(Xmm x, Operand op) { vcmpneq_oqpd(x, x, op); }
4895 void vcmpneq_oqps(Xmm x, Operand op) { vcmpneq_oqps(x, x, op); }
4896 void vcmpneq_oqsd(Xmm x, Operand op) { vcmpneq_oqsd(x, x, op); }
4897 void vcmpneq_oqss(Xmm x, Operand op) { vcmpneq_oqss(x, x, op); }
4898 void vcmpneq_ospd(Xmm x, Operand op) { vcmpneq_ospd(x, x, op); }
4899 void vcmpneq_osps(Xmm x, Operand op) { vcmpneq_osps(x, x, op); }
4900 void vcmpneq_ossd(Xmm x, Operand op) { vcmpneq_ossd(x, x, op); }
4901 void vcmpneq_osss(Xmm x, Operand op) { vcmpneq_osss(x, x, op); }
4902 void vcmpneq_uspd(Xmm x, Operand op) { vcmpneq_uspd(x, x, op); }
4903 void vcmpneq_usps(Xmm x, Operand op) { vcmpneq_usps(x, x, op); }
4904 void vcmpneq_ussd(Xmm x, Operand op) { vcmpneq_ussd(x, x, op); }
4905 void vcmpneq_usss(Xmm x, Operand op) { vcmpneq_usss(x, x, op); }
4906 void vcmpneqpd(Xmm x, Operand op) { vcmpneqpd(x, x, op); }
4907 void vcmpneqps(Xmm x, Operand op) { vcmpneqps(x, x, op); }
4908 void vcmpneqsd(Xmm x, Operand op) { vcmpneqsd(x, x, op); }
4909 void vcmpneqss(Xmm x, Operand op) { vcmpneqss(x, x, op); }
4910 void vcmpnge_uqpd(Xmm x, Operand op) { vcmpnge_uqpd(x, x, op); }
4911 void vcmpnge_uqps(Xmm x, Operand op) { vcmpnge_uqps(x, x, op); }
4912 void vcmpnge_uqsd(Xmm x, Operand op) { vcmpnge_uqsd(x, x, op); }
4913 void vcmpnge_uqss(Xmm x, Operand op) { vcmpnge_uqss(x, x, op); }
4914 void vcmpngepd(Xmm x, Operand op) { vcmpngepd(x, x, op); }
4915 void vcmpngeps(Xmm x, Operand op) { vcmpngeps(x, x, op); }
4916 void vcmpngesd(Xmm x, Operand op) { vcmpngesd(x, x, op); }
4917 void vcmpngess(Xmm x, Operand op) { vcmpngess(x, x, op); }
4918 void vcmpngt_uqpd(Xmm x, Operand op) { vcmpngt_uqpd(x, x, op); }
4919 void vcmpngt_uqps(Xmm x, Operand op) { vcmpngt_uqps(x, x, op); }
4920 void vcmpngt_uqsd(Xmm x, Operand op) { vcmpngt_uqsd(x, x, op); }
4921 void vcmpngt_uqss(Xmm x, Operand op) { vcmpngt_uqss(x, x, op); }
4922 void vcmpngtpd(Xmm x, Operand op) { vcmpngtpd(x, x, op); }
4923 void vcmpngtps(Xmm x, Operand op) { vcmpngtps(x, x, op); }
4924 void vcmpngtsd(Xmm x, Operand op) { vcmpngtsd(x, x, op); }
4925 void vcmpngtss(Xmm x, Operand op) { vcmpngtss(x, x, op); }
4926 void vcmpnle_uqpd(Xmm x, Operand op) { vcmpnle_uqpd(x, x, op); }
4927 void vcmpnle_uqps(Xmm x, Operand op) { vcmpnle_uqps(x, x, op); }
4928 void vcmpnle_uqsd(Xmm x, Operand op) { vcmpnle_uqsd(x, x, op); }
4929 void vcmpnle_uqss(Xmm x, Operand op) { vcmpnle_uqss(x, x, op); }
4930 void vcmpnlepd(Xmm x, Operand op) { vcmpnlepd(x, x, op); }
4931 void vcmpnleps(Xmm x, Operand op) { vcmpnleps(x, x, op); }
4932 void vcmpnlesd(Xmm x, Operand op) { vcmpnlesd(x, x, op); }
4933 void vcmpnless(Xmm x, Operand op) { vcmpnless(x, x, op); }
4934 void vcmpnlt_uqpd(Xmm x, Operand op) { vcmpnlt_uqpd(x, x, op); }
4935 void vcmpnlt_uqps(Xmm x, Operand op) { vcmpnlt_uqps(x, x, op); }
4936 void vcmpnlt_uqsd(Xmm x, Operand op) { vcmpnlt_uqsd(x, x, op); }
4937 void vcmpnlt_uqss(Xmm x, Operand op) { vcmpnlt_uqss(x, x, op); }
4938 void vcmpnltpd(Xmm x, Operand op) { vcmpnltpd(x, x, op); }
4939 void vcmpnltps(Xmm x, Operand op) { vcmpnltps(x, x, op); }
4940 void vcmpnltsd(Xmm x, Operand op) { vcmpnltsd(x, x, op); }
4941 void vcmpnltss(Xmm x, Operand op) { vcmpnltss(x, x, op); }
4942 void vcmpord_spd(Xmm x, Operand op) { vcmpord_spd(x, x, op); }
4943 void vcmpord_sps(Xmm x, Operand op) { vcmpord_sps(x, x, op); }
4944 void vcmpord_ssd(Xmm x, Operand op) { vcmpord_ssd(x, x, op); }
4945 void vcmpord_sss(Xmm x, Operand op) { vcmpord_sss(x, x, op); }
4946 void vcmpordpd(Xmm x, Operand op) { vcmpordpd(x, x, op); }
4947 void vcmpordps(Xmm x, Operand op) { vcmpordps(x, x, op); }
4948 void vcmpordsd(Xmm x, Operand op) { vcmpordsd(x, x, op); }
4949 void vcmpordss(Xmm x, Operand op) { vcmpordss(x, x, op); }
4950 void vcmppd(Xmm x, Operand op, uint8 imm) { vcmppd(x, x, op, imm); }
4951 void vcmpps(Xmm x, Operand op, uint8 imm) { vcmpps(x, x, op, imm); }
4952 void vcmpsd(Xmm x, Operand op, uint8 imm) { vcmpsd(x, x, op, imm); }
4953 void vcmpss(Xmm x, Operand op, uint8 imm) { vcmpss(x, x, op, imm); }
4954 void vcmptrue_uspd(Xmm x, Operand op) { vcmptrue_uspd(x, x, op); }
4955 void vcmptrue_usps(Xmm x, Operand op) { vcmptrue_usps(x, x, op); }
4956 void vcmptrue_ussd(Xmm x, Operand op) { vcmptrue_ussd(x, x, op); }
4957 void vcmptrue_usss(Xmm x, Operand op) { vcmptrue_usss(x, x, op); }
4958 void vcmptruepd(Xmm x, Operand op) { vcmptruepd(x, x, op); }
4959 void vcmptrueps(Xmm x, Operand op) { vcmptrueps(x, x, op); }
4960 void vcmptruesd(Xmm x, Operand op) { vcmptruesd(x, x, op); }
4961 void vcmptruess(Xmm x, Operand op) { vcmptruess(x, x, op); }
4962 void vcmpunord_spd(Xmm x, Operand op) { vcmpunord_spd(x, x, op); }
4963 void vcmpunord_sps(Xmm x, Operand op) { vcmpunord_sps(x, x, op); }
4964 void vcmpunord_ssd(Xmm x, Operand op) { vcmpunord_ssd(x, x, op); }
4965 void vcmpunord_sss(Xmm x, Operand op) { vcmpunord_sss(x, x, op); }
4966 void vcmpunordpd(Xmm x, Operand op) { vcmpunordpd(x, x, op); }
4967 void vcmpunordps(Xmm x, Operand op) { vcmpunordps(x, x, op); }
4968 void vcmpunordsd(Xmm x, Operand op) { vcmpunordsd(x, x, op); }
4969 void vcmpunordss(Xmm x, Operand op) { vcmpunordss(x, x, op); }
4970 void vcvtsd2ss(Xmm x, Operand op) { vcvtsd2ss(x, x, op); }
4971 void vcvtsi2sd(Xmm x, Operand op) { vcvtsi2sd(x, x, op); }
4972 void vcvtsi2ss(Xmm x, Operand op) { vcvtsi2ss(x, x, op); }
4973 void vcvtss2sd(Xmm x, Operand op) { vcvtss2sd(x, x, op); }
4974 void vdppd(Xmm x, Operand op, uint8 imm) { vdppd(x, x, op, imm); }
4975 void vdpps(Xmm x, Operand op, uint8 imm) { vdpps(x, x, op, imm); }
4976 void vinsertps(Xmm x, Operand op, uint8 imm) { vinsertps(x, x, op, imm); }
4977 void vmpsadbw(Xmm x, Operand op, uint8 imm) { vmpsadbw(x, x, op, imm); }
4978 void vpackssdw(Xmm x, Operand op) { vpackssdw(x, x, op); }
4979 void vpacksswb(Xmm x, Operand op) { vpacksswb(x, x, op); }
4980 void vpackusdw(Xmm x, Operand op) { vpackusdw(x, x, op); }
4981 void vpackuswb(Xmm x, Operand op) { vpackuswb(x, x, op); }
4982 void vpaddb(Xmm x, Operand op) { vpaddb(x, x, op); }
4983 void vpaddd(Xmm x, Operand op) { vpaddd(x, x, op); }
4984 void vpaddq(Xmm x, Operand op) { vpaddq(x, x, op); }
4985 void vpaddsb(Xmm x, Operand op) { vpaddsb(x, x, op); }
4986 void vpaddsw(Xmm x, Operand op) { vpaddsw(x, x, op); }
4987 void vpaddusb(Xmm x, Operand op) { vpaddusb(x, x, op); }
4988 void vpaddusw(Xmm x, Operand op) { vpaddusw(x, x, op); }
4989 void vpaddw(Xmm x, Operand op) { vpaddw(x, x, op); }
4990 void vpalignr(Xmm x, Operand op, uint8 imm) { vpalignr(x, x, op, imm); }
4991 void vpand(Xmm x, Operand op) { vpand(x, x, op); }
4992 void vpandn(Xmm x, Operand op) { vpandn(x, x, op); }
4993 void vpavgb(Xmm x, Operand op) { vpavgb(x, x, op); }
4994 void vpavgw(Xmm x, Operand op) { vpavgw(x, x, op); }
4995 void vpblendd(Xmm x, Operand op, uint8 imm) { vpblendd(x, x, op, imm); }
4996 void vpblendvb(Xmm x1, Operand op, Xmm x4) { vpblendvb(x1, x1, op, x4); }
4997 void vpblendw(Xmm x, Operand op, uint8 imm) { vpblendw(x, x, op, imm); }
4998 void vpclmulqdq(Xmm x, Operand op, uint8 imm) { vpclmulqdq(x, x, op, imm); }
4999 void vpcmpeqb(Xmm x, Operand op) { vpcmpeqb(x, x, op); }
5000 void vpcmpeqd(Xmm x, Operand op) { vpcmpeqd(x, x, op); }
5001 void vpcmpeqq(Xmm x, Operand op) { vpcmpeqq(x, x, op); }
5002 void vpcmpeqw(Xmm x, Operand op) { vpcmpeqw(x, x, op); }
5003 void vpcmpgtb(Xmm x, Operand op) { vpcmpgtb(x, x, op); }
5004 void vpcmpgtd(Xmm x, Operand op) { vpcmpgtd(x, x, op); }
5005 void vpcmpgtq(Xmm x, Operand op) { vpcmpgtq(x, x, op); }
5006 void vpcmpgtw(Xmm x, Operand op) { vpcmpgtw(x, x, op); }
5007 void vphaddd(Xmm x, Operand op) { vphaddd(x, x, op); }
5008 void vphaddsw(Xmm x, Operand op) { vphaddsw(x, x, op); }
5009 void vphaddw(Xmm x, Operand op) { vphaddw(x, x, op); }
5010 void vphsubd(Xmm x, Operand op) { vphsubd(x, x, op); }
5011 void vphsubsw(Xmm x, Operand op) { vphsubsw(x, x, op); }
5012 void vphsubw(Xmm x, Operand op) { vphsubw(x, x, op); }
5013 void vpinsrb(Xmm x, Operand op, uint8 imm) { vpinsrb(x, x, op, imm); }
5014 void vpinsrd(Xmm x, Operand op, uint8 imm) { vpinsrd(x, x, op, imm); }
5015 void vpinsrq(Xmm x, Operand op, uint8 imm) { vpinsrq(x, x, op, imm); }
5016 void vpinsrw(Xmm x, Operand op, uint8 imm) { vpinsrw(x, x, op, imm); }
5017 void vpmaddubsw(Xmm x, Operand op) { vpmaddubsw(x, x, op); }
5018 void vpmaddwd(Xmm x, Operand op) { vpmaddwd(x, x, op); }
5019 void vpmaxsb(Xmm x, Operand op) { vpmaxsb(x, x, op); }
5020 void vpmaxsd(Xmm x, Operand op) { vpmaxsd(x, x, op); }
5021 void vpmaxsw(Xmm x, Operand op) { vpmaxsw(x, x, op); }
5022 void vpmaxub(Xmm x, Operand op) { vpmaxub(x, x, op); }
5023 void vpmaxud(Xmm x, Operand op) { vpmaxud(x, x, op); }
5024 void vpmaxuw(Xmm x, Operand op) { vpmaxuw(x, x, op); }
5025 void vpminsb(Xmm x, Operand op) { vpminsb(x, x, op); }
5026 void vpminsd(Xmm x, Operand op) { vpminsd(x, x, op); }
5027 void vpminsw(Xmm x, Operand op) { vpminsw(x, x, op); }
5028 void vpminub(Xmm x, Operand op) { vpminub(x, x, op); }
5029 void vpminud(Xmm x, Operand op) { vpminud(x, x, op); }
5030 void vpminuw(Xmm x, Operand op) { vpminuw(x, x, op); }
5031 void vpmuldq(Xmm x, Operand op) { vpmuldq(x, x, op); }
5032 void vpmulhrsw(Xmm x, Operand op) { vpmulhrsw(x, x, op); }
5033 void vpmulhuw(Xmm x, Operand op) { vpmulhuw(x, x, op); }
5034 void vpmulhw(Xmm x, Operand op) { vpmulhw(x, x, op); }
5035 void vpmulld(Xmm x, Operand op) { vpmulld(x, x, op); }
5036 void vpmullw(Xmm x, Operand op) { vpmullw(x, x, op); }
5037 void vpmuludq(Xmm x, Operand op) { vpmuludq(x, x, op); }
5038 void vpor(Xmm x, Operand op) { vpor(x, x, op); }
5039 void vpsadbw(Xmm x, Operand op) { vpsadbw(x, x, op); }
5040 void vpsignb(Xmm x, Operand op) { vpsignb(x, x, op); }
5041 void vpsignd(Xmm x, Operand op) { vpsignd(x, x, op); }
5042 void vpsignw(Xmm x, Operand op) { vpsignw(x, x, op); }
5043 void vpslld(Xmm x, Operand op) { vpslld(x, x, op); }
5044 void vpslld(Xmm x, uint8 imm) { vpslld(x, x, imm); }
5045 void vpslldq(Xmm x, uint8 imm) { vpslldq(x, x, imm); }
5046 void vpsllq(Xmm x, Operand op) { vpsllq(x, x, op); }
5047 void vpsllq(Xmm x, uint8 imm) { vpsllq(x, x, imm); }
5048 void vpsllw(Xmm x, Operand op) { vpsllw(x, x, op); }
5049 void vpsllw(Xmm x, uint8 imm) { vpsllw(x, x, imm); }
5050 void vpsrad(Xmm x, Operand op) { vpsrad(x, x, op); }
5051 void vpsrad(Xmm x, uint8 imm) { vpsrad(x, x, imm); }
5052 void vpsraw(Xmm x, Operand op) { vpsraw(x, x, op); }
5053 void vpsraw(Xmm x, uint8 imm) { vpsraw(x, x, imm); }
5054 void vpsrld(Xmm x, Operand op) { vpsrld(x, x, op); }
5055 void vpsrld(Xmm x, uint8 imm) { vpsrld(x, x, imm); }
5056 void vpsrldq(Xmm x, uint8 imm) { vpsrldq(x, x, imm); }
5057 void vpsrlq(Xmm x, Operand op) { vpsrlq(x, x, op); }
5058 void vpsrlq(Xmm x, uint8 imm) { vpsrlq(x, x, imm); }
5059 void vpsrlw(Xmm x, Operand op) { vpsrlw(x, x, op); }
5060 void vpsrlw(Xmm x, uint8 imm) { vpsrlw(x, x, imm); }
5061 void vpsubb(Xmm x, Operand op) { vpsubb(x, x, op); }
5062 void vpsubd(Xmm x, Operand op) { vpsubd(x, x, op); }
5063 void vpsubq(Xmm x, Operand op) { vpsubq(x, x, op); }
5064 void vpsubsb(Xmm x, Operand op) { vpsubsb(x, x, op); }
5065 void vpsubsw(Xmm x, Operand op) { vpsubsw(x, x, op); }
5066 void vpsubusb(Xmm x, Operand op) { vpsubusb(x, x, op); }
5067 void vpsubusw(Xmm x, Operand op) { vpsubusw(x, x, op); }
5068 void vpsubw(Xmm x, Operand op) { vpsubw(x, x, op); }
5069 void vpunpckhbw(Xmm x, Operand op) { vpunpckhbw(x, x, op); }
5070 void vpunpckhdq(Xmm x, Operand op) { vpunpckhdq(x, x, op); }
5071 void vpunpckhqdq(Xmm x, Operand op) { vpunpckhqdq(x, x, op); }
5072 void vpunpckhwd(Xmm x, Operand op) { vpunpckhwd(x, x, op); }
5073 void vpunpcklbw(Xmm x, Operand op) { vpunpcklbw(x, x, op); }
5074 void vpunpckldq(Xmm x, Operand op) { vpunpckldq(x, x, op); }
5075 void vpunpcklqdq(Xmm x, Operand op) { vpunpcklqdq(x, x, op); }
5076 void vpunpcklwd(Xmm x, Operand op) { vpunpcklwd(x, x, op); }
5077 void vpxor(Xmm x, Operand op) { vpxor(x, x, op); }
5078 void vrcpss(Xmm x, Operand op) { vrcpss(x, x, op); }
5079 void vroundsd(Xmm x, Operand op, uint8 imm) { vroundsd(x, x, op, imm); }
5080 void vroundss(Xmm x, Operand op, uint8 imm) { vroundss(x, x, op, imm); }
5081 void vrsqrtss(Xmm x, Operand op) { vrsqrtss(x, x, op); }
5082 void vshufpd(Xmm x, Operand op, uint8 imm) { vshufpd(x, x, op, imm); }
5083 void vshufps(Xmm x, Operand op, uint8 imm) { vshufps(x, x, op, imm); }
5084 void vsqrtsd(Xmm x, Operand op) { vsqrtsd(x, x, op); }
5085 void vsqrtss(Xmm x, Operand op) { vsqrtss(x, x, op); }
5086 void vunpckhpd(Xmm x, Operand op) { vunpckhpd(x, x, op); }
5087 void vunpckhps(Xmm x, Operand op) { vunpckhps(x, x, op); }
5088 void vunpcklpd(Xmm x, Operand op) { vunpcklpd(x, x, op); }
5089 void vunpcklps(Xmm x, Operand op) { vunpcklps(x, x, op); }
5090 }
5091 
version(XBYAK64)
{
// ---- mnemonics compiled only for the 64-bit target ----

/// jecxz to a string label: writes 0x67, then a short jump with code 0xe3.
void jecxz(string label)
{
	db(0x67);
	opJmp(label, T_SHORT, 0xe3, 0, 0);
}

/// jecxz to a Label object: writes 0x67, then a short jump with code 0xe3.
void jecxz(Label label)
{
	db(0x67);
	opJmp(label, T_SHORT, 0xe3, 0, 0);
}

/// jrcxz to a string label: short jump with code 0xe3, no leading byte.
void jrcxz(string label)
{
	opJmp(label, T_SHORT, 0xe3, 0, 0);
}

/// jrcxz to a Label object: short jump with code 0xe3, no leading byte.
void jrcxz(Label label)
{
	opJmp(label, T_SHORT, 0xe3, 0, 0);
}

/// cdqe: writes the bytes 0x48 0x98.
void cdqe()
{
	db(0x48);
	db(0x98);
}

/// cqo: writes the bytes 0x48 0x99.
void cqo()
{
	db(0x48);
	db(0x99);
}

/// cmpsq: writes the bytes 0x48 0xA7.
void cmpsq()
{
	db(0x48);
	db(0xA7);
}

/// popfq: writes the single byte 0x9D.
void popfq()
{
	db(0x9D);
}

/// pushfq: writes the single byte 0x9C.
void pushfq()
{
	db(0x9C);
}

/// lodsq: writes the bytes 0x48 0xAD.
void lodsq()
{
	db(0x48);
	db(0xAD);
}

/// movsq: writes the bytes 0x48 0xA5.
void movsq()
{
	db(0x48);
	db(0xA5);
}

/// scasq: writes the bytes 0x48 0xAF.
void scasq()
{
	db(0x48);
	db(0xAF);
}

/// stosq: writes the bytes 0x48 0xAB.
void stosq()
{
	db(0x48);
	db(0xAB);
}

/// syscall: writes the bytes 0x0F 0x05.
void syscall()
{
	db(0x0F);
	db(0x05);
}

/// sysret: writes the bytes 0x0F 0x07.
void sysret()
{
	db(0x0F);
	db(0x07);
}

/// cmpxchg16b: opModM on addr with a fresh Reg64(1) and the codes 0x0F, 0xC7.
void cmpxchg16b(Address addr)
{
	opModM(addr, new Reg64(1), 0x0F, 0xC7);
}

/// fxrstor64: opModM on addr with a fresh Reg64(1) and the codes 0x0F, 0xAE.
void fxrstor64(Address addr)
{
	opModM(addr, new Reg64(1), 0x0F, 0xAE);
}

/// movq reg64 <- mmx/xmm (codes 0x0F, 0x7E); a 0x66 byte is written first
/// when mmx.isXMM() is true.
void movq(Reg64 reg, Mmx mmx)
{
	if (mmx.isXMM())
	{
		db(0x66);
	}
	opModR(mmx, reg, 0x0F, 0x7E);
}

/// movq mmx/xmm <- reg64 (codes 0x0F, 0x6E); a 0x66 byte is written first
/// when mmx.isXMM() is true.
void movq(Mmx mmx, Reg64 reg)
{
	if (mmx.isXMM())
	{
		db(0x66);
	}
	opModR(mmx, reg, 0x0F, 0x6E);
}

/// movsxd reg64, op (code 0x63).
/// Throws: XError(ERR.BAD_COMBINATION) unless op.isBit(32).
void movsxd(Reg64 reg, Operand op)
{
	if (!op.isBit(32))
	{
		throw new XError(ERR.BAD_COMBINATION);
	}
	opModRM(reg, op, op.isREG(), op.isMEM(), 0x63);
}

/// pextrq op, xmm, imm: opGen with the xmm index wrapped in a Reg64.
/// Throws: XError(ERR.BAD_COMBINATION) unless op is a 64-bit register or memory.
void pextrq(Operand op, Xmm xmm, uint8 imm)
{
	if (!(op.isREG(64) || op.isMEM()))
	{
		throw new XError(ERR.BAD_COMBINATION);
	}
	opGen(new Reg64(xmm.getIdx()), op, 0x16, 0x66, null, imm, 0x3A);
}

/// pinsrq xmm, op, imm: opGen with the xmm index wrapped in a Reg64.
/// Throws: XError(ERR.BAD_COMBINATION) unless op is a 64-bit register or memory.
void pinsrq(Xmm xmm, Operand op, uint8 imm)
{
	if (!(op.isREG(64) || op.isMEM()))
	{
		throw new XError(ERR.BAD_COMBINATION);
	}
	opGen(new Reg64(xmm.getIdx()), op, 0x22, 0x66, null, imm, 0x3A);
}

/// vcvtss2si r64, op: the index of r is wrapped in an Xmm, xm0 fills the
/// second slot; code 0x2D.
void vcvtss2si(Reg64 r, Operand op)
{
	auto dst = new Xmm(r.getIdx());
	opAVX_X_X_XM(dst, xm0, op,
		T_0F | T_F3 | T_W1 | T_EVEX | T_EW1 | T_ER_X | T_N8,
		0x2D);
}

/// vcvttss2si r64, op: same operand shaping as vcvtss2si; code 0x2C.
void vcvttss2si(Reg64 r, Operand op)
{
	auto dst = new Xmm(r.getIdx());
	opAVX_X_X_XM(dst, xm0, op,
		T_0F | T_F3 | T_W1 | T_EVEX | T_EW1 | T_SAE_X | T_N8,
		0x2C);
}

/// vcvtsd2si r64, op: the index of r is wrapped in an Xmm, xm0 fills the
/// second slot; code 0x2D.
void vcvtsd2si(Reg64 r, Operand op)
{
	auto dst = new Xmm(r.getIdx());
	opAVX_X_X_XM(dst, xm0, op,
		T_0F | T_F2 | T_W1 | T_EVEX | T_EW1 | T_N4 | T_ER_X,
		0x2D);
}

/// vcvttsd2si r64, op: same operand shaping as vcvtsd2si; code 0x2C.
void vcvttsd2si(Reg64 r, Operand op)
{
	auto dst = new Xmm(r.getIdx());
	opAVX_X_X_XM(dst, xm0, op,
		T_0F | T_F2 | T_W1 | T_EVEX | T_EW1 | T_N4 | T_SAE_X,
		0x2C);
}

/// vmovq xmm <- r64: the index of r is wrapped in an Xmm and passed as the
/// last operand; code 0x6E.
void vmovq(Xmm x, Reg64 r)
{
	auto gpr = new Xmm(r.getIdx());
	opAVX_X_X_XM(x, xm0, gpr,
		T_66 | T_0F | T_W1 | T_EVEX | T_EW1,
		0x6E);
}

/// vmovq r64 <- xmm: same operand order as the overload above; code 0x7E.
void vmovq(Reg64 r, Xmm x)
{
	auto gpr = new Xmm(r.getIdx());
	opAVX_X_X_XM(x, xm0, gpr,
		T_66 | T_0F | T_W1 | T_EVEX | T_EW1,
		0x7E);
}
}
else
{
// ---- mnemonics compiled only when XBYAK64 is not set ----

/// jcxz to a string label: writes 0x67, then a short jump with code 0xe3.
void jcxz(string label)
{
	db(0x67);
	opJmp(label, T_SHORT, 0xe3, 0, 0);
}

/// jcxz to a Label object: writes 0x67, then a short jump with code 0xe3.
void jcxz(Label label)
{
	db(0x67);
	opJmp(label, T_SHORT, 0xe3, 0, 0);
}

/// jecxz to a string label: short jump with code 0xe3, no leading byte.
void jecxz(string label)
{
	opJmp(label, T_SHORT, 0xe3, 0, 0);
}

/// jecxz to a Label object: short jump with code 0xe3, no leading byte.
void jecxz(Label label)
{
	opJmp(label, T_SHORT, 0xe3, 0, 0);
}

/// aaa: writes the single byte 0x37.
void aaa()
{
	db(0x37);
}

/// aad: writes the bytes 0xD5 0x0A.
void aad()
{
	db(0xD5);
	db(0x0A);
}

/// aam: writes the bytes 0xD4 0x0A.
void aam()
{
	db(0xD4);
	db(0x0A);
}

/// aas: writes the single byte 0x3F.
void aas()
{
	db(0x3F);
}

/// daa: writes the single byte 0x27.
void daa()
{
	db(0x27);
}

/// das: writes the single byte 0x2F.
void das()
{
	db(0x2F);
}

/// into: writes the single byte 0xCE.
void into()
{
	db(0xCE);
}

/// popad: writes the single byte 0x61 (same byte as popa).
void popad()
{
	db(0x61);
}

/// popfd: writes the single byte 0x9D.
void popfd()
{
	db(0x9D);
}

/// pusha: writes the single byte 0x60 (same byte as pushad).
void pusha()
{
	db(0x60);
}

/// pushad: writes the single byte 0x60 (same byte as pusha).
void pushad()
{
	db(0x60);
}

/// pushfd: writes the single byte 0x9C.
void pushfd()
{
	db(0x9C);
}

/// popa: writes the single byte 0x61 (same byte as popad).
void popa()
{
	db(0x61);
}

/// lds reg, addr: opLoadSeg with the constants 0xC5 and 0x100.
void lds(Reg reg, Address addr)
{
	opLoadSeg(addr, reg, 0xC5, 0x100);
}

/// les reg, addr: opLoadSeg with the constants 0xC4 and 0x100.
void les(Reg reg, Address addr)
{
	opLoadSeg(addr, reg, 0xC4, 0x100);
}
}
5145 
5146 
5147 version(XBYAK_DISABLE_AVX512)
5148 {
/// kaddb r1, r2, r3: opVex with code 0x4A.
void kaddb(Opmask r1, Opmask r2, Opmask r3)
{
	opVex(r1, r2, r3,
		T_L1 | T_0F | T_66 | T_W0,
		0x4A);
}

/// kaddd r1, r2, r3: opVex with code 0x4A.
void kaddd(Opmask r1, Opmask r2, Opmask r3)
{
	opVex(r1, r2, r3,
		T_L1 | T_0F | T_66 | T_W1,
		0x4A);
}

/// kaddq r1, r2, r3: opVex with code 0x4A.
void kaddq(Opmask r1, Opmask r2, Opmask r3)
{
	opVex(r1, r2, r3,
		T_L1 | T_0F | T_W1,
		0x4A);
}

/// kaddw r1, r2, r3: opVex with code 0x4A.
void kaddw(Opmask r1, Opmask r2, Opmask r3)
{
	opVex(r1, r2, r3,
		T_L1 | T_0F | T_W0,
		0x4A);
}
/// kandb r1, r2, r3: opVex with code 0x41.
void kandb(Opmask r1, Opmask r2, Opmask r3)
{
	opVex(r1, r2, r3,
		T_L1 | T_0F | T_66 | T_W0,
		0x41);
}

/// kandd r1, r2, r3: opVex with code 0x41.
void kandd(Opmask r1, Opmask r2, Opmask r3)
{
	opVex(r1, r2, r3,
		T_L1 | T_0F | T_66 | T_W1,
		0x41);
}

/// kandnb r1, r2, r3: opVex with code 0x42.
void kandnb(Opmask r1, Opmask r2, Opmask r3)
{
	opVex(r1, r2, r3,
		T_L1 | T_0F | T_66 | T_W0,
		0x42);
}

/// kandnd r1, r2, r3: opVex with code 0x42.
void kandnd(Opmask r1, Opmask r2, Opmask r3)
{
	opVex(r1, r2, r3,
		T_L1 | T_0F | T_66 | T_W1,
		0x42);
}

/// kandnq r1, r2, r3: opVex with code 0x42.
void kandnq(Opmask r1, Opmask r2, Opmask r3)
{
	opVex(r1, r2, r3,
		T_L1 | T_0F | T_W1,
		0x42);
}

/// kandnw r1, r2, r3: opVex with code 0x42.
void kandnw(Opmask r1, Opmask r2, Opmask r3)
{
	opVex(r1, r2, r3,
		T_L1 | T_0F | T_W0,
		0x42);
}

/// kandq r1, r2, r3: opVex with code 0x41.
void kandq(Opmask r1, Opmask r2, Opmask r3)
{
	opVex(r1, r2, r3,
		T_L1 | T_0F | T_W1,
		0x41);
}

/// kandw r1, r2, r3: opVex with code 0x41.
void kandw(Opmask r1, Opmask r2, Opmask r3)
{
	opVex(r1, r2, r3,
		T_L1 | T_0F | T_W0,
		0x41);
}
/// kmovb addr <- k: opVex(k, null, addr, ...) with code 0x91.
void kmovb(Address addr, Opmask k)
{
	opVex(k, null, addr, T_L0 | T_0F | T_66 | T_W0, 0x91);
}

/// kmovb k <- op: opVex(k, null, op, ...) with code 0x90.
/// Throws: XError(ERR.BAD_COMBINATION) when op is neither a memory operand
/// nor an opmask register.
void kmovb(Opmask k, Operand op)
{
	if (!op.isMEM() && !op.isOPMASK())
	{
		// The exception object has to be allocated with `new` before it can be thrown.
		throw new XError(ERR.BAD_COMBINATION);
	}
	opVex(k, null, op, T_L0 | T_0F | T_66 | T_W0, 0x90);
}

/// kmovb k <- r32: opVex(k, null, r, ...) with code 0x92.
void kmovb(Opmask k, Reg32 r)
{
	opVex(k, null, r, T_L0 | T_0F | T_66 | T_W0, 0x92);
}

/// kmovb r32 <- k: opVex(r, null, k, ...) with code 0x93.
void kmovb(Reg32 r, Opmask k)
{
	opVex(r, null, k, T_L0 | T_0F | T_66 | T_W0, 0x93);
}
/// kmovd addr <- k: opVex(k, null, addr, ...) with code 0x91.
void kmovd(Address addr, Opmask k)
{
	opVex(k, null, addr, T_L0 | T_0F | T_66 | T_W1, 0x91);
}

/// kmovd k <- op: opVex(k, null, op, ...) with code 0x90.
/// Throws: XError(ERR.BAD_COMBINATION) when op is neither a memory operand
/// nor an opmask register.
void kmovd(Opmask k, Operand op)
{
	if (!op.isMEM() && !op.isOPMASK())
	{
		// The exception object has to be allocated with `new` before it can be thrown.
		throw new XError(ERR.BAD_COMBINATION);
	}
	opVex(k, null, op, T_L0 | T_0F | T_66 | T_W1, 0x90);
}

/// kmovd k <- r32: opVex(k, null, r, ...) with code 0x92.
void kmovd(Opmask k, Reg32 r)
{
	opVex(k, null, r, T_L0 | T_0F | T_F2 | T_W0, 0x92);
}

/// kmovd r32 <- k: opVex(r, null, k, ...) with code 0x93.
void kmovd(Reg32 r, Opmask k)
{
	opVex(r, null, k, T_L0 | T_0F | T_F2 | T_W0, 0x93);
}
/// kmovq addr <- k: opVex(k, null, addr, ...) with code 0x91.
void kmovq(Address addr, Opmask k)
{
	opVex(k, null, addr, T_L0 | T_0F | T_W1, 0x91);
}

/// kmovq k <- op: opVex(k, null, op, ...) with code 0x90.
/// Throws: XError(ERR.BAD_COMBINATION) when op is neither a memory operand
/// nor an opmask register.
void kmovq(Opmask k, Operand op)
{
	if (!op.isMEM() && !op.isOPMASK())
	{
		// The exception object has to be allocated with `new` before it can be thrown.
		throw new XError(ERR.BAD_COMBINATION);
	}
	opVex(k, null, op, T_L0 | T_0F | T_W1, 0x90);
}
/// kmovw addr <- k: opVex(k, null, addr, ...) with code 0x91.
void kmovw(Address addr, Opmask k)
{
	opVex(k, null, addr, T_L0 | T_0F | T_W0, 0x91);
}

/// kmovw k <- op: opVex(k, null, op, ...) with code 0x90.
/// Throws: XError(ERR.BAD_COMBINATION) when op is neither a memory operand
/// nor an opmask register.
void kmovw(Opmask k, Operand op)
{
	if (!op.isMEM() && !op.isOPMASK())
	{
		// The exception object has to be allocated with `new` before it can be thrown.
		throw new XError(ERR.BAD_COMBINATION);
	}
	opVex(k, null, op, T_L0 | T_0F | T_W0, 0x90);
}

/// kmovw k <- r32: opVex(k, null, r, ...) with code 0x92.
void kmovw(Opmask k, Reg32 r)
{
	opVex(k, null, r, T_L0 | T_0F | T_W0, 0x92);
}

/// kmovw r32 <- k: opVex(r, null, k, ...) with code 0x93.
void kmovw(Reg32 r, Opmask k)
{
	opVex(r, null, k, T_L0 | T_0F | T_W0, 0x93);
}
/// knotb r1, r2: opVex(r1, null, r2, ...) with code 0x44.
void knotb(Opmask r1, Opmask r2)
{
	opVex(r1, null, r2,
		T_0F | T_66 | T_W0,
		0x44);
}

/// knotd r1, r2: opVex(r1, null, r2, ...) with code 0x44.
void knotd(Opmask r1, Opmask r2)
{
	opVex(r1, null, r2,
		T_0F | T_66 | T_W1,
		0x44);
}

/// knotq r1, r2: opVex(r1, null, r2, ...) with code 0x44.
void knotq(Opmask r1, Opmask r2)
{
	opVex(r1, null, r2,
		T_0F | T_W1,
		0x44);
}

/// knotw r1, r2: opVex(r1, null, r2, ...) with code 0x44.
void knotw(Opmask r1, Opmask r2)
{
	opVex(r1, null, r2,
		T_0F | T_W0,
		0x44);
}
/// korb r1, r2, r3: opVex with code 0x45.
void korb(Opmask r1, Opmask r2, Opmask r3)
{
	opVex(r1, r2, r3,
		T_L1 | T_0F | T_66 | T_W0,
		0x45);
}

/// kord r1, r2, r3: opVex with code 0x45.
void kord(Opmask r1, Opmask r2, Opmask r3)
{
	opVex(r1, r2, r3,
		T_L1 | T_0F | T_66 | T_W1,
		0x45);
}

/// korq r1, r2, r3: opVex with code 0x45.
void korq(Opmask r1, Opmask r2, Opmask r3)
{
	opVex(r1, r2, r3,
		T_L1 | T_0F | T_W1,
		0x45);
}

/// kortestb r1, r2: opVex(r1, null, r2, ...) with code 0x98.
void kortestb(Opmask r1, Opmask r2)
{
	opVex(r1, null, r2,
		T_0F | T_66 | T_W0,
		0x98);
}

/// kortestd r1, r2: opVex(r1, null, r2, ...) with code 0x98.
void kortestd(Opmask r1, Opmask r2)
{
	opVex(r1, null, r2,
		T_0F | T_66 | T_W1,
		0x98);
}

/// kortestq r1, r2: opVex(r1, null, r2, ...) with code 0x98.
void kortestq(Opmask r1, Opmask r2)
{
	opVex(r1, null, r2,
		T_0F | T_W1,
		0x98);
}

/// kortestw r1, r2: opVex(r1, null, r2, ...) with code 0x98.
void kortestw(Opmask r1, Opmask r2)
{
	opVex(r1, null, r2,
		T_0F | T_W0,
		0x98);
}

/// korw r1, r2, r3: opVex with code 0x45.
void korw(Opmask r1, Opmask r2, Opmask r3)
{
	opVex(r1, r2, r3,
		T_L1 | T_0F | T_W0,
		0x45);
}
/// kshiftlb r1, r2, imm: opVex(r1, null, r2, ...) with code 0x32.
void kshiftlb(Opmask r1, Opmask r2, uint8 imm)
{
	opVex(r1, null, r2,
		T_66 | T_0F3A | T_W0,
		0x32, imm);
}

/// kshiftld r1, r2, imm: opVex(r1, null, r2, ...) with code 0x33.
void kshiftld(Opmask r1, Opmask r2, uint8 imm)
{
	opVex(r1, null, r2,
		T_66 | T_0F3A | T_W0,
		0x33, imm);
}

/// kshiftlq r1, r2, imm: opVex(r1, null, r2, ...) with code 0x33.
void kshiftlq(Opmask r1, Opmask r2, uint8 imm)
{
	opVex(r1, null, r2,
		T_66 | T_0F3A | T_W1,
		0x33, imm);
}

/// kshiftlw r1, r2, imm: opVex(r1, null, r2, ...) with code 0x32.
void kshiftlw(Opmask r1, Opmask r2, uint8 imm)
{
	opVex(r1, null, r2,
		T_66 | T_0F3A | T_W1,
		0x32, imm);
}

/// kshiftrb r1, r2, imm: opVex(r1, null, r2, ...) with code 0x30.
void kshiftrb(Opmask r1, Opmask r2, uint8 imm)
{
	opVex(r1, null, r2,
		T_66 | T_0F3A | T_W0,
		0x30, imm);
}

/// kshiftrd r1, r2, imm: opVex(r1, null, r2, ...) with code 0x31.
void kshiftrd(Opmask r1, Opmask r2, uint8 imm)
{
	opVex(r1, null, r2,
		T_66 | T_0F3A | T_W0,
		0x31, imm);
}

/// kshiftrq r1, r2, imm: opVex(r1, null, r2, ...) with code 0x31.
void kshiftrq(Opmask r1, Opmask r2, uint8 imm)
{
	opVex(r1, null, r2,
		T_66 | T_0F3A | T_W1,
		0x31, imm);
}

/// kshiftrw r1, r2, imm: opVex(r1, null, r2, ...) with code 0x30.
void kshiftrw(Opmask r1, Opmask r2, uint8 imm)
{
	opVex(r1, null, r2,
		T_66 | T_0F3A | T_W1,
		0x30, imm);
}
/// ktestb r1, r2: opVex(r1, null, r2, ...) with code 0x99.
void ktestb(Opmask r1, Opmask r2)
{
	opVex(r1, null, r2,
		T_0F | T_66 | T_W0,
		0x99);
}

/// ktestd r1, r2: opVex(r1, null, r2, ...) with code 0x99.
void ktestd(Opmask r1, Opmask r2)
{
	opVex(r1, null, r2,
		T_0F | T_66 | T_W1,
		0x99);
}

/// ktestq r1, r2: opVex(r1, null, r2, ...) with code 0x99.
void ktestq(Opmask r1, Opmask r2)
{
	opVex(r1, null, r2,
		T_0F | T_W1,
		0x99);
}

/// ktestw r1, r2: opVex(r1, null, r2, ...) with code 0x99.
void ktestw(Opmask r1, Opmask r2)
{
	opVex(r1, null, r2,
		T_0F | T_W0,
		0x99);
}
/// kunpckbw r1, r2, r3: opVex with code 0x4B.
void kunpckbw(Opmask r1, Opmask r2, Opmask r3)
{
	opVex(r1, r2, r3,
		T_L1 | T_0F | T_66 | T_W0,
		0x4B);
}

/// kunpckdq r1, r2, r3: opVex with code 0x4B.
void kunpckdq(Opmask r1, Opmask r2, Opmask r3)
{
	opVex(r1, r2, r3,
		T_L1 | T_0F | T_W1,
		0x4B);
}

/// kunpckwd r1, r2, r3: opVex with code 0x4B.
void kunpckwd(Opmask r1, Opmask r2, Opmask r3)
{
	opVex(r1, r2, r3,
		T_L1 | T_0F | T_W0,
		0x4B);
}
/// kxnorb r1, r2, r3: opVex with code 0x46.
void kxnorb(Opmask r1, Opmask r2, Opmask r3)
{
	opVex(r1, r2, r3,
		T_L1 | T_0F | T_66 | T_W0,
		0x46);
}

/// kxnord r1, r2, r3: opVex with code 0x46.
void kxnord(Opmask r1, Opmask r2, Opmask r3)
{
	opVex(r1, r2, r3,
		T_L1 | T_0F | T_66 | T_W1,
		0x46);
}

/// kxnorq r1, r2, r3: opVex with code 0x46.
void kxnorq(Opmask r1, Opmask r2, Opmask r3)
{
	opVex(r1, r2, r3,
		T_L1 | T_0F | T_W1,
		0x46);
}

/// kxnorw r1, r2, r3: opVex with code 0x46.
void kxnorw(Opmask r1, Opmask r2, Opmask r3)
{
	opVex(r1, r2, r3,
		T_L1 | T_0F | T_W0,
		0x46);
}

/// kxorb r1, r2, r3: opVex with code 0x47.
void kxorb(Opmask r1, Opmask r2, Opmask r3)
{
	opVex(r1, r2, r3,
		T_L1 | T_0F | T_66 | T_W0,
		0x47);
}

/// kxord r1, r2, r3: opVex with code 0x47.
void kxord(Opmask r1, Opmask r2, Opmask r3)
{
	opVex(r1, r2, r3,
		T_L1 | T_0F | T_66 | T_W1,
		0x47);
}

/// kxorq r1, r2, r3: opVex with code 0x47.
void kxorq(Opmask r1, Opmask r2, Opmask r3)
{
	opVex(r1, r2, r3,
		T_L1 | T_0F | T_W1,
		0x47);
}

/// kxorw r1, r2, r3: opVex with code 0x47.
void kxorw(Opmask r1, Opmask r2, Opmask r3)
{
	opVex(r1, r2, r3,
		T_L1 | T_0F | T_W0,
		0x47);
}
/// v4fmaddps z1, z2, addr: opAVX_X_X_XM with code 0x9A.
void v4fmaddps(Zmm z1, Zmm z2, Address addr)
{
	opAVX_X_X_XM(z1, z2, addr,
		T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16,
		0x9A);
}

/// v4fmaddss x1, x2, addr: opAVX_X_X_XM with code 0x9B.
void v4fmaddss(Xmm x1, Xmm x2, Address addr)
{
	opAVX_X_X_XM(x1, x2, addr,
		T_0F38 | T_F2 | T_EW0 | T_MUST_EVEX | T_N16,
		0x9B);
}

/// v4fnmaddps z1, z2, addr: opAVX_X_X_XM with code 0xAA.
void v4fnmaddps(Zmm z1, Zmm z2, Address addr)
{
	opAVX_X_X_XM(z1, z2, addr,
		T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16,
		0xAA);
}

/// v4fnmaddss x1, x2, addr: opAVX_X_X_XM with code 0xAB.
void v4fnmaddss(Xmm x1, Xmm x2, Address addr)
{
	opAVX_X_X_XM(x1, x2, addr,
		T_0F38 | T_F2 | T_EW0 | T_MUST_EVEX | T_N16,
		0xAB);
}
/// valignd x1, x2, op, imm: opAVX_X_X_XM with code 0x03 (T_EW0 variant).
void valignd(Xmm x1, Xmm x2, Operand op, uint8 imm)
{
	opAVX_X_X_XM(x1, x2, op,
		T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX,
		0x03, imm);
}

/// valignq x1, x2, op, imm: opAVX_X_X_XM with code 0x03 (T_EW1 variant).
void valignq(Xmm x1, Xmm x2, Operand op, uint8 imm)
{
	opAVX_X_X_XM(x1, x2, op,
		T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX,
		0x03, imm);
}
/// vblendmpd x1, x2, op: opAVX_X_X_XM with code 0x65 (T_EW1 | T_B64 variant).
void vblendmpd(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op,
		T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64,
		0x65);
}

/// vblendmps x1, x2, op: opAVX_X_X_XM with code 0x65 (T_EW0 | T_B32 variant).
void vblendmps(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op,
		T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32,
		0x65);
}
/// vbroadcastf32x2 y, op: opAVX_X_XM_IMM with code 0x19.
void vbroadcastf32x2(Ymm y, Operand op)
{
	opAVX_X_XM_IMM(y, op,
		T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N8,
		0x19);
}

/// vbroadcastf32x4 y, addr: opAVX_X_XM_IMM with code 0x1A.
void vbroadcastf32x4(Ymm y, Address addr)
{
	opAVX_X_XM_IMM(y, addr,
		T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N16,
		0x1A);
}

/// vbroadcastf32x8 y, addr: opAVX_X_XM_IMM with code 0x1B.
void vbroadcastf32x8(Zmm y, Address addr)
{
	opAVX_X_XM_IMM(y, addr,
		T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N32,
		0x1B);
}

/// vbroadcastf64x2 y, addr: opAVX_X_XM_IMM with code 0x1A.
void vbroadcastf64x2(Ymm y, Address addr)
{
	opAVX_X_XM_IMM(y, addr,
		T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N16,
		0x1A);
}

/// vbroadcastf64x4 y, addr: opAVX_X_XM_IMM with code 0x1B.
void vbroadcastf64x4(Zmm y, Address addr)
{
	opAVX_X_XM_IMM(y, addr,
		T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N32,
		0x1B);
}

/// vbroadcasti32x2 x, op: opAVX_X_XM_IMM with code 0x59.
void vbroadcasti32x2(Xmm x, Operand op)
{
	opAVX_X_XM_IMM(x, op,
		T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N8,
		0x59);
}

/// vbroadcasti32x4 y, op: opAVX_X_XM_IMM with code 0x5A.
void vbroadcasti32x4(Ymm y, Operand op)
{
	opAVX_X_XM_IMM(y, op,
		T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N16,
		0x5A);
}

/// vbroadcasti32x8 z, op: opAVX_X_XM_IMM with code 0x5B.
void vbroadcasti32x8(Zmm z, Operand op)
{
	opAVX_X_XM_IMM(z, op,
		T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N32,
		0x5B);
}

/// vbroadcasti64x2 y, op: opAVX_X_XM_IMM with code 0x5A.
void vbroadcasti64x2(Ymm y, Operand op)
{
	opAVX_X_XM_IMM(y, op,
		T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N16,
		0x5A);
}

/// vbroadcasti64x4 z, op: opAVX_X_XM_IMM with code 0x5B.
void vbroadcasti64x4(Zmm z, Operand op)
{
	opAVX_X_XM_IMM(z, op,
		T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N32,
		0x5B);
}
/// vcmppd k, x, op, imm (opmask destination): opAVX_K_X_XM with code 0xC2.
void vcmppd(Opmask k, Xmm x, Operand op, uint8 imm)
{
	opAVX_K_X_XM(k, x, op,
		T_66 | T_0F | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64,
		0xC2, imm);
}

/// vcmpps k, x, op, imm (opmask destination): opAVX_K_X_XM with code 0xC2.
void vcmpps(Opmask k, Xmm x, Operand op, uint8 imm)
{
	opAVX_K_X_XM(k, x, op,
		T_0F | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32,
		0xC2, imm);
}

/// vcmpsd k, x, op, imm (opmask destination): opAVX_K_X_XM with code 0xC2.
void vcmpsd(Opmask k, Xmm x, Operand op, uint8 imm)
{
	opAVX_K_X_XM(k, x, op,
		T_N8 | T_F2 | T_0F | T_EW1 | T_SAE_Z | T_MUST_EVEX,
		0xC2, imm);
}

/// vcmpss k, x, op, imm (opmask destination): opAVX_K_X_XM with code 0xC2.
void vcmpss(Opmask k, Xmm x, Operand op, uint8 imm)
{
	opAVX_K_X_XM(k, x, op,
		T_N4 | T_F3 | T_0F | T_EW0 | T_SAE_Z | T_MUST_EVEX,
		0xC2, imm);
}
/// vcompressb op, x: opAVX_X_XM_IMM(x, op, ...) with code 0x63.
void vcompressb(Operand op, Xmm x)
{
	opAVX_X_XM_IMM(x, op,
		T_N1 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX,
		0x63);
}

/// vcompresspd op, x: opAVX_X_XM_IMM(x, op, ...) with code 0x8A.
void vcompresspd(Operand op, Xmm x)
{
	opAVX_X_XM_IMM(x, op,
		T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX,
		0x8A);
}

/// vcompressps op, x: opAVX_X_XM_IMM(x, op, ...) with code 0x8A.
void vcompressps(Operand op, Xmm x)
{
	opAVX_X_XM_IMM(x, op,
		T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX,
		0x8A);
}

/// vcompressw op, x: opAVX_X_XM_IMM(x, op, ...) with code 0x63.
void vcompressw(Operand op, Xmm x)
{
	opAVX_X_XM_IMM(x, op,
		T_N2 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX,
		0x63);
}
/// vcvtne2ps2bf16 x1, x2, op: opAVX_X_X_XM with code 0x72.
void vcvtne2ps2bf16(Xmm x1, Xmm x2, Operand op)
{
	opAVX_X_X_XM(x1, x2, op,
		T_F2 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32,
		0x72);
}

/// vcvtneps2bf16 x, op: opCvt2 with code 0x72.
void vcvtneps2bf16(Xmm x, Operand op)
{
	opCvt2(x, op,
		T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32,
		0x72);
}
/// vcvtpd2qq x, op: opAVX_X_XM_IMM with code 0x7B.
void vcvtpd2qq(Xmm x, Operand op)
{
	opAVX_X_XM_IMM(x, op,
		T_66 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64,
		0x7B);
}

/// vcvtpd2udq x, op: opCvt2 with code 0x79.
void vcvtpd2udq(Xmm x, Operand op)
{
	opCvt2(x, op,
		T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z,
		0x79);
}

/// vcvtpd2uqq x, op: opAVX_X_XM_IMM with code 0x79.
void vcvtpd2uqq(Xmm x, Operand op)
{
	opAVX_X_XM_IMM(x, op,
		T_66 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64,
		0x79);
}
/// vcvtps2qq x, op: runs checkCvt1(x, op) first, then opVex(x, null, op, ...)
/// with code 0x7B.
void vcvtps2qq(Xmm x, Operand op)
{
	checkCvt1(x, op);
	opVex(x, null, op,
		T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_ER_Y,
		0x7B);
}

/// vcvtps2udq x, op: opAVX_X_XM_IMM with code 0x79.
void vcvtps2udq(Xmm x, Operand op)
{
	opAVX_X_XM_IMM(x, op,
		T_0F | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32,
		0x79);
}

/// vcvtps2uqq x, op: runs checkCvt1(x, op) first, then opVex(x, null, op, ...)
/// with code 0x79.
void vcvtps2uqq(Xmm x, Operand op)
{
	checkCvt1(x, op);
	opVex(x, null, op,
		T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_ER_Y,
		0x79);
}
/// vcvtqq2pd x, op: opAVX_X_XM_IMM with code 0xE6.
void vcvtqq2pd(Xmm x, Operand op)
{
	opAVX_X_XM_IMM(x, op,
		T_F3 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64,
		0xE6);
}

/// vcvtqq2ps x, op: opCvt2 with code 0x5B.
void vcvtqq2ps(Xmm x, Operand op)
{
	opCvt2(x, op,
		T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z,
		0x5B);
}
/// vcvtsd2usi r, op: T_EW1 is selected when r is a 64-bit register, T_EW0
/// otherwise; the index of r is wrapped in an Xmm and xm0 fills the second
/// slot. Code 0x79.
void vcvtsd2usi(Reg32e r, Operand op)
{
	int encType = T_F2 | T_0F | T_MUST_EVEX | T_N8 | T_ER_X;
	encType |= r.isREG(64) ? T_EW1 : T_EW0;
	auto dst = new Xmm(r.getIdx());
	opAVX_X_X_XM(dst, xm0, op, encType, 0x79);
}

/// vcvtss2usi r, op: T_EW1 is selected when r is a 64-bit register, T_EW0
/// otherwise; the index of r is wrapped in an Xmm and xm0 fills the second
/// slot. Code 0x79.
void vcvtss2usi(Reg32e r, Operand op)
{
	int encType = T_F3 | T_0F | T_MUST_EVEX | T_N4 | T_ER_X;
	encType |= r.isREG(64) ? T_EW1 : T_EW0;
	auto dst = new Xmm(r.getIdx());
	opAVX_X_X_XM(dst, xm0, op, encType, 0x79);
}
/// vcvttpd2qq x, op: opAVX_X_XM_IMM with code 0x7A.
void vcvttpd2qq(Xmm x, Operand op)
{
	opAVX_X_XM_IMM(x, op,
		T_66 | T_0F | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64,
		0x7A);
}

/// vcvttpd2udq x, op: opCvt2 with code 0x78.
void vcvttpd2udq(Xmm x, Operand op)
{
	opCvt2(x, op,
		T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z,
		0x78);
}

/// vcvttpd2uqq x, op: opAVX_X_XM_IMM with code 0x78.
void vcvttpd2uqq(Xmm x, Operand op)
{
	opAVX_X_XM_IMM(x, op,
		T_66 | T_0F | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64,
		0x78);
}
/// vcvttps2qq x, op: runs checkCvt1(x, op) first, then opVex(x, null, op, ...)
/// with code 0x7A.
void vcvttps2qq(Xmm x, Operand op)
{
	checkCvt1(x, op);
	opVex(x, null, op,
		T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_SAE_Y,
		0x7A);
}

/// vcvttps2udq x, op: opAVX_X_XM_IMM with code 0x78.
void vcvttps2udq(Xmm x, Operand op)
{
	opAVX_X_XM_IMM(x, op,
		T_0F | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32,
		0x78);
}

/// vcvttps2uqq x, op: runs checkCvt1(x, op) first, then opVex(x, null, op, ...)
/// with code 0x78.
void vcvttps2uqq(Xmm x, Operand op)
{
	checkCvt1(x, op);
	opVex(x, null, op,
		T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_SAE_Y,
		0x78);
}
/// vcvttsd2usi r, op: T_EW1 is selected when r is a 64-bit register, T_EW0
/// otherwise; the index of r is wrapped in an Xmm and xm0 fills the second
/// slot. Code 0x78.
void vcvttsd2usi(Reg32e r, Operand op)
{
	int encType = T_F2 | T_0F | T_MUST_EVEX | T_N8 | T_SAE_X;
	encType |= r.isREG(64) ? T_EW1 : T_EW0;
	auto dst = new Xmm(r.getIdx());
	opAVX_X_X_XM(dst, xm0, op, encType, 0x78);
}

/// vcvttss2usi r, op: T_EW1 is selected when r is a 64-bit register, T_EW0
/// otherwise; the index of r is wrapped in an Xmm and xm0 fills the second
/// slot. Code 0x78.
void vcvttss2usi(Reg32e r, Operand op)
{
	int encType = T_F3 | T_0F | T_MUST_EVEX | T_N4 | T_SAE_X;
	encType |= r.isREG(64) ? T_EW1 : T_EW0;
	auto dst = new Xmm(r.getIdx());
	opAVX_X_X_XM(dst, xm0, op, encType, 0x78);
}
/// vcvtudq2pd x, op: runs checkCvt1(x, op) first, then opVex(x, null, op, ...)
/// with code 0x7A.
void vcvtudq2pd(Xmm x, Operand op)
{
	checkCvt1(x, op);
	opVex(x, null, op,
		T_F3 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL,
		0x7A);
}

/// vcvtudq2ps x, op: opAVX_X_XM_IMM with code 0x7A.
void vcvtudq2ps(Xmm x, Operand op)
{
	opAVX_X_XM_IMM(x, op,
		T_F2 | T_0F | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32,
		0x7A);
}
/// vcvtuqq2pd x, op: opAVX_X_XM_IMM with code 0x7A.
void vcvtuqq2pd(Xmm x, Operand op)
{
	opAVX_X_XM_IMM(x, op,
		T_F3 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64,
		0x7A);
}

/// vcvtuqq2ps x, op: opCvt2 with code 0x7A.
void vcvtuqq2ps(Xmm x, Operand op)
{
	opCvt2(x, op,
		T_F2 | T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z,
		0x7A);
}
5260 void vcvtusi2sd(Xmm x1, Xmm x2, Operand op) { opCvt3(x1, x2, op, T_F2 | T_0F | T_MUST_EVEX, T_W1 | T_EW1 | T_ER_X | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); }
// vcvtusi2ss: delegates to opCvt3 with base flags T_F3 | T_0F | T_MUST_EVEX | T_ER_X plus two alternative
// flag sets (T_W1 | T_EW1 | T_N8 and T_W0 | T_EW0 | T_N4) and code 0x7B; T_ER_X sits in the base flags here.
5261 void vcvtusi2ss(Xmm x1, Xmm x2, Operand op) { opCvt3(x1, x2, op, T_F3 | T_0F | T_MUST_EVEX | T_ER_X, T_W1 | T_EW1 | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); }
// vdbpsadbw: forwards x1, x2, op and imm to opAVX_X_X_XM with T_66 | T_0F3A and T_EW0 flags, code 0x42.
5262 void vdbpsadbw(Xmm x1, Xmm x2, Operand op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x42, imm); }
// vdpbf16ps: forwards x1, x2 and op to opAVX_X_X_XM with T_F3 | T_0F38, T_EW0, T_B32 and T_SAE_Z flags, code 0x52.
5263 void vdpbf16ps(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x52); }
// vexp2pd: accepts only a Zmm destination; forwards z and op to opAVX_X_XM_IMM with T_66 | T_0F38, T_EW1, T_B64 and T_SAE_Z, code 0xC8.
5264 void vexp2pd(Zmm z, Operand op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, 0xC8); }
// vexp2ps: accepts only a Zmm destination; forwards z and op to opAVX_X_XM_IMM with T_66 | T_0F38, T_EW0, T_B32 and T_SAE_Z, code 0xC8.
5265 void vexp2ps(Zmm z, Operand op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xC8); }
// vexpandpd: forwards x and op to opAVX_X_XM_IMM with T_N8, T_66 | T_0F38 and T_EW1 flags, code 0x88.
5266 void vexpandpd(Xmm x, Operand op) { opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x88); }
// vexpandps: forwards x and op to opAVX_X_XM_IMM with T_N4, T_66 | T_0F38 and T_EW0 flags, code 0x88.
5267 void vexpandps(Xmm x, Operand op) { opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x88); }
// vextractf32x4: throws XError(ERR.BAD_COMBINATION) unless op.isKind(MEM | XMM) holds; otherwise calls
// opVex(r, null, op, ...) with T_N16, T_66 | T_0F3A and T_EW0 flags, code 0x19 and imm.
5268 void vextractf32x4(Operand op, Ymm r, uint8 imm) { if (!op.isKind(Operand.Kind.MEM | Operand.Kind.XMM)) throw new XError(ERR.BAD_COMBINATION); opVex(r, null, op, T_N16 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x19, imm); }
// vextractf32x8: throws XError(ERR.BAD_COMBINATION) unless op.isKind(MEM | YMM) holds; otherwise calls
// opVex(r, null, op, ...) with T_N32, T_66 | T_0F3A and T_EW0 flags, code 0x1B and imm.
5269 void vextractf32x8(Operand op, Zmm r, uint8 imm) { if (!op.isKind(Operand.Kind.MEM | Operand.Kind.YMM)) throw new XError(ERR.BAD_COMBINATION); opVex(r, null, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x1B, imm); }
// vextractf64x2: throws XError(ERR.BAD_COMBINATION) unless op.isKind(MEM | XMM) holds; otherwise calls
// opVex(r, null, op, ...) with T_N16, T_66 | T_0F3A and T_EW1 flags, code 0x19 and imm.
5270 void vextractf64x2(Operand op, Ymm r, uint8 imm) { if (!op.isKind(Operand.Kind.MEM | Operand.Kind.XMM)) throw new XError(ERR.BAD_COMBINATION); opVex(r, null, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x19, imm); }
// vextractf64x4: throws XError(ERR.BAD_COMBINATION) unless op.isKind(MEM | YMM) holds; otherwise calls
// opVex(r, null, op, ...) with T_N32, T_66 | T_0F3A and T_EW1 flags, code 0x1B and imm.
5271 void vextractf64x4(Operand op, Zmm r, uint8 imm) { if (!op.isKind(Operand.Kind.MEM | Operand.Kind.YMM)) throw new XError(ERR.BAD_COMBINATION); opVex(r, null, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x1B, imm); }
// vextracti32x4: throws XError(ERR.BAD_COMBINATION) unless op.isKind(MEM | XMM) holds; otherwise calls
// opVex(r, null, op, ...) with T_N16, T_66 | T_0F3A and T_EW0 flags, code 0x39 and imm.
5272 void vextracti32x4(Operand op, Ymm r, uint8 imm) { if (!op.isKind(Operand.Kind.MEM | Operand.Kind.XMM)) throw new XError(ERR.BAD_COMBINATION); opVex(r, null, op, T_N16 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x39, imm); }
// vextracti32x8: throws XError(ERR.BAD_COMBINATION) unless op.isKind(MEM | YMM) holds; otherwise calls
// opVex(r, null, op, ...) with T_N32, T_66 | T_0F3A and T_EW0 flags, code 0x3B and imm.
5273 void vextracti32x8(Operand op, Zmm r, uint8 imm) { if (!op.isKind(Operand.Kind.MEM | Operand.Kind.YMM)) throw new XError(ERR.BAD_COMBINATION); opVex(r, null, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3B, imm); }
// vextracti64x2: throws XError(ERR.BAD_COMBINATION) unless op.isKind(MEM | XMM) holds; otherwise calls
// opVex(r, null, op, ...) with T_N16, T_66 | T_0F3A and T_EW1 flags, code 0x39 and imm.
5274 void vextracti64x2(Operand op, Ymm r, uint8 imm) { if (!op.isKind(Operand.Kind.MEM | Operand.Kind.XMM)) throw new XError(ERR.BAD_COMBINATION); opVex(r, null, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x39, imm); }
// vextracti64x4: throws XError(ERR.BAD_COMBINATION) unless op.isKind(MEM | YMM) holds; otherwise calls
// opVex(r, null, op, ...) with T_N32, T_66 | T_0F3A and T_EW1 flags, code 0x3B and imm.
5275 void vextracti64x4(Operand op, Zmm r, uint8 imm) { if (!op.isKind(Operand.Kind.MEM | Operand.Kind.YMM)) throw new XError(ERR.BAD_COMBINATION); opVex(r, null, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3B, imm); }
// vfixupimmpd: forwards x1, x2, op and imm to opAVX_X_X_XM with T_66 | T_0F3A, T_EW1, T_B64 and T_SAE_Z flags, code 0x54.
5276 void vfixupimmpd(Xmm x1, Xmm x2, Operand op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x54, imm); }
// vfixupimmps: forwards x1, x2, op and imm to opAVX_X_X_XM with T_66 | T_0F3A, T_EW0, T_B32 and T_SAE_Z flags, code 0x54.
5277 void vfixupimmps(Xmm x1, Xmm x2, Operand op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x54, imm); }
// vfixupimmsd: forwards x1, x2, op and imm to opAVX_X_X_XM with T_N8, T_66 | T_0F3A, T_EW1 and T_SAE_Z flags (no T_YMM), code 0x55.
5278 void vfixupimmsd(Xmm x1, Xmm x2, Operand op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_Z | T_MUST_EVEX, 0x55, imm); }
// vfixupimmss: forwards x1, x2, op and imm to opAVX_X_X_XM with T_N4, T_66 | T_0F3A, T_EW0 and T_SAE_Z flags (no T_YMM), code 0x55.
5279 void vfixupimmss(Xmm x1, Xmm x2, Operand op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_Z | T_MUST_EVEX, 0x55, imm); }
// vfpclasspd: throws XError(ERR.BAD_MEM_SIZE) unless op.isBit(128|256|512) holds; passes k.changeBit(op.getBit())
// as the first opVex operand (null second register) with T_66 | T_0F3A, T_EW1 and T_B64 flags, code 0x66 and imm.
5280 void vfpclasspd(Opmask k, Operand op, uint8 imm) { if (!op.isBit(128|256|512)) throw new XError(ERR.BAD_MEM_SIZE); opVex(k.changeBit(op.getBit()), null, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); }
// vfpclassps: throws XError(ERR.BAD_MEM_SIZE) unless op.isBit(128|256|512) holds; passes k.changeBit(op.getBit())
// as the first opVex operand (null second register) with T_66 | T_0F3A, T_EW0 and T_B32 flags, code 0x66 and imm.
5281 void vfpclassps(Opmask k, Operand op, uint8 imm) { if (!op.isBit(128|256|512)) throw new XError(ERR.BAD_MEM_SIZE); opVex(k.changeBit(op.getBit()), null, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }
// vfpclasssd: throws XError(ERR.BAD_MEM_SIZE) unless op.isXMEM() holds; passes k unchanged to opVex
// (null second register) with T_66 | T_0F3A, T_EW1 and T_N8 flags, code 0x67 and imm.
5282 void vfpclasssd(Opmask k, Operand op, uint8 imm) { if (!op.isXMEM()) throw new XError(ERR.BAD_MEM_SIZE); opVex(k, null, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, 0x67, imm); }
// vfpclassss: throws XError(ERR.BAD_MEM_SIZE) unless op.isXMEM() holds; passes k unchanged to opVex
// (null second register) with T_66 | T_0F3A, T_EW0 and T_N4 flags, code 0x67 and imm.
5283 void vfpclassss(Opmask k, Operand op, uint8 imm) { if (!op.isXMEM()) throw new XError(ERR.BAD_MEM_SIZE); opVex(k, null, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, 0x67, imm); }
// vgatherdpd: delegates to opGather2 with T_N8, T_EW1 and T_VSIB flags, code 0x92 and trailing mode argument 1
// (the meaning of the mode value is defined by opGather2, not visible here).
5284 void vgatherdpd(Xmm x, Address addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x92, 1); }
// vgatherdps: delegates to opGather2 with T_N4, T_EW0 and T_VSIB flags, code 0x92 and trailing mode argument 0.
5285 void vgatherdps(Xmm x, Address addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_VSIB, 0x92, 0); }
// vgatherpf0dpd: delegates to opGatherFetch with the fixed register zm1, T_N8, T_EW1, T_M_K and T_VSIB flags,
// code 0xC6 and Operand.Kind.YMM as the final kind argument.
5286 void vgatherpf0dpd(Address addr) { opGatherFetch(addr, zm1, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand.Kind.YMM); }
// vgatherpf0dps: delegates to opGatherFetch with the fixed register zm1, T_N4, T_EW0, T_M_K and T_VSIB flags,
// code 0xC6 and Operand.Kind.ZMM as the final kind argument.
5287 void vgatherpf0dps(Address addr) { opGatherFetch(addr, zm1, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand.Kind.ZMM); }
// vgatherpf0qpd: delegates to opGatherFetch with the fixed register zm1, T_N8, T_EW1, T_M_K and T_VSIB flags,
// code 0xC7 and Operand.Kind.ZMM as the final kind argument.
5288 void vgatherpf0qpd(Address addr) { opGatherFetch(addr, zm1, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand.Kind.ZMM); }
// vgatherpf0qps: delegates to opGatherFetch with the fixed register zm1, T_N4, T_EW0, T_M_K and T_VSIB flags,
// code 0xC7 and Operand.Kind.ZMM as the final kind argument.
5289 void vgatherpf0qps(Address addr) { opGatherFetch(addr, zm1, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand.Kind.ZMM); }
// vgatherpf1dpd: same arguments as vgatherpf0dpd except the fixed register is zm2 (code 0xC6, Operand.Kind.YMM).
5290 void vgatherpf1dpd(Address addr) { opGatherFetch(addr, zm2, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand.Kind.YMM); }
// vgatherpf1dps: same arguments as vgatherpf0dps except the fixed register is zm2 (code 0xC6, Operand.Kind.ZMM).
5291 void vgatherpf1dps(Address addr) { opGatherFetch(addr, zm2, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand.Kind.ZMM); }
// vgatherpf1qpd: same arguments as vgatherpf0qpd except the fixed register is zm2 (code 0xC7, Operand.Kind.ZMM).
5292 void vgatherpf1qpd(Address addr) { opGatherFetch(addr, zm2, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand.Kind.ZMM); }
// vgatherpf1qps: same arguments as vgatherpf0qps except the fixed register is zm2 (code 0xC7, Operand.Kind.ZMM).
5293 void vgatherpf1qps(Address addr) { opGatherFetch(addr, zm2, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand.Kind.ZMM); }
// vgatherqpd: delegates to opGather2 with T_N8, T_EW1 and T_VSIB flags, code 0x93 and trailing mode argument 0.
5294 void vgatherqpd(Xmm x, Address addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x93, 0); }
// vgatherqps: delegates to opGather2 with T_N4, T_EW0 and T_VSIB flags, code 0x93 and trailing mode argument 2.
5295 void vgatherqps(Xmm x, Address addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_VSIB, 0x93, 2); }
// vgetexppd: forwards x and op to opAVX_X_XM_IMM with T_66 | T_0F38, T_EW1, T_B64 and T_SAE_Z flags, code 0x42.
5296 void vgetexppd(Xmm x, Operand op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x42); }
// vgetexpps: forwards x and op to opAVX_X_XM_IMM with T_66 | T_0F38, T_EW0, T_B32 and T_SAE_Z flags, code 0x42.
5297 void vgetexpps(Xmm x, Operand op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x42); }
// vgetexpsd: forwards x1, x2 and op to opAVX_X_X_XM with T_N8, T_66 | T_0F38, T_EW1 and T_SAE_X flags, code 0x43.
5298 void vgetexpsd(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x43); }
// vgetexpss: forwards x1, x2 and op to opAVX_X_X_XM with T_N4, T_66 | T_0F38, T_EW0 and T_SAE_X flags, code 0x43.
5299 void vgetexpss(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x43); }
// vgetmantpd: forwards x, op and imm to opAVX_X_XM_IMM with T_66 | T_0F3A, T_EW1, T_B64 and T_SAE_Z flags, code 0x26.
5300 void vgetmantpd(Xmm x, Operand op, uint8 imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x26, imm); }
// vgetmantps: forwards x, op and imm to opAVX_X_XM_IMM with T_66 | T_0F3A, T_EW0, T_B32 and T_SAE_Z flags, code 0x26.
5301 void vgetmantps(Xmm x, Operand op, uint8 imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x26, imm); }
// vgetmantsd: forwards x1, x2, op and imm to opAVX_X_X_XM with T_N8, T_66 | T_0F3A, T_EW1 and T_SAE_X flags, code 0x27.
5302 void vgetmantsd(Xmm x1, Xmm x2, Operand op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x27, imm); }
// vgetmantss: forwards x1, x2, op and imm to opAVX_X_X_XM with T_N4, T_66 | T_0F3A, T_EW0 and T_SAE_X flags, code 0x27.
5303 void vgetmantss(Xmm x1, Xmm x2, Operand op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x27, imm); }
// vinsertf32x4: throws XError(ERR.BAD_COMBINATION) unless r1 and r2 report the same kind and op.isKind(MEM | XMM)
// holds; otherwise calls opVex(r1, r2, op, ...) with T_N16, T_66 | T_0F3A and T_EW0 flags, code 0x18 and imm.
5304 void vinsertf32x4(Ymm r1, Ymm r2, Operand op, uint8 imm) {if (!(r1.getKind() == r2.getKind() && op.isKind(Operand.Kind.MEM | Operand.Kind.XMM))) throw new XError(ERR.BAD_COMBINATION); opVex(r1, r2, op, T_N16 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x18, imm); }
// vinsertf32x8: throws XError(ERR.BAD_COMBINATION) unless op.isKind(MEM | YMM) holds (no register-kind comparison
// here); otherwise calls opVex(r1, r2, op, ...) with T_N32, T_66 | T_0F3A and T_EW0 flags, code 0x1A and imm.
5305 void vinsertf32x8(Zmm r1, Zmm r2, Operand op, uint8 imm) {if (!op.isKind(Operand.Kind.MEM | Operand.Kind.YMM)) throw new XError(ERR.BAD_COMBINATION); opVex(r1, r2, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x1A, imm); }
// vinsertf64x2: throws XError(ERR.BAD_COMBINATION) unless r1 and r2 report the same kind and op.isKind(MEM | XMM)
// holds; otherwise calls opVex(r1, r2, op, ...) with T_N16, T_66 | T_0F3A and T_EW1 flags, code 0x18 and imm.
5306 void vinsertf64x2(Ymm r1, Ymm r2, Operand op, uint8 imm) {if (!(r1.getKind() == r2.getKind() && op.isKind(Operand.Kind.MEM | Operand.Kind.XMM))) throw new XError(ERR.BAD_COMBINATION); opVex(r1, r2, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x18, imm); }
// vinsertf64x4: throws XError(ERR.BAD_COMBINATION) unless op.isKind(MEM | YMM) holds; otherwise calls
// opVex(r1, r2, op, ...) with T_N32, T_66 | T_0F3A and T_EW1 flags, code 0x1A and imm.
5307 void vinsertf64x4(Zmm r1, Zmm r2, Operand op, uint8 imm) {if (!op.isKind(Operand.Kind.MEM | Operand.Kind.YMM)) throw new XError(ERR.BAD_COMBINATION); opVex(r1, r2, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x1A, imm); }
// vinserti32x4: throws XError(ERR.BAD_COMBINATION) unless r1 and r2 report the same kind and op.isKind(MEM | XMM)
// holds; otherwise calls opVex(r1, r2, op, ...) with T_N16, T_66 | T_0F3A and T_EW0 flags, code 0x38 and imm.
5308 void vinserti32x4(Ymm r1, Ymm r2, Operand op, uint8 imm) {if (!(r1.getKind() == r2.getKind() && op.isKind(Operand.Kind.MEM | Operand.Kind.XMM))) throw new XError(ERR.BAD_COMBINATION); opVex(r1, r2, op, T_N16 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x38, imm); }
// vinserti32x8: throws XError(ERR.BAD_COMBINATION) unless op.isKind(MEM | YMM) holds; otherwise calls
// opVex(r1, r2, op, ...) with T_N32, T_66 | T_0F3A and T_EW0 flags, code 0x3A and imm.
5309 void vinserti32x8(Zmm r1, Zmm r2, Operand op, uint8 imm) {if (!op.isKind(Operand.Kind.MEM | Operand.Kind.YMM)) throw new XError(ERR.BAD_COMBINATION); opVex(r1, r2, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3A, imm); }
// vinserti64x2: throws XError(ERR.BAD_COMBINATION) unless r1 and r2 report the same kind and op.isKind(MEM | XMM)
// holds; otherwise calls opVex(r1, r2, op, ...) with T_N16, T_66 | T_0F3A and T_EW1 flags, code 0x38 and imm.
5310 void vinserti64x2(Ymm r1, Ymm r2, Operand op, uint8 imm) {if (!(r1.getKind() == r2.getKind() && op.isKind(Operand.Kind.MEM | Operand.Kind.XMM))) throw new XError(ERR.BAD_COMBINATION); opVex(r1, r2, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x38, imm); }
// vinserti64x4: throws XError(ERR.BAD_COMBINATION) unless op.isKind(MEM | YMM) holds; otherwise calls
// opVex(r1, r2, op, ...) with T_N32, T_66 | T_0F3A and T_EW1 flags, code 0x3A and imm.
5311 void vinserti64x4(Zmm r1, Zmm r2, Operand op, uint8 imm) {if (!op.isKind(Operand.Kind.MEM | Operand.Kind.YMM)) throw new XError(ERR.BAD_COMBINATION); opVex(r1, r2, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3A, imm); }
// vmovdqa32 (Address-first overload): passes (x, addr) to opAVX_X_XM_IMM with T_66 | T_0F, T_EW0 and the extra
// T_M_K flag, code 0x7F.
5312 void vmovdqa32(Address addr, Xmm x) { opAVX_X_XM_IMM(x, addr, T_66 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); }
// vmovdqa32 (register-first overload): passes (x, op) to opAVX_X_XM_IMM with T_66 | T_0F and T_EW0 flags, code 0x6F.
5313 void vmovdqa32(Xmm x, Operand op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
// vmovdqa64 (Address-first overload): passes (x, addr) to opAVX_X_XM_IMM with T_66 | T_0F, T_EW1 and the extra
// T_M_K flag, code 0x7F.
5314 void vmovdqa64(Address addr, Xmm x) { opAVX_X_XM_IMM(x, addr, T_66 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); }
// vmovdqa64 (register-first overload): passes (x, op) to opAVX_X_XM_IMM with T_66 | T_0F and T_EW1 flags, code 0x6F.
5315 void vmovdqa64(Xmm x, Operand op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
// vmovdqu16 (Address-first overload): passes (x, addr) to opAVX_X_XM_IMM with T_F2 | T_0F, T_EW1 and the extra
// T_M_K flag, code 0x7F.
5316 void vmovdqu16(Address addr, Xmm x) { opAVX_X_XM_IMM(x, addr, T_F2 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); }
// vmovdqu16 (register-first overload): passes (x, op) to opAVX_X_XM_IMM with T_F2 | T_0F and T_EW1 flags, code 0x6F.
5317 void vmovdqu16(Xmm x, Operand op) { opAVX_X_XM_IMM(x, op, T_F2 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
// vmovdqu32 (Address-first overload): passes (x, addr) to opAVX_X_XM_IMM with T_F3 | T_0F, T_EW0 and the extra
// T_M_K flag, code 0x7F.
5318 void vmovdqu32(Address addr, Xmm x) { opAVX_X_XM_IMM(x, addr, T_F3 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); }
// vmovdqu32 (register-first overload): passes (x, op) to opAVX_X_XM_IMM with T_F3 | T_0F and T_EW0 flags, code 0x6F.
5319 void vmovdqu32(Xmm x, Operand op) { opAVX_X_XM_IMM(x, op, T_F3 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
// vmovdqu64 (Address-first overload): passes (x, addr) to opAVX_X_XM_IMM with T_F3 | T_0F, T_EW1 and the extra
// T_M_K flag, code 0x7F.
5320 void vmovdqu64(Address addr, Xmm x) { opAVX_X_XM_IMM(x, addr, T_F3 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); }
// vmovdqu64 (register-first overload): passes (x, op) to opAVX_X_XM_IMM with T_F3 | T_0F and T_EW1 flags, code 0x6F.
5321 void vmovdqu64(Xmm x, Operand op) { opAVX_X_XM_IMM(x, op, T_F3 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
// vmovdqu8 (Address-first overload): passes (x, addr) to opAVX_X_XM_IMM with T_F2 | T_0F, T_EW0 and the extra
// T_M_K flag, code 0x7F.
5322 void vmovdqu8(Address addr, Xmm x) { opAVX_X_XM_IMM(x, addr, T_F2 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); }
// vmovdqu8 (register-first overload): passes (x, op) to opAVX_X_XM_IMM with T_F2 | T_0F and T_EW0 flags, code 0x6F.
5323 void vmovdqu8(Xmm x, Operand op) { opAVX_X_XM_IMM(x, op, T_F2 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
// vp2intersectd: throws XError(ERR.OPMASK_IS_ALREADY_SET) when k.getOpmaskIdx() != 0; otherwise forwards to
// opAVX_K_X_XM with T_F2 | T_0F38, T_EW0 and T_B32 flags, code 0x68. Uses T_EVEX rather than T_MUST_EVEX.
5324 void vp2intersectd(Opmask k, Xmm x, Operand op) { if (k.getOpmaskIdx() != 0) throw new XError(ERR.OPMASK_IS_ALREADY_SET); opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW0 | T_B32, 0x68); }
// vp2intersectq: throws XError(ERR.OPMASK_IS_ALREADY_SET) when k.getOpmaskIdx() != 0; otherwise forwards to
// opAVX_K_X_XM with T_F2 | T_0F38, T_EW1 and T_B64 flags, code 0x68. Uses T_EVEX rather than T_MUST_EVEX.
5325 void vp2intersectq(Opmask k, Xmm x, Operand op) { if (k.getOpmaskIdx() != 0) throw new XError(ERR.OPMASK_IS_ALREADY_SET); opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW1 | T_B64, 0x68); }
// vp4dpwssd: takes Zmm registers and an Address only; forwards to opAVX_X_X_XM with T_F2 | T_0F38, T_EW0 and T_N16 flags, code 0x52.
5326 void vp4dpwssd(Zmm z1, Zmm z2, Address addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x52); }
// vp4dpwssds: takes Zmm registers and an Address only; forwards to opAVX_X_X_XM with T_F2 | T_0F38, T_EW0 and T_N16 flags, code 0x53.
5327 void vp4dpwssds(Zmm z1, Zmm z2, Address addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x53); }
// vpabsq: forwards x and op to opAVX_X_XM_IMM with T_66 | T_0F38, T_EW1 and T_B64 flags, code 0x1F.
5328 void vpabsq(Xmm x, Operand op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_B64 | T_YMM, 0x1F); }
// vpandd: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F, T_EW0 and T_B32 flags, code 0xDB.
5329 void vpandd(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0xDB); }
// vpandnd: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F, T_EW0 and T_B32 flags, code 0xDF.
5330 void vpandnd(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0xDF); }
// vpandnq: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F, T_EW1 and T_B64 flags, code 0xDF.
5331 void vpandnq(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0xDF); }
// vpandq: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F, T_EW1 and T_B64 flags, code 0xDB.
5332 void vpandq(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0xDB); }
// vpblendmb: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F38 and T_EW0 flags (no broadcast flag), code 0x66.
5333 void vpblendmb(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x66); }
// vpblendmd: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F38, T_EW0 and T_B32 flags, code 0x64.
5334 void vpblendmd(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x64); }
// vpblendmq: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F38, T_EW1 and T_B64 flags, code 0x64.
5335 void vpblendmq(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x64); }
// vpblendmw: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F38 and T_EW1 flags (no broadcast flag), code 0x66.
5336 void vpblendmw(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x66); }
// vpbroadcastb (Reg8 source overload): calls opVex(x, null, r, ...) with T_66 | T_0F38 and T_EW0 flags, code 0x7A.
5337 void vpbroadcastb(Xmm x, Reg8 r) { opVex(x, null, r, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x7A); }
// vpbroadcastd (Reg32 source overload): calls opVex(x, null, r, ...) with T_66 | T_0F38 and T_EW0 flags, code 0x7C.
5338 void vpbroadcastd(Xmm x, Reg32 r) { opVex(x, null, r, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x7C); }
// vpbroadcastmb2q: calls opVex(x, null, k, ...) with an Opmask source, T_F3 | T_0F38 and T_EW1 flags, code 0x2A.
5339 void vpbroadcastmb2q(Xmm x, Opmask k) { opVex(x, null, k, T_F3 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, 0x2A); }
// vpbroadcastmw2d: calls opVex(x, null, k, ...) with an Opmask source, T_F3 | T_0F38 and T_EW0 flags, code 0x3A.
5340 void vpbroadcastmw2d(Xmm x, Opmask k) { opVex(x, null, k, T_F3 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0, 0x3A); }
// vpbroadcastw (Reg16 source overload): calls opVex(x, null, r, ...) with T_66 | T_0F38 and T_EW0 flags, code 0x7B.
5341 void vpbroadcastw(Xmm x, Reg16 r) { opVex(x, null, r, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x7B); }
// vpcmpb: forwards k, x, op and imm to opAVX_K_X_XM with T_66 | T_0F3A and T_EW0 flags, code 0x3F.
5342 void vpcmpb(Opmask k, Xmm x, Operand op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3F, imm); }
// vpcmpd: forwards k, x, op and imm to opAVX_K_X_XM with T_66 | T_0F3A, T_EW0 and T_B32 flags, code 0x1F.
5343 void vpcmpd(Opmask k, Xmm x, Operand op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x1F, imm); }
// vpcmpeqb (Opmask destination overload): forwards to opAVX_K_X_XM with T_66 | T_0F and no T_EW*/broadcast flag, code 0x74.
5344 void vpcmpeqb(Opmask k, Xmm x, Operand op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_YMM | T_MUST_EVEX, 0x74); }
// vpcmpeqd (Opmask destination overload): forwards to opAVX_K_X_XM with T_66 | T_0F and T_B32 (no T_EW* flag), code 0x76.
5345 void vpcmpeqd(Opmask k, Xmm x, Operand op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_B32, 0x76); }
// vpcmpeqq (Opmask destination overload): forwards to opAVX_K_X_XM with T_66 | T_0F38, T_EW1 and T_B64 flags, code 0x29.
5346 void vpcmpeqq(Opmask k, Xmm x, Operand op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x29); }
// vpcmpeqw (Opmask destination overload): forwards to opAVX_K_X_XM with T_66 | T_0F and no T_EW*/broadcast flag, code 0x75.
5347 void vpcmpeqw(Opmask k, Xmm x, Operand op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_YMM | T_MUST_EVEX, 0x75); }
// vpcmpgtb (Opmask destination overload): forwards to opAVX_K_X_XM with T_66 | T_0F and no T_EW*/broadcast flag, code 0x64.
5348 void vpcmpgtb(Opmask k, Xmm x, Operand op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_YMM | T_MUST_EVEX, 0x64); }
// vpcmpgtd (Opmask destination overload): forwards to opAVX_K_X_XM with T_66 | T_0F, T_EW0 and T_B32 flags, code 0x66.
5349 void vpcmpgtd(Opmask k, Xmm x, Operand op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x66); }
// vpcmpgtq (Opmask destination overload): forwards to opAVX_K_X_XM with T_66 | T_0F38, T_EW1 and T_B64 flags, code 0x37.
5350 void vpcmpgtq(Opmask k, Xmm x, Operand op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x37); }
// vpcmpgtw (Opmask destination overload): forwards to opAVX_K_X_XM with T_66 | T_0F and no T_EW*/broadcast flag, code 0x65.
5351 void vpcmpgtw(Opmask k, Xmm x, Operand op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_YMM | T_MUST_EVEX, 0x65); }
// vpcmpq: forwards k, x, op and imm to opAVX_K_X_XM with T_66 | T_0F3A, T_EW1 and T_B64 flags, code 0x1F.
5352 void vpcmpq(Opmask k, Xmm x, Operand op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x1F, imm); }
// vpcmpub: forwards k, x, op and imm to opAVX_K_X_XM with T_66 | T_0F3A and T_EW0 flags, code 0x3E.
5353 void vpcmpub(Opmask k, Xmm x, Operand op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3E, imm); }
// vpcmpud: forwards k, x, op and imm to opAVX_K_X_XM with T_66 | T_0F3A, T_EW0 and T_B32 flags, code 0x1E.
5354 void vpcmpud(Opmask k, Xmm x, Operand op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x1E, imm); }
// vpcmpuq: forwards k, x, op and imm to opAVX_K_X_XM with T_66 | T_0F3A, T_EW1 and T_B64 flags, code 0x1E.
5355 void vpcmpuq(Opmask k, Xmm x, Operand op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x1E, imm); }
// vpcmpuw: forwards k, x, op and imm to opAVX_K_X_XM with T_66 | T_0F3A and T_EW1 flags, code 0x3E.
5356 void vpcmpuw(Opmask k, Xmm x, Operand op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3E, imm); }
// vpcmpw: forwards k, x, op and imm to opAVX_K_X_XM with T_66 | T_0F3A and T_EW1 flags, code 0x3F.
5357 void vpcmpw(Opmask k, Xmm x, Operand op, uint8 imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3F, imm); }
// vpcompressd: signature is (op, x) but the helper is called as opAVX_X_XM_IMM(x, op, ...); T_N4, T_66 | T_0F38 and T_EW0 flags, code 0x8B.
5358 void vpcompressd(Operand op, Xmm x) { opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8B); }
// vpcompressq: signature is (op, x) but the helper is called as opAVX_X_XM_IMM(x, op, ...); T_N8, T_66 | T_0F38 and T_EW1 flags, code 0x8B.
5359 void vpcompressq(Operand op, Xmm x) { opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x8B); }
// vpconflictd: forwards x and op to opAVX_X_XM_IMM with T_66 | T_0F38, T_EW0 and T_B32 flags, code 0xC4.
5360 void vpconflictd(Xmm x, Operand op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0xC4); }
// vpconflictq: forwards x and op to opAVX_X_XM_IMM with T_66 | T_0F38, T_EW1 and T_B64 flags, code 0xC4.
5361 void vpconflictq(Xmm x, Operand op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0xC4); }
// vpdpbusd: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F38, T_EW0, T_B32 and T_SAE_Z flags, code 0x50.
5362 void vpdpbusd(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x50); }
// vpdpbusds: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F38, T_EW0, T_B32 and T_SAE_Z flags, code 0x51.
5363 void vpdpbusds(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x51); }
// vpdpwssd: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F38, T_EW0, T_B32 and T_SAE_Z flags, code 0x52.
5364 void vpdpwssd(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x52); }
// vpdpwssds: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F38, T_EW0, T_B32 and T_SAE_Z flags, code 0x53.
5365 void vpdpwssds(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x53); }
// vpermb: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F38 and T_EW0 flags, code 0x8D.
5366 void vpermb(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8D); }
// vpermi2b: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F38 and T_EW0 flags, code 0x75.
5367 void vpermi2b(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x75); }
// vpermi2d: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F38, T_EW0 and T_B32 flags, code 0x76.
5368 void vpermi2d(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x76); }
// vpermi2pd: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F38, T_EW1 and T_B64 flags, code 0x77.
5369 void vpermi2pd(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x77); }
// vpermi2ps: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F38, T_EW0 and T_B32 flags, code 0x77.
5370 void vpermi2ps(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x77); }
// vpermi2q: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F38, T_EW1 and T_B64 flags, code 0x76.
5371 void vpermi2q(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x76); }
// vpermi2w: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F38 and T_EW1 flags, code 0x75.
5372 void vpermi2w(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x75); }
// vpermt2b: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F38 and T_EW0 flags, code 0x7D.
5373 void vpermt2b(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x7D); }
// vpermt2d: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F38, T_EW0 and T_B32 flags, code 0x7E.
5374 void vpermt2d(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x7E); }
// vpermt2pd: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F38, T_EW1 and T_B64 flags, code 0x7F.
5375 void vpermt2pd(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x7F); }
// vpermt2ps: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F38, T_EW0 and T_B32 flags, code 0x7F.
5376 void vpermt2ps(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x7F); }
// vpermt2q: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F38, T_EW1 and T_B64 flags, code 0x7E.
5377 void vpermt2q(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x7E); }
// vpermt2w: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F38 and T_EW1 flags, code 0x7D.
5378 void vpermt2w(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x7D); }
// vpermw: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F38 and T_EW1 flags, code 0x8D.
5379 void vpermw(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x8D); }
// vpexpandb: forwards x and op to opAVX_X_XM_IMM with T_N1, T_66 | T_0F38, T_EW0 and T_SAE_Z flags, code 0x62.
5380 void vpexpandb(Xmm x, Operand op) { opAVX_X_XM_IMM(x, op, T_N1 | T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x62); }
// vpexpandd: forwards x and op to opAVX_X_XM_IMM with T_N4, T_66 | T_0F38 and T_EW0 flags, code 0x89.
5381 void vpexpandd(Xmm x, Operand op) { opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x89); }
// vpexpandq: forwards x and op to opAVX_X_XM_IMM with T_N8, T_66 | T_0F38 and T_EW1 flags, code 0x89.
5382 void vpexpandq(Xmm x, Operand op) { opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x89); }
// vpexpandw: forwards x and op to opAVX_X_XM_IMM with T_N2, T_66 | T_0F38, T_EW1 and T_SAE_Z flags, code 0x62.
5383 void vpexpandw(Xmm x, Operand op) { opAVX_X_XM_IMM(x, op, T_N2 | T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x62); }
// vpgatherdd: delegates to opGather2 with T_N4, T_EW0 and T_VSIB flags, code 0x90 and trailing mode argument 0.
5384 void vpgatherdd(Xmm x, Address addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_VSIB, 0x90, 0); }
// vpgatherdq: delegates to opGather2 with T_N8, T_EW1 and T_VSIB flags, code 0x90 and trailing mode argument 1.
5385 void vpgatherdq(Xmm x, Address addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x90, 1); }
// vpgatherqd: delegates to opGather2 with T_N4, T_EW0 and T_VSIB flags, code 0x91 and trailing mode argument 2.
5386 void vpgatherqd(Xmm x, Address addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_VSIB, 0x91, 2); }
// vpgatherqq: delegates to opGather2 with T_N8, T_EW1 and T_VSIB flags, code 0x91 and trailing mode argument 0.
5387 void vpgatherqq(Xmm x, Address addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x91, 0); }
// vplzcntd: forwards x and op to opAVX_X_XM_IMM with T_66 | T_0F38, T_EW0 and T_B32 flags, code 0x44.
5388 void vplzcntd(Xmm x, Operand op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x44); }
// vplzcntq: forwards x and op to opAVX_X_XM_IMM with T_66 | T_0F38, T_EW1 and T_B64 flags, code 0x44.
5389 void vplzcntq(Xmm x, Operand op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x44); }
// vpmadd52huq: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F38, T_EW1 and T_B64 flags, code 0xB5.
5390 void vpmadd52huq(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0xB5); }
// vpmadd52luq: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F38, T_EW1 and T_B64 flags, code 0xB4.
5391 void vpmadd52luq(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0xB4); }
// vpmaxsq: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F38, T_EW1 and T_B64 flags, code 0x3D.
5392 void vpmaxsq(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x3D); }
// vpmaxuq: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F38, T_EW1 and T_B64 flags, code 0x3F.
5393 void vpmaxuq(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x3F); }
// vpminsq: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F38, T_EW1 and T_B64 flags, code 0x39.
5394 void vpminsq(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x39); }
// vpminuq: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F38, T_EW1 and T_B64 flags, code 0x3B.
5395 void vpminuq(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x3B); }
// vpmovb2m: calls opVex(k, null, x, ...) with an Opmask first operand, T_F3 | T_0F38 and T_EW0 flags, code 0x29.
5396 void vpmovb2m(Opmask k, Xmm x) { opVex(k, null, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x29); }
// vpmovd2m: calls opVex(k, null, x, ...) with an Opmask first operand, T_F3 | T_0F38 and T_EW0 flags, code 0x39.
5397 void vpmovd2m(Opmask k, Xmm x) { opVex(k, null, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x39); }
// vpmovdb: delegates to opVmov(op, x, ...) with T_N4 | T_N_VL, T_F3 | T_0F38 and T_EW0 flags, code 0x31 and a
// trailing `false` (the boolean's meaning is defined by opVmov, not visible here).
5398 void vpmovdb(Operand op, Xmm x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x31, false); }
// vpmovdw: delegates to opVmov(op, x, ...) with T_N8 | T_N_VL, T_F3 | T_0F38 and T_EW0 flags, code 0x33 and a trailing `true`.
5399 void vpmovdw(Operand op, Xmm x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x33, true); }
// vpmovm2b: calls opVex(x, null, k, ...) with an Opmask source, T_F3 | T_0F38 and T_EW0 flags, code 0x28.
5400 void vpmovm2b(Xmm x, Opmask k) { opVex(x, null, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x28); }
// vpmovm2d: calls opVex(x, null, k, ...) with an Opmask source, T_F3 | T_0F38 and T_EW0 flags, code 0x38.
5401 void vpmovm2d(Xmm x, Opmask k) { opVex(x, null, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0, 0x38); }
// vpmovm2q: calls opVex(x, null, k, ...) with an Opmask source, T_F3 | T_0F38 and T_EW1 flags, code 0x38.
5402 void vpmovm2q(Xmm x, Opmask k) { opVex(x, null, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x38); }
// vpmovm2w: calls opVex(x, null, k, ...) with an Opmask source, T_F3 | T_0F38 and T_EW1 flags, code 0x28.
5403 void vpmovm2w(Xmm x, Opmask k) { opVex(x, null, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x28); }
// vpmovq2m: calls opVex(k, null, x, ...) with an Opmask first operand, T_F3 | T_0F38 and T_EW1 flags, code 0x39.
5404 void vpmovq2m(Opmask k, Xmm x) { opVex(k, null, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x39); }
// vpmovqb: delegates to opVmov(op, x, ...) with T_N2 | T_N_VL, T_F3 | T_0F38 and T_EW0 flags, code 0x32 and a trailing `false`.
5405 void vpmovqb(Operand op, Xmm x) { opVmov(op, x, T_N2 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x32, false); }
// vpmovqd: delegates to opVmov(op, x, ...) with T_N8 | T_N_VL, T_F3 | T_0F38 and T_EW0 flags, code 0x35 and a trailing `true`.
5406 void vpmovqd(Operand op, Xmm x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x35, true); }
// vpmovqw: delegates to opVmov(op, x, ...) with T_N4 | T_N_VL, T_F3 | T_0F38 and T_EW0 flags, code 0x34 and a trailing `false`.
5407 void vpmovqw(Operand op, Xmm x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x34, false); }
// vpmovsdb: delegates to opVmov(op, x, ...) with T_N4 | T_N_VL, T_F3 | T_0F38 and T_EW0 flags, code 0x21 and a trailing `false`.
5408 void vpmovsdb(Operand op, Xmm x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x21, false); }
// vpmovsdw: delegates to opVmov(op, x, ...) with T_N8 | T_N_VL, T_F3 | T_0F38 and T_EW0 flags, code 0x23 and a trailing `true`.
5409 void vpmovsdw(Operand op, Xmm x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x23, true); }
// vpmovsqb: delegates to opVmov(op, x, ...) with T_N2 | T_N_VL, T_F3 | T_0F38 and T_EW0 flags, code 0x22 and a trailing `false`.
5410 void vpmovsqb(Operand op, Xmm x) { opVmov(op, x, T_N2 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x22, false); }
// vpmovsqd: delegates to opVmov(op, x, ...) with T_N8 | T_N_VL, T_F3 | T_0F38 and T_EW0 flags, code 0x25 and a trailing `true`.
5411 void vpmovsqd(Operand op, Xmm x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x25, true); }
// vpmovsqw: delegates to opVmov(op, x, ...) with T_N4 | T_N_VL, T_F3 | T_0F38 and T_EW0 flags, code 0x24 and a trailing `false`.
5412 void vpmovsqw(Operand op, Xmm x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x24, false); }
// vpmovswb: delegates to opVmov(op, x, ...) with T_N8 | T_N_VL, T_F3 | T_0F38 and T_EW0 flags, code 0x20 and a trailing `true`.
5413 void vpmovswb(Operand op, Xmm x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x20, true); }
// vpmovusdb: delegates to opVmov(op, x, ...) with T_N4 | T_N_VL, T_F3 | T_0F38 and T_EW0 flags, code 0x11 and a trailing `false`.
5414 void vpmovusdb(Operand op, Xmm x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x11, false); }
// vpmovusdw: delegates to opVmov(op, x, ...) with T_N8 | T_N_VL, T_F3 | T_0F38 and T_EW0 flags, code 0x13 and a trailing `true`.
5415 void vpmovusdw(Operand op, Xmm x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x13, true); }
// vpmovusqb: delegates to opVmov(op, x, ...) with T_N2 | T_N_VL, T_F3 | T_0F38 and T_EW0 flags, code 0x12 and a trailing `false`.
5416 void vpmovusqb(Operand op, Xmm x) { opVmov(op, x, T_N2 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x12, false); }
// vpmovusqd: delegates to opVmov(op, x, ...) with T_N8 | T_N_VL, T_F3 | T_0F38 and T_EW0 flags, code 0x15 and a trailing `true`.
5417 void vpmovusqd(Operand op, Xmm x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x15, true); }
// vpmovusqw: delegates to opVmov(op, x, ...) with T_N4 | T_N_VL, T_F3 | T_0F38 and T_EW0 flags, code 0x14 and a trailing `false`.
5418 void vpmovusqw(Operand op, Xmm x) { opVmov(op, x, T_N4 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x14, false); }
// vpmovuswb: delegates to opVmov(op, x, ...) with T_N8 | T_N_VL, T_F3 | T_0F38 and T_EW0 flags, code 0x10 and a trailing `true`.
5419 void vpmovuswb(Operand op, Xmm x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x10, true); }
// vpmovw2m: calls opVex(k, null, x, ...) with an Opmask first operand, T_F3 | T_0F38 and T_EW1 flags, code 0x29.
5420 void vpmovw2m(Opmask k, Xmm x) { opVex(k, null, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x29); }
// vpmovwb: delegates to opVmov(op, x, ...) with T_N8 | T_N_VL, T_F3 | T_0F38 and T_EW0 flags, code 0x30 and a trailing `true`.
5421 void vpmovwb(Operand op, Xmm x) { opVmov(op, x, T_N8 | T_N_VL | T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x30, true); }
// vpmullq: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F38, T_EW1 and T_B64 flags, code 0x40.
5422 void vpmullq(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x40); }
// vpmultishiftqb: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F38, T_EW1 and T_B64 flags, code 0x83.
5423 void vpmultishiftqb(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x83); }
// vpopcntb: forwards x and op to opAVX_X_XM_IMM with T_66 | T_0F38, T_EW0 and T_SAE_Z flags (no broadcast flag), code 0x54.
5424 void vpopcntb(Xmm x, Operand op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x54); }
// vpopcntd: forwards x and op to opAVX_X_XM_IMM with T_66 | T_0F38, T_EW0, T_B32 and T_SAE_Z flags, code 0x55.
5425 void vpopcntd(Xmm x, Operand op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x55); }
// vpopcntq: forwards x and op to opAVX_X_XM_IMM with T_66 | T_0F38, T_EW1, T_B64 and T_SAE_Z flags, code 0x55.
5426 void vpopcntq(Xmm x, Operand op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x55); }
// vpopcntw: forwards x and op to opAVX_X_XM_IMM with T_66 | T_0F38, T_EW1 and T_SAE_Z flags (no broadcast flag), code 0x54.
5427 void vpopcntw(Xmm x, Operand op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x54); }
// vpord: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F, T_EW0 and T_B32 flags, code 0xEB.
5428 void vpord(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0xEB); }
// vporq: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F, T_EW1 and T_B64 flags, code 0xEB.
5429 void vporq(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0xEB); }
// vprold: builds a new Xmm of x's kind with index 1 as the first operand, then forwards it with x, op and imm to
// opAVX_X_X_XM using T_66 | T_0F, T_EW0 and T_B32 flags, code 0x72.
// NOTE(review): the fixed index 1 presumably selects the operation within code 0x72 — confirm against the encoder.
5430 void vprold(Xmm x, Operand op, uint8 imm) { opAVX_X_X_XM(new Xmm(x.getKind(), 1), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x72, imm); }
// vprolq: builds a new Xmm of x's kind with index 1 as the first operand, then forwards it with x, op and imm to
// opAVX_X_X_XM using T_66 | T_0F, T_EW1 and T_B64 flags, code 0x72.
5431 void vprolq(Xmm x, Operand op, uint8 imm) { opAVX_X_X_XM(new Xmm(x.getKind(), 1), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x72, imm); }
// vprolvd: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F38, T_EW0 and T_B32 flags, code 0x15.
5432 void vprolvd(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x15); }
// vprolvq: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F38, T_EW1 and T_B64 flags, code 0x15.
5433 void vprolvq(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x15); }
// vprord: builds a new Xmm of x's kind with index 0 as the first operand, then forwards it with x, op and imm to
// opAVX_X_X_XM using T_66 | T_0F, T_EW0 and T_B32 flags, code 0x72.
5434 void vprord(Xmm x, Operand op, uint8 imm) { opAVX_X_X_XM(new Xmm(x.getKind(), 0), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x72, imm); }
// vprorq: builds a new Xmm of x's kind with index 0 as the first operand, then forwards it with x, op and imm to
// opAVX_X_X_XM using T_66 | T_0F, T_EW1 and T_B64 flags, code 0x72.
5435 void vprorq(Xmm x, Operand op, uint8 imm) { opAVX_X_X_XM(new Xmm(x.getKind(), 0), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x72, imm); }
// vprorvd: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F38, T_EW0 and T_B32 flags, code 0x14.
5436 void vprorvd(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x14); }
// vprorvq: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F38, T_EW1 and T_B64 flags, code 0x14.
5437 void vprorvq(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x14); }
// vpscatterdd: signature is (addr, x) but the helper is called as opGather2(x, addr, ...); T_N4, T_EW0, T_M_K and
// T_VSIB flags, code 0xA0 and trailing mode argument 0.
5438 void vpscatterdd(Address addr, Xmm x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA0, 0); }
// vpscatterdq: signature is (addr, x) but the helper is called as opGather2(x, addr, ...); T_N8, T_EW1, T_M_K and
// T_VSIB flags, code 0xA0 and trailing mode argument 1.
5439 void vpscatterdq(Address addr, Xmm x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA0, 1); }
// vpscatterqd: signature is (addr, x) but the helper is called as opGather2(x, addr, ...); T_N4, T_EW0, T_M_K and
// T_VSIB flags, code 0xA1 and trailing mode argument 2.
5440 void vpscatterqd(Address addr, Xmm x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA1, 2); }
// vpscatterqq: signature is (addr, x) but the helper is called as opGather2(x, addr, ...); T_N8, T_EW1, T_M_K and
// T_VSIB flags, code 0xA1 and trailing mode argument 0.
5441 void vpscatterqq(Address addr, Xmm x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA1, 0); }
// vpshldd: forwards x1, x2, op and imm to opAVX_X_X_XM with T_66 | T_0F3A, T_EW0, T_B32 and T_SAE_Z flags, code 0x71.
5442 void vpshldd(Xmm x1, Xmm x2, Operand op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x71, imm); }
// vpshldq: forwards x1, x2, op and imm to opAVX_X_X_XM with T_66 | T_0F3A, T_EW1, T_B64 and T_SAE_Z flags, code 0x71.
5443 void vpshldq(Xmm x1, Xmm x2, Operand op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x71, imm); }
// vpshldvd: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F38, T_EW0, T_B32 and T_SAE_Z flags, code 0x71.
5444 void vpshldvd(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x71); }
// vpshldvq: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F38, T_EW1, T_B64 and T_SAE_Z flags, code 0x71.
5445 void vpshldvq(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x71); }
// vpshldvw: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F38, T_EW1 and T_SAE_Z flags (no broadcast flag), code 0x70.
5446 void vpshldvw(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x70); }
// vpshldw: forwards x1, x2, op and imm to opAVX_X_X_XM with T_66 | T_0F3A, T_EW1 and T_SAE_Z flags (no broadcast flag), code 0x70.
5447 void vpshldw(Xmm x1, Xmm x2, Operand op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x70, imm); }
// vpshrdd: forwards x1, x2, op and imm to opAVX_X_X_XM with T_66 | T_0F3A, T_EW0, T_B32 and T_SAE_Z flags, code 0x73.
5448 void vpshrdd(Xmm x1, Xmm x2, Operand op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x73, imm); }
// vpshrdq: forwards x1, x2, op and imm to opAVX_X_X_XM with T_66 | T_0F3A, T_EW1, T_B64 and T_SAE_Z flags, code 0x73.
5449 void vpshrdq(Xmm x1, Xmm x2, Operand op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x73, imm); }
// vpshrdvd: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F38, T_EW0, T_B32 and T_SAE_Z flags, code 0x73.
5450 void vpshrdvd(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x73); }
// vpshrdvq: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F38, T_EW1, T_B64 and T_SAE_Z flags, code 0x73.
5451 void vpshrdvq(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x73); }
// vpshrdvw: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F38, T_EW1 and T_SAE_Z flags (no broadcast flag), code 0x72.
5452 void vpshrdvw(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x72); }
// vpshrdw: forwards x1, x2, op and imm to opAVX_X_X_XM with T_66 | T_0F3A, T_EW1 and T_SAE_Z flags (no broadcast flag), code 0x72.
5453 void vpshrdw(Xmm x1, Xmm x2, Operand op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x72, imm); }
// vpshufbitqmb: calls opVex(k, x, op, ...) directly (not via opAVX_K_X_XM) with T_66 | T_0F38 and T_EW0 flags, code 0x8F.
5454 void vpshufbitqmb(Opmask k, Xmm x, Operand op) { opVex(k, x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8F); }
// vpsllvw: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F38 and T_EW1 flags, code 0x12.
5455 void vpsllvw(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x12); }
// vpsraq (immediate overload): builds a new Xmm of x's kind with index 4 as the first operand, then forwards it
// with x, op and imm to opAVX_X_X_XM using T_66 | T_0F, T_EW1 and T_B64 flags, code 0x72.
5456 void vpsraq(Xmm x, Operand op, uint8 imm) { opAVX_X_X_XM(new Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x72, imm); }
// vpsraq (three-operand overload): forwards x1, x2 and op to opAVX_X_X_XM with T_N16, T_66 | T_0F and T_EW1 flags
// (no broadcast flag), code 0xE2.
5457 void vpsraq(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX, 0xE2); }
// vpsravq: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F38, T_EW1 and T_B64 flags, code 0x46.
5458 void vpsravq(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x46); }
// vpsravw: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F38 and T_EW1 flags, code 0x11.
5459 void vpsravw(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x11); }
// vpsrlvw: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F38 and T_EW1 flags, code 0x10.
5460 void vpsrlvw(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x10); }
// vpternlogd: forwards x1, x2, op and imm to opAVX_X_X_XM with T_66 | T_0F3A, T_EW0 and T_B32 flags, code 0x25.
5461 void vpternlogd(Xmm x1, Xmm x2, Operand op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x25, imm); }
// vpternlogq: forwards x1, x2, op and imm to opAVX_X_X_XM with T_66 | T_0F3A, T_EW1 and T_B64 flags, code 0x25.
5462 void vpternlogq(Xmm x1, Xmm x2, Operand op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x25, imm); }
// vptestmb: forwards k, x and op to opAVX_K_X_XM with T_66 | T_0F38 and T_EW0 flags, code 0x26.
5463 void vptestmb(Opmask k, Xmm x, Operand op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x26); }
// vptestmd: forwards k, x and op to opAVX_K_X_XM with T_66 | T_0F38, T_EW0 and T_B32 flags, code 0x27.
5464 void vptestmd(Opmask k, Xmm x, Operand op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x27); }
// vptestmq: forwards k, x and op to opAVX_K_X_XM with T_66 | T_0F38, T_EW1 and T_B64 flags, code 0x27.
5465 void vptestmq(Opmask k, Xmm x, Operand op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x27); }
// vptestmw: forwards k, x and op to opAVX_K_X_XM with T_66 | T_0F38 and T_EW1 flags, code 0x26.
5466 void vptestmw(Opmask k, Xmm x, Operand op) { opAVX_K_X_XM(k, x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x26); }
// vptestnmb: forwards k, x and op to opAVX_K_X_XM with T_F3 | T_0F38 and T_EW0 flags, code 0x26.
5467 void vptestnmb(Opmask k, Xmm x, Operand op) { opAVX_K_X_XM(k, x, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x26); }
// vptestnmd: forwards k, x and op to opAVX_K_X_XM with T_F3 | T_0F38, T_EW0 and T_B32 flags, code 0x27.
5468 void vptestnmd(Opmask k, Xmm x, Operand op) { opAVX_K_X_XM(k, x, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x27); }
// vptestnmq: forwards k, x and op to opAVX_K_X_XM with T_F3 | T_0F38, T_EW1 and T_B64 flags, code 0x27.
5469 void vptestnmq(Opmask k, Xmm x, Operand op) { opAVX_K_X_XM(k, x, op, T_F3 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x27); }
// vptestnmw: forwards k, x and op to opAVX_K_X_XM with T_F3 | T_0F38 and T_EW1 flags, code 0x26.
5470 void vptestnmw(Opmask k, Xmm x, Operand op) { opAVX_K_X_XM(k, x, op, T_F3 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x26); }
// vpxord: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F, T_EW0 and T_B32 flags, code 0xEF.
5471 void vpxord(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0xEF); }
// vpxorq: forwards x1, x2 and op to opAVX_X_X_XM with T_66 | T_0F, T_EW1 and T_B64 flags, code 0xEF.
5472 void vpxorq(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0xEF); }
// vrangepd: forwards x1, x2, op and imm to opAVX_X_X_XM with T_66 | T_0F3A, T_EW1, T_B64 and T_SAE_Z flags, code 0x50.
5473 void vrangepd(Xmm x1, Xmm x2, Operand op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x50, imm); }
// vrangeps: forwards x1, x2, op and imm to opAVX_X_X_XM with T_66 | T_0F3A, T_EW0, T_B32 and T_SAE_Z flags, code 0x50.
5474 void vrangeps(Xmm x1, Xmm x2, Operand op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x50, imm); }
// vrangesd: forwards x1, x2, op and imm to opAVX_X_X_XM with T_N8, T_66 | T_0F3A, T_EW1 and T_SAE_X flags, code 0x51.
5475 void vrangesd(Xmm x1, Xmm x2, Operand op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x51, imm); }
// vrangess: forwards x1, x2, op and imm to opAVX_X_X_XM with T_N4, T_66 | T_0F3A, T_EW0 and T_SAE_X flags, code 0x51.
5476 void vrangess(Xmm x1, Xmm x2, Operand op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x51, imm); }
// vrcp14pd: forwards x and op to opAVX_X_XM_IMM with T_66 | T_0F38, T_EW1 and T_B64 flags, code 0x4C.
5477 void vrcp14pd(Xmm x, Operand op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x4C); }
// vrcp14ps: forwards x and op to opAVX_X_XM_IMM with T_66 | T_0F38, T_EW0 and T_B32 flags, code 0x4C.
5478 void vrcp14ps(Xmm x, Operand op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x4C); }
// vrcp14sd x1, x2, op: scalar counterpart of vrcp14pd; forwards opcode 0x4D with T_N8 | T_EW1 to opAVX_X_X_XM (no T_YMM, no broadcast flag).
void vrcp14sd(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX, 0x4D); }
// vrcp14ss x1, x2, op: scalar counterpart of vrcp14ps; forwards opcode 0x4D with T_N4 | T_EW0 to opAVX_X_X_XM (no T_YMM, no broadcast flag).
void vrcp14ss(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX, 0x4D); }
// vrcp28pd z, op: the destination is typed Zmm, so only Zmm destinations are accepted; forwards opcode 0xCA with T_EW1 | T_B64 | T_SAE_Z to opAVX_X_XM_IMM.
void vrcp28pd(Zmm z, Operand op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, 0xCA); }
// vrcp28ps z, op: the destination is typed Zmm, so only Zmm destinations are accepted; forwards opcode 0xCA with T_EW0 | T_B32 | T_SAE_Z to opAVX_X_XM_IMM.
void vrcp28ps(Zmm z, Operand op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xCA); }
// vrcp28sd x1, x2, op: scalar counterpart of vrcp28pd; forwards opcode 0xCB with T_N8 | T_EW1 | T_SAE_X to opAVX_X_X_XM.
void vrcp28sd(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_SAE_X | T_MUST_EVEX, 0xCB); }
// vrcp28ss x1, x2, op: scalar counterpart of vrcp28ps; forwards opcode 0xCB with T_N4 | T_EW0 | T_SAE_X to opAVX_X_X_XM.
void vrcp28ss(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_SAE_X | T_MUST_EVEX, 0xCB); }
// vreducepd x, op, imm: forwards opcode 0x56 and the immediate byte to opAVX_X_XM_IMM with T_EW1 | T_B64 | T_SAE_Z (vreduceps: T_EW0 | T_B32).
void vreducepd(Xmm x, Operand op, uint8 imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x56, imm); }
// vreduceps x, op, imm: forwards opcode 0x56 and the immediate byte to opAVX_X_XM_IMM with T_EW0 | T_B32 | T_SAE_Z (vreducepd: T_EW1 | T_B64).
void vreduceps(Xmm x, Operand op, uint8 imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x56, imm); }
// vreducesd x1, x2, op, imm: scalar counterpart of vreducepd; forwards opcode 0x57 and imm to opAVX_X_X_XM with T_N8 | T_EW1 | T_SAE_X.
void vreducesd(Xmm x1, Xmm x2, Operand op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x57, imm); }
// vreducess x1, x2, op, imm: scalar counterpart of vreduceps; forwards opcode 0x57 and imm to opAVX_X_X_XM with T_N4 | T_EW0 | T_SAE_X.
void vreducess(Xmm x1, Xmm x2, Operand op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x57, imm); }
// vrndscalepd x, op, imm: forwards opcode 0x09 and the immediate byte to opAVX_X_XM_IMM with T_EW1 | T_B64. Note the ps form uses a different opcode (0x08), not just different flags.
void vrndscalepd(Xmm x, Operand op, uint8 imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x09, imm); }
// vrndscaleps x, op, imm: forwards opcode 0x08 and the immediate byte to opAVX_X_XM_IMM with T_EW0 | T_B32. Note the pd form uses a different opcode (0x09), not just different flags.
void vrndscaleps(Xmm x, Operand op, uint8 imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x08, imm); }
// vrndscalesd x1, x2, op, imm: scalar counterpart of vrndscalepd; forwards opcode 0x0B and imm to opAVX_X_X_XM with T_N8 | T_EW1.
void vrndscalesd(Xmm x1, Xmm x2, Operand op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_MUST_EVEX, 0x0B, imm); }
// vrndscaless x1, x2, op, imm: scalar counterpart of vrndscaleps; forwards opcode 0x0A and imm to opAVX_X_X_XM with T_N4 | T_EW0.
void vrndscaless(Xmm x1, Xmm x2, Operand op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_MUST_EVEX, 0x0A, imm); }
// vrsqrt14pd x, op: forwards opcode 0x4E with T_EW1 | T_B64 to opAVX_X_XM_IMM; no immediate argument is supplied despite the helper's name.
void vrsqrt14pd(Xmm x, Operand op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x4E); }
// vrsqrt14ps x, op: forwards opcode 0x4E with T_EW0 | T_B32 to opAVX_X_XM_IMM; no immediate argument is supplied despite the helper's name.
void vrsqrt14ps(Xmm x, Operand op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x4E); }
// vrsqrt14sd x1, x2, op: forwards opcode 0x4F with T_N8 | T_EW1 to opAVX_X_X_XM.
// NOTE(review): unlike the other scalar forms here (e.g. vrcp14sd) this also passes T_YMM - confirm that is intentional.
void vrsqrt14sd(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x4F); }
// vrsqrt14ss x1, x2, op: forwards opcode 0x4F with T_N4 | T_EW0 to opAVX_X_X_XM.
// NOTE(review): unlike the other scalar forms here (e.g. vrcp14ss) this also passes T_YMM - confirm that is intentional.
void vrsqrt14ss(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x4F); }
// vrsqrt28pd z, op: the destination is typed Zmm, so only Zmm destinations are accepted; forwards opcode 0xCC with T_EW1 | T_B64 | T_SAE_Z to opAVX_X_XM_IMM.
void vrsqrt28pd(Zmm z, Operand op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, 0xCC); }
// vrsqrt28ps z, op: the destination is typed Zmm, so only Zmm destinations are accepted; forwards opcode 0xCC with T_EW0 | T_B32 | T_SAE_Z to opAVX_X_XM_IMM.
void vrsqrt28ps(Zmm z, Operand op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xCC); }
// vrsqrt28sd x1, x2, op: scalar counterpart of vrsqrt28pd; forwards opcode 0xCD with T_N8 | T_EW1 | T_SAE_X to opAVX_X_X_XM.
void vrsqrt28sd(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_SAE_X | T_MUST_EVEX, 0xCD); }
// vrsqrt28ss x1, x2, op: scalar counterpart of vrsqrt28ps; forwards opcode 0xCD with T_N4 | T_EW0 | T_SAE_X to opAVX_X_X_XM.
void vrsqrt28ss(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_SAE_X | T_MUST_EVEX, 0xCD); }
// vscalefpd x1, x2, op: forwards opcode 0x2C with T_EW1 | T_B64 | T_ER_Z to opAVX_X_X_XM (vscalefps: T_EW0 | T_B32).
void vscalefpd(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x2C); }
// vscalefps x1, x2, op: forwards opcode 0x2C with T_EW0 | T_B32 | T_ER_Z to opAVX_X_X_XM (vscalefpd: T_EW1 | T_B64).
void vscalefps(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0x2C); }
// vscalefsd x1, x2, op: scalar counterpart of vscalefpd; forwards opcode 0x2D with T_N8 | T_EW1 | T_ER_X to opAVX_X_X_XM.
void vscalefsd(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_ER_X | T_MUST_EVEX, 0x2D); }
// vscalefss x1, x2, op: scalar counterpart of vscalefps; forwards opcode 0x2D with T_N4 | T_EW0 | T_ER_X to opAVX_X_X_XM.
void vscalefss(Xmm x1, Xmm x2, Operand op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_ER_X | T_MUST_EVEX, 0x2D); }
// vscatterdpd addr, x: memory operand comes first in the signature but the helper takes the register first. Forwards opcode 0xA2 and mode 1 to opGather2 (the mode's meaning is defined by opGather2).
void vscatterdpd(Address addr, Xmm x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA2, 1); }
// vscatterdps addr, x: memory operand comes first in the signature but the helper takes the register first. Forwards opcode 0xA2 and mode 0 to opGather2 (the mode's meaning is defined by opGather2).
void vscatterdps(Address addr, Xmm x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA2, 0); }
// vscatterpf0dpd addr: takes only an address; passes the fixed operand zm5 (the pf1 variants pass zm6), opcode 0xC6 and Operand.Kind.YMM to opGatherFetch.
void vscatterpf0dpd(Address addr) { opGatherFetch(addr, zm5, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand.Kind.YMM); }
// vscatterpf0dps addr: takes only an address; passes the fixed operand zm5 (the pf1 variants pass zm6), opcode 0xC6 and Operand.Kind.ZMM to opGatherFetch.
void vscatterpf0dps(Address addr) { opGatherFetch(addr, zm5, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand.Kind.ZMM); }
// vscatterpf0qpd addr: takes only an address; passes the fixed operand zm5 (the pf1 variants pass zm6), opcode 0xC7 and Operand.Kind.ZMM to opGatherFetch.
void vscatterpf0qpd(Address addr) { opGatherFetch(addr, zm5, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand.Kind.ZMM); }
// vscatterpf0qps addr: takes only an address; passes the fixed operand zm5 (the pf1 variants pass zm6), opcode 0xC7 and Operand.Kind.ZMM to opGatherFetch.
void vscatterpf0qps(Address addr) { opGatherFetch(addr, zm5, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand.Kind.ZMM); }
// vscatterpf1dpd addr: takes only an address; passes the fixed operand zm6 (the pf0 variants pass zm5), opcode 0xC6 and Operand.Kind.YMM to opGatherFetch.
void vscatterpf1dpd(Address addr) { opGatherFetch(addr, zm6, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand.Kind.YMM); }
// vscatterpf1dps addr: takes only an address; passes the fixed operand zm6 (the pf0 variants pass zm5), opcode 0xC6 and Operand.Kind.ZMM to opGatherFetch.
void vscatterpf1dps(Address addr) { opGatherFetch(addr, zm6, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand.Kind.ZMM); }
// vscatterpf1qpd addr: takes only an address; passes the fixed operand zm6 (the pf0 variants pass zm5), opcode 0xC7 and Operand.Kind.ZMM to opGatherFetch.
void vscatterpf1qpd(Address addr) { opGatherFetch(addr, zm6, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand.Kind.ZMM); }
// vscatterpf1qps addr: takes only an address; passes the fixed operand zm6 (the pf0 variants pass zm5), opcode 0xC7 and Operand.Kind.ZMM to opGatherFetch.
void vscatterpf1qps(Address addr) { opGatherFetch(addr, zm6, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand.Kind.ZMM); }
// vscatterqpd addr, x: memory operand comes first in the signature but the helper takes the register first. Forwards opcode 0xA3 and mode 0 to opGather2 (the mode's meaning is defined by opGather2).
void vscatterqpd(Address addr, Xmm x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA3, 0); }
// vscatterqps addr, x: memory operand comes first in the signature but the helper takes the register first. Forwards opcode 0xA3 and mode 2 to opGather2 (the mode's meaning is defined by opGather2).
void vscatterqps(Address addr, Xmm x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA3, 2); }
// vshuff32x4 y1, y2, op, imm: register parameters are typed Ymm (not Xmm); forwards opcode 0x23 and imm to opAVX_X_X_XM with T_EW0 | T_B32 (vshuff64x2: T_EW1 | T_B64).
void vshuff32x4(Ymm y1, Ymm y2, Operand op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x23, imm); }
// vshuff64x2 y1, y2, op, imm: register parameters are typed Ymm (not Xmm); forwards opcode 0x23 and imm to opAVX_X_X_XM with T_EW1 | T_B64 (vshuff32x4: T_EW0 | T_B32).
void vshuff64x2(Ymm y1, Ymm y2, Operand op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x23, imm); }
// vshufi32x4 y1, y2, op, imm: register parameters are typed Ymm (not Xmm); forwards opcode 0x43 and imm to opAVX_X_X_XM with T_EW0 | T_B32 (vshufi64x2: T_EW1 | T_B64).
void vshufi32x4(Ymm y1, Ymm y2, Operand op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x43, imm); }
// vshufi64x2 y1, y2, op, imm: register parameters are typed Ymm (not Xmm); forwards opcode 0x43 and imm to opAVX_X_X_XM with T_EW1 | T_B64 (vshufi32x4: T_EW0 | T_B32).
void vshufi64x2(Ymm y1, Ymm y2, Operand op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x43, imm); }
// Mnemonics that take a Reg64 operand; compiled only when XBYAK64 is set
// (the module sets it under version(X86_64)).
version(XBYAK64)
{
// kmovq k, r: Reg64 -> Opmask direction; opcode 0x92, null is passed as opVex's middle operand.
void kmovq(Opmask k, Reg64 r) { opVex(k, null, r, T_L0 | T_0F | T_F2 | T_W1, 0x92); }
// kmovq r, k: Opmask -> Reg64 direction; same flags as above but opcode 0x93.
void kmovq(Reg64 r, Opmask k) { opVex(r, null, k, T_L0 | T_0F | T_F2 | T_W1, 0x93); }
// vpbroadcastq x, r: Reg64 source form; opcode 0x7C with T_EW1 | T_MUST_EVEX, null passed as opVex's middle operand.
void vpbroadcastq(Xmm x, Reg64 r) { opVex(x, null, r, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x7C); }
}
5527 }
5528 
5529 
5530 
5531 }
5532 
5533 // CodeGenerator
// Module-level shorthands for the LabelType members, so callers can write
// T_NEAR instead of LabelType.T_NEAR.
alias T_SHORT = LabelType.T_SHORT;
alias T_NEAR  = LabelType.T_NEAR;
alias T_AUTO  = LabelType.T_AUTO;
5537 
// Generates D source text that re-exports CodeGenerator members at module
// scope. For every entry of `names`, in order, one line of the form
//     alias NAME = CodeGenerator.NAME;
// is produced (each terminated by '\n'). The returned string is intended to
// be handed to mixin(), so this must stay evaluable at compile time.
// An empty `names` yields an empty string.
string def_alias(string[] names)
{
	return names
		.map!(id => "alias " ~ id ~ " = CodeGenerator." ~ id ~ ";\n")
		.join;
}
5546 
// Module-scope aliases for CodeGenerator members. Each mixin expands (via
// def_alias) to one `alias NAME = CodeGenerator.NAME;` per listed name, so
// user code can write `eax` instead of `CodeGenerator.eax`.
// NOTE(review): aliasing through the type name presumably requires these to
// be static members of CodeGenerator - confirm against the class definition.

// mm*, xmm* and ymm* names, indices 0-7.
mixin(["mm0","mm1","mm2","mm3","mm4","mm5","mm6","mm7"].def_alias);
mixin(["xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7"].def_alias);
mixin(["ymm0","ymm1","ymm2","ymm3","ymm4","ymm5","ymm6","ymm7"].def_alias);

// 32-, 16- and 8-bit general register names, then the memory-size/address
// names (`byte_` carries a trailing underscore because `byte` is a D keyword).
mixin(["eax","ecx","edx","ebx","esp","ebp","esi","edi"].def_alias);
mixin(["ax","cx","dx","bx","sp","bp","si","di"].def_alias);
mixin(["al","cl","dl","bl","ah","ch","dh","bh"].def_alias);
mixin(["ptr","byte_","word","dword","qword"].def_alias);

// st0-st7 names.
mixin(["st0","st1","st2","st3","st4","st5","st6","st7"].def_alias);
5557 
// Additional module-scope aliases that exist only in 64-bit builds
// (XBYAK64 is set by the module under version(X86_64)). Same mechanism as
// the unconditional aliases: def_alias emits `alias NAME = CodeGenerator.NAME;`.
version (XBYAK64)
{
    // 64-bit names plus r8-r15 in their 64/32/16/8-bit spellings.
    mixin(["rax","rcx","rdx","rbx","rsp","rbp","rsi","rdi"].def_alias);
    mixin(["r8","r9","r10","r11","r12","r13","r14","r15"].def_alias);
    mixin(["r8d","r9d","r10d","r11d","r12d","r13d","r14d","r15d"].def_alias);
    mixin(["r8w","r9w","r10w","r11w","r12w","r13w","r14w","r15w"].def_alias);
    mixin(["r8b","r9b","r10b","r11b","r12b","r13b","r14b","r15b"].def_alias);
    
    // Extra 8-bit names, xmm/ymm indices 8-15, and rip.
    mixin(["spl","bpl","sil","dil"].def_alias);
    mixin(["xmm8","xmm9","xmm10","xmm11","xmm12","xmm13","xmm14","xmm15"].def_alias);
    mixin(["ymm8","ymm9","ymm10","ymm11","ymm12","ymm13","ymm14","ymm15"].def_alias);
    mixin(["rip"].def_alias);
}
5571 
// Module-scope shorthands for the six Segment members. They are declared
// unless the build defines XBYAK_DISABLE_SEGMENT (the empty `{}` branch is
// the "disabled" case; the aliases live in the else branch).
version(XBYAK_DISABLE_SEGMENT){}
else
{
	alias es = Segment.es;
    alias cs = Segment.cs;
    alias ss = Segment.ss;
    alias ds = Segment.ds;
    alias fs = Segment.fs;
    alias gs = Segment.gs;
}
5582