@@ -1539,7 +1539,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
1539
1539
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
1540
1540
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
1541
1541
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
1542
- setOperationAction(ISD::OR, VT, Custom);
1543
1542
1544
1543
setOperationAction(ISD::SELECT_CC, VT, Expand);
1545
1544
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
@@ -14329,128 +14328,8 @@ static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
14329
14328
return ResultSLI;
14330
14329
}
14331
14330
14332
- /// Try to lower the construction of a pointer alias mask to a WHILEWR.
14333
- /// The mask's enabled lanes represent the elements that will not overlap across
14334
- /// one loop iteration. This tries to match:
14335
- /// or (splat (setcc_lt (sub ptrA, ptrB), -(element_size - 1))),
14336
- /// (get_active_lane_mask 0, (div (sub ptrA, ptrB), element_size))
14337
- SDValue tryWhileWRFromOR(SDValue Op, SelectionDAG &DAG,
14338
- const AArch64Subtarget &Subtarget) {
14339
- if (!Subtarget.hasSVE2())
14340
- return SDValue();
14341
- SDValue LaneMask = Op.getOperand(0);
14342
- SDValue Splat = Op.getOperand(1);
14343
-
14344
- if (Splat.getOpcode() != ISD::SPLAT_VECTOR)
14345
- std::swap(LaneMask, Splat);
14346
-
14347
- if (LaneMask.getOpcode() != ISD::INTRINSIC_WO_CHAIN ||
14348
- LaneMask.getConstantOperandVal(0) != Intrinsic::get_active_lane_mask ||
14349
- Splat.getOpcode() != ISD::SPLAT_VECTOR)
14350
- return SDValue();
14351
-
14352
- SDValue Cmp = Splat.getOperand(0);
14353
- if (Cmp.getOpcode() != ISD::SETCC)
14354
- return SDValue();
14355
-
14356
- CondCodeSDNode *Cond = cast<CondCodeSDNode>(Cmp.getOperand(2));
14357
-
14358
- auto ComparatorConst = dyn_cast<ConstantSDNode>(Cmp.getOperand(1));
14359
- if (!ComparatorConst || ComparatorConst->getSExtValue() > 0 ||
14360
- Cond->get() != ISD::CondCode::SETLT)
14361
- return SDValue();
14362
- unsigned CompValue = std::abs(ComparatorConst->getSExtValue());
14363
- unsigned EltSize = CompValue + 1;
14364
- if (!isPowerOf2_64(EltSize) || EltSize > 8)
14365
- return SDValue();
14366
-
14367
- SDValue Diff = Cmp.getOperand(0);
14368
- if (Diff.getOpcode() != ISD::SUB || Diff.getValueType() != MVT::i64)
14369
- return SDValue();
14370
-
14371
- if (!isNullConstant(LaneMask.getOperand(1)) ||
14372
- (EltSize != 1 && LaneMask.getOperand(2).getOpcode() != ISD::SRA))
14373
- return SDValue();
14374
-
14375
- // The number of elements that alias is calculated by dividing the positive
14376
- // difference between the pointers by the element size. An alias mask for i8
14377
- // elements omits the division because it would just divide by 1.
14378
- if (EltSize > 1) {
14379
- SDValue DiffDiv = LaneMask.getOperand(2);
14380
- auto DiffDivConst = dyn_cast<ConstantSDNode>(DiffDiv.getOperand(1));
14381
- if (!DiffDivConst || DiffDivConst->getZExtValue() != Log2_64(EltSize))
14382
- return SDValue();
14383
- if (EltSize > 2) {
14384
- // When masking i32 or i64 elements, the positive value of the
14385
- // possibly-negative difference comes from a select of the difference if
14386
- // it's positive, otherwise the difference plus (element size - 1) if it's
14387
- // negative: pos_diff = diff < 0 ? (diff + (element_size - 1)) : diff
14388
- SDValue Select = DiffDiv.getOperand(0);
14389
- // Make sure the select's false value is the difference (note: operand 0, the value actually compared, is not checked here)
14390
- if (Select.getOpcode() != ISD::SELECT_CC || Select.getOperand(3) != Diff)
14391
- return SDValue();
14392
- // Make sure it's checking if the difference is less than 0
14393
- if (!isNullConstant(Select.getOperand(1)) ||
14394
- cast<CondCodeSDNode>(Select.getOperand(4))->get() !=
14395
- ISD::CondCode::SETLT)
14396
- return SDValue();
14397
- // An add of (element size - 1) biases the negative difference so that the arithmetic shift rounds toward zero
14398
- SDValue Add = Select.getOperand(2);
14399
- if (Add.getOpcode() != ISD::ADD || Add.getOperand(0) != Diff)
14400
- return SDValue();
14401
- if (auto *AddConst = dyn_cast<ConstantSDNode>(Add.getOperand(1));
14402
- !AddConst || AddConst->getZExtValue() != EltSize - 1)
14403
- return SDValue();
14404
- } else {
14405
- // When masking i16 elements, this positive value comes from adding the
14406
- // difference's sign bit to the difference itself. This is equivalent to
14407
- // the 32 bit and 64 bit case: pos_diff = diff + sign_bit (diff)
14408
- SDValue Add = DiffDiv.getOperand(0);
14409
- if (Add.getOpcode() != ISD::ADD || Add.getOperand(0) != Diff)
14410
- return SDValue();
14411
- // A logical right shift by 63 extracts the sign bit from the difference
14412
- SDValue Shift = Add.getOperand(1);
14413
- if (Shift.getOpcode() != ISD::SRL || Shift.getOperand(0) != Diff)
14414
- return SDValue();
14415
- if (auto *ShiftConst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
14416
- !ShiftConst || ShiftConst->getZExtValue() != 63)
14417
- return SDValue();
14418
- }
14419
- } else if (LaneMask.getOperand(2) != Diff)
14420
- return SDValue();
14421
-
14422
- SDValue StorePtr = Diff.getOperand(0);
14423
- SDValue ReadPtr = Diff.getOperand(1);
14424
-
14425
- unsigned IntrinsicID = 0;
14426
- switch (EltSize) {
14427
- case 1:
14428
- IntrinsicID = Intrinsic::aarch64_sve_whilewr_b;
14429
- break;
14430
- case 2:
14431
- IntrinsicID = Intrinsic::aarch64_sve_whilewr_h;
14432
- break;
14433
- case 4:
14434
- IntrinsicID = Intrinsic::aarch64_sve_whilewr_s;
14435
- break;
14436
- case 8:
14437
- IntrinsicID = Intrinsic::aarch64_sve_whilewr_d;
14438
- break;
14439
- default:
14440
- return SDValue();
14441
- }
14442
- SDLoc DL(Op);
14443
- SDValue ID = DAG.getConstant(IntrinsicID, DL, MVT::i32);
14444
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(), ID,
14445
- StorePtr, ReadPtr);
14446
- }
14447
-
14448
14331
SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
14449
14332
SelectionDAG &DAG) const {
14450
- if (SDValue SV =
14451
- tryWhileWRFromOR(Op, DAG, DAG.getSubtarget<AArch64Subtarget>()))
14452
- return SV;
14453
-
14454
14333
if (useSVEForFixedLengthVectorVT(Op.getValueType(),
14455
14334
!Subtarget->isNeonAvailable()))
14456
14335
return LowerToScalableOp(Op, DAG);
0 commit comments